From 39c83f7d3bbaa54733783376f59db979793d2a1b Mon Sep 17 00:00:00 2001 From: Packit Date: Aug 25 2020 07:44:51 +0000 Subject: device-mapper-multipath-0.8.4 base --- diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9926756 --- /dev/null +++ b/.gitignore @@ -0,0 +1,25 @@ +*.o +.dotest +*~ +*.so +*.so.0 +*.a +*.gz +*.d +kpartx/kpartx +multipath/multipath +multipathd/multipathd +mpathpersist/mpathpersist +.nfs* +*.swp +*.patch +*.rej +*.orig +libdmmp/docs/man/*.3.gz +libdmmp/*.so.* +libdmmp/test/libdmmp_test +libdmmp/test/libdmmp_speed_test +tests/*-test +tests/*.out +libmultipath/nvme-ioctl.c +libmultipath/nvme-ioctl.h diff --git a/COPYING b/COPYING new file mode 120000 index 0000000..0c0462d --- /dev/null +++ b/COPYING @@ -0,0 +1 @@ +LICENSES/LGPL-2.0 \ No newline at end of file diff --git a/LICENSES/GPL-2.0 b/LICENSES/GPL-2.0 new file mode 100644 index 0000000..d159169 --- /dev/null +++ b/LICENSES/GPL-2.0 @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. 
diff --git a/LICENSES/GPL-3.0 b/LICENSES/GPL-3.0 new file mode 100644 index 0000000..f288702 --- /dev/null +++ b/LICENSES/GPL-3.0 @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. 
And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. 
+ + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. 
+ + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. 
Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. 
+ + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. 
This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<https://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<https://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/LICENSES/LGPL-2.0 b/LICENSES/LGPL-2.0 new file mode 100644 index 0000000..5bc8fb2 --- /dev/null +++ b/LICENSES/LGPL-2.0 @@ -0,0 +1,481 @@ + GNU LIBRARY GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1991 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the library GPL. It is + numbered 2 because it goes with version 2 of the ordinary GPL.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Library General Public License, applies to some +specially designated Free Software Foundation software, and to any +other libraries whose authors decide to use it. You can use it for +your libraries, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if +you distribute copies of the library, or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. 
You must make sure that they, too, receive or can get the source +code. If you link a program with the library, you must provide +complete object files to the recipients so that they can relink them +with the library, after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + Our method of protecting your rights has two steps: (1) copyright +the library, and (2) offer you this license which gives you legal +permission to copy, distribute and/or modify the library. + + Also, for each distributor's protection, we want to make certain +that everyone understands that there is no warranty for this free +library. If the library is modified by someone else and passed on, we +want its recipients to know that what they have is not the original +version, so that any problems introduced by others will not reflect on +the original authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that companies distributing free +software will individually obtain patent licenses, thus in effect +transforming the program into proprietary software. To prevent this, +we have made it clear that any patent must be licensed for everyone's +free use or not licensed at all. + + Most GNU software, including some libraries, is covered by the ordinary +GNU General Public License, which was designed for utility programs. This +license, the GNU Library General Public License, applies to certain +designated libraries. This license is quite different from the ordinary +one; be sure to read it in full, and don't assume that anything in it is +the same as in the ordinary license. + + The reason we have a separate public license for some libraries is that +they blur the distinction we usually make between modifying or adding to a +program and simply using it. 
Linking a program with a library, without +changing the library, is in some sense simply using the library, and is +analogous to running a utility program or application program. However, in +a textual and legal sense, the linked executable is a combined work, a +derivative of the original library, and the ordinary General Public License +treats it as such. + + Because of this blurred distinction, using the ordinary General +Public License for libraries did not effectively promote software +sharing, because most developers did not use the libraries. We +concluded that weaker conditions might promote sharing better. + + However, unrestricted linking of non-free programs would deprive the +users of those programs of all benefit from the free status of the +libraries themselves. This Library General Public License is intended to +permit developers of non-free programs to use free libraries, while +preserving your freedom as a user of such programs to change the free +libraries that are incorporated in them. (We have not seen how to achieve +this as regards changes in header files, but we have achieved it as regards +changes in the actual functions of the Library.) The hope is that this +will lead to faster development of free libraries. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, while the latter only +works together with the library. + + Note that it is possible for a library to be covered by the ordinary +General Public License rather than by this special one. + + GNU LIBRARY GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. 
This License Agreement applies to any software library which +contains a notice placed by the copyright holder or other authorized +party saying it may be distributed under the terms of this Library +General Public License (also called "this License"). Each licensee is +addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. 
You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) 
+ +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also compile or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + c) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + d) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the source code distributed need not include anything that is normally +distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Library General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! diff --git a/LICENSES/LGPL-2.1 b/LICENSES/LGPL-2.1 new file mode 100644 index 0000000..4362b49 --- /dev/null +++ b/LICENSES/LGPL-2.1 @@ -0,0 +1,502 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL.
It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. 
And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. 
It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. 
This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. 
You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) 
+ +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + , 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..1dee368 --- /dev/null +++ b/Makefile @@ -0,0 +1,55 @@ +# +# Copyright (C) 2003 Christophe Varoqui, +# + +BUILDDIRS := \ + libmpathcmd \ + libmultipath \ + libmultipath/prioritizers \ + libmultipath/checkers \ + libmultipath/foreign \ + libmpathpersist \ + multipath \ + multipathd \ + mpathpersist \ + kpartx + +ifneq ($(ENABLE_LIBDMMP),0) +BUILDDIRS += \ + libdmmp +endif + +BUILDDIRS.clean := $(BUILDDIRS:=.clean) tests.clean + +.PHONY: $(BUILDDIRS) $(BUILDDIRS:=.uninstall) $(BUILDDIRS:=.install) $(BUILDDIRS.clean) + +all: $(BUILDDIRS) + +$(BUILDDIRS): + $(MAKE) -C $@ + +multipath multipathd mpathpersist: libmultipath +mpathpersist: libmpathpersist + +$(BUILDDIRS.clean): + $(MAKE) -C ${@:.clean=} clean + +$(BUILDDIRS:=.install): + $(MAKE) -C ${@:.install=} install + +$(BUILDDIRS:=.uninstall): + $(MAKE) -C ${@:.uninstall=} uninstall + +clean: $(BUILDDIRS.clean) +install: $(BUILDDIRS:=.install) +uninstall: $(BUILDDIRS:=.uninstall) + +test: all + $(MAKE) -C tests + +.PHONY: TAGS +TAGS: + etags -a libmultipath/*.c + etags -a libmultipath/*.h + etags -a multipathd/*.c + etags -a multipathd/*.h diff --git a/Makefile.inc b/Makefile.inc new file mode 100644 index 0000000..d4d1e0d --- /dev/null +++ b/Makefile.inc @@ -0,0 +1,138 @@ +# +# Copyright (C) 2004 Christophe Varoqui, +# + +# +# Allow to force some libraries to be used statically. (Uncomment one of the +# following lines or define the values when calling make.) +# +# WITH_LOCAL_LIBDM = 1 +# WITH_LOCAL_LIBSYSFS = 1 +# +# Uncomment to disable libdmmp support +# ENABLE_LIBDMMP = 0 +# +# Uncomment to disable dmevents polling support +# ENABLE_DMEVENTS_POLL = 0 + +ifeq ($(TOPDIR),) + TOPDIR = .. +endif + +ifndef LIB + ifeq ($(shell test -d /lib64 && echo 1),1) + LIB=lib64 + else + LIB=lib + endif +endif + +ifndef RUN + ifeq ($(shell test -L /var/run -o ! 
-d /var/run && echo 1),1) + RUN=run + else + RUN=var/run + endif +endif + +ifndef SYSTEMD + ifeq ($(shell pkg-config --modversion libsystemd >/dev/null 2>&1 && echo 1), 1) + SYSTEMD = $(shell pkg-config --modversion libsystemd) + else + ifeq ($(shell systemctl --version >/dev/null 2>&1 && echo 1), 1) + SYSTEMD = $(shell systemctl --version 2> /dev/null | \ + sed -n 's/systemd \([0-9]*\).*/\1/p') + endif + endif +endif + +ifndef SYSTEMDPATH + SYSTEMDPATH=usr/lib +endif + +prefix = +exec_prefix = $(prefix) +usr_prefix = $(prefix) +bindir = $(exec_prefix)/sbin +libudevdir = $(prefix)/$(SYSTEMDPATH)/udev +udevrulesdir = $(libudevdir)/rules.d +multipathdir = $(TOPDIR)/libmultipath +man8dir = $(prefix)/usr/share/man/man8 +man5dir = $(prefix)/usr/share/man/man5 +man3dir = $(prefix)/usr/share/man/man3 +syslibdir = $(prefix)/$(LIB) +usrlibdir = $(usr_prefix)/$(LIB) +libdir = $(prefix)/$(LIB)/multipath +unitdir = $(prefix)/$(SYSTEMDPATH)/systemd/system +mpathpersistdir = $(TOPDIR)/libmpathpersist +mpathcmddir = $(TOPDIR)/libmpathcmd +thirdpartydir = $(TOPDIR)/third-party +libdmmpdir = $(TOPDIR)/libdmmp +nvmedir = $(TOPDIR)/libmultipath/nvme +includedir = $(prefix)/usr/include +pkgconfdir = $(usrlibdir)/pkgconfig + +GZIP = gzip -9 -c +RM = rm -f +LN = ln -sf +INSTALL_PROGRAM = install + +# $(call TEST_CC_OPTION,option,fallback) +# Test if the C compiler supports the option. +# Evaluates to "option" if yes, and "fallback" otherwise. 
+TEST_CC_OPTION = $(shell \ + if echo 'int main(void){return 0;}' | \ + $(CC) -o /dev/null -c -Werror "$(1)" -xc - >/dev/null 2>&1; \ + then \ + echo "$(1)"; \ + else \ + echo "$(2)"; \ + fi) + +STACKPROT := $(call TEST_CC_OPTION,-fstack-protector-strong,-fstack-protector) +ERROR_DISCARDED_QUALIFIERS := $(call TEST_CC_OPTION,-Werror=discarded-qualifiers,) +WNOCLOBBERED := $(call TEST_CC_OPTION,-Wno-clobbered,) + +OPTFLAGS = -O2 -g -pipe -Werror -Wall -Wextra -Wformat=2 -Werror=implicit-int \ + -Werror=implicit-function-declaration -Werror=format-security \ + $(WNOCLOBBERED) \ + -Werror=cast-qual $(ERROR_DISCARDED_QUALIFIERS) \ + $(STACKPROT) --param=ssp-buffer-size=4 +CPPFLAGS := -Wp,-D_FORTIFY_SOURCE=2 +CFLAGS := $(OPTFLAGS) -DBIN_DIR=\"$(bindir)\" -DLIB_STRING=\"${LIB}\" -DRUN_DIR=\"${RUN}\" \ + -MMD -MP $(CFLAGS) +BIN_CFLAGS = -fPIE -DPIE +LIB_CFLAGS = -fPIC +SHARED_FLAGS = -shared +LDFLAGS = -Wl,-z,relro -Wl,-z,now +BIN_LDFLAGS = -pie + +# Check whether a function with name $1 has been declared in header file $2. +check_func = $(shell \ + if grep -Eq "^[^[:blank:]]+[[:blank:]]+$1[[:blank:]]*(.*)*" "$2"; then \ + found=1; \ + status="yes"; \ + else \ + found=0; \ + status="no"; \ + fi; \ + echo 1>&2 "Checking for $1 in $2 ... $$status"; \ + echo "$$found" \ + ) + +# Checker whether a file with name $1 exists +check_file = $(shell \ + if [ -f "$1" ]; then \ + found=1; \ + status="yes"; \ + else \ + found=0; \ + status="no"; \ + fi; \ + echo 1>&2 "Checking if $1 exists ... $$status"; \ + echo "$$found" \ + ) + +%.o: %.c + @echo building $@ because of $? + $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $< diff --git a/README b/README new file mode 100644 index 0000000..2fc4a81 --- /dev/null +++ b/README @@ -0,0 +1,60 @@ + multipath-tools for Linux + + +This package provides the following binaries to drive the Device Mapper +multipathing driver: + +multipath - Device mapper target autoconfig. +multipathd - Multipath daemon. 
+mpathpersist - Manages SCSI persistent reservations on dm multipath devices. +kpartx - Create device maps from partition tables. + + +Releases +======== +Tarballs are not generated anymore, to get a specific release do: +git clone https://git.opensvc.com/multipath-tools/.git +cd multipath-tools +git tag +git archive --format=tar.gz --prefix=multipath-tools-X.Y.Z/ X.Y.Z > ../multipath-tools-X.Y.Z.tar.gz + +Alternatively it may be obtained from gitweb, go to: +https://git.opensvc.com/?p=multipath-tools/.git;a=tags +select a release-tag and then click on "snapshot". Or get it with +wget "https://git.opensvc.com/?p=multipath-tools/.git;a=snapshot;sf=tgz;h=refs/tags/X.Y.Z" -O multipath-tools-X.Y.Z.tar.gz + + +Source code +=========== +To get latest devel code: git clone https://git.opensvc.com/multipath-tools/.git +Gitweb: https://git.opensvc.com/?p=multipath-tools/.git + + +Add storage devices +=================== +Follow the instructions in the libmultipath/hwtable.c header. + + +Mailing list (subscribers-only) +============ +To subscribe and archives: https://www.redhat.com/mailman/listinfo/dm-devel +Searchable: https://marc.info/?l=dm-devel + + +Changelog +========= +pre-0.4.5: https://web.archive.org/web/20070309224034/http://christophe.varoqui.free.fr/wiki/wakka.php?wiki=ChangeLog +post-0.4.5: https://git.opensvc.com/?p=multipath-tools/.git;a=log + + +Maintainer +========== +Christophe Varoqui +Device-mapper development mailing list + +Licence +======= +The multipath-tools source code is covered by several different +licences. Refer to the individual source files for details. +Source files which do not specify a licence are shipped under +LGPL-2.0 (see LICENSES/LGPL-2.0). diff --git a/README.alua b/README.alua new file mode 100644 index 0000000..340ccba --- /dev/null +++ b/README.alua @@ -0,0 +1,21 @@ +This is a rough guide, consult your storage device manufacturer documentation. + +ALUA is supported in some devices, but usually it's disabled by default. 
+To enable ALUA, the following options should be changed: + +- EMC CLARiiON/VNX: + "Failover Mode" should be changed to "4". + +- HPE 3PAR: + "Host:" should be changed to "Generic-ALUA Persona 2 (UARepLun, SESLun, ALUA)". + +- Promise VTrak/Vess: + "LUN Affinity" and "ALUA" should be changed to "Enable", "Redundancy Type" + must be "Active-Active". + +- LSI/Engenio/NetApp RDAC class, as NetApp SANtricity E/EF Series and OEM arrays: + "Select operating system:" should be changed to "Linux DM-MP (Kernel 3.10 or later)". + +- NetApp ONTAP: + To check ALUA state: "igroup show -v ", and to enable ALUA: + "igroup set alua yes". diff --git a/kpartx/Makefile b/kpartx/Makefile new file mode 100644 index 0000000..2906a98 --- /dev/null +++ b/kpartx/Makefile @@ -0,0 +1,53 @@ +# +# Copyright (C) 2003 Christophe Varoqui, +# +include ../Makefile.inc + +CFLAGS += $(BIN_CFLAGS) -I. -I$(multipathdir) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 +LDFLAGS += $(BIN_LDFLAGS) + +LIBDEPS += -ldevmapper + +ifneq ($(call check_func,dm_task_set_cookie,/usr/include/libdevmapper.h),0) + CFLAGS += -DLIBDM_API_COOKIE +endif + +OBJS = bsd.o dos.o kpartx.o solaris.o unixware.o dasd.o sun.o \ + gpt.o mac.o ps3.o crc32.o lopart.o xstrncpy.o devmapper.o + +EXEC = kpartx + +all: $(EXEC) + +$(EXEC): $(OBJS) + $(CC) $(CFLAGS) $(OBJS) -o $(EXEC) $(LDFLAGS) $(LIBDEPS) + $(GZIP) $(EXEC).8 > $(EXEC).8.gz + +install: $(EXEC) $(EXEC).8 + $(INSTALL_PROGRAM) -d $(DESTDIR)$(bindir) + $(INSTALL_PROGRAM) -m 755 $(EXEC) $(DESTDIR)$(bindir) + $(INSTALL_PROGRAM) -d $(DESTDIR)$(libudevdir) + $(INSTALL_PROGRAM) -m 755 kpartx_id $(DESTDIR)$(libudevdir) + $(INSTALL_PROGRAM) -d $(DESTDIR)$(libudevdir)/rules.d + $(INSTALL_PROGRAM) -m 644 dm-parts.rules $(DESTDIR)$(libudevdir)/rules.d/11-dm-parts.rules + $(INSTALL_PROGRAM) -m 644 kpartx.rules $(DESTDIR)$(libudevdir)/rules.d/66-kpartx.rules + $(INSTALL_PROGRAM) -m 644 del-part-nodes.rules $(DESTDIR)$(libudevdir)/rules.d/68-del-part-nodes.rules + $(INSTALL_PROGRAM) -d 
$(DESTDIR)$(man8dir) + $(INSTALL_PROGRAM) -m 644 $(EXEC).8.gz $(DESTDIR)$(man8dir) + +uninstall: + $(RM) $(DESTDIR)$(bindir)/$(EXEC) + $(RM) $(DESTDIR)$(man8dir)/$(EXEC).8.gz + $(RM) $(DESTDIR)$(libudevdir)/kpartx_id + $(RM) $(DESTDIR)$(libudevdir)/rules.d/11-dm-parts.rules + $(RM) $(DESTDIR)$(libudevdir)/rules.d/66-kpartx.rules + $(RM) $(DESTDIR)$(libudevdir)/rules.d/67-kpartx-compat.rules + $(RM) $(DESTDIR)$(libudevdir)/rules.d/68-del-part-nodes.rules + +clean: dep_clean + $(RM) core *.o $(EXEC) *.gz + +include $(wildcard $(OBJS:.o=.d)) + +dep_clean: + $(RM) $(OBJS:.o=.d) diff --git a/kpartx/bsd.c b/kpartx/bsd.c new file mode 100644 index 0000000..0e661fb --- /dev/null +++ b/kpartx/bsd.c @@ -0,0 +1,114 @@ +#include "kpartx.h" +#include + +#define BSD_DISKMAGIC (0x82564557UL) /* The disk magic number */ +#define XBSD_MAXPARTITIONS 16 +#define BSD_FS_UNUSED 0 + +struct bsd_disklabel { + unsigned int d_magic; /* the magic number */ + short int d_type; /* drive type */ + short int d_subtype; /* controller/d_type specific */ + char d_typename[16]; /* type name, e.g. "eagle" */ + char d_packname[16]; /* pack identifier */ + unsigned int d_secsize; /* # of bytes per sector */ + unsigned int d_nsectors; /* # of data sectors per track */ + unsigned int d_ntracks; /* # of tracks per cylinder */ + unsigned int d_ncylinders; /* # of data cylinders per unit */ + unsigned int d_secpercyl; /* # of data sectors per cylinder */ + unsigned int d_secperunit; /* # of data sectors per unit */ + unsigned short d_sparespertrack;/* # of spare sectors per track */ + unsigned short d_sparespercyl; /* # of spare sectors per cylinder */ + unsigned int d_acylinders; /* # of alt. 
cylinders per unit */ + unsigned short d_rpm; /* rotational speed */ + unsigned short d_interleave; /* hardware sector interleave */ + unsigned short d_trackskew; /* sector 0 skew, per track */ + unsigned short d_cylskew; /* sector 0 skew, per cylinder */ + unsigned int d_headswitch; /* head switch time, usec */ + unsigned int d_trkseek; /* track-to-track seek, usec */ + unsigned int d_flags; /* generic flags */ + unsigned int d_drivedata[5]; /* drive-type specific information */ + unsigned int d_spare[5]; /* reserved for future use */ + unsigned int d_magic2; /* the magic number (again) */ + unsigned short d_checksum; /* xor of data incl. partitions */ + + /* filesystem and partition information: */ + unsigned short d_npartitions; /* number of partitions in following */ + unsigned int d_bbsize; /* size of boot area at sn0, bytes */ + unsigned int d_sbsize; /* max size of fs superblock, bytes */ + struct bsd_partition { /* the partition table */ + unsigned int p_size; /* number of sectors in partition */ + unsigned int p_offset; /* starting sector */ + unsigned int p_fsize; /* filesystem basic fragment size */ + unsigned char p_fstype; /* filesystem type, see below */ + unsigned char p_frag; /* filesystem fragments per block */ + unsigned short p_cpg; /* filesystem cylinders per group */ + } d_partitions[XBSD_MAXPARTITIONS];/* actually may be more */ +}; +/* read_bsd_pt: look for a BSD disklabel in the block getblock() returns for all.start + 1 and copy every partition whose type is not BSD_FS_UNUSED into sp[], storing at most ns entries. Returns the number of slices stored, or -1 when the block cannot be read or the label magic does not match. */ +int +read_bsd_pt(int fd, struct slice all, struct slice *sp, unsigned int ns) { + struct bsd_disklabel *l; + struct bsd_partition *p; + unsigned int offset = all.start, end; + int max_partitions; + char *bp; + unsigned int n = 0, i, j; + + bp = getblock(fd, offset+1); /* 1 sector suffices */ + if (bp == NULL) + return -1; + + l = (struct bsd_disklabel *) bp; /* label fields are used exactly as read; no byte-order conversion is applied here */ + if (l->d_magic != BSD_DISKMAGIC) + return -1; + + max_partitions = 16; /* upper bound on the entries walked, whatever d_npartitions claims */ + if (l->d_npartitions < max_partitions) + max_partitions = l->d_npartitions; + for (p = l->d_partitions; p - l->d_partitions < max_partitions; p++) { + if (p->p_fstype == 
BSD_FS_UNUSED) + /* nothing */; + else if (n < ns) { + sp[n].start = p->p_offset; + sp[n].size = p->p_size; + n++; + } else { + fprintf(stderr, + "bsd_partition: too many slices\n"); + break; + } + } + /* + * Convention has it that the bsd disklabel will always have + * the 'c' partition spanning the entire disk. + * So we have to check for contained slices. + */ + for(i = 0; i < n; i++) { + if (sp[i].size == 0) /* size 0 marks a slice already rejected below */ + continue; + + end = sp[i].start + sp[i].size; + for(j = 0; j < n; j ++) { + if ( i == j ) + continue; + if (sp[j].size == 0) + continue; + + if (sp[i].start < sp[j].start) { /* slice i begins before slice j */ + if (end > sp[j].start && + end < sp[j].start + sp[j].size) { + /* Invalid slice */ + fprintf(stderr, + "bsd_disklabel: slice %d overlaps with %d\n", i , j); + sp[i].size = 0; + } + } else { /* slice i begins at or after the start of slice j */ + if (end <= sp[j].start + sp[j].size) { + sp[i].container = j + 1; /* stores j + 1, i.e. a 1-based index of the enclosing slice */ + } + } + } + } + return n; +} diff --git a/kpartx/byteorder.h b/kpartx/byteorder.h new file mode 100644 index 0000000..199c66b --- /dev/null +++ b/kpartx/byteorder.h @@ -0,0 +1,29 @@ +#ifndef BYTEORDER_H_INCLUDED +#define BYTEORDER_H_INCLUDED + +#ifdef __linux__ +# include +# include +#else +# error unsupported +#endif + +#if BYTE_ORDER == LITTLE_ENDIAN +# define le16_to_cpu(x) (x) +# define be16_to_cpu(x) bswap_16(x) +# define le32_to_cpu(x) (x) +# define le64_to_cpu(x) (x) +# define be32_to_cpu(x) bswap_32(x) +# define be64_to_cpu(x) bswap_64(x) +#elif BYTE_ORDER == BIG_ENDIAN +# define le16_to_cpu(x) bswap_16(x) +# define be16_to_cpu(x) (x) +# define le32_to_cpu(x) bswap_32(x) +# define le64_to_cpu(x) bswap_64(x) +# define be32_to_cpu(x) (x) +# define be64_to_cpu(x) (x) +#else +# error unsupported +#endif + +#endif /* BYTEORDER_H_INCLUDED */ diff --git a/kpartx/crc32.c b/kpartx/crc32.c new file mode 100644 index 0000000..b23a083 --- /dev/null +++ b/kpartx/crc32.c @@ -0,0 +1,393 @@ +/* + * crc32.c + * This code is in the public domain; copyright abandoned. 
+ * Liability for non-performance of this code is limited to the amount + * you paid for it. Since it is distributed for free, your refund will + * be very very small. If it breaks, you get to keep both pieces. + */ + +#include "crc32.h" + +#if __GNUC__ >= 3 /* 2.x has "attribute", but only 3.0 has "pure */ +#define attribute(x) __attribute__(x) +#else +#define attribute(x) +#endif + +/* + * There are multiple 16-bit CRC polynomials in common use, but this is + * *the* standard CRC-32 polynomial, first popularized by Ethernet. + * x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 + */ +#define CRCPOLY_LE 0xedb88320 +#define CRCPOLY_BE 0x04c11db7 + +/* How many bits at a time to use. Requires a table of 4< 8 || CRC_LE_BITS < 1 || CRC_LE_BITS & CRC_LE_BITS-1 +# error CRC_LE_BITS must be a power of 2 between 1 and 8 +#endif + +#if CRC_LE_BITS == 1 +/* + * In fact, the table-based code will work in this case, but it can be + * simplified by inlining the table in ?: form. + */ +#define crc32init_le() +#define crc32cleanup_le() +/** + * crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32 + * @crc - seed value for computation. ~0 for Ethernet, sometimes 0 for + * other uses, or the previous crc32 value if computing incrementally. + * @p - pointer to buffer over which CRC is run + * @len - length of buffer @p + * + */ +uint32_t attribute((pure)) crc32_le(uint32_t crc, unsigned char const *p, size_t len) +{ + int i; + while (len--) { + crc ^= *p++; + for (i = 0; i < 8; i++) + crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); + } + return crc; +} +#else /* Table-based approach */ + +static uint32_t *crc32table_le; +/** + * crc32init_le() - allocate and initialize LE table data + * + * crc is the crc of the byte i; other entries are filled in based on the + * fact that crctable[i^j] = crctable[i] ^ crctable[j]. 
+ * + */ +static int +crc32init_le(void) /* returns 0 on success, 1 if the table cannot be allocated */ +{ + unsigned i, j; + uint32_t crc = 1; + + crc32table_le = + malloc((1 << CRC_LE_BITS) * sizeof(uint32_t)); + if (!crc32table_le) + return 1; + crc32table_le[0] = 0; /* the entry for index 0 is always 0 */ + + for (i = 1 << (CRC_LE_BITS - 1); i; i >>= 1) { /* i walks the power-of-two indices, highest first */ + crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); + for (j = 0; j < 1 << CRC_LE_BITS; j += 2 * i) + crc32table_le[i + j] = crc ^ crc32table_le[j]; + } + return 0; +} + +/** + * crc32cleanup_le(): free LE table data + */ +static void +crc32cleanup_le(void) /* frees the table and resets the pointer; harmless if it was never allocated */ +{ + if (crc32table_le) free(crc32table_le); + crc32table_le = NULL; +} + +/** + * crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32 + * @crc - seed value for computation. ~0 for Ethernet, sometimes 0 for + * other uses, or the previous crc32 value if computing incrementally. + * @p - pointer to buffer over which CRC is run + * @len - length of buffer @p + * + */ +uint32_t attribute((pure)) crc32_le(uint32_t crc, unsigned char const *p, size_t len) +{ /* indexes crc32table_le without a NULL check, so crc32init_le() must have succeeded first */ + while (len--) { +# if CRC_LE_BITS == 8 + crc = (crc >> 8) ^ crc32table_le[(crc ^ *p++) & 255]; +# elif CRC_LE_BITS == 4 + crc ^= *p++; + crc = (crc >> 4) ^ crc32table_le[crc & 15]; + crc = (crc >> 4) ^ crc32table_le[crc & 15]; +# elif CRC_LE_BITS == 2 + crc ^= *p++; + crc = (crc >> 2) ^ crc32table_le[crc & 3]; + crc = (crc >> 2) ^ crc32table_le[crc & 3]; + crc = (crc >> 2) ^ crc32table_le[crc & 3]; + crc = (crc >> 2) ^ crc32table_le[crc & 3]; +# endif + } + return crc; +} +#endif + +/* + * Big-endian CRC computation. Used with serial bit streams sent + * msbit-first. Be sure to use cpu_to_be32() to append the computed CRC. + */ +#if CRC_BE_BITS > 8 || CRC_BE_BITS < 1 || CRC_BE_BITS & CRC_BE_BITS-1 +# error CRC_BE_BITS must be a power of 2 between 1 and 8 +#endif + +#if CRC_BE_BITS == 1 +/* + * In fact, the table-based code will work in this case, but it can be + * simplified by inlining the table in ?: form. 
+ */ +#define crc32init_be() +#define crc32cleanup_be() + +/** + * crc32_be() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 + * @crc - seed value for computation. ~0 for Ethernet, sometimes 0 for + * other uses, or the previous crc32 value if computing incrementally. + * @p - pointer to buffer over which CRC is run + * @len - length of buffer @p + * + */ +uint32_t attribute((pure)) crc32_be(uint32_t crc, unsigned char const *p, size_t len) +{ + int i; + while (len--) { + crc ^= *p++ << 24; /* fold the next input byte into the top 8 bits of the remainder */ + for (i = 0; i < 8; i++) /* then reduce one bit at a time, most significant bit first */ + crc = + (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : + 0); + } + return crc; +} + +#else /* Table-based approach */ +static uint32_t *crc32table_be; + +/** + * crc32init_be() - allocate and initialize BE table data + */ +static int +crc32init_be(void) /* returns 0 on success, 1 if the table cannot be allocated */ +{ + unsigned i, j; + uint32_t crc = 0x80000000; + + crc32table_be = + malloc((1 << CRC_BE_BITS) * sizeof(uint32_t)); + if (!crc32table_be) + return 1; + crc32table_be[0] = 0; /* the entry for index 0 is always 0 */ + + for (i = 1; i < 1 << CRC_BE_BITS; i <<= 1) { /* i walks the power-of-two indices, lowest first */ + crc = (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : 0); + for (j = 0; j < i; j++) /* fill slots i..2i-1 from the already computed slots 0..i-1 */ + crc32table_be[i + j] = crc ^ crc32table_be[j]; + } + return 0; +} + +/** + * crc32cleanup_be(): free BE table data + */ +static void +crc32cleanup_be(void) /* frees the table and resets the pointer; harmless if it was never allocated */ +{ + if (crc32table_be) free(crc32table_be); + crc32table_be = NULL; +} + + +/** + * crc32_be() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 + * @crc - seed value for computation. ~0 for Ethernet, sometimes 0 for + * other uses, or the previous crc32 value if computing incrementally. 
+ * @p - pointer to buffer over which CRC is run + * @len - length of buffer @p + * + */ +uint32_t attribute((pure)) crc32_be(uint32_t crc, unsigned char const *p, size_t len) +{ /* indexes crc32table_be without a NULL check, so crc32init_be() must have succeeded first */ + while (len--) { +# if CRC_BE_BITS == 8 + crc = (crc << 8) ^ crc32table_be[(crc >> 24) ^ *p++]; +# elif CRC_BE_BITS == 4 + crc ^= *p++ << 24; + crc = (crc << 4) ^ crc32table_be[crc >> 28]; + crc = (crc << 4) ^ crc32table_be[crc >> 28]; +# elif CRC_BE_BITS == 2 + crc ^= *p++ << 24; + crc = (crc << 2) ^ crc32table_be[crc >> 30]; + crc = (crc << 2) ^ crc32table_be[crc >> 30]; + crc = (crc << 2) ^ crc32table_be[crc >> 30]; + crc = (crc << 2) ^ crc32table_be[crc >> 30]; +# endif + } + return crc; +} +#endif + +/* + * A brief CRC tutorial. + * + * A CRC is a long-division remainder. You add the CRC to the message, + * and the whole thing (message+CRC) is a multiple of the given + * CRC polynomial. To check the CRC, you can either check that the + * CRC matches the recomputed value, *or* you can check that the + * remainder computed on the message+CRC is 0. This latter approach + * is used by a lot of hardware implementations, and is why so many + * protocols put the end-of-frame flag after the CRC. + * + * It's actually the same long division you learned in school, except that + * - We're working in binary, so the digits are only 0 and 1, and + * - When dividing polynomials, there are no carries. Rather than add and + * subtract, we just xor. Thus, we tend to get a bit sloppy about + * the difference between adding and subtracting. + * + * A 32-bit CRC polynomial is actually 33 bits long. But since it's + * 33 bits long, bit 32 is always going to be set, so usually the CRC + * is written in hex with the most significant bit omitted. (If you're + * familiar with the IEEE 754 floating-point format, it's the same idea.) + * + * Note that a CRC is computed over a string of *bits*, so you have + * to decide on the endianness of the bits within each byte. 
To get + * the best error-detecting properties, this should correspond to the + * order they're actually sent. For example, standard RS-232 serial is + * little-endian; the most significant bit (sometimes used for parity) + * is sent last. And when appending a CRC word to a message, you should + * do it in the right order, matching the endianness. + * + * Just like with ordinary division, the remainder is always smaller than + * the divisor (the CRC polynomial) you're dividing by. Each step of the + * division, you take one more digit (bit) of the dividend and append it + * to the current remainder. Then you figure out the appropriate multiple + * of the divisor to subtract to bring the remainder back into range. + * In binary, it's easy - it has to be either 0 or 1, and to make the + * XOR cancel, it's just a copy of bit 32 of the remainder. + * + * When computing a CRC, we don't care about the quotient, so we can + * throw the quotient bit away, but subtract the appropriate multiple of + * the polynomial from the remainder and we're back to where we started, + * ready to process the next bit. + * + * A big-endian CRC written this way would be coded like: + * for (i = 0; i < input_bits; i++) { + * multiple = remainder & 0x80000000 ? CRCPOLY : 0; + * remainder = (remainder << 1 | next_input_bit()) ^ multiple; + * } + * Notice how, to get at bit 32 of the shifted remainder, we look + * at bit 31 of the remainder *before* shifting it. + * + * But also notice how the next_input_bit() bits we're shifting into + * the remainder don't actually affect any decision-making until + * 32 bits later. Thus, the first 32 cycles of this are pretty boring. + * Also, to add the CRC to a message, we need a 32-bit-long hole for it at + * the end, so we have to add 32 extra cycles shifting in zeros at the + * end of every message. + * + * So the standard trick is to rearrange merging in the next_input_bit() + * until the moment it's needed. 
Then the first 32 cycles can be precomputed, + * and merging in the final 32 zero bits to make room for the CRC can be + * skipped entirely. + * This changes the code to: + * for (i = 0; i < input_bits; i++) { + * remainder ^= next_input_bit() << 31; + * multiple = (remainder & 0x80000000) ? CRCPOLY : 0; + * remainder = (remainder << 1) ^ multiple; + * } + * With this optimization, the little-endian code is simpler: + * for (i = 0; i < input_bits; i++) { + * remainder ^= next_input_bit(); + * multiple = (remainder & 1) ? CRCPOLY : 0; + * remainder = (remainder >> 1) ^ multiple; + * } + * + * Note that the other details of endianness have been hidden in CRCPOLY + * (which must be bit-reversed) and next_input_bit(). + * + * However, as long as next_input_bit is returning the bits in a sensible + * order, we can actually do the merging 8 or more bits at a time rather + * than one bit at a time: + * for (i = 0; i < input_bytes; i++) { + * remainder ^= next_input_byte() << 24; + * for (j = 0; j < 8; j++) { + * multiple = (remainder & 0x80000000) ? CRCPOLY : 0; + * remainder = (remainder << 1) ^ multiple; + * } + * } + * Or in little-endian: + * for (i = 0; i < input_bytes; i++) { + * remainder ^= next_input_byte(); + * for (j = 0; j < 8; j++) { + * multiple = (remainder & 1) ? CRCPOLY : 0; + * remainder = (remainder >> 1) ^ multiple; + * } + * } + * If the input is a multiple of 32 bits, you can even XOR in a 32-bit + * word at a time and increase the inner loop count to 32. + * + * You can also mix and match the two loop styles, for example doing the + * bulk of a message byte-at-a-time and adding bit-at-a-time processing + * for any fractional bytes at the end. + * + * The only remaining optimization is to the byte-at-a-time table method. + * Here, rather than just shifting one bit of the remainder to decide + * on the correct multiple to subtract, we can shift a byte at a time. 
+ * This produces a 40-bit (rather than a 33-bit) intermediate remainder, + * but again the multiple of the polynomial to subtract depends only on + * the high bits, the high 8 bits in this case. + * + * The multiple we need in that case is the low 32 bits of a 40-bit + * value whose high 8 bits are given, and which is a multiple of the + * generator polynomial. This is simply the CRC-32 of the given + * one-byte message. + * + * Two more details: normally, appending zero bits to a message which + * is already a multiple of a polynomial produces a larger multiple of that + * polynomial. To enable a CRC to detect this condition, it's common to + * invert the CRC before appending it. This makes the remainder of the + * message+crc come out not as zero, but some fixed non-zero value. + * + * The same problem applies to zero bits prepended to the message, and + * a similar solution is used. Instead of starting with a remainder of + * 0, an initial remainder of all ones is used. As long as you start + * the same way on decoding, it doesn't make a difference. + */ + + +/** + * init_crc32(): generates CRC32 tables + * + * On successful initialization, use count is increased. + * This guarantees that the library functions will stay resident + * in memory, and prevents someone from 'rmmod crc32' while + * a driver that needs it is still loaded. + * This also greatly simplifies drivers, as there's no need + * to call an initialization/cleanup function from each driver. + * Since crc32.o is a library module, there's no requirement + * that the user can unload it. 
+ */ +int +init_crc32(void) +{ + int rc1, rc2, rc; + rc1 = crc32init_le(); + rc2 = crc32init_be(); + rc = rc1 || rc2; /* non-zero if either table init reported non-zero */ + return rc; +} + +/** + * cleanup_crc32(): frees crc32 data when no longer needed + */ +void +cleanup_crc32(void) +{ + crc32cleanup_le(); + crc32cleanup_be(); +} diff --git a/kpartx/crc32.h b/kpartx/crc32.h new file mode 100644 index 0000000..a4505b8 --- /dev/null +++ b/kpartx/crc32.h @@ -0,0 +1,19 @@ +/* + * crc32.h + */ +#ifndef _CRC32_H +#define _CRC32_H + +#include <inttypes.h> +#include <stdlib.h> + +extern int init_crc32(void); +extern void cleanup_crc32(void); +extern uint32_t crc32_le(uint32_t crc, unsigned char const *p, size_t len); +extern uint32_t crc32_be(uint32_t crc, unsigned char const *p, size_t len); + +#define crc32(seed, data, length) crc32_le(seed, (unsigned char const *)(data), length) +#define ether_crc_le(length, data) crc32_le(~0, data, length) +#define ether_crc(length, data) crc32_be(~0, data, length) + +#endif /* _CRC32_H */ diff --git a/kpartx/dasd.c b/kpartx/dasd.c new file mode 100644 index 0000000..14b9d3a --- /dev/null +++ b/kpartx/dasd.c @@ -0,0 +1,297 @@ +/* + * dasd.c + * + * IBM DASD partition table handling. + * + * Mostly taken from drivers/s390/block/dasd.c + * + * Copyright (c) 2005, Hannes Reinecke, SUSE Linux Products GmbH + * Copyright IBM Corporation, 2009 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <inttypes.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/sysmacros.h> +#include <sys/ioctl.h> +#include <linux/hdreg.h> +#include <errno.h> +#include <string.h> +#include <libdevmapper.h> +#include "devmapper.h" +#include "kpartx.h" +#include "byteorder.h" +#include "dasd.h" + +unsigned long long sectors512(unsigned long long sectors, int blocksize) +{ + return sectors * (blocksize >> 9); +} + +/* + * Magic records per track calculation, copied from fdasd.c + */ +static unsigned int ceil_quot(unsigned int d1, unsigned int d2) +{ + return (d1 + (d2 - 1)) / d2; +} + +unsigned int recs_per_track(unsigned int dl) +{ + int dn = ceil_quot(dl + 6, 232) + 1; + return 1729 / (10 + 9 + ceil_quot(dl + 6 * dn, 34)); +} + + +typedef unsigned int __attribute__((__may_alias__)) label_ints_t; + +/* + * read_dasd_pt(): look for partitions on an IBM DASD volume. + * + * fd is either the DASD itself or a dm device; for a dm device the first + * dependency is opened through a temporary device node. sp[] receives the + * slices (start and size in 512-byte sectors); the VOL1 scan stops after + * ns entries. + * + * Returns the number of slices stored, or -1 on error or if the device + * is not a DASD. + */ +int +read_dasd_pt(int fd, __attribute__((unused)) struct slice all, + struct slice *sp, unsigned int ns) +{ + int retval = -1; + int blocksize; + uint64_t disksize; + uint64_t offset, size, fmt_size; + dasd_information_t info; + struct hd_geometry geo; + char type[5] = {0,}; + volume_label_t vlabel; + unsigned char *data = NULL; + uint64_t blk; + int fd_dasd = -1; + struct stat sbuf; + dev_t dev; + char *devname; + char pathname[256]; + + if (fd < 0) { + return -1; + } + + if (fstat(fd, &sbuf) == -1) { + return -1; + } + + devname = dm_mapname(major(sbuf.st_rdev), minor(sbuf.st_rdev)); + + if (devname != NULL) { + /* We were passed a handle to a dm device. + * Get the first target and operate on that instead. + */ + if (!(dev = dm_get_first_dep(devname))) { + free(devname); + return -1; + } + free(devname); + + if ((unsigned int)major(dev) != 94) { + /* Not a DASD */ + return -1; + } + + /* + * Hard to believe, but there's no simple way to translate + * major/minor into an openable device file, so we have + * to create one for ourselves.
+ */ + + sprintf(pathname, "/dev/.kpartx-node-%u-%u", + (unsigned int)major(dev), (unsigned int)minor(dev)); + if ((fd_dasd = open(pathname, O_RDONLY)) == -1) { + /* Devicenode does not exist. Try to create one */ + if (mknod(pathname, 0600 | S_IFBLK, dev) == -1) { + /* Couldn't create a device node */ + return -1; + } + fd_dasd = open(pathname, O_RDONLY); + /* + * The file will vanish when the last process (we) + * has ceased to access it. + */ + unlink(pathname); + } + if (fd_dasd < 0) { + /* Couldn't open the device */ + return -1; + } + } else { + fd_dasd = dup(fd); + if (fd_dasd < 0) + return -1; + } + + if (ioctl(fd_dasd, BIODASDINFO, (unsigned long)&info) != 0) { + info.label_block = 2; + info.FBA_layout = 0; + memcpy(info.type, "ECKD", sizeof(info.type)); + } + + if (ioctl(fd_dasd, BLKSSZGET, &blocksize) != 0) + goto out; + + /* Validate before blocksize is used as a divisor in the geometry fallback */ + if (blocksize < 512 || blocksize > 4096) + goto out; + + if (ioctl(fd_dasd, BLKGETSIZE64, &disksize) != 0) + goto out; + + if (ioctl(fd_dasd, HDIO_GETGEO, (unsigned long)&geo) != 0) { + unsigned int cyl; + + geo.heads = 15; + geo.sectors = recs_per_track(blocksize); + cyl = disksize / ((uint64_t)blocksize * geo.heads * + geo.sectors); + if (cyl < LV_COMPAT_CYL) + geo.cylinders = cyl; + else + geo.cylinders = LV_COMPAT_CYL; + geo.start = 0; + } + + disksize >>= 9; + + /* + * Get volume label, extract name and type.
+ */ + + if (!(data = (unsigned char *)malloc(blocksize))) + goto out; + + + if (lseek(fd_dasd, info.label_block * blocksize, SEEK_SET) == -1) + goto out; + if (read(fd_dasd, data, blocksize) == -1) { + perror("read"); + goto out; + } + + if ((!info.FBA_layout) && (!memcmp(info.type, "ECKD", 4))) + memcpy (&vlabel, data, sizeof(vlabel)); + else { + memset(&vlabel, 0, 4); + memcpy ((char *)&vlabel + 4, data, sizeof(vlabel) - 4); + } + vtoc_ebcdic_dec(vlabel.vollbl, type, 4); + + /* + * Three different types: CMS1, VOL1 and LNX1/unlabeled + */ + if (strncmp(type, "CMS1", 4) == 0) { + /* + * VM style CMS1 labeled disk + */ + label_ints_t *label = (label_ints_t *) &vlabel; + + blocksize = label[4]; + if (label[14] != 0) { + /* disk is reserved minidisk */ + offset = label[14]; + size = sectors512(label[8] - 1, blocksize); + } else { + offset = info.label_block + 1; + size = sectors512(label[8], blocksize); + } + sp[0].start = sectors512(offset, blocksize); + sp[0].size = size - sp[0].start; + retval = 1; + } else if ((strncmp(type, "VOL1", 4) == 0) && + (!info.FBA_layout) && (!memcmp(info.type, "ECKD",4))) { + /* + * New style VOL1 labeled disk + */ + int counter; + + /* get block number and read then go through format1 labels */ + blk = cchhb2blk(&vlabel.vtoc, &geo) + 1; + counter = 0; + if (lseek(fd_dasd, blk * blocksize, SEEK_SET) == -1) + goto out; + + /* + * Stop when sp[] is full and on EOF or a short read: read() + * returns 0 at EOF, so testing only for -1 would loop forever + * on the stale buffer contents. + */ + while ((unsigned int)counter < ns && + read(fd_dasd, data, blocksize) == blocksize) { + format1_label_t f1; + + memcpy(&f1, data, sizeof(format1_label_t)); + + /* skip FMT4 / FMT5 / FMT7 / FMT9 labels */ + if (EBCtoASC[f1.DS1FMTID] == '4' + || EBCtoASC[f1.DS1FMTID] == '5' + || EBCtoASC[f1.DS1FMTID] == '7' + || EBCtoASC[f1.DS1FMTID] == '9') { + blk++; + continue; + } + + /* only FMT1 and FMT8 valid at this point */ + if (EBCtoASC[f1.DS1FMTID] != '1' && + EBCtoASC[f1.DS1FMTID] != '8') + break; + + /* OK, we got valid partition data */ + offset = cchh2blk(&f1.DS1EXT1.llimit, &geo); + size = cchh2blk(&f1.DS1EXT1.ulimit, &geo) - + offset + geo.sectors; + sp[counter].start
= sectors512(offset, blocksize); + sp[counter].size = sectors512(size, blocksize); + counter++; + blk++; + } + retval = counter; + } else { + /* + * Old style LNX1 or unlabeled disk + */ + if (strncmp(type, "LNX1", 4) == 0) { + if (vlabel.ldl_version == 0xf2) { + fmt_size = sectors512(vlabel.formatted_blocks, + blocksize); + } else if (!memcmp(info.type, "ECKD",4)) { + /* formatted w/o large volume support */ + fmt_size = geo.cylinders * geo.heads + * geo.sectors * (blocksize >> 9); + } else { + /* old label and no usable disk geometry + * (e.g. DIAG) */ + fmt_size = disksize; + } + size = disksize; + if (fmt_size < size) + size = fmt_size; + } else if ((unsigned int)major(sbuf.st_rdev) != 94) { + /* Not a DASD */ + retval = -1; + goto out; + } else + size = disksize; + + sp[0].start = sectors512(info.label_block + 1, blocksize); + sp[0].size = size - sp[0].start; + retval = 1; + } + +out: + if (data != NULL) + free(data); + close(fd_dasd); + return retval; +} diff --git a/kpartx/dasd.h b/kpartx/dasd.h new file mode 100644 index 0000000..8ad5d62 --- /dev/null +++ b/kpartx/dasd.h @@ -0,0 +1,292 @@ +/* + * dasd.h + * + * IBM DASD partition table handling. + * + * Mostly taken from drivers/s390/block/dasd.c + * + * Copyright (c) 2005, Hannes Reinecke, SUSE Linux Products GmbH + * Copyright IBM Corporation, 2009 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#ifndef _DASD_H +#define _DASD_H + +typedef struct ttr +{ + uint16_t tt; + uint8_t r; +} __attribute__ ((packed)) ttr_t; + +typedef struct cchhb +{ + uint16_t cc; + uint16_t hh; + uint8_t b; +} __attribute__ ((packed)) cchhb_t; + +typedef struct cchh +{ + uint16_t cc; + uint16_t hh; +} __attribute__ ((packed)) cchh_t; + +typedef struct labeldate +{ + uint8_t year; + uint16_t day; +} __attribute__ ((packed)) labeldate_t; + + +typedef struct volume_label +{ + char volkey[4]; /* volume key = volume label */ + char vollbl[4]; /* volume label */ + char volid[6]; /* volume identifier */ + uint8_t security; /* security byte */ + cchhb_t vtoc; /* VTOC address */ + char res1[5]; /* reserved */ + char cisize[4]; /* CI-size for FBA,... */ + /* ...blanks for CKD */ + char blkperci[4]; /* no of blocks per CI (FBA), blanks for CKD */ + char labperci[4]; /* no of labels per CI (FBA), blanks for CKD */ + char res2[4]; /* reserved */ + char lvtoc[14]; /* owner code for LVTOC */ + char res3[28]; /* reserved */ + uint8_t ldl_version; /* version number, valid for ldl format */ + uint64_t formatted_blocks; /* valid when ldl_version >= f2 */ +} __attribute__ ((packed, aligned(__alignof__(int)))) volume_label_t; + + +typedef struct extent +{ + uint8_t typeind; /* extent type indicator */ + uint8_t seqno; /* extent sequence number */ + cchh_t llimit; /* starting point of this extent */ + cchh_t ulimit; /* ending point of this extent */ +} __attribute__ ((packed)) extent_t; + + +typedef struct dev_const +{ + uint16_t DS4DSCYL; /* number of logical cyls */ + uint16_t DS4DSTRK; /* number of tracks in a logical cylinder */ + uint16_t DS4DEVTK; /* device track length */ + uint8_t DS4DEVI; /* non-last keyed record overhead */ + uint8_t DS4DEVL; /* last keyed record overhead */ + uint8_t DS4DEVK; /* non-keyed record overhead differential */ + uint8_t DS4DEVFG; /* flag byte */ + uint16_t DS4DEVTL; /* device tolerance */ + uint8_t DS4DEVDT; /* number of DSCB's per track */ + uint8_t 
DS4DEVDB; /* number of directory blocks per track */ +} __attribute__ ((packed)) dev_const_t; + + +typedef struct format1_label +{ + char DS1DSNAM[44]; /* data set name */ + uint8_t DS1FMTID; /* format identifier */ + char DS1DSSN[6]; /* data set serial number */ + uint16_t DS1VOLSQ; /* volume sequence number */ + labeldate_t DS1CREDT; /* creation date: ydd */ + labeldate_t DS1EXPDT; /* expiration date */ + uint8_t DS1NOEPV; /* number of extents on volume */ + uint8_t DS1NOBDB; /* no. of bytes used in last direction blk */ + uint8_t DS1FLAG1; /* flag 1 */ + char DS1SYSCD[13]; /* system code */ + labeldate_t DS1REFD; /* date last referenced */ + uint8_t DS1SMSFG; /* system managed storage indicators */ + uint8_t DS1SCXTF; /* sec. space extension flag byte */ + uint16_t DS1SCXTV; /* secondary space extension value */ + uint8_t DS1DSRG1; /* data set organisation byte 1 */ + uint8_t DS1DSRG2; /* data set organisation byte 2 */ + uint8_t DS1RECFM; /* record format */ + uint8_t DS1OPTCD; /* option code */ + uint16_t DS1BLKL; /* block length */ + uint16_t DS1LRECL; /* record length */ + uint8_t DS1KEYL; /* key length */ + uint16_t DS1RKP; /* relative key position */ + uint8_t DS1DSIND; /* data set indicators */ + uint8_t DS1SCAL1; /* secondary allocation flag byte */ + char DS1SCAL3[3]; /* secondary allocation quantity */ + ttr_t DS1LSTAR; /* last used track and block on track */ + uint16_t DS1TRBAL; /* space remaining on last used track */ + uint16_t res1; /* reserved */ + extent_t DS1EXT1; /* first extent description */ + extent_t DS1EXT2; /* second extent description */ + extent_t DS1EXT3; /* third extent description */ + cchhb_t DS1PTRDS; /* possible pointer to f2 or f3 DSCB */ +} __attribute__ ((packed)) format1_label_t; + + +/* + * struct dasd_information_t + * represents any data about the data, which is visible to userspace + */ +typedef struct dasd_information_t { + unsigned int devno; /* S/390 devno */ + unsigned int real_devno; /* for aliases */ + unsigned int 
schid; /* S/390 subchannel identifier */ + unsigned int cu_type : 16; /* from SenseID */ + unsigned int cu_model : 8; /* from SenseID */ + unsigned int dev_type : 16; /* from SenseID */ + unsigned int dev_model : 8; /* from SenseID */ + unsigned int open_count; + unsigned int req_queue_len; + unsigned int chanq_len; /* length of chanq */ + char type[4]; /* from discipline.name, 'none' for unknown */ + unsigned int status; /* current device level */ + unsigned int label_block; /* where to find the VOLSER */ + unsigned int FBA_layout; /* fixed block size (like AIXVOL) */ + unsigned int characteristics_size; + unsigned int confdata_size; + char characteristics[64]; /* from read_device_characteristics */ + char configuration_data[256]; /* from read_configuration_data */ +} dasd_information_t; + +#define DASD_IOCTL_LETTER 'D' +#define BIODASDINFO _IOR(DASD_IOCTL_LETTER,1,dasd_information_t) +#define BLKGETSIZE _IO(0x12,96) +#define BLKSSZGET _IO(0x12,104) +#define BLKGETSIZE64 _IOR(0x12,114,size_t) /* device size in bytes (u64 *arg)*/ +#define LV_COMPAT_CYL 0xFFFE + +/* + * Only compile this on S/390. Doesn't make any sense + * for other architectures. 
+ */ + +static unsigned char EBCtoASC[256] = +{ +/* 0x00 NUL SOH STX ETX *SEL HT *RNL DEL */ + 0x00, 0x01, 0x02, 0x03, 0x07, 0x09, 0x07, 0x7F, +/* 0x08 -GE -SPS -RPT VT FF CR SO SI */ + 0x07, 0x07, 0x07, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, +/* 0x10 DLE DC1 DC2 DC3 -RES -NL BS -POC + -ENP ->LF */ + 0x10, 0x11, 0x12, 0x13, 0x07, 0x0A, 0x08, 0x07, +/* 0x18 CAN EM -UBS -CU1 -IFS -IGS -IRS -ITB + -IUS */ + 0x18, 0x19, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, +/* 0x20 -DS -SOS FS -WUS -BYP LF ETB ESC + -INP */ + 0x07, 0x07, 0x1C, 0x07, 0x07, 0x0A, 0x17, 0x1B, +/* 0x28 -SA -SFE -SM -CSP -MFA ENQ ACK BEL + -SW */ + 0x07, 0x07, 0x07, 0x07, 0x07, 0x05, 0x06, 0x07, +/* 0x30 ---- ---- SYN -IR -PP -TRN -NBS EOT */ + 0x07, 0x07, 0x16, 0x07, 0x07, 0x07, 0x07, 0x04, +/* 0x38 -SBS -IT -RFF -CU3 DC4 NAK ---- SUB */ + 0x07, 0x07, 0x07, 0x07, 0x14, 0x15, 0x07, 0x1A, +/* 0x40 SP RSP ä ---- */ + 0x20, 0xFF, 0x83, 0x84, 0x85, 0xA0, 0x07, 0x86, +/* 0x48 . < ( + | */ + 0x87, 0xA4, 0x9B, 0x2E, 0x3C, 0x28, 0x2B, 0x7C, +/* 0x50 & ---- */ + 0x26, 0x82, 0x88, 0x89, 0x8A, 0xA1, 0x8C, 0x07, +/* 0x58 ß ! $ * ) ; */ + 0x8D, 0xE1, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAA, +/* 0x60 - / ---- Ä ---- ---- ---- */ + 0x2D, 0x2F, 0x07, 0x8E, 0x07, 0x07, 0x07, 0x8F, +/* 0x68 ---- , % _ > ? 
*/ + 0x80, 0xA5, 0x07, 0x2C, 0x25, 0x5F, 0x3E, 0x3F, +/* 0x70 --- ---- ---- ---- ---- ---- ---- */ + 0x07, 0x90, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, +/* 0x78 * ` : # @ ' = " */ + 0x70, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22, +/* 0x80 * a b c d e f g */ + 0x07, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, +/* 0x88 h i ---- ---- ---- */ + 0x68, 0x69, 0xAE, 0xAF, 0x07, 0x07, 0x07, 0xF1, +/* 0x90 ° j k l m n o p */ + 0xF8, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, +/* 0x98 q r ---- ---- */ + 0x71, 0x72, 0xA6, 0xA7, 0x91, 0x07, 0x92, 0x07, +/* 0xA0 ~ s t u v w x */ + 0xE6, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, +/* 0xA8 y z ---- ---- ---- ---- */ + 0x79, 0x7A, 0xAD, 0xAB, 0x07, 0x07, 0x07, 0x07, +/* 0xB0 ^ ---- § ---- */ + 0x5E, 0x9C, 0x9D, 0xFA, 0x07, 0x07, 0x07, 0xAC, +/* 0xB8 ---- [ ] ---- ---- ---- ---- */ + 0xAB, 0x07, 0x5B, 0x5D, 0x07, 0x07, 0x07, 0x07, +/* 0xC0 { A B C D E F G */ + 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, +/* 0xC8 H I ---- ö ---- */ + 0x48, 0x49, 0x07, 0x93, 0x94, 0x95, 0xA2, 0x07, +/* 0xD0 } J K L M N O P */ + 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, +/* 0xD8 Q R ---- ü */ + 0x51, 0x52, 0x07, 0x96, 0x81, 0x97, 0xA3, 0x98, +/* 0xE0 \ S T U V W X */ + 0x5C, 0xF6, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, +/* 0xE8 Y Z ---- Ö ---- ---- ---- */ + 0x59, 0x5A, 0xFD, 0x07, 0x99, 0x07, 0x07, 0x07, +/* 0xF0 0 1 2 3 4 5 6 7 */ + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, +/* 0xF8 8 9 ---- ---- Ü ---- ---- ---- */ + 0x38, 0x39, 0x07, 0x07, 0x9A, 0x07, 0x07, 0x07 +}; + +static inline void +vtoc_ebcdic_dec (const char *source, char *target, int l) +{ + int i; + + for (i = 0; i < l; i++) + target[i]=(char)EBCtoASC[(unsigned char)(source[i])]; +} + +/* + * compute the block number from a + * cyl-cyl-head-head structure + */ +static inline uint64_t +cchh2blk (cchh_t *ptr, struct hd_geometry *geo) +{ + uint64_t cyl; + uint16_t head; + + /*decode cylinder and heads for large volumes */ + cyl = ptr->hh & 0xFFF0; + cyl <<= 12; + cyl |= ptr->cc; + head 
= ptr->hh & 0x000F; + return cyl * geo->heads * geo->sectors + + head * geo->sectors; +} + +/* + * compute the block number from a + * cyl-cyl-head-head-block structure + */ +static inline uint64_t +cchhb2blk (cchhb_t *ptr, struct hd_geometry *geo) +{ + uint64_t cyl; + uint16_t head; + + /*decode cylinder and heads for large volumes */ + cyl = ptr->hh & 0xFFF0; + cyl <<= 12; + cyl |= ptr->cc; + head = ptr->hh & 0x000F; + return cyl * geo->heads * geo->sectors + + head * geo->sectors + + ptr->b; +} + +#endif /* _DASD_H */ diff --git a/kpartx/del-part-nodes.rules b/kpartx/del-part-nodes.rules new file mode 100644 index 0000000..0ceecf5 --- /dev/null +++ b/kpartx/del-part-nodes.rules @@ -0,0 +1,33 @@ +# These rules can delete partitions devnodes for slave devices +# for certain aggregate devices such as multipath. +# This is desirable to avoid confusion and keep the number +# of device nodes and symlinks within limits. +# +# This is done only once on the first "add" or "change" event for +# any given device. +# +# To suppress this, use the kernel parameter "dont_del_part_nodes", +# or create an udev rule file that sets ENV{DONT_DEL_PART_NODES}="1". + +SUBSYSTEM!="block", GOTO="end_del_part_nodes" +KERNEL!="sd*|dasd*", GOTO="end_del_part_nodes" +ACTION!="add|change", GOTO="end_del_part_nodes" +ENV{DEVTYPE}=="partition", GOTO="end_del_part_nodes" + +IMPORT{cmdline}="dont_del_part_nodes" +ENV{dont_del_part_nodes}=="1", GOTO="end_del_part_nodes" +ENV{DONT_DEL_PART_NODES}=="1", GOTO="end_del_part_nodes" + +# dm-multipath +ENV{DM_MULTIPATH_DEVICE_PATH}=="1", GOTO="del_part_nodes" + +# Other aggregate device types can be added here. 
+ +GOTO="end_del_part_nodes" + +LABEL="del_part_nodes" +IMPORT{db}="DM_DEL_PART_NODES" +ENV{DM_DEL_PART_NODES}!="1", ENV{DM_DEL_PART_NODES}="1", \ + RUN+="/usr/sbin/partx -d --nr 1-1024 $env{DEVNAME}" + +LABEL="end_del_part_nodes" diff --git a/kpartx/devmapper.c b/kpartx/devmapper.c new file mode 100644 index 0000000..86731ea --- /dev/null +++ b/kpartx/devmapper.c @@ -0,0 +1,741 @@ +/* + * Copyright (c) 2004, 2005 Christophe Varoqui + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "devmapper.h" +#include "kpartx.h" + +#define _UUID_PREFIX "part" +#define UUID_PREFIX _UUID_PREFIX "%d-" +#define _UUID_PREFIX_LEN (sizeof(_UUID_PREFIX) - 1) +#define MAX_PREFIX_LEN (_UUID_PREFIX_LEN + 4) +#define PARAMS_SIZE 1024 + +int dm_prereq(char * str, uint32_t x, uint32_t y, uint32_t z) +{ + int r = 1; + struct dm_task *dmt; + struct dm_versions *target; + struct dm_versions *last_target; + + if (!(dmt = dm_task_create(DM_DEVICE_LIST_VERSIONS))) + return 1; + + dm_task_no_open_count(dmt); + + if (!dm_task_run(dmt)) + goto out; + + target = dm_task_get_versions(dmt); + + /* Fetch targets and print 'em */ + do { + last_target = target; + + if (!strncmp(str, target->name, strlen(str)) && + /* dummy prereq on multipath version */ + target->version[0] >= x && + target->version[1] >= y && + target->version[2] >= z + ) + r = 0; + + target = (void *) target + target->next; + } while (last_target != target); + +out: + dm_task_destroy(dmt); + return r; +} + +int dm_simplecmd(int task, const char *name, int no_flush, uint16_t udev_flags) +{ + int r = 0; + int udev_wait_flag = (task == DM_DEVICE_RESUME || + task == DM_DEVICE_REMOVE); +#ifdef LIBDM_API_COOKIE + uint32_t cookie = 0; +#endif + struct dm_task *dmt; + + if (!(dmt = dm_task_create(task))) + return 0; + + if (!dm_task_set_name(dmt, name)) + goto out; + + dm_task_no_open_count(dmt); + dm_task_skip_lockfs(dmt); + + if (no_flush) + dm_task_no_flush(dmt); + +#ifdef LIBDM_API_COOKIE + if 
(!udev_sync) + udev_flags |= DM_UDEV_DISABLE_LIBRARY_FALLBACK; + if (udev_wait_flag && !dm_task_set_cookie(dmt, &cookie, udev_flags)) + goto out; +#endif + r = dm_task_run(dmt); +#ifdef LIBDM_API_COOKIE + if (udev_wait_flag) + dm_udev_wait(cookie); +#endif +out: + dm_task_destroy(dmt); + return r; +} + +static void +strip_slash (char * device) +{ + char * p = device; + + while (*(p++) != 0x0) { + + if (*p == '/') + *p = '!'; + } +} + +static int format_partname(char *buf, size_t bufsiz, + const char *mapname, const char *delim, int part) +{ + if (safe_snprintf(buf, bufsiz, "%s%s%d", mapname, delim, part)) + return 0; + strip_slash(buf); + return 1; +} + +static char *make_prefixed_uuid(int part, const char *uuid) +{ + char *prefixed_uuid; + int len = MAX_PREFIX_LEN + strlen(uuid) + 1; + + prefixed_uuid = malloc(len); + if (!prefixed_uuid) { + fprintf(stderr, "cannot create prefixed uuid : %s\n", + strerror(errno)); + return NULL; + } + snprintf(prefixed_uuid, len, UUID_PREFIX "%s", part, uuid); + return prefixed_uuid; +} + +int dm_addmap(int task, const char *name, const char *target, + const char *params, uint64_t size, int ro, const char *uuid, + int part, mode_t mode, uid_t uid, gid_t gid) +{ + int r = 0; + struct dm_task *dmt; + char *prefixed_uuid = NULL; +#ifdef LIBDM_API_COOKIE + uint32_t cookie = 0; + uint16_t udev_flags = 0; +#endif + + if (!(dmt = dm_task_create (task))) + return 0; + + if (!dm_task_set_name (dmt, name)) + goto addout; + + if (!dm_task_add_target (dmt, 0, size, target, params)) + goto addout; + + if (ro && !dm_task_set_ro (dmt)) + goto addout; + + if (task == DM_DEVICE_CREATE && uuid) { + prefixed_uuid = make_prefixed_uuid(part, uuid); + if (prefixed_uuid == NULL) + goto addout; + if (!dm_task_set_uuid(dmt, prefixed_uuid)) + goto addout; + } + + if (!dm_task_set_mode(dmt, mode)) + goto addout; + if (!dm_task_set_uid(dmt, uid)) + goto addout; + if (!dm_task_set_gid(dmt, gid)) + goto addout; + + dm_task_no_open_count(dmt); + +#ifdef 
LIBDM_API_COOKIE + if (!udev_sync) + udev_flags = DM_UDEV_DISABLE_LIBRARY_FALLBACK; + if (task == DM_DEVICE_CREATE && + !dm_task_set_cookie(dmt, &cookie, udev_flags)) + goto addout; +#endif + r = dm_task_run (dmt); +#ifdef LIBDM_API_COOKIE + if (task == DM_DEVICE_CREATE) + dm_udev_wait(cookie); +#endif +addout: + dm_task_destroy (dmt); + free(prefixed_uuid); + + return r; +} + +static int dm_map_present(char *str, char **uuid) +{ + int r = 0; + struct dm_task *dmt; + const char *uuidtmp; + struct dm_info info; + + if (uuid) + *uuid = NULL; + + if (!(dmt = dm_task_create(DM_DEVICE_INFO))) + return 0; + + if (!dm_task_set_name(dmt, str)) + goto out; + + dm_task_no_open_count(dmt); + + if (!dm_task_run(dmt)) + goto out; + + if (!dm_task_get_info(dmt, &info)) + goto out; + + if (!info.exists) + goto out; + + r = 1; + if (uuid) { + uuidtmp = dm_task_get_uuid(dmt); + if (uuidtmp && strlen(uuidtmp)) + *uuid = strdup(uuidtmp); + } +out: + dm_task_destroy(dmt); + return r; +} + +static int dm_rename (const char *old, const char *new) +{ + int r = 0; + struct dm_task *dmt; + uint16_t udev_flags = DM_UDEV_DISABLE_LIBRARY_FALLBACK; + uint32_t cookie = 0; + + dmt = dm_task_create(DM_DEVICE_RENAME); + if (!dmt) + return r; + + if (!dm_task_set_name(dmt, old) || + !dm_task_set_newname(dmt, new) || + !dm_task_no_open_count(dmt) || + !dm_task_set_cookie(dmt, &cookie, udev_flags)) + goto out; + + r = dm_task_run(dmt); + dm_udev_wait(cookie); + +out: + dm_task_destroy(dmt); + return r; +} + +static char *dm_find_uuid(const char *uuid) +{ + struct dm_task *dmt; + char *name = NULL; + const char *tmp; + + if ((dmt = dm_task_create(DM_DEVICE_INFO)) == NULL) + return NULL; + + if (!dm_task_set_uuid(dmt, uuid) || + !dm_task_run(dmt)) + goto out; + + tmp = dm_task_get_name(dmt); + if (tmp != NULL && *tmp != '\0') + name = strdup(tmp); + +out: + dm_task_destroy(dmt); + return name; +} + +char * +dm_mapname(int major, int minor) +{ + struct dm_task *dmt; + char *mapname = NULL; + const char 
*map; + + if (!(dmt = dm_task_create(DM_DEVICE_INFO))) + return NULL; + + dm_task_no_open_count(dmt); + dm_task_set_major(dmt, major); + dm_task_set_minor(dmt, minor); + + if (!dm_task_run(dmt)) + goto out; + + map = dm_task_get_name(dmt); + if (map && strlen(map)) + mapname = strdup(map); + +out: + dm_task_destroy(dmt); + return mapname; +} + +/* + * dm_get_first_dep + * + * Return the device number of the first dependent device + * for a given target. + */ +dev_t dm_get_first_dep(char *devname) +{ + struct dm_task *dmt; + struct dm_deps *dm_deps; + dev_t ret = 0; + + if ((dmt = dm_task_create(DM_DEVICE_DEPS)) == NULL) { + return ret; + } + if (!dm_task_set_name(dmt, devname)) { + goto out; + } + if (!dm_task_run(dmt)) { + goto out; + } + if ((dm_deps = dm_task_get_deps(dmt)) == NULL) { + goto out; + } + if (dm_deps->count > 0) { + ret = dm_deps->device[0]; + } +out: + dm_task_destroy(dmt); + + return ret; +} + +/* + * Return a strdup()ed copy of the uuid of mapname, or NULL if it cannot + * be fetched or is empty. The caller frees the result. + */ +char * +dm_mapuuid(const char *mapname) +{ + struct dm_task *dmt; + const char *tmp; + char *uuid = NULL; + + if (!(dmt = dm_task_create(DM_DEVICE_INFO))) + return NULL; + + if (!dm_task_set_name(dmt, mapname)) + goto out; + dm_task_no_open_count(dmt); + + if (!dm_task_run(dmt)) + goto out; + + tmp = dm_task_get_uuid(dmt); + /* same NULL guard as dm_map_present() */ + if (tmp && tmp[0] != '\0') + uuid = strdup(tmp); +out: + dm_task_destroy(dmt); + return uuid; +} + +int +dm_devn (const char * mapname, unsigned int *major, unsigned int *minor) +{ + int r = 1; + struct dm_task *dmt; + struct dm_info info; + + if (!(dmt = dm_task_create(DM_DEVICE_INFO))) + return 1; + + if (!dm_task_set_name(dmt, mapname)) + goto out; + + if (!dm_task_run(dmt)) + goto out; + + if (!dm_task_get_info(dmt, &info) || info.exists == 0) + goto out; + + *major = info.major; + *minor = info.minor; + + r = 0; +out: + dm_task_destroy(dmt); + return r; +} + +/* + * Copy the parameter string of the first target of mapname into + * outparams, a buffer of PARAMS_SIZE bytes. + * Returns 0 on success, 1 if the table cannot be read, has no target, + * or does not fit into the buffer. + */ +static int +dm_get_map(const char *mapname, char * outparams) +{ + int r = 1; + struct dm_task *dmt; + uint64_t start, length; + char *target_type = NULL; + char *params = NULL; + +
if (!(dmt = dm_task_create(DM_DEVICE_TABLE))) + return 1; + + if (!dm_task_set_name(dmt, mapname)) + goto out; + dm_task_no_open_count(dmt); + + if (!dm_task_run(dmt)) + goto out; + + /* Fetch 1st target */ + dm_get_next_target(dmt, NULL, &start, &length, + &target_type, &params); + + /* params stays NULL for an empty table; a return of PARAMS_SIZE means truncation */ + if (params != NULL && + snprintf(outparams, PARAMS_SIZE, "%s", params) < PARAMS_SIZE) + r = 0; +out: + dm_task_destroy(dmt); + return r; +} + +/* + * Returns the open count of mapname, or -1 if it cannot be determined. + */ +static int +dm_get_opencount (const char * mapname) +{ + int r = -1; + struct dm_task *dmt; + struct dm_info info; + + if (!(dmt = dm_task_create(DM_DEVICE_INFO))) + return r; + + if (!dm_task_set_name(dmt, mapname)) + goto out; + + if (!dm_task_run(dmt)) + goto out; + + if (!dm_task_get_info(dmt, &info)) + goto out; + + if (!info.exists) + goto out; + + r = info.open_count; +out: + dm_task_destroy(dmt); + return r; +} + +/* + * returns: + * 1 : match + * 0 : no match + * -1 : empty map + */ +static int +dm_type(const char * name, char * type) +{ + int r = 0; + struct dm_task *dmt; + uint64_t start, length; + char *target_type = NULL; + char *params; + + if (!(dmt = dm_task_create(DM_DEVICE_TABLE))) + return 0; + + if (!dm_task_set_name(dmt, name)) + goto out; + + dm_task_no_open_count(dmt); + + if (!dm_task_run(dmt)) + goto out; + + /* Fetch 1st target */ + if (dm_get_next_target(dmt, NULL, &start, &length, + &target_type, &params) != NULL) + /* more than one target */ + r = -1; + else if (!target_type) + r = -1; + else if (!strcmp(target_type, type)) + r = 1; + +out: + dm_task_destroy(dmt); + return r; +} + +/* + * returns: + * 0 : if both uuids end with same suffix which starts with UUID_PREFIX + * 1 : otherwise + */ +int +dm_compare_uuid(const char *mapuuid, const char *partname) +{ + char *partuuid; + int r = 1; + + partuuid = dm_mapuuid(partname); + if (!partuuid) + return 1; + + if (!strncmp(partuuid, _UUID_PREFIX, _UUID_PREFIX_LEN)) { + char *p = partuuid + _UUID_PREFIX_LEN; + /* skip partition number */ + while (isdigit((unsigned char)*p)) + p++; + if (p != partuuid +
_UUID_PREFIX_LEN && *p == '-' && + !strcmp(mapuuid, p + 1)) + r = 0; + } + free(partuuid); + return r; +} + +struct remove_data { + int verbose; +}; + +/* + * Call partmap_func(name, data) for every dm map other than mapname whose + * first-target table mentions "major:minor" of devt, which consists of a + * single "linear" target and, if uuid is given, whose uuid matches it. + * data is also read as a struct remove_data to decide on verbosity. + * Returns 0 on success, 1 on failure or as soon as partmap_func returns + * non-zero. + */ +static int +do_foreach_partmaps (const char * mapname, const char *uuid, + dev_t devt, + int (*partmap_func)(const char *, void *), + void *data) +{ + struct dm_task *dmt; + struct dm_names *names; + struct remove_data *rd = data; + unsigned next = 0; + char params[PARAMS_SIZE]; + unsigned int major, minor; + char dev_t[32]; + int r = 1; + int is_dmdev = 1; + + if (!(dmt = dm_task_create(DM_DEVICE_LIST))) + return 1; + + dm_task_no_open_count(dmt); + + if (!dm_task_run(dmt)) + goto out; + + if (!(names = dm_task_get_names(dmt))) + goto out; + + if (!names->dev) { + r = 0; /* this is perfectly valid */ + goto out; + } + + if (dm_devn(mapname, &major, &minor) || + (major != major(devt) || minor != minor(devt))) + /* + * The latter could happen if a dm device "/dev/mapper/loop0" + * exists while kpartx is called on "/dev/loop0". + */ + is_dmdev = 0; + + snprintf(dev_t, sizeof(dev_t), "%u:%u", major(devt), minor(devt)); + do { + /* + * skip our devmap + */ + if (is_dmdev && !strcmp(names->name, mapname)) + goto next; + + /* + * skip if we cannot fetch the map table from the kernel + */ + if (dm_get_map(names->name, &params[0])) + goto next; + + /* + * skip if the table does not map over the multipath map + */ + if (!strstr(params, dev_t)) + goto next; + + /* + * skip if devmap target is not "linear" + */ + if (dm_type(names->name, "linear") != 1) { + if (rd->verbose) + printf("%s: is not a linear target. Not removing\n", + names->name); + goto next; + } + + /* + * skip if uuids don't match + */ + if (uuid && dm_compare_uuid(uuid, names->name)) { + if (rd->verbose) + printf("%s: is not a kpartx partition.
Not removing\n", + names->name); + goto next; + } + + if (partmap_func(names->name, data) != 0) + goto out; + next: + next = names->next; + names = (void *) names + next; + } while (next); + + r = 0; +out: + dm_task_destroy (dmt); + return r; +} + +/* + * Remove the map called name unless its open count is non-zero or unknown. + * data points to a struct remove_data. Returns 0 on success, 1 otherwise. + */ +static int +remove_partmap(const char *name, void *data) +{ + struct remove_data *rd = (struct remove_data *)data; + int r = 0; + + if (dm_get_opencount(name)) { + if (rd->verbose) + printf("%s is in use. Not removing\n", name); + return 1; + } + if (!dm_simplecmd(DM_DEVICE_REMOVE, name, 0, 0)) { + if (rd->verbose) + printf("%s: failed to remove\n", name); + r = 1; + } else if (rd->verbose) + printf("del devmap : %s\n", name); + return r; +} + +int +dm_remove_partmaps (char * mapname, char *uuid, dev_t devt, int verbose) +{ + struct remove_data rd = { verbose }; + return do_foreach_partmaps(mapname, uuid, devt, remove_partmap, &rd); +} + +int dm_find_part(const char *parent, const char *delim, int part, + const char *parent_uuid, + char *name, size_t namesiz, char **part_uuid, int verbose) +{ + int r; + char params[PARAMS_SIZE]; + char *tmp; + char *uuid; + unsigned int major, minor; + char dev_t[32]; + + if (!format_partname(name, namesiz, parent, delim, part)) { + if (verbose) + fprintf(stderr, "partname too small\n"); + return 0; + } + + r = dm_map_present(name, part_uuid); + if (r == 1 || parent_uuid == NULL || *parent_uuid == '\0') + return r; + + uuid = make_prefixed_uuid(part, parent_uuid); + if (!uuid) + return 0; + + tmp = dm_find_uuid(uuid); + if (tmp == NULL) + goto out; + + /* Sanity check on partition, see do_foreach_partmaps */ + if (dm_type(tmp, "linear") != 1) + goto out; + + /* + * Try nondm uuid first. That way we avoid confusing + * a device name with a device mapper name.
+ */ + if (!nondm_parse_uuid(parent_uuid, &major, &minor) && + dm_devn(parent, &major, &minor)) + goto out; + snprintf(dev_t, sizeof(dev_t), "%d:%d", major, minor); + + if (dm_get_map(tmp, params)) + goto out; + + if (!strstr(params, dev_t)) + goto out; + + if (verbose) + fprintf(stderr, "found map %s for uuid %s, renaming to %s\n", + tmp, uuid, name); + + r = dm_rename(tmp, name); + if (r == 1) { + free(tmp); + *part_uuid = uuid; + return 1; + } + if (verbose) + fprintf(stderr, "renaming %s->%s failed\n", tmp, name); +out: + free(uuid); + free(tmp); + return r; +} + +char *nondm_create_uuid(dev_t devt) +{ +#define NONDM_UUID_BUFLEN (34 + sizeof(NONDM_UUID_PREFIX) + \ + sizeof(NONDM_UUID_SUFFIX)) + static char uuid_buf[NONDM_UUID_BUFLEN]; + snprintf(uuid_buf, sizeof(uuid_buf), "%s_%u:%u_%s", + NONDM_UUID_PREFIX, major(devt), minor(devt), + NONDM_UUID_SUFFIX); + uuid_buf[NONDM_UUID_BUFLEN-1] = '\0'; + return uuid_buf; +} + +int nondm_parse_uuid(const char *uuid, unsigned int *major, unsigned int *minor) +{ + const char *p; + char *e; + int ma, mi; + + if (strncmp(uuid, NONDM_UUID_PREFIX "_", sizeof(NONDM_UUID_PREFIX))) + return 0; + p = uuid + sizeof(NONDM_UUID_PREFIX); + ma = strtoul(p, &e, 10); + if (e == p || *e != ':') + return 0; + p = e + 1; + mi = strtoul(p, &e, 10); + if (e == p || *e != '_') + return 0; + p = e + 1; + if (strcmp(p, NONDM_UUID_SUFFIX)) + return 0; + + *major = ma; + *minor = mi; + return 1; +} diff --git a/kpartx/devmapper.h b/kpartx/devmapper.h new file mode 100644 index 0000000..701bdf6 --- /dev/null +++ b/kpartx/devmapper.h @@ -0,0 +1,39 @@ +#ifndef _KPARTX_DEVMAPPER_H +#define _KPARTX_DEVMAPPER_H + +#ifdef DM_SUBSYSTEM_UDEV_FLAG0 +#define MPATH_UDEV_RELOAD_FLAG DM_SUBSYSTEM_UDEV_FLAG0 +#else +#define MPATH_UDEV_RELOAD_FLAG 0 +#endif + +extern int udev_sync; + +int dm_prereq (char *, uint32_t, uint32_t, uint32_t); +int dm_simplecmd (int, const char *, int, uint16_t); +int dm_addmap (int, const char *, const char *, const char *, uint64_t, 
+ int, const char *, int, mode_t, uid_t, gid_t); +char * dm_mapname(int major, int minor); +dev_t dm_get_first_dep(char *devname); +char * dm_mapuuid(const char *mapname); +int dm_devn (const char * mapname, unsigned int *major, unsigned int *minor); +int dm_remove_partmaps (char * mapname, char *uuid, dev_t devt, int verbose); +int dm_find_part(const char *parent, const char *delim, int part, + const char *parent_uuid, + char *name, size_t namesiz, char **part_uuid, int verbose); + +/* + * UUID format for partitions created on non-DM devices + * ${UUID_PREFIX}devnode_${MAJOR}:${MINOR}_${NONDM_UUID_SUFFIX}" + * where ${UUID_PREFIX} is "part${PARTNO}-" (see devmapper.c). + * + * The suffix should be sufficiently unique to avoid incidental conflicts; + * the value below is a base64-encoded random number. + * The UUID format shouldn't be changed between kpartx releases. + */ +#define NONDM_UUID_PREFIX "devnode" +#define NONDM_UUID_SUFFIX "Wh5pYvM" +char *nondm_create_uuid(dev_t devt); +int nondm_parse_uuid(const char *uuid, + unsigned int *major, unsigned int *minor); +#endif /* _KPARTX_DEVMAPPER_H */ diff --git a/kpartx/dm-parts.rules b/kpartx/dm-parts.rules new file mode 100644 index 0000000..b48b67c --- /dev/null +++ b/kpartx/dm-parts.rules @@ -0,0 +1,39 @@ +# Rules for partitions created by kpartx + +KERNEL!="dm-*", GOTO="dm_parts_end" +ACTION!="add|change", GOTO="dm_parts_end" +ENV{DM_UUID}!="part[0-9]*", GOTO="dm_parts_end" + +# We must take care that symlinks don't get lost, +# even if blkid fails in 13-dm-disk.rules later. +# +# Fixme: we have currently no way to avoid calling blkid on +# partitions of broken mpath maps such as DM_NOSCAN. +# But when partition devices appear, kpartx has likely read +# the partition table shortly before, so odds are not bad +# that blkid will also succeed. 
+ +IMPORT{db}="ID_FS_USAGE" +IMPORT{db}="ID_FS_UUID_ENC" +IMPORT{db}="ID_FS_LABEL_ENC" +IMPORT{db}="ID_PART_ENTRY_NAME" +IMPORT{db}="ID_PART_ENTRY_UUID" +IMPORT{db}="ID_PART_ENTRY_SCHEME" + +# Maps should take precedence over their members. +ENV{DM_UDEV_LOW_PRIORITY_FLAG}!="1", OPTIONS+="link_priority=50" + +# Set some additional symlinks that typically exist for mpath +# path members, too, and should be overridden. +# +# kpartx_id is very robust, it works for suspended maps and maps +# with 0 dependencies. It sets DM_TYPE, DM_PART, DM_WWN +IMPORT{program}=="kpartx_id %M %m $env{DM_UUID}" + +# DM_TYPE only has a reasonable value for partitions on multipath. +ENV{DM_UUID}=="*-mpath-*", ENV{DM_TYPE}=="?*", ENV{DM_SERIAL}=="?*" \ + SYMLINK+="disk/by-id/$env{DM_TYPE}-$env{DM_SERIAL}-part$env{DM_PART}" +ENV{DM_WWN}=="?*", ENV{DM_PART}=="?*", \ + SYMLINK+="disk/by-id/wwn-$env{DM_WWN}-part$env{DM_PART}" + +LABEL="dm_parts_end" diff --git a/kpartx/dos.c b/kpartx/dos.c new file mode 100644 index 0000000..0c70669 --- /dev/null +++ b/kpartx/dos.c @@ -0,0 +1,114 @@ +/* + * Source: copy of util-linux' partx dos.c + * + * Copyrights of the original file apply + * Copyright (c) 2005 Bastian Blank + */ +#include "kpartx.h" +#include "byteorder.h" +#include +#include +#include "dos.h" + +static int +is_extended(int type) { + return (type == 5 || type == 0xf || type == 0x85); +} + +static int +read_extended_partition(int fd, struct partition *ep, int en, + struct slice *sp, int ns) +{ + struct partition p; + unsigned long start, here, next; + unsigned char *bp; + int loopct = 0; + int moretodo = 1; + int i, n=0; + + int sector_size_mul = get_sector_size(fd)/512; + + next = start = sector_size_mul * le32_to_cpu(ep->start_sect); + + while (moretodo) { + here = next; + moretodo = 0; + if (++loopct > 100) + return n; + + bp = (unsigned char *)getblock(fd, here); + if (bp == NULL) + return n; + + if (bp[510] != 0x55 || bp[511] != 0xaa) + return n; + + for (i=0; i<2; i++) { + memcpy(&p, 
bp + 0x1be + i * sizeof (p), sizeof (p)); + if (is_extended(p.sys_type)) { + if (p.start_sect && p.nr_sects && !moretodo) { + next = start + sector_size_mul * le32_to_cpu(p.start_sect); + moretodo = 1; + } + continue; + } + if (n < ns) { + sp[n].start = here + sector_size_mul * le32_to_cpu(p.start_sect); + sp[n].size = sector_size_mul * le32_to_cpu(p.nr_sects); + sp[n].container = en + 1; + n++; + } else { + fprintf(stderr, + "dos_extd_partition: too many slices\n"); + return n; + } + loopct = 0; + } + } + return n; +} + +static int +is_gpt(int type) { + return (type == 0xEE); +} + +int +read_dos_pt(int fd, struct slice all, struct slice *sp, unsigned int ns) { + struct partition p; + unsigned long offset = all.start; + unsigned int i, n=4; + unsigned char *bp; + uint64_t sector_size_mul = get_sector_size(fd)/512; + + bp = (unsigned char *)getblock(fd, offset); + if (bp == NULL) + return -1; + + if (bp[510] != 0x55 || bp[511] != 0xaa) + return -1; + + for (i=0; i<4; i++) { + memcpy(&p, bp + 0x1be + i * sizeof (p), sizeof (p)); + if (is_gpt(p.sys_type)) + return 0; + if (i < ns) { + sp[i].start = sector_size_mul * le32_to_cpu(p.start_sect); + sp[i].size = sector_size_mul * le32_to_cpu(p.nr_sects); + } else { + fprintf(stderr, + "dos_partition: too many slices\n"); + break; + } + if (is_extended(p.sys_type)) { + /* extended partitions only get one or + two sectors mapped for LILO to install, + whichever is needed to have 1kb of space */ + if (sector_size_mul == 1) + sp[i].size = 2; + else sp[i].size = sector_size_mul; + n += read_extended_partition(fd, &p, i, sp+n, ns-n); + } + } + return n; +} diff --git a/kpartx/dos.h b/kpartx/dos.h new file mode 100644 index 0000000..f45e7f6 --- /dev/null +++ b/kpartx/dos.h @@ -0,0 +1,13 @@ +#ifndef DOS_H_INCLUDED +#define DOS_H_INCLUDED + +struct partition { + unsigned char boot_ind; /* 0x80 - active */ + unsigned char bh, bs, bc; + unsigned char sys_type; + unsigned char eh, es, ec; + unsigned int start_sect; + unsigned int 
nr_sects; +} __attribute__((packed)); + +#endif /* DOS_H_INCLUDED */ diff --git a/kpartx/efi.h b/kpartx/efi.h new file mode 100644 index 0000000..af5660a --- /dev/null +++ b/kpartx/efi.h @@ -0,0 +1,57 @@ +/* + efi.[ch] - Manipulates EFI variables as exported in /proc/efi/vars + + Copyright (C) 2001 Dell Computer Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + */ + +#ifndef EFI_H +#define EFI_H + +/* + * Extensible Firmware Interface + * Based on 'Extensible Firmware Interface Specification' + * version 1.02, 12 December, 2000 + */ +#include +#include + +typedef struct { + uint8_t b[16]; +} efi_guid_t; + +#define EFI_GUID(a,b,c,d0,d1,d2,d3,d4,d5,d6,d7) \ +((efi_guid_t) \ +{{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ + (b) & 0xff, ((b) >> 8) & 0xff, \ + (c) & 0xff, ((c) >> 8) & 0xff, \ + (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }}) + + +/****************************************************** + * GUIDs + ******************************************************/ +#define NULL_GUID \ +EFI_GUID( 0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00) + +static inline int +efi_guidcmp(efi_guid_t left, efi_guid_t right) +{ + return memcmp(&left, &right, sizeof (efi_guid_t)); +} + +typedef uint16_t efi_char16_t; /* UNICODE character */ + +#endif /* EFI_H */ diff --git a/kpartx/gpt.c b/kpartx/gpt.c new file mode 100644 index 0000000..785b34e --- /dev/null +++ 
b/kpartx/gpt.c @@ -0,0 +1,640 @@ +/* + gpt.[ch] + + Copyright (C) 2000-2001 Dell Computer Corporation + + EFI GUID Partition Table handling + Per Intel EFI Specification v1.02 + http://developer.intel.com/technology/efi/efi.htm + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include "gpt.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "crc32.h" +#include "kpartx.h" + +#if BYTE_ORDER == LITTLE_ENDIAN +# define __le16_to_cpu(x) (uint16_t)(x) +# define __le32_to_cpu(x) (uint32_t)(x) +# define __le64_to_cpu(x) (uint64_t)(x) +# define __cpu_to_le32(x) (x) +#elif BYTE_ORDER == BIG_ENDIAN +# define __le16_to_cpu(x) bswap_16(x) +# define __le32_to_cpu(x) bswap_32(x) +# define __le64_to_cpu(x) bswap_64(x) +# define __cpu_to_le32(x) bswap_32(x) +#endif + +#ifndef BLKGETLASTSECT +#define BLKGETLASTSECT _IO(0x12,108) /* get last sector of block device */ +#endif +#ifndef BLKGETSIZE +#define BLKGETSIZE _IO(0x12,96) /* return device size */ +#endif +#ifndef BLKSSZGET +#define BLKSSZGET _IO(0x12,104) /* get block device sector size */ +#endif +#ifndef BLKGETSIZE64 +#define BLKGETSIZE64 _IOR(0x12,114,sizeof(uint64_t)) /* return device size in bytes (u64 *arg) */ +#endif + +struct blkdev_ioctl_param { + unsigned int block; + size_t content_length; + char * block_contents; +}; + +/** + * efi_crc32() - EFI version 
of crc32 function + * @buf: buffer to calculate crc32 of + * @len - length of buf + * + * Description: Returns EFI-style CRC32 value for @buf + * + * This function uses the little endian Ethernet polynomial + * but seeds the function with ~0, and xor's with ~0 at the end. + * Note, the EFI Specification, v1.02, has a reference to + * Dr. Dobbs Journal, May 1994 (actually it's in May 1992). + */ +static inline uint32_t +efi_crc32(const void *buf, unsigned long len) +{ + return (crc32(~0L, buf, len) ^ ~0L); +} + +/** + * is_pmbr_valid(): test Protective MBR for validity + * @mbr: pointer to a legacy mbr structure + * + * Description: Returns 1 if PMBR is valid, 0 otherwise. + * Validity depends on two things: + * 1) MSDOS signature is in the last two bytes of the MBR + * 2) One partition of type 0xEE is found + */ +static int +is_pmbr_valid(legacy_mbr *mbr) +{ + int i, found = 0, signature = 0; + if (!mbr) + return 0; + signature = (__le16_to_cpu(mbr->signature) == MSDOS_MBR_SIGNATURE); + for (i = 0; signature && i < 4; i++) { + if (mbr->partition[i].sys_type == + EFI_PMBR_OSTYPE_EFI_GPT) { + found = 1; + break; + } + } + return (signature && found); +} + + +/************************************************************ + * _get_num_sectors + * Requires: + * - filedes is an open file descriptor, suitable for reading + * Modifies: nothing + * Returns: + * Last LBA value on success + * 0 on error + * + * Try getting BLKGETSIZE64 and BLKSSZGET first, + * then BLKGETSIZE if necessary. + * Kernels 2.4.15-2.4.18 and 2.5.0-2.5.3 have a broken BLKGETSIZE64 + * which returns the number of 512-byte sectors, not the size of + * the disk in bytes. Fixed in kernels 2.4.18-pre8 and 2.5.4-pre3. 
+ ************************************************************/ +static uint64_t +_get_num_sectors(int filedes) +{ + int rc; + uint64_t bytes=0; + + rc = ioctl(filedes, BLKGETSIZE64, &bytes); + if (!rc) + return bytes / get_sector_size(filedes); + + return 0; +} + +/************************************************************ + * last_lba(): return number of last logical block of device + * + * @fd + * + * Description: returns Last LBA value on success, 0 on error. + * Notes: The value st_blocks gives the size of the file + * in 512-byte blocks, which is OK if + * EFI_BLOCK_SIZE_SHIFT == 9. + ************************************************************/ + +static uint64_t +last_lba(int filedes) +{ + int rc; + uint64_t sectors = 0; + struct stat s; + memset(&s, 0, sizeof (s)); + rc = fstat(filedes, &s); + if (rc == -1) { + fprintf(stderr, "last_lba() could not stat: %s\n", + strerror(errno)); + return 0; + } + + if (S_ISBLK(s.st_mode)) { + sectors = _get_num_sectors(filedes); + } else { + fprintf(stderr, + "last_lba(): I don't know how to handle files with mode %x\n", + s.st_mode); + sectors = 1; + } + + return sectors ? sectors - 1 : 0; +} + + +static ssize_t +read_lastoddsector(int fd, void *buffer, size_t count) +{ + int rc; + struct blkdev_ioctl_param ioctl_param; + + if (!buffer) return 0; + + ioctl_param.block = 0; /* read the last sector */ + ioctl_param.content_length = count; + ioctl_param.block_contents = buffer; + + rc = ioctl(fd, BLKGETLASTSECT, &ioctl_param); + if (rc == -1) perror("read failed"); + + return !rc; +} + +static ssize_t +read_lba(int fd, uint64_t lba, void *buffer, size_t bytes) +{ + int sector_size = get_sector_size(fd); + off_t offset = lba * sector_size; + uint64_t lastlba; + ssize_t bytesread; + + if (lseek(fd, offset, SEEK_SET) < 0) + return 0; + bytesread = read(fd, buffer, bytes); + + lastlba = last_lba(fd); + if (!lastlba) + return bytesread; + + /* Kludge. 
This is necessary to read/write the last + block of an odd-sized disk, until Linux 2.5.x kernel fixes. + This is only used by gpt.c, and only to read + one sector, so we don't have to be fancy. + */ + if (!bytesread && !(lastlba & 1) && lba == lastlba) { + bytesread = read_lastoddsector(fd, buffer, bytes); + } + return bytesread; +} + +/** + * alloc_read_gpt_entries(): reads partition entries from disk + * @fd is an open file descriptor to the whole disk + * @gpt is a buffer into which the GPT will be put + * Description: Returns ptes on success, NULL on error. + * Allocates space for PTEs based on information found in @gpt. + * Notes: remember to free pte when you're done! + */ +static gpt_entry * +alloc_read_gpt_entries(int fd, gpt_header * gpt) +{ + gpt_entry *pte; + size_t count = __le32_to_cpu(gpt->num_partition_entries) * + __le32_to_cpu(gpt->sizeof_partition_entry); + + if (!count) return NULL; + + pte = (gpt_entry *)malloc(count); + if (!pte) + return NULL; + memset(pte, 0, count); + + if (!read_lba(fd, __le64_to_cpu(gpt->partition_entry_lba), pte, + count)) { + free(pte); + return NULL; + } + return pte; +} + +/** + * alloc_read_gpt_header(): Allocates GPT header, reads into it from disk + * @fd is an open file descriptor to the whole disk + * @lba is the Logical Block Address of the partition table + * + * Description: returns GPT header on success, NULL on error. Allocates + * and fills a GPT header starting at @ from @bdev. + * Note: remember to free gpt when finished with it. 
+ */ +static gpt_header * +alloc_read_gpt_header(int fd, uint64_t lba) +{ + gpt_header *gpt; + gpt = (gpt_header *) + malloc(sizeof (gpt_header)); + if (!gpt) + return NULL; + memset(gpt, 0, sizeof (*gpt)); + if (!read_lba(fd, lba, gpt, sizeof (gpt_header))) { + free(gpt); + return NULL; + } + + return gpt; +} + +/** + * is_gpt_valid() - tests one GPT header and PTEs for validity + * @fd is an open file descriptor to the whole disk + * @lba is the logical block address of the GPT header to test + * @gpt is a GPT header ptr, filled on return. + * @ptes is a PTEs ptr, filled on return. + * + * Description: returns 1 if valid, 0 on error. + * If valid, returns pointers to newly allocated GPT header and PTEs. + */ +static int +is_gpt_valid(int fd, uint64_t lba, + gpt_header ** gpt, gpt_entry ** ptes) +{ + int rc = 0; /* default to not valid */ + uint32_t crc, origcrc; + + if (!gpt || !ptes) + return 0; + if (!(*gpt = alloc_read_gpt_header(fd, lba))) + return 0; + + /* Check the GUID Partition Table signature */ + if (__le64_to_cpu((*gpt)->signature) != GPT_HEADER_SIGNATURE) { + /* + printf("GUID Partition Table Header signature is wrong: %" PRIx64" != %" PRIx64 "\n", + __le64_to_cpu((*gpt)->signature), GUID_PT_HEADER_SIGNATURE); + */ + free(*gpt); + *gpt = NULL; + return rc; + } + + /* Check the GUID Partition Table Header CRC */ + origcrc = __le32_to_cpu((*gpt)->header_crc32); + (*gpt)->header_crc32 = 0; + crc = efi_crc32(*gpt, __le32_to_cpu((*gpt)->header_size)); + if (crc != origcrc) { + // printf( "GPTH CRC check failed, %x != %x.\n", origcrc, crc); + (*gpt)->header_crc32 = __cpu_to_le32(origcrc); + free(*gpt); + *gpt = NULL; + return 0; + } + (*gpt)->header_crc32 = __cpu_to_le32(origcrc); + + /* Check that the my_lba entry points to the LBA + * that contains the GPT we read */ + if (__le64_to_cpu((*gpt)->my_lba) != lba) { + /* + printf( "my_lba % PRIx64 "x != lba %"PRIx64 "x.\n", + __le64_to_cpu((*gpt)->my_lba), lba); + */ + free(*gpt); + *gpt = NULL; + return 0; 
+ } + + /* Check that sizeof_partition_entry has the correct value */ + if (__le32_to_cpu((*gpt)->sizeof_partition_entry) != sizeof(gpt_entry)) { + // printf("GUID partition entry size check failed.\n"); + free(*gpt); + *gpt = NULL; + return 0; + } + + if (!(*ptes = alloc_read_gpt_entries(fd, *gpt))) { + free(*gpt); + *gpt = NULL; + return 0; + } + + /* Check the GUID Partition Entry Array CRC */ + crc = efi_crc32(*ptes, + __le32_to_cpu((*gpt)->num_partition_entries) * + __le32_to_cpu((*gpt)->sizeof_partition_entry)); + if (crc != __le32_to_cpu((*gpt)->partition_entry_array_crc32)) { + // printf("GUID Partitition Entry Array CRC check failed.\n"); + free(*gpt); + *gpt = NULL; + free(*ptes); + *ptes = NULL; + return 0; + } + + /* We're done, all's well */ + return 1; +} +/** + * compare_gpts() - Search disk for valid GPT headers and PTEs + * @pgpt is the primary GPT header + * @agpt is the alternate GPT header + * @lastlba is the last LBA number + * Description: Returns nothing. Sanity checks pgpt and agpt fields + * and prints warnings on discrepancies. + * + */ +static void +compare_gpts(gpt_header *pgpt, gpt_header *agpt, uint64_t lastlba) +{ + int error_found = 0; + if (!pgpt || !agpt) + return; + if (__le64_to_cpu(pgpt->my_lba) != __le64_to_cpu(agpt->alternate_lba)) { + error_found++; + fprintf(stderr, + "GPT:Primary header LBA != Alt. header alternate_lba\n"); +#ifdef DEBUG + fprintf(stderr, "GPT:%" PRIx64 " != %" PRIx64 "\n", + __le64_to_cpu(pgpt->my_lba), + __le64_to_cpu(agpt->alternate_lba)); +#endif + } + if (__le64_to_cpu(pgpt->alternate_lba) != __le64_to_cpu(agpt->my_lba)) { + error_found++; + fprintf(stderr, + "GPT:Primary header alternate_lba != Alt. 
header my_lba\n"); +#ifdef DEBUG + fprintf(stderr, "GPT:%" PRIx64 " != %" PRIx64 "\n", + __le64_to_cpu(pgpt->alternate_lba), + __le64_to_cpu(agpt->my_lba)); +#endif + } + if (__le64_to_cpu(pgpt->first_usable_lba) != + __le64_to_cpu(agpt->first_usable_lba)) { + error_found++; + fprintf(stderr, "GPT:first_usable_lbas don't match.\n"); +#ifdef DEBUG + fprintf(stderr, "GPT:%" PRIx64 " != %" PRIx64 "\n", + __le64_to_cpu(pgpt->first_usable_lba), + __le64_to_cpu(agpt->first_usable_lba)); +#endif + } + if (__le64_to_cpu(pgpt->last_usable_lba) != + __le64_to_cpu(agpt->last_usable_lba)) { + error_found++; + fprintf(stderr, "GPT:last_usable_lbas don't match.\n"); +#ifdef DEBUG + fprintf(stderr, "GPT:%" PRIx64 " != %" PRIx64 "\n", + __le64_to_cpu(pgpt->last_usable_lba), + __le64_to_cpu(agpt->last_usable_lba)); +#endif + } + if (efi_guidcmp(pgpt->disk_guid, agpt->disk_guid)) { + error_found++; + fprintf(stderr, "GPT:disk_guids don't match.\n"); + } + if (__le32_to_cpu(pgpt->num_partition_entries) != + __le32_to_cpu(agpt->num_partition_entries)) { + error_found++; + fprintf(stderr, "GPT:num_partition_entries don't match: " + "0x%x != 0x%x\n", + __le32_to_cpu(pgpt->num_partition_entries), + __le32_to_cpu(agpt->num_partition_entries)); + } + if (__le32_to_cpu(pgpt->sizeof_partition_entry) != + __le32_to_cpu(agpt->sizeof_partition_entry)) { + error_found++; + fprintf(stderr, + "GPT:sizeof_partition_entry values don't match: " + "0x%x != 0x%x\n", + __le32_to_cpu(pgpt->sizeof_partition_entry), + __le32_to_cpu(agpt->sizeof_partition_entry)); + } + if (__le32_to_cpu(pgpt->partition_entry_array_crc32) != + __le32_to_cpu(agpt->partition_entry_array_crc32)) { + error_found++; + fprintf(stderr, + "GPT:partition_entry_array_crc32 values don't match: " + "0x%x != 0x%x\n", + __le32_to_cpu(pgpt->partition_entry_array_crc32), + __le32_to_cpu(agpt->partition_entry_array_crc32)); + } + if (__le64_to_cpu(pgpt->alternate_lba) != lastlba) { + error_found++; + fprintf(stderr, + "GPT:Primary header 
thinks Alt. header is not at the end of the disk.\n"); +#ifdef DEBUG + fprintf(stderr, "GPT:%" PRIx64 " != %" PRIx64 "\n", + __le64_to_cpu(pgpt->alternate_lba), lastlba); +#endif + } + + if (__le64_to_cpu(agpt->my_lba) != lastlba) { + error_found++; + fprintf(stderr, + "GPT:Alternate GPT header not at the end of the disk.\n"); +#ifdef DEBUG + fprintf(stderr, "GPT:%" PRIx64 " != %" PRIx64 "\n", + __le64_to_cpu(agpt->my_lba), lastlba); +#endif + } + + if (error_found) + fprintf(stderr, + "GPT: Use GNU Parted to correct GPT errors.\n"); + return; +} + +/** + * find_valid_gpt() - Search disk for valid GPT headers and PTEs + * @fd is an open file descriptor to the whole disk + * @gpt is a GPT header ptr, filled on return. + * @ptes is a PTEs ptr, filled on return. + * Description: Returns 1 if valid, 0 on error. + * If valid, returns pointers to newly allocated GPT header and PTEs. + * Validity depends on finding either the Primary GPT header and PTEs valid, + * or the Alternate GPT header and PTEs valid, and the PMBR valid. + */ +static int +find_valid_gpt(int fd, gpt_header ** gpt, gpt_entry ** ptes) +{ + int good_pgpt = 0, good_agpt = 0, good_pmbr = 0; + gpt_header *pgpt = NULL, *agpt = NULL; + gpt_entry *pptes = NULL, *aptes = NULL; + legacy_mbr *legacymbr = NULL; + uint64_t lastlba; + if (!gpt || !ptes) + return 0; + + if (!(lastlba = last_lba(fd))) + return 0; + good_pgpt = is_gpt_valid(fd, GPT_PRIMARY_PARTITION_TABLE_LBA, + &pgpt, &pptes); + if (good_pgpt) { + good_agpt = is_gpt_valid(fd, + __le64_to_cpu(pgpt->alternate_lba), + &agpt, &aptes); + if (!good_agpt) { + good_agpt = is_gpt_valid(fd, lastlba, + &agpt, &aptes); + } + } + else { + good_agpt = is_gpt_valid(fd, lastlba, + &agpt, &aptes); + } + + /* The obviously unsuccessful case */ + if (!good_pgpt && !good_agpt) { + goto fail; + } + + /* This will be added to the EFI Spec. per Intel after v1.02. 
*/ + legacymbr = malloc(sizeof (*legacymbr)); + if (legacymbr) { + memset(legacymbr, 0, sizeof (*legacymbr)); + read_lba(fd, 0, (uint8_t *) legacymbr, + sizeof (*legacymbr)); + good_pmbr = is_pmbr_valid(legacymbr); + free(legacymbr); + legacymbr=NULL; + } + + /* Failure due to bad PMBR */ + if ((good_pgpt || good_agpt) && !good_pmbr && !force_gpt) { + fprintf(stderr, + " Warning: Disk has a valid GPT signature " + "but invalid PMBR.\n" + " Assuming this disk is *not* a GPT disk anymore.\n" + " Use gpt kernel option to override. " + "Use GNU Parted to correct disk.\n"); + goto fail; + } + + /* Would fail due to bad PMBR, but force GPT anyhow */ + if ((good_pgpt || good_agpt) && !good_pmbr && force_gpt) { + fprintf(stderr, + " Warning: Disk has a valid GPT signature but " + "invalid PMBR.\n" + " Use GNU Parted to correct disk.\n" + " gpt option taken, disk treated as GPT.\n"); + } + + compare_gpts(pgpt, agpt, lastlba); + + /* The good cases */ + if (good_pgpt && (good_pmbr || force_gpt)) { + *gpt = pgpt; + *ptes = pptes; + if (agpt) { free(agpt); agpt = NULL; } + if (aptes) { free(aptes); aptes = NULL; } + if (!good_agpt) { + fprintf(stderr, + "Alternate GPT is invalid, " + "using primary GPT.\n"); + } + return 1; + } + else if (good_agpt && (good_pmbr || force_gpt)) { + *gpt = agpt; + *ptes = aptes; + if (pgpt) { free(pgpt); pgpt = NULL; } + if (pptes) { free(pptes); pptes = NULL; } + fprintf(stderr, + "Primary GPT is invalid, using alternate GPT.\n"); + return 1; + } + +fail: + if (pgpt) { free(pgpt); pgpt=NULL; } + if (agpt) { free(agpt); agpt=NULL; } + if (pptes) { free(pptes); pptes=NULL; } + if (aptes) { free(aptes); aptes=NULL; } + *gpt = NULL; + *ptes = NULL; + return 0; +} + +/** + * read_gpt_pt() + * @fd + * @all - slice with start/size of whole disk + * + * 0 if this isn't our partition table + * number of partitions if successful + * + */ +int +read_gpt_pt (int fd, __attribute__((unused)) struct slice all, + struct slice *sp, unsigned int ns) +{ + 
gpt_header *gpt = NULL; + gpt_entry *ptes = NULL; + unsigned int i; + int n = 0; + int last_used_index=-1; + int sector_size_mul = get_sector_size(fd)/512; + + if (!find_valid_gpt (fd, &gpt, &ptes) || !gpt || !ptes) { + if (gpt) + free (gpt); + if (ptes) + free (ptes); + return 0; + } + + for (i = 0; i < __le32_to_cpu(gpt->num_partition_entries) && i < ns; i++) { + if (!efi_guidcmp (NULL_GUID, ptes[i].partition_type_guid)) { + sp[n].start = 0; + sp[n].size = 0; + n++; + } else { + sp[n].start = sector_size_mul * + __le64_to_cpu(ptes[i].starting_lba); + sp[n].size = sector_size_mul * + (__le64_to_cpu(ptes[i].ending_lba) - + __le64_to_cpu(ptes[i].starting_lba) + 1); + last_used_index=n; + n++; + } + } + free (ptes); + free (gpt); + return last_used_index+1; +} diff --git a/kpartx/gpt.h b/kpartx/gpt.h new file mode 100644 index 0000000..4e1b49a --- /dev/null +++ b/kpartx/gpt.h @@ -0,0 +1,111 @@ +/* + gpt.[ch] + + Copyright (C) 2000-2001 Dell Computer Corporation + + EFI GUID Partition Table handling + Per Intel EFI Specification v1.02 + http://developer.intel.com/technology/efi/efi.htm + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#ifndef _GPT_H +#define _GPT_H + + +#include +#include "kpartx.h" +#include "dos.h" +#include "efi.h" + +#define EFI_PMBR_OSTYPE_EFI 0xEF +#define EFI_PMBR_OSTYPE_EFI_GPT 0xEE +#define MSDOS_MBR_SIGNATURE 0xaa55 +#define GPT_BLOCK_SIZE 512 + +#define GPT_HEADER_SIGNATURE 0x5452415020494645ULL +#define GPT_HEADER_REVISION_V1_02 0x00010200 +#define GPT_HEADER_REVISION_V1_00 0x00010000 +#define GPT_HEADER_REVISION_V0_99 0x00009900 +#define GPT_PRIMARY_PARTITION_TABLE_LBA 1 + +typedef struct _gpt_header { + uint64_t signature; + uint32_t revision; + uint32_t header_size; + uint32_t header_crc32; + uint32_t reserved1; + uint64_t my_lba; + uint64_t alternate_lba; + uint64_t first_usable_lba; + uint64_t last_usable_lba; + efi_guid_t disk_guid; + uint64_t partition_entry_lba; + uint32_t num_partition_entries; + uint32_t sizeof_partition_entry; + uint32_t partition_entry_array_crc32; + uint8_t reserved2[GPT_BLOCK_SIZE - 92]; +} __attribute__ ((packed)) gpt_header; + +typedef struct _gpt_entry_attributes { + uint64_t required_to_function:1; + uint64_t reserved:47; + uint64_t type_guid_specific:16; +} __attribute__ ((packed)) gpt_entry_attributes; + +typedef struct _gpt_entry { + efi_guid_t partition_type_guid; + efi_guid_t unique_partition_guid; + uint64_t starting_lba; + uint64_t ending_lba; + gpt_entry_attributes attributes; + efi_char16_t partition_name[72 / sizeof(efi_char16_t)]; +} __attribute__ ((packed)) gpt_entry; + + +/* + These values are only defaults. The actual on-disk structures + may define different sizes, so use those unless creating a new GPT disk! +*/ + +#define GPT_DEFAULT_RESERVED_PARTITION_ENTRY_ARRAY_SIZE 16384 +/* + Number of actual partition entries should be calculated + as: +*/ +#define GPT_DEFAULT_RESERVED_PARTITION_ENTRIES \ + (GPT_DEFAULT_RESERVED_PARTITION_ENTRY_ARRAY_SIZE / \ + sizeof(gpt_entry)) + + +/* Protected Master Boot Record & Legacy MBR share same structure */ +/* Needs to be packed because the u16s force misalignment. 
+ */ + +typedef struct _legacy_mbr { + uint8_t bootcode[440]; + uint32_t unique_mbr_signature; + uint16_t unknown; + struct partition partition[4]; + uint16_t signature; +} __attribute__ ((packed)) legacy_mbr; + + +#define EFI_GPT_PRIMARY_PARTITION_TABLE_LBA 1 + +/* Functions */ +int read_gpt_pt (int fd, struct slice all, struct slice *sp, unsigned int ns); + + +#endif diff --git a/kpartx/kpartx.8 b/kpartx/kpartx.8 new file mode 100644 index 0000000..08bb349 --- /dev/null +++ b/kpartx/kpartx.8 @@ -0,0 +1,130 @@ +.\" ---------------------------------------------------------------------------- +.\" Update the date below if you make any significant change. +.\" Make sure there are no errors with: +.\" groff -z -wall -b -e -t kpartx/kpartx.8 +.\" +.\" ---------------------------------------------------------------------------- +. +.TH KPARTX 8 2016-10-28 "Linux" +. +. +.\" ---------------------------------------------------------------------------- +.SH NAME +.\" ---------------------------------------------------------------------------- +. +kpartx \- Create device maps from partition tables. +. +. +.\" ---------------------------------------------------------------------------- +.SH SYNOPSIS +.\" ---------------------------------------------------------------------------- +. +.B kpartx +.RB [\| \-a | \-d | \-u | \-l \|] +.RB [\| \-r \|] +.RB [\| \-p \|] +.RB [\| \-f \|] +.RB [\| \-g \|] +.RB [\| \-s | \-n \|] +.RB [\| \-v \|] +.B wholedisk +. +. +.\" ---------------------------------------------------------------------------- +.SH DESCRIPTION +.\" ---------------------------------------------------------------------------- +. +This tool, derived from util-linux' partx, reads partition tables on specified +device and creates device maps over partition segments detected. It is called +from hotplug upon device map creation and deletion. +. +. 
+.\" ---------------------------------------------------------------------------- +.SH OPTIONS +.\" ---------------------------------------------------------------------------- +. +.TP +.B \-a +Add partition mappings. +. +.TP +.B \-d +Delete partition mappings. +. +.TP +.B \-u +Update partition mappings. +. +.TP +.B \-l +List partition mappings that would be added by \-a. +. +.TP +.B \-r +Read-only partition mappings. +. +.TP +.B \-p +Set device name-partition number delimiter. +. +.TP +.B \-f +Force creation of mappings; overrides 'no_partitions' feature. +. +.TP +.B \-g +Force GUID partition table (GPT). +. +.TP +.B \-s +Sync mode (Default). Don't return until the partitions are created. +. +.TP +.B \-n +Nosync mode. Return before the partitions are created. +. +.TP +.B \-v +Operate verbosely. +. +. +.\" ---------------------------------------------------------------------------- +.SH EXAMPLE +.\" ---------------------------------------------------------------------------- +. +To mount all the partitions in a raw disk image: +.IP +kpartx \-av disk.img +.PP +This will output lines such as: +.IP +add map loop1p1 (254:4): 0 409597 linear 7:1 3 +.PP +The \fIloop1p1\fR is the name of a device file under \fI/dev/mapper\fR which you +can use to access the partition, for example to fsck it: +.IP +fsck /dev/mapper/loop1p1 +.PP +When you're done, you need to remove the devices: +.IP +kpartx \-d disk.img +. +. +.\" ---------------------------------------------------------------------------- +.SH "SEE ALSO" +.\" ---------------------------------------------------------------------------- +. +.BR multipath (8) +.BR multipathd (8) +.BR hotplug (8) +. +. +.\" ---------------------------------------------------------------------------- +.SH AUTHORS +.\" ---------------------------------------------------------------------------- +. +This man page was assembled by Patrick Caulfield for the Debian project. +.PP +\fImultipath-tools\fR was developed by Christophe Varoqui +and others.
+.\" EOF diff --git a/kpartx/kpartx.c b/kpartx/kpartx.c new file mode 100644 index 0000000..d3620c5 --- /dev/null +++ b/kpartx/kpartx.c @@ -0,0 +1,748 @@ +/* + * Source: copy of util-linux' partx partx.c + * + * Copyrights of the original file applies + * Copyright (c) 2004, 2005 Christophe Varoqui + * Copyright (c) 2005 Kiyoshi Ueda + * Copyright (c) 2005 Lars Soltau + */ + +/* + * Given a block device and a partition table type, + * try to parse the partition table, and list the + * contents. Optionally add or remove partitions. + * + * Read wholedisk and add all partitions: + * kpartx [-a|-d|-l] [-v] wholedisk + * + * aeb, 2000-03-21 + * cva, 2002-10-26 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "devmapper.h" +#include "crc32.h" +#include "lopart.h" +#include "kpartx.h" +#include "version.h" + +#define SIZE(a) (sizeof(a)/sizeof((a)[0])) + +#define READ_SIZE 1024 +#define MAXTYPES 64 +#define MAXSLICES 256 +#define DM_TARGET "linear" +#define LO_NAME_SIZE 64 +#define PARTNAME_SIZE 128 +#define DELIM_SIZE 8 + +struct slice slices[MAXSLICES]; + +enum action { LIST, ADD, DELETE, UPDATE }; + +struct pt { + char *type; + ptreader *fn; +} pts[MAXTYPES]; + +int ptct = 0; +int udev_sync = 1; + +static void +addpts(char *t, ptreader f) +{ + if (ptct >= MAXTYPES) { + fprintf(stderr, "addpts: too many types\n"); + exit(1); + } + pts[ptct].type = t; + pts[ptct].fn = f; + ptct++; +} + +static void +initpts(void) +{ + addpts("gpt", read_gpt_pt); + addpts("dos", read_dos_pt); + addpts("bsd", read_bsd_pt); + addpts("solaris", read_solaris_pt); + addpts("unixware", read_unixware_pt); + addpts("dasd", read_dasd_pt); + addpts("mac", read_mac_pt); + addpts("sun", read_sun_pt); + addpts("ps3", read_ps3_pt); +} + +static char short_opts[] = "rladfgvp:t:snu"; + +/* Used in gpt.c */ +int force_gpt=0; + +int force_devmap=0; + +static int +usage(void) { + printf(VERSION_STRING); + 
/*
 * set_delimiter: pick the separator that goes between a map name and a
 * partition number.
 *
 * When the last character of device is a decimal digit, 'p' is stored in
 * delimiter[0]; otherwise (including for an empty name) delimiter is left
 * untouched.  The caller supplies a zero-filled buffer.
 */
static void
set_delimiter (char * device, char * delimiter)
{
	size_t len = strlen(device);

	if (len == 0)
		return;

	/*
	 * The <ctype.h> classifiers are only defined for EOF and values
	 * representable as unsigned char; a plain char holding a byte >= 0x80
	 * may be negative, so convert before the call.
	 */
	if (isdigit((unsigned char)device[len - 1]))
		*delimiter = 'p';
}

/*
 * find_devname_offset: return the index of the first character after the
 * last '/' in device, or 0 when the path contains no '/'.
 */
static int
find_devname_offset (char * device)
{
	char *slash = strrchr(device, '/');

	if (slash == NULL)
		return 0;

	return (int)(slash + 1 - device);
}
/*
 * check_uuid: decide whether part_uuid names a kpartx partition of the
 * device identified by uuid.
 *
 * part_uuid must start with "part" and contain a '-'; everything after the
 * first '-' must equal uuid.  Returns 0 on a match.  Otherwise returns -1
 * and points *err_msg at a static description of the mismatch.
 */
static int
check_uuid(char *uuid, char *part_uuid, char **err_msg) {
	char *dash = strchr(part_uuid, '-');
	int has_part_prefix = (strncmp(part_uuid, "part", 4) == 0);

	if (dash == NULL || !has_part_prefix) {
		*err_msg = "not a kpartx partition";
		return -1;
	}

	if (strcmp(uuid, dash + 1) == 0)
		return 0;

	*err_msg = "a partition of a different device";
	return -1;
}
*/ + progname = strrchr(argv[0], '/'); + + if (!progname) + progname = argv[0]; + else + progname++; + + if (!strcmp(progname, "kpartx.dev")) { /* Hotplug mode */ + hotplug = 1; + + /* Setup for original kpartx variables */ + if (!(device = get_hotplug_device())) + exit(1); + + diskdevice = device; + what = ADD; + } else if (argc < 2) { + usage(); + exit(1); + } + + while ((arg = getopt(argc, argv, short_opts)) != EOF) + switch(arg) { + case 'r': + ro=1; + break; + case 'f': + force_devmap=1; + break; + case 'g': + force_gpt=1; + break; + case 't': + type = optarg; + break; + case 'v': + verbose = 1; + break; + case 'p': + delim = optarg; + break; + case 'l': + what = LIST; + break; + case 'a': + what = ADD; + break; + case 'd': + what = DELETE; + break; + case 's': + udev_sync = 1; + break; + case 'n': + udev_sync = 0; + break; + case 'u': + what = UPDATE; + break; + default: + usage(); + exit(1); + } + +#ifdef LIBDM_API_COOKIE + if (!udev_sync) + dm_udev_set_sync_support(0); + else + dm_udev_set_sync_support(1); +#endif + + if (dm_prereq(DM_TARGET, 0, 0, 0) && (what == ADD || what == DELETE || what == UPDATE)) { + fprintf(stderr, "device mapper prerequisites not met\n"); + exit(1); + } + + if (hotplug) { + /* already got [disk]device */ + } else if (optind == argc-2) { + device = argv[optind]; + diskdevice = argv[optind+1]; + } else if (optind == argc-1) { + diskdevice = device = argv[optind]; + } else { + usage(); + exit(1); + } + + if (stat(device, &buf)) { + printf("failed to stat() %s\n", device); + exit (1); + } + + if (S_ISREG (buf.st_mode)) { + /* already looped file ? 
*/ + char rpath[PATH_MAX]; + if (realpath(device, rpath) == NULL) { + fprintf(stderr, "Error: %s: %s\n", device, + strerror(errno)); + exit (1); + } + loopdev = find_loop_by_file(rpath); + + if (!loopdev && what == DELETE) + exit (0); + + if (!loopdev) { + loopdev = find_unused_loop_device(); + + if (set_loop(loopdev, rpath, 0, &ro)) { + fprintf(stderr, "can't set up loop\n"); + exit (1); + } + loopcreated = 1; + } + device = loopdev; + + if (stat(device, &buf)) { + printf("failed to stat() %s\n", device); + exit (1); + } + } + else if (!S_ISBLK(buf.st_mode)) { + fprintf(stderr, "invalid device: %s\n", device); + exit(1); + } + + off = find_devname_offset(device); + + if (!loopdev) { + mapname = dm_mapname(major(buf.st_rdev), minor(buf.st_rdev)); + if (mapname) + uuid = dm_mapuuid(mapname); + } + + /* + * We are called for a non-DM device. + * Make up a fake UUID for the device, unless "-d -f" is given. + * This allows deletion of partitions created with older kpartx + * versions which didn't use the fake UUID during creation. 
+ */ + if (!uuid && !(what == DELETE && force_devmap)) + uuid = nondm_create_uuid(buf.st_rdev); + + if (!mapname) + mapname = device + off; + + if (delim == NULL) { + delim = malloc(DELIM_SIZE); + memset(delim, 0, DELIM_SIZE); + set_delimiter(mapname, delim); + } + + fd = open(device, O_RDONLY); + + if (fd == -1) { + perror(device); + exit(1); + } + + /* add/remove partitions to the kernel devmapper tables */ + int r = 0; + + if (what == DELETE) { + r = dm_remove_partmaps(mapname, uuid, buf.st_rdev, + verbose); + if (loopdev) { + if (del_loop(loopdev)) { + if (verbose) + fprintf(stderr, "can't del loop : %s\n", + loopdev); + r = 1; + } else + fprintf(stderr, "loop deleted : %s\n", loopdev); + } + goto end; + } + + for (i = 0; i < ptct; i++) { + ptp = &pts[i]; + + if (type && strcmp(type, ptp->type)) + continue; + + /* here we get partitions */ + n = ptp->fn(fd, all, slices, SIZE(slices)); + +#ifdef DEBUG + if (n >= 0) + printf("%s: %d slices\n", ptp->type, n); +#endif + + if (n > 0) { + close(fd); + fd = -1; + } + else + continue; + + switch(what) { + case LIST: + for (j = 0, c = 0, m = 0; j < n; j++) { + if (slices[j].size == 0) + continue; + if (slices[j].container > 0) { + c++; + continue; + } + + slices[j].minor = m++; + + printf("%s%s%d : 0 %" PRIu64 " %s %" PRIu64"\n", + mapname, delim, j+1, + slices[j].size, device, + slices[j].start); + } + /* Loop to resolve contained slices */ + d = c; + while (c) { + for (j = 0; j < n; j++) { + uint64_t start; + int k = slices[j].container - 1; + + if (slices[j].size == 0) + continue; + if (slices[j].minor > 0) + continue; + if (slices[j].container == 0) + continue; + slices[j].minor = m++; + + start = slices[j].start - slices[k].start; + printf("%s%s%d : 0 %" PRIu64 " /dev/dm-%d %" PRIu64 "\n", + mapname, delim, j+1, + slices[j].size, + slices[k].minor, start); + c--; + } + /* Terminate loop if nothing more to resolve */ + if (d == c) + break; + } + + break; + + case ADD: + case UPDATE: + /* ADD and UPDATE share the 
same code that adds new partitions. */ + for (j = 0, c = 0; j < n; j++) { + char *part_uuid, *reason; + + if (slices[j].size == 0) + continue; + + /* Skip all contained slices */ + if (slices[j].container > 0) { + c++; + continue; + } + + if (safe_sprintf(params, "%d:%d %" PRIu64 , + major(buf.st_rdev), minor(buf.st_rdev), slices[j].start)) { + fprintf(stderr, "params too small\n"); + exit(1); + } + + op = (dm_find_part(mapname, delim, j + 1, uuid, + partname, sizeof(partname), + &part_uuid, verbose) ? + DM_DEVICE_RELOAD : DM_DEVICE_CREATE); + + if (part_uuid && uuid) { + if (check_uuid(uuid, part_uuid, &reason) != 0) { + fprintf(stderr, "%s is already in use, and %s\n", partname, reason); + r++; + free(part_uuid); + continue; + } + free(part_uuid); + } + + if (!dm_addmap(op, partname, DM_TARGET, params, + slices[j].size, ro, uuid, j+1, + buf.st_mode & 0777, buf.st_uid, + buf.st_gid)) { + fprintf(stderr, "create/reload failed on %s\n", + partname); + r++; + continue; + } + if (op == DM_DEVICE_RELOAD && + !dm_simplecmd(DM_DEVICE_RESUME, partname, + 1, MPATH_UDEV_RELOAD_FLAG)) { + fprintf(stderr, "resume failed on %s\n", + partname); + r++; + continue; + } + + dm_devn(partname, &slices[j].major, + &slices[j].minor); + + if (verbose) + printf("add map %s (%d:%d): 0 %" PRIu64 " %s %s\n", + partname, slices[j].major, + slices[j].minor, slices[j].size, + DM_TARGET, params); + } + /* Loop to resolve contained slices */ + d = c; + while (c) { + for (j = 0; j < n; j++) { + char *part_uuid, *reason; + int k = slices[j].container - 1; + + if (slices[j].size == 0) + continue; + + /* Skip all existing slices */ + if (slices[j].minor > 0) + continue; + + /* Skip all simple slices */ + if (slices[j].container == 0) + continue; + + /* Check container slice */ + if (slices[k].size == 0) + fprintf(stderr, "Invalid slice %d\n", + k); + + if (safe_sprintf(params, "%d:%d %" PRIu64, + major(buf.st_rdev), minor(buf.st_rdev), + slices[j].start)) { + fprintf(stderr, "params too small\n"); 
+ exit(1); + } + + op = (dm_find_part(mapname, delim, j + 1, uuid, + partname, + sizeof(partname), + &part_uuid, verbose) ? + DM_DEVICE_RELOAD : DM_DEVICE_CREATE); + + if (part_uuid && uuid) { + if (check_uuid(uuid, part_uuid, &reason) != 0) { + fprintf(stderr, "%s is already in use, and %s\n", partname, reason); + free(part_uuid); + continue; + } + free(part_uuid); + } + + dm_addmap(op, partname, DM_TARGET, params, + slices[j].size, ro, uuid, j+1, + buf.st_mode & 0777, + buf.st_uid, buf.st_gid); + + if (op == DM_DEVICE_RELOAD) + dm_simplecmd(DM_DEVICE_RESUME, + partname, 1, + MPATH_UDEV_RELOAD_FLAG); + dm_devn(partname, &slices[j].major, + &slices[j].minor); + + if (verbose) + printf("add map %s (%d:%d): 0 %" PRIu64 " %s %s\n", + partname, slices[j].major, slices[j].minor, slices[j].size, + DM_TARGET, params); + c--; + } + /* Terminate loop */ + if (d == c) + break; + } + + if (what == ADD) { + /* Skip code that removes devmappings for deleted partitions */ + break; + } + + for (j = MAXSLICES-1; j >= 0; j--) { + char *part_uuid, *reason; + if (slices[j].size || + !dm_find_part(mapname, delim, j + 1, uuid, + partname, sizeof(partname), + &part_uuid, verbose)) + continue; + + if (part_uuid && uuid) { + if (check_uuid(uuid, part_uuid, &reason) != 0) { + fprintf(stderr, "%s is %s. 
Not removing\n", partname, reason); + free(part_uuid); + continue; + } + free(part_uuid); + } + + if (!dm_simplecmd(DM_DEVICE_REMOVE, + partname, 1, 0)) { + r++; + continue; + } + if (verbose) + printf("del devmap : %s\n", partname); + } + + default: + break; + + } + if (n > 0) + break; + } + if (what == LIST && loopcreated && S_ISREG (buf.st_mode)) { + if (fd != -1) + close(fd); + if (del_loop(device)) { + if (verbose) + printf("can't del loop : %s\n", + device); + exit(1); + } + printf("loop deleted : %s\n", device); + } + +end: + dm_lib_release(); + dm_lib_exit(); + + return r; +} + +void * +xmalloc (size_t size) { + void *t; + + if (size == 0) + return NULL; + + t = malloc (size); + + if (t == NULL) { + fprintf(stderr, "Out of memory\n"); + exit(1); + } + + return t; +} + +/* + * sseek: seek to specified sector + */ + +static int +sseek(int fd, unsigned int secnr) { + off64_t in, out; + in = ((off64_t) secnr << 9); + out = 1; + + if ((out = lseek64(fd, in, SEEK_SET)) != in) + { + fprintf(stderr, "llseek error\n"); + return -1; + } + return 0; +} + +static +struct block { + unsigned int secnr; + char *block; + struct block *next; +} *blockhead; + +char * +getblock (int fd, unsigned int secnr) { + struct block *bp; + + for (bp = blockhead; bp; bp = bp->next) + + if (bp->secnr == secnr) + return bp->block; + + if (sseek(fd, secnr)) + return NULL; + + bp = xmalloc(sizeof(struct block)); + bp->secnr = secnr; + bp->next = blockhead; + blockhead = bp; + bp->block = (char *) xmalloc(READ_SIZE); + + if (read(fd, bp->block, READ_SIZE) != READ_SIZE) { + fprintf(stderr, "read error, sector %d\n", secnr); + bp->block = NULL; + } + + return bp->block; +} + +int +get_sector_size(int filedes) +{ + int rc, sector_size = 512; + + rc = ioctl(filedes, BLKSSZGET, §or_size); + if (rc) + sector_size = 512; + return sector_size; +} diff --git a/kpartx/kpartx.h b/kpartx/kpartx.h new file mode 100644 index 0000000..67edeb8 --- /dev/null +++ b/kpartx/kpartx.h @@ -0,0 +1,71 @@ +#ifndef 
/*
 * four2int: assemble four bytes, least significant first, into an
 * unsigned int.
 */
static inline unsigned int
four2int(unsigned char *p) {
	/*
	 * Convert each byte to unsigned int before shifting: a bare p[3]
	 * is promoted to (signed) int, and shifting a value >= 0x80 left by
	 * 24 bits overflows it.
	 */
	return (unsigned int)p[0] |
	       ((unsigned int)p[1] << 8) |
	       ((unsigned int)p[2] << 16) |
	       ((unsigned int)p[3] << 24);
}
#!/bin/sh
#
# kpartx_id
#
# Generates ID information for device-mapper tables.
#
# Copyright (C) 2006 SUSE Linux Products GmbH
# Author:
#	Hannes Reinecke
#
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation version 2 of the License.
#
# This script generates ID information used to generate persistent symlinks.
# It relies on the UUID strings generated by the various programs; the name
# of the tables are of no consequence.
#
# Please note that dmraid does not provide the UUIDs (yet); a patch has been
# sent upstream but has not been accepted yet.
#
# Arguments: major minor uuid
# Output:    KEY=value lines on stdout (DM_MPATH, DM_PART, DM_TYPE, DM_WWN,
#            DM_SERIAL), depending on the kind of table the UUID describes.
#

DMSETUP=/sbin/dmsetup

MAJOR=$1
MINOR=$2
UUID=$3

if [ -z "$MAJOR" -o -z "$MINOR" ]; then
    echo "usage: $0 major minor"
    exit 1;
fi

# Device-mapper not installed; not an error
if [ ! -x $DMSETUP ] ; then
    exit 0
fi


# Table UUIDs are always '<type>-<uuid>'.
dmuuid=${UUID#*-}
dmtbl=${UUID%%-*}
dmpart=${dmtbl#part}
dmserial=
# kpartx types are 'part<N>': if stripping the "part" prefix changed
# nothing, this is not a kpartx table; otherwise what remains is <N>.
if [ "$dmpart" = "$dmtbl" ] ; then
    dmpart=
else
    dmtbl=part
fi

# Set the name of the table. We're only interested in dmraid,
# multipath, and kpartx tables; everything else is ignored.
if [ "$dmtbl" = "part" ] ; then
    dmname=$($DMSETUP info -c --noheadings -o name -u $dmuuid)
    echo "DM_MPATH=$dmname"
    # We need the dependencies of the parent table to figure out
    # the type if the parent is a multipath table
    case "$dmuuid" in
	mpath-*)
	    dmdeps=$($DMSETUP deps -u $dmuuid)
	    dmserial=${dmuuid#mpath-}
	    ;;
    esac
elif [ "$dmtbl" = "mpath" ] ; then
    dmname="$dmuuid"
    dmserial="$dmuuid"
    # We need the dependencies of the table to figure out the type
    dmdeps=$($DMSETUP deps -u $UUID)
fi

[ -n "$dmpart" ] && echo "DM_PART=$dmpart"

# Figure out the type of the map. For non-multipath maps it's
# always 'raid'.
if [ -n "$dmdeps" ] ; then
    case "$dmdeps" in
	*\(94,*)
	    echo "DM_TYPE=ccw"
	    ;;
	*\(104,* | *\(105,* | *\(106,* | *\(107,* | *\(108,* | *\(109,* | *\(110,* | *\(112,*)
	    echo "DM_TYPE=cciss"
	    ;;
	*\(9*)
	    echo "DM_TYPE=raid"
	    ;;
	*)
	    echo "DM_TYPE=scsi"
	    echo "DM_WWN=0x${dmserial#?}"
	    ;;
    esac
else
    echo "DM_TYPE=raid"
fi
# POSIX test instead of the bash-only [[ ... ]]: the script runs under /bin/sh.
if [ -n "$dmserial" ]; then
    echo "DM_SERIAL=$dmserial"
fi

exit 0
(snprintf(path, PATH_MAX, "%s/%s/dev", VIRT_BLOCK, + dent->d_name) >= PATH_MAX) + continue; + + fd = open(path, O_RDONLY); + if (fd < 0) + continue; + + bytes_read = read(fd, dev, sizeof(dev) - 1); + if (bytes_read <= 0) { + close(fd); + continue; + } + + close(fd); + + dev[bytes_read] = '\0'; + p = strchr(dev, '\n'); + if (p != NULL) + *p = '\0'; + if (snprintf(path, PATH_MAX, "/dev/block/%s", dev) >= PATH_MAX) + continue; + + fd = open (path, O_RDONLY); + if (fd < 0) + continue; + + if (fstat (fd, &statbuf) != 0 || + !S_ISBLK(statbuf.st_mode)) { + close (fd); + continue; + } + + if (ioctl (fd, LOOP_GET_STATUS, &loopinfo) != 0) { + close (fd); + continue; + } + + close (fd); + + if (0 == strcmp(filename, loopinfo.lo_name)) { + found = realpath(path, NULL); + break; + } + + /* + * filename is a realpath, while loopinfo.lo_name may hold just the + * basename. If that's the case, try to match filename against the + * backing_file entry for this loop entry + */ + if (snprintf(bf_path, PATH_MAX, "%s/%s/loop/backing_file", VIRT_BLOCK, + dent->d_name) >= PATH_MAX) + continue; + + fd = open(bf_path, O_RDONLY); + if (fd < 0) + continue; + + bytes_read = read(fd, backing_file, sizeof(backing_file) - 1); + if (bytes_read <= 0) { + close(fd); + continue; + } + + close(fd); + + backing_file[bytes_read-1] = '\0'; + + if (0 == strcmp(filename, backing_file)) { + found = realpath(path, NULL); + break; + } + } + closedir(dir); + return found; +} + +char *find_unused_loop_device(void) +{ + char dev[20], *next_loop_dev = NULL; + int fd, next_loop = 0, somedev = 0, someloop = 0, loop_known = 0; + struct stat statbuf; + struct loop_info loopinfo; + FILE *procdev; + + while (next_loop_dev == NULL) { + if (stat("/dev/loop-control", &statbuf) == 0 && + S_ISCHR(statbuf.st_mode)) { + int next_loop_fd; + + next_loop_fd = open("/dev/loop-control", O_RDWR); + if (next_loop_fd < 0) + return NULL; + next_loop = ioctl(next_loop_fd, LOOP_CTL_GET_FREE); + close(next_loop_fd); + if (next_loop < 0) 
+ return NULL; + } + + sprintf(dev, "/dev/loop%d", next_loop); + + fd = open (dev, O_RDONLY); + if (fd >= 0) { + if (fstat (fd, &statbuf) == 0 && + S_ISBLK(statbuf.st_mode)) { + somedev++; + if(ioctl (fd, LOOP_GET_STATUS, &loopinfo) == 0) + someloop++; /* in use */ + else if (errno == ENXIO) + next_loop_dev = xstrdup(dev); + + } + close (fd); + + /* continue trying as long as devices exist */ + continue; + } + break; + } + if (next_loop_dev) + return next_loop_dev; + + /* Nothing found. Why not? */ + if ((procdev = fopen(PROC_DEVICES, "r")) != NULL) { + char line[100]; + + while (fgets (line, sizeof(line), procdev)) + + if (strstr (line, " loop\n")) { + loop_known = 1; + break; + } + + fclose(procdev); + + if (!loop_known) + loop_known = -1; + } + + if (!somedev) + fprintf(stderr, "mount: could not find any device /dev/loop#"); + + else if (!someloop) { + if (loop_known == 1) + fprintf(stderr, + "mount: Could not find any loop device.\n" + " Maybe /dev/loop# has a wrong major number?"); + else if (loop_known == -1) + fprintf(stderr, + "mount: Could not find any loop device, and, according to %s,\n" + " this kernel does not know about the loop device.\n" + " (If so, then recompile or `modprobe loop'.)", + PROC_DEVICES); + else + fprintf(stderr, + "mount: Could not find any loop device. Maybe this kernel does not know\n" + " about the loop device (then recompile or `modprobe loop'), or\n" + " maybe /dev/loop# has the wrong major number?"); + } else + fprintf(stderr, "mount: could not find any free loop device"); + return NULL; +} + +int set_loop(const char *device, const char *file, int offset, int *loopro) +{ + struct loop_info loopinfo; + int fd, ffd, mode; + + mode = (*loopro ? 
/*
 * del_loop: detach the backing file from loop device `device`.
 *
 * LOOP_CLR_FD is retried once per second while it fails with EBUSY; after
 * the initial attempt plus five retries, or on any other error, the call
 * gives up.
 * Returns 0 on success, 1 on failure (a diagnostic is written to stderr).
 */
int del_loop(const char *device)
{
	int attempts_left = 5;
	int fd = open (device, O_RDONLY);

	if (fd < 0) {
		int saved_errno = errno;

		fprintf(stderr, "loop: can't delete device %s: %s\n",
			device, strerror (saved_errno));
		return 1;
	}

	for (;;) {
		if (ioctl (fd, LOOP_CLR_FD, 0) >= 0)
			break;

		if (errno != EBUSY || attempts_left-- <= 0) {
			perror ("ioctl: LOOP_CLR_FD");
			close (fd);
			return 1;
		}

		fprintf(stderr,
			"loop: device %s still in use, retrying delete\n",
			device);
		sleep(1);
	}

	close (fd);
	return 0;
}
+#include "mac.h" + +int +read_mac_pt(int fd, __attribute__((unused)) struct slice all, + struct slice *sp, unsigned int ns) { + struct mac_driver_desc *md; + struct mac_partition *part; + unsigned secsize; + char *data; + unsigned int blk, blocks_in_map; + int n = 0; + + md = (struct mac_driver_desc *) getblock(fd, 0); + if (md == NULL) + return -1; + + if (be16_to_cpu(md->signature) != MAC_DRIVER_MAGIC) + return -1; + + secsize = be16_to_cpu(md->block_size); + data = getblock(fd, secsize/512); + if (!data) + return -1; + part = (struct mac_partition *) (data + secsize%512); + + if (be16_to_cpu(part->signature) != MAC_PARTITION_MAGIC) + return -1; + + blocks_in_map = be32_to_cpu(part->map_count); + for (blk = 1; blk <= blocks_in_map && blk <= ns; ++blk, ++n) { + int pos = blk * secsize; + data = getblock(fd, pos/512); + if (!data) + return -1; + + part = (struct mac_partition *) (data + pos%512); + if (be16_to_cpu(part->signature) != MAC_PARTITION_MAGIC) + break; + + sp[n].start = be32_to_cpu(part->start_block) * (secsize/512); + sp[n].size = be32_to_cpu(part->block_count) * (secsize/512); + } + return n; +} diff --git a/kpartx/mac.h b/kpartx/mac.h new file mode 100644 index 0000000..55c3ec9 --- /dev/null +++ b/kpartx/mac.h @@ -0,0 +1,30 @@ +#ifndef MAC_H +#define MAC_H + +#include + +#define MAC_PARTITION_MAGIC 0x504d + +/* type field value for A/UX or other Unix partitions */ +#define APPLE_AUX_TYPE "Apple_UNIX_SVR2" + +struct mac_partition { + uint16_t signature; /* expected to be MAC_PARTITION_MAGIC */ + uint16_t res1; + uint32_t map_count; /* # blocks in partition map */ + uint32_t start_block; /* absolute starting block # of partition */ + uint32_t block_count; /* number of blocks in partition */ + /* there is more stuff after this that we don't need */ +}; + +#define MAC_DRIVER_MAGIC 0x4552 + +/* Driver descriptor structure, in block 0 */ +struct mac_driver_desc { + uint16_t signature; /* expected to be MAC_DRIVER_MAGIC */ + uint16_t block_size; + uint32_t 
block_count; + /* ... more stuff */ +}; + +#endif diff --git a/kpartx/ps3.c b/kpartx/ps3.c new file mode 100644 index 0000000..42551bc --- /dev/null +++ b/kpartx/ps3.c @@ -0,0 +1,73 @@ +#include "kpartx.h" +#include "byteorder.h" +#include +#include + +#define SECTOR_SIZE 512 +#define MAX_ACL_ENTRIES 8 +#define MAX_PARTITIONS 8 + +#define MAGIC1 0x0FACE0FFULL +#define MAGIC2 0xDEADFACEULL + +struct p_acl_entry { + u_int64_t laid; + u_int64_t rights; +}; + +struct d_partition { + u_int64_t p_start; + u_int64_t p_size; + struct p_acl_entry p_acl[MAX_ACL_ENTRIES]; +}; + +struct disklabel { + u_int8_t d_res1[16]; + u_int64_t d_magic1; + u_int64_t d_magic2; + u_int64_t d_res2; + u_int64_t d_res3; + struct d_partition d_partitions[MAX_PARTITIONS]; + u_int8_t d_pad[0x600 - MAX_PARTITIONS * sizeof(struct d_partition) - 0x30]; +}; + +static int +read_disklabel(int fd, struct disklabel *label) { + unsigned char *data; + unsigned int i; + + for (i = 0; i < sizeof(struct disklabel) / SECTOR_SIZE; i++) { + data = (unsigned char *) getblock(fd, i); + if (!data) + return 0; + + memcpy((unsigned char *) label + i * SECTOR_SIZE, data, SECTOR_SIZE); + } + + return 1; +} + +int +read_ps3_pt(int fd, __attribute__((unused)) struct slice all, + struct slice *sp, __attribute__((unused)) unsigned int ns) { + struct disklabel label; + int n = 0; + int i; + + if (!read_disklabel(fd, &label)) + return -1; + + if ((be64_to_cpu(label.d_magic1) != MAGIC1) || + (be64_to_cpu(label.d_magic2) != MAGIC2)) + return -1; + + for (i = 0; i < MAX_PARTITIONS; i++) { + if (label.d_partitions[i].p_start && label.d_partitions[i].p_size) { + sp[n].start = be64_to_cpu(label.d_partitions[i].p_start); + sp[n].size = be64_to_cpu(label.d_partitions[i].p_size); + n++; + } + } + + return n; +} diff --git a/kpartx/solaris.c b/kpartx/solaris.c new file mode 100644 index 0000000..c2480b5 --- /dev/null +++ b/kpartx/solaris.c @@ -0,0 +1,68 @@ +#include "kpartx.h" +#include +#include +#include /* time_t */ + +#define 
SOLARIS_X86_NUMSLICE 8 +#define SOLARIS_X86_VTOC_SANE (0x600DDEEEUL) + +struct solaris_x86_slice { + unsigned short s_tag; /* ID tag of partition */ + unsigned short s_flag; /* permission flags */ + __kernel_daddr_t s_start; /* start sector no of partition */ + long s_size; /* # of blocks in partition */ +}; + +struct solaris_x86_vtoc { + unsigned long v_bootinfo[3]; /* info for mboot */ + unsigned long v_sanity; /* to verify vtoc sanity */ + unsigned long v_version; /* layout version */ + char v_volume[8]; /* volume name */ + unsigned short v_sectorsz; /* sector size in bytes */ + unsigned short v_nparts; /* number of partitions */ + unsigned long v_reserved[10]; /* free space */ + struct solaris_x86_slice + v_slice[SOLARIS_X86_NUMSLICE]; /* slice headers */ + time_t timestamp[SOLARIS_X86_NUMSLICE]; /* timestamp */ + char v_asciilabel[128]; /* for compatibility */ +}; +/* Read a Solaris x86 VTOC from sector all.start+1: fills sp[] with the non-empty slices (at most ns); returns the count, 0 for an unsupported VTOC version, or -1 on read failure or a bad sanity value. */ +int +read_solaris_pt(int fd, struct slice all, struct slice *sp, unsigned int ns) { + struct solaris_x86_vtoc *v; + struct solaris_x86_slice *s; + unsigned int offset = all.start; + unsigned int i, n; + char *bp; + + bp = getblock(fd, offset+1); /* 1 sector suffices */ + if (bp == NULL) + return -1; + + v = (struct solaris_x86_vtoc *) bp; + if(v->v_sanity != SOLARIS_X86_VTOC_SANE) + return -1; + + if(v->v_version != 1) { + fprintf(stderr, "Cannot handle solaris version %ld vtoc\n", + v->v_version); + return 0; + } + + for(i=0, n=0; i<SOLARIS_X86_NUMSLICE; i++) { + s = &v->v_slice[i]; + + if (s->s_size == 0) + continue; + if (n < ns) { + sp[n].start = offset + s->s_start; + sp[n].size = s->s_size; + n++; + } else { + fprintf(stderr, + "solaris_x86_partition: too many slices\n"); + break; + } + } + return n; +} diff --git a/kpartx/sun.c b/kpartx/sun.c new file mode 100644 index 0000000..df630a7 --- /dev/null +++ b/kpartx/sun.c @@ -0,0 +1,128 @@ +/* + * Lifted from util-linux' partx sun.c + * + * Copyrights of the original file apply + * Copyright (c) 2007 Hannes Reinecke + */ +#include "kpartx.h" +#include "byteorder.h" +#include <stdio.h>
+#include +#include /* time_t */ + +#define SUN_DISK_MAGIC 0xDABE /* Disk magic number */ +#define SUN_DISK_MAXPARTITIONS 8 + +struct __attribute__ ((packed)) sun_raw_part { + u_int32_t start_cylinder; /* where the part starts... */ + u_int32_t num_sectors; /* ...and it's length */ +}; + +struct __attribute__ ((packed)) sun_part_info { + u_int8_t spare1; + u_int8_t id; /* Partition type */ + u_int8_t spare2; + u_int8_t flags; /* Partition flags */ +}; + +struct __attribute__ ((packed)) sun_disk_label { + char info[128]; /* Informative text string */ + u_int8_t spare0[14]; + struct sun_part_info infos[SUN_DISK_MAXPARTITIONS]; + u_int8_t spare1[246]; /* Boot information etc. */ + u_int16_t rspeed; /* Disk rotational speed */ + u_int16_t pcylcount; /* Physical cylinder count */ + u_int16_t sparecyl; /* extra sects per cylinder */ + u_int8_t spare2[4]; /* More magic... */ + u_int16_t ilfact; /* Interleave factor */ + u_int16_t ncyl; /* Data cylinder count */ + u_int16_t nacyl; /* Alt. cylinder count */ + u_int16_t ntrks; /* Tracks per cylinder */ + u_int16_t nsect; /* Sectors per track */ + u_int8_t spare3[4]; /* Even more magic... 
*/ + struct sun_raw_part partitions[SUN_DISK_MAXPARTITIONS]; + u_int16_t magic; /* Magic number */ + u_int16_t csum; /* Label xor'd checksum */ +}; + +/* Checksum Verification */ +static int +sun_verify_checksum (struct sun_disk_label *label) +{ + u_int16_t *ush = ((u_int16_t *)(label + 1)) - 1; + u_int16_t csum = 0; + + while (ush >= (u_int16_t *)label) + csum ^= *ush--; + + return !csum; +} + +int +read_sun_pt(int fd, struct slice all, struct slice *sp, unsigned int ns) { + struct sun_disk_label *l; + struct sun_raw_part *s; + unsigned int offset = all.start, end; + unsigned int i, j, n; + char *bp; + + bp = getblock(fd, offset); + if (bp == NULL) + return -1; + + l = (struct sun_disk_label *) bp; + if(be16_to_cpu(l->magic) != SUN_DISK_MAGIC) + return -1; + + if (!sun_verify_checksum(l)) { + fprintf(stderr, "Corrupted Sun disk label\n"); + return -1; + } + + for(i=0, n=0; ipartitions[i]; + + if (n < ns) { + sp[n].start = offset + + be32_to_cpu(s->start_cylinder) * be16_to_cpu(l->nsect) * be16_to_cpu(l->ntrks); + sp[n].size = be32_to_cpu(s->num_sectors); + n++; + } else { + fprintf(stderr, + "sun_disklabel: too many slices\n"); + break; + } + } + /* + * Convention has it that the SUN disklabel will always have + * the 'c' partition spanning the entire disk. + * So we have to check for contained slices. 
+ */ + for(i = 0; i < SUN_DISK_MAXPARTITIONS; i++) { + if (sp[i].size == 0) + continue; + + end = sp[i].start + sp[i].size; + for(j = 0; j < SUN_DISK_MAXPARTITIONS; j ++) { + if ( i == j ) + continue; + if (sp[j].size == 0) + continue; + + if (sp[i].start < sp[j].start) { + if (end > sp[j].start && + end < sp[j].start + sp[j].size) { + /* Invalid slice */ + fprintf(stderr, + "sun_disklabel: slice %d overlaps with %d\n", i , j); + sp[i].size = 0; + } + } else { + if (end <= sp[j].start + sp[j].size) { + sp[i].container = j + 1; + } + } + } + } + return n; +} diff --git a/kpartx/test-kpartx b/kpartx/test-kpartx new file mode 100755 index 0000000..d3c9aef --- /dev/null +++ b/kpartx/test-kpartx @@ -0,0 +1,335 @@ +#! /bin/bash + +# This is a unit test program for kpartx, in particular for deleting partitions. +# +# The rationale is the following: +# +# 1) kpartx should delete all mappings it created beforehand. +# 2) kpartx should handle partitions on dm devices and other devices +# (e.g. loop devices) equally well. +# 3) kpartx should only delete "partitions", which are single-target +# linear mappings into a block device. Other maps should not be touched. +# 4) kpartx should only delete mappings it created itself beforehand. +# In particular, it shouldn't delete LVM LVs, even if they are fully +# contained in the block device at hand and thus look like partitions +# in the first place. (For historical compatibility reasons, we allow +# such mappings to be deleted with the -f/--force flag). +# 5) DM map names may be changed, thus kpartx shouldn't rely on them to +# check whether a mapping is a partition of a particular device. It is +# legal for a partition of /dev/loop0 to be named "loop0". + +# Note: This program tries hard to clean up, but if tests fail, +# stale DM or loop devices may keep lurking around. + +# Set WORKDIR in environment to existing dir to for persistence +# WARNING: existing files will be truncated. 
+# If empty, test will be done in temporary dir +: ${WORKDIR:=} +# Set this environment variable to test an alternative kpartx executable +: ${KPARTX:=} +# Options to pass to kpartx always +: ${KPARTX_OPTS:=-s} +# Time to wait for device nodes to appear (microseconds) +# Waiting is only needed if "s" is not in $KPARTX_OPTS +: ${WAIT_US:=0} + +# IMPORTANT: The ERR trap is essential for this program to work correctly! +trap 'LINE=$LINENO; trap - ERR; echo "== error in $BASH_COMMAND on line $LINE ==" >&2; exit 1' ERR +trap 'cleanup' 0 + +CLEANUP=: +cleanup() { + trap - ERR + trap - 0 + if [[ $OK ]]; then + echo == all tests completed successfully == >&2 + else + echo == step $STEP failed == >&2 + fi + eval "$CLEANUP" &>/dev/null +} + +push_cleanup() { + CLEANUP="$@;$CLEANUP" +} + +pop_cleanup() { + # CAUTION: simplistic + CLEANUP=${CLEANUP#*;} +} + +step() { + STEP="$@" + echo == Test step: $STEP == >&2 +} + +mk_partitions() { + parted -s $1 mklabel msdos + parted -s -- $1 mkpart prim ext2 1MiB -1s +} + +wipe_ptable() { + dd if=/dev/zero of=$1 bs=1b count=1 +} + +step preparation + +[[ $UID -eq 0 ]] +[[ $KPARTX ]] || { + if [[ -x $PWD/kpartx/kpartx ]]; then + KPARTX=$PWD/kpartx/kpartx + else + KPARTX=$(which kpartx) + fi +} +[[ $KPARTX ]] + +FILE1=kpartx1 +FILE2=kpartx2 +FILE3=kpartx3 +FILE4=kpartx4 +SIZE=$((1024*1024*1024)) # use bytes as units here +SECTSIZ=512 +OFFS=32 # offset of linear mapping into dev, sectors +VG=kpvg # volume group name +LV=kplv # logical vol name +LVMCONF='devices { filter = [ "a|/dev/loop.*|", r".*" ] }' + +OK= + +[[ $WORKDIR ]] || { + WORKDIR=$(mktemp -d /tmp/kpartx-XXXXXX) + push_cleanup 'rm -rf $WORKDIR' +} + +push_cleanup "cd $PWD" +cd "$WORKDIR" + +step "create loop devices" +truncate -s $SIZE $FILE1 +truncate -s $SIZE $FILE2 +truncate -s $SIZE $FILE3 +truncate -s $SIZE $FILE4 + +LO1=$(losetup -f $FILE1 --show) +push_cleanup 'losetup -d $LO1' +LO2=$(losetup -f $FILE2 --show) +push_cleanup 'losetup -d $LO2' +LO3=$(losetup -f $FILE3 
--show) +push_cleanup 'losetup -d $LO3' +LO4=$(losetup -f $FILE4 --show) +push_cleanup 'losetup -d $LO4' + +[[ $LO1 && $LO2 && $LO3 && $LO4 && -b $LO1 && -b $LO2 && -b $LO3 && -b $LO4 ]] +DEV1=$(stat -c "%t:%T" $LO1) +DEV2=$(stat -c "%t:%T" $LO2) +DEV3=$(stat -c "%t:%T" $LO3) + +usleep $WAIT_US + +step "create DM devices (spans)" +# Create two linear mappings spanning two loopdevs. +# One of them gets a pathological name colliding with +# the loop device name. +# These mappings must not be removed by kpartx. +# They also serve as DM devices to test partition removal on those. + +TABLE="\ +0 $((SIZE/SECTSIZ-OFFS)) linear $DEV1 $OFFS +$((SIZE/SECTSIZ-OFFS)) $((SIZE/SECTSIZ-OFFS)) linear $DEV2 $OFFS" + +SPAN1=kpt +SPAN2=$(basename $LO2) +dmsetup create $SPAN1 <<<"$TABLE" +push_cleanup 'dmsetup remove -f $SPAN1' + +dmsetup create $SPAN2 <<<"$TABLE" +push_cleanup 'dmsetup remove -f $SPAN2' + +# This is a non-kpartx pseudo "partition" mapping +USER1=user1 +push_cleanup 'dmsetup remove -f $USER1' +dmsetup create $USER1 <&2 +usleep $WAIT_US + +[[ -b $SPAN2P1 ]] +[[ -b $LO1P1 ]] +[[ -b $LO2P1 ]] +[[ ! -b $SPAN1P1 ]] + +$KPARTX $KPARTX_OPTS -d /dev/mapper/$SPAN2 +usleep $WAIT_US + +[[ -b $LO1P1 ]] +[[ -b $LO2P1 ]] +[[ ! -b $SPAN2P1 ]] + +step "rename partitions on loop device" +$KPARTX $KPARTX_OPTS -u -p -spam $LO2 +[[ ! -b ${LO2P1} ]] +[[ -b ${LO2P1//-foo/-spam} ]] + +step "rename partitions on loop device back" +$KPARTX $KPARTX_OPTS -u -p -foo $LO2 +[[ -b ${LO2P1} ]] +[[ ! -b ${LO2P1//-foo/-spam} ]] + +step "rename partitions on loop device to default" +$KPARTX $KPARTX_OPTS -u $LO2 +#read a +[[ ! -b ${LO2P1} ]] +# $LO1 ends in a digit +[[ -b ${LO2P1//-foo/p} ]] + +step "rename partitions on loop device back from default" +$KPARTX $KPARTX_OPTS -u -p -foo $LO2 +[[ -b ${LO2P1} ]] +[[ ! 
-b ${LO2P1//-foo/p} ]] + +step "rename partitions on loop devices" +$KPARTX $KPARTX_OPTS -u -p spam $LO2 + +step "delete partitions on loop devices" + +$KPARTX $KPARTX_OPTS -d $LO3 + +# This will also delete the loop device +$KPARTX $KPARTX_OPTS -d $FILE2 +$KPARTX $KPARTX_OPTS -d $LO1 +usleep $WAIT_US + +# ls -l /dev/mapper +[[ ! -b $LO1P1 ]] +pop_cleanup +[[ ! -b $LO2P1 ]] +pop_cleanup +# spans should not have been removed +[[ -b /dev/mapper/$SPAN1 ]] +[[ -b /dev/mapper/$SPAN2 ]] +[[ -b /dev/mapper/$USER1 ]] +# LVs neither +[[ -b /dev/mapper/$VG-$LV ]] + +step "delete partitions on $LO3 with -f" + +$KPARTX $KPARTX_OPTS -f -d $LO3 +# -d -f should delete the LV, too +[[ ! -b /dev/mapper/$VG-$LV ]] +[[ -b /dev/mapper/$SPAN1 ]] +[[ -b /dev/mapper/$SPAN2 ]] + +step "test kpartx creation/deletion on an image file with no existing loopdev" +losetup -d $LO4 + +OUTPUT=$($KPARTX $KPARTX_OPTS -v -a $FILE4 2>&1) +read loop dm < \ + <(sed -n 's/^add map \(loop[0-9]*\)p1 ([0-9]*:\([0-9]*\)).*$/\1 dm-\2/p' \ + <<<$OUTPUT) +[[ $dm && $loop ]] +push_cleanup "dmsetup remove -f /dev/$dm" +push_cleanup "losetup -d /dev/$loop" + +[[ -b /dev/mapper/${loop}p1 ]] +$KPARTX -d $KPARTX_OPTS $FILE4 +[[ ! 
-b /dev/mapper/${loop}p1 ]] +# /dev/$loop is _not_ automatically deleted +[[ -b /dev/${loop} ]] + +OK=yes diff --git a/kpartx/unixware.c b/kpartx/unixware.c new file mode 100644 index 0000000..2f663af --- /dev/null +++ b/kpartx/unixware.c @@ -0,0 +1,83 @@ +#include "kpartx.h" +#include <stdio.h> + +#define UNIXWARE_FS_UNUSED 0 +#define UNIXWARE_NUMSLICE 16 +#define UNIXWARE_DISKMAGIC (0xCA5E600D) +#define UNIXWARE_DISKMAGIC2 (0x600DDEEE) + +struct unixware_slice { + unsigned short s_label; /* label */ + unsigned short s_flags; /* permission flags */ + unsigned int start_sect; /* starting sector */ + unsigned int nr_sects; /* number of sectors in slice */ +}; + +struct unixware_disklabel { + unsigned int d_type; /* drive type */ + unsigned char d_magic[4]; /* the magic number */ + unsigned int d_version; /* version number */ + char d_serial[12]; /* serial number of the device */ + unsigned int d_ncylinders; /* # of data cylinders per device */ + unsigned int d_ntracks; /* # of tracks per cylinder */ + unsigned int d_nsectors; /* # of data sectors per track */ + unsigned int d_secsize; /* # of bytes per sector */ + unsigned int d_part_start; /* # of first sector of this partition */ + unsigned int d_unknown1[12]; /* ? */ + unsigned int d_alt_tbl; /* byte offset of alternate table */ + unsigned int d_alt_len; /* byte length of alternate table */ + unsigned int d_phys_cyl; /* # of physical cylinders per device */ + unsigned int d_phys_trk; /* # of physical tracks per cylinder */ + unsigned int d_phys_sec; /* # of physical sectors per track */ + unsigned int d_phys_bytes; /* # of physical bytes per sector */ + unsigned int d_unknown2; /* ? */ + unsigned int d_unknown3; /* ? */ + unsigned int d_pad[8]; /* pad */ + + struct unixware_vtoc { + unsigned char v_magic[4]; /* the magic number */ + unsigned int v_version; /* version number */ + char v_name[8]; /* volume name */ + unsigned short v_nslices; /* # of slices */ + unsigned short v_unknown1; /* ?
*/ + unsigned int v_reserved[10]; /* reserved */ + struct unixware_slice + v_slice[UNIXWARE_NUMSLICE]; /* slice headers */ + } vtoc; + +}; /* 408 */ +/* Read a UnixWare disklabel from sector all.start+29: fills sp[] with the used slices after slice 0 (at most ns); returns the count, or -1 on read failure or bad magic. */ +int +read_unixware_pt(int fd, struct slice all, struct slice *sp, unsigned int ns) { + struct unixware_disklabel *l; + struct unixware_slice *p; + unsigned int offset = all.start; + char *bp; + unsigned int n = 0; + + bp = getblock(fd, offset+29); /* 1 sector suffices */ + if (bp == NULL) + return -1; + + l = (struct unixware_disklabel *) bp; + if (four2int(l->d_magic) != UNIXWARE_DISKMAGIC || + four2int(l->vtoc.v_magic) != UNIXWARE_DISKMAGIC2) + return -1; + + p = &l->vtoc.v_slice[1]; /* slice 0 is the whole disk. */ + while (p - &l->vtoc.v_slice[0] < UNIXWARE_NUMSLICE) { + if (p->s_label == UNIXWARE_FS_UNUSED) + /* nothing */; + else if (n < ns) { + sp[n].start = p->start_sect; + sp[n].size = p->nr_sects; + n++; + } else { + fprintf(stderr, + "unixware_partition: too many slices\n"); + break; + } + p++; + } + return n; +} diff --git a/kpartx/xstrncpy.c b/kpartx/xstrncpy.c new file mode 100644 index 0000000..7975426 --- /dev/null +++ b/kpartx/xstrncpy.c @@ -0,0 +1,10 @@ +/* NUL-terminated version of strncpy() */ +#include <string.h> +#include "xstrncpy.h" + +/* caller guarantees n > 0 */ +void +xstrncpy(char *dest, const char *src, size_t n) { + strncpy(dest, src, n-1); + dest[n-1] = 0; +} diff --git a/kpartx/xstrncpy.h b/kpartx/xstrncpy.h new file mode 100644 index 0000000..05c8fa2 --- /dev/null +++ b/kpartx/xstrncpy.h @@ -0,0 +1 @@ +extern void xstrncpy(char *dest, const char *src, size_t n); diff --git a/libdmmp/DEV_NOTES b/libdmmp/DEV_NOTES new file mode 100644 index 0000000..3460cdf --- /dev/null +++ b/libdmmp/DEV_NOTES @@ -0,0 +1,41 @@ +== Planned features == + * Expose all properties used by /usr/bin/multipath + +== Code style == + * Keep things as simple as possible. + * Linux Kernel code style. + * Don't use typedef. + * Don't use enum.
+ * We are not smarter than API user, so don't create wrapping function like: + + ``` + dmmp_mpath_search_by_id(struct dmmp_context *ctx, + struct dmmp_mpath **dmmp_mp, + uint32_t dmmp_mp_count, const char *id) + + dmmp_path_group_id_search(struct dmmp_mpath *dmmp_mp, + const char *blk_name) + ``` + * The performance is the same for query single mpath and query all mpaths, + so no `dmmp_mpath_of_wwid(struct dmmp_context *ctx, const char *wwid)` yet. + +== Naming scheme == + * Public constants should be named as `DMMP_XXX_YYY`. + * Public functions should be named as `dmmp_<noun>_<verb>`. + * Private constants should be named as `_DMMP_XXX_YYY`. + * Private functions should be named as `_dmmp_<noun>_<verb>`. + +== Code Layout == + * libdmmp_private.h + Internal functions or macros. + * libdmmp.c + Handling multipathd IPC and generate dmmp_context and + dmmp_mpath_array_get(). + * libdmmp_mp.c + For `struct dmmp_mpath` + * libdmmp_pg.c + For `struct dmmp_path_group` + * libdmmp_path.c + For `struct dmmp_path` + * libdmmp_misc.c + Misc functions. diff --git a/libdmmp/Makefile b/libdmmp/Makefile new file mode 100644 index 0000000..1dd3f34 --- /dev/null +++ b/libdmmp/Makefile @@ -0,0 +1,91 @@ +# Makefile +# +# Copyright (C) 2015 - 2016 Red Hat, Inc.
+# Gris Ge +# +include ../Makefile.inc + +LIBDMMP_VERSION=0.2.0 +SONAME=$(LIBDMMP_VERSION) +DEVLIB = libdmmp.so +LIBS = $(DEVLIB).$(SONAME) +PKGFILE = libdmmp.pc +EXTRA_MAN_FILES = libdmmp.h.3 +HEADERS = libdmmp/libdmmp.h + +OBJS = libdmmp.o libdmmp_mp.o libdmmp_pg.o libdmmp_path.o libdmmp_misc.o + +CFLAGS += $(LIB_CFLAGS) -fvisibility=hidden -I$(libdmmpdir) -I$(mpathcmddir) \ + $(shell pkg-config --cflags json-c) + +LIBDEPS += $(shell pkg-config --libs json-c) -L$(mpathcmddir) -lmpathcmd -lpthread + +all: $(LIBS) doc + +$(LIBS): $(OBJS) + $(CC) $(LDFLAGS) $(SHARED_FLAGS) -Wl,-soname=$@ -o $@ $(OBJS) $(LIBDEPS) + $(LN) $@ $(DEVLIB) + +install: + mkdir -p $(DESTDIR)$(usrlibdir) + $(INSTALL_PROGRAM) -m 755 $(LIBS) $(DESTDIR)$(usrlibdir)/$(LIBS) + $(INSTALL_PROGRAM) -m 644 -D \ + $(HEADERS) $(DESTDIR)$(includedir)/$(HEADERS) + $(LN) $(LIBS) $(DESTDIR)$(usrlibdir)/$(DEVLIB) + $(INSTALL_PROGRAM) -m 644 -D \ + $(PKGFILE).in $(DESTDIR)$(pkgconfdir)/$(PKGFILE) + perl -i -pe 's|__VERSION__|$(LIBDMMP_VERSION)|g' \ + $(DESTDIR)$(pkgconfdir)/$(PKGFILE) + perl -i -pe 's|__LIBDIR__|$(usrlibdir)|g' \ + $(DESTDIR)$(pkgconfdir)/$(PKGFILE) + perl -i -pe 's|__INCLUDEDIR__|$(includedir)|g' \ + $(DESTDIR)$(pkgconfdir)/$(PKGFILE) + @for file in docs/man/*.3.gz; do \ + $(INSTALL_PROGRAM) -m 644 -D \ + $$file \ + $(DESTDIR)$(man3dir)/ || exit $?; \ + done + +uninstall: + $(RM) $(DESTDIR)$(usrlibdir)/$(LIBS) + $(RM) $(DESTDIR)$(includedir)/$(HEADERS) + $(RM) $(DESTDIR)$(usrlibdir)/$(DEVLIB) + @for file in $(DESTDIR)$(man3dir)/dmmp_*; do \ + $(RM) $$file; \ + done + $(RM) $(DESTDIR)$(man3dir)/libdmmp.h* + $(RM) $(DESTDIR)$(pkgconfdir)/$(PKGFILE) + +clean: dep_clean + $(RM) core *.a *.o *.gz *.so *.so.* + $(RM) -r docs/man + $(MAKE) -C test clean + +include $(wildcard $(OBJS:.o=.d)) + +check: all + $(MAKE) -C test check + +speed_test: all + $(MAKE) -C test speed_test + +doc: docs/man/$(EXTRA_MAN_FILES).gz + +TEMPFILE := $(shell mktemp) + +docs/man/$(EXTRA_MAN_FILES).gz: $(HEADERS) + @for 
file in $(EXTRA_MAN_FILES); do \ + $(INSTALL_PROGRAM) -v -m 644 -D docs/$$file docs/man/$$file; \ + done + cat $(HEADERS) | \ + perl docs/doc-preclean.pl > "$(TEMPFILE)" + perl docs/kernel-doc -man "$(TEMPFILE)" | \ + perl docs/split-man.pl docs/man + -rm -f "$(TEMPFILE)" + @for file in docs/man/*.3; do \ + gzip -f $$file; \ + done + find docs/man -type f -name \*[0-9].gz + +dep_clean: + $(RM) $(OBJS:.o=.d) diff --git a/libdmmp/docs/doc-preclean.pl b/libdmmp/docs/doc-preclean.pl new file mode 100755 index 0000000..9a9a4ce --- /dev/null +++ b/libdmmp/docs/doc-preclean.pl @@ -0,0 +1,28 @@ +#!/usr/bin/perl +# Copyright (C) 2016 Red Hat, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+# +# Author: Gris Ge + +use strict; + +my @REMOVE_KEY_LIST=("DMMP_DLL_EXPORT"); + +while (<>) { + for my $key (@REMOVE_KEY_LIST) { + (s/$key//g); + } + print; +} diff --git a/libdmmp/docs/kernel-doc b/libdmmp/docs/kernel-doc new file mode 100755 index 0000000..8f0f508 --- /dev/null +++ b/libdmmp/docs/kernel-doc @@ -0,0 +1,2222 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0 + +use warnings; +use strict; + +## Copyright (c) 1998 Michael Zucchi, All Rights Reserved ## +## Copyright (C) 2000, 1 Tim Waugh ## +## Copyright (C) 2001 Simon Huggins ## +## Copyright (C) 2005-2012 Randy Dunlap ## +## Copyright (C) 2012 Dan Luedtke ## +## ## +## #define enhancements by Armin Kuster ## +## Copyright (c) 2000 MontaVista Software, Inc. ## +## ## +## This software falls under the GNU General Public License. ## +## Please read the COPYING file for more information ## + +# 18/01/2001 - Cleanups +# Functions prototyped as foo(void) same as foo() +# Stop eval'ing where we don't need to. +# -- huggie@earth.li + +# 27/06/2001 - Allowed whitespace after initial "/**" and +# allowed comments before function declarations. +# -- Christian Kreibich + +# Still to do: +# - add perldoc documentation +# - Look more closely at some of the scarier bits :) + +# 26/05/2001 - Support for separate source and object trees. +# Return error code. +# Keith Owens + +# 23/09/2001 - Added support for typedefs, structs, enums and unions +# Support for Context section; can be terminated using empty line +# Small fixes (like spaces vs. \s in regex) +# -- Tim Jansen + +# 25/07/2012 - Added support for HTML5 +# -- Dan Luedtke + +sub usage { + my $message = <<"EOF"; +Usage: $0 [OPTION ...] FILE ... + +Read C language source or header FILEs, extract embedded documentation comments, +and print formatted documentation to standard output. + +The documentation comments are identified by "/**" opening comment mark. See +Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax. 
+ +Output format selection (mutually exclusive): + -man Output troff manual page format. This is the default. + -rst Output reStructuredText format. + -none Do not output documentation, only warnings. + +Output selection (mutually exclusive): + -export Only output documentation for symbols that have been + exported using EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() + in any input FILE or -export-file FILE. + -internal Only output documentation for symbols that have NOT been + exported using EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() + in any input FILE or -export-file FILE. + -function NAME Only output documentation for the given function(s) + or DOC: section title(s). All other functions and DOC: + sections are ignored. May be specified multiple times. + -nofunction NAME Do NOT output documentation for the given function(s); + only output documentation for the other functions and + DOC: sections. May be specified multiple times. + +Output selection modifiers: + -no-doc-sections Do not output DOC: sections. + -enable-lineno Enable output of #define LINENO lines. Only works with + reStructuredText format. + -export-file FILE Specify an additional FILE in which to look for + EXPORT_SYMBOL() and EXPORT_SYMBOL_GPL(). To be used with + -export or -internal. May be specified multiple times. + +Other parameters: + -v Verbose output, more warnings and other information. + -h Print this help. + +EOF + print $message; + exit 1; +} + +# +# format of comments. +# In the following table, (...)? signifies optional structure. +# (...)* signifies 0 or more structure elements +# /** +# * function_name(:)? (- short description)? +# (* @parameterx: (description of parameter x)?)* +# (* a blank line)? +# * (Description:)? (Description of function)? +# * (section header: (section description)? )* +# (*)?*/ +# +# So .. 
the trivial example would be: +# +# /** +# * my_function +# */ +# +# If the Description: header tag is omitted, then there must be a blank line +# after the last parameter specification. +# e.g. +# /** +# * my_function - does my stuff +# * @my_arg: its mine damnit +# * +# * Does my stuff explained. +# */ +# +# or, could also use: +# /** +# * my_function - does my stuff +# * @my_arg: its mine damnit +# * Description: Does my stuff explained. +# */ +# etc. +# +# Besides functions you can also write documentation for structs, unions, +# enums and typedefs. Instead of the function name you must write the name +# of the declaration; the struct/union/enum/typedef must always precede +# the name. Nesting of declarations is not supported. +# Use the argument mechanism to document members or constants. +# e.g. +# /** +# * struct my_struct - short description +# * @a: first member +# * @b: second member +# * +# * Longer description +# */ +# struct my_struct { +# int a; +# int b; +# /* private: */ +# int c; +# }; +# +# All descriptions can be multiline, except the short function description. +# +# For really longs structs, you can also describe arguments inside the +# body of the struct. +# eg. +# /** +# * struct my_struct - short description +# * @a: first member +# * @b: second member +# * +# * Longer description +# */ +# struct my_struct { +# int a; +# int b; +# /** +# * @c: This is longer description of C +# * +# * You can use paragraphs to describe arguments +# * using this method. +# */ +# int c; +# }; +# +# This should be use only for struct/enum members. +# +# You can also add additional sections. When documenting kernel functions you +# should document the "Context:" of the function, e.g. whether the functions +# can be called form interrupts. Unlike other sections you can end it with an +# empty line. +# A non-void function should have a "Return:" section describing the return +# value(s). 
+# Example-sections should contain the string EXAMPLE so that they are marked +# appropriately in DocBook. +# +# Example: +# /** +# * user_function - function that can only be called in user context +# * @a: some argument +# * Context: !in_interrupt() +# * +# * Some description +# * Example: +# * user_function(22); +# */ +# ... +# +# +# All descriptive text is further processed, scanning for the following special +# patterns, which are highlighted appropriately. +# +# 'funcname()' - function +# '$ENVVAR' - environmental variable +# '&struct_name' - name of a structure (up to two words including 'struct') +# '&struct_name.member' - name of a structure member +# '@parameter' - name of a parameter +# '%CONST' - name of a constant. +# '``LITERAL``' - literal string without any spaces on it. + +## init lots of data + +my $errors = 0; +my $warnings = 0; +my $anon_struct_union = 0; + +# match expressions used to find embedded type information +my $type_constant = '\b``([^\`]+)``\b'; +my $type_constant2 = '\%([-_\w]+)'; +my $type_func = '(\w+)\(\)'; +my $type_param = '\@(\w*(\.\w+)*(\.\.\.)?)'; +my $type_fp_param = '\@(\w+)\(\)'; # Special RST handling for func ptr params +my $type_env = '(\$\w+)'; +my $type_enum = '\&(enum\s*([_\w]+))'; +my $type_struct = '\&(struct\s*([_\w]+))'; +my $type_typedef = '\&(typedef\s*([_\w]+))'; +my $type_union = '\&(union\s*([_\w]+))'; +my $type_member = '\&([_\w]+)(\.|->)([_\w]+)'; +my $type_fallback = '\&([_\w]+)'; +my $type_member_func = $type_member . '\(\)'; + +# Output conversion substitutions. 
+# One for each output format + +# these are pretty rough +my @highlights_man = ( + [$type_constant, "\$1"], + [$type_constant2, "\$1"], + [$type_func, "\\\\fB\$1\\\\fP"], + [$type_enum, "\\\\fI\$1\\\\fP"], + [$type_struct, "\\\\fI\$1\\\\fP"], + [$type_typedef, "\\\\fI\$1\\\\fP"], + [$type_union, "\\\\fI\$1\\\\fP"], + [$type_param, "\\\\fI\$1\\\\fP"], + [$type_member, "\\\\fI\$1\$2\$3\\\\fP"], + [$type_fallback, "\\\\fI\$1\\\\fP"] + ); +my $blankline_man = ""; + +# rst-mode +my @highlights_rst = ( + [$type_constant, "``\$1``"], + [$type_constant2, "``\$1``"], + # Note: need to escape () to avoid func matching later + [$type_member_func, "\\:c\\:type\\:`\$1\$2\$3\\\\(\\\\) <\$1>`"], + [$type_member, "\\:c\\:type\\:`\$1\$2\$3 <\$1>`"], + [$type_fp_param, "**\$1\\\\(\\\\)**"], + [$type_func, "\\:c\\:func\\:`\$1()`"], + [$type_enum, "\\:c\\:type\\:`\$1 <\$2>`"], + [$type_struct, "\\:c\\:type\\:`\$1 <\$2>`"], + [$type_typedef, "\\:c\\:type\\:`\$1 <\$2>`"], + [$type_union, "\\:c\\:type\\:`\$1 <\$2>`"], + # in rst this can refer to any type + [$type_fallback, "\\:c\\:type\\:`\$1`"], + [$type_param, "**\$1**"] + ); +my $blankline_rst = "\n"; + +# read arguments +if ($#ARGV == -1) { + usage(); +} + +my $kernelversion; +my $dohighlight = ""; + +my $verbose = 0; +my $output_mode = "rst"; +my $output_preformatted = 0; +my $no_doc_sections = 0; +my $enable_lineno = 0; +my @highlights = @highlights_rst; +my $blankline = $blankline_rst; +my $modulename = "Kernel API"; + +use constant { + OUTPUT_ALL => 0, # output all symbols and doc sections + OUTPUT_INCLUDE => 1, # output only specified symbols + OUTPUT_EXCLUDE => 2, # output everything except specified symbols + OUTPUT_EXPORTED => 3, # output exported symbols + OUTPUT_INTERNAL => 4, # output non-exported symbols +}; +my $output_selection = OUTPUT_ALL; +my $show_not_found = 0; + +my @export_file_list; + +my @build_time; +if (defined($ENV{'KBUILD_BUILD_TIMESTAMP'}) && + (my $seconds = `date -d"${ENV{'KBUILD_BUILD_TIMESTAMP'}}" 
+%s`) ne '') { + @build_time = gmtime($seconds); +} else { + @build_time = localtime; +} + +my $man_date = ('January', 'February', 'March', 'April', 'May', 'June', + 'July', 'August', 'September', 'October', + 'November', 'December')[$build_time[4]] . + " " . ($build_time[5]+1900); + +# Essentially these are globals. +# They probably want to be tidied up, made more localised or something. +# CAVEAT EMPTOR! Some of the others I localised may not want to be, which +# could cause "use of undefined value" or other bugs. +my ($function, %function_table, %parametertypes, $declaration_purpose); +my $declaration_start_line; +my ($type, $declaration_name, $return_type); +my ($newsection, $newcontents, $prototype, $brcount, %source_map); + +if (defined($ENV{'KBUILD_VERBOSE'})) { + $verbose = "$ENV{'KBUILD_VERBOSE'}"; +} + +# Generated docbook code is inserted in a template at a point where +# docbook v3.1 requires a non-zero sequence of RefEntry's; see: +# http://www.oasis-open.org/docbook/documentation/reference/html/refentry.html +# We keep track of number of generated entries and generate a dummy +# if needs be to ensure the expanded template can be postprocessed +# into html. 
# Number of entries emitted so far (incremented by the output dispatchers).
my $section_counter = 0;

# Prefix printed in front of every output line by the highlight routines.
my $lineprefix = "";

# Parser states
use constant {
    STATE_NORMAL     => 0,    # normal code
    STATE_NAME       => 1,    # looking for function name
    STATE_BODY_MAYBE => 2,    # body - or maybe more description
    STATE_BODY       => 3,    # the body of the comment
    STATE_PROTO      => 4,    # scanning prototype
    STATE_DOCBLOCK   => 5,    # documentation block
    STATE_INLINE     => 6,    # gathering documentation outside main block
};
my $state;
my $in_doc_sect;
my $leading_space;

# Inline documentation state
use constant {
    STATE_INLINE_NA    => 0,  # not applicable ($state != STATE_INLINE)
    STATE_INLINE_NAME  => 1,  # looking for member name (@foo:)
    STATE_INLINE_TEXT  => 2,  # looking for member documentation
    STATE_INLINE_END   => 3,  # done
    STATE_INLINE_ERROR => 4,  # error - Comment without header was found.
                              # Spit a warning as it's not
                              # proper kernel-doc and ignore the rest.
};
my $inline_doc_state;

# declaration types: can be
# 'function', 'struct', 'union', 'enum', 'typedef'
my $decl_type;

# Regex source strings used to recognise the pieces of a kernel-doc comment.
my $doc_start = '^/\*\*\s*$'; # Allow whitespace at end of comment start.
my $doc_end = '\*/';
my $doc_com = '\s*\*\s*';
my $doc_com_body = '\s*\* ?';
my $doc_decl = $doc_com . '(\w+)';
# @params and a strictly limited set of supported section names
my $doc_sect = $doc_com .
    '\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:(.*)';
my $doc_content = $doc_com_body . '(.*)';
my $doc_block = $doc_com . 'DOC:\s*(.*)?';
my $doc_inline_start = '^\s*/\*\*\s*$';
my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)';
my $doc_inline_end = '^\s*\*/\s*$';
my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$';
my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;';

# Per-declaration accumulators filled while a comment block is parsed.
my %parameterdescs;
my %parameterdesc_start_lines;
my @parameterlist;
my %sections;
my @sectionlist;
my %section_start_lines;
my $sectcheck;
my $struct_actual;

my $contents = "";
my $new_start_line = 0;

# the canonical section names. see also $doc_sect above.
my $section_default = "Description";	# default section
my $section_intro = "Introduction";
my $section = $section_default;
my $section_context = "Context";
my $section_return = "Return";

my $undescribed = "-- undescribed --";

reset_state();

# Consume leading "-option" / "--option" arguments.  The @ARGV test stops
# the loop cleanly once every argument has been shifted off, instead of
# matching against an undefined $ARGV[0].
while (@ARGV && $ARGV[0] =~ m/^--?(.*)/) {
    my $cmd = $1;
    shift @ARGV;
    if ($cmd eq "man") {
        $output_mode = "man";
        @highlights = @highlights_man;
        $blankline = $blankline_man;
    } elsif ($cmd eq "rst") {
        $output_mode = "rst";
        @highlights = @highlights_rst;
        $blankline = $blankline_rst;
    } elsif ($cmd eq "none") {
        $output_mode = "none";
    } elsif ($cmd eq "module") { # not needed for XML, inherits from calling document
        $modulename = shift @ARGV;
    } elsif ($cmd eq "function") { # to only output specific functions
        $output_selection = OUTPUT_INCLUDE;
        $function = shift @ARGV;
        $function_table{$function} = 1;
    } elsif ($cmd eq "nofunction") { # output all except specific functions
        $output_selection = OUTPUT_EXCLUDE;
        $function = shift @ARGV;
        $function_table{$function} = 1;
    } elsif ($cmd eq "export") { # only exported symbols
        $output_selection = OUTPUT_EXPORTED;
        %function_table = ();
    } elsif ($cmd eq "internal") { # only non-exported symbols
        $output_selection = OUTPUT_INTERNAL;
        %function_table = ();
    } elsif ($cmd eq "export-file") {
        my $file = shift @ARGV;
        push(@export_file_list, $file);
    } elsif ($cmd eq "v") {
        $verbose = 1;
    } elsif (($cmd eq "h") || ($cmd eq "help")) {
        usage();
    } elsif ($cmd eq 'no-doc-sections') {
        $no_doc_sections = 1;
    } elsif ($cmd eq 'enable-lineno') {
        $enable_lineno = 1;
    } elsif ($cmd eq 'show-not-found') {
        $show_not_found = 1;
    } else {
        # Unknown argument
        usage();
    }
}

# continue execution near EOF;

# get kernel version from env
# Returns $ENV{KERNELVERSION} when it is defined, otherwise the literal
# string 'unknown kernel version'.
sub get_kernel_version() {
    my $version = 'unknown kernel version';

    if (defined($ENV{'KERNELVERSION'})) {
        $version = $ENV{'KERNELVERSION'};
    }
    return $version;
}

#
# Emit a "#define LINENO <n>" marker line, but only when line-number output
# is enabled and a line number was actually supplied.
sub print_lineno {
    my ($lineno) = @_;
    return unless ($enable_lineno && defined($lineno));
    print "#define LINENO " . $lineno . "\n";
}
##
# dumps section contents to arrays/hashes intended for that purpose.
#
# Arguments: file name (used in warnings), section name, content lines.
# A name matching $type_param, or the literal "@...", is stored as a
# parameter description; anything else is stored as a named section.
sub dump_section {
    my ($file, $name, @lines) = @_;
    my $contents = join "\n", @lines;
    my $is_param = 0;

    if ($name =~ m/$type_param/) {
        $name = $1;
        $is_param = 1;
    } elsif ($name eq "@\.\.\.") {
        $name = "...";
        $is_param = 1;
    }

    if ($is_param) {
        $parameterdescs{$name} = $contents;
        $sectcheck .= $name . " ";
        $parameterdesc_start_lines{$name} = $new_start_line;
        $new_start_line = 0;
    } elsif (defined($sections{$name}) && ($sections{$name} ne "")) {
        # Section already has text: append to it.
        # Only warn on user specified duplicate section names.
        unless ($name eq $section_default) {
            print STDERR "${file}:$.: warning: duplicate section name '$name'\n";
            ++$warnings;
        }
        $sections{$name} .= $contents;
    } else {
        # First occurrence of this section.
        $sections{$name} = $contents;
        push @sectionlist, $name;
        $section_start_lines{$name} = $new_start_line;
        $new_start_line = 0;
    }
}

##
# dump DOC: section after checking that it should go out
#
# Nothing is emitted when DOC sections are disabled, or when the current
# output selection does not cover $name.
sub dump_doc_section {
    my ($file, $name, @lines) = @_;
    my $contents = join "\n", @lines;

    return if ($no_doc_sections);

    my $listed = defined($function_table{$name});
    my $wanted = ($output_selection == OUTPUT_ALL)
        || ($output_selection == OUTPUT_INCLUDE && $listed)
        || ($output_selection == OUTPUT_EXCLUDE && !$listed);

    if ($wanted) {
        dump_section($file, $name, $contents);
        output_blockhead({'sectionlist' => \@sectionlist,
                          'sections' => \%sections,
                          'module' => $modulename,
                          'content-only' => ($output_selection != OUTPUT_ALL), });
    }
}

##
# output function
#
# parameterdescs, a hash.
#  function => "function name"
#  parameterlist => @list of parameters
#  parameterdescs => %parameter descriptions
#  sectionlist => @list of sections
#  sections => %section descriptions
#

# Run the highlight substitutions over the joined arguments and print the
# result one line at a time, each preceded by $lineprefix.
sub output_highlight {
    # The eval'd highlight code operates on a variable named $contents.
    my $contents = join "\n", @_;

# DEBUG
#   if (!defined $contents) {
#       use Carp;
#       confess "output_highlight got called with no args?\n";
#   }

#   print STDERR "contents b4:$contents\n";
    eval $dohighlight;
    die $@ if $@;
#   print STDERR "contents af:$contents\n";

    for my $line (split "\n", $contents) {
        $line =~ s/^\s*// unless ($output_preformatted);

        if ($line eq "") {
            # Blank lines become $blankline unless output is preformatted.
            print $lineprefix, $blankline unless ($output_preformatted);
        } elsif ($output_mode eq "man" && substr($line, 0, 1) eq ".") {
            # Protect a leading dot so it is not read as a man request.
            print "\\&$line";
        } else {
            print $lineprefix, $line;
        }
        print "\n";
    }
}

##
# output function in man
sub output_function_man(%) {
    my $opts  = $_[0];
    my $fname = $opts->{'function'};

    print ".TH \"$fname\" 9 \"$fname\" \"$man_date\" \"Kernel Hacker's Manual\" LINUX\n";

    print ".SH NAME\n";
    print $fname . " \\- " . $opts->{'purpose'} . "\n";

    print ".SH SYNOPSIS\n";
    if ($opts->{'functiontype'} eq "") {
        print ".B \"" . $fname . "\n";
    } else {
        print ".B \"" . $opts->{'functiontype'} . "\" " . $fname . "\n";
    }

    # One .BI line per parameter: the first carries the opening parenthesis,
    # the last closes the prototype, the others end with a comma.
    my $last_idx = $#{$opts->{'parameterlist'}};
    my $idx = 0;
    my $open = "(";
    foreach my $pname (@{$opts->{'parameterlist'}}) {
        my $tail = ($idx == $last_idx) ? ");" : ",";
        $type = $opts->{'parametertypes'}{$pname};
        if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
            # pointer-to-function
            print ".BI \"" . $open . $1 . "\" " . $pname . " \") (" . $2 . ")" . $tail . "\"\n";
        } else {
            $type =~ s/([^\*])$/$1 /;
            print ".BI \"" . $open . $type . "\" " . $pname . " \"" . $tail . "\"\n";
        }
        $idx++;
        $open = "";
    }

    print ".SH ARGUMENTS\n";
    foreach my $pname (@{$opts->{'parameterlist'}}) {
        # Descriptions are keyed by the name without any "[...]" suffix.
        (my $key = $pname) =~ s/\[.*//;

        print ".IP \"" . $pname . "\" 12\n";
        output_highlight($opts->{'parameterdescs'}{$key});
    }
    foreach my $sect (@{$opts->{'sectionlist'}}) {
        print ".SH \"", uc($sect), "\"\n";
        output_highlight($opts->{'sections'}{$sect});
    }
}

##
# output enum in man
sub output_enum_man(%) {
    my $opts = $_[0];

    print ".TH \"$opts->{'module'}\" 9 \"enum $opts->{'enum'}\" \"$man_date\" \"API Manual\" LINUX\n";

    print ".SH NAME\n";
    print "enum " . $opts->{'enum'} . " \\- " . $opts->{'purpose'} . "\n";

    print ".SH SYNOPSIS\n";
    print "enum " . $opts->{'enum'} . " {\n";
    my $last_idx = $#{$opts->{'parameterlist'}};
    my $idx = 0;
    foreach my $constant (@{$opts->{'parameterlist'}}) {
        print ".br\n.BI \" $constant\"\n";
        if ($idx == $last_idx) {
            # Final constant: close the enum body and stop.
            print "\n};\n";
            last;
        }
        print ", \n.br\n";
        $idx++;
    }

    print ".SH Constants\n";
    foreach my $constant (@{$opts->{'parameterlist'}}) {
        (my $key = $constant) =~ s/\[.*//;

        print ".IP \"" . $constant . "\" 12\n";
        output_highlight($opts->{'parameterdescs'}{$key});
    }
    foreach my $sect (@{$opts->{'sectionlist'}}) {
        print ".SH \"$sect\"\n";
        output_highlight($opts->{'sections'}{$sect});
    }
}

##
# output struct in man
sub output_struct_man(%) {
    my $opts = $_[0];

    print ".TH \"$opts->{'module'}\" 9 \"" . $opts->{'type'} . " " . $opts->{'struct'} . "\" \"$man_date\" \"API Manual\" LINUX\n";

    print ".SH NAME\n";
    print $opts->{'type'} . " " . $opts->{'struct'} . " \\- " . $opts->{'purpose'} . "\n";

    # Turn every line of the definition into its own .BI request.
    my $declaration = $opts->{'definition'};
    $declaration =~ s/\t/ /g;
    $declaration =~ s/\n/"\n.br\n.BI \"/g;
    print ".SH SYNOPSIS\n";
    print $opts->{'type'} . " " . $opts->{'struct'} . " {\n.br\n";
    print ".BI \"$declaration\n};\n.br\n\n";

    print ".SH Members\n";
    foreach my $member (@{$opts->{'parameterlist'}}) {
        # Entries starting with '#' are preprocessor lines, not members.
        next if ($member =~ /^#/);

        (my $key = $member) =~ s/\[.*//;

        next unless ($opts->{'parameterdescs'}{$key} ne $undescribed);
        print ".IP \"" . $member . "\" 12\n";
        output_highlight($opts->{'parameterdescs'}{$key});
    }
    foreach my $sect (@{$opts->{'sectionlist'}}) {
        print ".SH \"$sect\"\n";
        output_highlight($opts->{'sections'}{$sect});
    }
}

##
# output typedef in man
sub output_typedef_man(%) {
    my $opts = $_[0];

    print ".TH \"$opts->{'module'}\" 9 \"$opts->{'typedef'}\" \"$man_date\" \"API Manual\" LINUX\n";

    print ".SH NAME\n";
    print "typedef " . $opts->{'typedef'} . " \\- " . $opts->{'purpose'} . "\n";

    foreach my $sect (@{$opts->{'sectionlist'}}) {
        print ".SH \"$sect\"\n";
        output_highlight($opts->{'sections'}{$sect});
    }
}

# Man output for a DOC: block - a title line followed by every section.
sub output_blockhead_man(%) {
    my $opts = $_[0];

    print ".TH \"$opts->{'module'}\" 9 \"$opts->{'module'}\" \"$man_date\" \"API Manual\" LINUX\n";

    foreach my $sect (@{$opts->{'sectionlist'}}) {
        print ".SH \"$sect\"\n";
        output_highlight($opts->{'sections'}{$sect});
    }
}

##
# output in restructured text
#

#
# This could use some work; it's used to output the DOC: sections, and
# starts by putting out the name of the doc section itself, but that tends
# to duplicate a header already in the template file.
#
sub output_blockhead_rst(%) {
    my $opts = $_[0];

    foreach my $sect (@{$opts->{'sectionlist'}}) {
        # The section title is omitted when specific names were requested.
        print "**$sect**\n\n" if ($output_selection != OUTPUT_INCLUDE);
        print_lineno($section_start_lines{$sect});
        output_highlight_rst($opts->{'sections'}{$sect});
        print "\n";
    }
}

#
# Apply the RST highlights to a sub-block of text.
#
sub highlight_block($) {
    # The dohighlight kludge requires the text be called $contents
    my $contents = shift;
    eval $dohighlight;
    die $@ if $@;
    return $contents;
}

#
# Regexes used only here.
#
my $sphinx_literal = '^[^.].*::$';
my $sphinx_cblock = '^\.\.\ +code-block::';

# Print text as RST.  Ordinary text is run through highlight_block();
# lines belonging to a literal block (introduced by a line matching
# $sphinx_literal or $sphinx_cblock) are passed through unchanged.
sub output_highlight_rst {
    my $input = join "\n",@_;
    my $output = "";
    my $line;
    my $in_literal = 0;
    my $litprefix;
    my $block = "";

    foreach $line (split "\n",$input) {
        #
        # If we're in a literal block, see if we should drop out
        # of it.  Otherwise pass the line straight through unmunged.
        #
        if ($in_literal) {
            if (! ($line =~ /^\s*$/)) {
                #
                # If this is the first non-blank line in a literal
                # block we need to figure out what the proper indent is.
                #
                if ($litprefix eq "") {
                    $line =~ /^(\s*)/;
                    $litprefix = '^' . $1;
                    $output .= $line . "\n";
                } elsif (! ($line =~ /$litprefix/)) {
                    $in_literal = 0;
                } else {
                    $output .= $line . "\n";
                }
            } else {
                $output .= $line . "\n";
            }
        }
        #
        # Not in a literal block (or just dropped out)
        #
        if (! $in_literal) {
            $block .= $line . "\n";
            if (($line =~ /$sphinx_literal/) || ($line =~ /$sphinx_cblock/)) {
                $in_literal = 1;
                $litprefix = "";
                $output .= highlight_block($block);
                $block = ""
            }
        }
    }

    if ($block) {
        $output .= highlight_block($block);
    }
    foreach $line (split "\n", $output) {
        print $lineprefix . $line . "\n";
    }
}

# RST output for a function, or for a function typedef when
# $args{'typedef'} is set.
sub output_function_rst(%) {
    my %args = %{$_[0]};
    my ($parameter, $section);
    my $oldprefix = $lineprefix;
    my $start = "";

    if ($args{'typedef'}) {
        print ".. c:type:: ". $args{'function'} . "\n\n";
        print_lineno($declaration_start_line);
        print " **Typedef**: ";
        $lineprefix = "";
        output_highlight_rst($args{'purpose'});
        $start = "\n\n**Syntax**\n\n ``";
    } else {
        print ".. c:function:: ";
    }
    if ($args{'functiontype'} ne "") {
        $start .= $args{'functiontype'} . " " . $args{'function'} . " (";
    } else {
        $start .= $args{'function'} . " (";
    }
    print $start;

    my $count = 0;
    foreach my $parameter (@{$args{'parameterlist'}}) {
        if ($count ne 0) {
            print ", ";
        }
        $count++;
        $type = $args{'parametertypes'}{$parameter};

        if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
            # pointer-to-function
            print $1 . $parameter . ") (" . $2;
        } else {
            print $type . " " . $parameter;
        }
    }
    if ($args{'typedef'}) {
        print ");``\n\n";
    } else {
        print ")\n\n";
        print_lineno($declaration_start_line);
        $lineprefix = " ";
        output_highlight_rst($args{'purpose'});
        print "\n";
    }

    print "**Parameters**\n\n";
    $lineprefix = " ";
    foreach $parameter (@{$args{'parameterlist'}}) {
        my $parameter_name = $parameter;
        $parameter_name =~ s/\[.*//;
        $type = $args{'parametertypes'}{$parameter};

        if ($type ne "") {
            print "``$type $parameter``\n";
        } else {
            print "``$parameter``\n";
        }

        print_lineno($parameterdesc_start_lines{$parameter_name});

        if (defined($args{'parameterdescs'}{$parameter_name}) &&
            $args{'parameterdescs'}{$parameter_name} ne $undescribed) {
            output_highlight_rst($args{'parameterdescs'}{$parameter_name});
        } else {
            print " *undescribed*\n";
        }
        print "\n";
    }

    $lineprefix = $oldprefix;
    output_section_rst(@_);
}

# Print every named section of a declaration as RST, with an empty
# $lineprefix; the previous prefix is restored afterwards.
sub output_section_rst(%) {
    my %args = %{$_[0]};
    my $section;
    my $oldprefix = $lineprefix;
    $lineprefix = "";

    foreach $section (@{$args{'sectionlist'}}) {
        print "**$section**\n\n";
        print_lineno($section_start_lines{$section});
        output_highlight_rst($args{'sections'}{$section});
        print "\n";
    }
    print "\n";
    $lineprefix = $oldprefix;
}

# RST output for an enum: purpose, constants, then the sections.
sub output_enum_rst(%) {
    my %args = %{$_[0]};
    my ($parameter);
    my $oldprefix = $lineprefix;
    my $count;
    my $name = "enum " . $args{'enum'};

    print "\n\n.. c:type:: " . $name . "\n\n";
    print_lineno($declaration_start_line);
    $lineprefix = " ";
    output_highlight_rst($args{'purpose'});
    print "\n";

    print "**Constants**\n\n";
    $lineprefix = " ";
    foreach $parameter (@{$args{'parameterlist'}}) {
        print "``$parameter``\n";
        if ($args{'parameterdescs'}{$parameter} ne $undescribed) {
            output_highlight_rst($args{'parameterdescs'}{$parameter});
        } else {
            print " *undescribed*\n";
        }
        print "\n";
    }

    $lineprefix = $oldprefix;
    output_section_rst(@_);
}

# RST output for a plain (non-function) typedef.
sub output_typedef_rst(%) {
    my %args = %{$_[0]};
    my ($parameter);
    my $oldprefix = $lineprefix;
    my $name = "typedef " . $args{'typedef'};

    print "\n\n.. c:type:: " . $name . "\n\n";
    print_lineno($declaration_start_line);
    $lineprefix = " ";
    output_highlight_rst($args{'purpose'});
    print "\n";

    $lineprefix = $oldprefix;
    output_section_rst(@_);
}

# RST output for a struct or union: purpose, definition as a literal
# block, described members, then the sections.
sub output_struct_rst(%) {
    my %args = %{$_[0]};
    my ($parameter);
    my $oldprefix = $lineprefix;
    my $name = $args{'type'} . " " . $args{'struct'};

    print "\n\n.. c:type:: " . $name . "\n\n";
    print_lineno($declaration_start_line);
    $lineprefix = " ";
    output_highlight_rst($args{'purpose'});
    print "\n";

    print "**Definition**\n\n";
    print "::\n\n";
    my $declaration = $args{'definition'};
    $declaration =~ s/\t/ /g;
    print " " . $args{'type'} . " " . $args{'struct'} . " {\n$declaration };\n\n";

    print "**Members**\n\n";
    $lineprefix = " ";
    foreach $parameter (@{$args{'parameterlist'}}) {
        # '#' entries are preprocessor lines, not members.
        ($parameter =~ /^#/) && next;

        my $parameter_name = $parameter;
        $parameter_name =~ s/\[.*//;

        ($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next;
        $type = $args{'parametertypes'}{$parameter};
        print_lineno($parameterdesc_start_lines{$parameter_name});
        print "``" . $parameter . "``\n";
        output_highlight_rst($args{'parameterdescs'}{$parameter_name});
        print "\n";
    }
    print "\n";

    $lineprefix = $oldprefix;
    output_section_rst(@_);
}

## none mode output functions

sub output_function_none(%) {
}

sub output_enum_none(%) {
}

sub output_typedef_none(%) {
}

sub output_struct_none(%) {
}

sub output_blockhead_none(%) {
}

##
# generic output function for all types (function, struct/union, typedef, enum);
# calls the generated, variable output_ function name based on
# functype and output_mode
sub output_declaration {
    no strict 'refs';
    my $name = shift;
    my $functype = shift;
    my $func = "output_${functype}_$output_mode";
    if (($output_selection == OUTPUT_ALL) ||
        (($output_selection == OUTPUT_INCLUDE ||
          $output_selection == OUTPUT_EXPORTED) &&
         defined($function_table{$name})) ||
        (($output_selection == OUTPUT_EXCLUDE ||
          $output_selection == OUTPUT_INTERNAL) &&
         !($functype eq "function" && defined($function_table{$name}))))
    {
        &$func(@_);
        $section_counter++;
    }
}

##
# generic output function - calls the right one based on current output mode.
sub output_blockhead {
    no strict 'refs';
    my $func = "output_blockhead_" . $output_mode;
    &$func(@_);
    $section_counter++;
}

##
# takes a declaration (struct, union, enum, typedef) and
# invokes the right handler. NOT called for functions.
sub dump_declaration($$) {
    no strict 'refs';
    my ($prototype, $file) = @_;
    my $func = "dump_" . $decl_type;
    &$func(@_);
}

# Unions are handled exactly like structs.
sub dump_union($$) {
    dump_struct(@_);
}

# Parse a struct/union declaration ($x) found in $file, flatten nested
# struct/union members into "outer.inner" names, and hand the result to
# output_declaration().  Reports an error when $x cannot be parsed.
sub dump_struct($$) {
    my $x = shift;
    my $file = shift;

    if ($x =~ /(struct|union)\s+(\w+)\s*\{(.*)\}/) {
        my $decl_type = $1;
        $declaration_name = $2;
        my $members = $3;

        # ignore members marked private:
        $members =~ s/\/\*\s*private:.*?\/\*\s*public:.*?\*\///gosi;
        $members =~ s/\/\*\s*private:.*//gosi;
        # strip comments:
        $members =~ s/\/\*.*?\*\///gos;
        # strip attributes; /g so that every attribute in the body is
        # removed, not just the first one
        $members =~ s/__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)//gi;
        $members =~ s/__aligned\s*\([^;]*\)//gos;
        $members =~ s/\s*CRYPTO_MINALIGN_ATTR//gos;
        # replace DECLARE_BITMAP
        $members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos;
        # replace DECLARE_HASHTABLE
        $members =~ s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) - 1)\]/gos;
        # replace DECLARE_KFIFO
        $members =~ s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos;
        # replace DECLARE_KFIFO_PTR
        $members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos;

        my $declaration = $members;

        # Split nested struct/union elements as newer ones
        while ($members =~ m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) {
            my $newmember;
            my $maintype = $1;
            my $ids = $4;
            my $content = $3;
            foreach my $id(split /,/, $ids) {
                $newmember .= "$maintype $id; ";

                $id =~ s/[:\[].*//;
                $id =~ s/^\s*\**(\S+)\s*/$1/;
                foreach my $arg (split /;/, $content) {
                    next if ($arg =~ m/^\s*$/);
                    if ($arg =~ m/^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)/) {
                        # pointer-to-function
                        my $type = $1;
                        my $name = $2;
                        my $extra = $3;
                        next if (!$name);
                        if ($id =~ m/^\s*$/) {
                            # anonymous struct/union
                            $newmember .= "$type$name$extra; ";
                        } else {
                            $newmember .= "$type$id.$name$extra; ";
                        }
                    } else {
                        my $type;
                        my $names;
                        $arg =~ s/^\s+//;
                        $arg =~ s/\s+$//;
                        # Handle bitmaps
                        $arg =~ s/:\s*\d+\s*//g;
                        # Handle arrays
                        $arg =~ s/\[.*\]//g;
                        # The type may have multiple words,
                        # and multiple IDs can be defined, like:
                        #    const struct foo, *bar, foobar
                        # So, we remove spaces when parsing the
                        # names, in order to match just names
                        # and commas for the names
                        $arg =~ s/\s*,\s*/,/g;
                        if ($arg =~ m/(.*)\s+([\S+,]+)/) {
                            $type = $1;
                            $names = $2;
                        } else {
                            $newmember .= "$arg; ";
                            next;
                        }
                        foreach my $name (split /,/, $names) {
                            $name =~ s/^\s*\**(\S+)\s*/$1/;
                            next if (($name =~ m/^\s*$/));
                            if ($id =~ m/^\s*$/) {
                                # anonymous struct/union
                                $newmember .= "$type $name; ";
                            } else {
                                $newmember .= "$type $id.$name; ";
                            }
                        }
                    }
                }
            }
            $members =~ s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/;
        }

        # Ignore other nested elements, like enums
        $members =~ s/(\{[^\{\}]*\})//g;

        create_parameterlist($members, ';', $file, $declaration_name);
        check_sections($file, $declaration_name, $decl_type, $sectcheck, $struct_actual);

        # Adjust declaration for better display
        $declaration =~ s/([\{;])/$1\n/g;
        $declaration =~ s/\}\s+;/};/g;
        # Better handle inlined enums
        do {} while ($declaration =~ s/(enum\s+\{[^\}]+),([^\n])/$1,\n$2/);

        # Re-indent the definition one clause per line, tracking brace depth.
        my @def_args = split /\n/, $declaration;
        my $level = 1;
        $declaration = "";
        foreach my $clause (@def_args) {
            $clause =~ s/^\s+//;
            $clause =~ s/\s+$//;
            $clause =~ s/\s+/ /;
            next if (!$clause);
            $level-- if ($clause =~ m/(\})/ && $level > 1);
            if (!($clause =~ m/^\s*#/)) {
                $declaration .= "\t" x $level;
            }
            $declaration .= "\t" . $clause . "\n";
            $level++ if ($clause =~ m/(\{)/ && !($clause =~m/\}/));
        }
        output_declaration($declaration_name,
                           'struct',
                           {'struct' => $declaration_name,
                            'module' => $modulename,
                            'definition' => $declaration,
                            'parameterlist' => \@parameterlist,
                            'parameterdescs' => \%parameterdescs,
                            'parametertypes' => \%parametertypes,
                            'sectionlist' => \@sectionlist,
                            'sections' => \%sections,
                            'purpose' => $declaration_purpose,
                            'type' => $decl_type
                           });
    }
    else {
        print STDERR "${file}:$.: error: Cannot parse struct or union!\n";
        ++$errors;
    }
}


# Return 1 when warnings about declaration $name of kind $functype should
# be shown under the current output selection, 0 otherwise.
sub show_warnings($$) {
    my $functype = shift;
    my $name = shift;

    return 1 if ($output_selection == OUTPUT_ALL);

    if ($output_selection == OUTPUT_EXPORTED) {
        if (defined($function_table{$name})) {
            return 1;
        } else {
            return 0;
        }
    }
    if ($output_selection == OUTPUT_INTERNAL) {
        if (!($functype eq "function" && defined($function_table{$name}))) {
            return 1;
        } else {
            return 0;
        }
    }
    if ($output_selection == OUTPUT_INCLUDE) {
        if (defined($function_table{$name})) {
            return 1;
        } else {
            return 0;
        }
    }
    if ($output_selection == OUTPUT_EXCLUDE) {
        if (!defined($function_table{$name})) {
            return 1;
        } else {
            return 0;
        }
    }
    die("Please add the new output type at show_warnings()");
}

# Parse an enum declaration ($x) found in $file, warn about undescribed
# and excess enum values, and hand the result to output_declaration().
sub dump_enum($$) {
    my $x = shift;
    my $file = shift;

    $x =~ s@/\*.*?\*/@@gos;    # strip comments.
    # strip #define macros inside enums
    $x =~ s@#\s*((define|ifdef)\s+|endif)[^;]*;@@gos;

    if ($x =~ /enum\s+(\w+)\s*\{(.*)\}/) {
        $declaration_name = $1;
        my $members = $2;
        my %_members;

        $members =~ s/\s+$//;

        foreach my $arg (split ',', $members) {
            $arg =~ s/^\s*(\w+).*/$1/;
            push @parameterlist, $arg;
            if (!$parameterdescs{$arg}) {
                $parameterdescs{$arg} = $undescribed;
                if (show_warnings("enum", $declaration_name)) {
                    print STDERR "${file}:$.: warning: Enum value '$arg' not described in enum '$declaration_name'\n";
                }
            }
            $_members{$arg} = 1;
        }

        # Descriptions that match no enum value are reported as excess.
        while (my ($k, $v) = each %parameterdescs) {
            if (!exists($_members{$k})) {
                if (show_warnings("enum", $declaration_name)) {
                    print STDERR "${file}:$.: warning: Excess enum value '$k' description in '$declaration_name'\n";
                }
            }
        }

        output_declaration($declaration_name,
                           'enum',
                           {'enum' => $declaration_name,
                            'module' => $modulename,
                            'parameterlist' => \@parameterlist,
                            'parameterdescs' => \%parameterdescs,
                            'sectionlist' => \@sectionlist,
                            'sections' => \%sections,
                            'purpose' => $declaration_purpose
                           });
    }
    else {
        print STDERR "${file}:$.: error: Cannot parse enum!\n";
        ++$errors;
    }
}

# Parse a typedef ($x) found in $file.  Function typedefs are output as
# 'function' declarations with 'typedef' set; other typedefs are output
# as 'typedef'.  Reports an error when $x cannot be parsed.
sub dump_typedef($$) {
    my $x = shift;
    my $file = shift;

    $x =~ s@/\*.*?\*/@@gos;    # strip comments.

    # Parse function prototypes
    if ($x =~ /typedef\s+(\w+)\s*\(\*\s*(\w\S+)\s*\)\s*\((.*)\);/ ||
        $x =~ /typedef\s+(\w+)\s*(\w\S+)\s*\s*\((.*)\);/) {

        # Function typedefs
        $return_type = $1;
        $declaration_name = $2;
        my $args = $3;

        create_parameterlist($args, ',', $file, $declaration_name);

        output_declaration($declaration_name,
                           'function',
                           {'function' => $declaration_name,
                            'typedef' => 1,
                            'module' => $modulename,
                            'functiontype' => $return_type,
                            'parameterlist' => \@parameterlist,
                            'parameterdescs' => \%parameterdescs,
                            'parametertypes' => \%parametertypes,
                            'sectionlist' => \@sectionlist,
                            'sections' => \%sections,
                            'purpose' => $declaration_purpose
                           });
        return;
    }

    while (($x =~ /\(*.\)\s*;$/) || ($x =~ /\[*.\]\s*;$/)) {
        $x =~ s/\(*.\)\s*;$/;/;
        $x =~ s/\[*.\]\s*;$/;/;
    }

    if ($x =~ /typedef.*\s+(\w+)\s*;/) {
        $declaration_name = $1;

        output_declaration($declaration_name,
                           'typedef',
                           {'typedef' => $declaration_name,
                            'module' => $modulename,
                            'sectionlist' => \@sectionlist,
                            'sections' => \%sections,
                            'purpose' => $declaration_purpose
                           });
    }
    else {
        print STDERR "${file}:$.: error: Cannot parse typedef!\n";
        ++$errors;
    }
}

# Append one actual parameter/member name to the space-separated
# $struct_actual list (later compared against the documented names).
sub save_struct_actual($) {
    my $actual = shift;

    # strip all spaces from the actual param so that it looks like one string item
    $actual =~ s/\s*//g;
    $struct_actual = $struct_actual . $actual . " ";
}

# Split an argument/member list ($args) on $splitter and register each
# entry through push_parameter().
sub create_parameterlist($$$$) {
    my $args = shift;
    my $splitter = shift;
    my $file = shift;
    my $declaration_name = shift;
    my $type;
    my $param;

    # temporarily replace commas inside function pointer definition
    while ($args =~ /(\([^\),]+),/) {
        $args =~ s/(\([^\),]+),/$1#/g;
    }

    foreach my $arg (split($splitter, $args)) {
        # strip comments
        $arg =~ s/\/\*.*\*\///;
        # strip leading/trailing spaces
        $arg =~ s/^\s*//;
        $arg =~ s/\s*$//;
        $arg =~ s/\s+/ /;

        if ($arg =~ /^#/) {
            # Treat preprocessor directive as a typeless variable just to fill
            # corresponding data structures "correctly". Catch it later in
            # output_* subs.
            push_parameter($arg, "", $file);
        } elsif ($arg =~ m/\(.+\)\s*\(/) {
            # pointer-to-function
            $arg =~ tr/#/,/;
            $arg =~ m/[^\(]+\(\*?\s*([\w\.]*)\s*\)/;
            $param = $1;
            $type = $arg;
            $type =~ s/([^\(]+\(\*?)\s*$param/$1/;
            save_struct_actual($param);
            push_parameter($param, $type, $file, $declaration_name);
        } elsif ($arg) {
            $arg =~ s/\s*:\s*/:/g;
            $arg =~ s/\s*\[/\[/g;

            my @args = split('\s*,\s*', $arg);
            if ($args[0] =~ m/\*/) {
                $args[0] =~ s/(\*+)\s*/ $1/;
            }

            my @first_arg;
            if ($args[0] =~ /^(.*\s+)(.*?\[.*\].*)$/) {
                shift @args;
                push(@first_arg, split('\s+', $1));
                push(@first_arg, $2);
            } else {
                @first_arg = split('\s+', shift @args);
            }

            # The last word of the first declarator is its name; the
            # words before it form the shared type.
            unshift(@args, pop @first_arg);
            $type = join " ", @first_arg;

            foreach $param (@args) {
                if ($param =~ m/^(\*+)\s*(.*)/) {
                    save_struct_actual($2);
                    push_parameter($2, "$type $1", $file, $declaration_name);
                }
                elsif ($param =~ m/(.*?):(\d+)/) {
                    if ($type ne "") { # skip unnamed bit-fields
                        save_struct_actual($1);
                        push_parameter($1, "$type:$2", $file, $declaration_name)
                    }
                }
                else {
                    save_struct_actual($param);
                    push_parameter($param, $type, $file, $declaration_name);
                }
            }
        }
    }
}

# Record one parameter/member ($param of $type) in @parameterlist and
# %parametertypes, supplying default descriptions for "...", void and
# anonymous struct/union entries and warning about undescribed ones.
sub push_parameter($$$$) {
    my $param = shift;
    my $type = shift;
    my $file = shift;
    my $declaration_name = shift;

    if (($anon_struct_union == 1) && ($type eq "") &&
        ($param eq "}")) {
        return;        # ignore the ending }; from anon. struct/union
    }

    $anon_struct_union = 0;
    $param =~ s/[\[\)].*//;

    if ($type eq "" && $param =~ /\.\.\.$/)
    {
        # "!~" is required here: "!$param =~ ..." negates $param first and
        # then matches the negated value, so the test could never succeed.
        if ($param !~ /\w\.\.\.$/) {
            # handles unnamed variable parameters
            $param = "...";
        }
        if (!defined $parameterdescs{$param} || $parameterdescs{$param} eq "") {
            $parameterdescs{$param} = "variable arguments";
        }
    }
    elsif ($type eq "" && ($param eq "" or $param eq "void"))
    {
        $param="void";
        $parameterdescs{void} = "no arguments";
    }
    elsif ($type eq "" && ($param eq "struct" or $param eq "union"))
    # handle unnamed (anonymous) union or struct:
    {
        $type = $param;
        $param = "{unnamed_" . $param . "}";
        $parameterdescs{$param} = "anonymous\n";
        $anon_struct_union = 1;
    }

    # warn if parameter has no description
    # (but ignore ones starting with # as these are not parameters
    # but inline preprocessor statements);
    # Note: It will also ignore void params and unnamed structs/unions
    if (!defined $parameterdescs{$param} && $param !~ /^#/) {
        $parameterdescs{$param} = $undescribed;

        if (show_warnings($type, $declaration_name)) {
            print STDERR
                "${file}:$.: warning: Function parameter or member '$param' not described in '$declaration_name'\n";
            ++$warnings;
        }
    }

    # strip spaces from $param so that it is one continuous string
    # on @parameterlist;
    # this fixes a problem where check_sections() cannot find
    # a parameter like "addr[6 + 2]" because it actually appears
    # as "addr[6", "+", "2]" on the parameter list;
    # but it's better to maintain the param string unchanged for output,
    # so just weaken the string compare in check_sections() to ignore
    # "[blah" in a parameter string;
    ###$param =~ s/\s*//g;
    push @parameterlist, $param;
    $type =~ s/\s\s+/ /g;
    $parametertypes{$param} = $type;
}

# Compare the documented names ($sectcheck) with the actual names
# ($prmscheck), both space-separated, and warn about documented function
# parameters that do not exist in the prototype.
sub check_sections($$$$$) {
    my ($file, $decl_name, $decl_type, $sectcheck, $prmscheck) = @_;
    my @sects = split ' ', $sectcheck;
    my @prms = split ' ', $prmscheck;
    my $err;
    my ($px, $sx);
    my $prm_clean;        # strip trailing "[array size]" and/or beginning "*"

    foreach $sx (0 .. $#sects) {
        $err = 1;
        foreach $px (0 .. $#prms) {
            $prm_clean = $prms[$px];
            $prm_clean =~ s/\[.*\]//;
            $prm_clean =~ s/__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)//i;
            # ignore array size in a parameter string;
            # however, the original param string may contain
            # spaces, e.g.:  addr[6 + 2]
            # and this appears in @prms as "addr[6" since the
            # parameter list is split at spaces;
            # hence just ignore "[..." for the sections check;
            $prm_clean =~ s/\[.*//;

            ##$prm_clean =~ s/^\**//;
            if ($prm_clean eq $sects[$sx]) {
                $err = 0;
                last;
            }
        }
        if ($err) {
            if ($decl_type eq "function") {
                print STDERR "${file}:$.: warning: " .
                    "Excess function parameter " .
                    "'$sects[$sx]' " .
                    "description in '$decl_name'\n";
                ++$warnings;
            }
        }
    }
}

##
# Checks the section describing the return value of a function.
sub check_return_section {
    my $file = shift;
    my $declaration_name = shift;
    my $return_type = shift;

    # Ignore an empty return type (It's a macro)
    # Ignore functions with a "void" return type. (But don't ignore "void *")
    if (($return_type eq "") || ($return_type =~ /void\s*\w*\s*$/)) {
        return;
    }

    if (!defined($sections{$section_return}) ||
        $sections{$section_return} eq "") {
        print STDERR "${file}:$.: warning: " .
            "No description found for return value of " .
            "'$declaration_name'\n";
        ++$warnings;
    }
}

##
# takes a function prototype and the name of the current file being
# processed and spits out all the details stored in the global
# arrays/hashes.
+sub dump_function($$) { + my $prototype = shift; + my $file = shift; + my $noret = 0; + + $prototype =~ s/^static +//; + $prototype =~ s/^extern +//; + $prototype =~ s/^asmlinkage +//; + $prototype =~ s/^inline +//; + $prototype =~ s/^__inline__ +//; + $prototype =~ s/^__inline +//; + $prototype =~ s/^__always_inline +//; + $prototype =~ s/^noinline +//; + $prototype =~ s/__init +//; + $prototype =~ s/__init_or_module +//; + $prototype =~ s/__meminit +//; + $prototype =~ s/__must_check +//; + $prototype =~ s/__weak +//; + $prototype =~ s/__sched +//; + my $define = $prototype =~ s/^#\s*define\s+//; #ak added + $prototype =~ s/__attribute__\s*\(\( + (?: + [\w\s]++ # attribute name + (?:\([^)]*+\))? # attribute arguments + \s*+,? # optional comma at the end + )+ + \)\)\s+//x; + + # Yes, this truly is vile. We are looking for: + # 1. Return type (may be nothing if we're looking at a macro) + # 2. Function name + # 3. Function parameters. + # + # All the while we have to watch out for function pointer parameters + # (which IIRC is what the two sections are for), C types (these + # regexps don't even start to express all the possibilities), and + # so on. + # + # If you mess with these regexps, it's a good idea to check that + # the following functions' documentation still comes out right: + # - parport_register_device (function pointer parameters) + # - atomic_set (macro) + # - pci_match_device, __copy_to_user (long return type) + + if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) { + # This is an object-like macro, it has no return type and no parameter + # list. + # Function-like macros are not allowed to have spaces between + # declaration_name and opening parenthesis (notice the \s+). 
+ $return_type = $1; + $declaration_name = $2; + $noret = 1; + } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || + $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || + $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || + $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || + $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ || + $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || + $prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || + $prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || + $prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || + $prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || + $prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ || + $prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/) { + $return_type = $1; + $declaration_name = $2; + my $args = $3; + + create_parameterlist($args, ',', $file, $declaration_name); + } else { + print STDERR "${file}:$.: warning: cannot understand function prototype: '$prototype'\n"; + return; + } + + my $prms = join " ", @parameterlist; + check_sections($file, $declaration_name, "function", $sectcheck, $prms); + + # This check emits a lot of warnings at the moment, because many + # functions don't have a 'Return' doc section. So until the number + # of warnings goes sufficiently down, the check is only performed in + # verbose mode. + # TODO: always perform the check. 
+ if ($verbose && !$noret) { + check_return_section($file, $declaration_name, $return_type); + } + + output_declaration($declaration_name, + 'function', + {'function' => $declaration_name, + 'module' => $modulename, + 'functiontype' => $return_type, + 'parameterlist' => \@parameterlist, + 'parameterdescs' => \%parameterdescs, + 'parametertypes' => \%parametertypes, + 'sectionlist' => \@sectionlist, + 'sections' => \%sections, + 'purpose' => $declaration_purpose + }); +} + +sub reset_state { + $function = ""; + %parameterdescs = (); + %parametertypes = (); + @parameterlist = (); + %sections = (); + @sectionlist = (); + $sectcheck = ""; + $struct_actual = ""; + $prototype = ""; + + $state = STATE_NORMAL; + $inline_doc_state = STATE_INLINE_NA; +} + +sub tracepoint_munge($) { + my $file = shift; + my $tracepointname = 0; + my $tracepointargs = 0; + + if ($prototype =~ m/TRACE_EVENT\((.*?),/) { + $tracepointname = $1; + } + if ($prototype =~ m/DEFINE_SINGLE_EVENT\((.*?),/) { + $tracepointname = $1; + } + if ($prototype =~ m/DEFINE_EVENT\((.*?),(.*?),/) { + $tracepointname = $2; + } + $tracepointname =~ s/^\s+//; #strip leading whitespace + if ($prototype =~ m/TP_PROTO\((.*?)\)/) { + $tracepointargs = $1; + } + if (($tracepointname eq 0) || ($tracepointargs eq 0)) { + print STDERR "${file}:$.: warning: Unrecognized tracepoint format: \n". 
+ "$prototype\n"; + } else { + $prototype = "static inline void trace_$tracepointname($tracepointargs)"; + } +} + +sub syscall_munge() { + my $void = 0; + + $prototype =~ s@[\r\n]+@ @gos; # strip newlines/CR's +## if ($prototype =~ m/SYSCALL_DEFINE0\s*\(\s*(a-zA-Z0-9_)*\s*\)/) { + if ($prototype =~ m/SYSCALL_DEFINE0/) { + $void = 1; +## $prototype = "long sys_$1(void)"; + } + + $prototype =~ s/SYSCALL_DEFINE.*\(/long sys_/; # fix return type & func name + if ($prototype =~ m/long (sys_.*?),/) { + $prototype =~ s/,/\(/; + } elsif ($void) { + $prototype =~ s/\)/\(void\)/; + } + + # now delete all of the odd-number commas in $prototype + # so that arg types & arg names don't have a comma between them + my $count = 0; + my $len = length($prototype); + if ($void) { + $len = 0; # skip the for-loop + } + for (my $ix = 0; $ix < $len; $ix++) { + if (substr($prototype, $ix, 1) eq ',') { + $count++; + if ($count % 2 == 1) { + substr($prototype, $ix, 1) = ' '; + } + } + } +} + +sub process_proto_function($$) { + my $x = shift; + my $file = shift; + + $x =~ s@\/\/.*$@@gos; # strip C99-style comments to end of line + + if ($x =~ m#\s*/\*\s+MACDOC\s*#io || ($x =~ /^#/ && $x !~ /^#\s*define/)) { + # do nothing + } + elsif ($x =~ /([^\{]*)/) { + $prototype .= $1; + } + + if (($x =~ /\{/) || ($x =~ /\#\s*define/) || ($x =~ /;/)) { + $prototype =~ s@/\*.*?\*/@@gos; # strip comments. + $prototype =~ s@[\r\n]+@ @gos; # strip newlines/cr's. + $prototype =~ s@^\s+@@gos; # strip leading spaces + if ($prototype =~ /SYSCALL_DEFINE/) { + syscall_munge(); + } + if ($prototype =~ /TRACE_EVENT/ || $prototype =~ /DEFINE_EVENT/ || + $prototype =~ /DEFINE_SINGLE_EVENT/) + { + tracepoint_munge($file); + } + dump_function($prototype, $file); + reset_state(); + } +} + +sub process_proto_type($$) { + my $x = shift; + my $file = shift; + + $x =~ s@[\r\n]+@ @gos; # strip newlines/cr's. 
+ $x =~ s@^\s+@@gos; # strip leading spaces + $x =~ s@\s+$@@gos; # strip trailing spaces + $x =~ s@\/\/.*$@@gos; # strip C99-style comments to end of line + + if ($x =~ /^#/) { + # To distinguish preprocessor directive from regular declaration later. + $x .= ";"; + } + + while (1) { + if ( $x =~ /([^\{\};]*)([\{\};])(.*)/ ) { + if( length $prototype ) { + $prototype .= " " + } + $prototype .= $1 . $2; + ($2 eq '{') && $brcount++; + ($2 eq '}') && $brcount--; + if (($2 eq ';') && ($brcount == 0)) { + dump_declaration($prototype, $file); + reset_state(); + last; + } + $x = $3; + } else { + $prototype .= $x; + last; + } + } +} + + +sub map_filename($) { + my $file; + my ($orig_file) = @_; + + if (defined($ENV{'SRCTREE'})) { + $file = "$ENV{'SRCTREE'}" . "/" . $orig_file; + } else { + $file = $orig_file; + } + + if (defined($source_map{$file})) { + $file = $source_map{$file}; + } + + return $file; +} + +sub process_export_file($) { + my ($orig_file) = @_; + my $file = map_filename($orig_file); + + if (!open(IN,"<$file")) { + print STDERR "Error: Cannot open file $file\n"; + ++$errors; + return; + } + + while () { + if (/$export_symbol/) { + $function_table{$2} = 1; + } + } + + close(IN); +} + +# +# Parsers for the various processing states. +# +# STATE_NORMAL: looking for the /** to begin everything. +# +sub process_normal() { + if (/$doc_start/o) { + $state = STATE_NAME; # next line is always the function name + $in_doc_sect = 0; + $declaration_start_line = $. + 1; + } +} + +# +# STATE_NAME: Looking for the "name - description" line +# +sub process_name($$) { + my $file = shift; + my $identifier; + my $descr; + + if (/$doc_block/o) { + $state = STATE_DOCBLOCK; + $contents = ""; + $new_start_line = $. 
+ 1; + + if ( $1 eq "" ) { + $section = $section_intro; + } else { + $section = $1; + } + } + elsif (/$doc_decl/o) { + $identifier = $1; + if (/\s*([\w\s]+?)(\(\))?\s*-/) { + $identifier = $1; + } + + $state = STATE_BODY; + # if there's no @param blocks need to set up default section + # here + $contents = ""; + $section = $section_default; + $new_start_line = $. + 1; + if (/-(.*)/) { + # strip leading/trailing/multiple spaces + $descr= $1; + $descr =~ s/^\s*//; + $descr =~ s/\s*$//; + $descr =~ s/\s+/ /g; + $declaration_purpose = $descr; + $state = STATE_BODY_MAYBE; + } else { + $declaration_purpose = ""; + } + + if (($declaration_purpose eq "") && $verbose) { + print STDERR "${file}:$.: warning: missing initial short description on line:\n"; + print STDERR $_; + ++$warnings; + } + + if ($identifier =~ m/^struct/) { + $decl_type = 'struct'; + } elsif ($identifier =~ m/^union/) { + $decl_type = 'union'; + } elsif ($identifier =~ m/^enum/) { + $decl_type = 'enum'; + } elsif ($identifier =~ m/^typedef/) { + $decl_type = 'typedef'; + } else { + $decl_type = 'function'; + } + + if ($verbose) { + print STDERR "${file}:$.: info: Scanning doc for $identifier\n"; + } + } else { + print STDERR "${file}:$.: warning: Cannot understand $_ on line $.", + " - I thought it was a doc line\n"; + ++$warnings; + $state = STATE_NORMAL; + } +} + + +# +# STATE_BODY and STATE_BODY_MAYBE: the bulk of a kerneldoc comment. 
#
# process_body($file, $line)
#
# Handles one line of a kernel-doc comment body. Only the first argument
# ($file, used in diagnostics) is read; the line itself is matched via $_.
# Three kinds of line are recognised, in this order:
#   - $doc_sect:    a section header; the text gathered so far is flushed
#                   with dump_section() and a new section is started.
#   - $doc_end:     end of the comment; pending text is flushed and the
#                   parser moves on to STATE_PROTO.
#   - $doc_content: ordinary text, appended either to $declaration_purpose
#                   (while in STATE_BODY_MAYBE) or to $contents.
# Anything else is reported as a "bad line" warning.
#
sub process_body($$) {
    my $file = shift;

    if (/$doc_sect/i) { # case insensitive for supported section names
        $newsection = $1;
        $newcontents = $2;

        # map the supported section names to the canonical names
        if ($newsection =~ m/^description$/i) {
            $newsection = $section_default;
        } elsif ($newsection =~ m/^context$/i) {
            $newsection = $section_context;
        } elsif ($newsection =~ m/^returns?$/i) {
            $newsection = $section_return;
        } elsif ($newsection =~ m/^\@return$/) {
            # special: @return is a section, not a param description
            $newsection = $section_return;
        }

        # Flush whatever was collected for the previous section before
        # switching; text seen before any section header is only worth a
        # warning in verbose mode.
        if (($contents ne "") && ($contents ne "\n")) {
            if (!$in_doc_sect && $verbose) {
                print STDERR "${file}:$.: warning: contents before sections\n";
                ++$warnings;
            }
            dump_section($file, $section, $contents);
            $section = $section_default;
        }

        $in_doc_sect = 1;
        $state = STATE_BODY;
        $contents = $newcontents;
        $new_start_line = $.;
        # drop the blanks that follow the section header
        while (substr($contents, 0, 1) eq " ") {
            $contents = substr($contents, 1);
        }
        if ($contents ne "") {
            $contents .= "\n";
        }
        $section = $newsection;
        # forget the indentation of the previous section; it is measured
        # again on the first continuation line (see the $doc_content branch)
        $leading_space = undef;
    } elsif (/$doc_end/) {
        if (($contents ne "") && ($contents ne "\n")) {
            dump_section($file, $section, $contents);
            $section = $section_default;
            $contents = "";
        }
        # look for doc_com + <text> + doc_end:
        if ($_ =~ m'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') {
            print STDERR "${file}:$.: warning: suspicious ending line: $_";
            ++$warnings;
        }

        # the comment is finished: start collecting the declaration
        $prototype = "";
        $state = STATE_PROTO;
        $brcount = 0;
    } elsif (/$doc_content/) {
        # miguel-style comment kludge, look for blank lines after
        # @parameter line to signify start of description
        if ($1 eq "") {
            if ($section =~ m/^@/ || $section eq $section_context) {
                dump_section($file, $section, $contents);
                $section = $section_default;
                $contents = "";
                $new_start_line = $.;
            } else {
                $contents .= "\n";
            }
            $state = STATE_BODY;
        } elsif ($state == STATE_BODY_MAYBE) {
            # Continued declaration purpose
            chomp($declaration_purpose);
            $declaration_purpose .= " " . $1;
            $declaration_purpose =~ s/\s+/ /g;
        } else {
            # $1 is copied first because the matches below overwrite it
            my $cont = $1;
            if ($section =~ m/^@/ || $section eq $section_context) {
                # the first continuation line of the section fixes the
                # indentation that is stripped from every following line
                if (!defined $leading_space) {
                    if ($cont =~ m/^(\s+)/) {
                        $leading_space = $1;
                    } else {
                        $leading_space = "";
                    }
                }
                $cont =~ s/^$leading_space//;
            }
            $contents .= $cont . "\n";
        }
    } else {
        # i dont know - bad line? ignore.
        print STDERR "${file}:$.: warning: bad line: $_";
        ++$warnings;
    }
}


#
# STATE_PROTO: reading a function/whatever prototype.
#
# process_proto($file, $line): the current line ($_) is either
#   - a one-line inline doc comment, dumped immediately as a section,
#   - the start of a multi-line inline doc comment (switch to STATE_INLINE),
#   - or part of the declaration, which is handed to
#     process_proto_function() or process_proto_type() depending on
#     $decl_type.
#
sub process_proto($$) {
    my $file = shift;

    if (/$doc_inline_oneline/) {
        $section = $1;
        $contents = $2;
        if ($contents ne "") {
            $contents .= "\n";
            dump_section($file, $section, $contents);
            $section = $section_default;
            $contents = "";
        }
    } elsif (/$doc_inline_start/) {
        $state = STATE_INLINE;
        $inline_doc_state = STATE_INLINE_NAME;
    } elsif ($decl_type eq 'function') {
        process_proto_function($_, $file);
    } else {
        process_proto_type($_, $file);
    }
}

#
# STATE_DOCBLOCK: within a DOC: block.
#
# process_docblock($file, $line): accumulates the text of the block in
# $contents; when the closing line is seen the block is emitted with
# dump_doc_section() and the per-comment globals are cleared before
# returning to STATE_NORMAL.
#
sub process_docblock($$) {
    my $file = shift;

    if (/$doc_end/) {
        dump_doc_section($file, $section, $contents);
        $section = $section_default;
        $contents = "";
        $function = "";
        %parameterdescs = ();
        %parametertypes = ();
        @parameterlist = ();
        %sections = ();
        @sectionlist = ();
        $prototype = "";
        $state = STATE_NORMAL;
    } elsif (/$doc_content/) {
        # an empty content line is stored as $blankline rather than "\n"
        if ( $1 eq "" ) {
            $contents .= $blankline;
        } else {
            $contents .= $1 . "\n";
        }
    }
}

#
# STATE_INLINE: docbook comments within a prototype.
#
# process_inline($file, $line)
#
# Handles one line ($_) of a multi-line inline doc comment found inside a
# declaration. $inline_doc_state tracks progress:
#   STATE_INLINE_NAME  - waiting for the "@name:" line,
#   STATE_INLINE_TEXT  - collecting the description into $contents,
#   STATE_INLINE_ERROR - set when the first line was not an @name line,
#   STATE_INLINE_NA    - restored when the comment ends.
# Only $file is read from the argument list; it is used for diagnostics.
#
sub process_inline($$) {
    my $file = shift;

    # First line (state 1) needs to be a @parameter
    if ($inline_doc_state == STATE_INLINE_NAME && /$doc_inline_sect/o) {
        $section = $1;
        $contents = $2;
        $new_start_line = $.;
        if ($contents ne "") {
            # drop the blanks that follow "@name:"
            while (substr($contents, 0, 1) eq " ") {
                $contents = substr($contents, 1);
            }
            $contents .= "\n";
        }
        $inline_doc_state = STATE_INLINE_TEXT;
        # Documentation block end */
    } elsif (/$doc_inline_end/) {
        if (($contents ne "") && ($contents ne "\n")) {
            dump_section($file, $section, $contents);
            $section = $section_default;
            $contents = "";
        }
        # back to reading the declaration itself
        $state = STATE_PROTO;
        $inline_doc_state = STATE_INLINE_NA;
        # Regular text
    } elsif (/$doc_content/) {
        if ($inline_doc_state == STATE_INLINE_TEXT) {
            $contents .= $1 . "\n";
            # nuke leading blank lines
            if ($contents =~ /^\s*$/) {
                $contents = "";
            }
        } elsif ($inline_doc_state == STATE_INLINE_NAME) {
            $inline_doc_state = STATE_INLINE_ERROR;
            print STDERR "${file}:$.: warning: ";
            print STDERR "Incorrect use of kernel-doc format: $_";
            ++$warnings;
        }
    }
}


#
# process_file($orig_file)
#
# Maps $orig_file through map_filename(), opens it and feeds every line to
# the handler that matches the current parser $state. An open failure is
# reported on STDERR, counted in $errors and the file is skipped.
# After the file has been read, a warning is printed when no structured
# comment was found (unless $output_mode is "none").
#
sub process_file($) {
    my $file;
    my $initial_section_counter = $section_counter;
    my ($orig_file) = @_;

    $file = map_filename($orig_file);

    if (!open(IN,"<$file")) {
        print STDERR "Error: Cannot open file $file\n";
        ++$errors;
        return;
    }

    $. = 1;

    $section_counter = 0;
    # Read from the IN handle opened above. An empty "while ()" condition
    # is always true in Perl, so the handle has to be named explicitly.
    while (<IN>) {
        # Join lines that end in a backslash with the line that follows.
        while (s/\\\s*$//) {
            $_ .= <IN>;
        }
        # Replace tabs by spaces
        while ($_ =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e) {};
        # Hand this line to the appropriate state handler
        if ($state == STATE_NORMAL) {
            process_normal();
        } elsif ($state == STATE_NAME) {
            process_name($file, $_);
        } elsif ($state == STATE_BODY || $state == STATE_BODY_MAYBE) {
            process_body($file, $_);
        } elsif ($state == STATE_INLINE) { # scanning for inline parameters
            process_inline($file, $_);
        } elsif ($state == STATE_PROTO) {
            process_proto($file, $_);
        } elsif ($state == STATE_DOCBLOCK) {
            process_docblock($file, $_);
        }
    }

    # Release the handle once the file is consumed, as
    # process_export_file() does, instead of leaving it open until the
    # next open(IN, ...).
    close(IN);

    # Make sure we got something interesting.
    if ($initial_section_counter == $section_counter) {
        if ($output_mode ne "none") {
            print STDERR "${file}:1: warning: no structured comments found\n";
        }
        if (($output_selection == OUTPUT_INCLUDE) && ($show_not_found == 1)) {
            print STDERR " Was looking for '$_'.\n" for keys %function_table;
        }
    }
}


$kernelversion = get_kernel_version();

# generate a sequence of code that will splice in highlighting information
# using the s// operator.
# Each @highlights entry is a [pattern, replacement] pair; one substitution
# statement per pair is appended to the code string in $dohighlight.
for (my $k = 0; $k < @highlights; $k++) {
    my $pattern = $highlights[$k][0];
    my $result = $highlights[$k][1];
#   print STDERR "scanning pattern:$pattern, highlight:($result)\n";
    $dohighlight .= "\$contents =~ s:$pattern:$result:gs;\n";
}

# Read the file that maps relative names to absolute names for
# separate source and object directories and for shadow trees.
+if (open(SOURCE_MAP, "<.tmp_filelist.txt")) { + my ($relname, $absname); + while() { + chop(); + ($relname, $absname) = (split())[0..1]; + $relname =~ s:^/+::; + $source_map{$relname} = $absname; + } + close(SOURCE_MAP); +} + +if ($output_selection == OUTPUT_EXPORTED || + $output_selection == OUTPUT_INTERNAL) { + + push(@export_file_list, @ARGV); + + foreach (@export_file_list) { + chomp; + process_export_file($_); + } +} + +foreach (@ARGV) { + chomp; + process_file($_); +} +if ($verbose && $errors) { + print STDERR "$errors errors\n"; +} +if ($verbose && $warnings) { + print STDERR "$warnings warnings\n"; +} + +exit($output_mode eq "none" ? 0 : $errors); diff --git a/libdmmp/docs/libdmmp.h.3 b/libdmmp/docs/libdmmp.h.3 new file mode 100644 index 0000000..45d5be3 --- /dev/null +++ b/libdmmp/docs/libdmmp.h.3 @@ -0,0 +1,113 @@ +.TH "libdmmp.h" 3 "January 2016" "Device Mapper Multipath API - libdmmp Manual" + +.SH NAME +libdmmp.h \- Device Mapper Multipath API. + +.SH SYNOPSIS +#include + +.SH "DESCRIPTION" + +All the libdmmp public functions ships its own man pages. +Use 'man 3 ' to check the detail usage. + +.SH "USAGE" + +To use libdmmp in your project, we suggest to use the 'pkg-config' way: + + * Add this line into your configure.ac: + + PKG_CHECK_MODULES([LIBDMMP], [libdmmp]) + + * Add these lines into your Makefile.am: + + foo_LDFLAGS += $(LIBDMMP_LIBS) + foo_CFLAGS += $(LIBDMMP_CFLAGS) + +.SH LOG HANDLING + +The log handler function could be set via 'dmmp_context_log_func_set()'. +The log priority could be set via 'dmmp_context_log_priority_set()'. + +By default, the log priorities is 'DMMP_LOG_PRIORITY_WARNING'. +By default, the log handler is print log to STDERR, and its code is listed +below in case you want to create your own log handler. 
+ + static int _DMMP_LOG_STRERR_ALIGN_WIDTH = 80; + + static void _log_stderr(struct dmmp_context *ctx, + enum dmmp_log_priority priority, + const char *file, int line, + const char *func_name, + const char *format, va_list args) + { + int printed_bytes = 0; + + printed_bytes += fprintf(stderr, "libdmmp %s: ", + dmmp_log_priority_str(priority)); + printed_bytes += vfprintf(stderr, format, args); + userdata = dmmp_context_userdata_get(ctx); + if (userdata != NULL) + fprintf(stderr, "(with user data at memory address %p)", + userdata); + + if (printed_bytes < _DMMP_LOG_STRERR_ALIGN_WIDTH) { + fprintf(stderr, "%*s # %s:%s():%d\n", + _DMMP_LOG_STRERR_ALIGN_WIDTH - printed_bytes, "", file, + func_name, line); + } else { + fprintf(stderr, " # %s:%s():%d\n", file, func_name, line); + } + } + + +.SH "SAMPLE CODE" + + #include + + int main(int argc, char *argv[]) { + struct dmmp_context *ctx = NULL; + struct dmmp_mpath **dmmp_mps = NULL; + struct dmmp_path_group **dmmp_pgs = NULL; + struct dmmp_path **dmmp_ps = NULL; + uint32_t dmmp_mp_count = 0; + uint32_t dmmp_pg_count = 0; + uint32_t dmmp_p_count = 0; + const char *name = NULL; + const char *wwid = NULL; + uint32_t i = 0; + int rc = DMMP_OK; + + ctx = dmmp_context_new(); + dmmp_context_log_priority_set(ctx, DMMP_LOG_PRIORITY_DEBUG); + // By default, log will be printed to STDERR, you could + // change that via dmmp_context_log_func_set() + rc = dmmp_mpath_array_get(ctx, &dmmp_mps, &dmmp_mp_count); + if (rc != DMMP_OK) { + printf("dmmp_mpath_array_get() failed with %d: %s", rc, + dmmp_strerror(rc)); + goto out; + } + for (i = 0; i < dmmp_mp_count; ++i) { + name = dmmp_mpath_name_get(dmmp_mps[i]); + wwid = dmmp_mpath_wwid_get(dmmp_mps[i]); + printf("dmmp_mpath_array_get(): Got mpath: %s %s\n", name, + wwid); + // You could use dmmp_path_group_array_get() to retrieve + // path group information and then invoke dmmp_path_array_get() + // for path information. 
+ } + + out: + dmmp_context_free(ctx); + dmmp_mpath_array_free(dmmp_mps, dmmp_mp_count); + if (rc != DMMP_OK) + exit(1); + exit(0); + } + +.SH "LICENSE" +GPLv2+ + +.SH "BUG" +Please report bug to diff --git a/libdmmp/docs/split-man.pl b/libdmmp/docs/split-man.pl new file mode 100755 index 0000000..a97acc1 --- /dev/null +++ b/libdmmp/docs/split-man.pl @@ -0,0 +1,40 @@ +#!/usr/bin/perl +# Originally From: +# https://www.kernel.org/doc/Documentation/kernel-doc-nano-HOWTO.txt +# +# Changes: +# * Create manpage section 3 instead of 9. +# * Replace 'Kernel Hackers Manual' to +# 'Device Mapper Multipath API - libdmmp Manual' +# * Remove LINUX from header. +# * Remove DMMP_DLL_EXPORT. +$man_sec_num = 3; +$title = 'Device Mapper Multipath API - libdmmp Manual'; + +if ( $#ARGV < 0 ) { + die "where do I put the results?\n"; +} + +mkdir $ARGV[0], 0777; +$state = 0; +while () { + if (/^\.TH \"[^\"]*\" 9 \"([^\"]*)\"/) { + if ( $state == 1 ) { close OUT } + $state = 1; + $fn = "$ARGV[0]/$1.$man_sec_num"; + print STDERR "Creating $fn\n"; + open OUT, ">$fn" or die "can't open $fn: $!\n"; + + # Change man page code from 9 to $man_sec_num; + s/^\.TH (\"[^\"]*\") 9 \"([^\"]*)\"/\.TH $1 $man_sec_num \"$2\"/; + s/Kernel Hacker's Manual/$title/g; + s/LINUX//g; + + print OUT $_; + } + elsif ( $state != 0 ) { + print OUT $_; + } +} + +close OUT; diff --git a/libdmmp/libdmmp.c b/libdmmp/libdmmp.c new file mode 100644 index 0000000..aafd509 --- /dev/null +++ b/libdmmp/libdmmp.c @@ -0,0 +1,490 @@ +/* + * Copyright (C) 2015 - 2017 Red Hat, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Author: Gris Ge + * Todd Gill + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libdmmp/libdmmp.h" +#include "libdmmp_private.h" + +#define _DEFAULT_UXSOCK_TIMEOUT 60000 +/* ^ 60 seconds. On system with 10k sdX, dmmp_mpath_array_get() + * only take 3.5 seconds, so this default value should be OK for most users. + */ + +#define _DMMP_IPC_SHOW_JSON_CMD "show maps json" +#define _DMMP_JSON_MAJOR_KEY "major_version" +#define _DMMP_JSON_MAJOR_VERSION 0 +#define _DMMP_JSON_MAPS_KEY "maps" +#define _ERRNO_STR_BUFF_SIZE 256 +#define _IPC_MAX_CMD_LEN 512 +/* ^ Was _MAX_CMD_LEN in ./libmultipath/uxsock.h */ +#define _LAST_ERR_MSG_BUFF_SIZE 1024 + +struct dmmp_context { + void (*log_func)(struct dmmp_context *ctx, int priority, + const char *file, int line, const char *func_name, + const char *format, va_list args); + int log_priority; + void *userdata; + unsigned int tmo; + char last_err_msg[_LAST_ERR_MSG_BUFF_SIZE]; +}; + +/* + * The multipathd daemon are using "uxsock_timeout" to define timeout value, + * if timeout at daemon side, we will get message "timeout\n". + * To unify this timeout with `dmmp_context_timeout_set()`, this function + * will keep retry mpath_process_cmd() tile meet the time of + * dmmp_context_timeout_get(). + * Need to free `*output` string manually. 
+ */ +static int _process_cmd(struct dmmp_context *ctx, int fd, const char *cmd, + char **output); + +static int _ipc_connect(struct dmmp_context *ctx, int *fd); + +_dmmp_getter_func_gen(dmmp_context_log_priority_get, + struct dmmp_context, ctx, log_priority, + int); + +_dmmp_getter_func_gen(dmmp_context_userdata_get, struct dmmp_context, ctx, + userdata, void *); + +_dmmp_getter_func_gen(dmmp_context_timeout_get, struct dmmp_context, ctx, tmo, + unsigned int); + +_dmmp_getter_func_gen(dmmp_last_error_msg, struct dmmp_context, ctx, + last_err_msg, const char *); + +_dmmp_array_free_func_gen(dmmp_mpath_array_free, struct dmmp_mpath, + _dmmp_mpath_free); + +void _dmmp_log(struct dmmp_context *ctx, int priority, const char *file, + int line, const char *func_name, const char *format, ...) +{ + va_list args; + + if (ctx->log_func == NULL) + return; + + va_start(args, format); + ctx->log_func(ctx, priority, file, line, func_name, format, args); + if (priority == DMMP_LOG_PRIORITY_ERROR) + vsnprintf(ctx->last_err_msg, _LAST_ERR_MSG_BUFF_SIZE, + format, args); + va_end(args); +} + +struct dmmp_context *dmmp_context_new(void) +{ + struct dmmp_context *ctx = NULL; + + ctx = (struct dmmp_context *) malloc(sizeof(struct dmmp_context)); + + if (ctx == NULL) + return NULL; + + ctx->log_func = _dmmp_log_stderr; + ctx->log_priority = DMMP_LOG_PRIORITY_DEFAULT; + ctx->userdata = NULL; + ctx->tmo = _DEFAULT_UXSOCK_TIMEOUT; + memset(ctx->last_err_msg, 0, _LAST_ERR_MSG_BUFF_SIZE); + + return ctx; +} + +void dmmp_context_free(struct dmmp_context *ctx) +{ + free(ctx); +} + +void dmmp_context_log_priority_set(struct dmmp_context *ctx, int priority) +{ + assert(ctx != NULL); + ctx->log_priority = priority; +} + +void dmmp_context_timeout_set(struct dmmp_context *ctx, unsigned int tmo) +{ + assert(ctx != NULL); + ctx->tmo = tmo; +} + +void dmmp_context_log_func_set + (struct dmmp_context *ctx, + void (*log_func)(struct dmmp_context *ctx, int priority, + const char *file, int line, const 
char *func_name, + const char *format, va_list args)) +{ + assert(ctx != NULL); + ctx->log_func = log_func; +} + +void dmmp_context_userdata_set(struct dmmp_context *ctx, void *userdata) +{ + assert(ctx != NULL); + ctx->userdata = userdata; +} + +int dmmp_mpath_array_get(struct dmmp_context *ctx, + struct dmmp_mpath ***dmmp_mps, uint32_t *dmmp_mp_count) +{ + struct dmmp_mpath *dmmp_mp = NULL; + int rc = DMMP_OK; + char *j_str = NULL; + json_object *j_obj = NULL; + json_object *j_obj_map = NULL; + enum json_tokener_error j_err = json_tokener_success; + json_tokener *j_token = NULL; + struct array_list *ar_maps = NULL; + uint32_t i = 0; + int cur_json_major_version = -1; + int ar_maps_len = -1; + int ipc_fd = -1; + + assert(ctx != NULL); + assert(dmmp_mps != NULL); + assert(dmmp_mp_count != NULL); + + *dmmp_mps = NULL; + *dmmp_mp_count = 0; + + _good(_ipc_connect(ctx, &ipc_fd), rc, out); + + _good(_process_cmd(ctx, ipc_fd, _DMMP_IPC_SHOW_JSON_CMD, &j_str), + rc, out); + + _debug(ctx, "Got json output from multipathd: '%s'", j_str); + + j_token = json_tokener_new(); + if (j_token == NULL) { + rc = DMMP_ERR_BUG; + _error(ctx, "BUG: json_tokener_new() retuned NULL"); + goto out; + } + j_obj = json_tokener_parse_ex(j_token, j_str, strlen(j_str) + 1); + + if (j_obj == NULL) { + rc = DMMP_ERR_IPC_ERROR; + j_err = json_tokener_get_error(j_token); + _error(ctx, "Failed to parse JSON output from multipathd IPC: " + "%s", json_tokener_error_desc(j_err)); + goto out; + } + + _json_obj_get_value(ctx, j_obj, cur_json_major_version, + _DMMP_JSON_MAJOR_KEY, json_type_int, + json_object_get_int, rc, out); + + if (cur_json_major_version != _DMMP_JSON_MAJOR_VERSION) { + rc = DMMP_ERR_INCOMPATIBLE; + _error(ctx, "Incompatible multipathd JSON major version %d, " + "should be %d", cur_json_major_version, + _DMMP_JSON_MAJOR_VERSION); + goto out; + } + _debug(ctx, "multipathd JSON major version(%d) check pass", + _DMMP_JSON_MAJOR_VERSION); + + _json_obj_get_value(ctx, j_obj, ar_maps, 
_DMMP_JSON_MAPS_KEY, + json_type_array, json_object_get_array, rc, out); + + if (ar_maps == NULL) { + rc = DMMP_ERR_BUG; + _error(ctx, "BUG: Got NULL map array from " + "_json_obj_get_value()"); + goto out; + } + + ar_maps_len = array_list_length(ar_maps); + if (ar_maps_len < 0) { + rc = DMMP_ERR_BUG; + _error(ctx, "BUG: Got negative length for ar_maps"); + goto out; + } + else if (ar_maps_len == 0) + goto out; + else + *dmmp_mp_count = ar_maps_len & UINT32_MAX; + + *dmmp_mps = (struct dmmp_mpath **) + malloc(sizeof(struct dmmp_mpath *) * (*dmmp_mp_count)); + _dmmp_alloc_null_check(ctx, dmmp_mps, rc, out); + for (; i < *dmmp_mp_count; ++i) + (*dmmp_mps)[i] = NULL; + + for (i = 0; i < *dmmp_mp_count; ++i) { + j_obj_map = array_list_get_idx(ar_maps, i); + if (j_obj_map == NULL) { + rc = DMMP_ERR_BUG; + _error(ctx, "BUG: array_list_get_idx() return NULL"); + goto out; + } + + dmmp_mp = _dmmp_mpath_new(); + _dmmp_alloc_null_check(ctx, dmmp_mp, rc, out); + (*dmmp_mps)[i] = dmmp_mp; + _good(_dmmp_mpath_update(ctx, dmmp_mp, j_obj_map), rc, out); + } + +out: + if (ipc_fd >= 0) + mpath_disconnect(ipc_fd); + free(j_str); + if (j_token != NULL) + json_tokener_free(j_token); + if (j_obj != NULL) + json_object_put(j_obj); + + if (rc != DMMP_OK) { + dmmp_mpath_array_free(*dmmp_mps, *dmmp_mp_count); + *dmmp_mps = NULL; + *dmmp_mp_count = 0; + } + + return rc; +} + +static int _process_cmd(struct dmmp_context *ctx, int fd, const char *cmd, + char **output) +{ + int errno_save = 0; + int rc = DMMP_OK; + char errno_str_buff[_ERRNO_STR_BUFF_SIZE]; + struct timespec start_ts; + struct timespec cur_ts; + unsigned int ipc_tmo = 0; + bool flag_check_tmo = false; + unsigned int elapsed = 0; + + assert(output != NULL); + assert(ctx != NULL); + assert(cmd != NULL); + + *output = NULL; + + if (clock_gettime(CLOCK_MONOTONIC, &start_ts) != 0) { + _error(ctx, "BUG: Failed to get CLOCK_MONOTONIC time " + "via clock_gettime(), error %d", errno); + return DMMP_ERR_BUG; + } + + ipc_tmo = ctx->tmo; 
+ if (ctx->tmo == 0) + ipc_tmo = _DEFAULT_UXSOCK_TIMEOUT; + +invoke: + _debug(ctx, "Invoking IPC command '%s' with IPC tmo %u milliseconds", + cmd, ipc_tmo); + flag_check_tmo = false; + if (mpath_process_cmd(fd, cmd, output, ipc_tmo) != 0) { + errno_save = errno; + memset(errno_str_buff, 0, _ERRNO_STR_BUFF_SIZE); + strerror_r(errno_save, errno_str_buff, _ERRNO_STR_BUFF_SIZE); + if (errno_save == ETIMEDOUT) { + flag_check_tmo = true; + } else { + _error(ctx, "IPC failed when process command '%s' with " + "error %d(%s)", cmd, errno_save, errno_str_buff); + _debug(ctx, "%s", *output); + rc = DMMP_ERR_IPC_ERROR; + goto out; + } + } + if ((*output != NULL) && + (strncmp(*output, "timeout", strlen("timeout")) == 0)) + flag_check_tmo = true; + + if (flag_check_tmo == true) { + free(*output); + *output = NULL; + if (ctx->tmo == 0) { + _debug(ctx, "IPC timeout, but user requested infinite " + "timeout"); + goto invoke; + } + + if (clock_gettime(CLOCK_MONOTONIC, &cur_ts) != 0) { + _error(ctx, "BUG: Failed to get CLOCK_MONOTONIC time " + "via clock_gettime(), error %d", errno); + rc = DMMP_ERR_BUG; + goto out; + } + elapsed = (cur_ts.tv_sec - start_ts.tv_sec) * 1000 + + (cur_ts.tv_nsec - start_ts.tv_nsec) / 1000000; + + if (elapsed >= ctx->tmo) { + rc = DMMP_ERR_IPC_TIMEOUT; + _error(ctx, "Timeout, try to increase it via " + "dmmp_context_timeout_set()"); + goto out; + } + if (ctx->tmo != 0) + ipc_tmo = ctx->tmo - elapsed; + + _debug(ctx, "IPC timeout, but user requested timeout has not " + "reached yet, still have %u milliseconds", ipc_tmo); + goto invoke; + } else { + if ((*output == NULL) || (strlen(*output) == 0)) { + _error(ctx, "IPC return empty reply for command %s", + cmd); + rc = DMMP_ERR_IPC_ERROR; + goto out; + } + } + + if ((*output != NULL) && + strncmp(*output, "permission deny", + strlen("permission deny")) == 0) { + _error(ctx, "Permission deny, need to be root"); + rc = DMMP_ERR_PERMISSION_DENY; + goto out; + } + +out: + if (rc != DMMP_OK) { + free(*output); 
+ *output = NULL; + } + return rc; +} + +/* + * Open the IPC socket to multipathd and store the descriptor in `*fd` + * (-1 on failure). ECONNREFUSED is reported as DMMP_ERR_NO_DAEMON, any + * other connect failure as DMMP_ERR_IPC_ERROR. + */ +static int _ipc_connect(struct dmmp_context *ctx, int *fd) +{ + int rc = DMMP_OK; + int errno_save = 0; + char errno_str_buff[_ERRNO_STR_BUFF_SIZE]; + + assert(ctx != NULL); + assert(fd != NULL); + + *fd = -1; + + *fd = mpath_connect(); + if (*fd == -1) { + errno_save = errno; + memset(errno_str_buff, 0, _ERRNO_STR_BUFF_SIZE); + strerror_r(errno_save, errno_str_buff, _ERRNO_STR_BUFF_SIZE); + if (errno_save == ECONNREFUSED) { + rc = DMMP_ERR_NO_DAEMON; + _error(ctx, "Socket connection refuse. " + "Maybe multipathd daemon is not running"); + } else { + _error(ctx, "IPC failed with error %d(%s)", errno_save, + errno_str_buff); + rc = DMMP_ERR_IPC_ERROR; + } + } + return rc; +} + +/* + * Ask multipathd to remove the map `mpath_name` ("del map <name>"). + * When the daemon answers "fail", the current map list is queried to tell + * DMMP_ERR_MPATH_NOT_FOUND apart from DMMP_ERR_MPATH_BUSY. A name too long + * for the command buffer is rejected with DMMP_ERR_INVALID_ARGUMENT. + */ +int dmmp_flush_mpath(struct dmmp_context *ctx, const char *mpath_name) +{ + int rc = DMMP_OK; + struct dmmp_mpath **dmmp_mps = NULL; + uint32_t dmmp_mp_count = 0; + uint32_t i = 0; + bool found = false; + int ipc_fd = -1; + char cmd[_IPC_MAX_CMD_LEN]; + char *output = NULL; + int cmd_len = 0; + + assert(ctx != NULL); + assert(mpath_name != NULL); + + /* + * snprintf() returns the length the complete command needs; a value of + * _IPC_MAX_CMD_LEN or more means the name did not fit. Testing the + * return value instead of strlen(cmd) keeps a command that exactly + * fills the buffer from being rejected as truncated. + */ + cmd_len = snprintf(cmd, _IPC_MAX_CMD_LEN, "del map %s", mpath_name); + if ((cmd_len < 0) || (cmd_len >= _IPC_MAX_CMD_LEN)) { + rc = DMMP_ERR_INVALID_ARGUMENT; + _error(ctx, "Invalid mpath name %s", mpath_name); + goto out; + } + + _good(_ipc_connect(ctx, &ipc_fd), rc, out); + _good(_process_cmd(ctx, ipc_fd, cmd, &output), rc, out); + + /* _process_cmd() already make sure output is not NULL */ + + if (strncmp(output, "fail", strlen("fail")) == 0) { + /* Check whether specified mpath exits */ + _good(dmmp_mpath_array_get(ctx, &dmmp_mps, &dmmp_mp_count), + rc, out); + + for (i = 0; i < dmmp_mp_count; ++i) { + if (strcmp(dmmp_mpath_name_get(dmmp_mps[i]), + mpath_name) == 0) { + found = true; + break; + } + } + + if (found == false) { + rc = DMMP_ERR_MPATH_NOT_FOUND; + _error(ctx, "Specified mpath %s not found", mpath_name); + goto out; + } + + rc = DMMP_ERR_MPATH_BUSY; + _error(ctx, "Specified mpath is in use"); + } else if (strncmp(output, "ok",
strlen("ok")) != 0) { + rc = DMMP_ERR_BUG; + _error(ctx, "Got unexpected output for cmd '%s': '%s'", + cmd, output); + } + +out: + if (ipc_fd >= 0) + mpath_disconnect(ipc_fd); + dmmp_mpath_array_free(dmmp_mps, dmmp_mp_count); + free(output); + return rc; +} + +int dmmp_reconfig(struct dmmp_context *ctx) +{ + int rc = DMMP_OK; + int ipc_fd = -1; + char *output = NULL; + char cmd[_IPC_MAX_CMD_LEN]; + + snprintf(cmd, _IPC_MAX_CMD_LEN, "%s", "reconfigure"); + + _good(_ipc_connect(ctx, &ipc_fd), rc, out); + _good(_process_cmd(ctx, ipc_fd, cmd, &output), rc, out); + +out: + if (ipc_fd >= 0) + mpath_disconnect(ipc_fd); + free(output); + return rc; +} diff --git a/libdmmp/libdmmp.pc.in b/libdmmp/libdmmp.pc.in new file mode 100644 index 0000000..ebb8cad --- /dev/null +++ b/libdmmp/libdmmp.pc.in @@ -0,0 +1,9 @@ +includedir=__INCLUDEDIR__ +libdir=__LIBDIR__ + +Name: libdmmp +Version: __VERSION__ +Description: Device mapper multipath management library +Requires: +Libs: -L${libdir} -ldmmp +Cflags: -I${includedir} diff --git a/libdmmp/libdmmp/libdmmp.h b/libdmmp/libdmmp/libdmmp.h new file mode 100644 index 0000000..6e6610d --- /dev/null +++ b/libdmmp/libdmmp/libdmmp.h @@ -0,0 +1,735 @@ +/* + * Copyright (C) 2015 - 2017 Red Hat, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * + * Author: Gris Ge + * Todd Gill + */ + + +#ifndef _LIB_DMMP_H_ +#define _LIB_DMMP_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define DMMP_DLL_EXPORT __attribute__ ((visibility ("default"))) +#define DMMP_DLL_LOCAL __attribute__ ((visibility ("hidden"))) + +#define DMMP_OK 0 +#define DMMP_ERR_BUG 1 +#define DMMP_ERR_NO_MEMORY 2 +#define DMMP_ERR_IPC_TIMEOUT 3 +#define DMMP_ERR_IPC_ERROR 4 +#define DMMP_ERR_NO_DAEMON 5 +#define DMMP_ERR_INCOMPATIBLE 6 +#define DMMP_ERR_MPATH_BUSY 7 +#define DMMP_ERR_MPATH_NOT_FOUND 8 +#define DMMP_ERR_INVALID_ARGUMENT 9 +#define DMMP_ERR_PERMISSION_DENY 10 + +/* + * Use the syslog severity level as log priority + */ +#define DMMP_LOG_PRIORITY_ERROR 3 +#define DMMP_LOG_PRIORITY_WARNING 4 +#define DMMP_LOG_PRIORITY_INFO 6 +#define DMMP_LOG_PRIORITY_DEBUG 7 + +#define DMMP_LOG_PRIORITY_DEFAULT DMMP_LOG_PRIORITY_WARNING + +/** + * dmmp_log_priority_str() - Convert log priority to string. + * + * Convert log priority to string (const char *). + * + * @priority: + * int. Log priority. + * + * Return: + * const char *. Valid string are: + * + * * "ERROR" for DMMP_LOG_PRIORITY_ERROR + * + * * "WARN " for DMMP_LOG_PRIORITY_WARNING + * + * * "INFO " for DMMP_LOG_PRIORITY_INFO + * + * * "DEBUG" for DMMP_LOG_PRIORITY_DEBUG + * + * * "Invalid argument" for invalid log priority. + */ +DMMP_DLL_EXPORT const char *dmmp_log_priority_str(int priority); + +struct DMMP_DLL_EXPORT dmmp_context; + +struct DMMP_DLL_EXPORT dmmp_mpath; + +struct DMMP_DLL_EXPORT dmmp_path_group; + +#define DMMP_PATH_GROUP_STATUS_UNKNOWN 0 +#define DMMP_PATH_GROUP_STATUS_ENABLED 1 +#define DMMP_PATH_GROUP_STATUS_DISABLED 2 +#define DMMP_PATH_GROUP_STATUS_ACTIVE 3 + +struct DMMP_DLL_EXPORT dmmp_path; + +#define DMMP_PATH_STATUS_UNKNOWN 0 +//#define DMMP_PATH_STATUS_UNCHECKED 1 +// ^ print.h does not expose this. 
+#define DMMP_PATH_STATUS_DOWN 2 +#define DMMP_PATH_STATUS_UP 3 +#define DMMP_PATH_STATUS_SHAKY 4 +#define DMMP_PATH_STATUS_GHOST 5 +#define DMMP_PATH_STATUS_PENDING 6 +#define DMMP_PATH_STATUS_TIMEOUT 7 +//#define DMMP_PATH_STATUS_REMOVED 8 +// ^ print.h does not expose this. +#define DMMP_PATH_STATUS_DELAYED 9 + +/** + * dmmp_strerror() - Convert error code to string. + * + * Convert error code (int) to string (const char *): + * + * * DMMP_OK -- "OK" + * + * * DMMP_ERR_BUG -- "BUG of libdmmp library" + * + * * DMMP_ERR_NO_MEMORY -- "Out of memory" + * + * * DMMP_ERR_IPC_TIMEOUT -- "Timeout when communicate with multipathd, + * try to set bigger timeout value via dmmp_context_timeout_set ()" + * + * * DMMP_ERR_IPC_ERROR -- "Error when communicate with multipathd daemon" + * + * * DMMP_ERR_NO_DAEMON -- "The multipathd daemon not started" + * + * * DMMP_ERR_INCOMPATIBLE -- "The multipathd daemon version is not + * compatible with current library" + * + * * Other invalid error number -- "Invalid argument" + * + * @rc: + * int. Return code by libdmmp functions. When provided error code is not a + * valid error code, return "Invalid argument". + * + * Return: + * const char *. The meaning of provided error code. + * + */ +DMMP_DLL_EXPORT const char *dmmp_strerror(int rc); + +/** + * dmmp_context_new() - Create struct dmmp_context. + * + * The default logging level (DMMP_LOG_PRIORITY_DEFAULT) is + * DMMP_LOG_PRIORITY_WARNING which means only warning and error message will be + * forward to log handler function. The default log handler function will print + * log message to STDERR, to change so, please use dmmp_context_log_func_set() + * to set your own log handler, check manpage libdmmp.h(3) for detail. + * + * Return: + * Pointer of 'struct dmmp_context'. Should be freed by + * dmmp_context_free(). + */ +DMMP_DLL_EXPORT struct dmmp_context *dmmp_context_new(void); + +/** + * dmmp_context_free() - Release the memory of struct dmmp_context. 
+ * + * Release the memory of struct dmmp_context, but the userdata memory defined + * via dmmp_context_userdata_set() will not be touched. + * + * @ctx: + * Pointer of 'struct dmmp_context'. + * Return: + * void + */ +DMMP_DLL_EXPORT void dmmp_context_free(struct dmmp_context *ctx); + +/** + * dmmp_context_timeout_set() - Set IPC timeout. + * + * By default, the IPC to multipathd daemon will timeout after 60 seconds. + * + * @ctx: + * Pointer of 'struct dmmp_context'. + * If this pointer is NULL, your program will be terminated by assert. + * + * @tmo: + * Timeout in milliseconds (1 second equals 1000 milliseconds). + * 0 means infinite: the function only returns on error or success. + * + * Return: + * void + */ +DMMP_DLL_EXPORT void dmmp_context_timeout_set(struct dmmp_context *ctx, + unsigned int tmo); + +/** + * dmmp_context_timeout_get() - Get IPC timeout. + * + * Retrieve timeout value of IPC connection to multipathd daemon. + * + * @ctx: + * Pointer of 'struct dmmp_context'. + * If this pointer is NULL, your program will be terminated by assert. + * + * Return: + * unsigned int. Timeout in milliseconds. + */ +DMMP_DLL_EXPORT unsigned int dmmp_context_timeout_get(struct dmmp_context *ctx); + +/** + * dmmp_context_log_priority_set() - Set log priority. + * + * + * When library generates log message, only equal or more important(less value) + * message will be forwarded to log handler function. Valid log priority values + * are: + * + * * DMMP_LOG_PRIORITY_ERROR -- 3 + * + * * DMMP_LOG_PRIORITY_WARNING -- 4 + * + * * DMMP_LOG_PRIORITY_INFO -- 6 + * + * * DMMP_LOG_PRIORITY_DEBUG -- 7 + * + * @ctx: + * Pointer of 'struct dmmp_context'. + * If this pointer is NULL, your program will be terminated by assert. + * + * @priority: + * int, log priority. + * + * Return: + * void + */ +DMMP_DLL_EXPORT void dmmp_context_log_priority_set(struct dmmp_context *ctx, + int priority); + +/** + * dmmp_context_log_priority_get() - Get log priority.
+ * + * Retrieve current log priority. Valid log priority values are: + * + * * DMMP_LOG_PRIORITY_ERROR -- 3 + * + * * DMMP_LOG_PRIORITY_WARNING -- 4 + * + * * DMMP_LOG_PRIORITY_INFO -- 6 + * + * * DMMP_LOG_PRIORITY_DEBUG -- 7 + * + * @ctx: + * Pointer of 'struct dmmp_context'. + * If this pointer is NULL, your program will be terminated by assert. + * + * Return: + * int, log priority. + */ +DMMP_DLL_EXPORT int dmmp_context_log_priority_get(struct dmmp_context *ctx); + +/** + * dmmp_context_log_func_set() - Set log handler function. + * + * Set custom log handler. The log handler will be invoked when log message + * is equal or more important(less value) than log priority setting. + * Please check manpage libdmmp.h(3) for detail usage. + * + * @ctx: + * Pointer of 'struct dmmp_context'. + * If this pointer is NULL, your program will be terminated by assert. + * @log_func: + * Pointer of log handler function. If set to NULL, all log will be + * ignored. + * + * Return: + * void + */ +DMMP_DLL_EXPORT void dmmp_context_log_func_set + (struct dmmp_context *ctx, + void (*log_func) + (struct dmmp_context *ctx, int priority, + const char *file, int line, const char *func_name, + const char *format, va_list args)); + +/** + * dmmp_context_userdata_set() - Set user data pointer. + * + * Store user data pointer into 'struct dmmp_context'. + * + * @ctx: + * Pointer of 'struct dmmp_context'. + * If this pointer is NULL, your program will be terminated by assert. + * @userdata: + * Pointer of user defined data. + * + * Return: + * void + */ +DMMP_DLL_EXPORT void dmmp_context_userdata_set(struct dmmp_context *ctx, + void *userdata); + +/** + * dmmp_context_userdata_get() - Get user data pointer. + * + * Retrieve user data pointer from 'struct dmmp_context'. + * + * @ctx: + * Pointer of 'struct dmmp_context'. + * If this pointer is NULL, your program will be terminated by assert. + * + * Return: + * void *. Pointer of user defined data.
+ */ +DMMP_DLL_EXPORT void *dmmp_context_userdata_get(struct dmmp_context *ctx); + +/** + * dmmp_mpath_array_get() - Query all existing multipath devices. + * + * Query all existing multipath devices and store them into a pointer array. + * The memory of 'dmmp_mps' should be freed via dmmp_mpath_array_free(). + * + * @ctx: + * Pointer of 'struct dmmp_context'. + * If this pointer is NULL, your program will be terminated by assert. + * @dmmp_mps: + * Output pointer array of 'struct dmmp_mpath'. + * If this pointer is NULL, your program will be terminated by assert. + * @dmmp_mp_count: + * Output pointer of uint32_t. Hold the size of 'dmmp_mps' pointer array. + * If this pointer is NULL, your program will be terminated by assert. + * + * Return: + * int. Valid error codes are: + * + * * DMMP_OK + * + * * DMMP_ERR_BUG + * + * * DMMP_ERR_NO_MEMORY + * + * * DMMP_ERR_NO_DAEMON + * + * * DMMP_ERR_IPC_TIMEOUT + * + * * DMMP_ERR_IPC_ERROR + * + * * DMMP_ERR_INCOMPATIBLE + * + * * DMMP_ERR_PERMISSION_DENY + * + * Error number could be converted to string by dmmp_strerror(). + */ +DMMP_DLL_EXPORT int dmmp_mpath_array_get(struct dmmp_context *ctx, + struct dmmp_mpath ***dmmp_mps, + uint32_t *dmmp_mp_count); + +/** + * dmmp_mpath_array_free() - Free 'struct dmmp_mpath' pointer array. + * + * Free the 'dmmp_mps' pointer array generated by dmmp_mpath_array_get(). + * If provided 'dmmp_mps' pointer is NULL or dmmp_mp_count == 0, do nothing. + * + * @dmmp_mps: + * Pointer of 'struct dmmp_mpath' array. + * @dmmp_mp_count: + * uint32_t, the size of 'dmmp_mps' pointer array. + * + * Return: + * void + */ +DMMP_DLL_EXPORT void dmmp_mpath_array_free(struct dmmp_mpath **dmmp_mps, + uint32_t dmmp_mp_count); + +/** + * dmmp_mpath_wwid_get() - Retrieve WWID of certain mpath. + * + * @dmmp_mp: + * Pointer of 'struct dmmp_mpath'. + * If this pointer is NULL, your program will be terminated by assert. + * + * Return: + * const char *. No need to free this memory, the resources will get + * freed when dmmp_mpath_array_free().
+ */ +DMMP_DLL_EXPORT const char *dmmp_mpath_wwid_get(struct dmmp_mpath *dmmp_mp); + +/** + * dmmp_mpath_name_get() - Retrieve name(alias) of certain mpath. + * + * Retrieve the name (also known as alias) of certain mpath. + * When the config 'user_friendly_names' been set 'no', the name will be + * identical to WWID retrieved by dmmp_mpath_wwid_get(). + * + * @dmmp_mp: + * Pointer of 'struct dmmp_mpath'. + * If this pointer is NULL, your program will be terminated by assert. + * + * Return: + * const char *. No need to free this memory, the resources will get + * freed when dmmp_mpath_array_free(). + */ +DMMP_DLL_EXPORT const char *dmmp_mpath_name_get(struct dmmp_mpath *dmmp_mp); + +/** + * dmmp_mpath_kdev_name_get() - Retrieve kernel DEVNAME of certain mpath. + * + * Retrieve DEVNAME name used by kernel uevent of specified mpath. + * For example: 'dm-1'. + * + * @dmmp_mp: + * Pointer of 'struct dmmp_mpath'. + * If this pointer is NULL, your program will be terminated by assert. + * + * Return: + * const char *. No need to free this memory, the resources will get + * freed when dmmp_mpath_array_free(). + */ +DMMP_DLL_EXPORT const char *dmmp_mpath_kdev_name_get + (struct dmmp_mpath *dmmp_mp); + +/** + * dmmp_path_group_array_get() - Retrieve path groups pointer array. + * + * Retrieve the path groups of certain mpath. + * + * The memory of output pointer array is hold by 'struct dmmp_mpath', no + * need to free this memory, the resources will got freed when + * dmmp_mpath_array_free(). + * + * @dmmp_mp: + * Pointer of 'struct dmmp_mpath'. + * If this pointer is NULL, your program will be terminated by assert. + * @dmmp_pgs: + * Output pointer of 'struct dmmp_path_group' pointer array. + * If this pointer is NULL, your program will be terminated by assert. + * @dmmp_pg_count: + * Output pointer of uint32_t. Hold the size of 'dmmp_pgs' pointer array. + * If this pointer is NULL, your program will be terminated by assert. 
+ * + * Return: + * void + */ +DMMP_DLL_EXPORT void dmmp_path_group_array_get + (struct dmmp_mpath *dmmp_mp, struct dmmp_path_group ***dmmp_pgs, + uint32_t *dmmp_pg_count); + +/** + * dmmp_path_group_id_get() - Retrieve path group ID. + * + * Retrieve the path group ID which could be used to switch active path group + * via command: + * + * multipathd -k'switch multipath mpathb group $id' + * + * @dmmp_pg: + * Pointer of 'struct dmmp_path_group'. + * If this pointer is NULL, your program will be terminated by assert. + * + * Return: + * uint32_t. + */ +DMMP_DLL_EXPORT uint32_t dmmp_path_group_id_get + (struct dmmp_path_group *dmmp_pg); + +/** + * dmmp_path_group_priority_get() - Retrieve path group priority. + * + * The enabled path group with highest priority will be next active path group + * if active path group down. + * + * @dmmp_pg: + * Pointer of 'struct dmmp_path_group'. + * If this pointer is NULL, your program will be terminated by assert. + * + * Return: + * uint32_t. + */ +DMMP_DLL_EXPORT uint32_t dmmp_path_group_priority_get + (struct dmmp_path_group *dmmp_pg); + +/** + * dmmp_path_group_status_get() - Retrieve path group status. + * + * The valid path group statuses are: + * + * * DMMP_PATH_GROUP_STATUS_UNKNOWN + * + * * DMMP_PATH_GROUP_STATUS_ENABLED -- standby to be active + * + * * DMMP_PATH_GROUP_STATUS_DISABLED -- disabled due to all path down + * + * * DMMP_PATH_GROUP_STATUS_ACTIVE -- selected to handle I/O + * + * @dmmp_pg: + * Pointer of 'struct dmmp_path_group'. + * If this pointer is NULL, your program will be terminated by assert. + * + * Return: + * uint32_t. + */ +DMMP_DLL_EXPORT uint32_t dmmp_path_group_status_get + (struct dmmp_path_group *dmmp_pg); + +/** + * dmmp_path_group_status_str() - Convert path group status to string. + * + * Convert path group status uint32_t to string (const char *). + * + * @pg_status: + * uint32_t. Path group status. + * When provided value is not a valid path group status, return "Invalid + * argument". 
+ * + * Return: + * const char *. Valid string are: + * + * * "Invalid argument" + * + * * "undef" + * + * * "enabled" + * + * * "disabled" + * + * * "active" + */ +DMMP_DLL_EXPORT const char *dmmp_path_group_status_str(uint32_t pg_status); + +/** + * dmmp_path_group_selector_get() - Retrieve path group selector. + * + * Path group selector determine which path in active path group will be + * use to next I/O. + * + * @dmmp_pg: + * Pointer of 'struct dmmp_path_group'. + * If this pointer is NULL, your program will be terminated by assert. + * + * Return: + * const char *. + */ +DMMP_DLL_EXPORT const char *dmmp_path_group_selector_get + (struct dmmp_path_group *dmmp_pg); + +/** + * dmmp_path_array_get() - Retrieve path pointer array. + * + * The memory of output pointer array is hold by 'struct dmmp_mpath', no + * need to free this memory, the resources will got freed when + * dmmp_mpath_array_free(). + * + * @dmmp_pg: + * Pointer of 'struct dmmp_path_group'. + * If this pointer is NULL, your program will be terminated by assert. + * @dmmp_ps: + * Output pointer of 'struct dmmp_path' pointer array. + * If this pointer is NULL, your program will be terminated by assert. + * @dmmp_p_count: + * Output pointer of uint32_t. Hold the size of 'dmmp_ps' pointer array. + * If this pointer is NULL, your program will be terminated by assert. + * + * Return: + * void + */ +DMMP_DLL_EXPORT void dmmp_path_array_get(struct dmmp_path_group *dmmp_pg, + struct dmmp_path ***dmmp_ps, + uint32_t *dmmp_p_count); + +/** + * dmmp_path_blk_name_get() - Retrieve block name. + * + * Retrieve block name of certain path. The example of block names are "sda", + * "nvme0n1". + * + * @dmmp_p: + * Pointer of 'struct dmmp_path'. + * If this pointer is NULL, your program will be terminated by assert. + * + * Return: + * const char *. No need to free this memory, the resources will get + * freed when dmmp_mpath_array_free(). 
+ */ +DMMP_DLL_EXPORT const char *dmmp_path_blk_name_get(struct dmmp_path *dmmp_p); + +/** + * dmmp_path_status_get() - Retrieve the path status. + * + * The valid path statuses are: + * + * * DMMP_PATH_STATUS_UNKNOWN + * + * * DMMP_PATH_STATUS_DOWN + * + * Path is down and you shouldn't try to send commands to it. + * + * * DMMP_PATH_STATUS_UP + * + * Path is up and I/O can be sent to it. + * + * * DMMP_PATH_STATUS_SHAKY + * + * Only emc_clariion checker when path not available for "normal" + * operations. + * + * * DMMP_PATH_STATUS_GHOST + * + * Only hp_sw and rdac checkers. Indicates a "passive/standby" + * path on active/passive HP arrays. These paths will return valid + * answers to certain SCSI commands (tur, read_capacity, inquiry, + * start_stop), but will fail I/O commands. The path needs an + * initialization command to be sent to it in order for I/Os to + * succeed. + * + * * DMMP_PATH_STATUS_PENDING + * + * Available for all async checkers when a check IO is in flight. + * + * * DMMP_PATH_STATUS_TIMEOUT + * + * Only tur checker when command timed out. + * + * * DMMP_PATH_STATUS_DELAYED + * + * If a path fails after being up for less than delay_watch_checks checks, + * when it comes back up again, it will not be marked as up until it has + * been up for delay_wait_checks checks. During this time, it is marked as + * "delayed". + * + * @dmmp_p: + * Pointer of 'struct dmmp_path'. + * If this pointer is NULL, your program will be terminated by assert. + * + * Return: + * uint32_t. + */ +DMMP_DLL_EXPORT uint32_t dmmp_path_status_get(struct dmmp_path *dmmp_p); + +/** + * dmmp_path_status_str() - Convert path status to string. 
+ * + * Convert path status uint32_t to string (const char *): + * + * * DMMP_PATH_STATUS_UNKNOWN -- "undef" + * + * * DMMP_PATH_STATUS_DOWN -- "faulty" + * + * * DMMP_PATH_STATUS_UP -- "ready" + * + * * DMMP_PATH_STATUS_SHAKY -- "shaky" + * + * * DMMP_PATH_STATUS_GHOST -- "ghost" + * + * * DMMP_PATH_STATUS_PENDING -- "pending" + * + * * DMMP_PATH_STATUS_TIMEOUT -- "timeout" + * + * * DMMP_PATH_STATUS_REMOVED -- "removed" + * + * * DMMP_PATH_STATUS_DELAYED -- "delayed" + * + * @path_status: + * uint32_t. Path status. + * When provided value is not a valid path status, return + * "Invalid argument". + * + * Return: + * const char *. The meaning of status value. + */ +DMMP_DLL_EXPORT const char *dmmp_path_status_str(uint32_t path_status); + +/** + * dmmp_flush_mpath() - Flush specified multipath device map if unused. + * + * Flush a multipath device map specified as parameter, if unused. + * + * @ctx: + * Pointer of 'struct dmmp_context'. + * If this pointer is NULL, your program will be terminated by assert. + * @mpath_name: + * const char *. The name of multipath device map. + * + * Return: + * int. Valid error codes are: + * + * * DMMP_OK + * + * * DMMP_ERR_BUG + * + * * DMMP_ERR_NO_MEMORY + * + * * DMMP_ERR_NO_DAEMON + * + * * DMMP_ERR_MPATH_BUSY + * + * * DMMP_ERR_MPATH_NOT_FOUND + * + * * DMMP_ERR_INVALID_ARGUMENT + * + * * DMMP_ERR_PERMISSION_DENY + * + * Error number could be converted to string by dmmp_strerror(). + */ +DMMP_DLL_EXPORT int dmmp_flush_mpath(struct dmmp_context *ctx, + const char *mpath_name); + +/** + * dmmp_reconfig() - Instruct multipathd daemon to do reconfiguration. + * + * Instruct multipathd daemon to do reconfiguration. + * + * @ctx: + * Pointer of 'struct dmmp_context'. + * If this pointer is NULL, your program will be terminated by assert. + * + * Return: + * int. 
Valid error codes are: + * + * * DMMP_OK + * + * * DMMP_ERR_BUG + * + * * DMMP_ERR_NO_MEMORY + * + * * DMMP_ERR_NO_DAEMON + * + * * DMMP_ERR_PERMISSION_DENY + * + * Error number could be converted to string by dmmp_strerror(). + */ +DMMP_DLL_EXPORT int dmmp_reconfig(struct dmmp_context *ctx); + +/** + * dmmp_last_error_msg() - Retrieves the last error message. + * + * Retrieves the last error message. + * + * @ctx: + * Pointer of 'struct dmmp_context'. + * If this pointer is NULL, your program will be terminated by assert. + * + * Return: + * const char *. No need to free this memory, the resources will get + * freed when dmmp_context_free(). + */ +DMMP_DLL_EXPORT const char *dmmp_last_error_msg(struct dmmp_context *ctx); + +#ifdef __cplusplus +} /* End of extern "C" */ +#endif + +#endif /* End of _LIB_DMMP_H_ */ diff --git a/libdmmp/libdmmp_misc.c b/libdmmp/libdmmp_misc.c new file mode 100644 index 0000000..69b5a20 --- /dev/null +++ b/libdmmp/libdmmp_misc.c @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2015 - 2017 Red Hat, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Author: Gris Ge + * Todd Gill + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libdmmp/libdmmp.h" +#include "libdmmp_private.h" + +#define _DMMP_LOG_STRERR_ALIGN_WIDTH 80 +/* ^ Only used in _dmmp_log_stderr() for pretty log output. 
+ * When provided log message is less than 80 bytes, fill it with space, then + * print code file name, function name, line after the 80th bytes. + */ + +static const struct _num_str_conv _DMMP_RC_MSG_CONV[] = { + {DMMP_OK, "OK"}, + {DMMP_ERR_NO_MEMORY, "Out of memory"}, + {DMMP_ERR_BUG, "BUG of libdmmp library"}, + {DMMP_ERR_IPC_TIMEOUT, "Timeout when communicate with multipathd, " + "try to increase it via " + "dmmp_context_timeout_set()"}, + {DMMP_ERR_IPC_ERROR, "Error when communicate with multipathd daemon"}, + {DMMP_ERR_NO_DAEMON, "The multipathd daemon not started"}, + {DMMP_ERR_INCOMPATIBLE, "Incompatible multipathd daemon version"}, + {DMMP_ERR_MPATH_BUSY, "Specified multipath device map is in use"}, + {DMMP_ERR_MPATH_NOT_FOUND, "Specified multipath not found"}, + {DMMP_ERR_INVALID_ARGUMENT, "Invalid argument"}, + {DMMP_ERR_PERMISSION_DENY, "Permission deny"}, +}; + +_dmmp_str_func_gen(dmmp_strerror, int, rc, _DMMP_RC_MSG_CONV); + +static const struct _num_str_conv _DMMP_PRI_CONV[] = { + {DMMP_LOG_PRIORITY_DEBUG, "DEBUG"}, + {DMMP_LOG_PRIORITY_INFO, "INFO"}, + {DMMP_LOG_PRIORITY_WARNING, "WARNING"}, + {DMMP_LOG_PRIORITY_ERROR, "ERROR"}, +}; +_dmmp_str_func_gen(dmmp_log_priority_str, int, priority, _DMMP_PRI_CONV); + +void _dmmp_log_stderr(struct dmmp_context *ctx, int priority, + const char *file, int line, const char *func_name, + const char *format, va_list args) +{ + int printed_bytes = 0; + void *userdata = NULL; + + printed_bytes += fprintf(stderr, "libdmmp %s: ", + dmmp_log_priority_str(priority)); + printed_bytes += vfprintf(stderr, format, args); + + userdata = dmmp_context_userdata_get(ctx); + if (userdata != NULL) + fprintf(stderr, "(userdata address: %p)", + userdata); + /* ^ Just demonstrate how userdata could be used and + * bypass clang static analyzer about unused ctx argument warning + */ + + if (printed_bytes < _DMMP_LOG_STRERR_ALIGN_WIDTH) { + fprintf(stderr, "%*s # %s:%s():%d\n", + _DMMP_LOG_STRERR_ALIGN_WIDTH - printed_bytes, "", file, 
+ func_name, line); + } else { + fprintf(stderr, " # %s:%s():%d\n", file, func_name, line); + } +} diff --git a/libdmmp/libdmmp_mp.c b/libdmmp/libdmmp_mp.c new file mode 100644 index 0000000..bc48d0e --- /dev/null +++ b/libdmmp/libdmmp_mp.c @@ -0,0 +1,159 @@ +/* + * Copyright (C) 2015 - 2016 Red Hat, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Author: Gris Ge + * Todd Gill + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "libdmmp/libdmmp.h" +#include "libdmmp_private.h" + +struct dmmp_mpath { + char *wwid; + char *alias; + uint32_t dmmp_pg_count; + struct dmmp_path_group **dmmp_pgs; + char *kdev_name; +}; + +_dmmp_getter_func_gen(dmmp_mpath_name_get, struct dmmp_mpath, dmmp_mp, + alias, const char *); +_dmmp_getter_func_gen(dmmp_mpath_wwid_get, struct dmmp_mpath, dmmp_mp, + wwid, const char *); +_dmmp_getter_func_gen(dmmp_mpath_kdev_name_get, struct dmmp_mpath, dmmp_mp, + kdev_name, const char *); + +/* + * Allocate an empty mpath object, or return NULL when malloc() fails. + * Every pointer member starts out NULL, kdev_name included: + * _dmmp_mpath_free() calls free() on all of them, and it can run before + * _dmmp_mpath_update() has assigned any member. + */ +struct dmmp_mpath *_dmmp_mpath_new(void) +{ + struct dmmp_mpath *dmmp_mp = NULL; + + dmmp_mp = (struct dmmp_mpath *) malloc(sizeof(struct dmmp_mpath)); + + if (dmmp_mp != NULL) { + dmmp_mp->wwid = NULL; + dmmp_mp->alias = NULL; + dmmp_mp->dmmp_pg_count = 0; + dmmp_mp->dmmp_pgs = NULL; + dmmp_mp->kdev_name = NULL; + } + return dmmp_mp; +} + +int _dmmp_mpath_update(struct dmmp_context *ctx, struct dmmp_mpath *dmmp_mp, + json_object *j_obj_map) +{ + int rc = DMMP_OK; +
const char *wwid = NULL; + const char *alias = NULL; + struct array_list *ar_pgs = NULL; + int ar_pgs_len = -1; + uint32_t i = 0; + struct dmmp_path_group *dmmp_pg = NULL; + const char *kdev_name = NULL; + + assert(ctx != NULL); + assert(dmmp_mp != NULL); + assert(j_obj_map != NULL); + + _json_obj_get_value(ctx, j_obj_map, wwid, "uuid", json_type_string, + json_object_get_string, rc, out); + _json_obj_get_value(ctx, j_obj_map, alias, "name", json_type_string, + json_object_get_string, rc, out); + _json_obj_get_value(ctx, j_obj_map, kdev_name, "sysfs", + json_type_string, json_object_get_string, rc, out); + + _dmmp_null_or_empty_str_check(ctx, wwid, rc, out); + _dmmp_null_or_empty_str_check(ctx, alias, rc, out); + + dmmp_mp->wwid = strdup(wwid); + _dmmp_alloc_null_check(ctx, dmmp_mp->wwid, rc, out); + dmmp_mp->alias = strdup(alias); + _dmmp_alloc_null_check(ctx, dmmp_mp->alias, rc, out); + dmmp_mp->kdev_name = strdup(kdev_name); + _dmmp_alloc_null_check(ctx, dmmp_mp->kdev_name, rc, out); + + _json_obj_get_value(ctx, j_obj_map, ar_pgs, "path_groups", + json_type_array, json_object_get_array, rc, out); + ar_pgs_len = array_list_length(ar_pgs); + if (ar_pgs_len < 0) { + rc = DMMP_ERR_BUG; + _error(ctx, "BUG: Got negative length for ar_pgs"); + goto out; + } + else if (ar_pgs_len == 0) + goto out; + else + dmmp_mp->dmmp_pg_count = ar_pgs_len & UINT32_MAX; + + dmmp_mp->dmmp_pgs = (struct dmmp_path_group **) + malloc(sizeof(struct dmmp_path_group *) * + dmmp_mp->dmmp_pg_count); + _dmmp_alloc_null_check(ctx, dmmp_mp->dmmp_pgs, rc, out); + for (; i < dmmp_mp->dmmp_pg_count; ++i) + dmmp_mp->dmmp_pgs[i] = NULL; + + for (i = 0; i < dmmp_mp->dmmp_pg_count; ++i) { + dmmp_pg = _dmmp_path_group_new(); + _dmmp_alloc_null_check(ctx, dmmp_pg, rc, out); + dmmp_mp->dmmp_pgs[i] = dmmp_pg; + _good(_dmmp_path_group_update(ctx, dmmp_pg, + array_list_get_idx(ar_pgs, i)), + rc, out); + } + + _debug(ctx, "Got mpath wwid: '%s', alias: '%s'", dmmp_mp->wwid, + dmmp_mp->alias); + +out: + if (rc 
!= DMMP_OK) + _dmmp_mpath_free(dmmp_mp); + return rc; +} + +void _dmmp_mpath_free(struct dmmp_mpath *dmmp_mp) +{ + if (dmmp_mp == NULL) + return ; + + free((char *) dmmp_mp->alias); + free((char *) dmmp_mp->wwid); + free((char *) dmmp_mp->kdev_name); + + if (dmmp_mp->dmmp_pgs != NULL) + _dmmp_path_group_array_free(dmmp_mp->dmmp_pgs, + dmmp_mp->dmmp_pg_count); + + free(dmmp_mp); +} + +void dmmp_path_group_array_get(struct dmmp_mpath *dmmp_mp, + struct dmmp_path_group ***dmmp_pgs, + uint32_t *dmmp_pg_count) +{ + assert(dmmp_mp != NULL); + assert(dmmp_pgs != NULL); + assert(dmmp_pg_count != NULL); + + *dmmp_pgs = dmmp_mp->dmmp_pgs; + *dmmp_pg_count = dmmp_mp->dmmp_pg_count; +} diff --git a/libdmmp/libdmmp_path.c b/libdmmp/libdmmp_path.c new file mode 100644 index 0000000..47a2162 --- /dev/null +++ b/libdmmp/libdmmp_path.c @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2015 - 2016 Red Hat, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * + * Author: Gris Ge + * Todd Gill + */ + +#include +#include +#include +#include +#include + +#include "libdmmp/libdmmp.h" +#include "libdmmp_private.h" + +#define _DMMP_SHOW_PS_INDEX_BLK_NAME 0 +#define _DMMP_SHOW_PS_INDEX_SATAUS 1 +#define _DMMP_SHOW_PS_INDEX_WWID 2 +#define _DMMP_SHOW_PS_INDEX_PGID 3 + +struct dmmp_path { + char *blk_name; + uint32_t status; +}; + +static const struct _num_str_conv _DMMP_PATH_STATUS_CONV[] = { + {DMMP_PATH_STATUS_UNKNOWN, "undef"}, + {DMMP_PATH_STATUS_UP, "ready"}, + {DMMP_PATH_STATUS_DOWN, "faulty"}, + {DMMP_PATH_STATUS_SHAKY, "shaky"}, + {DMMP_PATH_STATUS_GHOST, "ghost"}, + {DMMP_PATH_STATUS_PENDING, "i/o pending"}, + {DMMP_PATH_STATUS_TIMEOUT, "i/o timeout"}, + {DMMP_PATH_STATUS_DELAYED, "delayed"}, +}; + +_dmmp_str_func_gen(dmmp_path_status_str, uint32_t, path_status, + _DMMP_PATH_STATUS_CONV); +_dmmp_str_conv_func_gen(_dmmp_path_status_str_conv, ctx, path_status_str, + uint32_t, DMMP_PATH_STATUS_UNKNOWN, + _DMMP_PATH_STATUS_CONV); + +_dmmp_getter_func_gen(dmmp_path_blk_name_get, struct dmmp_path, dmmp_p, + blk_name, const char *); +_dmmp_getter_func_gen(dmmp_path_status_get, struct dmmp_path, dmmp_p, + status, uint32_t); + +struct dmmp_path *_dmmp_path_new(void) +{ + struct dmmp_path *dmmp_p = NULL; + + dmmp_p = (struct dmmp_path *) malloc(sizeof(struct dmmp_path)); + + if (dmmp_p != NULL) { + dmmp_p->blk_name = NULL; + dmmp_p->status = DMMP_PATH_STATUS_UNKNOWN; + } + return dmmp_p; +} + +int _dmmp_path_update(struct dmmp_context *ctx, struct dmmp_path *dmmp_p, + json_object *j_obj_p) +{ + int rc = DMMP_OK; + const char *blk_name = NULL; + const char *status_str = NULL; + + assert(ctx != NULL); + assert(dmmp_p != NULL); + assert(j_obj_p != NULL); + + _json_obj_get_value(ctx, j_obj_p, blk_name, "dev", + json_type_string, json_object_get_string, rc, out); + _json_obj_get_value(ctx, j_obj_p, status_str, "chk_st", + json_type_string, json_object_get_string, rc, out); + + _dmmp_null_or_empty_str_check(ctx, blk_name, rc, 
out); + _dmmp_null_or_empty_str_check(ctx, status_str, rc, out); + + dmmp_p->blk_name = strdup(blk_name); + _dmmp_alloc_null_check(ctx, dmmp_p->blk_name, rc, out); + + dmmp_p->status = _dmmp_path_status_str_conv(ctx, status_str); + + _debug(ctx, "Got path blk_name: '%s'", dmmp_p->blk_name); + _debug(ctx, "Got path status: %s(%" PRIu32 ")", + dmmp_path_status_str(dmmp_p->status), dmmp_p->status); + +out: + if (rc != DMMP_OK) + _dmmp_path_free(dmmp_p); + return rc; +} + +void _dmmp_path_free(struct dmmp_path *dmmp_p) +{ + if (dmmp_p == NULL) + return; + free(dmmp_p->blk_name); + free(dmmp_p); +} diff --git a/libdmmp/libdmmp_pg.c b/libdmmp/libdmmp_pg.c new file mode 100644 index 0000000..5149161 --- /dev/null +++ b/libdmmp/libdmmp_pg.c @@ -0,0 +1,208 @@ +/* + * Copyright (C) 2015 - 2016 Red Hat, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ *
+ * Author: Gris Ge
+ * Todd Gill
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "libdmmp/libdmmp.h"
+#include "libdmmp_private.h"
+
+#define _DMMP_SHOW_PGS_CMD "show groups raw format %w|%g|%p|%t|%s"
+#define _DMMP_SHOW_PG_INDEX_WWID 0
+#define _DMMP_SHOW_PG_INDEX_PG_ID 1
+#define _DMMP_SHOW_PG_INDEX_PRI 2
+#define _DMMP_SHOW_PG_INDEX_STATUS 3
+#define _DMMP_SHOW_PG_INDEX_SELECTOR 4
+
+struct dmmp_path_group {
+	uint32_t id;
+	/* ^ pgindex of struct path, will be used for path group switch */
+	uint32_t status;
+	uint32_t priority;
+	char *selector;
+	uint32_t dmmp_p_count;
+	struct dmmp_path **dmmp_ps;
+};
+
+static const struct _num_str_conv _DMMP_PATH_GROUP_STATUS_CONV[] = {
+	{DMMP_PATH_GROUP_STATUS_UNKNOWN, "undef"},
+	{DMMP_PATH_GROUP_STATUS_ACTIVE, "active"},
+	{DMMP_PATH_GROUP_STATUS_DISABLED, "disabled"},
+	{DMMP_PATH_GROUP_STATUS_ENABLED, "enabled"},
+};
+
+_dmmp_str_func_gen(dmmp_path_group_status_str, uint32_t, pg_status,
+		   _DMMP_PATH_GROUP_STATUS_CONV);
+_dmmp_str_conv_func_gen(_dmmp_path_group_status_str_conv, ctx, pg_status_str,
+			uint32_t, DMMP_PATH_GROUP_STATUS_UNKNOWN,
+			_DMMP_PATH_GROUP_STATUS_CONV);
+
+_dmmp_getter_func_gen(dmmp_path_group_id_get, struct dmmp_path_group, dmmp_pg,
+		      id, uint32_t);
+_dmmp_getter_func_gen(dmmp_path_group_status_get, struct dmmp_path_group,
+		      dmmp_pg, status, uint32_t);
+_dmmp_getter_func_gen(dmmp_path_group_priority_get, struct dmmp_path_group,
+		      dmmp_pg, priority, uint32_t);
+_dmmp_getter_func_gen(dmmp_path_group_selector_get, struct dmmp_path_group,
+		      dmmp_pg, selector, const char *);
+_dmmp_array_free_func_gen(_dmmp_path_group_array_free, struct dmmp_path_group,
+			  _dmmp_path_group_free);
+
+
+struct dmmp_path_group *_dmmp_path_group_new(void)
+{
+	struct dmmp_path_group *dmmp_pg = NULL;
+
+	dmmp_pg = (struct dmmp_path_group *)
+		malloc(sizeof(struct dmmp_path_group));
+
+	if (dmmp_pg != NULL) {
+		dmmp_pg->id = _DMMP_PATH_GROUP_ID_UNKNOWN;
+		dmmp_pg->status = DMMP_PATH_GROUP_STATUS_UNKNOWN;
+		dmmp_pg->priority = 0;
+		dmmp_pg->selector = NULL;
+		dmmp_pg->dmmp_p_count = 0;
+		dmmp_pg->dmmp_ps = NULL;
+	}
+	return dmmp_pg;
+}
+int _dmmp_path_group_update(struct dmmp_context *ctx,
+			    struct dmmp_path_group *dmmp_pg,
+			    json_object *j_obj_pg)
+{
+	int rc = DMMP_OK;
+	uint32_t id = 0;
+	int priority_int = -1 ;
+	const char *status_str = NULL;
+	const char *selector = NULL;
+	struct array_list *ar_ps = NULL;
+	int ar_ps_len = -1;
+	uint32_t i = 0;
+	struct dmmp_path *dmmp_p = NULL;
+
+	assert(ctx != NULL);
+	assert(dmmp_pg != NULL);
+	assert(j_obj_pg != NULL);
+
+	_json_obj_get_value(ctx, j_obj_pg, status_str, "dm_st",
+			    json_type_string, json_object_get_string, rc, out);
+
+	_json_obj_get_value(ctx, j_obj_pg, selector, "selector",
+			    json_type_string, json_object_get_string, rc, out);
+
+	_json_obj_get_value(ctx, j_obj_pg, priority_int, "pri",
+			    json_type_int, json_object_get_int, rc, out);
+
+	_json_obj_get_value(ctx, j_obj_pg, id, "group",
+			    json_type_int, json_object_get_int, rc, out);
+
+	dmmp_pg->priority = (priority_int <= 0) ? 0 : priority_int & UINT32_MAX;
+
+	_dmmp_null_or_empty_str_check(ctx, status_str, rc, out);
+	_dmmp_null_or_empty_str_check(ctx, selector, rc, out);
+
+	dmmp_pg->selector = strdup(selector);
+	_dmmp_alloc_null_check(ctx, dmmp_pg->selector, rc, out);
+
+	dmmp_pg->id = id;
+
+	if (dmmp_pg->id == _DMMP_PATH_GROUP_ID_UNKNOWN) {
+		rc = DMMP_ERR_BUG;
+		_error(ctx, "BUG: Got unknown(%d) path group ID",
+		       _DMMP_PATH_GROUP_ID_UNKNOWN);
+		goto out;
+	}
+
+	dmmp_pg->status = _dmmp_path_group_status_str_conv(ctx, status_str);
+
+	_json_obj_get_value(ctx, j_obj_pg, ar_ps, "paths",
+			    json_type_array, json_object_get_array, rc, out);
+
+	ar_ps_len = array_list_length(ar_ps);
+	if (ar_ps_len < 0) {
+		rc = DMMP_ERR_BUG;
+		_error(ctx, "BUG: Got negative length for ar_ps");
+		goto out;
+	}
+	else if (ar_ps_len == 0)
+		goto out;
+	else
+		dmmp_pg->dmmp_p_count = ar_ps_len & UINT32_MAX;
+
+	dmmp_pg->dmmp_ps = (struct dmmp_path **)
+		malloc(sizeof(struct dmmp_path *) * dmmp_pg->dmmp_p_count);
+	_dmmp_alloc_null_check(ctx, dmmp_pg->dmmp_ps, rc, out);
+	for (; i < dmmp_pg->dmmp_p_count; ++i)
+		dmmp_pg->dmmp_ps[i] = NULL;
+
+	for (i = 0; i < dmmp_pg->dmmp_p_count; ++i) {
+		dmmp_p = _dmmp_path_new();
+		_dmmp_alloc_null_check(ctx, dmmp_p, rc, out);
+		/* _dmmp_path_update() frees dmmp_p on failure: store last */
+		_good(_dmmp_path_update(ctx, dmmp_p,
+					array_list_get_idx(ar_ps, i)), rc, out);
+		dmmp_pg->dmmp_ps[i] = dmmp_p;
+	}
+
+	_debug(ctx, "Got path group id: %" PRIu32 "", dmmp_pg->id);
+	_debug(ctx, "Got path group priority: %" PRIu32 "", dmmp_pg->priority);
+	_debug(ctx, "Got path group status: %s(%" PRIu32 ")",
+	       dmmp_path_group_status_str(dmmp_pg->status), dmmp_pg->status);
+	_debug(ctx, "Got path group selector: '%s'", dmmp_pg->selector);
+
+out:
+	if (rc != DMMP_OK)
+		_dmmp_path_group_free(dmmp_pg);
+	return rc;
+}
+
+void _dmmp_path_group_free(struct dmmp_path_group *dmmp_pg)
+{
+	uint32_t i = 0;
+
+	if (dmmp_pg == NULL)
+		return;
+
+	free((char *) dmmp_pg->selector);
+
+	if (dmmp_pg->dmmp_ps != NULL) {
+		for (i = 0; i < dmmp_pg->dmmp_p_count; ++i) {
+			_dmmp_path_free(dmmp_pg->dmmp_ps[i]);
+		}
+		free(dmmp_pg->dmmp_ps);
+	}
+	free(dmmp_pg);
+}
+
+void dmmp_path_array_get(struct dmmp_path_group *mp_pg,
+			 struct dmmp_path ***mp_paths,
+			 uint32_t *dmmp_p_count)
+{
+	assert(mp_pg != NULL);
+	assert(mp_paths != NULL);
+	assert(dmmp_p_count != NULL);
+
+	*mp_paths = mp_pg->dmmp_ps;
+	*dmmp_p_count = mp_pg->dmmp_p_count;
+}
diff --git a/libdmmp/libdmmp_private.h b/libdmmp/libdmmp_private.h
new file mode 100644
index 0000000..ac85b63
--- /dev/null
+++ b/libdmmp/libdmmp_private.h
@@ -0,0 +1,211 @@
+/*
+ * Copyright (C) 2015 - 2016 Red Hat, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ *
+ * Author: Gris Ge
+ * Todd Gill
+ */
+
+#ifndef _LIB_DMMP_PRIVATE_H_
+#define _LIB_DMMP_PRIVATE_H_
+
+/*
+ * Notes:
+ * Internal/Private functions do not check input arguments but use
+ * assert() to abort if a NULL pointer is found in an argument.
+ */ + +#include +#include +#include +#include + +#include "libdmmp/libdmmp.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define _good(rc, rc_val, out) \ + do { \ + rc_val = rc; \ + if (rc_val != DMMP_OK) \ + goto out; \ + } while(0) + +#define _DMMP_PATH_GROUP_ID_UNKNOWN 0 + +struct DMMP_DLL_LOCAL _num_str_conv; +struct _num_str_conv { + const uint32_t value; + const char *str; +}; + +#define _dmmp_str_func_gen(func_name, var_type, var, conv_array) \ +const char *func_name(var_type var) { \ + size_t i = 0; \ + uint32_t tmp_var = var & UINT32_MAX; \ + /* In the whole libdmmp, we don't have negative value */ \ + for (; i < sizeof(conv_array)/sizeof(conv_array[0]); ++i) { \ + if ((conv_array[i].value) == tmp_var) \ + return conv_array[i].str; \ + } \ + return "Invalid argument"; \ +} + +#define _dmmp_str_conv_func_gen(func_name, ctx, var_name, out_type, \ + unknown_value, conv_array) \ +static out_type func_name(struct dmmp_context *ctx, const char *var_name) { \ + size_t i = 0; \ + for (; i < sizeof(conv_array)/sizeof(conv_array[0]); ++i) { \ + if (strcmp(conv_array[i].str, var_name) == 0) \ + return conv_array[i].value; \ + } \ + _warn(ctx, "Got unknown " #var_name ": '%s'", var_name); \ + return unknown_value; \ +} + +#define _json_obj_get_value(ctx, j_obj, out_value, key, value_type, \ + value_func, rc, out) \ +do { \ + json_type j_type = json_type_null; \ + json_object *j_obj_tmp = NULL; \ + if (json_object_object_get_ex(j_obj, key, &j_obj_tmp) != TRUE) { \ + _error(ctx, "Invalid JSON output from multipathd IPC: " \ + "key '%s' not found", key); \ + rc = DMMP_ERR_IPC_ERROR; \ + goto out; \ + } \ + if (j_obj_tmp == NULL) { \ + _error(ctx, "BUG: Got NULL j_obj_tmp from " \ + "json_object_object_get_ex() while it return TRUE"); \ + rc = DMMP_ERR_BUG; \ + goto out; \ + } \ + j_type = json_object_get_type(j_obj_tmp); \ + if (j_type != value_type) { \ + _error(ctx, "Invalid value type for key'%s' of JSON output " \ + "from multipathd IPC. 
Should be %s(%d), " \ + "but got %s(%d)", key, json_type_to_name(value_type), \ + value_type, json_type_to_name(j_type), j_type); \ + rc = DMMP_ERR_IPC_ERROR; \ + goto out; \ + } \ + out_value = value_func(j_obj_tmp); \ +} while(0); + +DMMP_DLL_LOCAL int _dmmp_ipc_exec(struct dmmp_context *ctx, const char *cmd, + char **output); + +DMMP_DLL_LOCAL struct dmmp_mpath *_dmmp_mpath_new(void); +DMMP_DLL_LOCAL struct dmmp_path_group *_dmmp_path_group_new(void); +DMMP_DLL_LOCAL struct dmmp_path *_dmmp_path_new(void); + +DMMP_DLL_LOCAL int _dmmp_mpath_update(struct dmmp_context *ctx, + struct dmmp_mpath *dmmp_mp, + json_object *j_obj_map); +DMMP_DLL_LOCAL int _dmmp_path_group_update(struct dmmp_context *ctx, + struct dmmp_path_group *dmmp_pg, + json_object *j_obj_pg); +DMMP_DLL_LOCAL int _dmmp_path_update(struct dmmp_context *ctx, + struct dmmp_path *dmmp_p, + json_object *j_obj_p); + +DMMP_DLL_LOCAL void _dmmp_mpath_free(struct dmmp_mpath *dmmp_mp); +DMMP_DLL_LOCAL void _dmmp_path_group_free(struct dmmp_path_group *dmmp_pg); +DMMP_DLL_LOCAL void _dmmp_path_group_array_free + (struct dmmp_path_group **dmmp_pgs, uint32_t dmmp_pg_count); +DMMP_DLL_LOCAL void _dmmp_path_free(struct dmmp_path *dmmp_p); +DMMP_DLL_LOCAL void _dmmp_log(struct dmmp_context *ctx, int priority, + const char *file, int line, + const char *func_name, + const char *format, ...) + __attribute__((format(printf, 6, 7))); +DMMP_DLL_LOCAL void _dmmp_log_err_str(struct dmmp_context *ctx, int rc); + +DMMP_DLL_LOCAL void _dmmp_log_stderr(struct dmmp_context *ctx, int priority, + const char *file, int line, + const char *func_name, const char *format, + va_list args) + __attribute__((format(printf, 6, 0))); + + +#define _dmmp_log_cond(ctx, prio, arg...) \ + do { \ + if (dmmp_context_log_priority_get(ctx) >= prio) \ + _dmmp_log(ctx, prio, __FILE__, __LINE__, __FUNCTION__, \ + ## arg); \ + } while (0) + +#define _debug(ctx, arg...) 
\ + _dmmp_log_cond(ctx, DMMP_LOG_PRIORITY_DEBUG, ## arg) +#define _info(ctx, arg...) \ + _dmmp_log_cond(ctx, DMMP_LOG_PRIORITY_INFO, ## arg) +#define _warn(ctx, arg...) \ + _dmmp_log_cond(ctx, DMMP_LOG_PRIORITY_WARNING, ## arg) +#define _error(ctx, arg...) \ + _dmmp_log_cond(ctx, DMMP_LOG_PRIORITY_ERROR, ## arg) + +/* + * Check pointer returned by malloc() or strdup(), if NULL, set + * rc as DMMP_ERR_NO_MEMORY, report error and goto goto_out. + */ +#define _dmmp_alloc_null_check(ctx, ptr, rc, goto_out) \ + do { \ + if (ptr == NULL) { \ + rc = DMMP_ERR_NO_MEMORY; \ + _error(ctx, "%s", dmmp_strerror(rc)); \ + goto goto_out; \ + } \ + } while(0) + +#define _dmmp_null_or_empty_str_check(ctx, var, rc, goto_out) \ + do { \ + if (var == NULL) { \ + rc = DMMP_ERR_BUG; \ + _error(ctx, "BUG: Got NULL " #var); \ + goto goto_out; \ + } \ + if (strlen(var) == 0) { \ + rc = DMMP_ERR_BUG; \ + _error(ctx, "BUG: Got empty " #var); \ + goto goto_out; \ + } \ + } while(0) + +#define _dmmp_getter_func_gen(func_name, struct_name, struct_data, \ + prop_name, prop_type) \ + prop_type func_name(struct_name *struct_data) \ + { \ + assert(struct_data != NULL); \ + return struct_data->prop_name; \ + } + +#define _dmmp_array_free_func_gen(func_name, struct_name, struct_free_func) \ + void func_name(struct_name **ptr_array, uint32_t ptr_count) \ + { \ + uint32_t i = 0; \ + if (ptr_array == NULL) \ + return; \ + for (; i < ptr_count; ++i) \ + struct_free_func(ptr_array[i]); \ + free(ptr_array); \ + } + +#ifdef __cplusplus +} /* End of extern "C" */ +#endif + +#endif /* End of _LIB_DMMP_PRIVATE_H_ */ diff --git a/libdmmp/test/Makefile b/libdmmp/test/Makefile new file mode 100644 index 0000000..20b3945 --- /dev/null +++ b/libdmmp/test/Makefile @@ -0,0 +1,37 @@ +# Makefile +# +# Copyright (C) 2015-2016 Gris Ge +# +include ../../Makefile.inc + +_libdmmpdir=../$(libdmmpdir) +_mpathcmddir=../$(mpathcmddir) + +TEST_EXEC = libdmmp_test +SPD_TEST_EXEC = libdmmp_speed_test +CFLAGS += -I$(_libdmmpdir) 
+LDFLAGS += -L$(_libdmmpdir) -ldmmp + +all: $(TEST_EXEC) $(SPD_TEST_EXEC) + +check: $(TEST_EXEC) $(SPD_TEST_EXEC) + sudo env LD_LIBRARY_PATH=$(_libdmmpdir):$(_mpathcmddir) \ + valgrind --quiet --leak-check=full \ + --show-reachable=no --show-possibly-lost=no \ + --trace-children=yes --error-exitcode=1 \ + ./$(TEST_EXEC) + $(MAKE) speed_test + +speed_test: $(SPD_TEST_EXEC) + sudo env LD_LIBRARY_PATH=$(_libdmmpdir):$(_mpathcmddir) \ + time -p ./$(SPD_TEST_EXEC) + +clean: dep_clean + rm -f $(TEST_EXEC) $(SPD_TEST_EXEC) + +OBJS = $(TEST_EXEC).o $(SPD_TEST_EXEC).o +include $(wildcard $(OBJS:.o=.d)) + + +dep_clean: + $(RM) $(OBJS:.o=.d) diff --git a/libdmmp/test/libdmmp_speed_test.c b/libdmmp/test/libdmmp_speed_test.c new file mode 100644 index 0000000..372cd39 --- /dev/null +++ b/libdmmp/test/libdmmp_speed_test.c @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2015-2016 Red Hat, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * + * Author: Gris Ge + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +int main(int argc, char *argv[]) +{ + struct dmmp_context *ctx = NULL; + struct dmmp_mpath **dmmp_mps = NULL; + uint32_t dmmp_mp_count = 0; + int rc = EXIT_SUCCESS; + + ctx = dmmp_context_new(); + dmmp_context_log_priority_set(ctx, DMMP_LOG_PRIORITY_WARNING); + + if (dmmp_mpath_array_get(ctx, &dmmp_mps, &dmmp_mp_count) != 0) { + printf("FAILED\n"); + rc = EXIT_FAILURE; + } else { + printf("Got %" PRIu32 " mpath\n", dmmp_mp_count); + dmmp_mpath_array_free(dmmp_mps, dmmp_mp_count); + } + dmmp_context_free(ctx); + exit(rc); +} diff --git a/libdmmp/test/libdmmp_test.c b/libdmmp/test/libdmmp_test.c new file mode 100644 index 0000000..d944e1e --- /dev/null +++ b/libdmmp/test/libdmmp_test.c @@ -0,0 +1,194 @@ +/* + * Copyright (C) 2015-2017 Red Hat, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Author: Gris Ge + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define FAIL(rc, out, ...) \ + do { \ + rc = EXIT_FAILURE; \ + fprintf(stderr, "FAIL: "__VA_ARGS__ ); \ + goto out; \ + } while(0) +#define PASS(...) 
fprintf(stdout, "PASS: "__VA_ARGS__ ); +#define FILE_NAME_SIZE 256 +#define TMO 60000 /* Forcing timeout to 60 seconds */ + +int test_paths(struct dmmp_path_group *mp_pg) +{ + struct dmmp_path **mp_ps = NULL; + uint32_t mp_p_count = 0; + uint32_t i = 0; + const char *blk_name = NULL; + int rc = EXIT_SUCCESS; + + dmmp_path_array_get(mp_pg, &mp_ps, &mp_p_count); + if (mp_p_count == 0) + FAIL(rc, out, "dmmp_path_array_get(): Got no path\n"); + for (i = 0; i < mp_p_count; ++i) { + blk_name = dmmp_path_blk_name_get(mp_ps[i]); + if (blk_name == NULL) + FAIL(rc, out, "dmmp_path_blk_name_get(): Got NULL\n"); + PASS("dmmp_path_blk_name_get(): %s\n", blk_name); + PASS("dmmp_path_status_get(): %" PRIu32 " -- %s\n", + dmmp_path_status_get(mp_ps[i]), + dmmp_path_status_str(dmmp_path_status_get(mp_ps[i]))); + } +out: + return rc; +} + +int test_path_groups(struct dmmp_mpath *dmmp_mp) +{ + struct dmmp_path_group **dmmp_pgs = NULL; + uint32_t dmmp_pg_count = 0; + uint32_t i = 0; + int rc = EXIT_SUCCESS; + + dmmp_path_group_array_get(dmmp_mp, &dmmp_pgs, &dmmp_pg_count); + if ((dmmp_pg_count == 0) && (dmmp_pgs != NULL)) + FAIL(rc, out, "dmmp_path_group_array_get(): mp_pgs is not NULL " + "but mp_pg_count is 0\n"); + if ((dmmp_pg_count != 0) && (dmmp_pgs == NULL)) + FAIL(rc, out, "dmmp_path_group_array_get(): mp_pgs is NULL " + "but mp_pg_count is not 0\n"); + if (dmmp_pg_count == 0) + FAIL(rc, out, "dmmp_path_group_array_get(): " + "Got 0 path group\n"); + + PASS("dmmp_path_group_array_get(): Got %" PRIu32 " path groups\n", + dmmp_pg_count); + + for (i = 0; i < dmmp_pg_count; ++i) { + PASS("dmmp_path_group_id_get(): %" PRIu32 "\n", + dmmp_path_group_id_get(dmmp_pgs[i])); + PASS("dmmp_path_group_priority_get(): %" PRIu32 "\n", + dmmp_path_group_priority_get(dmmp_pgs[i])); + PASS("dmmp_path_group_status_get(): %" PRIu32 " -- %s\n", + dmmp_path_group_status_get(dmmp_pgs[i]), + dmmp_path_group_status_str + (dmmp_path_group_status_get(dmmp_pgs[i]))); + 
PASS("dmmp_path_group_selector_get(): %s\n", + dmmp_path_group_selector_get(dmmp_pgs[i])); + rc = test_paths(dmmp_pgs[i]); + if (rc != 0) + goto out; + } +out: + return rc; +} + +int main(int argc, char *argv[]) +{ + struct dmmp_context *ctx = NULL; + struct dmmp_mpath **dmmp_mps = NULL; + uint32_t dmmp_mp_count = 0; + uint32_t old_dmmp_mp_count = 0; + const char *name = NULL; + const char *wwid = NULL; + const char *kdev = NULL; + uint32_t i = 0; + int rc = EXIT_SUCCESS; + const char *old_name = NULL; + bool found = false; + + ctx = dmmp_context_new(); + dmmp_context_log_priority_set(ctx, DMMP_LOG_PRIORITY_DEBUG); + dmmp_context_userdata_set(ctx, ctx); + dmmp_context_userdata_set(ctx, NULL); + dmmp_context_timeout_set(ctx, TMO); + if (dmmp_context_timeout_get(ctx) != TMO) + FAIL(rc, out, "dmmp_context_timeout_set(): Failed to set " + "timeout to %u\n", TMO); + + if (dmmp_mpath_array_get(ctx, &dmmp_mps, &dmmp_mp_count) != 0) + FAIL(rc, out, "dmmp_mpath_array_get() failed: %s\n", + dmmp_last_error_msg(ctx)); + if (dmmp_mp_count == 0) + FAIL(rc, out, "dmmp_mpath_array_get(): " + "Got no multipath devices\n"); + PASS("dmmp_mpath_array_get(): Got %" PRIu32 " mpath\n", dmmp_mp_count); + for (i = 0; i < dmmp_mp_count; ++i) { + name = dmmp_mpath_name_get(dmmp_mps[i]); + wwid = dmmp_mpath_wwid_get(dmmp_mps[i]); + kdev = dmmp_mpath_kdev_name_get(dmmp_mps[i]); + if ((name == NULL) ||(wwid == NULL) || (kdev == NULL)) + FAIL(rc, out, + "dmmp_mpath_array_get(): Got NULL name or wwid"); + PASS("dmmp_mpath_array_get(): Got mpath(%s): %s %s\n", + kdev, name, wwid); + rc = test_path_groups(dmmp_mps[i]); + if (rc != 0) + goto out; + } + + old_name = strdup(name); + if (old_name == NULL) + FAIL(rc, out, "strdup(): no memory\n"); + + old_dmmp_mp_count = dmmp_mp_count; + + dmmp_mpath_array_free(dmmp_mps, dmmp_mp_count); + + if (dmmp_flush_mpath(ctx, old_name) != DMMP_OK) + FAIL(rc, out, "dmmp_flush_mpath(): failed %s\n", + dmmp_last_error_msg(ctx)); + + PASS("dmmp_flush_mpath(): 
OK\n"); + + if (dmmp_reconfig(ctx) != DMMP_OK) + FAIL(rc, out, "dmmp_reconfig() failed: %s\n", + dmmp_last_error_msg(ctx)); + + PASS("dmmp_reconfig(): OK\n"); + + if (dmmp_mpath_array_get(ctx, &dmmp_mps, &dmmp_mp_count) != 0) + FAIL(rc, out, "dmmp_mpath_array_get() failed: %s\n", + dmmp_last_error_msg(ctx)); + if (dmmp_mp_count == 0) + FAIL(rc, out, "dmmp_mpath_array_get(): " + "Got no multipath devices\n"); + + if (dmmp_mp_count != old_dmmp_mp_count) + FAIL(rc, out, "Got different mpath count after reconfig: " + "old %" PRIu32 ", new %" PRIu32 "\n", old_dmmp_mp_count, + dmmp_mp_count); + + for (i = 0; i < dmmp_mp_count; ++i) { + if (strcmp(old_name, dmmp_mpath_name_get(dmmp_mps[i])) == 0) { + found = true; + break; + } + } + if (found == false) + FAIL(rc, out, "dmmp_reconfig() does not recreate deleted " + "mpath %s\n", old_name); + +out: + dmmp_context_free(ctx); + exit(rc); +} diff --git a/libmpathcmd/Makefile b/libmpathcmd/Makefile new file mode 100644 index 0000000..0f6b816 --- /dev/null +++ b/libmpathcmd/Makefile @@ -0,0 +1,36 @@ +include ../Makefile.inc + +SONAME = 0 +DEVLIB = libmpathcmd.so +LIBS = $(DEVLIB).$(SONAME) + +CFLAGS += $(LIB_CFLAGS) + +OBJS = mpath_cmd.o + +all: $(LIBS) + +$(LIBS): $(OBJS) + $(CC) $(LDFLAGS) $(SHARED_FLAGS) -Wl,-soname=$@ -o $@ $(OBJS) $(LIBDEPS) + $(LN) $@ $(DEVLIB) + +install: $(LIBS) + $(INSTALL_PROGRAM) -d $(DESTDIR)$(syslibdir) + $(INSTALL_PROGRAM) -m 755 $(LIBS) $(DESTDIR)$(syslibdir)/$(LIBS) + $(LN) $(LIBS) $(DESTDIR)$(syslibdir)/$(DEVLIB) + $(INSTALL_PROGRAM) -d $(DESTDIR)$(includedir) + $(INSTALL_PROGRAM) -m 644 mpath_cmd.h $(DESTDIR)$(includedir) + +uninstall: + $(RM) $(DESTDIR)$(syslibdir)/$(LIBS) + $(RM) $(DESTDIR)$(syslibdir)/$(DEVLIB) + $(RM) $(DESTDIR)$(includedir)/mpath_cmd.h + +clean: dep_clean + $(RM) core *.a *.o *.so *.so.* *.gz + +include $(wildcard $(OBJS:.o=.d)) + + +dep_clean: + $(RM) $(OBJS:.o=.d) diff --git a/libmpathcmd/mpath_cmd.c b/libmpathcmd/mpath_cmd.c new file mode 100644 index 0000000..60b2d96 
--- /dev/null
+++ b/libmpathcmd/mpath_cmd.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * This file is part of the device-mapper multipath userspace tools.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see .
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "mpath_cmd.h"
+
+/*
+ * Read exactly len bytes from fd into buf. Each poll() waits at most
+ * timeout milliseconds. Returns the number of bytes read (less than len
+ * only if the peer closed the connection) or -1 with errno set; errno is
+ * ETIMEDOUT when poll() timed out.
+ */
+static ssize_t read_all(int fd, void *buf, size_t len, unsigned int timeout)
+{
+	size_t total = 0;
+	ssize_t n;
+	int ret;
+	struct pollfd pfd;
+
+	while (len) {
+		pfd.fd = fd;
+		pfd.events = POLLIN;
+		ret = poll(&pfd, 1, timeout);
+		if (!ret) {
+			errno = ETIMEDOUT;
+			return -1;
+		} else if (ret < 0) {
+			if (errno == EINTR)
+				continue;
+			return -1;
+		}
+		/*
+		 * With only POLLIN requested, a positive return without
+		 * POLLIN means POLLERR, POLLHUP or POLLNVAL. Polling again
+		 * would return at once and spin, so fall through and let
+		 * recv() report the EOF or the error.
+		 */
+		n = recv(fd, buf, len, 0);
+		if (n < 0) {
+			if ((errno == EINTR) || (errno == EAGAIN))
+				continue;
+			return -1;
+		}
+		if (!n)
+			return total;
+		buf = n + (char *)buf;
+		len -= n;
+		total += n;
+	}
+	return total;
+}
+
+/*
+ * keep writing until it's all sent
+ */
+static size_t write_all(int fd, const void *buf, size_t len)
+{
+	size_t total = 0;
+
+	while (len) {
+		ssize_t n = send(fd, buf, len, MSG_NOSIGNAL);
+		if (n < 0) {
+			if ((errno == EINTR) || (errno == EAGAIN))
+				continue;
+			return total;
+		}
+		if (!n)
+			return total;
+		buf = n + (const char *)buf;
+		len -= n;
+		total += n;
+	}
+	return total;
+}
+
+/*
+ * connect to a unix domain socket
+ */
+int __mpath_connect(int nonblocking)
+{
+	int fd;
+	size_t len;
+	struct sockaddr_un addr;
+	int flags = 0;
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_LOCAL;
+	addr.sun_path[0] = '\0';
+	strncpy(&addr.sun_path[1], DEFAULT_SOCKET, sizeof(addr.sun_path) - 1);
+	len = strlen(DEFAULT_SOCKET) + 1 + sizeof(sa_family_t);
+	if (len > sizeof(struct sockaddr_un))
+		len = sizeof(struct sockaddr_un);
+
+	fd = socket(AF_LOCAL, SOCK_STREAM, 0);
+	if (fd == -1)
+		return -1;
+
+	if (nonblocking) {
+		flags = fcntl(fd, F_GETFL, 0);
+		if (flags != -1)
+			(void)fcntl(fd, F_SETFL, flags|O_NONBLOCK);
+	}
+
+	if (connect(fd, (struct sockaddr *)&addr, len) == -1) {
+		int err = errno;
+
+		close(fd);
+		errno = err;
+		return -1;
+	}
+
+	if (nonblocking && flags != -1)
+		(void)fcntl(fd, F_SETFL, flags);
+
+	return fd;
+}
+
+/*
+ * connect to a unix domain socket
+ */
+int mpath_connect(void)
+{
+	return __mpath_connect(0);
+}
+
+int mpath_disconnect(int fd)
+{
+	return close(fd);
+}
+
+ssize_t mpath_recv_reply_len(int fd, unsigned int timeout)
+{
+	size_t len;
+	ssize_t ret;
+
+	ret = read_all(fd, &len, sizeof(len), timeout);
+	if (ret < 0)
+		return ret;
+	if (ret != sizeof(len)) {
+		errno = EIO;
+		return -1;
+	}
+	if (len <= 0 || len >= MAX_REPLY_LEN) {
+		errno = ERANGE;
+		return -1;
+	}
+	return len;
+}
+
+int mpath_recv_reply_data(int fd, char *reply, size_t len,
+			  unsigned int timeout)
+{
+	ssize_t ret;
+
+	/* reply[len - 1] below needs at least one byte of buffer */
+	if (reply == NULL || len == 0) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	ret = read_all(fd, reply, len, timeout);
+	if (ret < 0)
+		return ret;
+	if ((size_t)ret != len) {
+		errno = EIO;
+		return -1;
+	}
+	reply[len - 1] = '\0';
+	return 0;
+}
+
+int mpath_recv_reply(int fd, char **reply, unsigned int timeout)
+{
+	int err;
+	ssize_t len;
+
+	*reply = NULL;
+	len = mpath_recv_reply_len(fd, timeout);
+	if (len <= 0)
+		return len;
+	*reply = malloc(len);
+	if (!*reply)
+		return -1;
+	err = mpath_recv_reply_data(fd, *reply, len, timeout);
+	if (err) {
+		free(*reply);
+		*reply = NULL;
+		return -1;
+	}
+	return 0;
+}
+
+int mpath_send_cmd(int fd, const char *cmd)
+{
+	size_t len;
+
+	if (cmd != NULL)
+		len = strlen(cmd) + 1;
+	else
+		len = 0;
+	if (write_all(fd, &len, sizeof(len)) != sizeof(len))
+		return -1;
+	if (len && write_all(fd, cmd, len) != len)
+		return -1;
+	return 0;
+}
+
+int mpath_process_cmd(int fd, const char *cmd, char **reply,
+		      unsigned int timeout)
+{
+	if (mpath_send_cmd(fd, cmd) != 0)
+		return -1;
+	return mpath_recv_reply(fd, reply, timeout);
+}
diff --git a/libmpathcmd/mpath_cmd.h b/libmpathcmd/mpath_cmd.h
new file mode 100644
index 0000000..ccfd35f
--- /dev/null
+++ b/libmpathcmd/mpath_cmd.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * This file is part of the device-mapper multipath userspace tools.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see .
+ */
+
+#ifndef LIB_MPATH_CMD_H
+#define LIB_MPATH_CMD_H
+
+/*
+ * This should be sufficient for json output for >10000 maps,
+ * and >60000 paths.
+ */
+#define MAX_REPLY_LEN (32 * 1024 * 1024)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define DEFAULT_SOCKET "/org/kernel/linux/storage/multipathd"
+#define DEFAULT_REPLY_TIMEOUT 4000
+
+
+/*
+ * DESCRIPTION:
+ *	Same as mpath_connect() (see below) except for the "nonblocking"
+ *	parameter.
+ *	If "nonblocking" is set, connects in non-blocking mode. This is
+ *	useful to avoid blocking if the listening socket's backlog is
+ *	exceeded. In this case, errno will be set to EAGAIN.
+ *	In case of success, the returned file descriptor is in blocking
+ *	mode, even if "nonblocking" was true.
+ *
+ * RETURNS:
+ *	A file descriptor on success. -1 on failure (with errno set).
+ */
+int __mpath_connect(int nonblocking);
+
+/*
+ * DESCRIPTION:
+ *	Connect to the running multipathd daemon. On systems with the
+ *	multipathd.socket systemd unit file installed, this command will
+ *	start multipathd if it is not already running. This function
+ *	must be run before any of the others in this library
+ *
+ * RETURNS:
+ *	A file descriptor on success. -1 on failure (with errno set).
+ */
+int mpath_connect(void);
+
+
+/*
+ * DESCRIPTION:
+ *	Disconnect from the multipathd daemon. This function must be
+ *	run after processing all the multipath commands.
+ *
+ * RETURNS:
+ *	0 on success. -1 on failure (with errno set).
+ */
+int mpath_disconnect(int fd);
+
+
+/*
+ * DESCRIPTION:
+ *	Send multipathd a command and return the reply. This function
+ *	does the same as calling mpath_send_cmd() and then
+ *	mpath_recv_reply()
+ *
+ * RETURNS:
+ *	0 on success, and reply will either be NULL (if there was no
+ *	reply data), or point to the reply string, which must be freed by
+ *	the caller. -1 on failure (with errno set).
+ */
+int mpath_process_cmd(int fd, const char *cmd, char **reply,
+		      unsigned int timeout);
+
+
+/*
+ * DESCRIPTION:
+ *	Send a command to multipathd
+ *
+ * RETURNS:
+ *	0 on success. -1 on failure (with errno set)
+ */
+int mpath_send_cmd(int fd, const char *cmd);
+
+
+/*
+ * DESCRIPTION:
+ *	Return a reply from multipathd for a previously sent command.
+ *	This is equivalent to calling mpath_recv_reply_len(), allocating
+ *	a buffer of the appropriate size, and then calling
+ *	mpath_recv_reply_data() with that buffer.
+ * + * RETURNS: + * 0 on success, and reply will either be NULL (if there was no + * reply data), or point to the reply string, which must be freed by + * the caller, -1 on failure (with errno set). + */ +int mpath_recv_reply(int fd, char **reply, unsigned int timeout); + + +/* + * DESCRIPTION: + * Return the size of the upcoming reply data from the sent multipath + * command. This must be called before calling mpath_recv_reply_data(). + * + * RETURNS: + * The required size of the reply data buffer on success. -1 on + * failure (with errno set). + */ +ssize_t mpath_recv_reply_len(int fd, unsigned int timeout); + + +/* + * DESCRIPTION: + * Return the reply data from the sent multipath command. + * mpath_recv_reply_len must be called first. reply must point to a + * buffer of len size. + * + * RETURNS: + * 0 on success, and reply will contain the reply data string. -1 + * on failure (with errno set). + */ +int mpath_recv_reply_data(int fd, char *reply, size_t len, + unsigned int timeout); + +#ifdef __cplusplus +} +#endif +#endif /* LIB_MPATH_CMD_H */ diff --git a/libmpathpersist/Makefile b/libmpathpersist/Makefile new file mode 100644 index 0000000..21fdad8 --- /dev/null +++ b/libmpathpersist/Makefile @@ -0,0 +1,47 @@ +include ../Makefile.inc + +SONAME = 0 +DEVLIB = libmpathpersist.so +LIBS = $(DEVLIB).$(SONAME) + +CFLAGS += $(LIB_CFLAGS) -I$(multipathdir) -I$(mpathpersistdir) -I$(mpathcmddir) + +LIBDEPS += -lpthread -ldevmapper -ldl -L$(multipathdir) -lmultipath \ + -L$(mpathcmddir) -lmpathcmd + +OBJS = mpath_persist.o mpath_updatepr.o mpath_pr_ioctl.o + +all: $(LIBS) + +$(LIBS): $(OBJS) + $(CC) $(LDFLAGS) $(SHARED_FLAGS) $(LIBDEPS) -Wl,-soname=$@ -o $@ $(OBJS) + $(LN) $(LIBS) $(DEVLIB) + $(GZIP) mpath_persistent_reserve_in.3 > mpath_persistent_reserve_in.3.gz + $(GZIP) mpath_persistent_reserve_out.3 > mpath_persistent_reserve_out.3.gz + +install: $(LIBS) + $(INSTALL_PROGRAM) -d $(DESTDIR)$(syslibdir) + $(INSTALL_PROGRAM) -m 755 $(LIBS) 
$(DESTDIR)$(syslibdir)/$(LIBS) + $(INSTALL_PROGRAM) -m 755 -d $(DESTDIR)$(syslibdir) + $(INSTALL_PROGRAM) -m 755 -d $(DESTDIR)$(man3dir) + $(INSTALL_PROGRAM) -m 755 -d $(DESTDIR)$(includedir) + $(LN) $(LIBS) $(DESTDIR)$(syslibdir)/$(DEVLIB) + $(INSTALL_PROGRAM) -m 644 mpath_persistent_reserve_in.3.gz $(DESTDIR)$(man3dir) + $(INSTALL_PROGRAM) -m 644 mpath_persistent_reserve_out.3.gz $(DESTDIR)$(man3dir) + $(INSTALL_PROGRAM) -m 644 mpath_persist.h $(DESTDIR)$(includedir) + +uninstall: + $(RM) $(DESTDIR)$(syslibdir)/$(LIBS) + $(RM) $(DESTDIR)$(man3dir)/mpath_persistent_reserve_in.3.gz + $(RM) $(DESTDIR)$(man3dir)/mpath_persistent_reserve_out.3.gz + $(RM) $(DESTDIR)$(includedir)/mpath_persist.h + $(RM) $(DESTDIR)$(syslibdir)/$(DEVLIB) + +clean: dep_clean + $(RM) core *.a *.o *.so *.so.* *.gz + +include $(wildcard $(OBJS:.o=.d)) + + +dep_clean: + $(RM) $(OBJS:.o=.d) diff --git a/libmpathpersist/mpath_persist.c b/libmpathpersist/mpath_persist.c new file mode 100644 index 0000000..3da7a6c --- /dev/null +++ b/libmpathpersist/mpath_persist.c @@ -0,0 +1,914 @@ +#include <libdevmapper.h> +#include "defaults.h" +#include <sys/stat.h> +#include <sys/types.h> +#include <fcntl.h> +#include "vector.h" +#include "checkers.h" +#include "structs.h" +#include "structs_vec.h" +#include <libudev.h> + +#include "prio.h" +#include <unistd.h> +#include "devmapper.h" +#include "debug.h" +#include "config.h" +#include "switchgroup.h" +#include "discovery.h" +#include "configure.h" +#include "dmparser.h" +#include <ctype.h> +#include "propsel.h" +#include "util.h" +#include "unaligned.h" + +#include "mpath_persist.h" +#include "mpathpr.h" +#include "mpath_pr_ioctl.h" + +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#define __STDC_FORMAT_MACROS 1 + +extern struct udev *udev; + +struct config * +mpath_lib_init (void) +{ + struct config *conf; + + conf = load_config(DEFAULT_CONFIGFILE); + if (!conf) { + condlog(0, "Failed to initialize multipath config."); + return NULL; + } + conf->force_sync = 1; + set_max_fds(conf->max_fds); + + return conf; +} + +int +mpath_lib_exit 
(struct config *conf) +{ + dm_lib_release(); + dm_lib_exit(); + cleanup_prio(); + cleanup_checkers(); + free_config(conf); + conf = NULL; + return 0; +} + +static int +updatepaths (struct multipath * mpp) +{ + int i, j; + struct pathgroup * pgp; + struct path * pp; + struct config *conf; + + if (!mpp->pg) + return 0; + + vector_foreach_slot (mpp->pg, pgp, i){ + if (!pgp->paths) + continue; + + vector_foreach_slot (pgp->paths, pp, j){ + if (!strlen(pp->dev)){ + /* + * path is not in sysfs anymore + */ + pp->state = PATH_DOWN; + continue; + } + pp->mpp = mpp; + if (pp->udev == NULL) { + pp->udev = get_udev_device(pp->dev_t, DEV_DEVT); + if (pp->udev == NULL) { + pp->state = PATH_DOWN; + continue; + } + conf = get_multipath_config(); + pathinfo(pp, conf, DI_SYSFS|DI_CHECKER); + put_multipath_config(conf); + continue; + } + if (pp->state == PATH_UNCHECKED || + pp->state == PATH_WILD) { + conf = get_multipath_config(); + pathinfo(pp, conf, DI_CHECKER); + put_multipath_config(conf); + } + } + } + return 0; +} + +int +mpath_prin_activepath (struct multipath *mpp, int rq_servact, + struct prin_resp * resp, int noisy) +{ + int i,j, ret = MPATH_PR_DMMP_ERROR; + struct pathgroup *pgp = NULL; + struct path *pp = NULL; + + vector_foreach_slot (mpp->pg, pgp, j){ + vector_foreach_slot (pgp->paths, pp, i){ + if (!((pp->state == PATH_UP) || + (pp->state == PATH_GHOST))){ + condlog(2, "%s: %s not available. 
Skip.", + mpp->wwid, pp->dev); + condlog(3, "%s: status = %d.", + mpp->wwid, pp->state); + continue; + } + + condlog(3, "%s: sending pr in command to %s ", + mpp->wwid, pp->dev); + ret = mpath_send_prin_activepath(pp->dev, rq_servact, + resp, noisy); + switch(ret) + { + case MPATH_PR_SUCCESS: + case MPATH_PR_SENSE_INVALID_OP: + return ret; + default: + continue; + } + } + } + return ret; +} + +int mpath_persistent_reserve_in (int fd, int rq_servact, + struct prin_resp *resp, int noisy, int verbose) +{ + int ret = mpath_persistent_reserve_init_vecs(verbose); + + if (ret != MPATH_PR_SUCCESS) + return ret; + ret = __mpath_persistent_reserve_in(fd, rq_servact, resp, noisy); + mpath_persistent_reserve_free_vecs(); + return ret; +} + +int mpath_persistent_reserve_out ( int fd, int rq_servact, int rq_scope, + unsigned int rq_type, struct prout_param_descriptor *paramp, int noisy, int verbose) +{ + int ret = mpath_persistent_reserve_init_vecs(verbose); + + if (ret != MPATH_PR_SUCCESS) + return ret; + ret = __mpath_persistent_reserve_out(fd, rq_servact, rq_scope, rq_type, + paramp, noisy); + mpath_persistent_reserve_free_vecs(); + return ret; +} + +static vector curmp; +static vector pathvec; + +void mpath_persistent_reserve_free_vecs(void) +{ + free_multipathvec(curmp, KEEP_PATHS); + free_pathvec(pathvec, FREE_PATHS); + curmp = pathvec = NULL; +} + +int mpath_persistent_reserve_init_vecs(int verbose) +{ + struct config *conf = get_multipath_config(); + + conf->verbosity = verbose; + put_multipath_config(conf); + + if (curmp) + return MPATH_PR_SUCCESS; + /* + * allocate core vectors to store paths and multipaths + */ + curmp = vector_alloc (); + pathvec = vector_alloc (); + + if (!curmp || !pathvec){ + condlog (0, "vector allocation failed."); + goto err; + } + + if (dm_get_maps(curmp)) + goto err; + + return MPATH_PR_SUCCESS; + +err: + mpath_persistent_reserve_free_vecs(); + return MPATH_PR_DMMP_ERROR; +} + +static int mpath_get_map(int fd, char **palias, struct multipath 
**pmpp) +{ + int ret = MPATH_PR_DMMP_ERROR; + struct stat info; + int major, minor; + char *alias; + struct multipath *mpp; + + if (fstat(fd, &info) != 0){ + condlog(0, "stat error fd=%d", fd); + return MPATH_PR_FILE_ERROR; + } + if(!S_ISBLK(info.st_mode)){ + condlog(3, "Failed to get major:minor. fd=%d", fd); + return MPATH_PR_FILE_ERROR; + } + + major = major(info.st_rdev); + minor = minor(info.st_rdev); + condlog(4, "Device %d:%d", major, minor); + + /* get alias from major:minor*/ + alias = dm_mapname(major, minor); + if (!alias){ + condlog(0, "%d:%d failed to get device alias.", major, minor); + return MPATH_PR_DMMP_ERROR; + } + + condlog(3, "alias = %s", alias); + + if (dm_map_present(alias) && dm_is_mpath(alias) != 1){ + condlog(3, "%s: not a multipath device.", alias); + goto out; + } + + /* get info of all paths from the dm device */ + if (get_mpvec(curmp, pathvec, alias)){ + condlog(0, "%s: failed to get device info.", alias); + goto out; + } + + mpp = find_mp_by_alias(curmp, alias); + + if (!mpp) { + condlog(0, "%s: devmap not registered.", alias); + goto out; + } + + ret = MPATH_PR_SUCCESS; + if (pmpp) + *pmpp = mpp; + if (palias) { + *palias = alias; + alias = NULL; + } +out: + FREE(alias); + return ret; +} + +int __mpath_persistent_reserve_in (int fd, int rq_servact, + struct prin_resp *resp, int noisy) +{ + struct multipath *mpp; + int ret; + + ret = mpath_get_map(fd, NULL, &mpp); + if (ret != MPATH_PR_SUCCESS) + return ret; + + ret = mpath_prin_activepath(mpp, rq_servact, resp, noisy); + + return ret; +} + +int __mpath_persistent_reserve_out ( int fd, int rq_servact, int rq_scope, + unsigned int rq_type, struct prout_param_descriptor *paramp, int noisy) +{ + struct multipath *mpp; + char *alias; + int ret; + uint64_t prkey; + struct config *conf; + + ret = mpath_get_map(fd, &alias, &mpp); + if (ret != MPATH_PR_SUCCESS) + return ret; + + conf = get_multipath_config(); + select_reservation_key(conf, mpp); + select_all_tg_pt(conf, mpp); + 
put_multipath_config(conf); + + memcpy(&prkey, paramp->sa_key, 8); + if (mpp->prkey_source == PRKEY_SOURCE_FILE && prkey && + ((!get_be64(mpp->reservation_key) && + rq_servact == MPATH_PROUT_REG_SA) || + rq_servact == MPATH_PROUT_REG_IGN_SA)) { + memcpy(&mpp->reservation_key, paramp->sa_key, 8); + if (update_prkey_flags(alias, get_be64(mpp->reservation_key), + paramp->sa_flags)) { + condlog(0, "%s: failed to set prkey for multipathd.", + alias); + ret = MPATH_PR_DMMP_ERROR; + goto out1; + } + } + + if (memcmp(paramp->key, &mpp->reservation_key, 8) && + memcmp(paramp->sa_key, &mpp->reservation_key, 8)) { + condlog(0, "%s: configured reservation key doesn't match: 0x%" PRIx64, alias, get_be64(mpp->reservation_key)); + ret = MPATH_PR_SYNTAX_ERROR; + goto out1; + } + + switch(rq_servact) + { + case MPATH_PROUT_REG_SA: + case MPATH_PROUT_REG_IGN_SA: + ret= mpath_prout_reg(mpp, rq_servact, rq_scope, rq_type, paramp, noisy); + break; + case MPATH_PROUT_RES_SA : + case MPATH_PROUT_PREE_SA : + case MPATH_PROUT_PREE_AB_SA : + case MPATH_PROUT_CLEAR_SA: + ret = mpath_prout_common(mpp, rq_servact, rq_scope, rq_type, paramp, noisy); + break; + case MPATH_PROUT_REL_SA: + ret = mpath_prout_rel(mpp, rq_servact, rq_scope, rq_type, paramp, noisy); + break; + default: + ret = MPATH_PR_OTHER; + goto out1; + } + + if ((ret == MPATH_PR_SUCCESS) && ((rq_servact == MPATH_PROUT_REG_SA) || + (rq_servact == MPATH_PROUT_REG_IGN_SA))) + { + if (prkey == 0) { + update_prflag(alias, 0); + update_prkey(alias, 0); + } else + update_prflag(alias, 1); + } else if ((ret == MPATH_PR_SUCCESS) && (rq_servact == MPATH_PROUT_CLEAR_SA)) { + update_prflag(alias, 0); + update_prkey(alias, 0); + } +out1: + FREE(alias); + return ret; +} + +int +get_mpvec (vector curmp, vector pathvec, char * refwwid) +{ + int i; + struct multipath *mpp; + char params[PARAMS_SIZE], status[PARAMS_SIZE]; + + vector_foreach_slot (curmp, mpp, i){ + /* + * discard out of scope maps + */ + if (!mpp->alias) { + condlog(0, "%s: map 
with empty alias!", __func__); + continue; + } + + if (mpp->pg != NULL) + /* Already seen this one */ + continue; + + if (refwwid && strncmp (mpp->alias, refwwid, WWID_SIZE - 1)) + continue; + + dm_get_map(mpp->alias, &mpp->size, params); + condlog(3, "params = %s", params); + dm_get_status(mpp->alias, status); + condlog(3, "status = %s", status); + disassemble_map (pathvec, params, mpp, 0); + + /* + * disassemble_map() can add new paths to pathvec. + * If not in "fast list mode", we need to fetch information + * about them + */ + updatepaths(mpp); + disassemble_status (status, mpp); + + } + return MPATH_PR_SUCCESS ; +} + +int mpath_send_prin_activepath (char * dev, int rq_servact, + struct prin_resp * resp, int noisy) +{ + + int rc; + + rc = prin_do_scsi_ioctl(dev, rq_servact, resp, noisy); + + return (rc); +} + +int mpath_prout_reg(struct multipath *mpp,int rq_servact, int rq_scope, + unsigned int rq_type, struct prout_param_descriptor * paramp, int noisy) +{ + + int i, j, k; + struct pathgroup *pgp = NULL; + struct path *pp = NULL; + int rollback = 0; + int active_pathcount=0; + int rc; + int count=0; + int status = MPATH_PR_SUCCESS; + int all_tg_pt; + uint64_t sa_key = 0; + + if (!mpp) + return MPATH_PR_DMMP_ERROR; + + all_tg_pt = (mpp->all_tg_pt == ALL_TG_PT_ON || + paramp->sa_flags & MPATH_F_ALL_TG_PT_MASK); + active_pathcount = pathcount(mpp, PATH_UP) + pathcount(mpp, PATH_GHOST); + + if (active_pathcount == 0) { + condlog (0, "%s: no path available", mpp->wwid); + return MPATH_PR_DMMP_ERROR; + } + + struct threadinfo thread[active_pathcount]; + int hosts[active_pathcount]; + + memset(thread, 0, sizeof(thread)); + + /* init thread parameter */ + for (i =0; i< active_pathcount; i++){ + hosts[i] = -1; + thread[i].param.rq_servact = rq_servact; + thread[i].param.rq_scope = rq_scope; + thread[i].param.rq_type = rq_type; + thread[i].param.paramp = paramp; + thread[i].param.noisy = noisy; + thread[i].param.status = MPATH_PR_SKIP; + + condlog (3, "THREAD ID [%d] 
INFO]", i); + condlog (3, "rq_servact=%d ", thread[i].param.rq_servact); + condlog (3, "rq_scope=%d ", thread[i].param.rq_scope); + condlog (3, "rq_type=%d ", thread[i].param.rq_type); + condlog (3, "rkey="); + condlog (3, "paramp->sa_flags =%02x ", + thread[i].param.paramp->sa_flags); + condlog (3, "noisy=%d ", thread[i].param.noisy); + condlog (3, "status=%d ", thread[i].param.status); + } + + pthread_attr_t attr; + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); + + vector_foreach_slot (mpp->pg, pgp, j){ + vector_foreach_slot (pgp->paths, pp, i){ + if (!((pp->state == PATH_UP) || (pp->state == PATH_GHOST))){ + condlog (1, "%s: %s path not up. Skip.", mpp->wwid, pp->dev); + continue; + } + if (all_tg_pt && pp->sg_id.host_no != -1) { + for (k = 0; k < count; k++) { + if (pp->sg_id.host_no == hosts[k]) { + condlog(3, "%s: %s host %d matches skip.", pp->wwid, pp->dev, pp->sg_id.host_no); + break; + } + } + if (k < count) + continue; + } + strlcpy(thread[count].param.dev, pp->dev, + FILE_NAME_SIZE); + + if (count && (thread[count].param.paramp->sa_flags & MPATH_F_SPEC_I_PT_MASK)){ + /* + * Clearing SPEC_I_PT as transportids are already registered by now. 
+ */ + thread[count].param.paramp->sa_flags &= (~MPATH_F_SPEC_I_PT_MASK); + } + + condlog (3, "%s: sending pr out command to %s", mpp->wwid, pp->dev); + + rc = pthread_create(&thread[count].id, &attr, mpath_prout_pthread_fn, (void *)(&thread[count].param)); + if (rc){ + condlog (0, "%s: failed to create thread %d", mpp->wwid, rc); + thread[count].param.status = MPATH_PR_THREAD_ERROR; + } + else + hosts[count] = pp->sg_id.host_no; + count = count + 1; + } + } + for( i=0; i < count ; i++){ + if (thread[i].param.status != MPATH_PR_THREAD_ERROR) { + rc = pthread_join(thread[i].id, NULL); + if (rc){ + condlog (0, "%s: Thread[%d] failed to join thread %d", mpp->wwid, i, rc); + } + } + if (!rollback && (thread[i].param.status == MPATH_PR_RESERV_CONFLICT)){ + rollback = 1; + sa_key = get_unaligned_be64(&paramp->sa_key[0]); + status = MPATH_PR_RESERV_CONFLICT ; + } + if (!rollback && (status == MPATH_PR_SUCCESS)){ + status = thread[i].param.status; + } + } + if (rollback && ((rq_servact == MPATH_PROUT_REG_SA) && sa_key != 0 )){ + condlog (3, "%s: ERROR: initiating pr out rollback", mpp->wwid); + memcpy(&paramp->key, &paramp->sa_key, 8); + memset(&paramp->sa_key, 0, 8); + for( i=0 ; i < count ; i++){ + if(thread[i].param.status == MPATH_PR_SUCCESS) { + rc = pthread_create(&thread[i].id, &attr, mpath_prout_pthread_fn, + (void *)(&thread[i].param)); + if (rc){ + condlog (0, "%s: failed to create thread for rollback. 
%d", mpp->wwid, rc); + thread[i].param.status = MPATH_PR_THREAD_ERROR; + } + } else + thread[i].param.status = MPATH_PR_SKIP; + } + for(i=0; i < count ; i++){ + if (thread[i].param.status != MPATH_PR_SKIP && + thread[i].param.status != MPATH_PR_THREAD_ERROR) { + rc = pthread_join(thread[i].id, NULL); + if (rc){ + condlog (3, "%s: failed to join thread while rolling back %d", + mpp->wwid, i); + } + } + } + } + + pthread_attr_destroy(&attr); + return (status); +} + +void * mpath_prout_pthread_fn(void *p) +{ + int ret; + struct prout_param * param = (struct prout_param *)p; + + ret = prout_do_scsi_ioctl( param->dev,param->rq_servact, param->rq_scope, + param->rq_type, param->paramp, param->noisy); + param->status = ret; + pthread_exit(NULL); +} + +int mpath_prout_common(struct multipath *mpp,int rq_servact, int rq_scope, + unsigned int rq_type, struct prout_param_descriptor* paramp, int noisy) +{ + int i,j, ret; + struct pathgroup *pgp = NULL; + struct path *pp = NULL; + + vector_foreach_slot (mpp->pg, pgp, j){ + vector_foreach_slot (pgp->paths, pp, i){ + if (!((pp->state == PATH_UP) || (pp->state == PATH_GHOST))){ + condlog (1, "%s: %s path not up. 
Skip", + mpp->wwid, pp->dev); + continue; + } + + condlog (3, "%s: sending pr out command to %s", mpp->wwid, pp->dev); + ret = send_prout_activepath(pp->dev, rq_servact, + rq_scope, rq_type, + paramp, noisy); + return ret ; + } + } + return MPATH_PR_SUCCESS; +} + +int send_prout_activepath(char * dev, int rq_servact, int rq_scope, + unsigned int rq_type, struct prout_param_descriptor * paramp, int noisy) +{ + struct prout_param param; + param.rq_servact = rq_servact; + param.rq_scope = rq_scope; + param.rq_type = rq_type; + param.paramp = paramp; + param.noisy = noisy; + param.status = -1; + + pthread_t thread; + pthread_attr_t attr; + int rc; + + memset(&thread, 0, sizeof(thread)); + strlcpy(param.dev, dev, FILE_NAME_SIZE); + /* Initialize and set thread joinable attribute */ + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); + + rc = pthread_create(&thread, &attr, mpath_prout_pthread_fn, (void *)(&param)); + if (rc){ + condlog (3, "%s: failed to create thread %d", dev, rc); + return MPATH_PR_THREAD_ERROR; + } + /* Free attribute and wait for the other threads */ + pthread_attr_destroy(&attr); + rc = pthread_join(thread, NULL); + + return (param.status); +} + +int mpath_prout_rel(struct multipath *mpp,int rq_servact, int rq_scope, + unsigned int rq_type, struct prout_param_descriptor * paramp, int noisy) +{ + int i, j; + int num = 0; + struct pathgroup *pgp = NULL; + struct path *pp = NULL; + int active_pathcount = 0; + pthread_attr_t attr; + int rc, found = 0; + int count = 0; + int status = MPATH_PR_SUCCESS; + struct prin_resp resp; + struct prout_param_descriptor *pamp; + struct prin_resp *pr_buff; + int length; + struct transportid *pptr; + + if (!mpp) + return MPATH_PR_DMMP_ERROR; + + active_pathcount = pathcount (mpp, PATH_UP) + pathcount (mpp, PATH_GHOST); + + struct threadinfo thread[active_pathcount]; + memset(thread, 0, sizeof(thread)); + for (i = 0; i < active_pathcount; i++){ + thread[i].param.rq_servact = 
rq_servact; + thread[i].param.rq_scope = rq_scope; + thread[i].param.rq_type = rq_type; + thread[i].param.paramp = paramp; + thread[i].param.noisy = noisy; + thread[i].param.status = MPATH_PR_SKIP; + + condlog (3, " path count = %d", i); + condlog (3, "rq_servact=%d ", thread[i].param.rq_servact); + condlog (3, "rq_scope=%d ", thread[i].param.rq_scope); + condlog (3, "rq_type=%d ", thread[i].param.rq_type); + condlog (3, "noisy=%d ", thread[i].param.noisy); + condlog (3, "status=%d ", thread[i].param.status); + } + + pthread_attr_init (&attr); + pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_JOINABLE); + + vector_foreach_slot (mpp->pg, pgp, j){ + vector_foreach_slot (pgp->paths, pp, i){ + if (!((pp->state == PATH_UP) || (pp->state == PATH_GHOST))){ + condlog (1, "%s: %s path not up.", mpp->wwid, pp->dev); + continue; + } + + strlcpy(thread[count].param.dev, pp->dev, + FILE_NAME_SIZE); + condlog (3, "%s: sending pr out command to %s", mpp->wwid, pp->dev); + rc = pthread_create (&thread[count].id, &attr, mpath_prout_pthread_fn, + (void *) (&thread[count].param)); + if (rc) { + condlog (0, "%s: failed to create thread. %d", mpp->wwid, rc); + thread[count].param.status = MPATH_PR_THREAD_ERROR; + } + count = count + 1; + } + } + pthread_attr_destroy (&attr); + for (i = 0; i < count; i++){ + if (thread[i].param.status != MPATH_PR_THREAD_ERROR) { + rc = pthread_join (thread[i].id, NULL); + if (rc){ + condlog (1, "%s: failed to join thread. 
%d", mpp->wwid, rc); + } + } + } + + for (i = 0; i < count; i++){ + /* check thread status here and return the status */ + + if (thread[i].param.status == MPATH_PR_RESERV_CONFLICT) + status = MPATH_PR_RESERV_CONFLICT; + else if (status == MPATH_PR_SUCCESS + && thread[i].param.status != MPATH_PR_RESERV_CONFLICT) + status = thread[i].param.status; + } + + status = mpath_prin_activepath (mpp, MPATH_PRIN_RRES_SA, &resp, noisy); + if (status != MPATH_PR_SUCCESS){ + condlog (0, "%s: pr in read reservation command failed.", mpp->wwid); + return MPATH_PR_OTHER; + } + + num = resp.prin_descriptor.prin_readresv.additional_length / 8; + if (num == 0){ + condlog (2, "%s: Path holding reservation is released.", mpp->wwid); + return MPATH_PR_SUCCESS; + } + condlog (2, "%s: Path holding reservation is not avialable.", mpp->wwid); + + pr_buff = mpath_alloc_prin_response(MPATH_PRIN_RFSTAT_SA); + if (!pr_buff){ + condlog (0, "%s: failed to alloc pr in response buffer.", mpp->wwid); + return MPATH_PR_OTHER; + } + + status = mpath_prin_activepath (mpp, MPATH_PRIN_RFSTAT_SA, pr_buff, noisy); + + if (status != MPATH_PR_SUCCESS){ + condlog (0, "%s: pr in read full status command failed.", mpp->wwid); + goto out; + } + + num = pr_buff->prin_descriptor.prin_readfd.number_of_descriptor; + if (0 == num){ + goto out; + } + length = sizeof (struct prout_param_descriptor) + (sizeof (struct transportid *)); + + pamp = (struct prout_param_descriptor *)malloc (length); + if (!pamp){ + condlog (0, "%s: failed to alloc pr out parameter.", mpp->wwid); + goto out1; + } + + memset(pamp, 0, length); + + pamp->trnptid_list[0] = (struct transportid *) malloc (sizeof (struct transportid)); + if (!pamp->trnptid_list[0]){ + condlog (0, "%s: failed to alloc pr out transportid.", mpp->wwid); + goto out1; + } + + if (get_be64(mpp->reservation_key)){ + memcpy (pamp->key, &mpp->reservation_key, 8); + condlog (3, "%s: reservation key set.", mpp->wwid); + } + + status = mpath_prout_common (mpp, 
MPATH_PROUT_CLEAR_SA, + rq_scope, rq_type, pamp, noisy); + + if (status) { + condlog(0, "%s: failed to send CLEAR_SA", mpp->wwid); + goto out1; + } + + pamp->num_transportid = 1; + pptr=pamp->trnptid_list[0]; + + for (i = 0; i < num; i++){ + if (get_be64(mpp->reservation_key) && + memcmp(pr_buff->prin_descriptor.prin_readfd.descriptors[i]->key, + &mpp->reservation_key, 8)){ + /*register with tarnsport id*/ + memset(pamp, 0, length); + pamp->trnptid_list[0] = pptr; + memset (pamp->trnptid_list[0], 0, sizeof (struct transportid)); + memcpy (pamp->sa_key, + pr_buff->prin_descriptor.prin_readfd.descriptors[i]->key, 8); + pamp->sa_flags = MPATH_F_SPEC_I_PT_MASK; + pamp->num_transportid = 1; + + memcpy (pamp->trnptid_list[0], + &pr_buff->prin_descriptor.prin_readfd.descriptors[i]->trnptid, + sizeof (struct transportid)); + status = mpath_prout_common (mpp, MPATH_PROUT_REG_SA, 0, rq_type, + pamp, noisy); + + pamp->sa_flags = 0; + memcpy (pamp->key, pr_buff->prin_descriptor.prin_readfd.descriptors[i]->key, 8); + memset (pamp->sa_key, 0, 8); + pamp->num_transportid = 0; + status = mpath_prout_common (mpp, MPATH_PROUT_REG_SA, 0, rq_type, + pamp, noisy); + } + else + { + if (get_be64(mpp->reservation_key)) + found = 1; + } + + + } + + if (found){ + memset (pamp, 0, length); + memcpy (pamp->sa_key, &mpp->reservation_key, 8); + memset (pamp->key, 0, 8); + status = mpath_prout_reg(mpp, MPATH_PROUT_REG_SA, rq_scope, rq_type, pamp, noisy); + } + + + free(pptr); +out1: + free (pamp); +out: + free (pr_buff); + return (status); +} + +void * mpath_alloc_prin_response(int prin_sa) +{ + void * ptr = NULL; + int size=0; + switch (prin_sa) + { + case MPATH_PRIN_RKEY_SA: + size = sizeof(struct prin_readdescr); + break; + case MPATH_PRIN_RRES_SA: + size = sizeof(struct prin_resvdescr); + break; + case MPATH_PRIN_RCAP_SA: + size=sizeof(struct prin_capdescr); + break; + case MPATH_PRIN_RFSTAT_SA: + size = sizeof(struct print_fulldescr_list) + + sizeof(struct prin_fulldescr *)*MPATH_MX_TIDS; + 
break; + } + if (size > 0) + { + ptr = calloc(size, 1); + } + return ptr; +} + +int update_map_pr(struct multipath *mpp) +{ + int noisy=0; + struct prin_resp *resp; + unsigned int i; + int ret, isFound; + + if (!get_be64(mpp->reservation_key)) + { + /* Nothing to do. Assuming pr mgmt feature is disabled*/ + condlog(4, "%s: reservation_key not set in multipath.conf", + mpp->alias); + return MPATH_PR_SUCCESS; + } + + resp = mpath_alloc_prin_response(MPATH_PRIN_RKEY_SA); + if (!resp) + { + condlog(0,"%s : failed to alloc resp in update_map_pr", mpp->alias); + return MPATH_PR_OTHER; + } + ret = mpath_prin_activepath(mpp, MPATH_PRIN_RKEY_SA, resp, noisy); + + if (ret != MPATH_PR_SUCCESS ) + { + condlog(0,"%s : pr in read keys service action failed Error=%d", mpp->alias, ret); + free(resp); + return ret; + } + + if (resp->prin_descriptor.prin_readkeys.additional_length == 0 ) + { + condlog(3,"%s: No key found. Device may not be registered. ", mpp->alias); + free(resp); + return MPATH_PR_SUCCESS; + } + + condlog(2, "%s: Multipath reservation_key: 0x%" PRIx64 " ", mpp->alias, + get_be64(mpp->reservation_key)); + + isFound =0; + for (i = 0; i < resp->prin_descriptor.prin_readkeys.additional_length/8; i++ ) + { + condlog(2, "%s: PR IN READKEYS[%d] reservation key:", mpp->alias, i); + dumpHex((char *)&resp->prin_descriptor.prin_readkeys.key_list[i*8], 8 , 1); + + if (!memcmp(&mpp->reservation_key, &resp->prin_descriptor.prin_readkeys.key_list[i*8], 8)) + { + condlog(2, "%s: reservation key found in pr in readkeys response", mpp->alias); + isFound =1; + } + } + + if (isFound) + { + mpp->prflag = 1; + condlog(2, "%s: prflag flag set.", mpp->alias ); + } + + free(resp); + return MPATH_PR_SUCCESS; +} diff --git a/libmpathpersist/mpath_persist.h b/libmpathpersist/mpath_persist.h new file mode 100644 index 0000000..7cf4faf --- /dev/null +++ b/libmpathpersist/mpath_persist.h @@ -0,0 +1,286 @@ +/* version - 1.0 */ + +#ifndef MPATH_PERSIST_LIB_H +#define MPATH_PERSIST_LIB_H + + 
+#ifdef __cplusplus +extern "C" { +#endif + +#include <inttypes.h> + +#define MPATH_MAX_PARAM_LEN 8192 + +#define MPATH_MX_TIDS 32 /* Max number of transport ids */ +#define MPATH_MX_TID_LEN 256 /* Max length of transport id */ + +/* PRIN Service Actions */ +#define MPATH_PRIN_RKEY_SA 0x00 /* READ KEYS SA*/ +#define MPATH_PRIN_RRES_SA 0x01 /* READ RESERVATION SA*/ +#define MPATH_PRIN_RCAP_SA 0x02 /* REPORT CAPABILITIES SA*/ +#define MPATH_PRIN_RFSTAT_SA 0x03 /* READ FULL STATUS SA*/ + +/* PROUT Service Actions */ +#define MPATH_PROUT_REG_SA 0x00 /* REGISTER SA */ +#define MPATH_PROUT_RES_SA 0x01 /* RESERVE SA*/ +#define MPATH_PROUT_REL_SA 0x02 /* RELEASE SA*/ +#define MPATH_PROUT_CLEAR_SA 0x03 /* CLEAR SA*/ +#define MPATH_PROUT_PREE_SA 0x04 /* PREEMPT SA*/ +#define MPATH_PROUT_PREE_AB_SA 0x05 /* PREEMPT AND ABORT SA*/ +#define MPATH_PROUT_REG_IGN_SA 0x06 /* REGISTER AND IGNORE EXISTING KEY SA*/ +#define MPATH_PROUT_REG_MOV_SA 0x07 /* REGISTER AND MOVE SA*/ + +#define MPATH_LU_SCOPE 0x00 /* LU_SCOPE */ + +/* Persistent reservations type */ +#define MPATH_PRTPE_WE 0x01 /* Write Exclusive */ +#define MPATH_PRTPE_EA 0x03 /* Exclusive Access*/ +#define MPATH_PRTPE_WE_RO 0x05 /* WriteExclusive Registrants Only */ +#define MPATH_PRTPE_EA_RO 0x06 /* Exclusive Access. Registrants Only*/ +#define MPATH_PRTPE_WE_AR 0x07 /* Write Exclusive. All Registrants*/ +#define MPATH_PRTPE_EA_AR 0x08 /* Exclusive Access. 
All Registrants */ + + +/* PR RETURN_STATUS */ +#define MPATH_PR_SKIP -1 /* skipping this path */ +#define MPATH_PR_SUCCESS 0 +#define MPATH_PR_SYNTAX_ERROR 1 /* syntax error or invalid parameter */ + /* status for check condition */ +#define MPATH_PR_SENSE_NOT_READY 2 /* [sk,asc,ascq: 0x2,*,*] */ +#define MPATH_PR_SENSE_MEDIUM_ERROR 3 /* [sk,asc,ascq: 0x3,*,*] */ +#define MPATH_PR_SENSE_HARDWARE_ERROR 4 /* [sk,asc,ascq: 0x4,*,*] */ +#define MPATH_PR_ILLEGAL_REQ 5 /* [sk,asc,ascq: 0x5,*,*]*/ +#define MPATH_PR_SENSE_UNIT_ATTENTION 6 /* [sk,asc,ascq: 0x6,*,*] */ +#define MPATH_PR_SENSE_INVALID_OP 7 /* [sk,asc,ascq: 0x5,0x20,0x0]*/ +#define MPATH_PR_SENSE_ABORTED_COMMAND 8 /* [sk,asc,ascq: 0xb,*,*] */ +#define MPATH_PR_NO_SENSE 9 /* [sk,asc,ascq: 0x0,*,*] */ + +#define MPATH_PR_SENSE_MALFORMED 10 /* Response to SCSI command malformed */ +#define MPATH_PR_RESERV_CONFLICT 11 /* Reservation conflict on the device */ +#define MPATH_PR_FILE_ERROR 12 /* file (device node) problems(e.g. not found)*/ +#define MPATH_PR_DMMP_ERROR 13 /* DMMP related error.(e.g Error in getting dm info */ +#define MPATH_PR_THREAD_ERROR 14 /* pthreads error (e.g. 
unable to create new thread) */ +#define MPATH_PR_OTHER 15 /*other error/warning has occurred(transport + or driver error) */ + +/* PR MASK */ +#define MPATH_F_APTPL_MASK 0x01 /* APTPL MASK*/ +#define MPATH_F_ALL_TG_PT_MASK 0x04 /* ALL_TG_PT MASK*/ +#define MPATH_F_SPEC_I_PT_MASK 0x08 /* SPEC_I_PT MASK*/ +#define MPATH_PR_TYPE_MASK 0x0f /* TYPE MASK*/ +#define MPATH_PR_SCOPE_MASK 0xf0 /* SCOPE MASK*/ + +/*Transport ID PROTOCOL IDENTIFIER values */ +#define MPATH_PROTOCOL_ID_FC 0x00 +#define MPATH_PROTOCOL_ID_ISCSI 0x05 +#define MPATH_PROTOCOL_ID_SAS 0x06 + + +/*Transport ID FORMATE CODE */ +#define MPATH_WWUI_DEVICE_NAME 0x00 /* World wide unique initiator device name */ +#define MPATH_WWUI_PORT_IDENTIFIER 0x40 /* World wide unique initiator port identifier */ + + + +extern unsigned int mpath_mx_alloc_len; + + + +struct prin_readdescr +{ + uint32_t prgeneration; + uint32_t additional_length; /* The value should be either 0 or divisible by 8. + 0 indicates no registered reservation key. */ + uint8_t key_list[MPATH_MAX_PARAM_LEN]; +}; + +struct prin_resvdescr +{ + uint32_t prgeneration; + uint32_t additional_length; /* The value should be either 0 or 10h. 0 indicates + there is no reservation held. 10h indicates the + key[8] and scope_type have valid values */ + uint8_t key[8]; + uint32_t _obsolete; + uint8_t _reserved; + uint8_t scope_type; /* Use PR SCOPE AND TYPE MASK specified above */ + uint16_t _obsolete1; +}; + +struct prin_capdescr +{ + uint16_t length; + uint8_t flags[2]; + uint16_t pr_type_mask; + uint16_t _reserved; +}; + +struct transportid +{ + uint8_t format_code; + uint8_t protocol_id; + union { + uint8_t n_port_name[8]; /* FC transport*/ + uint8_t sas_address[8]; /* SAS transport */ + uint8_t iscsi_name[256]; /* ISCSI transport */ + }; +}; + +struct prin_fulldescr +{ + uint8_t key[8]; + uint8_t flag; /* All_tg_pt and reservation holder */ + uint8_t scope_type; /* Use PR SCOPE AND TYPE MASK specified above. 
+ Meaningful only for reservation holder */ + uint16_t rtpi; + struct transportid trnptid; +}; + +struct print_fulldescr_list +{ + uint32_t prgeneration; + uint32_t number_of_descriptor; + uint8_t private_buffer[MPATH_MAX_PARAM_LEN]; /*Private buffer for list storage*/ + struct prin_fulldescr *descriptors[]; +}; + +struct prin_resp +{ + union + { + struct prin_readdescr prin_readkeys; /* for PRIN read keys SA*/ + struct prin_resvdescr prin_readresv; /* for PRIN read reservation SA*/ + struct prin_capdescr prin_readcap; /* for PRIN Report Capabilities SA*/ + struct print_fulldescr_list prin_readfd; /* for PRIN read full status SA*/ + }prin_descriptor; +}; + +struct prout_param_descriptor { /* PROUT parameter descriptor */ + uint8_t key[8]; + uint8_t sa_key[8]; + uint32_t _obsolete; + uint8_t sa_flags; + uint8_t _reserved; + uint16_t _obsolete1; + uint8_t private_buffer[MPATH_MAX_PARAM_LEN]; /*private buffer for list storage*/ + uint32_t num_transportid; /* Number of Transport ID listed in trnptid_list[]*/ + struct transportid *trnptid_list[]; +}; + + +/* Function declarations */ + +/* + * DESCRIPTION : + * Initialize device mapper multipath configuration. This function must be invoked first + * before performing reservation management functions. + * RESTRICTIONS: + * + * RETURNS: struct config ->Success, NULL->Failed. + */ +extern struct config * mpath_lib_init (void); + + +/* + * DESCRIPTION : + * Release device mapper multipath configuration. This function must be invoked after + * performing reservation management functions. + * RESTRICTIONS: + * + * RETURNS: 0->Success, 1->Failed. + */ +extern int mpath_lib_exit (struct config *conf); + + +/* + * DESCRIPTION : + * This function sends PRIN command to the DM device and get the response. + * + * @fd: The file descriptor of a multipath device. Input argument. + * @rq_servact: PRIN command service action. Input argument + * @resp: The response from PRIN service action. The resp is a struct specified above. 
The caller should + * manage the memory allocation of this struct + * @noisy: Turn on debugging trace: Input argument. 0->Disable, 1->Enable + * @verbose: Set verbosity level. Input argument. value:[0-3]. 0->disabled, 3->Max verbose + * + * RESTRICTIONS: + * + * RETURNS: MPATH_PR_SUCCESS if PR command successful else returns any of the status specified + * above in RETURN_STATUS. + * + */ +extern int mpath_persistent_reserve_in (int fd, int rq_servact, struct prin_resp *resp, + int noisy, int verbose); + +/* + * DESCRIPTION : + * This function is like mpath_persistent_reserve_in(), except that it doesn't call + * mpath_persistent_reserve_init_vecs() and mpath_persistent_reserve_free_vecs() + * before and after the actual PR call. + */ +extern int __mpath_persistent_reserve_in(int fd, int rq_servact, + struct prin_resp *resp, int noisy); + +/* + * DESCRIPTION : + * This function sends PROUT command to the DM device and get the response. + * + * @fd: The file descriptor of a multipath device. Input argument. + * @rq_servact: PROUT command service action. Input argument + * @rq_scope: Persistent reservation scope. The value should be always LU_SCOPE (0h). + * @rq_type: Persistent reservation type. The valid values of persistent reservation types are + * 5h (Write exclusive - registrants only) + * 6h (Exclusive access - registrants only) + * 7h (Write exclusive - All registrants) + * 8h (Exclusive access - All registrants). + * @paramp: PROUT command parameter data. The paramp is a struct which describes PROUT + * parameter list. The caller should manage the memory allocation of this struct. + * @noisy: Turn on debugging trace: Input argument.0->Disable, 1->Enable. + * @verbose: Set verbosity level. Input argument. value:0 to 3. 0->disabled, 3->Max verbose + * + * RESTRICTIONS: + * + * RETURNS: MPATH_PR_SUCCESS if PR command successful else returns any of the status specified + * above in RETURN_STATUS. 
+ */ +extern int mpath_persistent_reserve_out ( int fd, int rq_servact, int rq_scope, + unsigned int rq_type, struct prout_param_descriptor *paramp, int noisy, + int verbose); +/* + * DESCRIPTION : + * This function is like mpath_persistent_reserve_out(), except that it doesn't call + * mpath_persistent_reserve_init_vecs() and mpath_persistent_reserve_free_vecs() + * before and after the actual PR call. + */ +extern int __mpath_persistent_reserve_out( int fd, int rq_servact, int rq_scope, + unsigned int rq_type, struct prout_param_descriptor *paramp, + int noisy); + +/* + * DESCRIPTION : + * This function allocates data structures and performs basic initialization and + * device discovery for later calls of __mpath_persistent_reserve_in() or + * __mpath_persistent_reserve_out(). + * @verbose: Set verbosity level. Input argument. value:0 to 3. 0->disabled, 3->Max verbose + * + * RESTRICTIONS: + * + * RETURNS: MPATH_PR_SUCCESS if successful else returns any of the status specified + * above in RETURN_STATUS. + */ +int mpath_persistent_reserve_init_vecs(int verbose); + +/* + * DESCRIPTION : + * This function frees data structures allocated by + * mpath_persistent_reserve_init_vecs(). + */ +void mpath_persistent_reserve_free_vecs(void); + + +#ifdef __cplusplus +} +#endif + +#endif /*MPATH_PERSIST_LIB_H*/ diff --git a/libmpathpersist/mpath_persistent_reserve_in.3 b/libmpathpersist/mpath_persistent_reserve_in.3 new file mode 100644 index 0000000..4691bde --- /dev/null +++ b/libmpathpersist/mpath_persistent_reserve_in.3 @@ -0,0 +1,116 @@ +.\" ---------------------------------------------------------------------------- +.\" Update the date below if you make any significant change. +.\" Make sure there are no errors with: +.\" groff -z -wall -b -e -t libmpathpersist/mpath_persistent_reserve_in.3 +.\" +.\" ---------------------------------------------------------------------------- +. +.TH MPATH_PERSISTENT_RESERVE_IN 3 2016-11-01 "Linux" +. +. 
+.\" ---------------------------------------------------------------------------- +.SH NAME +.\" ---------------------------------------------------------------------------- +. +mpath_persistent_reserve_in \- send PRIN command to DM device +. +. +.\" ---------------------------------------------------------------------------- +.SH SYNOPSIS +.\" ---------------------------------------------------------------------------- +. +.B #include <mpath_persist.h> +.P +.BI "int mpath_persistent_reserve_in" "(int fd, int rq_servact, struct prin_resp *resp, int noisy, int verbose)" +.P +. +. +.\" ---------------------------------------------------------------------------- +.SH DESCRIPTION +.\" ---------------------------------------------------------------------------- +. +The \fBmpath_persistent_reserve_in()\fR function sends a PRIN command to +the DM device and gets the response. +.TP +.B Parameters: +.RS +.TP 12 +.I fd +The file descriptor of a multipath device. Input argument. +.TP +.I rq_servact +PRIN command service action. Input argument. +.TP +.I resp +The response from PRIN service action. The caller should manage the memory allocation of this structure. +.TP +.I noisy +Turn on debugging trace: Input argument. 0->Disable, 1->Enable. +.TP +.I verbose +Set verbosity level. Input argument. value:[0-3]. 0->Crits and Errors, 1->Warnings, 2->Info, 3->Debug. +.RE +. +. +.\" ---------------------------------------------------------------------------- +.SH RETURNS +.\" ---------------------------------------------------------------------------- +. +.TP 12 +.B MPATH_PR_SUCCESS +If the PR command is successful. +.TP +.B MPATH_PR_SYNTAX_ERROR +If syntax error or invalid parameter. +.TP +.B MPATH_PR_SENSE_NOT_READY +If command fails with [sk,asc,ascq: 0x2,*,*]. +.TP +.B MPATH_PR_SENSE_MEDIUM_ERROR +If command fails with [sk,asc,ascq: 0x3,*,*]. +.TP +.B MPATH_PR_SENSE_HARDWARE_ERROR +If command fails with [sk,asc,ascq: 0x4,*,*].
+.TP +.B MPATH_PR_SENSE_INVALID_OP +If command fails with [sk,asc,ascq: 0x5,0x20,0x0]. +.TP +.B MPATH_PR_ILLEGAL_REQ +If command fails with [sk,asc,ascq: 0x5,*,*]. +.TP +.B MPATH_PR_SENSE_UNIT_ATTENTION +If command fails with [sk,asc,ascq: 0x6,*,*]. +.TP +.B MPATH_PR_SENSE_ABORTED_COMMAND +If command fails with [sk,asc,ascq: 0xb,*,*]. +.TP +.B MPATH_PR_NO_SENSE +If command fails with [sk,asc,ascq: 0x0,*,*]. +.TP +.B MPATH_PR_SENSE_MALFORMED +If the response to the SCSI command is malformed. +.TP +.B MPATH_PR_FILE_ERROR +If command fails due to file (device node) problems (e.g. not found). +.TP +.B MPATH_PR_DMMP_ERROR +If a Device Mapper related error occurs (e.g. error in getting dm info). +.TP +.B MPATH_PR_OTHER +If another error/warning has occurred (e.g. transport or driver error). +. +. +.\" ---------------------------------------------------------------------------- +.SH "SEE ALSO" +.\" ---------------------------------------------------------------------------- +. +.BR mpathpersist (8). +. +. +.\" ---------------------------------------------------------------------------- +.SH AUTHORS +.\" ---------------------------------------------------------------------------- +. +\fImultipath-tools\fR was developed by Christophe Varoqui +and others. +.\" EOF diff --git a/libmpathpersist/mpath_persistent_reserve_out.3 b/libmpathpersist/mpath_persistent_reserve_out.3 new file mode 100644 index 0000000..55b00b0 --- /dev/null +++ b/libmpathpersist/mpath_persistent_reserve_out.3 @@ -0,0 +1,136 @@ +.\" ---------------------------------------------------------------------------- +.\" Update the date below if you make any significant change. +.\" Make sure there are no errors with: +.\" groff -z -wall -b -e -t libmpathpersist/mpath_persistent_reserve_out.3 +.\" +.\" ---------------------------------------------------------------------------- +. +.TH MPATH_PERSISTENT_RESERVE_OUT 3 2016-11-01 "Linux" +. +.
+.\" ---------------------------------------------------------------------------- +.SH NAME +.\" ---------------------------------------------------------------------------- +. +mpath_persistent_reserve_out \- send PROUT command to DM device +. +. +.\" ---------------------------------------------------------------------------- +.SH SYNOPSIS +.\" ---------------------------------------------------------------------------- +. +.B #include <mpath_persist.h> +.P +.BI "int mpath_persistent_reserve_out" "(int fd, int rq_servact, int rq_scope, unsigned int rq_type, struct prout_param_descriptor *paramp, int noisy, int verbose)" +.P +. +. +.\" ---------------------------------------------------------------------------- +.SH DESCRIPTION +.\" ---------------------------------------------------------------------------- +. +The \fBmpath_persistent_reserve_out()\fR function sends a PROUT command to +the DM device and gets the response. +.TP +.B Parameters: +.RS +.TP 12 +.I fd +The file descriptor of a multipath device. Input argument. +.TP +.I rq_servact +PROUT command service action. Input argument. +.TP +.I rq_scope +Persistent reservation scope. The value should always be LU_SCOPE (0h). +.TP +.I rq_type +Persistent reservation type. The valid values of persistent reservation types are: +.RS +.IP +5h (Write exclusive - registrants only). +.IP +6h (Exclusive access - registrants only). +.IP +7h (Write exclusive - All registrants). +.IP +8h (Exclusive access - All registrants). +.RE +.TP +.I paramp +PROUT command parameter data. The paramp is a struct which describes the PROUT +parameter list. The caller should manage the memory allocation of this structure. +.TP +.I noisy +Turn on debugging trace: Input argument. 0->Disable, 1->Enable. +.TP +.I verbose +Set verbosity level. Input argument. value: 0 to 3. 0->Crits and Errors, 1->Warnings, 2->Info, 3->Debug. +.RE +. +. +.\" ---------------------------------------------------------------------------- +.SH RETURNS +.\" ---------------------------------------------------------------------------- +.
+.TP 12 +.B MPATH_PR_SUCCESS +If the PR command is successful; otherwise one of the status codes below is returned. +.TP +.B MPATH_PR_SYNTAX_ERROR +If syntax error or invalid parameter. +.TP +.B MPATH_PR_SENSE_NOT_READY +If command fails with [sk,asc,ascq: 0x2,*,*]. +.TP +.B MPATH_PR_SENSE_MEDIUM_ERROR +If command fails with [sk,asc,ascq: 0x3,*,*]. +.TP +.B MPATH_PR_SENSE_HARDWARE_ERROR +If command fails with [sk,asc,ascq: 0x4,*,*]. +.TP +.B MPATH_PR_SENSE_INVALID_OP +If command fails with [sk,asc,ascq: 0x5,0x20,0x0]. +.TP +.B MPATH_PR_ILLEGAL_REQ +If command fails with [sk,asc,ascq: 0x5,*,*]. +.TP +.B MPATH_PR_SENSE_UNIT_ATTENTION +If command fails with [sk,asc,ascq: 0x6,*,*]. +.TP +.B MPATH_PR_SENSE_ABORTED_COMMAND +If command fails with [sk,asc,ascq: 0xb,*,*]. +.TP +.B MPATH_PR_NO_SENSE +If command fails with [sk,asc,ascq: 0x0,*,*]. +.TP +.B MPATH_PR_SENSE_MALFORMED +If the response to the SCSI command is malformed. +.TP +.B MPATH_PR_FILE_ERROR +If command fails due to file (device node) problems (e.g. not found). +.TP +.B MPATH_PR_DMMP_ERROR +If a Device Mapper related error occurs (e.g. error in getting dm info). +.TP +.B MPATH_PR_OTHER +If another error/warning has occurred (e.g. transport or driver error). +.TP +.B MPATH_PR_RESERV_CONFLICT +If command fails with reservation conflict. +. +. +.\" ---------------------------------------------------------------------------- +.SH "SEE ALSO" +.\" ---------------------------------------------------------------------------- +. +.BR mpathpersist (8). +. +. +.\" ---------------------------------------------------------------------------- +.SH AUTHORS +.\" ---------------------------------------------------------------------------- +. +\fImultipath-tools\fR was developed by Christophe Varoqui +and others.
+.\" EOF diff --git a/libmpathpersist/mpath_pr_ioctl.c b/libmpathpersist/mpath_pr_ioctl.c new file mode 100644 index 0000000..74b26b0 --- /dev/null +++ b/libmpathpersist/mpath_pr_ioctl.c @@ -0,0 +1,547 @@ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mpath_pr_ioctl.h" +#include "mpath_persist.h" +#include "unaligned.h" + +#include "debug.h" + +#define FILE_NAME_SIZE 256 + +#define TIMEOUT 2000 +#define MAXRETRY 5 + +int prin_do_scsi_ioctl(char * dev, int rq_servact, struct prin_resp *resp, int noisy); +int mpath_translate_response (char * dev, struct sg_io_hdr io_hdr, + SenseData_t *Sensedata); +void dumpHex(const char* str, int len, int no_ascii); +int prout_do_scsi_ioctl( char * dev, int rq_servact, int rq_scope, + unsigned int rq_type, struct prout_param_descriptor *paramp, int noisy); +uint32_t format_transportids(struct prout_param_descriptor *paramp); +void convert_be32_to_cpu(uint32_t *num); +void convert_be16_to_cpu(uint16_t *num); +void decode_transport_id(struct prin_fulldescr *fdesc, unsigned char * p, int length); +int get_prin_length(int rq_servact); +int mpath_isLittleEndian(void); + +unsigned int mpath_mx_alloc_len; + +int prout_do_scsi_ioctl(char * dev, int rq_servact, int rq_scope, + unsigned int rq_type, struct prout_param_descriptor *paramp, int noisy) +{ + + int status, paramlen = 24, ret = 0; + uint32_t translen=0; + int retry = MAXRETRY; + SenseData_t Sensedata; + struct sg_io_hdr io_hdr; + char devname[FILE_NAME_SIZE]; + int fd = -1; + + snprintf(devname, FILE_NAME_SIZE, "/dev/%s",dev); + fd = open(devname, O_RDONLY); + if(fd < 0){ + condlog (1, "%s: unable to open device.", dev); + return MPATH_PR_FILE_ERROR; + } + + unsigned char cdb[MPATH_PROUT_CMDLEN] = + {MPATH_PROUT_CMD, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + + + if (paramp->sa_flags & MPATH_F_SPEC_I_PT_MASK) + { + translen = format_transportids(paramp); + paramlen = 24 + translen; + } + else + paramlen = 24; 
+ + if ( rq_servact > 0) + cdb[1] = (unsigned char)(rq_servact & 0x1f); + cdb[2] = (((rq_scope & 0xf) << 4) | (rq_type & 0xf)); + cdb[7] = (unsigned char)((paramlen >> 8) & 0xff); + cdb[8] = (unsigned char)(paramlen & 0xff); + +retry : + condlog(4, "%s: rq_servact = %d", dev, rq_servact); + condlog(4, "%s: rq_scope = %d ", dev, rq_scope); + condlog(4, "%s: rq_type = %d ", dev, rq_type); + condlog(4, "%s: paramlen = %d", dev, paramlen); + + if (noisy) + { + condlog(4, "%s: Persistent Reservation OUT parameter:", dev); + dumpHex((const char *)paramp, paramlen,1); + } + + memset(&Sensedata, 0, sizeof(SenseData_t)); + memset(&io_hdr,0 , sizeof( struct sg_io_hdr)); + io_hdr.interface_id = 'S'; + io_hdr.cmd_len = MPATH_PROUT_CMDLEN; + io_hdr.cmdp = cdb; + io_hdr.sbp = (void *)&Sensedata; + io_hdr.mx_sb_len = sizeof (SenseData_t); + io_hdr.timeout = TIMEOUT; + + if (paramlen > 0) { + io_hdr.dxferp = (void *)paramp; + io_hdr.dxfer_len = paramlen; + io_hdr.dxfer_direction = SG_DXFER_TO_DEV ; + } + else { + io_hdr.dxfer_direction = SG_DXFER_NONE; + } + ret = ioctl(fd, SG_IO, &io_hdr); + if (ret < 0) + { + condlog(0, "%s: ioctl failed %d", dev, ret); + close(fd); + return ret; + } + + condlog(4, "%s: Duration=%u (ms)", dev, io_hdr.duration); + + status = mpath_translate_response(dev, io_hdr, &Sensedata); + condlog(3, "%s: status = %d", dev, status); + + if (status == MPATH_PR_SENSE_UNIT_ATTENTION && (retry > 0)) + { + --retry; + condlog(3, "%s: retrying for Unit Attention. Remaining retries = %d", + dev, retry); + goto retry; + } + + if (((status == MPATH_PR_SENSE_NOT_READY )&& (Sensedata.ASC == 0x04)&& + (Sensedata.ASCQ == 0x07))&& (retry > 0)) + { + usleep(1000); + --retry; + condlog(3, "%s: retrying for sense 02/04/07." 
+ " Remaining retries = %d", dev, retry); + goto retry; + } + + close(fd); + return status; +} + +uint32_t format_transportids(struct prout_param_descriptor *paramp) +{ + unsigned int i = 0, len; + uint32_t buff_offset = 4; + memset(paramp->private_buffer, 0, MPATH_MAX_PARAM_LEN); + for (i=0; i < paramp->num_transportid; i++ ) + { + paramp->private_buffer[buff_offset] = (uint8_t)((paramp->trnptid_list[i]->format_code & 0xff)| + (paramp->trnptid_list[i]->protocol_id & 0xff)); + buff_offset += 1; + switch(paramp->trnptid_list[i]->protocol_id) + { + case MPATH_PROTOCOL_ID_FC: + buff_offset += 7; + memcpy(¶mp->private_buffer[buff_offset], ¶mp->trnptid_list[i]->n_port_name, 8); + buff_offset +=8 ; + buff_offset +=8 ; + break; + case MPATH_PROTOCOL_ID_SAS: + buff_offset += 3; + memcpy(¶mp->private_buffer[buff_offset], ¶mp->trnptid_list[i]->sas_address, 8); + buff_offset += 12; + break; + case MPATH_PROTOCOL_ID_ISCSI: + buff_offset += 1; + len = (paramp->trnptid_list[i]->iscsi_name[1] & 0xff)+2; + memcpy(¶mp->private_buffer[buff_offset], ¶mp->trnptid_list[i]->iscsi_name,len); + buff_offset += len ; + break; + } + + } + buff_offset -= 4; + paramp->private_buffer[0] = (unsigned char)((buff_offset >> 24) & 0xff); + paramp->private_buffer[1] = (unsigned char)((buff_offset >> 16) & 0xff); + paramp->private_buffer[2] = (unsigned char)((buff_offset >> 8) & 0xff); + paramp->private_buffer[3] = (unsigned char)(buff_offset & 0xff); + buff_offset += 4; + return buff_offset; +} + +static void mpath_format_readkeys(struct prin_resp *pr_buff) +{ + convert_be32_to_cpu(&pr_buff->prin_descriptor.prin_readkeys.prgeneration); + convert_be32_to_cpu(&pr_buff->prin_descriptor.prin_readkeys.additional_length); +} + +static void mpath_format_readresv(struct prin_resp *pr_buff) +{ + + convert_be32_to_cpu(&pr_buff->prin_descriptor.prin_readresv.prgeneration); + convert_be32_to_cpu(&pr_buff->prin_descriptor.prin_readresv.additional_length); + + return; +} + +static void 
mpath_format_reportcapabilities(struct prin_resp *pr_buff) +{ + convert_be16_to_cpu(&pr_buff->prin_descriptor.prin_readcap.length); + convert_be16_to_cpu(&pr_buff->prin_descriptor.prin_readcap.pr_type_mask); + + return; +} + +static void mpath_format_readfullstatus(struct prin_resp *pr_buff) +{ + int num; + uint32_t fdesc_count=0; + unsigned char *p; + char *ppbuff; + uint32_t additional_length, k, tid_len_len = 0; + char tempbuff[MPATH_MAX_PARAM_LEN]; + struct prin_fulldescr fdesc; + + convert_be32_to_cpu(&pr_buff->prin_descriptor.prin_readfd.prgeneration); + convert_be32_to_cpu(&pr_buff->prin_descriptor.prin_readfd.number_of_descriptor); + + if (pr_buff->prin_descriptor.prin_readfd.number_of_descriptor == 0) + { + condlog(3, "No registration or reservation found."); + return; + } + + additional_length = pr_buff->prin_descriptor.prin_readfd.number_of_descriptor; + if (additional_length > MPATH_MAX_PARAM_LEN) { + condlog(3, "PRIN length %u exceeds max length %d", additional_length, + MPATH_MAX_PARAM_LEN); + return; + } + + memset(&fdesc, 0, sizeof(struct prin_fulldescr)); + + memcpy( tempbuff, pr_buff->prin_descriptor.prin_readfd.private_buffer,MPATH_MAX_PARAM_LEN ); + memset(&pr_buff->prin_descriptor.prin_readfd.private_buffer, 0, MPATH_MAX_PARAM_LEN); + + p =(unsigned char *)tempbuff; + ppbuff = (char *)pr_buff->prin_descriptor.prin_readfd.private_buffer; + + for (k = 0; k < additional_length; k += num, p += num) { + memcpy(&fdesc.key, p, 8 ); + fdesc.flag = p[12]; + fdesc.scope_type = p[13]; + fdesc.rtpi = get_unaligned_be16(&p[18]); + + tid_len_len = get_unaligned_be32(&p[20]); + if (tid_len_len + 24 + k > additional_length) { + condlog(0, + "%s: corrupt PRIN response: status descriptor end %d exceeds length %d", + __func__, tid_len_len + k + 24, + additional_length); + tid_len_len = additional_length - k - 24; + } + + if (tid_len_len > 0) + decode_transport_id( &fdesc, &p[24], tid_len_len); + + num = 24 + tid_len_len; + memcpy(ppbuff, &fdesc, sizeof(struct 
prin_fulldescr)); + pr_buff->prin_descriptor.prin_readfd.descriptors[fdesc_count]= (struct prin_fulldescr *)ppbuff; + ppbuff += sizeof(struct prin_fulldescr); + ++fdesc_count; + } + + pr_buff->prin_descriptor.prin_readfd.number_of_descriptor = fdesc_count; + + return; +} + +void +decode_transport_id(struct prin_fulldescr *fdesc, unsigned char * p, int length) +{ + unsigned int num; + int jump, k; + for (k = 0, jump = 24; k < length; k += jump, p += jump) { + fdesc->trnptid.format_code = ((p[0] >> 6) & 0x3); + fdesc->trnptid.protocol_id = (p[0] & 0xf); + switch (fdesc->trnptid.protocol_id) { + case MPATH_PROTOCOL_ID_FC: + memcpy(&fdesc->trnptid.n_port_name, &p[8], 8); + jump = 24; + break; + case MPATH_PROTOCOL_ID_ISCSI: + num = get_unaligned_be16(&p[2]); + if (num >= sizeof(fdesc->trnptid.iscsi_name)) + num = sizeof(fdesc->trnptid.iscsi_name); + memcpy(&fdesc->trnptid.iscsi_name, &p[4], num); + jump = (((num + 4) < 24) ? 24 : num + 4); + break; + case MPATH_PROTOCOL_ID_SAS: + memcpy(&fdesc->trnptid.sas_address, &p[4], 8); + jump = 24; + break; + default: + jump = 24; + break; + } + } +} + +int prin_do_scsi_ioctl(char * dev, int rq_servact, struct prin_resp * resp, int noisy) +{ + + int ret, status, got, fd; + int mx_resp_len; + SenseData_t Sensedata; + int retry = MAXRETRY; + struct sg_io_hdr io_hdr; + char devname[FILE_NAME_SIZE]; + unsigned char cdb[MPATH_PRIN_CMDLEN] = + {MPATH_PRIN_CMD, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + + snprintf(devname, FILE_NAME_SIZE, "/dev/%s",dev); + fd = open(devname, O_RDONLY); + if(fd < 0){ + condlog(0, "%s: Unable to open device ", dev); + return MPATH_PR_FILE_ERROR; + } + + if (mpath_mx_alloc_len) + mx_resp_len = mpath_mx_alloc_len; + else + mx_resp_len = get_prin_length(rq_servact); + + if (mx_resp_len == 0) { + status = MPATH_PR_SYNTAX_ERROR; + goto out; + } + + cdb[1] = (unsigned char)(rq_servact & 0x1f); + cdb[7] = (unsigned char)((mx_resp_len >> 8) & 0xff); + cdb[8] = (unsigned char)(mx_resp_len & 0xff); + +retry : + 
memset(&Sensedata, 0, sizeof(SenseData_t)); + memset(&io_hdr,0 , sizeof( struct sg_io_hdr)); + + io_hdr.interface_id = 'S'; + io_hdr.cmd_len = MPATH_PRIN_CMDLEN; + io_hdr.mx_sb_len = sizeof (SenseData_t); + io_hdr.dxfer_direction = SG_DXFER_FROM_DEV; + io_hdr.cmdp = cdb; + io_hdr.sbp = (void *)&Sensedata; + io_hdr.timeout = TIMEOUT; + + + + io_hdr.dxfer_len = mx_resp_len; + io_hdr.dxferp = (void *)resp; + + ret =ioctl(fd, SG_IO, &io_hdr); + if (ret < 0){ + condlog(0, "%s: IOCTL failed %d", dev, ret); + status = MPATH_PR_OTHER; + goto out; + } + + got = mx_resp_len - io_hdr.resid; + + condlog(3, "%s: duration = %u (ms)", dev, io_hdr.duration); + condlog(4, "%s: persistent reservation in: requested %d bytes but got %d bytes)", dev, mx_resp_len, got); + + status = mpath_translate_response(dev, io_hdr, &Sensedata); + + if (status == MPATH_PR_SENSE_UNIT_ATTENTION && (retry > 0)) + { + --retry; + condlog(3, "%s: retrying for Unit Attention. Remaining retries = %d", dev, retry); + goto retry; + } + + if (((status == MPATH_PR_SENSE_NOT_READY )&& (Sensedata.ASC == 0x04)&& + (Sensedata.ASCQ == 0x07))&& (retry > 0)) + { + usleep(1000); + --retry; + condlog(3, "%s: retrying for 02/04/07. 
Remaining retries = %d", dev, retry); + goto retry; + } + + if (status != MPATH_PR_SUCCESS) + goto out; + + if (noisy) + dumpHex((const char *)resp, got , 1); + + + switch (rq_servact) + { + case MPATH_PRIN_RKEY_SA : + mpath_format_readkeys(resp); + break; + case MPATH_PRIN_RRES_SA : + mpath_format_readresv(resp); + break; + case MPATH_PRIN_RCAP_SA : + mpath_format_reportcapabilities(resp); + break; + case MPATH_PRIN_RFSTAT_SA : + mpath_format_readfullstatus(resp); + } + +out: + close(fd); + return status; +} + +int mpath_translate_response (char * dev, struct sg_io_hdr io_hdr, + SenseData_t *Sensedata) +{ + condlog(3, "%s: status driver:%02x host:%02x scsi:%02x", dev, + io_hdr.driver_status, io_hdr.host_status ,io_hdr.status); + io_hdr.status &= 0x7e; + if ((0 == io_hdr.status) && + (0 == io_hdr.host_status) && + (0 == io_hdr.driver_status)) + return MPATH_PR_SUCCESS; + + switch(io_hdr.status) { + case SAM_STAT_GOOD: + break; + case SAM_STAT_CHECK_CONDITION: + condlog(3, "%s: Sense_Key=%02x, ASC=%02x ASCQ=%02x", + dev, Sensedata->Sense_Key, + Sensedata->ASC, Sensedata->ASCQ); + switch(Sensedata->Sense_Key) { + case NO_SENSE: + return MPATH_PR_NO_SENSE; + case RECOVERED_ERROR: + return MPATH_PR_SUCCESS; + case NOT_READY: + return MPATH_PR_SENSE_NOT_READY; + case MEDIUM_ERROR: + return MPATH_PR_SENSE_MEDIUM_ERROR; + case BLANK_CHECK: + return MPATH_PR_OTHER; + case HARDWARE_ERROR: + return MPATH_PR_SENSE_HARDWARE_ERROR; + case ILLEGAL_REQUEST: + return MPATH_PR_ILLEGAL_REQ; + case UNIT_ATTENTION: + return MPATH_PR_SENSE_UNIT_ATTENTION; + case DATA_PROTECT: + case COPY_ABORTED: + return MPATH_PR_OTHER; + case ABORTED_COMMAND: + return MPATH_PR_SENSE_ABORTED_COMMAND; + + default : + return MPATH_PR_OTHER; + } + case SAM_STAT_RESERVATION_CONFLICT: + return MPATH_PR_RESERV_CONFLICT; + + default : + return MPATH_PR_OTHER; + } + + switch(io_hdr.host_status) { + case DID_OK : + break; + default : + return MPATH_PR_OTHER; + } + switch(io_hdr.driver_status) + { + case 
DRIVER_OK: + break; + default : + return MPATH_PR_OTHER; + } + return MPATH_PR_SUCCESS; +} + +void convert_be16_to_cpu(uint16_t *num) +{ + *num = get_unaligned_be16(num); +} + +void convert_be32_to_cpu(uint32_t *num) +{ + *num = get_unaligned_be32(num); +} + +void +dumpHex(const char* str, int len, int log) +{ + const char * p = str; + unsigned char c; + char buff[82]; + const int bpstart = 5; + int bpos = bpstart; + int k; + + if (len <= 0) + return; + memset(buff, ' ', 80); + buff[80] = '\0'; + for (k = 0; k < len; k++) { + c = *p++; + bpos += 3; + if (bpos == (bpstart + (9 * 3))) + bpos++; + sprintf(&buff[bpos], "%.2x", (int)(unsigned char)c); + buff[bpos + 2] = ' '; + if ((k > 0) && (0 == ((k + 1) % 16))) { + if (log) + condlog(0, "%.76s" , buff); + else + printf("%.76s" , buff); + bpos = bpstart; + memset(buff, ' ', 80); + } + } + if (bpos > bpstart) { + buff[bpos + 2] = '\0'; + if (log) + condlog(0, "%s", buff); + else + printf("%s\n" , buff); + } + return; +} + +int get_prin_length(int rq_servact) +{ + int mx_resp_len; + switch (rq_servact) + { + case MPATH_PRIN_RKEY_SA: + mx_resp_len = sizeof(struct prin_readdescr); + break; + case MPATH_PRIN_RRES_SA : + mx_resp_len = sizeof(struct prin_resvdescr); + break; + case MPATH_PRIN_RCAP_SA : + mx_resp_len = sizeof(struct prin_capdescr); + break; + case MPATH_PRIN_RFSTAT_SA: + mx_resp_len = sizeof(struct print_fulldescr_list) + sizeof(struct prin_fulldescr *)*32; + break; + default: + condlog(0, "invalid service action, %d", rq_servact); + mx_resp_len = 0; + break; + } + return mx_resp_len; +} diff --git a/libmpathpersist/mpath_pr_ioctl.h b/libmpathpersist/mpath_pr_ioctl.h new file mode 100644 index 0000000..625490f --- /dev/null +++ b/libmpathpersist/mpath_pr_ioctl.h @@ -0,0 +1,109 @@ +#define MPATH_XFER_HOST_DEV 0 /*data transfer from initiator to target */ +#define MPATH_XFER_DEV_HOST 1 /*data transfer from target to initiator */ +#define MPATH_XFER_NONE 2 /*no data transfer */ +#define MPATH_XFER_UNKNOWN 3 
/*data transfer direction is unknown */ + +#if 0 +static const char * pr_type_strs[] = { + "obsolete [0]", + "Write Exclusive", + "obsolete [2]", + "Exclusive Access", + "obsolete [4]", + "Write Exclusive, registrants only", + "Exclusive Access, registrants only", + "Write Exclusive, all registrants", + "Exclusive Access, all registrants", + "obsolete [9]", "obsolete [0xa]", "obsolete [0xb]", "obsolete [0xc]", + "obsolete [0xd]", "obsolete [0xe]", "obsolete [0xf]", +}; +#endif + +typedef unsigned int LWORD; /* unsigned numeric, bit patterns */ +typedef unsigned char BYTE; /* unsigned numeric, bit patterns */ + +typedef struct SenseData +{ + BYTE Error_Code; + BYTE Segment_Number; /* not applicable to DAC */ + BYTE Sense_Key; + BYTE Information[ 4 ]; + BYTE Additional_Len; + LWORD Command_Specific_Info; + BYTE ASC; + BYTE ASCQ; + BYTE Field_Replaceable_Unit; + BYTE Sense_Key_Specific_Info[ 3 ]; + BYTE Recovery_Action[ 2 ]; + BYTE Total_Errors; + BYTE Total_Retries; + BYTE ASC_Stack_1; + BYTE ASCQ_Stack_1; + BYTE ASC_Stack_2; + BYTE ASCQ_Stack_2; + BYTE Additional_FRU_Info[ 8 ]; + BYTE Error_Specific_Info[ 3 ]; + BYTE Error_Detection_Point[ 4 ]; + BYTE Original_CDB[10]; + BYTE Host_ID; + BYTE Host_Descriptor[ 2 ]; + BYTE Serial_Number[ 16 ]; + BYTE Array_SW_Revision[ 4 ]; + BYTE Data_Xfer_Operation; + BYTE LUN_Number; + BYTE LUN_Status; + BYTE Drive_ID; + BYTE Xfer_Start_Drive_ID; + BYTE Drive_SW_Revision[ 4 ]; + BYTE Drive_Product_ID[ 16 ]; + BYTE PowerUp_Status[ 2 ]; + BYTE RAID_Level; + BYTE Drive_Sense_ID[ 2 ]; + BYTE Drive_Sense_Data[ 32 ]; + BYTE Reserved2[24]; +} SenseData_t; + +#define MPATH_PRIN_CMD 0x5e +#define MPATH_PRIN_CMDLEN 10 +#define MPATH_PROUT_CMD 0x5f +#define MPATH_PROUT_CMDLEN 10 + +#define DID_OK 0x00 +/* + * Status codes + */ +#define SAM_STAT_GOOD 0x00 +#define SAM_STAT_CHECK_CONDITION 0x02 +#define SAM_STAT_CONDITION_MET 0x04 +#define SAM_STAT_BUSY 0x08 +#define SAM_STAT_INTERMEDIATE 0x10 +#define SAM_STAT_INTERMEDIATE_CONDITION_MET 0x14 
+#define SAM_STAT_RESERVATION_CONFLICT 0x18 +#define SAM_STAT_COMMAND_TERMINATED 0x22 /* obsolete in SAM-3 */ +#define SAM_STAT_TASK_SET_FULL 0x28 +#define SAM_STAT_ACA_ACTIVE 0x30 +#define SAM_STAT_TASK_ABORTED 0x40 + +#define STATUS_MASK 0x3e + +/* + * SENSE KEYS + */ + +#define NO_SENSE 0x00 +#define RECOVERED_ERROR 0x01 +#define NOT_READY 0x02 +#define MEDIUM_ERROR 0x03 +#define HARDWARE_ERROR 0x04 +#define ILLEGAL_REQUEST 0x05 +#define UNIT_ATTENTION 0x06 +#define DATA_PROTECT 0x07 +#define BLANK_CHECK 0x08 +#define COPY_ABORTED 0x0a +#define ABORTED_COMMAND 0x0b +#define VOLUME_OVERFLOW 0x0d +#define MISCOMPARE 0x0e + + +/* Driver status */ +#define DRIVER_OK 0x00 diff --git a/libmpathpersist/mpath_updatepr.c b/libmpathpersist/mpath_updatepr.c new file mode 100644 index 0000000..0aca28e --- /dev/null +++ b/libmpathpersist/mpath_updatepr.c @@ -0,0 +1,73 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "debug.h" +#include "mpath_cmd.h" +#include "uxsock.h" +#include "memory.h" +#include "mpathpr.h" + + +static int do_update_pr(char *alias, char *arg) +{ + int fd; + char str[256]; + char *reply; + int ret = 0; + + fd = mpath_connect(); + if (fd == -1) { + condlog (0, "ux socket connect error"); + return -1; + } + + snprintf(str,sizeof(str),"map %s %s", alias, arg); + condlog (2, "%s: pr message=%s", alias, str); + if (send_packet(fd, str) != 0) { + condlog(2, "%s: message=%s send error=%d", alias, str, errno); + mpath_disconnect(fd); + return -1; + } + ret = recv_packet(fd, &reply, DEFAULT_REPLY_TIMEOUT); + if (ret < 0) { + condlog(2, "%s: message=%s recv error=%d", alias, str, errno); + ret = -1; + } else { + condlog (2, "%s: message=%s reply=%s", alias, str, reply); + if (reply && strncmp(reply,"ok", 2) == 0) + ret = 0; + else + ret = -1; + } + + free(reply); + mpath_disconnect(fd); + return ret; +} + +int update_prflag(char *mapname, int set) { + return 
do_update_pr(mapname, (set)? "setprstatus" : "unsetprstatus"); +} +/* Build "setprkey key <prkey in hex>[:aptpl]" (":aptpl" only when sa_flags has MPATH_F_APTPL_MASK set), or "unsetprkey" when prkey is 0, and pass it to do_update_pr() for mapname. Returns do_update_pr()'s result. */ +int update_prkey_flags(char *mapname, uint64_t prkey, uint8_t sa_flags) { + char str[256]; + const char *flagstr = ""; + + if (sa_flags & MPATH_F_APTPL_MASK) + flagstr = ":aptpl"; + if (prkey) + snprintf(str, sizeof(str), "setprkey key %" PRIx64 "%s", prkey, flagstr); + else + snprintf(str, sizeof(str), "unsetprkey"); + return do_update_pr(mapname, str); +} diff --git a/libmpathpersist/mpathpr.h b/libmpathpersist/mpathpr.h new file mode 100644 index 0000000..5ea8cd6 --- /dev/null +++ b/libmpathpersist/mpathpr.h @@ -0,0 +1,54 @@ +#ifndef MPATHPR_H +#define MPATHPR_H + +#include "structs.h" /* FILE_NAME_SIZE */ + +struct prin_param { + char dev[FILE_NAME_SIZE]; + int rq_servact; + struct prin_resp *resp; + int noisy; + int status; +}; + +struct prout_param { + char dev[FILE_NAME_SIZE]; + int rq_servact; + int rq_scope; + unsigned int rq_type; + struct prout_param_descriptor *paramp; + int noisy; + int status; +}; + +struct threadinfo { + int status; + pthread_t id; + struct prout_param param; +}; + +int prin_do_scsi_ioctl(char * dev, int rq_servact, struct prin_resp * resp, int noisy); +int prout_do_scsi_ioctl( char * dev, int rq_servact, int rq_scope, + unsigned int rq_type, struct prout_param_descriptor *paramp, int noisy); +void * _mpath_pr_update (void *arg); +int mpath_send_prin_activepath (char * dev, int rq_servact, struct prin_resp * resp, int noisy); +int get_mpvec (vector curmp, vector pathvec, char * refwwid); +void * mpath_prout_pthread_fn(void *p); +/* log != 0: dump through condlog(); log == 0: dump to stdout */ +void dumpHex(const char *str, int len, int log); + +int mpath_prout_reg(struct multipath *mpp,int rq_servact, int rq_scope, + unsigned int rq_type, struct prout_param_descriptor * paramp, int noisy); +int mpath_prout_common(struct multipath *mpp,int rq_servact, int rq_scope, + unsigned int rq_type, struct prout_param_descriptor * paramp, int noisy); +int mpath_prout_rel(struct multipath *mpp,int rq_servact, int rq_scope, + unsigned int rq_type, struct 
prout_param_descriptor * paramp, int noisy); +int send_prout_activepath(char * dev, int rq_servact, int rq_scope, + unsigned int rq_type, struct prout_param_descriptor * paramp, int noisy); +/* update_prkey() below is update_prkey_flags() with sa_flags fixed to 0. */ +int update_prflag(char *mapname, int set); +int update_prkey_flags(char *mapname, uint64_t prkey, uint8_t sa_flags); +#define update_prkey(mapname, prkey) update_prkey_flags(mapname, prkey, 0) +void * mpath_alloc_prin_response(int prin_sa); +int update_map_pr(struct multipath *mpp); + +#endif diff --git a/libmultipath/Makefile b/libmultipath/Makefile new file mode 100644 index 0000000..e5651e4 --- /dev/null +++ b/libmultipath/Makefile @@ -0,0 +1,86 @@ +# +# Copyright (C) 2003 Christophe Varoqui, +# +include ../Makefile.inc + +SONAME = 0 +DEVLIB = libmultipath.so +LIBS = $(DEVLIB).$(SONAME) + +CFLAGS += $(LIB_CFLAGS) -I$(mpathcmddir) -I$(mpathpersistdir) -I$(nvmedir) + +LIBDEPS += -lpthread -ldl -ldevmapper -ludev -L$(mpathcmddir) -lmpathcmd -lurcu -laio +# With SYSTEMD defined: link -lsystemd when its value is greater than 209, -lsystemd-daemon otherwise. +ifdef SYSTEMD + CFLAGS += -DUSE_SYSTEMD=$(SYSTEMD) + ifeq ($(shell test $(SYSTEMD) -gt 209 && echo 1), 1) + LIBDEPS += -lsystemd + else + LIBDEPS += -lsystemd-daemon + endif +endif +# Each check_func test below adds its -D feature macro when the result is not 0. +ifneq ($(call check_func,dm_task_no_flush,/usr/include/libdevmapper.h),0) + CFLAGS += -DLIBDM_API_FLUSH -D_GNU_SOURCE +endif + +ifneq ($(call check_func,dm_task_set_cookie,/usr/include/libdevmapper.h),0) + CFLAGS += -DLIBDM_API_COOKIE +endif + +ifneq ($(call check_func,udev_monitor_set_receive_buffer_size,/usr/include/libudev.h),0) + CFLAGS += -DLIBUDEV_API_RECVBUF +endif + +ifneq ($(call check_func,dm_task_deferred_remove,/usr/include/libdevmapper.h),0) + CFLAGS += -DLIBDM_API_DEFERRED +endif + +OBJS = memory.o parser.o vector.o devmapper.o callout.o \ + hwtable.o blacklist.o util.o dmparser.o config.o \ + structs.o discovery.o propsel.o dict.o \ + pgpolicies.o debug.o defaults.o uevent.o time-util.o \ + switchgroup.o uxsock.o print.o alias.o log_pthread.o \ + log.o configure.o structs_vec.o sysfs.o prio.o checkers.o \ + lock.o file.o wwids.o 
prioritizers/alua_rtpg.o prkey.o \ + io_err_stat.o dm-generic.o generic.o foreign.o nvme-lib.o + +all: $(LIBS) + +nvme-lib.o: nvme-lib.c nvme-ioctl.c nvme-ioctl.h + $(CC) $(CFLAGS) -Wno-unused-function -c -o $@ $< + +# there are lots of "unused parameters" in dict.c +# because not all handler / snprint methods need all parameters +dict.o: dict.c + $(CC) $(CFLAGS) -Wno-unused-parameter -c -o $@ $< +# make_static: sed-filter file $1 into $2, prefixing "static " to every line that begins with a lowercase word plus a space and does not already begin with "static". +make_static = $(shell sed '/^static/!s/^\([a-z]\{1,\} \)/static \1/' <$1 >$2) + +nvme-ioctl.c: nvme/nvme-ioctl.c + $(call make_static,$<,$@) + +nvme-ioctl.h: nvme/nvme-ioctl.h + $(call make_static,$<,$@) + +$(LIBS): $(OBJS) + $(CC) $(LDFLAGS) $(SHARED_FLAGS) -Wl,-soname=$@ -o $@ $(OBJS) $(LIBDEPS) + $(LN) $@ $(DEVLIB) + +install: + $(INSTALL_PROGRAM) -d $(DESTDIR)$(syslibdir) + $(INSTALL_PROGRAM) -m 755 $(LIBS) $(DESTDIR)$(syslibdir)/$(LIBS) + $(INSTALL_PROGRAM) -m 755 -d $(DESTDIR)$(libdir) + $(LN) $(LIBS) $(DESTDIR)$(syslibdir)/$(DEVLIB) + +uninstall: + $(RM) $(DESTDIR)$(syslibdir)/$(LIBS) + $(RM) $(DESTDIR)$(syslibdir)/$(DEVLIB) + +clean: dep_clean + $(RM) core *.a *.o *.so *.so.* *.gz nvme-ioctl.c nvme-ioctl.h + +include $(wildcard $(OBJS:.o=.d)) + +dep_clean: + $(RM) $(OBJS:.o=.d) diff --git a/libmultipath/alias.c b/libmultipath/alias.c new file mode 100644 index 0000000..14401ca --- /dev/null +++ b/libmultipath/alias.c @@ -0,0 +1,429 @@ +/* + * Copyright (c) 2005 Christophe Varoqui + * Copyright (c) 2005 Benjamin Marzinski, Redhat + */ +#include +#include +#include +#include +#include +#include + +#include "debug.h" +#include "util.h" +#include "uxsock.h" +#include "alias.h" +#include "file.h" +#include "vector.h" +#include "checkers.h" +#include "structs.h" + + +/* + * significant parts of this file were taken from iscsi-bindings.c of the + * linux-iscsi project. + * Copyright (C) 2002 Cisco Systems, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * See the file COPYING included with this distribution for more details. + */ +/* Returns 1 unless alias contains a '/', in which case 0. */ +int +valid_alias(const char *alias) +{ + if (strchr(alias, '/') != NULL) + return 0; + return 1; +} + +/* Write prefix followed by the letter encoding of id (1 -> "a", 26 -> "z", 27 -> "aa") into name, a buffer of len bytes. Returns the resulting string length, or -1 if id <= 0 or the result does not fit. */ +static int +format_devname(char *name, int id, int len, const char *prefix) +{ + int pos; + int prefix_len = strlen(prefix); + + if (len <= prefix_len + 1 || id <= 0) + return -1; + + memset(name, 0, len); + strcpy(name, prefix); + name[len - 1] = '\0'; /* letters are produced right-to-left from the end of the buffer */ + for (pos = len - 2; pos >= prefix_len; pos--) { + id--; + name[pos] = 'a' + id % 26; + if (id < 26) + break; + id /= 26; + } + if (pos < prefix_len) + return -1; + + memmove(name + prefix_len, name + pos, len - pos); /* slide the letters (and NUL) up against the prefix */ + return (prefix_len + len - pos - 1); +} +/* Inverse of format_devname(): decode the lowercase letters following prefix in alias (up to NUL, blank or tab) into an id. Returns -1 if prefix is NULL or not a prefix of alias, if no letters follow, if more than 7 characters follow, on a character outside a-z, or on int overflow. */ +static int +scan_devname(const char *alias, const char *prefix) +{ + const char *c; + int i, n = 0; + static const int last_26 = INT_MAX / 26; + + if (!prefix || strncmp(alias, prefix, strlen(prefix))) + return -1; + + if (strlen(alias) == strlen(prefix)) + return -1; + + if (strlen(alias) > strlen(prefix) + 7) + /* id of 'aaaaaaaa' overflows int */ + return -1; + + c = alias + strlen(prefix); + while (*c != '\0' && *c != ' ' && *c != '\t') { + if (*c < 'a' || *c > 'z') + return -1; + i = *c - 'a'; + if (n > last_26 || (n == last_26 && i >= INT_MAX % 26)) + return -1; + n = n * 26 + i; + c++; + n++; + } + + return n; +} + +/* + * Returns: 0 if matching entry in WWIDs file found + * -1 if an error occurs + * >0 a free ID that could be used for the WWID at hand + * 
*map_alias is set to a freshly allocated string with the matching alias if + * the function returns 0, or to NULL otherwise. + * + * While scanning, id tracks the lowest id (starting at 1) not yet seen in + * file order, biggest_id the largest id seen, and smallest_bigger_id the + * smallest id seen that is still above id. + */ +static int +lookup_binding(FILE *f, const char *map_wwid, char **map_alias, + const char *prefix) +{ + char buf[LINE_MAX]; + unsigned int line_nr = 0; + int id = 1; + int biggest_id = 1; + int smallest_bigger_id = INT_MAX; + + *map_alias = NULL; + + rewind(f); + while (fgets(buf, LINE_MAX, f)) { + const char *alias, *wwid; + char *c, *saveptr; + int curr_id; + + line_nr++; + /* cut the line at a comment or line terminator */ + c = strpbrk(buf, "#\n\r"); + if (c) + *c = '\0'; + /* strtok_r keeps its position in saveptr instead of hidden static state */ + alias = strtok_r(buf, " \t", &saveptr); + if (!alias) /* blank line */ + continue; + curr_id = scan_devname(alias, prefix); + if (curr_id == id) { + if (id < INT_MAX) + id++; + else { + id = -1; + break; + } + } + if (curr_id > biggest_id) + biggest_id = curr_id; + if (curr_id > id && curr_id < smallest_bigger_id) + smallest_bigger_id = curr_id; + wwid = strtok_r(NULL, " \t", &saveptr); + if (!wwid){ + condlog(3, + "Ignoring malformed line %u in bindings file", + line_nr); + continue; + } + if (strcmp(wwid, map_wwid) == 0){ + condlog(3, "Found matching wwid [%s] in bindings file." 
+ " Setting alias to %s", wwid, alias); + *map_alias = strdup(alias); + if (*map_alias == NULL) { + condlog(0, "Cannot copy alias from bindings " + "file: out of memory"); + return -1; + } + return 0; + } + } + if (id >= smallest_bigger_id) { + if (biggest_id < INT_MAX) + id = biggest_id + 1; + else + id = -1; + } + if (id < 0) { + condlog(0, "no more available user_friendly_names"); + return -1; + } else + condlog(3, "No matching wwid [%s] in bindings file.", map_wwid); + return id; +} +/* Reverse lookup: read f from its current position for a line whose alias equals map_alias and copy that line's wwid into buff (at most WWID_SIZE bytes). buff is set to "" first. Returns 0 on a match, -1 otherwise. */ +static int +rlookup_binding(FILE *f, char *buff, const char *map_alias) +{ + char line[LINE_MAX]; + unsigned int line_nr = 0; + + buff[0] = '\0'; + + while (fgets(line, LINE_MAX, f)) { + char *c, *saveptr; + const char *alias, *wwid; + + line_nr++; + /* cut the line at a comment or line terminator */ + c = strpbrk(line, "#\n\r"); + if (c) + *c = '\0'; + /* strtok_r keeps its position in saveptr instead of hidden static state */ + alias = strtok_r(line, " \t", &saveptr); + if (!alias) /* blank line */ + continue; + wwid = strtok_r(NULL, " \t", &saveptr); + if (!wwid){ + condlog(3, + "Ignoring malformed line %u in bindings file", + line_nr); + continue; + } + if (strlen(wwid) > WWID_SIZE - 1) { + condlog(3, + "Ignoring too large wwid at %u in bindings file", line_nr); + continue; + } + if (strcmp(alias, map_alias) == 0){ + condlog(3, "Found matching alias [%s] in bindings file." 
+ "\nSetting wwid to %s", alias, wwid); + strlcpy(buff, wwid, WWID_SIZE); + return 0; + } + } + condlog(3, "No matching alias [%s] in bindings file.", map_alias); + + return -1; +} +/* Append the line "<alias> <wwid>\n" to the bindings file open on fd, where <alias> is format_devname(id, prefix). Returns a strdup()ed copy of the alias, or NULL on bad id, formatting, seek or write failure or out of memory. After a failed write the file is truncated back to its previous end. */ +static char * +allocate_binding(int fd, const char *wwid, int id, const char *prefix) +{ + char buf[LINE_MAX]; + off_t offset; + char *alias, *c; + int i; + + if (id <= 0) { + condlog(0, "%s: cannot allocate new binding for id %d", + __func__, id); + return NULL; + } + + i = format_devname(buf, id, LINE_MAX, prefix); + if (i == -1) + return NULL; + + c = buf + i; + if (snprintf(c, LINE_MAX - i, " %s\n", wwid) >= LINE_MAX - i) { + condlog(1, "%s: line too long for %s\n", __func__, wwid); + return NULL; + } + buf[LINE_MAX - 1] = '\0'; + + offset = lseek(fd, 0, SEEK_END); + if (offset < 0){ + condlog(0, "Cannot seek to end of bindings file : %s", + strerror(errno)); + return NULL; + } + if (write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf)){ + condlog(0, "Cannot write binding to bindings file : %s", + strerror(errno)); + /* clear partial write */ + if (ftruncate(fd, offset)) + condlog(0, "Cannot truncate the header : %s", + strerror(errno)); + return NULL; + } + c = strchr(buf, ' '); /* cut buf after the alias so only the alias is logged and copied */ + if (c) + *c = '\0'; + + condlog(3, "Created new binding [%s] for WWID [%s]", buf, wwid); + alias = strdup(buf); + if (alias == NULL) + condlog(0, "cannot copy new alias from bindings file: out of memory"); + + return alias; +} + +char * +use_existing_alias (const char *wwid, const char *file, const char *alias_old, + const char *prefix, int bindings_read_only) +{ + char *alias = NULL; + int id = 0; + int fd, can_write; + char buff[WWID_SIZE]; + FILE *f; + + fd = open_file(file, &can_write, BINDINGS_FILE_HEADER); + if (fd < 0) + return NULL; + + f = fdopen(fd, "r"); + if (!f) { + condlog(0, "cannot fdopen on bindings file descriptor"); + close(fd); + return NULL; + } + /* lookup the binding. 
if it exists, the wwid will be in buff + * either way, id contains the id for the alias + */ + rlookup_binding(f, buff, alias_old); + + if (strlen(buff) > 0) { + /* if buff is our wwid, it's already + * allocated correctly + */ + if (strcmp(buff, wwid) == 0) + alias = STRDUP(alias_old); + else { + alias = NULL; + condlog(0, "alias %s already bound to wwid %s, cannot reuse", + alias_old, buff); + } + goto out; + } + + id = lookup_binding(f, wwid, &alias, NULL); + if (alias) { + condlog(3, "Use existing binding [%s] for WWID [%s]", + alias, wwid); + goto out; + } + + /* allocate the existing alias in the bindings file */ + id = scan_devname(alias_old, prefix); + if (id <= 0) + goto out; + + if (fflush(f) != 0) { + condlog(0, "cannot fflush bindings file stream : %s", + strerror(errno)); + goto out; + } + + if (can_write && !bindings_read_only) { + alias = allocate_binding(fd, wwid, id, prefix); + condlog(0, "Allocated existing binding [%s] for WWID [%s]", + alias, wwid); + } + +out: + pthread_cleanup_push(free, alias); + fclose(f); + pthread_cleanup_pop(0); + return alias; +} + +char * +get_user_friendly_alias(const char *wwid, const char *file, const char *prefix, + int bindings_read_only) +{ + char *alias; + int fd, id; + FILE *f; + int can_write; + + if (!wwid || *wwid == '\0') { + condlog(3, "Cannot find binding for empty WWID"); + return NULL; + } + + fd = open_file(file, &can_write, BINDINGS_FILE_HEADER); + if (fd < 0) + return NULL; + + f = fdopen(fd, "r"); + if (!f) { + condlog(0, "cannot fdopen on bindings file descriptor : %s", + strerror(errno)); + close(fd); + return NULL; + } + + id = lookup_binding(f, wwid, &alias, prefix); + if (id < 0) { + fclose(f); + return NULL; + } + + pthread_cleanup_push(free, alias); + + if (fflush(f) != 0) { + condlog(0, "cannot fflush bindings file stream : %s", + strerror(errno)); + free(alias); + alias = NULL; + } else if (can_write && !bindings_read_only && !alias) + alias = allocate_binding(fd, wwid, id, prefix); + + 
fclose(f); + + pthread_cleanup_pop(0); + return alias; +} +/* Look up alias in the bindings file and copy the bound WWID into buff. Returns 0 when a binding was found; -1 if alias is NULL or empty, the file cannot be opened, or rlookup_binding() leaves buff empty. */ +int +get_user_friendly_wwid(const char *alias, char *buff, const char *file) +{ + int fd, unused; + FILE *f; + + if (!alias || *alias == '\0') { + condlog(3, "Cannot find binding for empty alias"); + return -1; + } + + fd = open_file(file, &unused, BINDINGS_FILE_HEADER); + if (fd < 0) + return -1; + + f = fdopen(fd, "r"); + if (!f) { + condlog(0, "cannot fdopen on bindings file descriptor : %s", + strerror(errno)); + close(fd); + return -1; + } + + rlookup_binding(f, buff, alias); + if (!strlen(buff)) { + fclose(f); + return -1; + } + + fclose(f); + return 0; +} diff --git a/libmultipath/alias.h b/libmultipath/alias.h new file mode 100644 index 0000000..7c4b302 --- /dev/null +++ b/libmultipath/alias.h @@ -0,0 +1,17 @@ +#define BINDINGS_FILE_HEADER \ +"# Multipath bindings, Version : 1.0\n" \ +"# NOTE: this file is automatically maintained by the multipath program.\n" \ +"# You should not need to edit this file in normal circumstances.\n" \ +"#\n" \ +"# Format:\n" \ +"# alias wwid\n" \ +"#\n" + +int valid_alias(const char *alias); +char *get_user_friendly_alias(const char *wwid, const char *file, + const char *prefix, + int bindings_readonly); +int get_user_friendly_wwid(const char *alias, char *buff, const char *file); +char *use_existing_alias (const char *wwid, const char *file, + const char *alias_old, + const char *prefix, int bindings_read_only); diff --git a/libmultipath/blacklist.c b/libmultipath/blacklist.c new file mode 100644 index 0000000..00e8dbd --- /dev/null +++ b/libmultipath/blacklist.c @@ -0,0 +1,550 @@ +/* + * Copyright (c) 2004, 2005 Christophe Varoqui + */ +#include +#include + +#include "checkers.h" +#include "memory.h" +#include "vector.h" +#include "util.h" +#include "debug.h" +#include "structs.h" +#include "config.h" +#include "blacklist.h" +#include "structs_vec.h" +#include "print.h" +/* Compile str as an extended regex and append a new blentry holding it to blist. A NULL str is a no-op that returns 0. On success the entry keeps str; on failure str is freed and 1 is returned. */ +int store_ble(vector blist, char * str, int origin) +{ + struct blentry * ble; + + if (!str) 
+ return 0; + + if (!blist) + goto out; + + ble = MALLOC(sizeof(struct blentry)); + + if (!ble) + goto out; + + if (regcomp(&ble->regex, str, REG_EXTENDED|REG_NOSUB)) + goto out1; + + if (!vector_alloc_slot(blist)) { + /* the pattern was compiled above; release it before dropping ble */ + regfree(&ble->regex); + goto out1; + } + + ble->str = str; + ble->origin = origin; + vector_set_slot(blist, ble); + return 0; +out1: + FREE(ble); +out: + FREE(str); + return 1; +} + +/* Append a newly allocated blentry_device to blist. Returns 0 on success, 1 if blist is NULL or an allocation fails. */ +int alloc_ble_device(vector blist) +{ + struct blentry_device * ble = MALLOC(sizeof(struct blentry_device)); + + if (!ble) + return 1; + + if (!blist || !vector_alloc_slot(blist)) { + FREE(ble); + return 1; + } + vector_set_slot(blist, ble); + return 0; +} +/* Fill the last entry of blist: compile vendor and/or product (either may be NULL) as extended regexes and store the strings in the entry. Returns 0 on success. Returns 1 if blist is NULL or has no last entry, or if a pattern fails to compile; in the compile-failure case both strings are freed and the entry keeps no compiled vendor pattern. */ +int set_ble_device(vector blist, char * vendor, char * product, int origin) +{ + struct blentry_device * ble; + + if (!blist) + return 1; + + ble = VECTOR_LAST_SLOT(blist); + + if (!ble) + return 1; + + if (vendor) { + if (regcomp(&ble->vendor_reg, vendor, + REG_EXTENDED|REG_NOSUB)) { + FREE(vendor); + if (product) + FREE(product); + return 1; + } + ble->vendor = vendor; + } + if (product) { + if (regcomp(&ble->product_reg, product, + REG_EXTENDED|REG_NOSUB)) { + FREE(product); + if (vendor) { + /* vendor_reg was compiled above and nothing frees it once ble->vendor is NULL */ + regfree(&ble->vendor_reg); + ble->vendor = NULL; + FREE(vendor); + } + return 1; + } + ble->product = product; + } + ble->origin = origin; + return 0; +} +/* Returns 1 if str matches the regex of any entry in the exception list elist, else 0. */ +int +_blacklist_exceptions (vector elist, const char * str) +{ + int i; + struct blentry * ele; + + vector_foreach_slot (elist, ele, i) { + if (!regexec(&ele->regex, str, 0, NULL, 0)) + return 1; + } + return 0; +} +/* Returns 1 if str matches the regex of any entry in the blacklist blist, else 0. */ +int +_blacklist (vector blist, const char * str) +{ + int i; + struct blentry * ble; + + vector_foreach_slot (blist, ble, i) { + if (!regexec(&ble->regex, str, 0, NULL, 0)) + return 1; + } + return 0; +} + +int +_blacklist_exceptions_device(const struct _vector *elist, const char * vendor, + const char * product) +{ + int i; + struct blentry_device * ble; + + vector_foreach_slot (elist, ble, i) { + if (!ble->vendor && !ble->product) + continue; + if ((!ble->vendor || + !regexec(&ble->vendor_reg, vendor, 0, NULL, 
0)) && + (!ble->product || + !regexec(&ble->product_reg, product, 0, NULL, 0))) + return 1; + } + return 0; +} +/* Returns 1 if some entry of blist matches: entries with neither pattern set are skipped, an unset pattern matches anything, and each pattern that is set must match its string. */ +int +_blacklist_device (const struct _vector *blist, const char * vendor, + const char * product) +{ + int i; + struct blentry_device * ble; + + vector_foreach_slot (blist, ble, i) { + if (!ble->vendor && !ble->product) + continue; + if ((!ble->vendor || + !regexec(&ble->vendor_reg, vendor, 0, NULL, 0)) && + (!ble->product || + !regexec(&ble->product_reg, product, 0, NULL, 0))) + return 1; + } + return 0; +} +/* Returns 1 if blist already has an entry whose vendor and product pattern strings equal the given ones (strcmp, with NULL only equal to NULL); no regex matching is done here. */ +static int +find_blacklist_device (const struct _vector *blist, const char * vendor, + const char * product) +{ + int i; + struct blentry_device * ble; + + vector_foreach_slot (blist, ble, i) { + if (((!vendor && !ble->vendor) || + (vendor && ble->vendor && + !strcmp(vendor, ble->vendor))) && + ((!product && !ble->product) || + (product && ble->product && + !strcmp(product, ble->product)))) + return 1; + } + return 0; +} +/* Install the built-in defaults: two devnode blacklist patterns, one property exception pattern, and one device blacklist entry for every hwtable entry that has bl_product set and is not already present. Returns 0 on success, 1 on the first failure. */ +int +setup_default_blist (struct config * conf) +{ + struct blentry * ble; + struct hwentry *hwe; + char * str; + int i; + + str = STRDUP("^(ram|zram|raw|loop|fd|md|dm-|sr|scd|st|dcssblk)[0-9]"); + if (!str) + return 1; + if (store_ble(conf->blist_devnode, str, ORIGIN_DEFAULT)) + return 1; + + str = STRDUP("^(td|hd|vd)[a-z]"); + if (!str) + return 1; + if (store_ble(conf->blist_devnode, str, ORIGIN_DEFAULT)) + return 1; + + str = STRDUP("(SCSI_IDENT_|ID_WWN)"); + if (!str) + return 1; + if (store_ble(conf->elist_property, str, ORIGIN_DEFAULT)) + return 1; + + vector_foreach_slot (conf->hwtable, hwe, i) { + if (hwe->bl_product) { + if (find_blacklist_device(conf->blist_device, + hwe->vendor, hwe->bl_product)) + continue; + if (alloc_ble_device(conf->blist_device)) + return 1; + ble = VECTOR_SLOT(conf->blist_device, + VECTOR_SIZE(conf->blist_device) - 1); /* NOTE(review): this slot was filled by alloc_ble_device(), i.e. a struct blentry_device, while ble is declared struct blentry * */ + if (set_ble_device(conf->blist_device, + STRDUP(hwe->vendor), + STRDUP(hwe->bl_product), + ORIGIN_DEFAULT)) { + FREE(ble); + vector_del_slot(conf->blist_device, 
VECTOR_SIZE(conf->blist_device) - 1); + return 1; + } + } + } + return 0; +} + +/* + * Log one filter decision. Expands inside log_filter() and reads its + * dev/vendor/product/wwid/env/protocol parameters: the first one that is + * set selects the message layout. Wrapped in do { } while (0) so that the + * if/else chain behaves as a single statement at the call site. + */ +#define LOG_BLIST(M, S, lvl) \ + do { \ + if (vendor && product) \ + condlog(lvl, "%s: (%s:%s) %s %s", \ + dev, vendor, product, (M), (S)); \ + else if (wwid && !dev) \ + condlog(lvl, "%s: %s %s", wwid, (M), (S)); \ + else if (wwid) \ + condlog(lvl, "%s: %s %s %s", dev, (M), wwid, (S)); \ + else if (env) \ + condlog(lvl, "%s: %s %s %s", dev, (M), env, (S)); \ + else if (protocol) \ + condlog(lvl, "%s: %s %s %s", dev, (M), protocol, (S)); \ + else \ + condlog(lvl, "%s: %s %s", dev, (M), (S)); \ + } while (0) + +/* Log the outcome r of a filter_*() check at level lvl; MATCH_NOTHING logs nothing. */ +static void +log_filter (const char *dev, char *vendor, char *product, char *wwid, + const char *env, const char *protocol, int r, int lvl) +{ + /* + * Try to sort from most likely to least. + */ + switch (r) { + case MATCH_NOTHING: + break; + case MATCH_DEVICE_BLIST: + LOG_BLIST("vendor/product", "blacklisted", lvl); + break; + case MATCH_WWID_BLIST: + LOG_BLIST("wwid", "blacklisted", lvl); + break; + case MATCH_DEVNODE_BLIST: + LOG_BLIST("device node name", "blacklisted", lvl); + break; + case MATCH_PROPERTY_BLIST: + LOG_BLIST("udev property", "blacklisted", lvl); + break; + case MATCH_PROTOCOL_BLIST: + LOG_BLIST("protocol", "blacklisted", lvl); + break; + case MATCH_DEVICE_BLIST_EXCEPT: + LOG_BLIST("vendor/product", "whitelisted", lvl); + break; + case MATCH_WWID_BLIST_EXCEPT: + LOG_BLIST("wwid", "whitelisted", lvl); + break; + case MATCH_DEVNODE_BLIST_EXCEPT: + LOG_BLIST("device node name", "whitelisted", lvl); + break; + case MATCH_PROPERTY_BLIST_EXCEPT: + LOG_BLIST("udev property", "whitelisted", lvl); + break; + case MATCH_PROPERTY_BLIST_MISSING: + LOG_BLIST("blacklisted,", "udev property missing", lvl); + break; + case MATCH_PROTOCOL_BLIST_EXCEPT: + LOG_BLIST("protocol", "whitelisted", lvl); + break; + } +} + +int +filter_device (vector blist, vector elist, char * vendor, char * product, + char * dev) +{ + int r = MATCH_NOTHING; + + if (vendor && product) { + if (_blacklist_exceptions_device(elist, 
vendor, product)) + r = MATCH_DEVICE_BLIST_EXCEPT; + else if (_blacklist_device(blist, vendor, product)) + r = MATCH_DEVICE_BLIST; + } + + log_filter(dev, vendor, product, NULL, NULL, NULL, r, 3); + return r; +} +/* Check device node name dev: the exception list is tested first, then the blacklist. Returns the MATCH_DEVNODE_* code, or MATCH_NOTHING (also for a NULL dev). */ +int +filter_devnode (vector blist, vector elist, char * dev) +{ + int r = MATCH_NOTHING; + + if (dev) { + if (_blacklist_exceptions(elist, dev)) + r = MATCH_DEVNODE_BLIST_EXCEPT; + else if (_blacklist(blist, dev)) + r = MATCH_DEVNODE_BLIST; + } + + log_filter(dev, NULL, NULL, NULL, NULL, NULL, r, 3); + return r; +} +/* Check wwid: the exception list is tested first, then the blacklist. Returns the MATCH_WWID_* code, or MATCH_NOTHING (also for a NULL wwid). dev is only used for logging. */ +int +filter_wwid (vector blist, vector elist, char * wwid, char * dev) +{ + int r = MATCH_NOTHING; + + if (wwid) { + if (_blacklist_exceptions(elist, wwid)) + r = MATCH_WWID_BLIST_EXCEPT; + else if (_blacklist(blist, wwid)) + r = MATCH_WWID_BLIST; + } + + log_filter(dev, NULL, NULL, wwid, NULL, NULL, r, 3); + return r; +} +/* Check the protocol string produced by snprint_path_protocol() for pp: the exception list is tested first, then the blacklist. Returns the MATCH_PROTOCOL_* code, or MATCH_NOTHING (also for a NULL pp, which is neither dereferenced nor logged). */ +int +filter_protocol(vector blist, vector elist, struct path * pp) +{ + char buf[PROTOCOL_BUF_SIZE]; + int r = MATCH_NOTHING; + + if (pp) { + snprint_path_protocol(buf, sizeof(buf), pp); + + if (_blacklist_exceptions(elist, buf)) + r = MATCH_PROTOCOL_BLIST_EXCEPT; + else if (_blacklist(blist, buf)) + r = MATCH_PROTOCOL_BLIST; + + /* pp->dev and buf are only valid inside this branch */ + log_filter(pp->dev, NULL, NULL, NULL, NULL, buf, r, 3); + } + return r; +} +/* Run the property, devnode, device, protocol and wwid filters on pp in that order. The first positive (blacklisted) result is returned at once; otherwise the result of the wwid filter is returned. */ +int +filter_path (struct config * conf, struct path * pp) +{ + int r; + + r = filter_property(conf, pp->udev, 3, pp->uid_attribute); + if (r > 0) + return r; + r = filter_devnode(conf->blist_devnode, conf->elist_devnode, pp->dev); + if (r > 0) + return r; + r = filter_device(conf->blist_device, conf->elist_device, + pp->vendor_id, pp->product_id, pp->dev); + if (r > 0) + return r; + r = filter_protocol(conf->blist_protocol, conf->elist_protocol, pp); + if (r > 0) + return r; + r = filter_wwid(conf->blist_wwid, conf->elist_wwid, pp->wwid, pp->dev); + return r; +} + +int +filter_property(struct config *conf, struct udev_device *udev, int lvl, + const char *uid_attribute) +{ + const char *devname = 
udev_device_get_sysname(udev); + struct udev_list_entry *list_entry; + const char *env = NULL; + int r = MATCH_NOTHING; + + if (udev) { + /* + * This is the inverse of the 'normal' matching; + * the environment variable _has_ to match. + * But only if the uid_attribute used for determining the WWID + * of the path is present in the environment + * (uid_attr_seen). If this is not the case, udev probably + * just failed to access the device, which should not cause the + * device to be blacklisted (it won't be used by multipath + * anyway without WWID). + * Likewise, if no uid attribute is defined, udev-based WWID + * determination is effectively off, and devices shouldn't be + * blacklisted by missing properties (check_missing_prop). + */ + + bool check_missing_prop = uid_attribute != NULL && + *uid_attribute != '\0'; + bool uid_attr_seen = false; + + r = MATCH_PROPERTY_BLIST_MISSING; /* stays in effect unless a property name matches a list below */ + udev_list_entry_foreach(list_entry, + udev_device_get_properties_list_entry(udev)) { + + env = udev_list_entry_get_name(list_entry); + if (!env) + continue; + + if (check_missing_prop && !strcmp(env, uid_attribute)) + uid_attr_seen = true; + + if (_blacklist_exceptions(conf->elist_property, env)) { + r = MATCH_PROPERTY_BLIST_EXCEPT; + break; + } + if (_blacklist(conf->blist_property, env)) { + r = MATCH_PROPERTY_BLIST; + break; + } + env = NULL; /* no match: keep env NULL for log_filter() unless a later property matches */ + } + if (r == MATCH_PROPERTY_BLIST_MISSING && + (!check_missing_prop || !uid_attr_seen)) + r = MATCH_NOTHING; + } + + log_filter(devname, NULL, NULL, NULL, env, NULL, r, lvl); + return r; +} +/* Release one blentry: its compiled regex, its pattern string and the entry itself. NULL is accepted. */ +static void free_ble(struct blentry *ble) +{ + if (!ble) + return; + regfree(&ble->regex); + FREE(ble->str); + FREE(ble); +} +/* Free every blentry stored in blist, then the vector itself. A NULL blist is accepted. */ +void +free_blacklist (vector blist) +{ + struct blentry * ble; + int i; + + if (!blist) + return; + + vector_foreach_slot (blist, ble, i) { + free_ble(ble); + } + vector_free(blist); +} +/* Remove later entries of blist whose pattern string equals that of an earlier entry. */ +void merge_blacklist(vector blist) +{ + struct blentry *bl1, *bl2; + int i, j; + + vector_foreach_slot(blist, bl1, i) { + j = i + 1; + 
vector_foreach_slot_after(blist, bl2, j) { + if (!bl1->str || !bl2->str || strcmp(bl1->str, bl2->str)) + continue; + condlog(3, "%s: duplicate blist entry section for %s", + __func__, bl1->str); + free_ble(bl2); + vector_del_slot(blist, j); + j--; /* slot j now holds the next entry; revisit it */ + } + } +} +/* Release one blentry_device. A compiled regex is freed only when the matching string pointer is set. NULL is accepted. */ +static void free_ble_device(struct blentry_device *ble) +{ + if (ble) { + if (ble->vendor) { + regfree(&ble->vendor_reg); + FREE(ble->vendor); + } + if (ble->product) { + regfree(&ble->product_reg); + FREE(ble->product); + } + FREE(ble); + } +} +/* Free every blentry_device stored in blist, then the vector itself. A NULL blist is accepted. */ +void +free_blacklist_device (vector blist) +{ + struct blentry_device * ble; + int i; + + if (!blist) + return; + + vector_foreach_slot (blist, ble, i) { + free_ble_device(ble); + } + vector_free(blist); +} +/* First pass: drop entries that have neither vendor nor product set. Second pass: drop later entries whose vendor and product strings both equal those of an earlier entry (NULL only equals NULL). */ +void merge_blacklist_device(vector blist) +{ + struct blentry_device *bl1, *bl2; + int i, j; + + vector_foreach_slot(blist, bl1, i) { + if (!bl1->vendor && !bl1->product) { + free_ble_device(bl1); + vector_del_slot(blist, i); + i--; + } + } + + vector_foreach_slot(blist, bl1, i) { + j = i + 1; + vector_foreach_slot_after(blist, bl2, j) { + if ((!bl1->vendor && bl2->vendor) || + (bl1->vendor && !bl2->vendor) || + (bl1->vendor && bl2->vendor && + strcmp(bl1->vendor, bl2->vendor))) + continue; + if ((!bl1->product && bl2->product) || + (bl1->product && !bl2->product) || + (bl1->product && bl2->product && + strcmp(bl1->product, bl2->product))) + continue; + /* NOTE(review): one of vendor/product may still be NULL here and is passed to %s */ + condlog(3, "%s: duplicate blist entry section for %s:%s", + __func__, bl1->vendor, bl1->product); + free_ble_device(bl2); + vector_del_slot(blist, j); + j--; + } + } +} diff --git a/libmultipath/blacklist.h b/libmultipath/blacklist.h new file mode 100644 index 0000000..2d721f6 --- /dev/null +++ b/libmultipath/blacklist.h @@ -0,0 +1,49 @@ +#ifndef _BLACKLIST_H +#define _BLACKLIST_H + +#include +#include + +#define MATCH_NOTHING 0 +#define MATCH_WWID_BLIST 1 +#define MATCH_DEVICE_BLIST 2 +#define MATCH_DEVNODE_BLIST 3 +#define MATCH_PROPERTY_BLIST 4 +#define MATCH_PROPERTY_BLIST_MISSING 5 +#define 
MATCH_PROTOCOL_BLIST 6 +#define MATCH_WWID_BLIST_EXCEPT -MATCH_WWID_BLIST +#define MATCH_DEVICE_BLIST_EXCEPT -MATCH_DEVICE_BLIST +#define MATCH_DEVNODE_BLIST_EXCEPT -MATCH_DEVNODE_BLIST +#define MATCH_PROPERTY_BLIST_EXCEPT -MATCH_PROPERTY_BLIST +#define MATCH_PROTOCOL_BLIST_EXCEPT -MATCH_PROTOCOL_BLIST +/* The *_EXCEPT codes are the negated blacklist codes, so a positive result always means "blacklisted". One pattern entry: source string, compiled regex, origin tag. */ +struct blentry { + char * str; + regex_t regex; + int origin; +}; +/* Vendor/product pattern pair; either string may be NULL, and each compiled regex belongs to the string of the same name. */ +struct blentry_device { + char * vendor; + char * product; + regex_t vendor_reg; + regex_t product_reg; + int origin; +}; + +int setup_default_blist (struct config *); +int alloc_ble_device (vector); +int filter_devnode (vector, vector, char *); +int filter_wwid (vector, vector, char *, char *); +int filter_device (vector, vector, char *, char *, char *); +int filter_path (struct config *, struct path *); +int filter_property(struct config *, struct udev_device *, int, const char*); +int filter_protocol(vector, vector, struct path *); +int store_ble (vector, char *, int); +int set_ble_device (vector, char *, char *, int); +void free_blacklist (vector); +void free_blacklist_device (vector); +void merge_blacklist(vector); +void merge_blacklist_device(vector); + +#endif /* _BLACKLIST_H */ diff --git a/libmultipath/byteorder.h b/libmultipath/byteorder.h new file mode 100644 index 0000000..0a86244 --- /dev/null +++ b/libmultipath/byteorder.h @@ -0,0 +1,44 @@ +#ifndef BYTEORDER_H_INCLUDED +#define BYTEORDER_H_INCLUDED + +#ifdef __linux__ +# include +# include +#else +# error unsupported +#endif +/* Conversions that match the host byte order are plain casts; the others byte-swap with bswap_16/32/64. */ +#if BYTE_ORDER == LITTLE_ENDIAN +# define le16_to_cpu(x) (uint16_t)(x) +# define be16_to_cpu(x) bswap_16(x) +# define le32_to_cpu(x) (uint32_t)(x) +# define le64_to_cpu(x) (uint64_t)(x) +# define be32_to_cpu(x) bswap_32(x) +# define be64_to_cpu(x) bswap_64(x) +#elif BYTE_ORDER == BIG_ENDIAN +# define le16_to_cpu(x) bswap_16(x) +# define be16_to_cpu(x) (uint16_t)(x) +# define le32_to_cpu(x) bswap_32(x) +# define le64_to_cpu(x) bswap_64(x) +# define be32_to_cpu(x) (uint32_t)(x) +# define be64_to_cpu(x) 
(uint64_t)(x) +#else +# error unsupported +#endif + +#define cpu_to_le16(x) le16_to_cpu(x) +#define cpu_to_be16(x) be16_to_cpu(x) +#define cpu_to_le32(x) le32_to_cpu(x) +#define cpu_to_be32(x) be32_to_cpu(x) +#define cpu_to_le64(x) le64_to_cpu(x) +#define cpu_to_be64(x) be64_to_cpu(x) + +struct be64 { + uint64_t _v; +}; + +#define get_be64(x) be64_to_cpu((x)._v) +#define put_be64(x, y) do { (x)._v = cpu_to_be64(y); } while (0) + + +#endif /* BYTEORDER_H_INCLUDED */ diff --git a/libmultipath/callout.c b/libmultipath/callout.c new file mode 100644 index 0000000..dac088c --- /dev/null +++ b/libmultipath/callout.c @@ -0,0 +1,221 @@ +/* + * Source: copy of the udev package source file + * + * Copyrights of the source file apply + * Copyright (c) 2004 Christophe Varoqui + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "checkers.h" +#include "vector.h" +#include "structs.h" +#include "util.h" +#include "callout.h" +#include "debug.h" + +int execute_program(char *path, char *value, int len) /* runs path (split on spaces, '...' kept as one argument) and stores its stdout, NUL-terminated, in value[len]; returns 0 if it exited with status 0, else -1 */ +{ + int retval; + int count; + int status; + int fds[2], null_fd; + pid_t pid; + char *pos; + char arg[CALLOUT_MAX_SIZE]; + int argc = sizeof(arg) / 2; + char *argv[argc + 1]; + int i; + + i = 0; + + if (strchr(path, ' ')) { + strlcpy(arg, path, sizeof(arg)); + pos = arg; + while (pos != NULL && i < argc) { + if (pos[0] == '\'') { + /* don't separate if in apostrophes */ + pos++; + argv[i] = strsep(&pos, "\'"); + while (pos && pos[0] == ' ') /* strsep() sets pos to NULL when the closing apostrophe is missing */ + pos++; + } else { + argv[i] = strsep(&pos, " "); + } + i++; + } + } else { + argv[i++] = path; + } + argv[i] = NULL; + + retval = pipe(fds); + + if (retval != 0) { + condlog(0, "error creating pipe for callout: %s", strerror(errno)); + return -1; + } + + pid = fork(); + + switch(pid) { + case 0: + /* child */ + + /* dup write side of pipe to STDOUT */ + if (dup2(fds[1], STDOUT_FILENO) < 0) { + condlog(1, "failed to dup2 stdout: %m"); + exit(-1); /* the child must terminate here, not return into the caller's code */ + } + close(fds[0]); + close(fds[1]); + + /* 
Ignore writes to stderr */ + null_fd = open("/dev/null", O_WRONLY); + if (null_fd >= 0) { /* 0 is a valid descriptor when stdin was closed */ + if (dup2(null_fd, STDERR_FILENO) < 0) + condlog(1, "failed to dup2 stderr: %m"); + close(null_fd); + } + + retval = execv(argv[0], argv); + condlog(0, "error execing %s : %s", argv[0], strerror(errno)); + exit(-1); + case -1: + condlog(0, "fork failed: %s", strerror(errno)); + close(fds[0]); + close(fds[1]); + return -1; + default: + /* parent reads from fds[0] */ + close(fds[1]); + retval = 0; + i = 0; + while (1) { + count = read(fds[0], value + i, len - i-1); + if (count <= 0) + break; + + i += count; + if (i >= len-1) { + condlog(0, "not enough space for response from %s", argv[0]); + retval = -1; + break; + } + } + + if (count < 0) { + condlog(0, "no response from %s", argv[0]); + retval = -1; + } + + if (i > 0 && value[i-1] == '\n') + i--; + value[i] = '\0'; + + waitpid(pid, &status, 0); /* reap the child forked above, not whichever child exits first */ + close(fds[0]); + + retval = -1; /* NOTE(review): this discards the read-error/overflow result set above; only the exit status decides the return value */ + if (WIFEXITED(status)) { + status = WEXITSTATUS(status); + if (status == 0) + retval = 0; + else + condlog(0, "%s exited with %d", argv[0], status); + } + else if (WIFSIGNALED(status)) + condlog(0, "%s was terminated by signal %d", argv[0], WTERMSIG(status)); + else + condlog(0, "%s terminated abnormally", argv[0]); + } + return retval; +} + +int apply_format(char * string, char * cmd, struct path * pp) /* copies string into cmd, expanding the first %n (pp->dev, '!' turned into '/') or %d (pp->dev_t); returns 0 on success, 1 on NULL input or lack of space */ +{ + char * pos; + char * dst; + char * p; + char * q; + int len; + int myfree; + + if (!string) + return 1; + + if (!cmd) + return 1; + + dst = cmd; + p = dst; + pos = strchr(string, '%'); + myfree = CALLOUT_MAX_SIZE; + + if (!pos) { + len = snprintf(dst, myfree, "%s", string); /* bounded copy; assumes cmd holds CALLOUT_MAX_SIZE bytes, as the checks below do */ + return (len < 0 || len >= myfree); /* 1 when string did not fit */ + } + + len = (int) (pos - string) + 1; + myfree -= len; + + if (myfree < 2) + return 1; + + snprintf(p, len, "%s", string); + p += len - 1; + pos++; + + switch (*pos) { + case 'n': + len = strlen(pp->dev) + 1; + myfree -= len; + + if (myfree < 2) + return 1; + + snprintf(p, len, "%s", pp->dev); + for (q = p; q < p + len; q++) { + if (q && *q == '!') + *q = '/'; + } + p += len - 1; + break; + case 
'd': + len = strlen(pp->dev_t) + 1; + myfree -= len; + + if (myfree < 2) + return 1; + + snprintf(p, len, "%s", pp->dev_t); + p += len - 1; + break; + default: + break; + } + pos += (*pos != '\0'); /* a trailing '%' leaves pos on the terminator; stepping past it would read out of bounds */ + + if (!*pos) { + condlog(3, "formatted callout = %s", dst); + return 0; + } + + len = strlen(pos) + 1; + myfree -= len; + + if (myfree < 2) + return 1; + + snprintf(p, len, "%s", pos); + condlog(3, "reformatted callout = %s", dst); + return 0; +} diff --git a/libmultipath/callout.h b/libmultipath/callout.h new file mode 100644 index 0000000..ab648e8 --- /dev/null +++ b/libmultipath/callout.h @@ -0,0 +1,7 @@ +#ifndef _CALLOUT_H +#define _CALLOUT_H + +int execute_program(char *, char *, int); +int apply_format (char *, char *, struct path *); + +#endif /* _CALLOUT_H */ diff --git a/libmultipath/checkers.c b/libmultipath/checkers.c new file mode 100644 index 0000000..8d2be8a --- /dev/null +++ b/libmultipath/checkers.c @@ -0,0 +1,377 @@ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "checkers.h" +#include "vector.h" + +struct checker_class { + struct list_head node; + void *handle; + int refcount; + int sync; + char name[CHECKER_NAME_LEN]; + int (*check)(struct checker *); + int (*init)(struct checker *); /* to allocate the context */ + int (*mp_init)(struct checker *); /* to allocate the mpcontext */ + void (*free)(struct checker *); /* to free the context */ + void (*reset)(void); /* to reset the global variables */ + const char **msgtable; + short msgtable_size; +}; + +char *checker_state_names[] = { + "wild", + "unchecked", + "down", + "up", + "shaky", + "ghost", + "pending", + "timeout", + "removed", + "delayed", +}; + +static LIST_HEAD(checkers); + +const char *checker_state_name(int i) /* maps a path state value to its name; "invalid" for values outside the table */ +{ + return (i >= 0 && (unsigned int)i < sizeof(checker_state_names) / sizeof(checker_state_names[0])) ? checker_state_names[i] : "invalid"; /* never index past the table */ +} + +static struct checker_class *alloc_checker_class(void) /* new class with an empty list node and refcount 1, or NULL */ +{ + struct checker_class *c; + + c = MALLOC(sizeof(struct checker_class)); + if (c) { + INIT_LIST_HEAD(&c->node); + c->refcount = 1; + } + return c; +} + +void 
free_checker_class(struct checker_class *c) +{ + if (!c) + return; + c->refcount--; + if (c->refcount) { + condlog(4, "%s checker refcount %d", + c->name, c->refcount); + return; + } + condlog(3, "unloading %s checker", c->name); + list_del(&c->node); + if (c->reset) + c->reset(); + if (c->handle) { + if (dlclose(c->handle) != 0) { + condlog(0, "Cannot unload checker %s: %s", + c->name, dlerror()); + } + } + FREE(c); +} + +void cleanup_checkers (void) +{ + struct checker_class *checker_loop; + struct checker_class *checker_temp; + + list_for_each_entry_safe(checker_loop, checker_temp, &checkers, node) { + free_checker_class(checker_loop); + } +} + +static struct checker_class *checker_class_lookup(const char *name) +{ + struct checker_class *c; + + if (!name || !strlen(name)) + return NULL; + list_for_each_entry(c, &checkers, node) { + if (!strncmp(name, c->name, CHECKER_NAME_LEN)) + return c; + } + return NULL; +} + +void reset_checker_classes(void) +{ + struct checker_class *c; + + list_for_each_entry(c, &checkers, node) { + if (c->reset) + c->reset(); + } +} + +static struct checker_class *add_checker_class(const char *multipath_dir, + const char *name) +{ + char libname[LIB_CHECKER_NAMELEN]; + struct stat stbuf; + struct checker_class *c; + char *errstr; + + c = alloc_checker_class(); + if (!c) + return NULL; + snprintf(c->name, CHECKER_NAME_LEN, "%s", name); + if (!strncmp(c->name, NONE, 4)) + goto done; + snprintf(libname, LIB_CHECKER_NAMELEN, "%s/libcheck%s.so", + multipath_dir, name); + if (stat(libname,&stbuf) < 0) { + condlog(0,"Checker '%s' not found in %s", + name, multipath_dir); + goto out; + } + condlog(3, "loading %s checker", libname); + c->handle = dlopen(libname, RTLD_NOW); + if (!c->handle) { + if ((errstr = dlerror()) != NULL) + condlog(0, "A dynamic linking error occurred: (%s)", + errstr); + goto out; + } + c->check = (int (*)(struct checker *)) dlsym(c->handle, "libcheck_check"); + errstr = dlerror(); + if (errstr != NULL) + condlog(0, "A 
dynamic linking error occurred: (%s)", errstr); + if (!c->check) + goto out; + + c->init = (int (*)(struct checker *)) dlsym(c->handle, "libcheck_init"); + errstr = dlerror(); + if (errstr != NULL) + condlog(0, "A dynamic linking error occurred: (%s)", errstr); + if (!c->init) + goto out; + + c->mp_init = (int (*)(struct checker *)) dlsym(c->handle, "libcheck_mp_init"); + c->reset = (void (*)(void)) dlsym(c->handle, "libcheck_reset"); + /* These 2 functions can be NULL. call dlerror() to clear out any + * error string */ + dlerror(); + + c->free = (void (*)(struct checker *)) dlsym(c->handle, "libcheck_free"); + errstr = dlerror(); + if (errstr != NULL) + condlog(0, "A dynamic linking error occurred: (%s)", errstr); + if (!c->free) + goto out; + + c->msgtable_size = 0; + c->msgtable = dlsym(c->handle, "libcheck_msgtable"); + + if (c->msgtable != NULL) { + const char **p; + + for (p = c->msgtable; + *p && (p - c->msgtable) < CHECKER_MSGTABLE_SIZE; p++) + /* nothing */; + + c->msgtable_size = p - c->msgtable; + } else + c->msgtable_size = 0; + condlog(3, "checker %s: message table size = %d", + c->name, c->msgtable_size); + +done: + c->sync = 1; + list_add(&c->node, &checkers); + return c; +out: + free_checker_class(c); + return NULL; +} + +void checker_set_fd (struct checker * c, int fd) +{ + if (!c) + return; + c->fd = fd; +} + +void checker_set_sync (struct checker * c) +{ + if (!c || !c->cls) + return; + c->cls->sync = 1; +} + +void checker_set_async (struct checker * c) +{ + if (!c || !c->cls) + return; + c->cls->sync = 0; +} + +void checker_enable (struct checker * c) +{ + if (!c) + return; + c->disable = 0; +} + +void checker_disable (struct checker * c) +{ + if (!c) + return; + c->disable = 1; +} + +int checker_init (struct checker * c, void ** mpctxt_addr) +{ + if (!c || !c->cls) + return 1; + c->mpcontext = mpctxt_addr; + if (c->cls->init && c->cls->init(c) != 0) + return 1; + if (mpctxt_addr && *mpctxt_addr == NULL && c->cls->mp_init && + 
c->cls->mp_init(c) != 0) /* continue even if mp_init fails */ + c->mpcontext = NULL; + return 0; +} + +int checker_mp_init(struct checker * c, void ** mpctxt_addr) /* attaches mpctxt_addr if none is set yet and runs the class mp_init hook when *mpctxt_addr is NULL; 1 on failure */ +{ + if (!c || !c->cls) + return 1; + if (c->mpcontext || !mpctxt_addr) + return 0; + c->mpcontext = mpctxt_addr; + if (*mpctxt_addr == NULL && c->cls->mp_init && + c->cls->mp_init(c) != 0) { + c->mpcontext = NULL; + return 1; + } + return 0; +} + +void checker_clear (struct checker *c) /* zeroes the checker and marks its fd as unset (-1) */ +{ + memset(c, 0x0, sizeof(struct checker)); + c->fd = -1; +} + +void checker_put (struct checker * dst) /* runs the class free hook, clears dst and drops one class reference */ +{ + struct checker_class *src; + + if (!dst) + return; + src = dst->cls; + + if (src && src->free) + src->free(dst); + checker_clear(dst); + free_checker_class(src); +} + +int checker_check (struct checker * c, int path_state) /* runs the class check hook; returns path_state unchanged for the "none" checker */ +{ + int r; + + if (!c || !c->cls) /* c->cls is dereferenced below; same guard as checker_init() and checker_name() */ + return PATH_WILD; + + c->msgid = CHECKER_MSGID_NONE; + if (c->disable) { + c->msgid = CHECKER_MSGID_DISABLED; + return PATH_UNCHECKED; + } + if (!strncmp(c->cls->name, NONE, 4)) + return path_state; + + if (c->fd < 0) { + c->msgid = CHECKER_MSGID_NO_FD; + return PATH_WILD; + } + r = c->cls->check(c); + + return r; +} + +const char *checker_name(const struct checker *c) +{ + if (!c || !c->cls) + return NULL; + return c->cls->name; +} + +int checker_is_sync(const struct checker *c) +{ + return c && c->cls && c->cls->sync; +} + +static const char *generic_msg[CHECKER_GENERIC_MSGTABLE_SIZE] = { + [CHECKER_MSGID_NONE] = "", + [CHECKER_MSGID_DISABLED] = " is disabled", + [CHECKER_MSGID_NO_FD] = " has no usable fd", + [CHECKER_MSGID_INVALID] = " provided invalid message id", + [CHECKER_MSGID_UP] = " reports path is up", + [CHECKER_MSGID_DOWN] = " reports path is down", + [CHECKER_MSGID_GHOST] = " reports path is ghost", + [CHECKER_MSGID_UNSUPPORTED] = " doesn't support this device", +}; + +const char *checker_message(const struct checker *c) +{ + int id; + + if (!c || !c->cls || c->msgid < 0 || + (c->msgid >= CHECKER_GENERIC_MSGTABLE_SIZE && + c->msgid < CHECKER_FIRST_MSGID)) 
+ goto bad_id; + + if (c->msgid < CHECKER_GENERIC_MSGTABLE_SIZE) + return generic_msg[c->msgid]; + + id = c->msgid - CHECKER_FIRST_MSGID; + if (id < c->cls->msgtable_size) + return c->cls->msgtable[id]; + +bad_id: + return generic_msg[CHECKER_MSGID_NONE]; +} + +void checker_clear_message (struct checker *c) +{ + if (!c) + return; + c->msgid = CHECKER_MSGID_NONE; +} + +void checker_get(const char *multipath_dir, struct checker *dst, + const char *name) +{ + struct checker_class *src = NULL; + + if (!dst) + return; + + if (name && strlen(name)) { + src = checker_class_lookup(name); + if (!src) + src = add_checker_class(multipath_dir, name); + } + dst->cls = src; + if (!src) + return; + + src->refcount++; +} + +int init_checkers(const char *multipath_dir) +{ + if (!add_checker_class(multipath_dir, DEFAULT_CHECKER)) + return 1; + return 0; +} diff --git a/libmultipath/checkers.h b/libmultipath/checkers.h new file mode 100644 index 0000000..b458118 --- /dev/null +++ b/libmultipath/checkers.h @@ -0,0 +1,178 @@ +#ifndef _CHECKERS_H +#define _CHECKERS_H + +#include "list.h" +#include "memory.h" +#include "defaults.h" + +/* + * + * Userspace (multipath/multipathd) path states + * + * PATH_WILD: + * - Use: Any checker + * - Description: Corner case where "fd < 0" for path fd (see checker_check()), + * or where a checker detects an unsupported device + * (e.g. wrong checker configured for a given device). 
+ * + * PATH_UNCHECKED: + * - Use: Only in directio checker + * - Description: set when fcntl(F_GETFL) fails to return flags or O_DIRECT + * not included in flags, or O_DIRECT read fails + * - Notes: + * - multipathd: uses it to skip over paths in sync_map_state() + * - multipath: used in update_paths(); if state==PATH_UNCHECKED, call + * pathinfo() + * + * PATH_DOWN: + * - Use: All checkers (directio, emc_clariion, hp_sw, readsector0, tur) + * - Description: Either a) SG_IO ioctl failed, or b) check condition on some + * SG_IO ioctls that succeed (tur, readsector0 checkers); path is down and + * you shouldn't try to send commands to it + * + * PATH_UP: + * - Use: All checkers (directio, emc_clariion, hp_sw, readsector0, tur) + * - Description: Path is up and I/O can be sent to it + * + * PATH_SHAKY: + * - Use: Only emc_clariion + * - Description: Indicates path not available for "normal" operations + * + * PATH_GHOST: + * - Use: Only hp_sw and rdac + * - Description: Indicates a "passive/standby" path on active/passive HP + * arrays. These paths will return valid answers to certain SCSI commands + * (tur, read_capacity, inquiry, start_stop), but will fail I/O commands. + * The path needs an initialization command to be sent to it in order for + * I/Os to succeed. + * + * PATH_PENDING: + * - Use: All async checkers + * - Description: Indicates a check IO is in flight. + * + * PATH_TIMEOUT: + * - Use: Only tur checker + * - Description: Command timed out + * + * PATH_REMOVED: + * - Use: All checkers + * - Description: Device has been removed from the system + * + * PATH_DELAYED: + * - Use: None of the checkers (returned if the path is being delayed before + * reintegration). + * - Description: If a path fails after being up for less than + * delay_watch_checks checks, when it comes back up again, it will not + * be marked as up until it has been up for delay_wait_checks checks. 
+ * During this time, it is marked as "delayed" + */ +enum path_check_state { + PATH_WILD, + PATH_UNCHECKED, + PATH_DOWN, + PATH_UP, + PATH_SHAKY, + PATH_GHOST, + PATH_PENDING, + PATH_TIMEOUT, + PATH_REMOVED, + PATH_DELAYED, + PATH_MAX_STATE +}; + +#define DIRECTIO "directio" +#define TUR "tur" +#define HP_SW "hp_sw" +#define RDAC "rdac" +#define EMC_CLARIION "emc_clariion" +#define READSECTOR0 "readsector0" +#define CCISS_TUR "cciss_tur" +#define NONE "none" + +#define ASYNC_TIMEOUT_SEC 30 + +/* + * strings lengths + */ +#define CHECKER_NAME_LEN 16 +#define CHECKER_MSG_LEN 256 +#define CHECKER_DEV_LEN 256 +#define LIB_CHECKER_NAMELEN 256 + +/* + * Generic message IDs for use in checkers. + */ +enum { + CHECKER_MSGID_NONE = 0, + CHECKER_MSGID_DISABLED, + CHECKER_MSGID_NO_FD, + CHECKER_MSGID_INVALID, + CHECKER_MSGID_UP, + CHECKER_MSGID_DOWN, + CHECKER_MSGID_GHOST, + CHECKER_MSGID_UNSUPPORTED, + CHECKER_GENERIC_MSGTABLE_SIZE, + CHECKER_FIRST_MSGID = 100, /* lowest msgid for checkers */ + CHECKER_MSGTABLE_SIZE = 100, /* max msg table size for checkers */ +}; + +struct checker_class; +struct checker { + struct checker_class *cls; + int fd; + unsigned int timeout; + int disable; + short msgid; /* checker-internal extra status */ + void * context; /* store for persistent data */ + void ** mpcontext; /* store for persistent data shared + multipath-wide. Use MALLOC if + you want to stuff data in. 
*/ +}; + +static inline int checker_selected(const struct checker *c) /* non-zero when c is non-NULL and has a checker class attached */ +{ + return c != NULL && c->cls != NULL; +} + +const char *checker_state_name(int); +int init_checkers(const char *); +void cleanup_checkers (void); +int checker_init (struct checker *, void **); +int checker_mp_init(struct checker *, void **); +void checker_clear (struct checker *); +void checker_put (struct checker *); +void checker_reset (struct checker *); /* NOTE(review): no definition is visible in the checkers.c added by this patch - confirm it exists or drop the prototype */ +void checker_set_sync (struct checker *); +void checker_set_async (struct checker *); +void checker_set_fd (struct checker *, int); +void checker_enable (struct checker *); +void checker_disable (struct checker *); +int checker_check (struct checker *, int); +int checker_is_sync(const struct checker *); +const char *checker_name (const struct checker *); +void reset_checker_classes(void); +/* + * This returns a string that's best prepended with "$NAME checker", + * where $NAME is the return value of checker_name(). + */ +const char *checker_message(const struct checker *); +void checker_clear_message (struct checker *c); +void checker_get(const char *, struct checker *, const char *); + +/* Prototypes for symbols exported by path checker dynamic libraries (.so) */ +int libcheck_check(struct checker *); +int libcheck_init(struct checker *); +void libcheck_free(struct checker *); +/* + * msgid => message map. + * + * It only needs to be provided if the checker defines specific + * message IDs. + * Message IDs available to checkers start at CHECKER_FIRST_MSGID. + * The msgtable array is 0-based, i.e. msgtable[0] is the message + * for msgid == CHECKER_FIRST_MSGID. + * The table ends with a NULL element. 
+ */ +extern const char *libcheck_msgtable[]; + +#endif /* _CHECKERS_H */ diff --git a/libmultipath/checkers/Makefile b/libmultipath/checkers/Makefile new file mode 100644 index 0000000..02caea6 --- /dev/null +++ b/libmultipath/checkers/Makefile @@ -0,0 +1,41 @@ +# +# Copyright (C) 2003 Christophe Varoqui, +# +include ../../Makefile.inc + +CFLAGS += $(LIB_CFLAGS) -I.. + +# If you add or remove a checker also update multipath/multipath.conf.5 +LIBS= \ + libcheckcciss_tur.so \ + libcheckreadsector0.so \ + libchecktur.so \ + libcheckdirectio.so \ + libcheckemc_clariion.so \ + libcheckhp_sw.so \ + libcheckrdac.so + +all: $(LIBS) + +libcheckdirectio.so: libsg.o directio.o + $(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^ -laio + +libcheck%.so: libsg.o %.o + $(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^ + +install: + $(INSTALL_PROGRAM) -m 755 $(LIBS) $(DESTDIR)$(libdir) + +uninstall: + for file in $(LIBS); do $(RM) $(DESTDIR)$(libdir)/$$file; done + +clean: dep_clean + $(RM) core *.a *.o *.gz *.so + +OBJS := $(LIBS:libcheck%.so=%.o) libsg.o directio.o +.SECONDARY: $(OBJS) + +include $(wildcard $(OBJS:.o=.d)) + +dep_clean: + $(RM) $(OBJS:.o=.d) diff --git a/libmultipath/checkers/cciss.h b/libmultipath/checkers/cciss.h new file mode 100644 index 0000000..cf20946 --- /dev/null +++ b/libmultipath/checkers/cciss.h @@ -0,0 +1,141 @@ +#ifndef CCISS_H +#define CCISS_H + +#include +#include + +#define CCISS_IOC_MAGIC 'B' + +/* + * transfer direction + */ +#define XFER_NONE 0x00 +#define XFER_WRITE 0x01 +#define XFER_READ 0x02 +#define XFER_RSVD 0x03 + +/* + * task attribute + */ +#define ATTR_UNTAGGED 0x00 +#define ATTR_SIMPLE 0x04 +#define ATTR_HEADOFQUEUE 0x05 +#define ATTR_ORDERED 0x06 +#define ATTR_ACA 0x07 + +/* + * cdb type + */ +#define TYPE_CMD 0x00 +#define TYPE_MSG 0x01 + +#define SENSEINFOBYTES 32 + +/* + * Type defs used in the following structs + */ +#define BYTE __u8 +#define WORD __u16 +#define HWORD __u16 +#define DWORD __u32 + +#pragma pack(1) + +//Command List Structure 
+typedef union _SCSI3Addr_struct { + struct { + BYTE Dev; + BYTE Bus:6; + BYTE Mode:2; // b00 + } PeripDev; + struct { + BYTE DevLSB; + BYTE DevMSB:6; + BYTE Mode:2; // b01 + } LogDev; + struct { + BYTE Dev:5; + BYTE Bus:3; + BYTE Targ:6; + BYTE Mode:2; // b10 + } LogUnit; +} SCSI3Addr_struct; + +typedef struct _PhysDevAddr_struct { + DWORD TargetId:24; + DWORD Bus:6; + DWORD Mode:2; + SCSI3Addr_struct Target[2]; //2 level target device addr +} PhysDevAddr_struct; + +typedef struct _LogDevAddr_struct { + DWORD VolId:30; + DWORD Mode:2; + BYTE reserved[4]; +} LogDevAddr_struct; + +typedef union _LUNAddr_struct { + BYTE LunAddrBytes[8]; + SCSI3Addr_struct SCSI3Lun[4]; + PhysDevAddr_struct PhysDev; + LogDevAddr_struct LogDev; +} LUNAddr_struct; + +typedef struct _RequestBlock_struct { + BYTE CDBLen; + struct { + BYTE Type:3; + BYTE Attribute:3; + BYTE Direction:2; + } Type; + HWORD Timeout; + BYTE CDB[16]; +} RequestBlock_struct; + +typedef union _MoreErrInfo_struct{ + struct { + BYTE Reserved[3]; + BYTE Type; + DWORD ErrorInfo; + } Common_Info; + struct{ + BYTE Reserved[2]; + BYTE offense_size;//size of offending entry + BYTE offense_num; //byte # of offense 0-base + DWORD offense_value; + } Invalid_Cmd; +} MoreErrInfo_struct; + +typedef struct _ErrorInfo_struct { + BYTE ScsiStatus; + BYTE SenseLen; + HWORD CommandStatus; + DWORD ResidualCnt; + MoreErrInfo_struct MoreErrInfo; + BYTE SenseInfo[SENSEINFOBYTES]; +} ErrorInfo_struct; + +#pragma pack() + +typedef struct _IOCTL_Command_struct { + LUNAddr_struct LUN_info; + RequestBlock_struct Request; + ErrorInfo_struct error_info; + WORD buf_size; /* size in bytes of the buf */ + BYTE *buf; +} IOCTL_Command_struct; + +typedef struct _LogvolInfo_struct{ + __u32 LunID; + int num_opens; /* number of opens on the logical volume */ + int num_parts; /* number of partitions configured on logvol */ +} LogvolInfo_struct; + +#define CCISS_PASSTHRU _IOWR(CCISS_IOC_MAGIC, 11, IOCTL_Command_struct) +#define CCISS_GETLUNINFO 
_IOR(CCISS_IOC_MAGIC, 17, LogvolInfo_struct) + +int cciss_init( struct checker *); +void cciss_free (struct checker * c); +int cciss_tur( struct checker *); + +#endif diff --git a/libmultipath/checkers/cciss_tur.c b/libmultipath/checkers/cciss_tur.c new file mode 100644 index 0000000..eaf67b3 --- /dev/null +++ b/libmultipath/checkers/cciss_tur.c @@ -0,0 +1,129 @@ +/* + ***************************************************************************** + * * + * (C) Copyright 2007 Hewlett-Packard Development Company, L.P * + * * + * This program is free software; you can redistribute it and/or modify it * + * under the terms of the GNU General Public License as published by the Free* + * Software Foundation; either version 2 of the License, or (at your option)* + * any later version. * + * * + * This program is distributed in the hope that it will be useful, but * + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY* + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * + * for more details. * + * * + * You should have received a copy of the GNU General Public License along * + * with this program. If not, see . * + * * + ***************************************************************************** +*/ + +/* + * This program originally derived from and inspired by + * Christophe Varoqui's tur.c, part of libchecker. 
+ */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "checkers.h" + +#include "cciss.h" + +#define TUR_CMD_LEN 6 +#define HEAVY_CHECK_COUNT 10 + +struct cciss_tur_checker_context { + void * dummy; +}; + +int libcheck_init (__attribute__((unused)) struct checker * c) /* nothing to set up for this checker */ +{ + return 0; +} + +void libcheck_free (__attribute__((unused)) struct checker * c) /* nothing to release */ +{ + return; +} + +int libcheck_check(struct checker * c) /* queries the logical volume id, then sends an all-zero 6-byte CDB through CCISS_PASSTHRU; returns a path_check_state value */ +{ + int rc; + int ret; + unsigned int lun = 0; + struct cciss_tur_checker_context * ctxt = NULL; + LogvolInfo_struct lvi; // logical "volume" info + IOCTL_Command_struct cic; // cciss ioctl command + + if ((c->fd) < 0) { + c->msgid = CHECKER_MSGID_NO_FD; + ret = PATH_WILD; /* -1 is not a path state; checker_check() reports a missing fd as PATH_WILD */ + goto out; + } + + rc = ioctl(c->fd, CCISS_GETLUNINFO, &lvi); + if ( rc != 0) { + perror("Error: "); + fprintf(stderr, "cciss TUR failed in CCISS_GETLUNINFO: %s\n", + strerror(errno)); + c->msgid = CHECKER_MSGID_DOWN; + ret = PATH_DOWN; + goto out; + } else { + lun = lvi.LunID; + } + + memset(&cic, 0, sizeof(cic)); + cic.LUN_info.LogDev.VolId = lun & 0x3FFFFFFF; + cic.LUN_info.LogDev.Mode = 0x01; /* logical volume addressing */ + cic.Request.CDBLen = 6; /* need to try just 2 bytes here */ + cic.Request.Type.Type = TYPE_CMD; // It is a command. 
+ cic.Request.Type.Attribute = ATTR_SIMPLE; + cic.Request.Type.Direction = XFER_NONE; + cic.Request.Timeout = 0; + + cic.Request.CDB[0] = 0; + cic.Request.CDB[1] = 0; + cic.Request.CDB[2] = 0; + cic.Request.CDB[3] = 0; + cic.Request.CDB[4] = 0; + cic.Request.CDB[5] = 0; + + rc = ioctl(c->fd, CCISS_PASSTHRU, &cic); + if (rc < 0) { + fprintf(stderr, "cciss TUR failed: %s\n", + strerror(errno)); + c->msgid = CHECKER_MSGID_DOWN; + ret = PATH_DOWN; + goto out; + } + + if ((cic.error_info.CommandStatus | cic.error_info.ScsiStatus )) { + c->msgid = CHECKER_MSGID_DOWN; + ret = PATH_DOWN; + goto out; + } + + c->msgid = CHECKER_MSGID_UP; + + ret = PATH_UP; +out: + /* + * caller told us he doesn't want to keep the context : + * free it + */ + if (!c->context) + free(ctxt); + + return(ret); +} diff --git a/libmultipath/checkers/directio.c b/libmultipath/checkers/directio.c new file mode 100644 index 0000000..503519e --- /dev/null +++ b/libmultipath/checkers/directio.c @@ -0,0 +1,399 @@ +/* + * Copyright (c) 2005 Hannes Reinecke, Suse + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "checkers.h" +#include "../libmultipath/debug.h" +#include "../libmultipath/time-util.h" + +#define AIO_GROUP_SIZE 1024 + +/* Note: This checker type relies on the fact that only one checker can be run + * at a time, since multiple checkers share the same aio_group, and must be + * able to modify other checker's async_reqs. 
If multiple checkers become able + * to be run at the same time, this checker will need to add locking, and + * probably polling on event fds, to deal with that */ + +struct aio_group { + struct list_head node; + int holders; + io_context_t ioctx; + struct list_head orphans; +}; + +struct async_req { + struct iocb io; + unsigned int blksize; + unsigned char * buf; + struct list_head node; + int state; /* PATH_REMOVED means this is an orphan */ +}; + +static LIST_HEAD(aio_grp_list); + +enum { + MSG_DIRECTIO_UNKNOWN = CHECKER_FIRST_MSGID, + MSG_DIRECTIO_PENDING, + MSG_DIRECTIO_BLOCKSIZE, +}; + +#define _IDX(x) (MSG_DIRECTIO_##x - CHECKER_FIRST_MSGID) +const char *libcheck_msgtable[] = { + [_IDX(UNKNOWN)] = " is not available", + [_IDX(PENDING)] = " is waiting on aio", + [_IDX(BLOCKSIZE)] = " cannot get blocksize, set default", + NULL, +}; + +#define LOG(prio, fmt, args...) condlog(prio, "directio: " fmt, ##args) + +struct directio_context { + int running; + int reset_flags; + struct aio_group *aio_grp; + struct async_req *req; +}; + +static struct aio_group * +add_aio_group(void) /* allocates a zeroed group, sets up its aio context and links it into aio_grp_list; NULL on failure */ +{ + struct aio_group *aio_grp; + int rc; /* libaio result: 0 or a negated errno value */ + + aio_grp = calloc(1, sizeof(*aio_grp)); /* zeroed, as io_setup() needs ioctx to start at 0 */ + if (!aio_grp) + return NULL; + INIT_LIST_HEAD(&aio_grp->orphans); + + if ((rc = io_setup(AIO_GROUP_SIZE, &aio_grp->ioctx)) != 0) { + LOG(1, "io_setup failed"); + if (rc == -EAGAIN) /* the libaio wrapper returns -errno and does not set errno */ + LOG(1, "global number of io events too small. 
Increase fs.aio-max-nr with sysctl"); + free(aio_grp); + return NULL; + } + list_add(&aio_grp->node, &aio_grp_list); + return aio_grp; +} + +static int +set_aio_group(struct directio_context *ct) +{ + struct aio_group *aio_grp = NULL; + + list_for_each_entry(aio_grp, &aio_grp_list, node) + if (aio_grp->holders < AIO_GROUP_SIZE) + goto found; + aio_grp = add_aio_group(); + if (!aio_grp) { + ct->aio_grp = NULL; + return -1; + } +found: + aio_grp->holders++; + ct->aio_grp = aio_grp; + return 0; +} + +static void +remove_aio_group(struct aio_group *aio_grp) +{ + struct async_req *req, *tmp; + + io_destroy(aio_grp->ioctx); + list_for_each_entry_safe(req, tmp, &aio_grp->orphans, node) { + list_del(&req->node); + free(req->buf); + free(req); + } + list_del(&aio_grp->node); + free(aio_grp); +} + +/* If an aio_group is completely full of orphans, then no checkers can + * use it, which means that no checkers can clear out the orphans. To + * avoid keeping the useless group around, simply remove the + * group */ +static void +check_orphaned_group(struct aio_group *aio_grp) +{ + int count = 0; + struct list_head *item; + + if (aio_grp->holders < AIO_GROUP_SIZE) + return; + list_for_each(item, &aio_grp->orphans) + count++; + if (count >= AIO_GROUP_SIZE) + remove_aio_group(aio_grp); +} + +void libcheck_reset (void) +{ + struct aio_group *aio_grp, *tmp; + + list_for_each_entry_safe(aio_grp, tmp, &aio_grp_list, node) + remove_aio_group(aio_grp); +} + +int libcheck_init (struct checker * c) +{ + unsigned long pgsize = getpagesize(); + struct directio_context * ct; + struct async_req *req = NULL; + long flags; + + ct = malloc(sizeof(struct directio_context)); + if (!ct) + return 1; + memset(ct, 0, sizeof(struct directio_context)); + + if (set_aio_group(ct) < 0) + goto out; + + req = malloc(sizeof(struct async_req)); + if (!req) { + goto out; + } + memset(req, 0, sizeof(struct async_req)); + INIT_LIST_HEAD(&req->node); + + if (ioctl(c->fd, BLKBSZGET, &req->blksize) < 0) { + 
c->msgid = MSG_DIRECTIO_BLOCKSIZE; + req->blksize = 4096; + } + if (req->blksize > 4096) { + /* + * Sanity check for DASD; BSZGET is broken + */ + req->blksize = 4096; + } + if (!req->blksize) + goto out; + + if (posix_memalign((void **)&req->buf, pgsize, req->blksize) != 0) + goto out; + + flags = fcntl(c->fd, F_GETFL); + if (flags < 0) + goto out; + if (!(flags & O_DIRECT)) { + flags |= O_DIRECT; + if (fcntl(c->fd, F_SETFL, flags) < 0) + goto out; + ct->reset_flags = 1; + } + + /* Successfully initialized, return the context. */ + ct->req = req; + c->context = (void *) ct; + return 0; + +out: + if (req) { + if (req->buf) + free(req->buf); + free(req); + } + if (ct->aio_grp) + ct->aio_grp->holders--; + free(ct); + return 1; +} + +void libcheck_free (struct checker * c) +{ + struct directio_context * ct = (struct directio_context *)c->context; + struct io_event event; + long flags; + + if (!ct) + return; + + if (ct->reset_flags) { + if ((flags = fcntl(c->fd, F_GETFL)) >= 0) { + int ret __attribute__ ((unused)); + + flags &= ~O_DIRECT; + /* No point in checking for errors */ + ret = fcntl(c->fd, F_SETFL, flags); + } + } + + if (ct->running && + (ct->req->state != PATH_PENDING || + io_cancel(ct->aio_grp->ioctx, &ct->req->io, &event) == 0)) + ct->running = 0; + if (!ct->running) { + free(ct->req->buf); + free(ct->req); + ct->aio_grp->holders--; + } else { + ct->req->state = PATH_REMOVED; + list_add(&ct->req->node, &ct->aio_grp->orphans); + check_orphaned_group(ct->aio_grp); + } + + free(ct); + c->context = NULL; +} + +static int +get_events(struct aio_group *aio_grp, struct timespec *timeout) +{ + struct io_event events[128]; + int i, nr, got_events = 0; + struct timespec zero_timeout = {0}; + struct timespec *timep = timeout; + + do { + errno = 0; + nr = io_getevents(aio_grp->ioctx, 1, 128, events, timep); + got_events |= (nr > 0); + + for (i = 0; i < nr; i++) { + struct async_req *req = container_of(events[i].obj, struct async_req, io); + + LOG(3, "io finished 
%lu/%lu", events[i].res, + events[i].res2); + + /* got an orphaned request */ + if (req->state == PATH_REMOVED) { + list_del(&req->node); + free(req->buf); + free(req); + aio_grp->holders--; + } else + req->state = (events[i].res == req->blksize) ? + PATH_UP : PATH_DOWN; + } + timep = &zero_timeout; + } while (nr == 128); /* assume there are more events and try again */ + + if (nr < 0) + LOG(3, "async io getevents returned %i (errno=%s)", + nr, strerror(-nr)); /* libaio returns the negated errno instead of setting errno */ + + return got_events; +} + +static int +check_state(int fd, struct directio_context *ct, int sync, int timeout_secs) /* submits a read of one block if none is in flight, waits for completions, and returns the resulting path state */ +{ + struct timespec timeout = { .tv_nsec = 1000 }; + struct stat sb; + int rc; + long r; + struct timespec currtime, endtime; + + if (fstat(fd, &sb) == 0) { + LOG(4, "called for %x", (unsigned) sb.st_rdev); + } + if (sync > 0) { + LOG(4, "called in synchronous mode"); + timeout.tv_sec = timeout_secs; + timeout.tv_nsec = 0; + } + + if (ct->running) { + if (ct->req->state != PATH_PENDING) { + ct->running = 0; + return ct->req->state; + } + } else { + struct iocb *ios[1] = { &ct->req->io }; + + LOG(3, "starting new request"); + memset(&ct->req->io, 0, sizeof(struct iocb)); + io_prep_pread(&ct->req->io, fd, ct->req->buf, + ct->req->blksize, 0); + ct->req->state = PATH_PENDING; + if ((rc = io_submit(ct->aio_grp->ioctx, 1, ios)) != 1) { + LOG(3, "io_submit error %i", -rc); /* io_submit() reports failure as a negated errno, errno itself is not set */ + return PATH_UNCHECKED; + } + } + ct->running++; + + get_monotonic_time(&endtime); + endtime.tv_sec += timeout.tv_sec; + endtime.tv_nsec += timeout.tv_nsec; + normalize_timespec(&endtime); + while(1) { + r = get_events(ct->aio_grp, &timeout); + + if (ct->req->state != PATH_PENDING) { + ct->running = 0; + return ct->req->state; + } else if (r == 0 || + (timeout.tv_sec == 0 && timeout.tv_nsec == 0)) + break; + + get_monotonic_time(&currtime); + timespecsub(&endtime, &currtime, &timeout); + if (timeout.tv_sec < 0) + timeout.tv_sec = timeout.tv_nsec = 0; + } + if (ct->running > timeout_secs || sync) { + struct io_event event; + + 
LOG(3, "abort check on timeout"); + + r = io_cancel(ct->aio_grp->ioctx, &ct->req->io, &event); + /* + * Only reset ct->running if we really + * could abort the pending I/O + */ + if (!r) + ct->running = 0; + rc = PATH_DOWN; + } else { + LOG(3, "async io pending"); + rc = PATH_PENDING; + } + + return rc; +} + +int libcheck_check (struct checker * c) +{ + int ret; + struct directio_context * ct = (struct directio_context *)c->context; + + if (!ct) + return PATH_UNCHECKED; + + ret = check_state(c->fd, ct, checker_is_sync(c), c->timeout); + + switch (ret) + { + case PATH_UNCHECKED: + c->msgid = MSG_DIRECTIO_UNKNOWN; + break; + case PATH_DOWN: + c->msgid = CHECKER_MSGID_DOWN; + break; + case PATH_UP: + c->msgid = CHECKER_MSGID_UP; + break; + case PATH_PENDING: + c->msgid = MSG_DIRECTIO_PENDING; + break; + default: + break; + } + return ret; +} diff --git a/libmultipath/checkers/directio.h b/libmultipath/checkers/directio.h new file mode 100644 index 0000000..1865b1f --- /dev/null +++ b/libmultipath/checkers/directio.h @@ -0,0 +1,8 @@ +#ifndef _DIRECTIO_H +#define _DIRECTIO_H + +int directio (struct checker *); +int directio_init (struct checker *); +void directio_free (struct checker *); + +#endif /* _DIRECTIO_H */ diff --git a/libmultipath/checkers/emc_clariion.c b/libmultipath/checkers/emc_clariion.c new file mode 100644 index 0000000..5cd63ac --- /dev/null +++ b/libmultipath/checkers/emc_clariion.c @@ -0,0 +1,320 @@ +/* + * Copyright (c) 2004, 2005 Lars Marowsky-Bree + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../libmultipath/sg_include.h" +#include "libsg.h" +#include "checkers.h" +#include "debug.h" +#include "memory.h" + +#define INQUIRY_CMD 0x12 +#define INQUIRY_CMDLEN 6 +#define HEAVY_CHECK_COUNT 10 +#define SCSI_COMMAND_TERMINATED 0x22 +#define SCSI_CHECK_CONDITION 0x2 +#define RECOVERED_ERROR 0x01 +#define ILLEGAL_REQUEST 0x05 +#define SG_ERR_DRIVER_SENSE 0x08 + +/* + * Mechanism to track CLARiiON 
/*
 * Render the 16-byte binary WWN in wwn as 32 lowercase hex digits.
 *
 * wwn:    16 input bytes, only read
 * wwnstr: output buffer of at least 33 bytes; always NUL-terminated
 *
 * Every byte goes through unsigned char before it is split into
 * nibbles, so the result does not depend on whether plain char is
 * signed on the target.
 */
void hexadecimal_to_ascii(const char *wwn, char *wwnstr)
{
	static const char digits[] = "0123456789abcdef";
	int i;

	for (i = 0; i < 16; i++) {
		unsigned char byte = (unsigned char)wwn[i];

		wwnstr[2 * i] = digits[byte >> 4];
		wwnstr[2 * i + 1] = digits[byte & 0x0f];
	}
	wwnstr[32] = '\0';
}
*/ + if (--retry_emc) + goto retry; + break; + default: + break; + } + } + + if (SCSI_CHECK_CONDITION == io_hdr.status || + SCSI_COMMAND_TERMINATED == io_hdr.status || + SG_ERR_DRIVER_SENSE == (0xf & io_hdr.driver_status)) { + if (io_hdr.sbp && (io_hdr.sb_len_wr > 2)) { + unsigned char *sbp = io_hdr.sbp; + int sense_key; + + if (sbp[0] & 0x2) + sense_key = sbp[1] & 0xf; + else + sense_key = sbp[2] & 0xf; + + if (sense_key == ILLEGAL_REQUEST) { + c->msgid = CHECKER_MSGID_UNSUPPORTED; + return PATH_WILD; + } else if (sense_key != RECOVERED_ERROR) { + condlog(1, "emc_clariion_checker: INQUIRY failed with sense key %02x", + sense_key); + c->msgid = MSG_CLARIION_QUERY_ERROR; + return PATH_DOWN; + } + } + } + + if (io_hdr.info & SG_INFO_OK_MASK) { + condlog(1, "emc_clariion_checker: INQUIRY failed without sense, status %02x", + io_hdr.status); + c->msgid = MSG_CLARIION_QUERY_ERROR; + return PATH_DOWN; + } + + if (/* Verify the code page - right page & revision */ + sense_buffer[1] != 0xc0 || sense_buffer[9] != 0x00) { + c->msgid = MSG_CLARIION_UNIT_REPORT; + return PATH_DOWN; + } + + if ( /* Effective initiator type */ + sense_buffer[27] != 0x03 + /* + * Failover mode should be set to 1 (PNR failover mode) + * or 4 (ALUA failover mode). + */ + || (((sense_buffer[28] & 0x07) != 0x04) && + ((sense_buffer[28] & 0x07) != 0x06)) + /* Arraycommpath should be set to 1 */ + || (sense_buffer[30] & 0x04) != 0x04) { + c->msgid = MSG_CLARIION_PATH_CONFIG; + return PATH_DOWN; + } + + if ( /* LUN operations should indicate normal operations */ + sense_buffer[48] != 0x00) { + c->msgid = MSG_CLARIION_PATH_NOT_AVAIL; + return PATH_SHAKY; + } + + if ( /* LUN should at least be bound somewhere and not be LUNZ */ + sense_buffer[4] == 0x00) { + c->msgid = MSG_CLARIION_LUN_UNBOUND; + return PATH_DOWN; + } + + /* + * store the LUN WWN there and compare that it indeed did not + * change in between, to protect against the path suddenly + * pointing somewhere else. 
+ */ + if (ct->wwn_set) { + if (memcmp(ct->wwn, &sense_buffer[10], 16) != 0) { + c->msgid = MSG_CLARIION_WWN_CHANGED; + return PATH_DOWN; + } + } else { + memcpy(ct->wwn, &sense_buffer[10], 16); + ct->wwn_set = 1; + } + + /* + * Issue read on active path to determine if inactive snapshot. + */ + if (sense_buffer[4] == 2) {/* if active path */ + unsigned char buf[4096]; + + memset(buf, 0, 4096); + ret = sg_read(c->fd, &buf[0], 4096, + sbb = &sb[0], SENSE_BUFF_LEN, c->timeout); + if (ret == PATH_DOWN) { + hexadecimal_to_ascii(ct->wwn, wwnstr); + + /* + * Check for inactive snapshot LU this way. Must + * fail these. + */ + if (((sbb[2]&0xf) == 5) && (sbb[12] == 0x25) && + (sbb[13]==1)) { + /* + * Do this so that we can fail even the + * passive paths which will return + * 02/04/03 not 05/25/01 on read. + */ + SET_INACTIVE_SNAP(c); + condlog(3, "emc_clariion_checker: Active " + "path to inactive snapshot WWN %s.", + wwnstr); + } else { + condlog(3, "emc_clariion_checker: Read " + "error for WWN %s. Sense data are " + "0x%x/0x%x/0x%x.", wwnstr, + sbb[2]&0xf, sbb[12], sbb[13]); + c->msgid = MSG_CLARIION_READ_ERROR; + } + } else { + c->msgid = MSG_CLARIION_PASSIVE_GOOD; + /* + * Remove the path from the set of paths to inactive + * snapshot LUs if it was in this list since the + * snapshot is no longer inactive. 
+ */ + CLR_INACTIVE_SNAP(c); + } + } else { + if (IS_INACTIVE_SNAP(c)) { + hexadecimal_to_ascii(ct->wwn, wwnstr); + condlog(3, "emc_clariion_checker: Passive " + "path to inactive snapshot WWN %s.", + wwnstr); + ret = PATH_DOWN; + } else { + c->msgid = MSG_CLARIION_PASSIVE_GOOD; + ret = PATH_UP; /* not ghost */ + } + } + + return ret; +} diff --git a/libmultipath/checkers/emc_clariion.h b/libmultipath/checkers/emc_clariion.h new file mode 100644 index 0000000..a1018a6 --- /dev/null +++ b/libmultipath/checkers/emc_clariion.h @@ -0,0 +1,8 @@ +#ifndef _EMC_CLARIION_H +#define _EMC_CLARIION_H + +int emc_clariion (struct checker *); +int emc_clariion_init (struct checker *); +void emc_clariion_free (struct checker *); + +#endif /* _EMC_CLARIION_H */ diff --git a/libmultipath/checkers/hp_sw.c b/libmultipath/checkers/hp_sw.c new file mode 100644 index 0000000..915918c --- /dev/null +++ b/libmultipath/checkers/hp_sw.c @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2005 Christophe Varoqui + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "checkers.h" + +#include "../libmultipath/sg_include.h" +#include "../libmultipath/unaligned.h" + +#define TUR_CMD_LEN 6 +#define INQUIRY_CMDLEN 6 +#define INQUIRY_CMD 0x12 +#define SENSE_BUFF_LEN 32 +#define SCSI_CHECK_CONDITION 0x2 +#define SCSI_COMMAND_TERMINATED 0x22 +#define SG_ERR_DRIVER_SENSE 0x08 +#define RECOVERED_ERROR 0x01 +#define ILLEGAL_REQUEST 0x05 +#define MX_ALLOC_LEN 255 +#define HEAVY_CHECK_COUNT 10 + +struct sw_checker_context { + void * dummy; +}; + +int libcheck_init (__attribute__((unused)) struct checker * c) +{ + return 0; +} + +void libcheck_free (__attribute__((unused)) struct checker * c) +{ + return; +} + +static int +do_inq(int sg_fd, int cmddt, int evpd, unsigned int pg_op, + void *resp, int mx_resp_len, unsigned int timeout) +{ + unsigned char inqCmdBlk[INQUIRY_CMDLEN] = + { INQUIRY_CMD, 0, 0, 0, 0, 0 }; + unsigned char sense_b[SENSE_BUFF_LEN]; + struct 
sg_io_hdr io_hdr; + + if (cmddt) + inqCmdBlk[1] |= 2; + if (evpd) + inqCmdBlk[1] |= 1; + inqCmdBlk[2] = (unsigned char) pg_op; + put_unaligned_be16(mx_resp_len, &inqCmdBlk[3]); + memset(&io_hdr, 0, sizeof (struct sg_io_hdr)); + memset(sense_b, 0, SENSE_BUFF_LEN); + io_hdr.interface_id = 'S'; + io_hdr.cmd_len = sizeof (inqCmdBlk); + io_hdr.mx_sb_len = sizeof (sense_b); + io_hdr.dxfer_direction = SG_DXFER_FROM_DEV; + io_hdr.dxfer_len = mx_resp_len; + io_hdr.dxferp = resp; + io_hdr.cmdp = inqCmdBlk; + io_hdr.sbp = sense_b; + io_hdr.timeout = timeout * 1000; + + if (ioctl(sg_fd, SG_IO, &io_hdr) < 0) { + if (errno == ENOTTY) + return PATH_WILD; + else + return PATH_DOWN; + } + + /* treat SG_ERR here to get rid of sg_err.[ch] */ + io_hdr.status &= 0x7e; + if ((0 == io_hdr.status) && (0 == io_hdr.host_status) && + (0 == io_hdr.driver_status)) + return PATH_UP; + + if ((SCSI_CHECK_CONDITION == io_hdr.status) || + (SCSI_COMMAND_TERMINATED == io_hdr.status) || + (SG_ERR_DRIVER_SENSE == (0xf & io_hdr.driver_status))) { + if (io_hdr.sbp && (io_hdr.sb_len_wr > 2)) { + int sense_key; + unsigned char * sense_buffer = io_hdr.sbp; + if (sense_buffer[0] & 0x2) + sense_key = sense_buffer[1] & 0xf; + else + sense_key = sense_buffer[2] & 0xf; + if (RECOVERED_ERROR == sense_key) + return PATH_UP; + else if (ILLEGAL_REQUEST == sense_key) + return PATH_WILD; + } + } + return PATH_DOWN; +} + +static int +do_tur (int fd, unsigned int timeout) +{ + unsigned char turCmdBlk[TUR_CMD_LEN] = { 0x00, 0, 0, 0, 0, 0 }; + struct sg_io_hdr io_hdr; + unsigned char sense_buffer[32]; + + memset(&io_hdr, 0, sizeof (struct sg_io_hdr)); + io_hdr.interface_id = 'S'; + io_hdr.cmd_len = sizeof (turCmdBlk); + io_hdr.mx_sb_len = sizeof (sense_buffer); + io_hdr.dxfer_direction = SG_DXFER_NONE; + io_hdr.cmdp = turCmdBlk; + io_hdr.sbp = sense_buffer; + io_hdr.timeout = timeout * 1000; + io_hdr.pack_id = 0; + + if (ioctl(fd, SG_IO, &io_hdr) < 0) + return 1; + + if (io_hdr.info & SG_INFO_OK_MASK) + return 1; + + 
return 0; +} + +int libcheck_check(struct checker * c) +{ + char buff[MX_ALLOC_LEN]; + int ret = do_inq(c->fd, 0, 1, 0x80, buff, MX_ALLOC_LEN, c->timeout); + + if (ret == PATH_WILD) { + c->msgid = CHECKER_MSGID_UNSUPPORTED; + return ret; + } + if (ret != PATH_UP) { + c->msgid = CHECKER_MSGID_DOWN; + return ret; + }; + + if (do_tur(c->fd, c->timeout)) { + c->msgid = CHECKER_MSGID_GHOST; + return PATH_GHOST; + } + c->msgid = CHECKER_MSGID_UP; + return PATH_UP; +} diff --git a/libmultipath/checkers/hp_sw.h b/libmultipath/checkers/hp_sw.h new file mode 100644 index 0000000..3be0d8e --- /dev/null +++ b/libmultipath/checkers/hp_sw.h @@ -0,0 +1,8 @@ +#ifndef _HP_SW_H +#define _HP_SW_H + +int hp_sw (struct checker *); +int hp_sw_init (struct checker *); +void hp_sw_free (struct checker *); + +#endif /* _HP_SW_H */ diff --git a/libmultipath/checkers/libsg.c b/libmultipath/checkers/libsg.c new file mode 100644 index 0000000..958ea92 --- /dev/null +++ b/libmultipath/checkers/libsg.c @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2004, 2005 Christophe Varoqui + */ +#include +#include +#include +#include + +#include "checkers.h" +#include "libsg.h" +#include "../libmultipath/sg_include.h" + +int +sg_read (int sg_fd, unsigned char * buff, int buff_len, + unsigned char * sense, int sense_len, unsigned int timeout) +{ + /* defaults */ + int blocks; + long long start_block = 0; + int bs = 512; + int cdbsz = 10; + + unsigned char rdCmd[cdbsz]; + unsigned char *sbb = sense; + struct sg_io_hdr io_hdr; + int res; + int rd_opcode[] = {0x8, 0x28, 0xa8, 0x88}; + int sz_ind; + struct stat filestatus; + int retry_count = 3; + + if (fstat(sg_fd, &filestatus) != 0) + return PATH_DOWN; + bs = (filestatus.st_blksize > 4096)? 
4096: filestatus.st_blksize; + blocks = buff_len / bs; + memset(rdCmd, 0, cdbsz); + sz_ind = 1; + rdCmd[0] = rd_opcode[sz_ind]; + rdCmd[2] = (unsigned char)((start_block >> 24) & 0xff); + rdCmd[3] = (unsigned char)((start_block >> 16) & 0xff); + rdCmd[4] = (unsigned char)((start_block >> 8) & 0xff); + rdCmd[5] = (unsigned char)(start_block & 0xff); + rdCmd[7] = (unsigned char)((blocks >> 8) & 0xff); + rdCmd[8] = (unsigned char)(blocks & 0xff); + + memset(&io_hdr, 0, sizeof(struct sg_io_hdr)); + io_hdr.interface_id = 'S'; + io_hdr.cmd_len = cdbsz; + io_hdr.cmdp = rdCmd; + io_hdr.dxfer_direction = SG_DXFER_FROM_DEV; + io_hdr.dxfer_len = bs * blocks; + io_hdr.dxferp = buff; + io_hdr.mx_sb_len = sense_len; + io_hdr.sbp = sense; + io_hdr.timeout = timeout * 1000; + io_hdr.pack_id = (int)start_block; + +retry: + memset(sense, 0, sense_len); + while (((res = ioctl(sg_fd, SG_IO, &io_hdr)) < 0) && (EINTR == errno)); + + if (res < 0) { + if (ENOMEM == errno) { + return PATH_UP; + } + return PATH_DOWN; + } + + if ((0 == io_hdr.status) && + (0 == io_hdr.host_status) && + (0 == io_hdr.driver_status)) { + return PATH_UP; + } else { + int key = 0; + + if (io_hdr.sb_len_wr > 3) { + if (sbb[0] == 0x72 || sbb[0] == 0x73) + key = sbb[1] & 0x0f; + else if (io_hdr.sb_len_wr > 13 && + ((sbb[0] & 0x7f) == 0x70 || + (sbb[0] & 0x7f) == 0x71)) + key = sbb[2] & 0x0f; + } + + /* + * Retry if UNIT_ATTENTION check condition. 
+ */ + if (key == 0x6) { + if (--retry_count) + goto retry; + } + return PATH_DOWN; + } +} diff --git a/libmultipath/checkers/libsg.h b/libmultipath/checkers/libsg.h new file mode 100644 index 0000000..3994f45 --- /dev/null +++ b/libmultipath/checkers/libsg.h @@ -0,0 +1,9 @@ +#ifndef _LIBSG_H +#define _LIBSG_H + +#define SENSE_BUFF_LEN 32 + +int sg_read (int sg_fd, unsigned char * buff, int buff_len, + unsigned char * sense, int sense_len, unsigned int timeout); + +#endif /* _LIBSG_H */ diff --git a/libmultipath/checkers/rdac.c b/libmultipath/checkers/rdac.c new file mode 100644 index 0000000..d924a9f --- /dev/null +++ b/libmultipath/checkers/rdac.c @@ -0,0 +1,358 @@ +/* + * Copyright (c) 2005 Christophe Varoqui + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "checkers.h" +#include "debug.h" + +#include "../libmultipath/sg_include.h" + +#define INQUIRY_CMDLEN 6 +#define INQUIRY_CMD 0x12 +#define MODE_SENSE_CMD 0x5a +#define MODE_SELECT_CMD 0x55 +#define MODE_SEN_SEL_CMDLEN 10 +#define SENSE_BUFF_LEN 32 +#define SCSI_CHECK_CONDITION 0x2 +#define SCSI_COMMAND_TERMINATED 0x22 +#define SG_ERR_DRIVER_SENSE 0x08 +#define RECOVERED_ERROR 0x01 +#define ILLEGAL_REQUEST 0x05 + + +#define CURRENT_PAGE_CODE_VALUES 0 +#define CHANGEABLE_PAGE_CODE_VALUES 1 + +#define MSG_RDAC_DOWN " reports path is down" +#define MSG_RDAC_DOWN_TYPE(STR) MSG_RDAC_DOWN": "STR + +#define RTPG_UNAVAILABLE 0x3 +#define RTPG_OFFLINE 0xE +#define RTPG_TRANSITIONING 0xF + +#define RTPG_UNAVAIL_NON_RESPONSIVE 0x2 +#define RTPG_UNAVAIL_IN_RESET 0x3 +#define RTPG_UNAVAIL_CFW_DL1 0x4 +#define RTPG_UNAVAIL_CFW_DL2 0x5 +#define RTPG_UNAVAIL_QUIESCED 0x6 +#define RTPG_UNAVAIL_SERVICE_MODE 0x7 + +struct control_mode_page { + unsigned char header[8]; + unsigned char page_code; + unsigned char page_len; + unsigned char dontcare0[3]; + unsigned char tas_bit; + unsigned char dontcare1[6]; +}; + +struct rdac_checker_context { + void * dummy; +}; + +int 
libcheck_init (struct checker * c) +{ + unsigned char cmd[MODE_SEN_SEL_CMDLEN]; + unsigned char sense_b[SENSE_BUFF_LEN]; + struct sg_io_hdr io_hdr; + struct control_mode_page current, changeable; + int set = 0; + + memset(cmd, 0, MODE_SEN_SEL_CMDLEN); + cmd[0] = MODE_SENSE_CMD; + cmd[1] = 0x08; /* DBD bit on */ + cmd[2] = 0xA + (CURRENT_PAGE_CODE_VALUES << 6); + cmd[8] = (sizeof(struct control_mode_page) & 0xff); + + memset(&io_hdr, 0, sizeof(struct sg_io_hdr)); + memset(sense_b, 0, SENSE_BUFF_LEN); + memset(¤t, 0, sizeof(struct control_mode_page)); + + io_hdr.interface_id = 'S'; + io_hdr.cmd_len = MODE_SEN_SEL_CMDLEN; + io_hdr.mx_sb_len = sizeof(sense_b); + io_hdr.dxfer_direction = SG_DXFER_FROM_DEV; + io_hdr.dxfer_len = (sizeof(struct control_mode_page) & 0xff); + io_hdr.dxferp = ¤t; + io_hdr.cmdp = cmd; + io_hdr.sbp = sense_b; + io_hdr.timeout = c->timeout * 1000; + + if (ioctl(c->fd, SG_IO, &io_hdr) < 0) + goto out; + + /* check the TAS bit to see if it is already set */ + if ((current.tas_bit >> 6) & 0x1) { + set = 1; + goto out; + } + + /* get the changeble values */ + cmd[2] = 0xA + (CHANGEABLE_PAGE_CODE_VALUES << 6); + io_hdr.dxferp = &changeable; + memset(&changeable, 0, sizeof(struct control_mode_page)); + + if (ioctl(c->fd, SG_IO, &io_hdr) < 0) + goto out; + + /* if TAS bit is not settable exit */ + if (((changeable.tas_bit >> 6) & 0x1) == 0) + goto out; + + /* Now go ahead and set it */ + memset(cmd, 0, MODE_SEN_SEL_CMDLEN); + cmd[0] = MODE_SELECT_CMD; + cmd[1] = 0x1; /* set SP bit on */ + cmd[8] = (sizeof(struct control_mode_page) & 0xff); + + /* use the same buffer as current, only set the tas bit */ + current.page_code = 0xA; + current.page_len = 0xA; + current.tas_bit |= (1 << 6); + + io_hdr.dxfer_direction = SG_DXFER_TO_DEV; + io_hdr.dxferp = ¤t; + + if (ioctl(c->fd, SG_IO, &io_hdr) < 0) + goto out; + + /* Success */ + set = 1; +out: + if (set == 0) + condlog(3, "rdac checker failed to set TAS bit"); + return 0; +} + +void 
libcheck_free(__attribute__((unused)) struct checker *c) +{ + return; +} + +static int +do_inq(int sg_fd, unsigned int pg_op, void *resp, int mx_resp_len, + unsigned int timeout) +{ + unsigned char inqCmdBlk[INQUIRY_CMDLEN] = { INQUIRY_CMD, 1, 0, 0, 0, 0 }; + unsigned char sense_b[SENSE_BUFF_LEN]; + struct sg_io_hdr io_hdr; + int retry_rdac = 5; + +retry: + inqCmdBlk[2] = (unsigned char) pg_op; + inqCmdBlk[4] = (unsigned char) (mx_resp_len & 0xff); + memset(&io_hdr, 0, sizeof (struct sg_io_hdr)); + memset(sense_b, 0, SENSE_BUFF_LEN); + + io_hdr.interface_id = 'S'; + io_hdr.cmd_len = sizeof (inqCmdBlk); + io_hdr.mx_sb_len = sizeof (sense_b); + io_hdr.dxfer_direction = SG_DXFER_FROM_DEV; + io_hdr.dxfer_len = mx_resp_len; + io_hdr.dxferp = resp; + io_hdr.cmdp = inqCmdBlk; + io_hdr.sbp = sense_b; + io_hdr.timeout = timeout * 1000; + + if (ioctl(sg_fd, SG_IO, &io_hdr) < 0 && errno == ENOTTY) + return PATH_WILD; + + /* treat SG_ERR here to get rid of sg_err.[ch] */ + io_hdr.status &= 0x7e; + if ((0 == io_hdr.status) && (0 == io_hdr.host_status) && + (0 == io_hdr.driver_status)) + return PATH_UP; + + /* check if we need to retry this error */ + if (io_hdr.info & SG_INFO_OK_MASK) { + switch (io_hdr.host_status) { + case DID_BUS_BUSY: + case DID_ERROR: + case DID_SOFT_ERROR: + case DID_TRANSPORT_DISRUPTED: + /* Transport error, retry */ + if (--retry_rdac) + goto retry; + break; + default: + break; + } + } + + if ((SCSI_CHECK_CONDITION == io_hdr.status) || + (SCSI_COMMAND_TERMINATED == io_hdr.status) || + (SG_ERR_DRIVER_SENSE == (0xf & io_hdr.driver_status))) { + if (io_hdr.sbp && (io_hdr.sb_len_wr > 2)) { + int sense_key; + unsigned char * sense_buffer = io_hdr.sbp; + if (sense_buffer[0] & 0x2) + sense_key = sense_buffer[1] & 0xf; + else + sense_key = sense_buffer[2] & 0xf; + if (RECOVERED_ERROR == sense_key) + return PATH_UP; + else if (ILLEGAL_REQUEST == sense_key) + return PATH_WILD; + condlog(1, "rdac checker: INQUIRY failed with sense key %02x", + sense_key); + } + } 
+ return PATH_DOWN; +} + +struct volume_access_inq +{ + char PQ_PDT; + char dontcare0[7]; + char avtcvp; + char vol_ppp; + char aas_cur; + char vendor_specific_cur; + char aas_alt; + char vendor_specific_alt; + char dontcare1[34]; +}; + +enum { + RDAC_MSGID_NOT_CONN = CHECKER_FIRST_MSGID, + RDAC_MSGID_IN_STARTUP, + RDAC_MSGID_NON_RESPONSIVE, + RDAC_MSGID_IN_RESET, + RDAC_MSGID_FW_DOWNLOADING, + RDAC_MSGID_QUIESCED, + RDAC_MSGID_SERVICE_MODE, + RDAC_MSGID_UNAVAILABLE, + RDAC_MSGID_INQUIRY_FAILED, +}; + +#define _IDX(x) (RDAC_MSGID_##x - CHECKER_FIRST_MSGID) +const char *libcheck_msgtable[] = { + [_IDX(NOT_CONN)] = MSG_RDAC_DOWN_TYPE("lun not connected"), + [_IDX(IN_STARTUP)] = MSG_RDAC_DOWN_TYPE("ctlr is in startup sequence"), + [_IDX(NON_RESPONSIVE)] = + MSG_RDAC_DOWN_TYPE("non-responsive to queries"), + [_IDX(IN_RESET)] = MSG_RDAC_DOWN_TYPE("ctlr held in reset"), + [_IDX(FW_DOWNLOADING)] = + MSG_RDAC_DOWN_TYPE("ctlr firmware downloading"), + [_IDX(QUIESCED)] = MSG_RDAC_DOWN_TYPE("ctlr quiesced by admin request"), + [_IDX(SERVICE_MODE)] = MSG_RDAC_DOWN_TYPE("ctlr is in service mode"), + [_IDX(UNAVAILABLE)] = MSG_RDAC_DOWN_TYPE("ctlr is unavailable"), + [_IDX(INQUIRY_FAILED)] = MSG_RDAC_DOWN_TYPE("inquiry failed"), + NULL, +}; + +static int +checker_msg_string(const struct volume_access_inq *inq) +{ + /* lun not connected */ + if (((inq->PQ_PDT & 0xE0) == 0x20) || (inq->PQ_PDT & 0x7f)) + return RDAC_MSGID_NOT_CONN; + + /* if no tpg data is available, give the generic path down message */ + if (!(inq->avtcvp & 0x10)) + return CHECKER_MSGID_DOWN; + + /* controller is booting up */ + if (((inq->aas_cur & 0x0F) == RTPG_TRANSITIONING) && + (inq->aas_alt & 0x0F) != RTPG_TRANSITIONING) + return RDAC_MSGID_IN_STARTUP; + + /* if not unavailable, give generic message */ + if ((inq->aas_cur & 0x0F) != RTPG_UNAVAILABLE) + return CHECKER_MSGID_DOWN; + + /* target port group unavailable */ + switch (inq->vendor_specific_cur) { + case RTPG_UNAVAIL_NON_RESPONSIVE: + return 
RDAC_MSGID_NON_RESPONSIVE; + case RTPG_UNAVAIL_IN_RESET: + return RDAC_MSGID_IN_RESET; + case RTPG_UNAVAIL_CFW_DL1: + case RTPG_UNAVAIL_CFW_DL2: + return RDAC_MSGID_FW_DOWNLOADING; + case RTPG_UNAVAIL_QUIESCED: + return RDAC_MSGID_QUIESCED; + case RTPG_UNAVAIL_SERVICE_MODE: + return RDAC_MSGID_SERVICE_MODE; + default: + return RDAC_MSGID_UNAVAILABLE; + } +} + +int libcheck_check(struct checker * c) +{ + struct volume_access_inq inq; + int ret, inqfail; + + inqfail = 0; + memset(&inq, 0, sizeof(struct volume_access_inq)); + ret = do_inq(c->fd, 0xC9, &inq, sizeof(struct volume_access_inq), + c->timeout); + if (ret != PATH_UP) { + inqfail = 1; + goto done; + } + + if (((inq.PQ_PDT & 0xE0) == 0x20) || (inq.PQ_PDT & 0x7f)) { + /* LUN not connected*/ + ret = PATH_DOWN; + goto done; + } + + /* If TPGDE bit set, evaluate TPG information */ + if ((inq.avtcvp & 0x10)) { + switch (inq.aas_cur & 0x0F) { + /* Never use the path if it reports unavailable */ + case RTPG_UNAVAILABLE: + ret = PATH_DOWN; + goto done; + /* + * If both controllers report transitioning, it + * means mode select or STPG is being processed. + * + * If this controller alone is transitioning, it's + * booting and we shouldn't use it yet. + */ + case RTPG_TRANSITIONING: + if ((inq.aas_alt & 0xF) != RTPG_TRANSITIONING) { + ret = PATH_DOWN; + goto done; + } + break; + } + } + + /* If owner set or ioship mode is enabled return PATH_UP always */ + if ((inq.avtcvp & 0x1) || ((inq.avtcvp >> 5) & 0x1)) + ret = PATH_UP; + else + ret = PATH_GHOST; + +done: + switch (ret) { + case PATH_WILD: + c->msgid = CHECKER_MSGID_UNSUPPORTED; + break; + case PATH_DOWN: + c->msgid = (inqfail ? 
RDAC_MSGID_INQUIRY_FAILED : + checker_msg_string(&inq)); + break; + case PATH_UP: + c->msgid = CHECKER_MSGID_UP; + break; + case PATH_GHOST: + c->msgid = CHECKER_MSGID_GHOST; + break; + } + + return ret; +} diff --git a/libmultipath/checkers/rdac.h b/libmultipath/checkers/rdac.h new file mode 100644 index 0000000..d7bf812 --- /dev/null +++ b/libmultipath/checkers/rdac.h @@ -0,0 +1,8 @@ +#ifndef _RDAC_H +#define _RDAC_H + +int rdac(struct checker *); +int rdac_init(struct checker *); +void rdac_free(struct checker *); + +#endif /* _RDAC_H */ diff --git a/libmultipath/checkers/readsector0.c b/libmultipath/checkers/readsector0.c new file mode 100644 index 0000000..b041f11 --- /dev/null +++ b/libmultipath/checkers/readsector0.c @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2004, 2005 Christophe Varoqui + */ +#include + +#include "checkers.h" +#include "libsg.h" + +struct readsector0_checker_context { + void * dummy; +}; + +int libcheck_init (__attribute__((unused)) struct checker * c) +{ + return 0; +} + +void libcheck_free (__attribute__((unused)) struct checker * c) +{ + return; +} + +int libcheck_check (struct checker * c) +{ + unsigned char buf[4096]; + unsigned char sbuf[SENSE_BUFF_LEN]; + int ret; + + ret = sg_read(c->fd, &buf[0], 4096, &sbuf[0], + SENSE_BUFF_LEN, c->timeout); + + switch (ret) + { + case PATH_DOWN: + c->msgid = CHECKER_MSGID_DOWN; + break; + case PATH_UP: + c->msgid = CHECKER_MSGID_UP; + break; + default: + break; + } + return ret; +} diff --git a/libmultipath/checkers/readsector0.h b/libmultipath/checkers/readsector0.h new file mode 100644 index 0000000..0f5d654 --- /dev/null +++ b/libmultipath/checkers/readsector0.h @@ -0,0 +1,8 @@ +#ifndef _READSECTOR0_H +#define _READSECTOR0_H + +int readsector0 (struct checker *); +int readsector0_init (struct checker *); +void readsector0_free (struct checker *); + +#endif /* _READSECTOR0_H */ diff --git a/libmultipath/checkers/tur.c b/libmultipath/checkers/tur.c new file mode 100644 index 0000000..e886fcf --- 
/dev/null +++ b/libmultipath/checkers/tur.c @@ -0,0 +1,429 @@ +/* + * Some code borrowed from sg-utils. + * + * Copyright (c) 2004 Christophe Varoqui + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "checkers.h" + +#include "../libmultipath/debug.h" +#include "../libmultipath/sg_include.h" +#include "../libmultipath/util.h" +#include "../libmultipath/time-util.h" +#include "../libmultipath/util.h" + +#define TUR_CMD_LEN 6 +#define HEAVY_CHECK_COUNT 10 + +enum { + MSG_TUR_RUNNING = CHECKER_FIRST_MSGID, + MSG_TUR_TIMEOUT, + MSG_TUR_FAILED, +}; + +#define _IDX(x) (MSG_ ## x - CHECKER_FIRST_MSGID) +const char *libcheck_msgtable[] = { + [_IDX(TUR_RUNNING)] = " still running", + [_IDX(TUR_TIMEOUT)] = " timed out", + [_IDX(TUR_FAILED)] = " failed to initialize", + NULL, +}; + +struct tur_checker_context { + dev_t devt; + int state; + int running; /* uatomic access only */ + int fd; + unsigned int timeout; + time_t time; + pthread_t thread; + pthread_mutex_t lock; + pthread_cond_t active; + int holders; /* uatomic access only */ + int msgid; +}; + +int libcheck_init (struct checker * c) +{ + struct tur_checker_context *ct; + struct stat sb; + + ct = malloc(sizeof(struct tur_checker_context)); + if (!ct) + return 1; + memset(ct, 0, sizeof(struct tur_checker_context)); + + ct->state = PATH_UNCHECKED; + ct->fd = -1; + uatomic_set(&ct->holders, 1); + pthread_cond_init_mono(&ct->active); + pthread_mutex_init(&ct->lock, NULL); + if (fstat(c->fd, &sb) == 0) + ct->devt = sb.st_rdev; + c->context = ct; + + return 0; +} + +static void cleanup_context(struct tur_checker_context *ct) +{ + pthread_mutex_destroy(&ct->lock); + pthread_cond_destroy(&ct->active); + free(ct); +} + +void libcheck_free (struct checker * c) +{ + if (c->context) { + struct tur_checker_context *ct = c->context; + int holders; + int running; + + running = uatomic_xchg(&ct->running, 0); + if (running) + 
pthread_cancel(ct->thread); + ct->thread = 0; + holders = uatomic_sub_return(&ct->holders, 1); + if (!holders) + cleanup_context(ct); + c->context = NULL; + } + return; +} + +static int +tur_check(int fd, unsigned int timeout, short *msgid) +{ + struct sg_io_hdr io_hdr; + unsigned char turCmdBlk[TUR_CMD_LEN] = { 0x00, 0, 0, 0, 0, 0 }; + unsigned char sense_buffer[32]; + int retry_tur = 5; + +retry: + memset(&io_hdr, 0, sizeof (struct sg_io_hdr)); + memset(&sense_buffer, 0, 32); + io_hdr.interface_id = 'S'; + io_hdr.cmd_len = sizeof (turCmdBlk); + io_hdr.mx_sb_len = sizeof (sense_buffer); + io_hdr.dxfer_direction = SG_DXFER_NONE; + io_hdr.cmdp = turCmdBlk; + io_hdr.sbp = sense_buffer; + io_hdr.timeout = timeout * 1000; + io_hdr.pack_id = 0; + if (ioctl(fd, SG_IO, &io_hdr) < 0) { + if (errno == ENOTTY) { + *msgid = CHECKER_MSGID_UNSUPPORTED; + return PATH_WILD; + } + *msgid = CHECKER_MSGID_DOWN; + return PATH_DOWN; + } + if ((io_hdr.status & 0x7e) == 0x18) { + /* + * SCSI-3 arrays might return + * reservation conflict on TUR + */ + *msgid = CHECKER_MSGID_UP; + return PATH_UP; + } + if (io_hdr.info & SG_INFO_OK_MASK) { + int key = 0, asc, ascq; + + switch (io_hdr.host_status) { + case DID_OK: + case DID_NO_CONNECT: + case DID_BAD_TARGET: + case DID_ABORT: + case DID_TRANSPORT_FAILFAST: + break; + default: + /* Driver error, retry */ + if (--retry_tur) + goto retry; + break; + } + if (io_hdr.sb_len_wr > 3) { + if (io_hdr.sbp[0] == 0x72 || io_hdr.sbp[0] == 0x73) { + key = io_hdr.sbp[1] & 0x0f; + asc = io_hdr.sbp[2]; + ascq = io_hdr.sbp[3]; + } else if (io_hdr.sb_len_wr > 13 && + ((io_hdr.sbp[0] & 0x7f) == 0x70 || + (io_hdr.sbp[0] & 0x7f) == 0x71)) { + key = io_hdr.sbp[2] & 0x0f; + asc = io_hdr.sbp[12]; + ascq = io_hdr.sbp[13]; + } + } + if (key == 0x6) { + /* Unit Attention, retry */ + if (--retry_tur) + goto retry; + } + else if (key == 0x2) { + /* Not Ready */ + /* Note: Other ALUA states are either UP or DOWN */ + if( asc == 0x04 && ascq == 0x0b){ + /* + * LOGICAL 
UNIT NOT ACCESSIBLE, + * TARGET PORT IN STANDBY STATE + */ + *msgid = CHECKER_MSGID_GHOST; + return PATH_GHOST; + } + } + *msgid = CHECKER_MSGID_DOWN; + return PATH_DOWN; + } + *msgid = CHECKER_MSGID_UP; + return PATH_UP; +} + +#define tur_thread_cleanup_push(ct) pthread_cleanup_push(cleanup_func, ct) +#define tur_thread_cleanup_pop(ct) pthread_cleanup_pop(1) + +static void cleanup_func(void *data) +{ + int holders; + struct tur_checker_context *ct = data; + + holders = uatomic_sub_return(&ct->holders, 1); + if (!holders) + cleanup_context(ct); + rcu_unregister_thread(); +} + +/* + * Test code for "zombie tur thread" handling. + * Compile e.g. with CFLAGS=-DTUR_TEST_MAJOR=8 + * Additional parameters can be configure with the macros below. + * + * Everty nth started TUR thread will hang in non-cancellable state + * for given number of seconds, for device given by major/minor. + */ +#ifdef TUR_TEST_MAJOR + +#ifndef TUR_TEST_MINOR +#define TUR_TEST_MINOR 0 +#endif +#ifndef TUR_SLEEP_INTERVAL +#define TUR_SLEEP_INTERVAL 3 +#endif +#ifndef TUR_SLEEP_SECS +#define TUR_SLEEP_SECS 60 +#endif + +static void tur_deep_sleep(const struct tur_checker_context *ct) +{ + static int sleep_cnt; + const struct timespec ts = { .tv_sec = TUR_SLEEP_SECS, .tv_nsec = 0 }; + int oldstate; + + if (ct->devt != makedev(TUR_TEST_MAJOR, TUR_TEST_MINOR) || + ++sleep_cnt % TUR_SLEEP_INTERVAL != 0) + return; + + condlog(1, "tur thread going to sleep for %ld seconds", ts.tv_sec); + if (pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate) != 0) + condlog(0, "pthread_setcancelstate: %m"); + if (nanosleep(&ts, NULL) != 0) + condlog(0, "nanosleep: %m"); + condlog(1, "tur zombie thread woke up"); + if (pthread_setcancelstate(oldstate, NULL) != 0) + condlog(0, "pthread_setcancelstate (2): %m"); + pthread_testcancel(); +} +#else +#define tur_deep_sleep(x) do {} while (0) +#endif /* TUR_TEST_MAJOR */ + +static void *tur_thread(void *ctx) +{ + struct tur_checker_context *ct = ctx; + int state, running; 
+ short msgid; + + /* This thread can be canceled, so setup clean up */ + tur_thread_cleanup_push(ct); + rcu_register_thread(); + + condlog(4, "%d:%d : tur checker starting up", major(ct->devt), + minor(ct->devt)); + + tur_deep_sleep(ct); + state = tur_check(ct->fd, ct->timeout, &msgid); + pthread_testcancel(); + + /* TUR checker done */ + pthread_mutex_lock(&ct->lock); + ct->state = state; + ct->msgid = msgid; + pthread_cond_signal(&ct->active); + pthread_mutex_unlock(&ct->lock); + + condlog(4, "%d:%d : tur checker finished, state %s", major(ct->devt), + minor(ct->devt), checker_state_name(state)); + + running = uatomic_xchg(&ct->running, 0); + if (!running) + pause(); + + tur_thread_cleanup_pop(ct); + + return ((void *)0); +} + + +static void tur_timeout(struct timespec *tsp) +{ + get_monotonic_time(tsp); + tsp->tv_nsec += 1000 * 1000; /* 1 millisecond */ + normalize_timespec(tsp); +} + +static void tur_set_async_timeout(struct checker *c) +{ + struct tur_checker_context *ct = c->context; + struct timespec now; + + get_monotonic_time(&now); + ct->time = now.tv_sec + c->timeout; +} + +static int tur_check_async_timeout(struct checker *c) +{ + struct tur_checker_context *ct = c->context; + struct timespec now; + + get_monotonic_time(&now); + return (now.tv_sec > ct->time); +} + +int libcheck_check(struct checker * c) +{ + struct tur_checker_context *ct = c->context; + struct timespec tsp; + pthread_attr_t attr; + int tur_status, r; + + if (!ct) + return PATH_UNCHECKED; + + if (checker_is_sync(c)) + return tur_check(c->fd, c->timeout, &c->msgid); + + /* + * Async mode + */ + if (ct->thread) { + if (tur_check_async_timeout(c)) { + int running = uatomic_xchg(&ct->running, 0); + if (running) { + pthread_cancel(ct->thread); + condlog(3, "%d:%d : tur checker timeout", + major(ct->devt), minor(ct->devt)); + c->msgid = MSG_TUR_TIMEOUT; + tur_status = PATH_TIMEOUT; + } else { + pthread_mutex_lock(&ct->lock); + tur_status = ct->state; + c->msgid = ct->msgid; + 
pthread_mutex_unlock(&ct->lock); + } + ct->thread = 0; + } else if (uatomic_read(&ct->running) != 0) { + condlog(3, "%d:%d : tur checker not finished", + major(ct->devt), minor(ct->devt)); + tur_status = PATH_PENDING; + } else { + /* TUR checker done */ + ct->thread = 0; + pthread_mutex_lock(&ct->lock); + tur_status = ct->state; + c->msgid = ct->msgid; + pthread_mutex_unlock(&ct->lock); + } + } else { + if (uatomic_read(&ct->holders) > 1) { + /* + * The thread has been cancelled but hasn't quit. + * We have to prevent it from interfering with the new + * thread. We create a new context and leave the old + * one with the stale thread, hoping it will clean up + * eventually. + */ + condlog(3, "%d:%d : tur thread not responding", + major(ct->devt), minor(ct->devt)); + + /* + * libcheck_init will replace c->context. + * It fails only in OOM situations. In this case, return + * PATH_UNCHECKED to avoid prematurely failing the path. + */ + if (libcheck_init(c) != 0) + return PATH_UNCHECKED; + + if (!uatomic_sub_return(&ct->holders, 1)) + /* It did terminate, eventually */ + cleanup_context(ct); + + ct = c->context; + } + /* Start new TUR checker */ + pthread_mutex_lock(&ct->lock); + tur_status = ct->state = PATH_PENDING; + ct->msgid = CHECKER_MSGID_NONE; + pthread_mutex_unlock(&ct->lock); + ct->fd = c->fd; + ct->timeout = c->timeout; + uatomic_add(&ct->holders, 1); + uatomic_set(&ct->running, 1); + tur_set_async_timeout(c); + setup_thread_attr(&attr, 32 * 1024, 1); + r = pthread_create(&ct->thread, &attr, tur_thread, ct); + pthread_attr_destroy(&attr); + if (r) { + uatomic_sub(&ct->holders, 1); + uatomic_set(&ct->running, 0); + ct->thread = 0; + condlog(3, "%d:%d : failed to start tur thread, using" + " sync mode", major(ct->devt), minor(ct->devt)); + return tur_check(c->fd, c->timeout, &c->msgid); + } + tur_timeout(&tsp); + pthread_mutex_lock(&ct->lock); + if (ct->state == PATH_PENDING) + r = pthread_cond_timedwait(&ct->active, &ct->lock, + &tsp); + if (!r) { + 
tur_status = ct->state; + c->msgid = ct->msgid; + } + pthread_mutex_unlock(&ct->lock); + if (tur_status == PATH_PENDING) { + condlog(4, "%d:%d : tur checker still running", + major(ct->devt), minor(ct->devt)); + } else { + int running = uatomic_xchg(&ct->running, 0); + if (running) + pthread_cancel(ct->thread); + ct->thread = 0; + } + } + + return tur_status; +} diff --git a/libmultipath/checkers/tur.h b/libmultipath/checkers/tur.h new file mode 100644 index 0000000..a2e8c88 --- /dev/null +++ b/libmultipath/checkers/tur.h @@ -0,0 +1,8 @@ +#ifndef _TUR_H +#define _TUR_H + +int tur (struct checker *); +int tur_init (struct checker *); +void tur_free (struct checker *); + +#endif /* _TUR_H */ diff --git a/libmultipath/config.c b/libmultipath/config.c new file mode 100644 index 0000000..b4d8768 --- /dev/null +++ b/libmultipath/config.c @@ -0,0 +1,947 @@ +/* + * Copyright (c) 2004, 2005 Christophe Varoqui + * Copyright (c) 2005 Benjamin Marzinski, Redhat + * Copyright (c) 2005 Edward Goggin, EMC + */ +#include +#include +#include +#include +#include +#include + +#include "checkers.h" +#include "memory.h" +#include "util.h" +#include "debug.h" +#include "parser.h" +#include "dict.h" +#include "hwtable.h" +#include "vector.h" +#include "structs.h" +#include "config.h" +#include "blacklist.h" +#include "defaults.h" +#include "prio.h" +#include "devmapper.h" +#include "mpath_cmd.h" +#include "propsel.h" + +static int +hwe_strmatch (const struct hwentry *hwe1, const struct hwentry *hwe2) +{ + if ((hwe2->vendor && !hwe1->vendor) || + (hwe1->vendor && (!hwe2->vendor || + strcmp(hwe1->vendor, hwe2->vendor)))) + return 1; + + if ((hwe2->product && !hwe1->product) || + (hwe1->product && (!hwe2->product || + strcmp(hwe1->product, hwe2->product)))) + return 1; + + if ((hwe2->revision && !hwe1->revision) || + (hwe1->revision && (!hwe2->revision || + strcmp(hwe1->revision, hwe2->revision)))) + return 1; + + return 0; +} + +static struct hwentry * +find_hwe_strmatch (const struct 
_vector *hwtable, const struct hwentry *hwe) +{ + int i; + struct hwentry *tmp, *ret = NULL; + + vector_foreach_slot (hwtable, tmp, i) { + if (hwe_strmatch(tmp, hwe)) + continue; + ret = tmp; + break; + } + return ret; +} + +static int +hwe_regmatch (const struct hwentry *hwe1, const char *vendor, + const char *product, const char *revision) +{ + regex_t vre, pre, rre; + int retval = 1; + + if (hwe1->vendor && + regcomp(&vre, hwe1->vendor, REG_EXTENDED|REG_NOSUB)) + goto out; + + if (hwe1->product && + regcomp(&pre, hwe1->product, REG_EXTENDED|REG_NOSUB)) + goto out_vre; + + if (hwe1->revision && + regcomp(&rre, hwe1->revision, REG_EXTENDED|REG_NOSUB)) + goto out_pre; + + if ((vendor || product || revision) && + (!hwe1->vendor || !vendor || + !regexec(&vre, vendor, 0, NULL, 0)) && + (!hwe1->product || !product || + !regexec(&pre, product, 0, NULL, 0)) && + (!hwe1->revision || !revision || + !regexec(&rre, revision, 0, NULL, 0))) + retval = 0; + + if (hwe1->revision) + regfree(&rre); +out_pre: + if (hwe1->product) + regfree(&pre); +out_vre: + if (hwe1->vendor) + regfree(&vre); +out: + return retval; +} + +static void _log_match(const char *fn, const struct hwentry *h, + const char *vendor, const char *product, + const char *revision) +{ + condlog(4, "%s: found match /%s:%s:%s/ for '%s:%s:%s'", fn, + h->vendor, h->product, h->revision, + vendor, product, revision); +} +#define log_match(h, v, p, r) _log_match(__func__, (h), (v), (p), (r)) + +int +find_hwe (const struct _vector *hwtable, + const char * vendor, const char * product, const char * revision, + vector result) +{ + int i, n = 0; + struct hwentry *tmp; + + /* + * Search backwards here, and add forward. 
+ * User modified entries are attached at the end of + * the list, so we have to check them first before + * continuing to the generic entries + */ + vector_reset(result); + vector_foreach_slot_backwards (hwtable, tmp, i) { + if (hwe_regmatch(tmp, vendor, product, revision)) + continue; + if (vector_alloc_slot(result) != NULL) { + vector_set_slot(result, tmp); + n++; + } + log_match(tmp, vendor, product, revision); + } + condlog(n > 1 ? 3 : 4, "%s: found %d hwtable matches for %s:%s:%s", + __func__, n, vendor, product, revision); + return n; +} + +struct mpentry *find_mpe(vector mptable, char *wwid) +{ + int i; + struct mpentry * mpe; + + if (!wwid) + return NULL; + + vector_foreach_slot (mptable, mpe, i) + if (mpe->wwid && !strcmp(mpe->wwid, wwid)) + return mpe; + + return NULL; +} + +char *get_mpe_wwid(vector mptable, char *alias) +{ + int i; + struct mpentry * mpe; + + if (!alias) + return NULL; + + vector_foreach_slot (mptable, mpe, i) + if (mpe->alias && strcmp(mpe->alias, alias) == 0) + return mpe->wwid; + + return NULL; +} + +void +free_hwe (struct hwentry * hwe) +{ + if (!hwe) + return; + + if (hwe->vendor) + FREE(hwe->vendor); + + if (hwe->product) + FREE(hwe->product); + + if (hwe->revision) + FREE(hwe->revision); + + if (hwe->getuid) + FREE(hwe->getuid); + + if (hwe->uid_attribute) + FREE(hwe->uid_attribute); + + if (hwe->features) + FREE(hwe->features); + + if (hwe->hwhandler) + FREE(hwe->hwhandler); + + if (hwe->selector) + FREE(hwe->selector); + + if (hwe->checker_name) + FREE(hwe->checker_name); + + if (hwe->prio_name) + FREE(hwe->prio_name); + + if (hwe->prio_args) + FREE(hwe->prio_args); + + if (hwe->alias_prefix) + FREE(hwe->alias_prefix); + + if (hwe->bl_product) + FREE(hwe->bl_product); + + FREE(hwe); +} + +void +free_hwtable (vector hwtable) +{ + int i; + struct hwentry * hwe; + + if (!hwtable) + return; + + vector_foreach_slot (hwtable, hwe, i) + free_hwe(hwe); + + vector_free(hwtable); +} + +void +free_mpe (struct mpentry * mpe) +{ + if 
(!mpe) + return; + + if (mpe->wwid) + FREE(mpe->wwid); + + if (mpe->selector) + FREE(mpe->selector); + + if (mpe->getuid) + FREE(mpe->getuid); + + if (mpe->uid_attribute) + FREE(mpe->uid_attribute); + + if (mpe->alias) + FREE(mpe->alias); + + if (mpe->prio_name) + FREE(mpe->prio_name); + + if (mpe->prio_args) + FREE(mpe->prio_args); + + FREE(mpe); +} + +void +free_mptable (vector mptable) +{ + int i; + struct mpentry * mpe; + + if (!mptable) + return; + + vector_foreach_slot (mptable, mpe, i) + free_mpe(mpe); + + vector_free(mptable); +} + +struct mpentry * +alloc_mpe (void) +{ + struct mpentry * mpe = (struct mpentry *) + MALLOC(sizeof(struct mpentry)); + + return mpe; +} + +struct hwentry * +alloc_hwe (void) +{ + struct hwentry * hwe = (struct hwentry *) + MALLOC(sizeof(struct hwentry)); + + return hwe; +} + +static char * +set_param_str(const char * str) +{ + char * dst; + int len; + + if (!str) + return NULL; + + len = strlen(str); + + if (!len) + return NULL; + + dst = (char *)MALLOC(len + 1); + + if (!dst) + return NULL; + + strcpy(dst, str); + return dst; +} + +#define merge_str(s) \ + if (!dst->s && src->s) { \ + if (!(dst->s = set_param_str(src->s))) \ + return 1; \ + } + +#define merge_num(s) \ + if (!dst->s && src->s) \ + dst->s = src->s + + +static int +merge_hwe (struct hwentry * dst, struct hwentry * src) +{ + char id[SCSI_VENDOR_SIZE+PATH_PRODUCT_SIZE]; + merge_str(vendor); + merge_str(product); + merge_str(revision); + merge_str(getuid); + merge_str(uid_attribute); + merge_str(features); + merge_str(hwhandler); + merge_str(selector); + merge_str(checker_name); + merge_str(prio_name); + merge_str(prio_args); + merge_str(alias_prefix); + merge_str(bl_product); + merge_num(pgpolicy); + merge_num(pgfailback); + merge_num(rr_weight); + merge_num(no_path_retry); + merge_num(minio); + merge_num(minio_rq); + merge_num(flush_on_last_del); + merge_num(fast_io_fail); + merge_num(dev_loss); + merge_num(user_friendly_names); + merge_num(retain_hwhandler); + 
merge_num(detect_prio); + merge_num(detect_checker); + merge_num(deferred_remove); + merge_num(delay_watch_checks); + merge_num(delay_wait_checks); + merge_num(skip_kpartx); + merge_num(max_sectors_kb); + merge_num(ghost_delay); + merge_num(all_tg_pt); + merge_num(vpd_vendor_id); + merge_num(san_path_err_threshold); + merge_num(san_path_err_forget_rate); + merge_num(san_path_err_recovery_time); + merge_num(marginal_path_err_sample_time); + merge_num(marginal_path_err_rate_threshold); + merge_num(marginal_path_err_recheck_gap_time); + merge_num(marginal_path_double_failed_time); + + snprintf(id, sizeof(id), "%s/%s", dst->vendor, dst->product); + reconcile_features_with_options(id, &dst->features, + &dst->no_path_retry, + &dst->retain_hwhandler); + return 0; +} + +static int +merge_mpe(struct mpentry *dst, struct mpentry *src) +{ + if (!dst || !src) + return 1; + + merge_str(alias); + merge_str(uid_attribute); + merge_str(getuid); + merge_str(selector); + merge_str(features); + merge_str(prio_name); + merge_str(prio_args); + + if (dst->prkey_source == PRKEY_SOURCE_NONE && + src->prkey_source != PRKEY_SOURCE_NONE) { + dst->prkey_source = src->prkey_source; + dst->sa_flags = src->sa_flags; + memcpy(&dst->reservation_key, &src->reservation_key, + sizeof(dst->reservation_key)); + } + + merge_num(pgpolicy); + merge_num(pgfailback); + merge_num(rr_weight); + merge_num(no_path_retry); + merge_num(minio); + merge_num(minio_rq); + merge_num(flush_on_last_del); + merge_num(attribute_flags); + merge_num(user_friendly_names); + merge_num(deferred_remove); + merge_num(delay_watch_checks); + merge_num(delay_wait_checks); + merge_num(san_path_err_threshold); + merge_num(san_path_err_forget_rate); + merge_num(san_path_err_recovery_time); + merge_num(marginal_path_err_sample_time); + merge_num(marginal_path_err_rate_threshold); + merge_num(marginal_path_err_recheck_gap_time); + merge_num(marginal_path_double_failed_time); + merge_num(skip_kpartx); + merge_num(max_sectors_kb); + 
merge_num(ghost_delay); + merge_num(uid); + merge_num(gid); + merge_num(mode); + + return 0; +} + +void merge_mptable(vector mptable) +{ + struct mpentry *mp1, *mp2; + int i, j; + + vector_foreach_slot(mptable, mp1, i) { + j = i + 1; + vector_foreach_slot_after(mptable, mp2, j) { + if (strcmp(mp1->wwid, mp2->wwid)) + continue; + condlog(1, "%s: duplicate multipath config section for %s", + __func__, mp1->wwid); + merge_mpe(mp2, mp1); + free_mpe(mp1); + vector_del_slot(mptable, i); + i--; + break; + } + } +} + +int +store_hwe (vector hwtable, struct hwentry * dhwe) +{ + struct hwentry * hwe; + + if (find_hwe_strmatch(hwtable, dhwe)) + return 0; + + if (!(hwe = alloc_hwe())) + return 1; + + if (!dhwe->vendor || !(hwe->vendor = set_param_str(dhwe->vendor))) + goto out; + + if (!dhwe->product || !(hwe->product = set_param_str(dhwe->product))) + goto out; + + if (dhwe->revision && !(hwe->revision = set_param_str(dhwe->revision))) + goto out; + + if (dhwe->uid_attribute && !(hwe->uid_attribute = set_param_str(dhwe->uid_attribute))) + goto out; + + if (dhwe->getuid && !(hwe->getuid = set_param_str(dhwe->getuid))) + goto out; + + if (dhwe->features && !(hwe->features = set_param_str(dhwe->features))) + goto out; + + if (dhwe->hwhandler && !(hwe->hwhandler = set_param_str(dhwe->hwhandler))) + goto out; + + if (dhwe->selector && !(hwe->selector = set_param_str(dhwe->selector))) + goto out; + + if (dhwe->checker_name && !(hwe->checker_name = set_param_str(dhwe->checker_name))) + goto out; + + if (dhwe->prio_name && !(hwe->prio_name = set_param_str(dhwe->prio_name))) + goto out; + + if (dhwe->prio_args && !(hwe->prio_args = set_param_str(dhwe->prio_args))) + goto out; + + if (dhwe->alias_prefix && !(hwe->alias_prefix = set_param_str(dhwe->alias_prefix))) + goto out; + + hwe->pgpolicy = dhwe->pgpolicy; + hwe->pgfailback = dhwe->pgfailback; + hwe->rr_weight = dhwe->rr_weight; + hwe->no_path_retry = dhwe->no_path_retry; + hwe->minio = dhwe->minio; + hwe->minio_rq = 
dhwe->minio_rq; + hwe->flush_on_last_del = dhwe->flush_on_last_del; + hwe->fast_io_fail = dhwe->fast_io_fail; + hwe->dev_loss = dhwe->dev_loss; + hwe->user_friendly_names = dhwe->user_friendly_names; + hwe->retain_hwhandler = dhwe->retain_hwhandler; + hwe->detect_prio = dhwe->detect_prio; + hwe->detect_checker = dhwe->detect_checker; + hwe->ghost_delay = dhwe->ghost_delay; + hwe->vpd_vendor_id = dhwe->vpd_vendor_id; + + if (dhwe->bl_product && !(hwe->bl_product = set_param_str(dhwe->bl_product))) + goto out; + + if (!vector_alloc_slot(hwtable)) + goto out; + + vector_set_slot(hwtable, hwe); + return 0; +out: + free_hwe(hwe); + return 1; +} + +static void +factorize_hwtable (vector hw, int n, const char *table_desc) +{ + struct hwentry *hwe1, *hwe2; + int i, j; + +restart: + vector_foreach_slot(hw, hwe1, i) { + /* drop invalid device configs */ + if (i >= n && (!hwe1->vendor || !hwe1->product)) { + condlog(0, "device config in %s missing vendor or product parameter", + table_desc); + vector_del_slot(hw, i--); + free_hwe(hwe1); + continue; + } + j = n > i + 1 ? n : i + 1; + vector_foreach_slot_after(hw, hwe2, j) { + if (hwe_strmatch(hwe2, hwe1) == 0) { + condlog(i >= n ? 1 : 3, + "%s: duplicate device section for %s:%s:%s in %s", + __func__, hwe1->vendor, hwe1->product, + hwe1->revision, table_desc); + vector_del_slot(hw, i); + merge_hwe(hwe2, hwe1); + free_hwe(hwe1); + if (i < n) + n -= 1; + /* + * Play safe here; we have modified + * the original vector so the outer + * vector_foreach_slot() might + * become confused. 
+ */ + goto restart; + } + } + } + return; +} + +struct config * +alloc_config (void) +{ + return (struct config *)MALLOC(sizeof(struct config)); +} + +void +free_config (struct config * conf) +{ + if (!conf) + return; + + if (conf->multipath_dir) + FREE(conf->multipath_dir); + + if (conf->selector) + FREE(conf->selector); + + if (conf->uid_attribute) + FREE(conf->uid_attribute); + + vector_reset(&conf->uid_attrs); + + if (conf->getuid) + FREE(conf->getuid); + + if (conf->features) + FREE(conf->features); + + if (conf->hwhandler) + FREE(conf->hwhandler); + + if (conf->bindings_file) + FREE(conf->bindings_file); + + if (conf->wwids_file) + FREE(conf->wwids_file); + + if (conf->prkeys_file) + FREE(conf->prkeys_file); + + if (conf->prio_name) + FREE(conf->prio_name); + + if (conf->alias_prefix) + FREE(conf->alias_prefix); + if (conf->partition_delim) + FREE(conf->partition_delim); + + if (conf->prio_args) + FREE(conf->prio_args); + + if (conf->checker_name) + FREE(conf->checker_name); + + if (conf->config_dir) + FREE(conf->config_dir); + + free_blacklist(conf->blist_devnode); + free_blacklist(conf->blist_wwid); + free_blacklist(conf->blist_property); + free_blacklist(conf->blist_protocol); + free_blacklist_device(conf->blist_device); + + free_blacklist(conf->elist_devnode); + free_blacklist(conf->elist_wwid); + free_blacklist(conf->elist_property); + free_blacklist(conf->elist_protocol); + free_blacklist_device(conf->elist_device); + + free_mptable(conf->mptable); + free_hwtable(conf->hwtable); + free_hwe(conf->overrides); + free_keywords(conf->keywords); + FREE(conf); +} + +/* if multipath fails to process the config directory, it should continue, + * with just a warning message */ +static void +process_config_dir(struct config *conf, char *dir) +{ + struct dirent **namelist; + struct scandir_result sr; + int i, n; + char path[LINE_MAX]; + int old_hwtable_size; + + if (dir[0] != '/') { + condlog(1, "config_dir '%s' must be a fully qualified path", + dir); + return; + 
} + n = scandir(dir, &namelist, NULL, alphasort); + if (n < 0) { + if (errno == ENOENT) + condlog(3, "No configuration dir '%s'", dir); + else + condlog(0, "couldn't open configuration dir '%s': %s", + dir, strerror(errno)); + return; + } else if (n == 0) + return; + sr.di = namelist; + sr.n = n; + pthread_cleanup_push_cast(free_scandir_result, &sr); + for (i = 0; i < n; i++) { + char *ext = strrchr(namelist[i]->d_name, '.'); + + if (!ext || strcmp(ext, ".conf")) + continue; + + old_hwtable_size = VECTOR_SIZE(conf->hwtable); + snprintf(path, LINE_MAX, "%s/%s", dir, namelist[i]->d_name); + path[LINE_MAX-1] = '\0'; + process_file(conf, path); + factorize_hwtable(conf->hwtable, old_hwtable_size, + namelist[i]->d_name); + } + pthread_cleanup_pop(1); +} + +static void set_max_checkint_from_watchdog(struct config *conf) +{ +#ifdef USE_SYSTEMD + char *envp = getenv("WATCHDOG_USEC"); + unsigned long checkint; + + if (envp && sscanf(envp, "%lu", &checkint) == 1) { + /* WATCHDOG_USEC is in microseconds; convert to whole seconds */ + checkint /= 1000000; + if (checkint < 1 || checkint > UINT_MAX) { + condlog(1, "invalid value for WatchdogSec: \"%s\"", envp); + return; + } + if (conf->max_checkint == 0 || conf->max_checkint > checkint) + conf->max_checkint = checkint; + condlog(3, "enabling watchdog, interval %lu", checkint); + conf->use_watchdog = true; + } +#endif +} + +struct config * +load_config (char * file) +{ + struct config *conf = alloc_config(); + + if (!conf) + return NULL; + + /* + * internal defaults + */ + conf->verbosity = DEFAULT_VERBOSITY; + + get_sys_max_fds(&conf->max_fds); + conf->bindings_file = set_default(DEFAULT_BINDINGS_FILE); + conf->wwids_file = set_default(DEFAULT_WWIDS_FILE); + conf->prkeys_file = set_default(DEFAULT_PRKEYS_FILE); + conf->multipath_dir = set_default(DEFAULT_MULTIPATHDIR); + conf->attribute_flags = 0; + conf->reassign_maps = DEFAULT_REASSIGN_MAPS; + conf->checkint = CHECKINT_UNDEF; + conf->use_watchdog = false; + conf->max_checkint = 0; + conf->force_sync = 
DEFAULT_FORCE_SYNC; + conf->partition_delim = (default_partition_delim != NULL ? + strdup(default_partition_delim) : NULL); + conf->processed_main_config = 0; + conf->find_multipaths = DEFAULT_FIND_MULTIPATHS; + conf->uxsock_timeout = DEFAULT_REPLY_TIMEOUT; + conf->retrigger_tries = DEFAULT_RETRIGGER_TRIES; + conf->retrigger_delay = DEFAULT_RETRIGGER_DELAY; + conf->uev_wait_timeout = DEFAULT_UEV_WAIT_TIMEOUT; + conf->remove_retries = 0; + conf->ghost_delay = DEFAULT_GHOST_DELAY; + conf->all_tg_pt = DEFAULT_ALL_TG_PT; + /* + * preload default hwtable + */ + conf->hwtable = vector_alloc(); + if (!conf->hwtable) + goto out; + if (setup_default_hwtable(conf->hwtable)) + goto out; + +#ifdef CHECK_BUILTIN_HWTABLE + factorize_hwtable(conf->hwtable, 0, "builtin"); +#endif + /* + * read the config file + */ + conf->keywords = vector_alloc(); + init_keywords(conf->keywords); + if (filepresent(file)) { + int builtin_hwtable_size; + + builtin_hwtable_size = VECTOR_SIZE(conf->hwtable); + if (process_file(conf, file)) { + condlog(0, "error parsing config file"); + goto out; + } + factorize_hwtable(conf->hwtable, builtin_hwtable_size, file); + } + + conf->processed_main_config = 1; + if (conf->config_dir == NULL) + conf->config_dir = set_default(DEFAULT_CONFIG_DIR); + if (conf->config_dir && conf->config_dir[0] != '\0') + process_config_dir(conf, conf->config_dir); + + /* + * fill the voids left in the config file + */ + set_max_checkint_from_watchdog(conf); + if (conf->max_checkint == 0) { + if (conf->checkint == CHECKINT_UNDEF) + conf->checkint = DEFAULT_CHECKINT; + conf->max_checkint = (conf->checkint < UINT_MAX / 4 ? + conf->checkint * 4 : UINT_MAX); + } else if (conf->checkint == CHECKINT_UNDEF) + conf->checkint = (conf->max_checkint >= 4 ? 
+ conf->max_checkint / 4 : 1); + else if (conf->checkint > conf->max_checkint) + conf->checkint = conf->max_checkint; + condlog(3, "polling interval: %u, max: %u", + conf->checkint, conf->max_checkint); + + if (conf->blist_devnode == NULL) { + conf->blist_devnode = vector_alloc(); + + if (!conf->blist_devnode) + goto out; + } + if (conf->blist_wwid == NULL) { + conf->blist_wwid = vector_alloc(); + + if (!conf->blist_wwid) + goto out; + } + if (conf->blist_device == NULL) { + conf->blist_device = vector_alloc(); + + if (!conf->blist_device) + goto out; + } + if (conf->blist_property == NULL) { + conf->blist_property = vector_alloc(); + + if (!conf->blist_property) + goto out; + } + if (conf->blist_protocol == NULL) { + conf->blist_protocol = vector_alloc(); + + if (!conf->blist_protocol) + goto out; + } + + if (conf->elist_devnode == NULL) { + conf->elist_devnode = vector_alloc(); + + if (!conf->elist_devnode) + goto out; + } + if (conf->elist_wwid == NULL) { + conf->elist_wwid = vector_alloc(); + + if (!conf->elist_wwid) + goto out; + } + + if (conf->elist_device == NULL) { + conf->elist_device = vector_alloc(); + + if (!conf->elist_device) + goto out; + } + + if (conf->elist_property == NULL) { + conf->elist_property = vector_alloc(); + + if (!conf->elist_property) + goto out; + } + if (conf->elist_protocol == NULL) { + conf->elist_protocol = vector_alloc(); + + if (!conf->elist_protocol) + goto out; + } + + if (setup_default_blist(conf)) + goto out; + + if (conf->mptable == NULL) { + conf->mptable = vector_alloc(); + if (!conf->mptable) + goto out; + } + + merge_mptable(conf->mptable); + merge_blacklist(conf->blist_devnode); + merge_blacklist(conf->blist_property); + merge_blacklist(conf->blist_wwid); + merge_blacklist_device(conf->blist_device); + merge_blacklist(conf->elist_devnode); + merge_blacklist(conf->elist_property); + merge_blacklist(conf->elist_wwid); + merge_blacklist_device(conf->elist_device); + + if (conf->bindings_file == NULL) + 
conf->bindings_file = set_default(DEFAULT_BINDINGS_FILE); + + if (!conf->multipath_dir || !conf->bindings_file || + !conf->wwids_file || !conf->prkeys_file) + goto out; + + return conf; +out: + free_config(conf); + return NULL; +} + +char *get_uid_attribute_by_attrs(struct config *conf, + const char *path_dev) +{ + vector uid_attrs = &conf->uid_attrs; + int j; + char *att, *col; + + vector_foreach_slot(uid_attrs, att, j) { + col = strrchr(att, ':'); + if (!col) + continue; + if (!strncmp(path_dev, att, col - att)) + return col + 1; + } + return NULL; +} + +int parse_uid_attrs(char *uid_attrs, struct config *conf) +{ + vector attrs = &conf->uid_attrs; + char *uid_attr_record, *tmp; + int ret = 0, count; + + if (!uid_attrs) + return 1; + + count = get_word(uid_attrs, &uid_attr_record); + while (uid_attr_record) { + tmp = strchr(uid_attr_record, ':'); + if (!tmp) { + condlog(2, "invalid record in uid_attrs: %s", + uid_attr_record); + free(uid_attr_record); + ret = 1; + } else if (!vector_alloc_slot(attrs)) { + free(uid_attr_record); + ret = 1; + } else + vector_set_slot(attrs, uid_attr_record); + if (!count) + break; + uid_attrs += count; + count = get_word(uid_attrs, &uid_attr_record); + } + return ret; +} diff --git a/libmultipath/config.h b/libmultipath/config.h new file mode 100644 index 0000000..ceecff2 --- /dev/null +++ b/libmultipath/config.h @@ -0,0 +1,262 @@ +#ifndef _CONFIG_H +#define _CONFIG_H + +#include +#include +#include +#include +#include "byteorder.h" + +#define ORIGIN_DEFAULT 0 +#define ORIGIN_CONFIG 1 + +/* + * In kernel, fast_io_fail == 0 means immediate failure on rport delete. + * OTOH '0' means not-configured in various places in multipath-tools. 
+ */ +#define MP_FAST_IO_FAIL_UNSET (0) +#define MP_FAST_IO_FAIL_OFF (-1) +#define MP_FAST_IO_FAIL_ZERO (-2) + +enum devtypes { + DEV_NONE, + DEV_DEVT, + DEV_DEVNODE, + DEV_DEVMAP, + DEV_UEVENT +}; + +enum mpath_cmds { + CMD_NONE, + CMD_CREATE, + CMD_DRY_RUN, + CMD_LIST_SHORT, + CMD_LIST_LONG, + CMD_VALID_PATH, + CMD_REMOVE_WWID, + CMD_RESET_WWIDS, + CMD_ADD_WWID, + CMD_USABLE_PATHS, + CMD_DUMP_CONFIG, +}; + +enum force_reload_types { + FORCE_RELOAD_NONE, + FORCE_RELOAD_YES, + FORCE_RELOAD_WEAK, +}; + +struct hwentry { + char * vendor; + char * product; + char * revision; + char * uid_attribute; + char * getuid; + char * features; + char * hwhandler; + char * selector; + char * checker_name; + char * prio_name; + char * prio_args; + char * alias_prefix; + + int pgpolicy; + int pgfailback; + int rr_weight; + int no_path_retry; + int minio; + int minio_rq; + int flush_on_last_del; + int fast_io_fail; + unsigned int dev_loss; + int user_friendly_names; + int retain_hwhandler; + int detect_prio; + int detect_checker; + int deferred_remove; + int delay_watch_checks; + int delay_wait_checks; + int san_path_err_threshold; + int san_path_err_forget_rate; + int san_path_err_recovery_time; + int marginal_path_err_sample_time; + int marginal_path_err_rate_threshold; + int marginal_path_err_recheck_gap_time; + int marginal_path_double_failed_time; + int skip_kpartx; + int max_sectors_kb; + int ghost_delay; + int all_tg_pt; + int vpd_vendor_id; + char * bl_product; +}; + +struct mpentry { + char * wwid; + char * alias; + char * uid_attribute; + char * getuid; + char * selector; + char * features; + + char * prio_name; + char * prio_args; + int prkey_source; + struct be64 reservation_key; + uint8_t sa_flags; + int pgpolicy; + int pgfailback; + int rr_weight; + int no_path_retry; + int minio; + int minio_rq; + int flush_on_last_del; + int attribute_flags; + int user_friendly_names; + int deferred_remove; + int delay_watch_checks; + int delay_wait_checks; + int 
san_path_err_threshold; + int san_path_err_forget_rate; + int san_path_err_recovery_time; + int marginal_path_err_sample_time; + int marginal_path_err_rate_threshold; + int marginal_path_err_recheck_gap_time; + int marginal_path_double_failed_time; + int skip_kpartx; + int max_sectors_kb; + int ghost_delay; + uid_t uid; + gid_t gid; + mode_t mode; +}; + +struct config { + struct rcu_head rcu; + int verbosity; + int pgpolicy_flag; + int pgpolicy; + int minio; + int minio_rq; + unsigned int checkint; + unsigned int max_checkint; + bool use_watchdog; + int pgfailback; + int remove; + int rr_weight; + int no_path_retry; + int user_friendly_names; + int bindings_read_only; + int max_fds; + int force_reload; + int queue_without_daemon; + int checker_timeout; + int flush_on_last_del; + int attribute_flags; + int fast_io_fail; + unsigned int dev_loss; + int log_checker_err; + int allow_queueing; + int find_multipaths; + uid_t uid; + gid_t gid; + mode_t mode; + int reassign_maps; + int retain_hwhandler; + int detect_prio; + int detect_checker; + int force_sync; + int deferred_remove; + int processed_main_config; + int delay_watch_checks; + int delay_wait_checks; + int san_path_err_threshold; + int san_path_err_forget_rate; + int san_path_err_recovery_time; + int marginal_path_err_sample_time; + int marginal_path_err_rate_threshold; + int marginal_path_err_recheck_gap_time; + int marginal_path_double_failed_time; + int uxsock_timeout; + int strict_timing; + int retrigger_tries; + int retrigger_delay; + int delayed_reconfig; + int uev_wait_timeout; + int skip_kpartx; + int remove_retries; + int max_sectors_kb; + int ghost_delay; + int find_multipaths_timeout; + int marginal_pathgroups; + unsigned int version[3]; + unsigned int sequence_nr; + + char * multipath_dir; + char * selector; + struct _vector uid_attrs; + char * uid_attribute; + char * getuid; + char * features; + char * hwhandler; + char * bindings_file; + char * wwids_file; + char * prkeys_file; + char * prio_name; 
+ char * prio_args; + char * checker_name; + char * alias_prefix; + char * partition_delim; + char * config_dir; + int prkey_source; + int all_tg_pt; + struct be64 reservation_key; + uint8_t sa_flags; + + vector keywords; + vector mptable; + vector hwtable; + struct hwentry *overrides; + + vector blist_devnode; + vector blist_wwid; + vector blist_device; + vector blist_property; + vector blist_protocol; + vector elist_devnode; + vector elist_wwid; + vector elist_device; + vector elist_property; + vector elist_protocol; + char *enable_foreign; +}; + +extern struct udev * udev; + +int find_hwe (const struct _vector *hwtable, + const char * vendor, const char * product, const char *revision, + vector result); +struct mpentry * find_mpe (vector mptable, char * wwid); +char * get_mpe_wwid (vector mptable, char * alias); + +struct hwentry * alloc_hwe (void); +struct mpentry * alloc_mpe (void); + +void free_hwe (struct hwentry * hwe); +void free_hwtable (vector hwtable); +void free_mpe (struct mpentry * mpe); +void free_mptable (vector mptable); + +int store_hwe (vector hwtable, struct hwentry *); + +struct config *load_config (char * file); +struct config * alloc_config (void); +void free_config (struct config * conf); +extern struct config *get_multipath_config(void); +extern void put_multipath_config(void *); + +int parse_uid_attrs(char *uid_attrs, struct config *conf); +char *get_uid_attribute_by_attrs(struct config *conf, + const char *path_dev); + +#endif diff --git a/libmultipath/configure.c b/libmultipath/configure.c new file mode 100644 index 0000000..c95848a --- /dev/null +++ b/libmultipath/configure.c @@ -0,0 +1,1557 @@ +/* + * Copyright (c) 2003, 2004, 2005 Christophe Varoqui + * Copyright (c) 2005 Benjamin Marzinski, Redhat + * Copyright (c) 2005 Kiyoshi Ueda, NEC + * Copyright (c) 2005 Patrick Caulfield, Redhat + * Copyright (c) 2005 Edward Goggin, EMC + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
"mpath_cmd.h" + +#include "checkers.h" +#include "vector.h" +#include "memory.h" +#include "devmapper.h" +#include "defaults.h" +#include "structs.h" +#include "structs_vec.h" +#include "dmparser.h" +#include "config.h" +#include "blacklist.h" +#include "propsel.h" +#include "discovery.h" +#include "debug.h" +#include "switchgroup.h" +#include "dm-generic.h" +#include "print.h" +#include "configure.h" +#include "pgpolicies.h" +#include "dict.h" +#include "alias.h" +#include "prio.h" +#include "util.h" +#include "uxsock.h" +#include "wwids.h" +#include "sysfs.h" +#include "io_err_stat.h" + +/* Time in ms to wait for pending checkers in setup_map() */ +#define WAIT_CHECKERS_PENDING_MS 10 +#define WAIT_ALL_CHECKERS_PENDING_MS 90 + +/* group paths in pg by host adapter + */ +int group_by_host_adapter(struct pathgroup *pgp, vector adapters) +{ + struct adapter_group *agp; + struct host_group *hgp; + struct path *pp, *pp1; + char adapter_name1[SLOT_NAME_SIZE]; + char adapter_name2[SLOT_NAME_SIZE]; + int i, j; + int found_hostgroup = 0; + + while (VECTOR_SIZE(pgp->paths) > 0) { + + pp = VECTOR_SLOT(pgp->paths, 0); + + if (sysfs_get_host_adapter_name(pp, adapter_name1)) + goto out; + /* create a new host adapter group + */ + agp = alloc_adaptergroup(); + if (!agp) + goto out; + agp->pgp = pgp; + + strlcpy(agp->adapter_name, adapter_name1, SLOT_NAME_SIZE); + store_adaptergroup(adapters, agp); + + /* create a new host port group + */ + hgp = alloc_hostgroup(); + if (!hgp) + goto out; + if (store_hostgroup(agp->host_groups, hgp)) + goto out; + + hgp->host_no = pp->sg_id.host_no; + agp->num_hosts++; + if (store_path(hgp->paths, pp)) + goto out; + + hgp->num_paths++; + /* delete path from path group + */ + vector_del_slot(pgp->paths, 0); + + /* add all paths belonging to same host adapter + */ + vector_foreach_slot(pgp->paths, pp1, i) { + if (sysfs_get_host_adapter_name(pp1, adapter_name2)) + goto out; + if (strcmp(adapter_name1, adapter_name2) == 0) { + found_hostgroup = 0; + 
vector_foreach_slot(agp->host_groups, hgp, j) {
+					if (hgp->host_no == pp1->sg_id.host_no) {
+						if (store_path(hgp->paths, pp1))
+							goto out;
+						hgp->num_paths++;
+						found_hostgroup = 1;
+						break;
+					}
+				}
+				if (!found_hostgroup) {
+					/* this path belongs to new host port
+					 * within this adapter
+					 */
+					hgp = alloc_hostgroup();
+					if (!hgp)
+						goto out;
+
+					if (store_hostgroup(agp->host_groups, hgp))
+						goto out;
+
+					agp->num_hosts++;
+					if (store_path(hgp->paths, pp1))
+						goto out;
+
+					hgp->host_no = pp1->sg_id.host_no;
+					hgp->num_paths++;
+				}
+				/* delete paths from original path_group
+				 * as they are added into adapter group now
+				 */
+				vector_del_slot(pgp->paths, i);
+				i--;
+			}
+		}
+	}
+	return 0;
+
+out:	/* add back paths into pg as re-ordering failed
+	 */
+	vector_foreach_slot(adapters, agp, i) {
+		vector_foreach_slot(agp->host_groups, hgp, j) {
+			while (VECTOR_SIZE(hgp->paths) > 0) {
+				pp = VECTOR_SLOT(hgp->paths, 0);
+				if (store_path(pgp->paths, pp))
+					condlog(3, "failed to restore "
+					"path %s into path group",
+					pp->dev);
+				vector_del_slot(hgp->paths, 0);
+			}
+		}
+	}
+	free_adaptergroup(adapters);
+	return 1;
+}
+
+/* re-order paths in pg by alternating adapters and host ports
+ * for optimized selection
+ *
+ * Each round looks at the adapter group at next_adapter_index and, inside
+ * it, the host group at agp->next_host_index.  If that host group still
+ * has paths, its first path is appended to pgp->paths and removed from the
+ * host group.  Both indices then advance with wrap-around, whether or not
+ * a path was moved.
+ *
+ * pgp:         path group that receives the re-ordered paths
+ * adapters:    vector of adapter groups holding the paths to hand out
+ * total_paths: number of paths to move; the loop ends when it reaches 0
+ *
+ * Returns 0 once total_paths paths were moved, 1 if an adapter group or
+ * host group slot is empty or store_path() fails.
+ */
+int order_paths_in_pg_by_alt_adapters(struct pathgroup *pgp, vector adapters,
+		 int total_paths)
+{
+	int next_adapter_index = 0;
+	struct adapter_group *agp;
+	struct host_group *hgp;
+	struct path *pp;
+
+	while (total_paths > 0) {
+		agp = VECTOR_SLOT(adapters, next_adapter_index);
+		if (!agp) {
+			condlog(0, "can't get adapter group %d", next_adapter_index);
+			return 1;
+		}
+
+		hgp = VECTOR_SLOT(agp->host_groups, agp->next_host_index);
+		if (!hgp) {
+			/* argument order follows the message: host group
+			 * index first, adapter group index second
+			 */
+			condlog(0, "can't get host group %d of adapter group %d",
+				agp->next_host_index, next_adapter_index);
+			return 1;
+		}
+
+		if (!hgp->num_paths) {
+			/* nothing left in this host group, try the next one */
+			agp->next_host_index++;
+			agp->next_host_index %= agp->num_hosts;
+			next_adapter_index++;
+			next_adapter_index %= VECTOR_SIZE(adapters);
+			continue;
+		}
+
+		pp = 
VECTOR_SLOT(hgp->paths, 0); + + if (store_path(pgp->paths, pp)) + return 1; + + total_paths--; + + vector_del_slot(hgp->paths, 0); + + hgp->num_paths--; + + agp->next_host_index++; + agp->next_host_index %= agp->num_hosts; + next_adapter_index++; + next_adapter_index %= VECTOR_SIZE(adapters); + } + + /* all paths are added into path_group + * in crafted child order + */ + return 0; +} + +/* round-robin: order paths in path group to alternate + * between all host adapters + */ +int rr_optimize_path_order(struct pathgroup *pgp) +{ + vector adapters; + struct path *pp; + int total_paths; + int i; + + total_paths = VECTOR_SIZE(pgp->paths); + vector_foreach_slot(pgp->paths, pp, i) { + if (pp->sg_id.proto_id != SCSI_PROTOCOL_FCP && + pp->sg_id.proto_id != SCSI_PROTOCOL_SAS && + pp->sg_id.proto_id != SCSI_PROTOCOL_ISCSI && + pp->sg_id.proto_id != SCSI_PROTOCOL_SRP) { + /* return success as default path order + * is maintained in path group + */ + return 0; + } + } + adapters = vector_alloc(); + if (!adapters) + return 0; + + /* group paths in path group by host adapters + */ + if (group_by_host_adapter(pgp, adapters)) { + /* already freed adapters */ + condlog(3, "Failed to group paths by adapters"); + return 0; + } + + /* re-order paths in pg to alternate between adapters and host ports + */ + if (order_paths_in_pg_by_alt_adapters(pgp, adapters, total_paths)) { + condlog(3, "Failed to re-order paths in pg by adapters " + "and host ports"); + free_adaptergroup(adapters); + /* return failure as original paths are + * removed form pgp + */ + return 1; + } + + free_adaptergroup(adapters); + return 0; +} + +static int wait_for_pending_paths(struct multipath *mpp, + struct config *conf, + int n_pending, int goal, int wait_ms) +{ + static const struct timespec millisec = + { .tv_sec = 0, .tv_nsec = 1000*1000 }; + int i, j; + struct path *pp; + struct pathgroup *pgp; + struct timespec ts; + + do { + vector_foreach_slot(mpp->pg, pgp, i) { + vector_foreach_slot(pgp->paths, pp, j) 
{ + if (pp->state != PATH_PENDING) + continue; + pp->state = get_state(pp, conf, + 0, PATH_PENDING); + if (pp->state != PATH_PENDING && + --n_pending <= goal) + return 0; + } + } + ts = millisec; + while (nanosleep(&ts, &ts) != 0 && errno == EINTR) + /* nothing */; + } while (--wait_ms > 0); + + return n_pending; +} + +int setup_map(struct multipath *mpp, char *params, int params_size, + struct vectors *vecs) +{ + struct pathgroup * pgp; + struct config *conf; + int i, n_paths, marginal_pathgroups; + + /* + * don't bother if devmap size is unknown + */ + if (mpp->size <= 0) { + condlog(3, "%s: devmap size is unknown", mpp->alias); + return 1; + } + + /* + * free features, selector, and hwhandler properties if they are being reused + */ + free_multipath_attributes(mpp); + if (mpp->disable_queueing && VECTOR_SIZE(mpp->paths) != 0) + mpp->disable_queueing = 0; + + /* + * properties selectors + * + * Ordering matters for some properties: + * - features after no_path_retry and retain_hwhandler + * - hwhandler after retain_hwhandler + * No guarantee that this list is complete, check code in + * propsel.c if in doubt. 
+ */ + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + + select_pgfailback(conf, mpp); + select_pgpolicy(conf, mpp); + select_selector(conf, mpp); + select_no_path_retry(conf, mpp); + select_retain_hwhandler(conf, mpp); + select_features(conf, mpp); + select_hwhandler(conf, mpp); + select_rr_weight(conf, mpp); + select_minio(conf, mpp); + select_mode(conf, mpp); + select_uid(conf, mpp); + select_gid(conf, mpp); + select_fast_io_fail(conf, mpp); + select_dev_loss(conf, mpp); + select_reservation_key(conf, mpp); + select_deferred_remove(conf, mpp); + select_marginal_path_err_sample_time(conf, mpp); + select_marginal_path_err_rate_threshold(conf, mpp); + select_marginal_path_err_recheck_gap_time(conf, mpp); + select_marginal_path_double_failed_time(conf, mpp); + select_san_path_err_threshold(conf, mpp); + select_san_path_err_forget_rate(conf, mpp); + select_san_path_err_recovery_time(conf, mpp); + select_delay_checks(conf, mpp); + select_skip_kpartx(conf, mpp); + select_max_sectors_kb(conf, mpp); + select_ghost_delay(conf, mpp); + select_flush_on_last_del(conf, mpp); + + sysfs_set_scsi_tmo(mpp, conf->checkint); + marginal_pathgroups = conf->marginal_pathgroups; + pthread_cleanup_pop(1); + + if (marginal_path_check_enabled(mpp)) + start_io_err_stat_thread(vecs); + + n_paths = VECTOR_SIZE(mpp->paths); + /* + * assign paths to path groups -- start with no groups and all paths + * in mpp->paths + */ + if (mpp->pg) { + vector_foreach_slot (mpp->pg, pgp, i) + free_pathgroup(pgp, KEEP_PATHS); + + vector_free(mpp->pg); + mpp->pg = NULL; + } + if (group_paths(mpp, marginal_pathgroups)) + return 1; + + /* + * If async state detection is used, see if pending state checks + * have finished, to get nr_active right. We can't wait until the + * checkers time out, as that may take 30s or more, and we are + * holding the vecs lock. 
+	 */
+	/*
+	 * The config reference taken for the select_*() calls has already
+	 * been released by pthread_cleanup_pop(1), so it must not be
+	 * dereferenced any more.  Take a fresh reference for the force_sync
+	 * test and for wait_for_pending_paths(), and release it afterwards.
+	 */
+	conf = get_multipath_config();
+	pthread_cleanup_push(put_multipath_config, conf);
+	if (conf->force_sync == 0 && n_paths > 0) {
+		int n_pending = pathcount(mpp, PATH_PENDING);
+
+		if (n_pending > 0)
+			n_pending = wait_for_pending_paths(
+				mpp, conf, n_pending, 0,
+				WAIT_CHECKERS_PENDING_MS);
+		/* ALL paths pending - wait some more, but be satisfied
+		   with only some paths finished */
+		if (n_pending == n_paths)
+			n_pending = wait_for_pending_paths(
+				mpp, conf, n_pending,
+				n_paths >= 4 ? 2 : 1,
+				WAIT_ALL_CHECKERS_PENDING_MS);
+		if (n_pending > 0)
+			condlog(2, "%s: setting up map with %d/%d path checkers pending",
+				mpp->alias, n_pending, n_paths);
+	}
+	pthread_cleanup_pop(1);
+
+	/*
+	 * ponders each path group and determine highest prio pg
+	 * to switch over (default to first)
+	 */
+	mpp->bestpg = select_path_group(mpp);
+
+	/* re-order paths in all path groups in an optimized way
+	 * for round-robin path selectors to get maximum throughput.
+	 */
+	if (!strncmp(mpp->selector, "round-robin", 11)) {
+		vector_foreach_slot(mpp->pg, pgp, i) {
+			if (VECTOR_SIZE(pgp->paths) <= 2)
+				continue;
+			if (rr_optimize_path_order(pgp)) {
+				condlog(2, "cannot re-order paths for "
+					"optimization: %s",
+					mpp->alias);
+				return 1;
+			}
+		}
+	}
+
+	/*
+	 * transform the mp->pg vector of vectors of paths
+	 * into a mp->params strings to feed the device-mapper
+	 */
+	if (assemble_map(mpp, params, params_size)) {
+		condlog(0, "%s: problem assembing map", mpp->alias);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * XOR the address of every path in pgp into pgp->id.  The value is
+ * accumulated on top of whatever pgp->id already holds; it is not reset
+ * here.
+ */
+static void
+compute_pgid(struct pathgroup * pgp)
+{
+	struct path * pp;
+	int i;
+
+	vector_foreach_slot (pgp->paths, pp, i)
+		pgp->id ^= (long)pp;
+}
+
+/*
+ * Compare the path groups of mpp with those of cmpp.
+ *
+ * For every path group of mpp the id is computed and a path group of cmpp
+ * with the same id and for which pathcmp() returns 0 is searched.
+ *
+ * Returns 0 if mpp is NULL or each path group of mpp found such a match,
+ * otherwise the (non-zero) number of cmpp path groups that were tried for
+ * the first path group without a match.
+ */
+static int
+pgcmp (struct multipath * mpp, struct multipath * cmpp)
+{
+	int i, j;
+	struct pathgroup * pgp;
+	struct pathgroup * cpgp;
+	int r = 0;
+
+	if (!mpp)
+		return 0;
+
+	vector_foreach_slot (mpp->pg, pgp, i) {
+		compute_pgid(pgp);
+
+		vector_foreach_slot (cmpp->pg, cpgp, j) {
+			if (pgp->id == cpgp->id &&
+			    !pathcmp(pgp, cpgp)) {
+				r = 0;
+				break;
+			}
+			r++;
+		}
+		if (r)
+			return r;
+	}
+	return r;
+}
+
+static struct udev_device *
+get_udev_for_mpp(const struct multipath *mpp)
+{
+	/*
+	 * Look up the udev block device for the map's dm major/minor.
+	 * Returns NULL (after logging) when mpp or mpp->dmi is missing or
+	 * udev has no such device; otherwise the caller gets the device
+	 * returned by udev_device_new_from_devnum().
+	 */
+	dev_t devnum;
+	struct udev_device *udd;
+
+	if (!mpp || !mpp->dmi) {
+		condlog(1, "%s called with empty mpp", __func__);
+		return NULL;
+	}
+
+	devnum = makedev(mpp->dmi->major, mpp->dmi->minor);
+	udd = udev_device_new_from_devnum(udev, 'b', devnum);
+	if (!udd) {
+		condlog(1, "failed to get udev device for %s", mpp->alias);
+		return NULL;
+	}
+	return udd;
+}
+
+/*
+ * Write "change" to the "uevent" sysfs attribute of the map's udev device.
+ * Does nothing if the device cannot be looked up.
+ */
+static void
+trigger_udev_change(const struct multipath *mpp)
+{
+	static const char change[] = "change";
+	struct udev_device *udd = get_udev_for_mpp(mpp);
+	if (!udd)
+		return;
+	condlog(3, "triggering %s uevent for %s", change, mpp->alias);
+	sysfs_attr_set_value(udd, "uevent", change, sizeof(change)-1);
+	udev_device_unref(udd);
+}
+
+/*
+ * Write action to the "uevent" attribute of every block device below dev
+ * whose devtype is "partition".
+ *
+ * dev:    parent device used as enumeration match
+ * action: string written to the "uevent" attribute
+ * len:    number of bytes of action to write
+ *
+ * Enumeration failures are silently ignored; devices that cannot be opened
+ * or that report no devtype are skipped.
+ */
+static void trigger_partitions_udev_change(struct udev_device *dev,
+					   const char *action, int len)
+{
+	struct udev_enumerate *part_enum;
+	struct udev_list_entry *item;
+
+	part_enum = udev_enumerate_new(udev);
+	if (!part_enum)
+		return;
+
+	if (udev_enumerate_add_match_parent(part_enum, dev) < 0 ||
+	    udev_enumerate_add_match_subsystem(part_enum, "block") < 0 ||
+	    udev_enumerate_scan_devices(part_enum) < 0)
+		goto unref;
+
+	udev_list_entry_foreach(item,
+				udev_enumerate_get_list_entry(part_enum)) {
+		const char *syspath;
+		const char *devtype;
+		struct udev_device *part;
+
+		syspath = udev_list_entry_get_name(item);
+		part = udev_device_new_from_syspath(udev, syspath);
+		if (!part)
+			continue;
+
+		/* the devtype lookup may yield NULL, which must not
+		 * reach strcmp()
+		 */
+		devtype = udev_device_get_devtype(part);
+		if (devtype && !strcmp("partition", devtype)) {
+			condlog(4, "%s: triggering %s event for %s", __func__,
+				action, syspath);
+			sysfs_attr_set_value(part, "uevent", action, len);
+		}
+		udev_device_unref(part);
+	}
+unref:
+	udev_enumerate_unref(part_enum);
+}
+
+void
+trigger_paths_udev_change(struct multipath *mpp, bool is_mpath)
+{
+	struct pathgroup *pgp;
+	struct path *pp;
+	int i, j;
+	/*
+	 * If a path changes from multipath to non-multipath, we must
+	 * synthesize an artificial "add" event, otherwise the LVM2 rules
+	 * 
(69-lvm2-lvmetad.rules) won't pick it up. Otherwise, we'd just + * irritate ourselves with an "add", so use "change". + */ + const char *action = is_mpath ? "change" : "add"; + + if (!mpp || !mpp->pg) + return; + + vector_foreach_slot (mpp->pg, pgp, i) { + if (!pgp->paths) + continue; + vector_foreach_slot(pgp->paths, pp, j) { + const char *env; + + if (!pp->udev) + continue; + /* + * Paths that are already classified as multipath + * members don't need another uevent. + */ + env = udev_device_get_property_value( + pp->udev, "DM_MULTIPATH_DEVICE_PATH"); + + if (is_mpath && env != NULL && !strcmp(env, "1")) { + /* + * If FIND_MULTIPATHS_WAIT_UNTIL is not "0", + * path is in "maybe" state and timer is running + * Send uevent now (see multipath.rules). + */ + env = udev_device_get_property_value( + pp->udev, "FIND_MULTIPATHS_WAIT_UNTIL"); + if (env == NULL || !strcmp(env, "0")) + continue; + } else if (!is_mpath && + (env == NULL || !strcmp(env, "0"))) + continue; + + condlog(3, "triggering %s uevent for %s (is %smultipath member)", + action, pp->dev, is_mpath ? 
"" : "no "); + sysfs_attr_set_value(pp->udev, "uevent", + action, strlen(action)); + trigger_partitions_udev_change(pp->udev, action, + strlen(action)); + } + } + + mpp->needs_paths_uevent = 0; +} + +static int +is_mpp_known_to_udev(const struct multipath *mpp) +{ + struct udev_device *udd = get_udev_for_mpp(mpp); + int ret = (udd != NULL); + udev_device_unref(udd); + return ret; +} + +static int +sysfs_set_max_sectors_kb(struct multipath *mpp, int is_reload) +{ + struct pathgroup * pgp; + struct path *pp; + char buff[11]; + int i, j, ret, err = 0; + struct udev_device *udd; + int max_sectors_kb; + + if (mpp->max_sectors_kb == MAX_SECTORS_KB_UNDEF) + return 0; + max_sectors_kb = mpp->max_sectors_kb; + if (is_reload) { + if (!mpp->dmi && dm_get_info(mpp->alias, &mpp->dmi) != 0) { + condlog(1, "failed to get dm info for %s", mpp->alias); + return 1; + } + udd = get_udev_for_mpp(mpp); + if (!udd) { + condlog(1, "failed to get udev device to set max_sectors_kb for %s", mpp->alias); + return 1; + } + ret = sysfs_attr_get_value(udd, "queue/max_sectors_kb", buff, + sizeof(buff)); + udev_device_unref(udd); + if (ret <= 0) { + condlog(1, "failed to get current max_sectors_kb from %s", mpp->alias); + return 1; + } + if (sscanf(buff, "%u\n", &max_sectors_kb) != 1) { + condlog(1, "can't parse current max_sectors_kb from %s", + mpp->alias); + return 1; + } + } + snprintf(buff, 11, "%d", max_sectors_kb); + + vector_foreach_slot (mpp->pg, pgp, i) { + vector_foreach_slot(pgp->paths, pp, j) { + ret = sysfs_attr_set_value(pp->udev, + "queue/max_sectors_kb", + buff, strlen(buff)); + if (ret < 0) { + condlog(1, "failed setting max_sectors_kb on %s : %s", pp->dev, strerror(-ret)); + err = 1; + } + } + } + return err; +} + +static void +select_action (struct multipath * mpp, vector curmp, int force_reload) +{ + struct multipath * cmpp; + struct multipath * cmpp_by_name; + char * mpp_feat, * cmpp_feat; + + cmpp = find_mp_by_wwid(curmp, mpp->wwid); + cmpp_by_name = find_mp_by_alias(curmp, 
mpp->alias); + + if (!cmpp_by_name) { + if (cmpp) { + condlog(2, "%s: rename %s to %s", mpp->wwid, + cmpp->alias, mpp->alias); + strlcpy(mpp->alias_old, cmpp->alias, WWID_SIZE); + mpp->action = ACT_RENAME; + if (force_reload) { + mpp->force_udev_reload = 1; + mpp->action = ACT_FORCERENAME; + } + return; + } + mpp->action = ACT_CREATE; + condlog(3, "%s: set ACT_CREATE (map does not exist)", + mpp->alias); + return; + } + + if (!cmpp) { + condlog(2, "%s: remove (wwid changed)", mpp->alias); + dm_flush_map(mpp->alias); + strlcpy(cmpp_by_name->wwid, mpp->wwid, WWID_SIZE); + drop_multipath(curmp, cmpp_by_name->wwid, KEEP_PATHS); + mpp->action = ACT_CREATE; + condlog(3, "%s: set ACT_CREATE (map wwid change)", + mpp->alias); + return; + } + + if (cmpp != cmpp_by_name) { + condlog(2, "%s: unable to rename %s to %s (%s is used by %s)", + mpp->wwid, cmpp->alias, mpp->alias, + mpp->alias, cmpp_by_name->wwid); + /* reset alias to existing alias */ + FREE(mpp->alias); + mpp->alias = STRDUP(cmpp->alias); + mpp->action = ACT_IMPOSSIBLE; + return; + } + + if (pathcount(mpp, PATH_UP) == 0) { + mpp->action = ACT_IMPOSSIBLE; + condlog(3, "%s: set ACT_IMPOSSIBLE (no usable path)", + mpp->alias); + return; + } + if (force_reload) { + mpp->force_udev_reload = 1; + mpp->action = ACT_RELOAD; + condlog(3, "%s: set ACT_RELOAD (forced by user)", + mpp->alias); + return; + } + if (cmpp->size != mpp->size) { + mpp->force_udev_reload = 1; + mpp->action = ACT_RESIZE; + condlog(3, "%s: set ACT_RESIZE (size change)", + mpp->alias); + return; + } + + if (mpp->no_path_retry != NO_PATH_RETRY_UNDEF && + !!strstr(mpp->features, "queue_if_no_path") != + !!strstr(cmpp->features, "queue_if_no_path")) { + mpp->action = ACT_RELOAD; + condlog(3, "%s: set ACT_RELOAD (no_path_retry change)", + mpp->alias); + return; + } + if ((mpp->retain_hwhandler != RETAIN_HWHANDLER_ON || + strcmp(cmpp->hwhandler, "0") == 0) && + (strlen(cmpp->hwhandler) != strlen(mpp->hwhandler) || + strncmp(cmpp->hwhandler, mpp->hwhandler, 
+		     strlen(mpp->hwhandler)))) {
+		mpp->action = ACT_RELOAD;
+		condlog(3, "%s: set ACT_RELOAD (hwhandler change)",
+			mpp->alias);
+		return;
+	}
+
+	if (mpp->retain_hwhandler != RETAIN_HWHANDLER_UNDEF &&
+	    !!strstr(mpp->features, "retain_attached_hw_handler") !=
+	    !!strstr(cmpp->features, "retain_attached_hw_handler") &&
+	    get_linux_version_code() < KERNEL_VERSION(4, 3, 0)) {
+		mpp->action = ACT_RELOAD;
+		condlog(3, "%s: set ACT_RELOAD (retain_hwhandler change)",
+			mpp->alias);
+		return;
+	}
+
+	/*
+	 * Compare the remaining features on private copies, with the two
+	 * features already handled above removed from both sides.
+	 */
+	cmpp_feat = STRDUP(cmpp->features);
+	mpp_feat = STRDUP(mpp->features);
+	if (cmpp_feat && mpp_feat) {
+		remove_feature(&mpp_feat, "queue_if_no_path");
+		remove_feature(&mpp_feat, "retain_attached_hw_handler");
+		remove_feature(&cmpp_feat, "queue_if_no_path");
+		remove_feature(&cmpp_feat, "retain_attached_hw_handler");
+		if (strncmp(mpp_feat, cmpp_feat, PARAMS_SIZE)) {
+			mpp->action = ACT_RELOAD;
+			condlog(3, "%s: set ACT_RELOAD (features change)",
+				mpp->alias);
+			/*
+			 * Return like every other branch does.  Falling
+			 * through lets the checks below overwrite the
+			 * action, ending in ACT_NOTHING when nothing else
+			 * differs.
+			 */
+			FREE(cmpp_feat);
+			FREE(mpp_feat);
+			return;
+		}
+	}
+	FREE(cmpp_feat);
+	FREE(mpp_feat);
+
+	if (!cmpp->selector || strncmp(cmpp->selector, mpp->selector,
+		    strlen(mpp->selector))) {
+		mpp->action = ACT_RELOAD;
+		condlog(3, "%s: set ACT_RELOAD (selector change)",
+			mpp->alias);
+		return;
+	}
+	if (cmpp->minio != mpp->minio) {
+		mpp->action = ACT_RELOAD;
+		condlog(3, "%s: set ACT_RELOAD (minio change, %u->%u)",
+			mpp->alias, cmpp->minio, mpp->minio);
+		return;
+	}
+	if (!cmpp->pg || VECTOR_SIZE(cmpp->pg) != VECTOR_SIZE(mpp->pg)) {
+		mpp->action = ACT_RELOAD;
+		condlog(3, "%s: set ACT_RELOAD (path group number change)",
+			mpp->alias);
+		return;
+	}
+	if (pgcmp(mpp, cmpp)) {
+		mpp->action = ACT_RELOAD;
+		condlog(3, "%s: set ACT_RELOAD (path group topology change)",
+			mpp->alias);
+		return;
+	}
+	if (cmpp->nextpg != mpp->bestpg) {
+		mpp->action = ACT_SWITCHPG;
+		condlog(3, "%s: set ACT_SWITCHPG (next path group change)",
+			mpp->alias);
+		return;
+	}
+	if (!is_mpp_known_to_udev(cmpp)) {
+		mpp->action = ACT_RELOAD;
+		condlog(3, "%s: set ACT_RELOAD (udev device not 
initialized)", + mpp->alias); + return; + } + mpp->action = ACT_NOTHING; + condlog(3, "%s: set ACT_NOTHING (map unchanged)", + mpp->alias); + return; +} + +int reinstate_paths(struct multipath *mpp) +{ + int i, j; + struct pathgroup * pgp; + struct path * pp; + + if (!mpp->pg) + return 0; + + vector_foreach_slot (mpp->pg, pgp, i) { + if (!pgp->paths) + continue; + + vector_foreach_slot (pgp->paths, pp, j) { + if (pp->state != PATH_UP && + (pgp->status == PGSTATE_DISABLED || + pgp->status == PGSTATE_ACTIVE)) + continue; + + if (pp->dmstate == PSTATE_FAILED) { + if (dm_reinstate_path(mpp->alias, pp->dev_t)) + condlog(0, "%s: error reinstating", + pp->dev); + } + } + } + return 0; +} + +static int +lock_multipath (struct multipath * mpp, int lock) +{ + struct pathgroup * pgp; + struct path * pp; + int i, j; + int x, y; + + if (!mpp || !mpp->pg) + return 0; + + vector_foreach_slot (mpp->pg, pgp, i) { + if (!pgp->paths) + continue; + vector_foreach_slot(pgp->paths, pp, j) { + if (lock && flock(pp->fd, LOCK_SH | LOCK_NB) && + errno == EWOULDBLOCK) + goto fail; + else if (!lock) + flock(pp->fd, LOCK_UN); + } + } + return 0; +fail: + vector_foreach_slot (mpp->pg, pgp, x) { + if (x > i) + return 1; + if (!pgp->paths) + continue; + vector_foreach_slot(pgp->paths, pp, y) { + if (x == i && y >= j) + return 1; + flock(pp->fd, LOCK_UN); + } + } + return 1; +} + +int domap(struct multipath *mpp, char *params, int is_daemon) +{ + int r = DOMAP_FAIL; + struct config *conf; + int verbosity; + + /* + * last chance to quit before touching the devmaps + */ + if (mpp->action == ACT_DRY_RUN) { + conf = get_multipath_config(); + verbosity = conf->verbosity; + put_multipath_config(conf); + print_multipath_topology(mpp, verbosity); + return DOMAP_DRY; + } + + if (mpp->action == ACT_CREATE && + dm_map_present(mpp->alias)) { + condlog(3, "%s: map already present", mpp->alias); + mpp->action = ACT_RELOAD; + } + + switch (mpp->action) { + case ACT_REJECT: + case ACT_NOTHING: + case 
ACT_IMPOSSIBLE: + return DOMAP_EXIST; + + case ACT_SWITCHPG: + dm_switchgroup(mpp->alias, mpp->bestpg); + /* + * we may have avoided reinstating paths because there where in + * active or disabled PG. Now that the topology has changed, + * retry. + */ + reinstate_paths(mpp); + return DOMAP_EXIST; + + case ACT_CREATE: + if (lock_multipath(mpp, 1)) { + condlog(3, "%s: failed to create map (in use)", + mpp->alias); + return DOMAP_RETRY; + } + + sysfs_set_max_sectors_kb(mpp, 0); + if (is_daemon && mpp->ghost_delay > 0 && count_active_paths(mpp) && + pathcount(mpp, PATH_UP) == 0) + mpp->ghost_delay_tick = mpp->ghost_delay; + r = dm_addmap_create(mpp, params); + + lock_multipath(mpp, 0); + break; + + case ACT_RELOAD: + sysfs_set_max_sectors_kb(mpp, 1); + if (mpp->ghost_delay_tick > 0 && pathcount(mpp, PATH_UP)) + mpp->ghost_delay_tick = 0; + r = dm_addmap_reload(mpp, params, 0); + break; + + case ACT_RESIZE: + sysfs_set_max_sectors_kb(mpp, 1); + if (mpp->ghost_delay_tick > 0 && pathcount(mpp, PATH_UP)) + mpp->ghost_delay_tick = 0; + r = dm_addmap_reload(mpp, params, 1); + break; + + case ACT_RENAME: + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + r = dm_rename(mpp->alias_old, mpp->alias, + conf->partition_delim, mpp->skip_kpartx); + pthread_cleanup_pop(1); + break; + + case ACT_FORCERENAME: + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + r = dm_rename(mpp->alias_old, mpp->alias, + conf->partition_delim, mpp->skip_kpartx); + pthread_cleanup_pop(1); + if (r) { + sysfs_set_max_sectors_kb(mpp, 1); + if (mpp->ghost_delay_tick > 0 && + pathcount(mpp, PATH_UP)) + mpp->ghost_delay_tick = 0; + r = dm_addmap_reload(mpp, params, 0); + } + break; + + default: + break; + } + + if (r == DOMAP_OK) { + /* + * DM_DEVICE_CREATE, DM_DEVICE_RENAME, or DM_DEVICE_RELOAD + * succeeded + */ + mpp->force_udev_reload = 0; + if (mpp->action == ACT_CREATE && + (remember_wwid(mpp->wwid) == 1 || + mpp->needs_paths_uevent)) 
+ trigger_paths_udev_change(mpp, true); + if (!is_daemon) { + /* multipath client mode */ + dm_switchgroup(mpp->alias, mpp->bestpg); + } else { + /* multipath daemon mode */ + mpp->stat_map_loads++; + condlog(2, "%s: load table [0 %llu %s %s]", mpp->alias, + mpp->size, TGT_MPATH, params); + /* + * Required action is over, reset for the stateful daemon. + * But don't do it for creation as we use in the caller the + * mpp->action to figure out whether to start the watievent checker. + */ + if (mpp->action != ACT_CREATE) + mpp->action = ACT_NOTHING; + else { + conf = get_multipath_config(); + mpp->wait_for_udev = 1; + mpp->uev_wait_tick = conf->uev_wait_timeout; + put_multipath_config(conf); + } + } + dm_setgeometry(mpp); + return DOMAP_OK; + } else if (r == DOMAP_FAIL && mpp->action == ACT_CREATE && + mpp->needs_paths_uevent) + trigger_paths_udev_change(mpp, false); + + return DOMAP_FAIL; +} + +static int +deadmap (struct multipath * mpp) +{ + int i, j; + struct pathgroup * pgp; + struct path * pp; + + if (!mpp->pg) + return 1; + + vector_foreach_slot (mpp->pg, pgp, i) { + if (!pgp->paths) + continue; + + vector_foreach_slot (pgp->paths, pp, j) + if (strlen(pp->dev)) + return 0; /* alive */ + } + + return 1; /* dead */ +} + +int check_daemon(void) +{ + int fd; + char *reply; + int ret = 0; + unsigned int timeout; + struct config *conf; + + fd = mpath_connect(); + if (fd == -1) + return 0; + + if (send_packet(fd, "show daemon") != 0) + goto out; + conf = get_multipath_config(); + timeout = conf->uxsock_timeout; + put_multipath_config(conf); + if (recv_packet(fd, &reply, timeout) != 0) + goto out; + + if (reply && strstr(reply, "shutdown")) + goto out_free; + + ret = 1; + +out_free: + FREE(reply); +out: + mpath_disconnect(fd); + return ret; +} + +/* + * The force_reload parameter determines how coalesce_paths treats existing maps. 
+ * FORCE_RELOAD_NONE: existing maps aren't touched at all + * FORCE_RELOAD_YES: all maps are rebuilt from scratch and (re)loaded in DM + * FORCE_RELOAD_WEAK: existing maps are compared to the current conf and only + * reloaded in DM if there's a difference. This is useful during startup. + */ +int coalesce_paths (struct vectors * vecs, vector newmp, char * refwwid, + int force_reload, enum mpath_cmds cmd) +{ + int ret = CP_FAIL; + int k, i, r; + int is_daemon = (cmd == CMD_NONE) ? 1 : 0; + char params[PARAMS_SIZE]; + struct multipath * mpp; + struct path * pp1; + struct path * pp2; + vector curmp = vecs->mpvec; + vector pathvec = vecs->pathvec; + struct config *conf; + int allow_queueing; + uint64_t *size_mismatch_seen; + + /* ignore refwwid if it's empty */ + if (refwwid && !strlen(refwwid)) + refwwid = NULL; + + if (force_reload != FORCE_RELOAD_NONE) { + vector_foreach_slot (pathvec, pp1, k) { + pp1->mpp = NULL; + } + } + + if (VECTOR_SIZE(pathvec) == 0) + return CP_OK; + size_mismatch_seen = calloc((VECTOR_SIZE(pathvec) - 1) / 64 + 1, + sizeof(uint64_t)); + if (size_mismatch_seen == NULL) + return CP_FAIL; + + vector_foreach_slot (pathvec, pp1, k) { + int invalid; + /* skip this path for some reason */ + + /* 1. if path has no unique id or wwid blacklisted */ + if (strlen(pp1->wwid) == 0) { + orphan_path(pp1, "no WWID"); + continue; + } + + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + invalid = (filter_path(conf, pp1) > 0); + pthread_cleanup_pop(1); + if (invalid) { + orphan_path(pp1, "blacklisted"); + continue; + } + + /* 2. if path already coalesced, or seen and discarded */ + if (pp1->mpp || is_bit_set_in_array(k, size_mismatch_seen)) + continue; + + /* 3. if path has disappeared */ + if (pp1->state == PATH_REMOVED) { + orphan_path(pp1, "path removed"); + continue; + } + + /* 4. 
path is out of scope */ + if (refwwid && strncmp(pp1->wwid, refwwid, WWID_SIZE - 1)) + continue; + + /* If find_multipaths was selected check if the path is valid */ + if (!refwwid && !should_multipath(pp1, pathvec, curmp)) { + orphan_path(pp1, "only one path"); + continue; + } + + /* + * at this point, we know we really got a new mp + */ + mpp = add_map_with_path(vecs, pp1, 0); + if (!mpp) { + orphan_path(pp1, "failed to create multipath device"); + continue; + } + + if (!mpp->paths) { + condlog(0, "%s: skip coalesce (no paths)", mpp->alias); + remove_map(mpp, vecs, 0); + continue; + } + + for (i = k + 1; i < VECTOR_SIZE(pathvec); i++) { + pp2 = VECTOR_SLOT(pathvec, i); + + if (strcmp(pp1->wwid, pp2->wwid)) + continue; + + if (!mpp->size && pp2->size) + mpp->size = pp2->size; + + if (mpp->size && pp2->size && + pp2->size != mpp->size) { + /* + * ouch, avoid feeding that to the DM + */ + condlog(0, "%s: size %llu, expected %llu. " + "Discard", pp2->dev, pp2->size, + mpp->size); + mpp->action = ACT_REJECT; + set_bit_in_array(i, size_mismatch_seen); + } + } + verify_paths(mpp, vecs); + + params[0] = '\0'; + if (setup_map(mpp, params, PARAMS_SIZE, vecs)) { + remove_map(mpp, vecs, 0); + continue; + } + + if (cmd == CMD_DRY_RUN) + mpp->action = ACT_DRY_RUN; + if (mpp->action == ACT_UNDEF) + select_action(mpp, curmp, + force_reload == FORCE_RELOAD_YES ? 1 : 0); + + r = domap(mpp, params, is_daemon); + + if (r == DOMAP_FAIL || r == DOMAP_RETRY) { + condlog(3, "%s: domap (%u) failure " + "for create/reload map", + mpp->alias, r); + if (r == DOMAP_FAIL || is_daemon) { + condlog(2, "%s: %s map", + mpp->alias, (mpp->action == ACT_CREATE)? 
+ "ignoring" : "removing"); + remove_map(mpp, vecs, 0); + continue; + } else /* if (r == DOMAP_RETRY && !is_daemon) */ { + ret = CP_RETRY; + goto out; + } + } + if (r == DOMAP_DRY) + continue; + + if (r == DOMAP_EXIST && mpp->action == ACT_NOTHING && + force_reload == FORCE_RELOAD_WEAK) + /* + * First time we're called, and no changes applied. + * domap() was a noop. But we can't be sure that + * udev has already finished setting up this device + * (udev in initrd may have been shut down while + * processing this device or its children). + * Trigger a change event, just in case. + */ + trigger_udev_change(find_mp_by_wwid(curmp, mpp->wwid)); + + conf = get_multipath_config(); + allow_queueing = conf->allow_queueing; + put_multipath_config(conf); + if (!is_daemon && !allow_queueing && !check_daemon()) { + if (mpp->no_path_retry != NO_PATH_RETRY_UNDEF && + mpp->no_path_retry != NO_PATH_RETRY_FAIL) + condlog(3, "%s: multipathd not running, unset " + "queue_if_no_path feature", mpp->alias); + if (!dm_queue_if_no_path(mpp->alias, 0)) + remove_feature(&mpp->features, + "queue_if_no_path"); + } + + if (!is_daemon && mpp->action != ACT_NOTHING) { + int verbosity; + + conf = get_multipath_config(); + verbosity = conf->verbosity; + put_multipath_config(conf); + print_multipath_topology(mpp, verbosity); + } + + if (newmp) { + if (mpp->action != ACT_REJECT) { + if (!vector_alloc_slot(newmp)) + goto out; + vector_set_slot(newmp, mpp); + } + else + remove_map(mpp, vecs, 0); + } + } + /* + * Flush maps with only dead paths (ie not in sysfs) + * Keep maps with only failed paths + */ + if (newmp) { + vector_foreach_slot (newmp, mpp, i) { + char alias[WWID_SIZE]; + + if (!deadmap(mpp)) + continue; + + strlcpy(alias, mpp->alias, WWID_SIZE); + + vector_del_slot(newmp, i); + i--; + remove_map(mpp, vecs, 0); + + if (dm_flush_map(alias)) + condlog(2, "%s: remove failed (dead)", + alias); + else + condlog(2, "%s: remove (dead)", alias); + } + } + ret = CP_OK; +out: + 
free(size_mismatch_seen); + return ret; +} + +struct udev_device *get_udev_device(const char *dev, enum devtypes dev_type) +{ + struct udev_device *ud = NULL; + const char *base; + + if (dev == NULL || *dev == '\0') + return NULL; + + switch (dev_type) { + case DEV_DEVNODE: + case DEV_DEVMAP: + /* This should be GNU basename, compiler will warn if not */ + base = basename(dev); + if (*base == '\0') + break; + ud = udev_device_new_from_subsystem_sysname(udev, "block", + base); + break; + case DEV_DEVT: + ud = udev_device_new_from_devnum(udev, 'b', parse_devt(dev)); + break; + case DEV_UEVENT: + ud = udev_device_new_from_environment(udev); + break; + default: + condlog(0, "Internal error: get_udev_device called with invalid type %d\n", + dev_type); + break; + } + if (ud == NULL) + condlog(2, "get_udev_device: failed to look up %s with type %d", + dev, dev_type); + return ud; +} + +/* + * returns: + * 0 - success + * 1 - failure + * 2 - blacklist + */ +int get_refwwid(enum mpath_cmds cmd, char *dev, enum devtypes dev_type, + vector pathvec, char **wwid) +{ + int ret = 1; + struct path * pp; + char buff[FILE_NAME_SIZE]; + char * refwwid = NULL, tmpwwid[WWID_SIZE]; + int flags = DI_SYSFS | DI_WWID; + struct config *conf; + int invalid = 0; + + if (!wwid) + return 1; + *wwid = NULL; + + if (dev_type == DEV_NONE) + return 1; + + if (cmd != CMD_REMOVE_WWID) + flags |= DI_BLACKLIST; + + if (dev_type == DEV_DEVNODE) { + if (basenamecpy(dev, buff, FILE_NAME_SIZE) == 0) { + condlog(1, "basename failed for '%s' (%s)", + dev, buff); + return 1; + } + + pp = find_path_by_dev(pathvec, buff); + if (!pp) { + struct udev_device *udevice = + get_udev_device(buff, dev_type); + + if (!udevice) + return 1; + + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + ret = store_pathinfo(pathvec, conf, udevice, + flags, &pp); + pthread_cleanup_pop(1); + udev_device_unref(udevice); + if (!pp) { + if (ret == 1) + condlog(0, "%s: can't store path info", + dev); + 
return ret; + } + } + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + if (pp->udev && pp->uid_attribute && + filter_property(conf, pp->udev, 3, pp->uid_attribute) > 0) + invalid = 1; + pthread_cleanup_pop(1); + if (invalid) + return 2; + + refwwid = pp->wwid; + goto out; + } + + if (dev_type == DEV_DEVT) { + strchop(dev); + if (devt2devname(buff, FILE_NAME_SIZE, dev)) { + condlog(0, "%s: cannot find block device\n", dev); + return 1; + } + pp = find_path_by_dev(pathvec, buff); + if (!pp) { + struct udev_device *udevice = + get_udev_device(dev, dev_type); + + if (!udevice) + return 1; + + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + ret = store_pathinfo(pathvec, conf, udevice, + flags, &pp); + pthread_cleanup_pop(1); + udev_device_unref(udevice); + if (!pp) { + if (ret == 1) + condlog(0, "%s can't store path info", + buff); + return ret; + } + } + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + if (pp->udev && pp->uid_attribute && + filter_property(conf, pp->udev, 3, pp->uid_attribute) > 0) + invalid = 1; + pthread_cleanup_pop(1); + if (invalid) + return 2; + refwwid = pp->wwid; + goto out; + } + + if (dev_type == DEV_UEVENT) { + struct udev_device *udevice = get_udev_device(dev, dev_type); + + if (!udevice) + return 1; + + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + ret = store_pathinfo(pathvec, conf, udevice, + flags, &pp); + pthread_cleanup_pop(1); + udev_device_unref(udevice); + if (!pp) { + if (ret == 1) + condlog(0, "%s: can't store path info", dev); + return ret; + } + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + if (pp->udev && pp->uid_attribute && + filter_property(conf, pp->udev, 3, pp->uid_attribute) > 0) + invalid = 1; + pthread_cleanup_pop(1); + if (invalid) + return 2; + refwwid = pp->wwid; + goto out; + } + + if (dev_type == DEV_DEVMAP) { + + conf = 
get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + if (((dm_get_uuid(dev, tmpwwid, WWID_SIZE)) == 0) + && (strlen(tmpwwid))) { + refwwid = tmpwwid; + goto check; + } + + /* + * may be a binding + */ + if (get_user_friendly_wwid(dev, tmpwwid, + conf->bindings_file) == 0) { + refwwid = tmpwwid; + goto check; + } + + /* + * or may be an alias + */ + refwwid = get_mpe_wwid(conf->mptable, dev); + + /* + * or directly a wwid + */ + if (!refwwid) + refwwid = dev; + +check: + if (refwwid && strlen(refwwid) && + filter_wwid(conf->blist_wwid, conf->elist_wwid, refwwid, + NULL) > 0) + invalid = 1; + pthread_cleanup_pop(1); + if (invalid) + return 2; + } +out: + if (refwwid && strlen(refwwid)) { + *wwid = STRDUP(refwwid); + return 0; + } + + return 1; +} + +int reload_map(struct vectors *vecs, struct multipath *mpp, int refresh, + int is_daemon) +{ + char params[PARAMS_SIZE] = {0}; + struct path *pp; + int i, r; + + update_mpp_paths(mpp, vecs->pathvec); + if (refresh) { + vector_foreach_slot (mpp->paths, pp, i) { + struct config *conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + r = pathinfo(pp, conf, DI_PRIO); + pthread_cleanup_pop(1); + if (r) { + condlog(2, "%s: failed to refresh pathinfo", + mpp->alias); + return 1; + } + } + } + if (setup_map(mpp, params, PARAMS_SIZE, vecs)) { + condlog(0, "%s: failed to setup map", mpp->alias); + return 1; + } + select_action(mpp, vecs->mpvec, 1); + + r = domap(mpp, params, is_daemon); + if (r == DOMAP_FAIL || r == DOMAP_RETRY) { + condlog(3, "%s: domap (%u) failure " + "for reload map", mpp->alias, r); + return 1; + } + + return 0; +} diff --git a/libmultipath/configure.h b/libmultipath/configure.h new file mode 100644 index 0000000..d750900 --- /dev/null +++ b/libmultipath/configure.h @@ -0,0 +1,62 @@ +/* + * configurator actions + */ +#define ACT_NOTHING_STR "unchanged" +#define ACT_REJECT_STR "reject" +#define ACT_RELOAD_STR "reload" +#define ACT_SWITCHPG_STR "switchpg" 
+#define ACT_RENAME_STR		"rename"
+#define ACT_CREATE_STR		"create"
+#define ACT_RESIZE_STR		"resize"
+/* Actions; several of them have a printable ACT_*_STR name defined above. */
+enum actions {
+	ACT_UNDEF,
+	ACT_NOTHING,
+	ACT_REJECT,
+	ACT_RELOAD,
+	ACT_SWITCHPG,
+	ACT_RENAME,
+	ACT_CREATE,
+	ACT_RESIZE,
+	ACT_FORCERENAME,
+	ACT_DRY_RUN,
+	ACT_IMPOSSIBLE,
+};
+
+/*
+ * Return value of domap()
+ * DOMAP_RETRY is only used for ACT_CREATE (see domap()).
+ */
+enum {
+	DOMAP_RETRY	= -1,
+	DOMAP_FAIL	= 0,
+	DOMAP_OK	= 1,
+	DOMAP_EXIST	= 2,
+	DOMAP_DRY	= 3
+};
+
+/*
+ * Return value of coalesce_paths()
+ * CP_RETRY is only used in non-daemon case (multipath).
+ */
+enum {
+	CP_OK = 0,
+	CP_FAIL,
+	CP_RETRY,
+};
+
+#define FLUSH_ONE 1
+#define FLUSH_ALL 2
+
+struct vectors;
+
+int setup_map (struct multipath * mpp, char * params, int params_size,
+	       struct vectors *vecs );
+int domap (struct multipath * mpp, char * params, int is_daemon);	/* returns one of the DOMAP_* values above */
+int reinstate_paths (struct multipath *mpp);
+int coalesce_paths (struct vectors *vecs, vector curmp, char * refwwid, int force_reload, enum mpath_cmds cmd);	/* returns one of the CP_* values above */
+int get_refwwid (enum mpath_cmds cmd, char * dev, enum devtypes dev_type,
+		 vector pathvec, char **wwid);	/* 0: success, 1: failure, 2: blacklist */
+int reload_map(struct vectors *vecs, struct multipath *mpp, int refresh, int is_daemon);	/* 0 on success, 1 on failure */
+struct udev_device *get_udev_device(const char *dev, enum devtypes dev_type);	/* NULL on failure */
+void trigger_paths_udev_change(struct multipath *mpp, bool is_mpath);
diff --git a/libmultipath/debug.c b/libmultipath/debug.c
new file mode 100644
index 0000000..4128cb9
--- /dev/null
+++ b/libmultipath/debug.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2005 Christophe Varoqui
+ */
+#include
+#include
+#include
+#include
+
+#include "log_pthread.h"
+#include
+#include
+#include "../third-party/valgrind/drd.h"
+#include "vector.h"
+#include "config.h"
+#include "defaults.h"
+#include "debug.h"
+/*
+ * dlog - printf-style logging entry point behind the condlog() macro.
+ *
+ * @sink: values below 1 write to stderr (0 additionally prefixes a time
+ *        stamp); any other value forwards to log_safe()
+ * @prio: the message is dropped when prio exceeds the configured verbosity
+ *        (DEFAULT_VERBOSITY if there is no configuration)
+ * @fmt:  printf format followed by its arguments
+ */
+void dlog (int sink, int prio, const char * fmt, ...)
+{ + va_list ap; + int thres; + struct config *conf; + + va_start(ap, fmt); + conf = get_multipath_config(); + ANNOTATE_IGNORE_READS_BEGIN(); + thres = (conf) ? conf->verbosity : DEFAULT_VERBOSITY; + ANNOTATE_IGNORE_READS_END(); + put_multipath_config(conf); + + if (prio <= thres) { + if (sink < 1) { + if (sink == 0) { + time_t t = time(NULL); + struct tm *tb = localtime(&t); + char buff[16]; + + strftime(buff, sizeof(buff), + "%b %d %H:%M:%S", tb); + buff[sizeof(buff)-1] = '\0'; + + fprintf(stderr, "%s | ", buff); + } + vfprintf(stderr, fmt, ap); + } + else + log_safe(prio + 3, fmt, ap); + } + va_end(ap); +} diff --git a/libmultipath/debug.h b/libmultipath/debug.h new file mode 100644 index 0000000..c6120c1 --- /dev/null +++ b/libmultipath/debug.h @@ -0,0 +1,13 @@ +void dlog (int sink, int prio, const char * fmt, ...) + __attribute__((format(printf, 3, 4))); + + +#include +#include + +#include "log_pthread.h" + +extern int logsink; + +#define condlog(prio, fmt, args...) \ + dlog(logsink, prio, fmt "\n", ##args) diff --git a/libmultipath/defaults.c b/libmultipath/defaults.c new file mode 100644 index 0000000..082640d --- /dev/null +++ b/libmultipath/defaults.c @@ -0,0 +1,9 @@ +/* + * Copyright (c) 2005 Christophe Varoqui + */ +#include + +#include "defaults.h" +#include "memory.h" + +const char * const default_partition_delim = DEFAULT_PARTITION_DELIM; diff --git a/libmultipath/defaults.h b/libmultipath/defaults.h new file mode 100644 index 0000000..e5ee6af --- /dev/null +++ b/libmultipath/defaults.h @@ -0,0 +1,75 @@ +#ifndef _DEFAULTS_H +#define _DEFAULTS_H +#include + +/* + * If you add or modify a value also update multipath/multipath.conf.5 + * and the TEMPLATE in libmultipath/hwtable.c + */ +#define DEFAULT_UID_ATTRIBUTE "ID_SERIAL" +#define DEFAULT_NVME_UID_ATTRIBUTE "ID_WWN" +#define DEFAULT_UDEVDIR "/dev" +#define DEFAULT_MULTIPATHDIR "/" LIB_STRING "/multipath" +#define DEFAULT_SELECTOR "service-time 0" +#define DEFAULT_ALIAS_PREFIX "mpath" +#define 
DEFAULT_FEATURES	"0"
+#define DEFAULT_HWHANDLER	"0"
+#define DEFAULT_MINIO		1000
+#define DEFAULT_MINIO_RQ	1
+#define DEFAULT_PGPOLICY	FAILOVER
+#define DEFAULT_FAILBACK	-FAILBACK_MANUAL	/* negated constant; note: expands without parentheses */
+#define DEFAULT_RR_WEIGHT	RR_WEIGHT_NONE
+#define DEFAULT_NO_PATH_RETRY	NO_PATH_RETRY_UNDEF
+#define DEFAULT_VERBOSITY	2	/* used by dlog() when no configuration is available */
+#define DEFAULT_REASSIGN_MAPS	0
+#define DEFAULT_FIND_MULTIPATHS	FIND_MULTIPATHS_STRICT
+#define DEFAULT_FAST_IO_FAIL	5
+#define DEFAULT_DEV_LOSS_TMO	600
+#define DEFAULT_RETAIN_HWHANDLER RETAIN_HWHANDLER_ON
+#define DEFAULT_DETECT_PRIO	DETECT_PRIO_ON
+#define DEFAULT_DETECT_CHECKER	DETECT_CHECKER_ON
+#define DEFAULT_DEFERRED_REMOVE	DEFERRED_REMOVE_OFF
+#define DEFAULT_DELAY_CHECKS	NU_NO
+#define DEFAULT_ERR_CHECKS	NU_NO
+#define DEFAULT_UEVENT_STACKSIZE 256
+#define DEFAULT_RETRIGGER_DELAY	10
+#define DEFAULT_RETRIGGER_TRIES	3
+#define DEFAULT_UEV_WAIT_TIMEOUT 30
+#define DEFAULT_PRIO		PRIO_CONST
+#define DEFAULT_PRIO_ARGS	""
+#define DEFAULT_CHECKER		TUR
+#define DEFAULT_FLUSH		FLUSH_DISABLED
+#define DEFAULT_USER_FRIENDLY_NAMES USER_FRIENDLY_NAMES_OFF
+#define DEFAULT_FORCE_SYNC	0
+#define UNSET_PARTITION_DELIM "/UNSET/"
+#define DEFAULT_PARTITION_DELIM	NULL	/* NULL, i.e. not a string: do not pass to code that requires one */
+#define DEFAULT_SKIP_KPARTX SKIP_KPARTX_OFF
+#define DEFAULT_DISABLE_CHANGED_WWIDS 1
+#define DEFAULT_MAX_SECTORS_KB MAX_SECTORS_KB_UNDEF
+#define DEFAULT_GHOST_DELAY GHOST_DELAY_OFF
+#define DEFAULT_FIND_MULTIPATHS_TIMEOUT -10	/* negative literal; note: expands without parentheses */
+#define DEFAULT_UNKNOWN_FIND_MULTIPATHS_TIMEOUT 1
+#define DEFAULT_ALL_TG_PT ALL_TG_PT_OFF
+/* Enable all foreign libraries by default */
+#define DEFAULT_ENABLE_FOREIGN ""
+
+#define CHECKINT_UNDEF		UINT_MAX
+#define DEFAULT_CHECKINT	5
+/* Default pid file, socket name and configuration/state file locations */
+#define MAX_DEV_LOSS_TMO	UINT_MAX
+#define DEFAULT_PIDFILE		"/" RUN_DIR "/multipathd.pid"
+#define DEFAULT_SOCKET		"/org/kernel/linux/storage/multipathd"
+#define DEFAULT_CONFIGFILE	"/etc/multipath.conf"
+#define DEFAULT_BINDINGS_FILE	"/etc/multipath/bindings"
+#define DEFAULT_WWIDS_FILE	"/etc/multipath/wwids"
+#define DEFAULT_PRKEYS_FILE
"/etc/multipath/prkeys" +#define DEFAULT_CONFIG_DIR "/etc/multipath/conf.d" +#define MULTIPATH_SHM_BASE "/dev/shm/multipath/" + + +static inline char *set_default(char *str) +{ + return strdup(str); +} +extern const char *const default_partition_delim; +#endif /* _DEFAULTS_H */ diff --git a/libmultipath/devmapper.c b/libmultipath/devmapper.c new file mode 100644 index 0000000..bed8ddc --- /dev/null +++ b/libmultipath/devmapper.c @@ -0,0 +1,1653 @@ +/* + * snippets copied from device-mapper dmsetup.c + * Copyright (c) 2004, 2005 Christophe Varoqui + * Copyright (c) 2005 Kiyoshi Ueda, NEC + * Copyright (c) 2005 Patrick Caulfield, Redhat + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "util.h" +#include "checkers.h" +#include "vector.h" +#include "structs.h" +#include "debug.h" +#include "memory.h" +#include "devmapper.h" +#include "sysfs.h" +#include "config.h" +#include "wwids.h" + +#include "log_pthread.h" +#include +#include + +#define MAX_WAIT 5 +#define LOOPS_PER_SEC 5 + +static int dm_conf_verbosity; + +#ifdef LIBDM_API_DEFERRED +static int dm_cancel_remove_partmaps(const char * mapname); +#endif + +static int do_foreach_partmaps(const char * mapname, + int (*partmap_func)(const char *, void *), + void *data); + +#ifndef LIBDM_API_COOKIE +static inline int dm_task_set_cookie(struct dm_task *dmt, uint32_t *c, int a) +{ + return 1; +} + +void dm_udev_wait(unsigned int c) +{ +} + +void dm_udev_set_sync_support(int c) +{ +} + +#endif + +__attribute__((format(printf, 4, 5))) static void +dm_write_log (int level, const char *file, int line, const char *f, ...) 
+{
+	/*
+	 * Body of dm_write_log(): levels are capped at 6; nothing is
+	 * printed unless dm_conf_verbosity is above 3 and at least @level.
+	 */
+	va_list ap;
+	int thres;
+
+	if (level > 6)
+		level = 6;
+
+	thres = dm_conf_verbosity;
+	if (thres <= 3 || level > thres)
+		return;
+
+	va_start(ap, f);
+	if (logsink < 1) {
+		if (logsink == 0) {
+			/*
+			 * localtime_r() rather than localtime(): the latter
+			 * returns shared static storage, which is unsafe if
+			 * several threads log at the same time.
+			 */
+			time_t t = time(NULL);
+			struct tm tb;
+			char buff[16] = "";
+
+			/* strftime() returns 0 if the stamp does not fit */
+			if (!localtime_r(&t, &tb) ||
+			    !strftime(buff, sizeof(buff),
+				      "%b %d %H:%M:%S", &tb))
+				buff[0] = '\0';
+
+			fprintf(stderr, "%s | ", buff);
+		}
+		fprintf(stderr, "libdevmapper: %s(%i): ", file, line);
+		vfprintf(stderr, f, ap);
+		fprintf(stderr, "\n");
+	} else {
+		condlog(level, "libdevmapper: %s(%i): ", file, line);
+		log_safe(level + 3, f, ap);
+	}
+	va_end(ap);
+
+	return;
+}
+
+/*
+ * dm_init - remember verbosity @v and route libdevmapper logging through
+ * dm_write_log().
+ */
+void dm_init(int v)
+{
+	dm_conf_verbosity = v;
+	dm_log_init(&dm_write_log);
+	dm_log_init_verbose(v + 3);
+}
+
+/*
+ * dm_lib_prereq - check that the libdevmapper in use is recent enough.
+ *
+ * The minimum version depends on which LIBDM_API_* features this file was
+ * built with.  Returns 0 if the requirement is met, 1 otherwise (including
+ * when the version cannot be obtained or parsed).
+ */
+static int
+dm_lib_prereq (void)
+{
+	char version[64];
+	int v[3];
+#if defined(LIBDM_API_DEFERRED)
+	int minv[3] = {1, 2, 89};
+#elif defined(DM_SUBSYSTEM_UDEV_FLAG0)
+	int minv[3] = {1, 2, 82};
+#elif defined(LIBDM_API_COOKIE)
+	int minv[3] = {1, 2, 38};
+#else
+	int minv[3] = {1, 2, 8};
+#endif
+
+	/* do not print or parse the buffer unless it was actually filled */
+	if (!dm_get_library_version(version, sizeof(version))) {
+		condlog(0, "cannot get libdevmapper version");
+		return 1;
+	}
+	condlog(3, "libdevmapper version %s", version);
+	if (sscanf(version, "%d.%d.%d ", &v[0], &v[1], &v[2]) != 3) {
+		condlog(0, "invalid libdevmapper version %s", version);
+		return 1;
+	}
+
+	if (VERSION_GE(v, minv))
+		return 0;
+	condlog(0, "libdevmapper version must be >= %d.%.2d.%.2d",
+		minv[0], minv[1], minv[2]);
+	return 1;
+}
+
+/*
+ * dm_drv_version - read the kernel dm driver version into v[0..2].
+ *
+ * v[] is zeroed first.  Returns 0 on success, 1 if the version cannot be
+ * obtained or parsed.
+ */
+int
+dm_drv_version(unsigned int *v)
+{
+	char buff[64];
+
+	v[0] = 0;
+	v[1] = 0;
+	v[2] = 0;
+
+	if (!dm_driver_version(buff, sizeof(buff))) {
+		condlog(0, "cannot get kernel dm version");
+		return 1;
+	}
+	if (sscanf(buff, "%u.%u.%u ", &v[0], &v[1], &v[2]) != 3) {
+		condlog(0, "invalid kernel dm version '%s'", buff);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * dm_tgt_version - look up the version of the kernel dm target whose name
+ * starts with @str and store it in version[0..2] (zeroed first).
+ *
+ * Returns 0 on success, 1 if the dm task cannot be created, 2 if the task
+ * fails to run or no matching target is found.
+ */
+int
+dm_tgt_version (unsigned int * version, char * str)
+{
+	int r = 2;
+	struct dm_task *dmt;
+	struct dm_versions *target;
+	struct dm_versions *last_target;
+	unsigned int *v;
+
+	
version[0] = 0;
+	version[1] = 0;
+	version[2] = 0;
+
+	if (!(dmt = dm_task_create(DM_DEVICE_LIST_VERSIONS)))
+		return 1;
+
+	dm_task_no_open_count(dmt);
+
+	if (!dm_task_run(dmt)) {
+		condlog(0, "Can not communicate with kernel DM");
+		goto out;
+	}
+	target = dm_task_get_versions(dmt);
+	/* scan the list for the first entry whose name begins with str */
+	do {
+		last_target = target;
+		if (!strncmp(str, target->name, strlen(str))) {
+			r = 1;	/* marks "found"; replaced by 0 once the version is copied */
+			break;
+		}
+		target = (void *) target + target->next;
+	} while (last_target != target);	/* a zero ->next leaves target unchanged and ends the scan */
+
+	if (r == 2) {
+		condlog(0, "DM %s kernel driver not loaded", str);
+		goto out;
+	}
+	v = target->version;
+	version[0] = v[0];
+	version[1] = v[1];
+	version[2] = v[2];
+	r = 0;
+out:
+	dm_task_destroy(dmt);
+	return r;	/* 0 if the version was copied out, 2 otherwise */
+}
+/*
+ * dm_tgt_prereq - check the version of the kernel's TGT_MPATH target.
+ *
+ * Copies the detected version into ver[0..2] and returns 0 if it is >= minv
+ * ({1, 0, 3}); returns 1 if it is older or cannot be determined, in which
+ * case ver[] is left untouched.
+ */
+static int
+dm_tgt_prereq (unsigned int *ver)
+{
+	unsigned int minv[3] = {1, 0, 3};
+	unsigned int version[3] = {0, 0, 0};
+	unsigned int * v = version;
+
+	if (dm_tgt_version(v, TGT_MPATH)) {
+		/* in doubt return not capable */
+		return 1;
+	}
+
+	/* test request based multipath capability */
+	condlog(3, "DM multipath kernel driver v%u.%u.%u",
+		v[0], v[1], v[2]);
+
+	if (VERSION_GE(v, minv)) {
+		ver[0] = v[0];
+		ver[1] = v[1];
+		ver[2] = v[2];
+		return 0;
+	}
+
+	condlog(0, "DM multipath kernel driver must be >= v%u.%u.%u",
+		minv[0], minv[1], minv[2]);
+	return 1;
+}
+/*
+ * dm_prereq - run the library check, then the kernel target check.
+ * Returns 0 only if both pass; v[] receives the target version then.
+ */
+static int dm_prereq(unsigned int *v)
+{
+	if (dm_lib_prereq())
+		return 1;
+	return dm_tgt_prereq(v);
+}
+/* udev sync setting applied by libmp_dm_init(); kept as 0 or 1 */
+static int libmp_dm_udev_sync = 0;
+/* Record whether udev sync support should be enabled (any non-zero => 1). */
+void libmp_udev_set_sync_support(int on)
+{
+	libmp_dm_udev_sync = !!on;
+}
+/*
+ * libmp_dm_init - one-time device-mapper setup.
+ *
+ * Terminates the process with exit(1) if dm_prereq() fails.  Otherwise the
+ * target version is stored in conf->version, dm logging is initialised with
+ * conf->verbosity and the recorded udev sync setting is applied.
+ */
+void libmp_dm_init(void)
+{
+	struct config *conf;
+	int verbosity;
+	unsigned int version[3];
+
+	if (dm_prereq(version))
+		exit(1);
+	conf = get_multipath_config();
+	verbosity = conf->verbosity;
+	memcpy(conf->version, version, sizeof(version));
+	put_multipath_config(conf);
+	dm_init(verbosity);
+	dm_udev_set_sync_support(libmp_dm_udev_sync);
+}
+/*
+ * libmp_dm_task_create - dm_task_create() preceded by the pthread_once()
+ * guarded libmp_dm_init() call.
+ */
+struct dm_task*
+libmp_dm_task_create(int task)
+{
+	static pthread_once_t dm_initialized = PTHREAD_ONCE_INIT;
+
+	
pthread_once(&dm_initialized, libmp_dm_init);	/* libmp_dm_init() runs on the first call only */
+	return dm_task_create(task);
+}
+/* true for DEFERRED_REMOVE_ON and DEFERRED_REMOVE_IN_PROGRESS */
+#define do_deferred(x) ((x) == DEFERRED_REMOVE_ON || (x) == DEFERRED_REMOVE_IN_PROGRESS)
+/*
+ * dm_simplecmd - run the dm task @task on the map called @name.
+ *
+ * @no_flush:        request dm_task_no_flush() (only with LIBDM_API_FLUSH)
+ * @need_sync:       together with @udev_flags decides whether a udev cookie
+ *                   is set and waited for (RESUME and REMOVE tasks only)
+ * @udev_flags:      extra flags OR-ed into the cookie flags
+ * @deferred_remove: request a deferred remove if do_deferred() is true
+ *                   (only with LIBDM_API_DEFERRED)
+ *
+ * Returns the result of dm_task_run(), or 0 if the task could not be
+ * created or set up.
+ */
+static int
+dm_simplecmd (int task, const char *name, int no_flush, int need_sync, uint16_t udev_flags, int deferred_remove) {
+	int r = 0;
+	int udev_wait_flag = ((need_sync || udev_flags) &&
+			      (task == DM_DEVICE_RESUME ||
+			       task == DM_DEVICE_REMOVE));
+	uint32_t cookie = 0;
+	struct dm_task *dmt;
+
+	if (!(dmt = libmp_dm_task_create (task)))
+		return 0;
+
+	if (!dm_task_set_name (dmt, name))
+		goto out;
+
+	dm_task_no_open_count(dmt);
+	dm_task_skip_lockfs(dmt);	/* for DM_DEVICE_RESUME */
+#ifdef LIBDM_API_FLUSH
+	if (no_flush)
+		dm_task_no_flush(dmt);		/* for DM_DEVICE_SUSPEND/RESUME */
+#endif
+#ifdef LIBDM_API_DEFERRED
+	if (do_deferred(deferred_remove))
+		dm_task_deferred_remove(dmt);
+#endif
+	if (udev_wait_flag &&
+	    !dm_task_set_cookie(dmt, &cookie,
+				DM_UDEV_DISABLE_LIBRARY_FALLBACK | udev_flags))
+		goto out;
+
+	r = dm_task_run (dmt);
+	/* the wait happens whether or not the task succeeded */
+	if (udev_wait_flag)
+			dm_udev_wait(cookie);
+out:
+	dm_task_destroy (dmt);
+	return r;
+}
+/* dm_simplecmd() with no_flush = 0, need_sync = 1 and no deferred remove */
+int dm_simplecmd_flush (int task, const char *name, uint16_t udev_flags)
+{
+	return dm_simplecmd(task, name, 0, 1, udev_flags, 0);
+}
+/* dm_simplecmd() with no_flush = 1, need_sync = 1 and no deferred remove */
+int dm_simplecmd_noflush (int task, const char *name, uint16_t udev_flags)
+{
+	return dm_simplecmd(task, name, 1, 1, udev_flags, 0);
+}
+/* DM_DEVICE_REMOVE through dm_simplecmd(), with no extra udev flags */
+static int
+dm_device_remove (const char *name,	int needsync, int deferred_remove) {
+	return dm_simplecmd(DM_DEVICE_REMOVE, name, 0, needsync, 0,
+			    deferred_remove);
+}
+/*
+ * dm_addmap - create (DM_DEVICE_CREATE) or reload the table of the map
+ * described by @mpp, using target type @target and table line @params.
+ *
+ * @ro:         load the table read-only if non-zero
+ * @udev_flags: cookie flags (used for DM_DEVICE_CREATE only)
+ *
+ * Returns the result of dm_task_run(), or 0 if an earlier step failed.
+ */
+static int
+dm_addmap (int task, const char *target, struct multipath *mpp,
+	   char * params, int ro, uint16_t udev_flags) {
+	int r = 0;
+	struct dm_task *dmt;
+	char *prefixed_uuid = NULL;
+	uint32_t cookie = 0;
+
+	/* Need to add this here to allow 0 to be passed in udev_flags */
+	udev_flags |= DM_UDEV_DISABLE_LIBRARY_FALLBACK;
+
+	if (!(dmt = libmp_dm_task_create (task)))
+		return 0;
+
+	if (!dm_task_set_name
(dmt, mpp->alias))
+		goto addout;
+
+	if (!dm_task_add_target (dmt, 0, mpp->size, target, params))
+		goto addout;
+
+	if (ro)
+		dm_task_set_ro(dmt);
+	/* only a map that is being created gets its uuid set */
+	if (task == DM_DEVICE_CREATE) {
+		if (strlen(mpp->wwid) > 0) {
+			prefixed_uuid = MALLOC(UUID_PREFIX_LEN +
+					       strlen(mpp->wwid) + 1);	/* prefix + wwid + NUL */
+			if (!prefixed_uuid) {
+				condlog(0, "cannot create prefixed uuid : %s",
+					strerror(errno));
+				goto addout;
+			}
+			sprintf(prefixed_uuid, UUID_PREFIX "%s", mpp->wwid);
+			if (!dm_task_set_uuid(dmt, prefixed_uuid))
+				goto freeout;
+		}
+		dm_task_skip_lockfs(dmt);
+#ifdef LIBDM_API_FLUSH
+		dm_task_no_flush(dmt);
+#endif
+	}
+	/* apply mode/uid/gid only if the matching attribute flag is set */
+	if (mpp->attribute_flags & (1 << ATTR_MODE) &&
+	    !dm_task_set_mode(dmt, mpp->mode))
+		goto freeout;
+	if (mpp->attribute_flags & (1 << ATTR_UID) &&
+	    !dm_task_set_uid(dmt, mpp->uid))
+		goto freeout;
+	if (mpp->attribute_flags & (1 << ATTR_GID) &&
+	    !dm_task_set_gid(dmt, mpp->gid))
+		goto freeout;
+	condlog(4, "%s: %s [0 %llu %s %s]", mpp->alias,
+		task == DM_DEVICE_RELOAD ? "reload" : "addmap", mpp->size,
+		target, params);
+
+	dm_task_no_open_count(dmt);
+	/* a cookie is set, and waited for below, for DM_DEVICE_CREATE only */
+	if (task == DM_DEVICE_CREATE &&
+	    !dm_task_set_cookie(dmt, &cookie, udev_flags))
+		goto freeout;
+
+	r = dm_task_run (dmt);
+
+	if (task == DM_DEVICE_CREATE)
+		dm_udev_wait(cookie);
+freeout:
+	if (prefixed_uuid)
+		FREE(prefixed_uuid);
+
+addout:
+	dm_task_destroy (dmt);
+
+	return r;
+}
+/*
+ * build_udev_flags - compute the MPATH_UDEV_* flags for @mpp.
+ *
+ * NO_KPARTX if skip_kpartx is on, NO_PATHS if the map has no active paths
+ * or ghost_delay_tick is positive, RELOAD if @reload is set and
+ * force_udev_reload is not.
+ */
+static uint16_t build_udev_flags(const struct multipath *mpp, int reload)
+{
+	/* DM_UDEV_DISABLE_LIBRARY_FALLBACK is added in dm_addmap */
+	return	(mpp->skip_kpartx == SKIP_KPARTX_ON ?
+		 MPATH_UDEV_NO_KPARTX_FLAG : 0) |
+		((count_active_paths(mpp) == 0 || mpp->ghost_delay_tick > 0) ?
+		 MPATH_UDEV_NO_PATHS_FLAG : 0) |
+		(reload && !mpp->force_udev_reload ?
+		 MPATH_UDEV_RELOAD_FLAG : 0);
+}
+/*
+ * dm_addmap_create - create the multipath map described by @mpp.
+ *
+ * A read-write load is tried first; a read-only load is tried only if the
+ * first attempt left errno at EROFS.  The WWID is unmarked as failed on
+ * success and marked as failed otherwise; needs_paths_uevent is set when
+ * that marking changed.
+ *
+ * Returns 1 on success, 0 on failure.
+ */
+int dm_addmap_create (struct multipath *mpp, char * params)
+{
+	int ro;
+	uint16_t udev_flags = build_udev_flags(mpp, 0);
+
+	for (ro = 0; ro <= 1; ro++) {
+		int err;
+
+		if (dm_addmap(DM_DEVICE_CREATE, TGT_MPATH, mpp, params, ro,
+			      udev_flags)) {
+			if (unmark_failed_wwid(mpp->wwid) ==
+			    WWID_FAILED_CHANGED)
+				mpp->needs_paths_uevent = 1;
+			return 1;
+		}
+		/*
+		 * DM_DEVICE_CREATE is actually DM_DEV_CREATE + DM_TABLE_LOAD.
+		 * Failing the second part leaves an empty map. Clean it up.
+		 */
+		err = errno;	/* saved before the cleanup calls below can change errno */
+		if (dm_map_present(mpp->alias)) {
+			condlog(3, "%s: failed to load map (a path might be in use)", mpp->alias);
+			dm_flush_map_nosync(mpp->alias);
+		}
+		if (err != EROFS) {
+			condlog(3, "%s: failed to load map, error %d",
+				mpp->alias, err);
+			break;
+		}
+	}
+	if (mark_failed_wwid(mpp->wwid) == WWID_FAILED_CHANGED)
+		mpp->needs_paths_uevent = 1;
+	return 0;
+}
+/* values for the ro argument of dm_addmap() */
+#define ADDMAP_RW 0
+#define ADDMAP_RO 1
+/*
+ * dm_addmap_reload - reload the table of @mpp and resume the map.
+ *
+ * @flush: the resume is issued with no_flush set to !flush
+ *
+ * Returns non-zero if both the reload and the resume succeeded, 0
+ * otherwise.
+ */
+int dm_addmap_reload(struct multipath *mpp, char *params, int flush)
+{
+	int r = 0;
+	uint16_t udev_flags = build_udev_flags(mpp, 1);
+
+	/*
+	 * DM_DEVICE_RELOAD cannot wait on a cookie, as
+	 * the cookie will only ever be released after a
+	 * DM_DEVICE_RESUME. So call DM_DEVICE_RESUME
+	 * after each successful call to DM_DEVICE_RELOAD.
+	 */
+	if (!mpp->force_readonly)
+		r = dm_addmap(DM_DEVICE_RELOAD, TGT_MPATH, mpp, params,
+			      ADDMAP_RW, 0);
+	if (!r) {
+		/* fall back to a read-only load only for EROFS or force_readonly */
+		if (!mpp->force_readonly && errno != EROFS)
+			return 0;
+		r = dm_addmap(DM_DEVICE_RELOAD, TGT_MPATH, mpp,
+			      params, ADDMAP_RO, 0);
+	}
+	if (r)
+		r = dm_simplecmd(DM_DEVICE_RESUME, mpp->alias, !flush,
+				 1, udev_flags, 0);
+	if (r)
+		return r;
+
+	/* If the resume failed, dm will leave the device suspended, and
+	 * drop the new table, so doing a second resume will try using
+	 * the original table */
+	if (dm_is_suspended(mpp->alias))
+		dm_simplecmd(DM_DEVICE_RESUME, mpp->alias, !flush, 1,
+			     udev_flags, 0);
+	return 0;
+}
+
+/*
+ * do_get_info - fill @info with the dm info of the map called @name.
+ *
+ * Returns 0 if the map exists and @info was filled, -1 otherwise.
+ */
+static int
+do_get_info(const char *name, struct dm_info *info)
+{
+	int r = -1;
+	struct dm_task *dmt;
+
+	if (!(dmt = libmp_dm_task_create(DM_DEVICE_INFO)))
+		return r;
+
+	if (!dm_task_set_name(dmt, name))
+		goto out;
+
+	dm_task_no_open_count(dmt);
+
+	if (!dm_task_run(dmt))
+		goto out;
+
+	if (!dm_task_get_info(dmt, info))
+		goto out;
+
+	if (!info->exists)
+		goto out;
+
+	r = 0;
+out:
+	dm_task_destroy(dmt);
+	return r;
+}
+
+/* Returns 1 if a map called @str exists, 0 otherwise. */
+int dm_map_present(const char * str)
+{
+	struct dm_info info;
+
+	return (do_get_info(str, &info) == 0);
+}
+
+/*
+ * dm_get_map - read the first target of the table of map @name.
+ *
+ * @size:      if non-NULL, receives the length of the first target
+ * @outparams: if non-NULL, receives the target's parameter string; the
+ *             buffer must hold at least PARAMS_SIZE bytes
+ *
+ * Returns 0 on success.  Returns 1 if the table cannot be read, if there
+ * is no parameter string to copy, or if it does not fit into PARAMS_SIZE
+ * bytes (outparams then holds a truncated copy).
+ */
+int dm_get_map(const char *name, unsigned long long *size, char *outparams)
+{
+	int r = 1;
+	int len;
+	struct dm_task *dmt;
+	uint64_t start, length = 0;
+	char *target_type = NULL;
+	char *params = NULL;
+
+	if (!(dmt = libmp_dm_task_create(DM_DEVICE_TABLE)))
+		return 1;
+
+	if (!dm_task_set_name(dmt, name))
+		goto out;
+
+	dm_task_no_open_count(dmt);
+
+	if (!dm_task_run(dmt))
+		goto out;
+
+	/* Fetch 1st target */
+	dm_get_next_target(dmt, NULL, &start, &length,
+			   &target_type, &params);
+
+	if (size)
+		*size = length;
+
+	if (!outparams) {
+		r = 0;
+		goto out;
+	}
+	/* params stays NULL if dm_get_next_target() did not provide one */
+	if (!params)
+		goto out;
+	/* snprintf() returning >= PARAMS_SIZE means the copy was truncated */
+	len = snprintf(outparams, PARAMS_SIZE, "%s", params);
+	if (len >= 0 && len < PARAMS_SIZE)
+		r = 0;
+out:
+	dm_task_destroy(dmt);
+	return r;
+}
+
+/*
+ * dm_get_prefixed_uuid - copy the full dm uuid of map @name into @uuid.
+ */
+static int
+dm_get_prefixed_uuid(const char *name, char *uuid, int uuid_len)
+{
+	struct dm_task *dmt;
+	const char
*uuidtmp;
+	int r = 1;
+
+	dmt = libmp_dm_task_create(DM_DEVICE_INFO);
+	if (!dmt)
+		return 1;
+
+	if (!dm_task_set_name (dmt, name))
+		goto uuidout;
+
+	if (!dm_task_run(dmt))
+		goto uuidout;
+
+	/* a map without uuid yields an empty string, not an error */
+	uuidtmp = dm_task_get_uuid(dmt);
+	if (uuidtmp)
+		strlcpy(uuid, uuidtmp, uuid_len);
+	else
+		uuid[0] = '\0';
+
+	r = 0;
+uuidout:
+	dm_task_destroy(dmt);
+	return r;
+}
+
+/*
+ * dm_get_uuid - copy the uuid of map @name, without UUID_PREFIX, to @uuid.
+ *
+ * @uuid is set to the empty string if the map's uuid does not start with
+ * UUID_PREFIX.  Returns 0 on success, 1 if the uuid cannot be read.
+ */
+int dm_get_uuid(const char *name, char *uuid, int uuid_len)
+{
+	char tmp[DM_UUID_LEN];
+
+	if (dm_get_prefixed_uuid(name, tmp, sizeof(tmp)))
+		return 1;
+
+	if (!strncmp(tmp, UUID_PREFIX, UUID_PREFIX_LEN))
+		strlcpy(uuid, tmp + UUID_PREFIX_LEN, uuid_len);
+	else
+		uuid[0] = '\0';
+
+	return 0;
+}
+
+/*
+ * is_mpath_part - returns 1 if the uuid of @part_name starts with "part"
+ * and, from its first UUID_PREFIX occurrence on, equals the uuid of
+ * @map_name; returns 0 otherwise or if either uuid cannot be read.
+ */
+static int
+is_mpath_part(const char *part_name, const char *map_name)
+{
+	char *p;
+	char part_uuid[DM_UUID_LEN], map_uuid[DM_UUID_LEN];
+
+	if (dm_get_prefixed_uuid(part_name, part_uuid, sizeof(part_uuid)))
+		return 0;
+
+	if (dm_get_prefixed_uuid(map_name, map_uuid, sizeof(map_uuid)))
+		return 0;
+
+	if (strncmp(part_uuid, "part", 4) != 0)
+		return 0;
+
+	p = strstr(part_uuid, UUID_PREFIX);
+	if (p && !strcmp(p, map_uuid))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * dm_get_status - copy the status string of the first target of map @name
+ * into @outstatus, which must hold at least PARAMS_SIZE bytes.
+ *
+ * Returns 0 on success, 1 on failure (including a status string that does
+ * not fit into PARAMS_SIZE bytes); failures are logged.
+ */
+int dm_get_status(const char *name, char *outstatus)
+{
+	int r = 1;
+	int len;
+	struct dm_task *dmt;
+	uint64_t start, length;
+	char *target_type = NULL;
+	char *status = NULL;
+
+	if (!(dmt = libmp_dm_task_create(DM_DEVICE_STATUS)))
+		return 1;
+
+	if (!dm_task_set_name(dmt, name))
+		goto out;
+
+	dm_task_no_open_count(dmt);
+
+	if (!dm_task_run(dmt))
+		goto out;
+
+	/* Fetch 1st target */
+	dm_get_next_target(dmt, NULL, &start, &length,
+			   &target_type, &status);
+	if (!status) {
+		condlog(2, "get null status.");
+		goto out;
+	}
+
+	/* snprintf() returning >= PARAMS_SIZE means the copy was truncated */
+	len = snprintf(outstatus, PARAMS_SIZE, "%s", status);
+	if (len >= 0 && len < PARAMS_SIZE)
+		r = 0;
+out:
+	if (r)
+		condlog(0, "%s: error getting map status string", name);
+
+	dm_task_destroy(dmt);
+	return r;
+}
+
+/*
+ * dm_type - compare the target type of map @name with @type.
+ *
+ * returns:
+ *    1 : match
+ *    0 : no match
+ *   -1 : empty map, or more than 1 target
+ */
+int dm_type(const char *name, char *type)
+{
+	int r = 0;
+	struct dm_task *dmt;
+	uint64_t start, length;
+	char *target_type = NULL;
+	char *params;
+
+	/* failure to query the table is reported as "no match" (0) */
+	if (!(dmt = libmp_dm_task_create(DM_DEVICE_TABLE)))
+		return 0;
+
+	if (!dm_task_set_name(dmt, name))
+		goto out;
+
+	dm_task_no_open_count(dmt);
+
+	if (!dm_task_run(dmt))
+		goto out;
+
+	/* Fetch 1st target */
+	if (dm_get_next_target(dmt, NULL, &start, &length,
+			       &target_type, &params) != NULL)
+		/* multiple targets */
+		r = -1;
+	else if (!target_type)
+		r = -1;
+	else if (!strcmp(target_type, type))
+		r = 1;
+
+out:
+	dm_task_destroy(dmt);
+	return r;
+}
+
+/*
+ * dm_is_mpath - check whether map @name is a multipath map: it must exist,
+ * its uuid must start with UUID_PREFIX and it must consist of a single
+ * TGT_MPATH target.
+ *
+ * returns:
+ * 1  : is multipath device
+ * 0  : is not multipath device
+ * -1 : error
+ */
+int dm_is_mpath(const char *name)
+{
+	int r = -1;
+	struct dm_task *dmt;
+	struct dm_info info;
+	uint64_t start, length;
+	char *target_type = NULL;
+	char *params;
+	const char *uuid;
+
+	if (!(dmt = libmp_dm_task_create(DM_DEVICE_TABLE)))
+		goto out;
+
+	if (!dm_task_set_name(dmt, name))
+		goto out_task;
+
+	dm_task_no_open_count(dmt);
+
+	if (!dm_task_run(dmt))
+		goto out_task;
+
+	if (!dm_task_get_info(dmt, &info))
+		goto out_task;
+
+	/* from here on every early exit means "not a multipath device" */
+	r = 0;
+
+	if (!info.exists)
+		goto out_task;
+
+	uuid = dm_task_get_uuid(dmt);
+
+	if (!uuid || strncmp(uuid, UUID_PREFIX, UUID_PREFIX_LEN) != 0)
+		goto out_task;
+
+	/* Fetch 1st target */
+	if (dm_get_next_target(dmt, NULL, &start, &length, &target_type,
+			       &params) != NULL)
+		/* multiple targets */
+		goto out_task;
+
+	if (!target_type || strcmp(target_type, TGT_MPATH) != 0)
+		goto out_task;
+
+	r = 1;
+out_task:
+	dm_task_destroy(dmt);
+out:
+	if (r < 0)
+		condlog(3, "%s: dm command failed in %s: %s", name, __FUNCTION__, strerror(errno));
+	return r;
+}
+
+/*
+ * dm_dev_t - write "major:minor" of map @mapname into @dev_t (@len bytes).
+ *
+ * Returns 0 on success, 1 if the map info cannot be read or the string
+ * does not fit into @len bytes.
+ */
+static int
+dm_dev_t (const char * mapname, char * dev_t, int len)
+{
+	struct dm_info info;
+	int n;
+
+	if (do_get_info(mapname, &info) != 0)
+		return 1;
+
+	/* snprintf() returning >= len means the output was truncated */
+	n = snprintf(dev_t, len, "%i:%i", info.major, info.minor);
+	if (n < 0 || n >= len)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * dm_get_opencount - open count of map @mapname.
+ *
+ * Returns the open count, -1 if it cannot be determined once the task
+ * exists, and 0 if the task itself cannot be created.
+ */
+int
+dm_get_opencount (const char * mapname)
+{
+	int r = -1;
+	struct
dm_task *dmt;
+	struct dm_info info;
+
+	if (!(dmt = libmp_dm_task_create(DM_DEVICE_INFO)))
+		return 0;	/* note: 0 here, whereas the later failures return -1 */
+
+	if (!dm_task_set_name(dmt, mapname))
+		goto out;
+
+	if (!dm_task_run(dmt))
+		goto out;
+
+	if (!dm_task_get_info(dmt, &info))
+		goto out;
+
+	if (!info.exists)
+		goto out;
+
+	r = info.open_count;
+out:
+	dm_task_destroy(dmt);
+	return r;
+}
+/*
+ * dm_get_major_minor - store the device numbers of map @name in *major and
+ * *minor.  Returns 0 on success, -1 if the map info cannot be read.
+ */
+int
+dm_get_major_minor(const char *name, int *major, int *minor)
+{
+	struct dm_info info;
+
+	if (do_get_info(name, &info) != 0)
+		return -1;
+
+	*major = info.major;
+	*minor = info.minor;
+	return 0;
+}
+/* do_foreach_partmaps() callback that returns 1 for every partition map */
+static int
+has_partmap(const char *name __attribute__((unused)),
+	    void *data __attribute__((unused)))
+{
+	return 1;
+}
+/*
+ * partmap_in_use - do_foreach_partmaps() callback; also called directly
+ * with @data == NULL.
+ *
+ * Increments the int that @data points to (if non-NULL).  If @name has a
+ * non-zero open count, its own partition maps are checked recursively and
+ * counted; 1 is returned if one of them is in use or if the open count
+ * differs from that count.  Returns 0 otherwise.
+ */
+static int
+partmap_in_use(const char *name, void *data)
+{
+	int part_count, *ret_count = (int *)data;
+	int open_count = dm_get_opencount(name);
+
+	if (ret_count)
+		(*ret_count)++;
+	part_count = 0;
+	if (open_count) {
+		if (do_foreach_partmaps(name, partmap_in_use, &part_count))
+			return 1;
+		if (open_count != part_count) {
+			condlog(2, "%s: map in use", name);
+			return 1;
+		}
+	}
+	return 0;
+}
+/*
+ * _dm_flush_map - remove the multipath map @mapname and its partition maps.
+ *
+ * @need_sync:       passed on to the partition map and map removal
+ * @deferred_remove: deferred remove setting, evaluated with do_deferred()
+ * @need_suspend:    if set, queue_if_no_path is turned off first when the
+ *                   table contains it, and the map is suspended before
+ *                   each removal attempt
+ * @retries:         number of additional removal attempts, one second
+ *                   apart
+ *
+ * Returns 0 if the map was removed or is not a multipath map, 2 if the
+ * removal was deferred, 1 on failure.
+ */
+int _dm_flush_map (const char * mapname, int need_sync, int deferred_remove,
+		   int need_suspend, int retries)
+{
+	int r;
+	int queue_if_no_path = 0;	/* 1: was switched off here, -1: switching off failed */
+	int udev_flags = 0;
+	unsigned long long mapsize;
+	char params[PARAMS_SIZE] = {0};
+
+	if (dm_is_mpath(mapname) != 1)
+		return 0; /* nothing to do */
+
+	/* if the device currently has no partitions, do not
+	   run kpartx on it if you fail to delete it */
+	if (do_foreach_partmaps(mapname, has_partmap, NULL) == 0)
+		udev_flags |= MPATH_UDEV_NO_KPARTX_FLAG;
+
+	/* If you aren't doing a deferred remove, make sure that no
+	 * devices are in use */
+	if (!do_deferred(deferred_remove) && partmap_in_use(mapname, NULL))
+		return 1;
+
+	if (need_suspend &&
+	    !dm_get_map(mapname, &mapsize, params) &&
+	    strstr(params, "queue_if_no_path")) {
+		if (!dm_queue_if_no_path(mapname, 0))
+			queue_if_no_path = 1;
+		else
+			/* Leave queue_if_no_path alone if unset
failed */ + queue_if_no_path = -1; + } + + if (dm_remove_partmaps(mapname, need_sync, deferred_remove)) + return 1; + + if (!do_deferred(deferred_remove) && dm_get_opencount(mapname)) { + condlog(2, "%s: map in use", mapname); + return 1; + } + + do { + if (need_suspend && queue_if_no_path != -1) + dm_simplecmd_flush(DM_DEVICE_SUSPEND, mapname, 0); + + r = dm_device_remove(mapname, need_sync, deferred_remove); + + if (r) { + if (do_deferred(deferred_remove) + && dm_map_present(mapname)) { + condlog(4, "multipath map %s remove deferred", + mapname); + return 2; + } + condlog(4, "multipath map %s removed", mapname); + return 0; + } else { + condlog(2, "failed to remove multipath map %s", + mapname); + if (need_suspend && queue_if_no_path != -1) { + dm_simplecmd_noflush(DM_DEVICE_RESUME, + mapname, udev_flags); + } + } + if (retries) + sleep(1); + } while (retries-- > 0); + + if (queue_if_no_path == 1) + dm_queue_if_no_path(mapname, 1); + + return 1; +} + +#ifdef LIBDM_API_DEFERRED + +int +dm_flush_map_nopaths(const char * mapname, int deferred_remove) +{ + return _dm_flush_map(mapname, 1, deferred_remove, 0, 0); +} + +#else + +int +dm_flush_map_nopaths(const char * mapname, int deferred_remove) +{ + return _dm_flush_map(mapname, 1, 0, 0, 0); +} + +#endif + +int dm_flush_maps (int retries) +{ + int r = 0; + struct dm_task *dmt; + struct dm_names *names; + unsigned next = 0; + + if (!(dmt = libmp_dm_task_create (DM_DEVICE_LIST))) + return 0; + + dm_task_no_open_count(dmt); + + if (!dm_task_run (dmt)) + goto out; + + if (!(names = dm_task_get_names (dmt))) + goto out; + + if (!names->dev) + goto out; + + do { + r |= dm_suspend_and_flush_map(names->name, retries); + next = names->next; + names = (void *) names + next; + } while (next); + +out: + dm_task_destroy (dmt); + return r; +} + +int +dm_message(const char * mapname, char * message) +{ + int r = 1; + struct dm_task *dmt; + + if (!(dmt = libmp_dm_task_create(DM_DEVICE_TARGET_MSG))) + return 1; + + if 
(!dm_task_set_name(dmt, mapname))
+		goto out;
+
+	if (!dm_task_set_sector(dmt, 0))
+		goto out;
+
+	if (!dm_task_set_message(dmt, message))
+		goto out;
+
+	dm_task_no_open_count(dmt);
+
+	if (!dm_task_run(dmt))
+		goto out;
+
+	r = 0;
+out:
+	if (r)
+		condlog(0, "DM message failed [%s]", message);
+
+	dm_task_destroy(dmt);
+	return r;
+}
+
+/*
+ * dm_fail_path - send "fail_path <path>" to map @mapname.
+ *
+ * Returns 0 on success, 1 on failure or if the message does not fit; a
+ * truncated message is never sent.
+ */
+int
+dm_fail_path(const char * mapname, char * path)
+{
+	char message[32];
+	int n;
+
+	/* snprintf() returning >= the buffer size means truncation */
+	n = snprintf(message, sizeof(message), "fail_path %s", path);
+	if (n < 0 || (size_t)n >= sizeof(message))
+		return 1;
+
+	return dm_message(mapname, message);
+}
+
+/*
+ * dm_reinstate_path - send "reinstate_path <path>" to map @mapname.
+ *
+ * Returns 0 on success, 1 on failure or if the message does not fit.
+ */
+int
+dm_reinstate_path(const char * mapname, char * path)
+{
+	char message[32];
+	int n;
+
+	n = snprintf(message, sizeof(message), "reinstate_path %s", path);
+	if (n < 0 || (size_t)n >= sizeof(message))
+		return 1;
+
+	return dm_message(mapname, message);
+}
+
+/*
+ * dm_queue_if_no_path - send "queue_if_no_path" (@enable non-zero) or
+ * "fail_if_no_path" to map @mapname.  Returns the dm_message() result.
+ */
+int
+dm_queue_if_no_path(const char *mapname, int enable)
+{
+	char *message;
+
+	if (enable)
+		message = "queue_if_no_path";
+	else
+		message = "fail_if_no_path";
+
+	return dm_message(mapname, message);
+}
+
+/*
+ * dm_groupmsg - send "<msg>_group <index>" to map @mapname.
+ *
+ * Returns 0 on success, 1 on failure or if the message does not fit.
+ */
+static int
+dm_groupmsg (const char * msg, const char * mapname, int index)
+{
+	char message[32];
+	int n;
+
+	n = snprintf(message, sizeof(message), "%s_group %i", msg, index);
+	if (n < 0 || (size_t)n >= sizeof(message))
+		return 1;
+
+	return dm_message(mapname, message);
+}
+
+/* Send "switch_group <index>" to map @mapname. */
+int
+dm_switchgroup(const char * mapname, int index)
+{
+	return dm_groupmsg("switch", mapname, index);
+}
+
+/* Send "enable_group <index>" to map @mapname. */
+int
+dm_enablegroup(const char * mapname, int index)
+{
+	return dm_groupmsg("enable", mapname, index);
+}
+
+/* Send "disable_group <index>" to map @mapname. */
+int
+dm_disablegroup(const char * mapname, int index)
+{
+	return dm_groupmsg("disable", mapname, index);
+}
+
+/*
+ * dm_get_multipath - allocate a struct multipath for the dm map @name and
+ * fill in alias, size, wwid and dm info.
+ *
+ * Returns the new object, or NULL if allocation fails or the table cannot
+ * be read.  The results of the uuid and info lookups are not checked.
+ */
+struct multipath *dm_get_multipath(const char *name)
+{
+	struct multipath *mpp = NULL;
+
+	mpp = alloc_multipath();
+	if (!mpp)
+		return NULL;
+
+	mpp->alias = STRDUP(name);
+
+	if (!mpp->alias)
+		goto out;
+
+	if (dm_get_map(name, &mpp->size, NULL))
+		goto out;
+
+	dm_get_uuid(name, mpp->wwid, WWID_SIZE);
+	dm_get_info(name, &mpp->dmi);
+
+	return mpp;
+out:
+	free_multipath(mpp, KEEP_PATHS);
+	return NULL;
+}
+
+/*
+ * dm_get_maps - append a struct multipath for every multipath dm map to
+ * the vector @mp.  Returns 0 on success, 1 on failure.
+ */
+int
+dm_get_maps (vector mp)
+{
+	struct multipath * mpp;
+	int r = 1;
+	struct
dm_task *dmt; + struct dm_names *names; + unsigned next = 0; + + if (!mp) + return 1; + + if (!(dmt = libmp_dm_task_create(DM_DEVICE_LIST))) + return 1; + + dm_task_no_open_count(dmt); + + if (!dm_task_run(dmt)) + goto out; + + if (!(names = dm_task_get_names(dmt))) + goto out; + + if (!names->dev) { + r = 0; /* this is perfectly valid */ + goto out; + } + + do { + if (dm_is_mpath(names->name) != 1) + goto next; + + mpp = dm_get_multipath(names->name); + if (!mpp) + goto out; + + if (!vector_alloc_slot(mp)) + goto out; + + vector_set_slot(mp, mpp); + mpp = NULL; +next: + next = names->next; + names = (void *) names + next; + } while (next); + + r = 0; + goto out; +out: + dm_task_destroy (dmt); + return r; +} + +int +dm_geteventnr (const char *name) +{ + struct dm_info info; + + if (do_get_info(name, &info) != 0) + return -1; + + return info.event_nr; +} + +int +dm_is_suspended(const char *name) +{ + struct dm_info info; + + if (do_get_info(name, &info) != 0) + return -1; + + return info.suspended; +} + +char * +dm_mapname(int major, int minor) +{ + char * response = NULL; + const char *map; + struct dm_task *dmt; + int r; + int loop = MAX_WAIT * LOOPS_PER_SEC; + + if (!(dmt = libmp_dm_task_create(DM_DEVICE_STATUS))) + return NULL; + + if (!dm_task_set_major(dmt, major) || + !dm_task_set_minor(dmt, minor)) + goto bad; + + dm_task_no_open_count(dmt); + + /* + * device map might not be ready when we get here from + * daemon uev_trigger -> uev_add_map + */ + while (--loop) { + r = dm_task_run(dmt); + + if (r) + break; + + usleep(1000 * 1000 / LOOPS_PER_SEC); + } + + if (!r) { + condlog(0, "%i:%i: timeout fetching map name", major, minor); + goto bad; + } + + map = dm_task_get_name(dmt); + if (map && strlen(map)) + response = STRDUP((const char *)dm_task_get_name(dmt)); + + dm_task_destroy(dmt); + return response; +bad: + dm_task_destroy(dmt); + condlog(0, "%i:%i: error fetching map name", major, minor); + return NULL; +} + +static int +do_foreach_partmaps (const char * 
mapname, + int (*partmap_func)(const char *, void *), + void *data) +{ + struct dm_task *dmt; + struct dm_names *names; + unsigned next = 0; + char params[PARAMS_SIZE]; + unsigned long long size; + char dev_t[32]; + int r = 1; + char *p; + + if (!(dmt = libmp_dm_task_create(DM_DEVICE_LIST))) + return 1; + + dm_task_no_open_count(dmt); + + if (!dm_task_run(dmt)) + goto out; + + if (!(names = dm_task_get_names(dmt))) + goto out; + + if (!names->dev) { + r = 0; /* this is perfectly valid */ + goto out; + } + + if (dm_dev_t(mapname, &dev_t[0], 32)) + goto out; + + do { + if ( + /* + * if there is only a single "linear" target + */ + (dm_type(names->name, TGT_PART) == 1) && + + /* + * and the uuid of the target is a partition of the + * uuid of the multipath device + */ + is_mpath_part(names->name, mapname) && + + /* + * and we can fetch the map table from the kernel + */ + !dm_get_map(names->name, &size, ¶ms[0]) && + + /* + * and the table maps over the multipath map + */ + (p = strstr(params, dev_t)) && + !isdigit(*(p + strlen(dev_t))) + ) { + if (partmap_func(names->name, data) != 0) + goto out; + } + + next = names->next; + names = (void *) names + next; + } while (next); + + r = 0; +out: + dm_task_destroy (dmt); + return r; +} + +struct remove_data { + int need_sync; + int deferred_remove; +}; + +static int +remove_partmap(const char *name, void *data) +{ + struct remove_data *rd = (struct remove_data *)data; + + if (dm_get_opencount(name)) { + dm_remove_partmaps(name, rd->need_sync, rd->deferred_remove); + if (!do_deferred(rd->deferred_remove) && + dm_get_opencount(name)) { + condlog(2, "%s: map in use", name); + return 1; + } + } + condlog(4, "partition map %s removed", name); + dm_device_remove(name, rd->need_sync, rd->deferred_remove); + return 0; +} + +int +dm_remove_partmaps (const char * mapname, int need_sync, int deferred_remove) +{ + struct remove_data rd = { need_sync, deferred_remove }; + return do_foreach_partmaps(mapname, remove_partmap, &rd); +} + 
+#ifdef LIBDM_API_DEFERRED + +static int +cancel_remove_partmap (const char *name, void *unused __attribute__((unused))) +{ + if (dm_get_opencount(name)) + dm_cancel_remove_partmaps(name); + if (dm_message(name, "@cancel_deferred_remove") != 0) + condlog(0, "%s: can't cancel deferred remove: %s", name, + strerror(errno)); + return 0; +} + +static int +dm_get_deferred_remove (const char * mapname) +{ + struct dm_info info; + + if (do_get_info(mapname, &info) != 0) + return -1; + + return info.deferred_remove; +} + +static int +dm_cancel_remove_partmaps(const char * mapname) { + return do_foreach_partmaps(mapname, cancel_remove_partmap, NULL); +} + +int +dm_cancel_deferred_remove (struct multipath *mpp) +{ + int r = 0; + + if (!dm_get_deferred_remove(mpp->alias)) + return 0; + if (mpp->deferred_remove == DEFERRED_REMOVE_IN_PROGRESS) + mpp->deferred_remove = DEFERRED_REMOVE_ON; + + dm_cancel_remove_partmaps(mpp->alias); + r = dm_message(mpp->alias, "@cancel_deferred_remove"); + if (r) + condlog(0, "%s: can't cancel deferred remove: %s", mpp->alias, + strerror(errno)); + else + condlog(2, "%s: canceled deferred remove", mpp->alias); + return r; +} + +#else + +int +dm_cancel_deferred_remove (struct multipath *mpp) +{ + return 0; +} + +#endif + +static struct dm_info * +alloc_dminfo (void) +{ + return MALLOC(sizeof(struct dm_info)); +} + +int +dm_get_info (const char * mapname, struct dm_info ** dmi) +{ + if (!mapname) + return 1; + + if (!*dmi) + *dmi = alloc_dminfo(); + + if (!*dmi) + return 1; + + if (do_get_info(mapname, *dmi) != 0) { + memset(*dmi, 0, sizeof(struct dm_info)); + FREE(*dmi); + *dmi = NULL; + return 1; + } + return 0; +} + +struct rename_data { + const char *old; + char *new; + char *delim; +}; + +static int +rename_partmap (const char *name, void *data) +{ + char buff[PARAMS_SIZE]; + int offset; + struct rename_data *rd = (struct rename_data *)data; + + if (strncmp(name, rd->old, strlen(rd->old)) != 0) + return 0; + for (offset = strlen(rd->old); 
name[offset] && !(isdigit(name[offset])); offset++); /* do nothing */ + snprintf(buff, PARAMS_SIZE, "%s%s%s", rd->new, rd->delim, + name + offset); + dm_rename(name, buff, rd->delim, SKIP_KPARTX_OFF); + condlog(4, "partition map %s renamed", name); + return 0; +} + +int +dm_rename_partmaps (const char * old, char * new, char *delim) +{ + struct rename_data rd; + + rd.old = old; + rd.new = new; + + if (delim) + rd.delim = delim; + else { + if (isdigit(new[strlen(new)-1])) + rd.delim = "p"; + else + rd.delim = ""; + } + return do_foreach_partmaps(old, rename_partmap, &rd); +} + +int +dm_rename (const char * old, char * new, char *delim, int skip_kpartx) +{ + int r = 0; + struct dm_task *dmt; + uint32_t cookie = 0; + uint16_t udev_flags = DM_UDEV_DISABLE_LIBRARY_FALLBACK | ((skip_kpartx == SKIP_KPARTX_ON)? MPATH_UDEV_NO_KPARTX_FLAG : 0); + + if (dm_rename_partmaps(old, new, delim)) + return r; + + if (!(dmt = libmp_dm_task_create(DM_DEVICE_RENAME))) + return r; + + if (!dm_task_set_name(dmt, old)) + goto out; + + if (!dm_task_set_newname(dmt, new)) + goto out; + + dm_task_no_open_count(dmt); + + if (!dm_task_set_cookie(dmt, &cookie, udev_flags)) + goto out; + r = dm_task_run(dmt); + + dm_udev_wait(cookie); + +out: + dm_task_destroy(dmt); + + return r; +} + +void dm_reassign_deps(char *table, const char *dep, const char *newdep) +{ + char *n, *newtable; + const char *p; + + newtable = strdup(table); + if (!newtable) + return; + p = strstr(newtable, dep); + n = table + (p - newtable); + strcpy(n, newdep); + n += strlen(newdep); + p += strlen(dep); + strcat(n, p); + FREE(newtable); +} + +int dm_reassign_table(const char *name, char *old, char *new) +{ + int r = 0, modified = 0; + uint64_t start, length; + struct dm_task *dmt, *reload_dmt; + char *target, *params = NULL; + char *buff; + void *next = NULL; + + if (!(dmt = libmp_dm_task_create(DM_DEVICE_TABLE))) + return 0; + + if (!dm_task_set_name(dmt, name)) + goto out; + + dm_task_no_open_count(dmt); + + if 
(!dm_task_run(dmt)) + goto out; + if (!(reload_dmt = libmp_dm_task_create(DM_DEVICE_RELOAD))) + goto out; + if (!dm_task_set_name(reload_dmt, name)) + goto out_reload; + + do { + next = dm_get_next_target(dmt, next, &start, &length, + &target, ¶ms); + buff = strdup(params); + if (!buff) { + condlog(3, "%s: failed to replace target %s, " + "out of memory", name, target); + goto out_reload; + } + if (strcmp(target, TGT_MPATH) && strstr(params, old)) { + condlog(3, "%s: replace target %s %s", + name, target, buff); + dm_reassign_deps(buff, old, new); + condlog(3, "%s: with target %s %s", + name, target, buff); + modified++; + } + dm_task_add_target(reload_dmt, start, length, target, buff); + free(buff); + } while (next); + + if (modified) { + dm_task_no_open_count(reload_dmt); + + if (!dm_task_run(reload_dmt)) { + condlog(3, "%s: failed to reassign targets", name); + goto out_reload; + } + dm_simplecmd_noflush(DM_DEVICE_RESUME, name, + MPATH_UDEV_RELOAD_FLAG); + } + r = 1; + +out_reload: + dm_task_destroy(reload_dmt); +out: + dm_task_destroy(dmt); + return r; +} + + +/* + * Reassign existing device-mapper table(s) to not use + * the block devices but point to the multipathed + * device instead + */ +int dm_reassign(const char *mapname) +{ + struct dm_deps *deps; + struct dm_task *dmt; + struct dm_info info; + char dev_t[32], dm_dep[32]; + int r = 0; + unsigned int i; + + if (dm_dev_t(mapname, &dev_t[0], 32)) { + condlog(3, "%s: failed to get device number", mapname); + return 1; + } + + if (!(dmt = libmp_dm_task_create(DM_DEVICE_DEPS))) { + condlog(3, "%s: couldn't make dm task", mapname); + return 0; + } + + if (!dm_task_set_name(dmt, mapname)) + goto out; + + dm_task_no_open_count(dmt); + + if (!dm_task_run(dmt)) + goto out; + + if (!dm_task_get_info(dmt, &info)) + goto out; + + if (!(deps = dm_task_get_deps(dmt))) + goto out; + + if (!info.exists) + goto out; + + for (i = 0; i < deps->count; i++) { + sprintf(dm_dep, "%d:%d", + major(deps->device[i]), + 
minor(deps->device[i])); + sysfs_check_holders(dm_dep, dev_t); + } + + r = 1; +out: + dm_task_destroy (dmt); + return r; +} + +int dm_setgeometry(struct multipath *mpp) +{ + struct dm_task *dmt; + struct path *pp; + char heads[4], sectors[4]; + char cylinders[10], start[32]; + int r = 0; + + if (!mpp) + return 1; + + pp = first_path(mpp); + if (!pp) { + condlog(3, "%s: no path for geometry", mpp->alias); + return 1; + } + if (pp->geom.cylinders == 0 || + pp->geom.heads == 0 || + pp->geom.sectors == 0) { + condlog(3, "%s: invalid geometry on %s", mpp->alias, pp->dev); + return 1; + } + + if (!(dmt = libmp_dm_task_create(DM_DEVICE_SET_GEOMETRY))) + return 0; + + if (!dm_task_set_name(dmt, mpp->alias)) + goto out; + + dm_task_no_open_count(dmt); + + /* What a sick interface ... */ + snprintf(heads, 4, "%u", pp->geom.heads); + snprintf(sectors, 4, "%u", pp->geom.sectors); + snprintf(cylinders, 10, "%u", pp->geom.cylinders); + snprintf(start, 32, "%lu", pp->geom.start); + if (!dm_task_set_geometry(dmt, cylinders, heads, sectors, start)) { + condlog(3, "%s: Failed to set geometry", mpp->alias); + goto out; + } + + r = dm_task_run(dmt); +out: + dm_task_destroy(dmt); + + return r; +} diff --git a/libmultipath/devmapper.h b/libmultipath/devmapper.h new file mode 100644 index 0000000..7557a86 --- /dev/null +++ b/libmultipath/devmapper.h @@ -0,0 +1,80 @@ +#ifndef _DEVMAPPER_H +#define _DEVMAPPER_H + +#include "structs.h" + +#define TGT_MPATH "multipath" +#define TGT_PART "linear" + +#ifdef DM_SUBSYSTEM_UDEV_FLAG0 +#define MPATH_UDEV_RELOAD_FLAG DM_SUBSYSTEM_UDEV_FLAG0 +#else +#define MPATH_UDEV_RELOAD_FLAG 0 +#endif + +#ifdef DM_SUBSYSTEM_UDEV_FLAG1 +#define MPATH_UDEV_NO_KPARTX_FLAG DM_SUBSYSTEM_UDEV_FLAG1 +#else +#define MPATH_UDEV_NO_KPARTX_FLAG 0 +#endif + +#ifdef DM_SUBSYSTEM_UDEV_FLAG2 +#define MPATH_UDEV_NO_PATHS_FLAG DM_SUBSYSTEM_UDEV_FLAG2 +#else +#define MPATH_UDEV_NO_PATHS_FLAG 0 +#endif + +#define UUID_PREFIX "mpath-" +#define UUID_PREFIX_LEN (sizeof(UUID_PREFIX) 
- 1) + +void dm_init(int verbosity); +void libmp_dm_init(void); +void libmp_udev_set_sync_support(int on); +struct dm_task *libmp_dm_task_create(int task); +int dm_drv_version (unsigned int * version); +int dm_tgt_version (unsigned int * version, char * str); +int dm_simplecmd_flush (int, const char *, uint16_t); +int dm_simplecmd_noflush (int, const char *, uint16_t); +int dm_addmap_create (struct multipath *mpp, char *params); +int dm_addmap_reload (struct multipath *mpp, char *params, int flush); +int dm_map_present (const char *); +int dm_get_map(const char *, unsigned long long *, char *); +int dm_get_status(const char *, char *); +int dm_type(const char *, char *); +int dm_is_mpath(const char *); +int _dm_flush_map (const char *, int, int, int, int); +int dm_flush_map_nopaths(const char * mapname, int deferred_remove); +#define dm_flush_map(mapname) _dm_flush_map(mapname, 1, 0, 0, 0) +#define dm_flush_map_nosync(mapname) _dm_flush_map(mapname, 0, 0, 0, 0) +#define dm_suspend_and_flush_map(mapname, retries) \ + _dm_flush_map(mapname, 1, 0, 1, retries) +int dm_cancel_deferred_remove(struct multipath *mpp); +int dm_flush_maps (int retries); +int dm_fail_path(const char * mapname, char * path); +int dm_reinstate_path(const char * mapname, char * path); +int dm_queue_if_no_path(const char *mapname, int enable); +int dm_switchgroup(const char * mapname, int index); +int dm_enablegroup(const char * mapname, int index); +int dm_disablegroup(const char * mapname, int index); +int dm_get_maps (vector mp); +int dm_geteventnr (const char *name); +int dm_is_suspended(const char *name); +int dm_get_major_minor (const char *name, int *major, int *minor); +char * dm_mapname(int major, int minor); +int dm_remove_partmaps (const char * mapname, int need_sync, + int deferred_remove); +int dm_get_uuid(const char *name, char *uuid, int uuid_len); +int dm_get_info (const char * mapname, struct dm_info ** dmi); +int dm_rename (const char * old, char * new, char * delim, int 
skip_kpartx); +int dm_reassign(const char * mapname); +int dm_reassign_table(const char *name, char *old, char *new); +int dm_setgeometry(struct multipath *mpp); +struct multipath *dm_get_multipath(const char *name); + +#define VERSION_GE(v, minv) ( \ + (v[0] > minv[0]) || \ + ((v[0] == minv[0]) && (v[1] > minv[1])) || \ + ((v[0] == minv[0]) && (v[1] == minv[1]) && (v[2] >= minv[2])) \ +) + +#endif /* _DEVMAPPER_H */ diff --git a/libmultipath/dict.c b/libmultipath/dict.c new file mode 100644 index 0000000..3e25e74 --- /dev/null +++ b/libmultipath/dict.c @@ -0,0 +1,1966 @@ +/* + * Based on Alexandre Cassen template for keepalived + * Copyright (c) 2004, 2005, 2006 Christophe Varoqui + * Copyright (c) 2005 Benjamin Marzinski, Redhat + * Copyright (c) 2005 Kiyoshi Ueda, NEC + */ +#include +#include +#include +#include "checkers.h" +#include "vector.h" +#include "hwtable.h" +#include "structs.h" +#include "parser.h" +#include "config.h" +#include "debug.h" +#include "memory.h" +#include "pgpolicies.h" +#include "blacklist.h" +#include "defaults.h" +#include "prio.h" +#include "util.h" +#include +#include +#include +#include +#include "mpath_cmd.h" +#include "dict.h" + +static int +set_int(vector strvec, void *ptr) +{ + int *int_ptr = (int *)ptr; + char *buff, *eptr; + long res; + int rc; + + buff = set_value(strvec); + if (!buff) + return 1; + + res = strtol(buff, &eptr, 10); + if (eptr > buff) + while (isspace(*eptr)) + eptr++; + if (*buff == '\0' || *eptr != '\0' || res > INT_MAX || res < INT_MIN) { + condlog(1, "%s: invalid value for %s: \"%s\"", + __func__, (char*)VECTOR_SLOT(strvec, 0), buff); + rc = 1; + } else { + rc = 0; + *int_ptr = res; + } + + FREE(buff); + return rc; +} + +static int +set_uint(vector strvec, void *ptr) +{ + unsigned int *uint_ptr = (unsigned int *)ptr; + char *buff, *eptr; + long res; + int rc; + + buff = set_value(strvec); + if (!buff) + return 1; + + res = strtol(buff, &eptr, 10); + if (eptr > buff) + while (isspace(*eptr)) + eptr++; + if 
(*buff == '\0' || *eptr != '\0' || res < 0 || res > UINT_MAX) { + condlog(1, "%s: invalid value for %s: \"%s\"", + __func__, (char*)VECTOR_SLOT(strvec, 0), buff); + rc = 1; + } else { + rc = 0; + *uint_ptr = res; + } + + FREE(buff); + return rc; +} + +static int +set_str(vector strvec, void *ptr) +{ + char **str_ptr = (char **)ptr; + + if (*str_ptr) + FREE(*str_ptr); + *str_ptr = set_value(strvec); + + if (!*str_ptr) + return 1; + + return 0; +} + +static int +set_yes_no(vector strvec, void *ptr) +{ + char * buff; + int *int_ptr = (int *)ptr; + + buff = set_value(strvec); + if (!buff) + return 1; + + if (strcmp(buff, "yes") == 0 || strcmp(buff, "1") == 0) + *int_ptr = YN_YES; + else + *int_ptr = YN_NO; + + FREE(buff); + return 0; +} + +static int +set_yes_no_undef(vector strvec, void *ptr) +{ + char * buff; + int *int_ptr = (int *)ptr; + + buff = set_value(strvec); + if (!buff) + return 1; + + if (strcmp(buff, "no") == 0 || strcmp(buff, "0") == 0) + *int_ptr = YNU_NO; + else if (strcmp(buff, "yes") == 0 || strcmp(buff, "1") == 0) + *int_ptr = YNU_YES; + else + *int_ptr = YNU_UNDEF; + + FREE(buff); + return 0; +} + +static int +print_int (char *buff, int len, long v) +{ + return snprintf(buff, len, "%li", v); +} + +static int +print_nonzero (char *buff, int len, long v) +{ + if (!v) + return 0; + return snprintf(buff, len, "%li", v); +} + +static int +print_str (char *buff, int len, const char *ptr) +{ + char *p; + char *last; + const char *q; + + if (!ptr || len <= 0) + return 0; + + q = strchr(ptr, '"'); + if (q == NULL) + return snprintf(buff, len, "\"%s\"", ptr); + + last = buff + len - 1; + p = buff; + if (p >= last) + goto out; + *p++ = '"'; + if (p >= last) + goto out; + for (; q; q = strchr(ptr, '"')) { + if (q + 1 - ptr < last - p) + p = mempcpy(p, ptr, q + 1 - ptr); + else { + p = mempcpy(p, ptr, last - p); + goto out; + } + *p++ = '"'; + if (p >= last) + goto out; + ptr = q + 1; + } + p += strlcpy(p, ptr, last - p); + if (p >= last) + goto out; + *p++ = 
'"'; + *p = '\0'; + return p - buff; +out: + *p = '\0'; + return len; +} + +static int +print_ignored (char *buff, int len) +{ + return snprintf(buff, len, "ignored"); +} + +static int +print_yes_no (char *buff, int len, long v) +{ + return snprintf(buff, len, "\"%s\"", + (v == YN_NO)? "no" : "yes"); +} + +static int +print_yes_no_undef (char *buff, int len, long v) +{ + if (!v) + return 0; + return snprintf(buff, len, "\"%s\"", + (v == YNU_NO)? "no" : "yes"); +} + +#define declare_def_handler(option, function) \ +static int \ +def_ ## option ## _handler (struct config *conf, vector strvec) \ +{ \ + return function (strvec, &conf->option); \ +} + +#define declare_def_snprint(option, function) \ +static int \ +snprint_def_ ## option (struct config *conf, char * buff, int len, \ + const void * data) \ +{ \ + return function (buff, len, conf->option); \ +} + +#define declare_def_snprint_defint(option, function, value) \ +static int \ +snprint_def_ ## option (struct config *conf, char * buff, int len, \ + const void * data) \ +{ \ + int i = value; \ + if (!conf->option) \ + return function (buff, len, i); \ + return function (buff, len, conf->option); \ +} + +#define declare_def_snprint_defstr(option, function, value) \ +static int \ +snprint_def_ ## option (struct config *conf, char * buff, int len, \ + const void * data) \ +{ \ + static const char *s = value; \ + if (!conf->option) \ + return function (buff, len, s); \ + return function (buff, len, conf->option); \ +} + +#define declare_hw_handler(option, function) \ +static int \ +hw_ ## option ## _handler (struct config *conf, vector strvec) \ +{ \ + struct hwentry * hwe = VECTOR_LAST_SLOT(conf->hwtable); \ + if (!hwe) \ + return 1; \ + return function (strvec, &hwe->option); \ +} + +#define declare_hw_snprint(option, function) \ +static int \ +snprint_hw_ ## option (struct config *conf, char * buff, int len, \ + const void * data) \ +{ \ + const struct hwentry * hwe = (const struct hwentry *)data; \ + return 
function (buff, len, hwe->option); \ +} + +#define declare_ovr_handler(option, function) \ +static int \ +ovr_ ## option ## _handler (struct config *conf, vector strvec) \ +{ \ + if (!conf->overrides) \ + return 1; \ + return function (strvec, &conf->overrides->option); \ +} + +#define declare_ovr_snprint(option, function) \ +static int \ +snprint_ovr_ ## option (struct config *conf, char * buff, int len, \ + const void * data) \ +{ \ + return function (buff, len, conf->overrides->option); \ +} + +#define declare_mp_handler(option, function) \ +static int \ +mp_ ## option ## _handler (struct config *conf, vector strvec) \ +{ \ + struct mpentry * mpe = VECTOR_LAST_SLOT(conf->mptable); \ + if (!mpe) \ + return 1; \ + return function (strvec, &mpe->option); \ +} + +#define declare_mp_snprint(option, function) \ +static int \ +snprint_mp_ ## option (struct config *conf, char * buff, int len, \ + const void * data) \ +{ \ + const struct mpentry * mpe = (const struct mpentry *)data; \ + return function (buff, len, mpe->option); \ +} + +static int checkint_handler(struct config *conf, vector strvec) +{ + int rc = set_uint(strvec, &conf->checkint); + + if (rc) + return rc; + if (conf->checkint == CHECKINT_UNDEF) + conf->checkint--; + return 0; +} + +declare_def_snprint(checkint, print_int) + +declare_def_handler(max_checkint, set_uint) +declare_def_snprint(max_checkint, print_int) + +declare_def_handler(verbosity, set_int) +declare_def_snprint(verbosity, print_int) + +declare_def_handler(reassign_maps, set_yes_no) +declare_def_snprint(reassign_maps, print_yes_no) + +declare_def_handler(multipath_dir, set_str) +declare_def_snprint(multipath_dir, print_str) + +static int def_partition_delim_handler(struct config *conf, vector strvec) +{ + int rc = set_str(strvec, &conf->partition_delim); + + if (rc != 0) + return rc; + + if (!strcmp(conf->partition_delim, UNSET_PARTITION_DELIM)) { + FREE(conf->partition_delim); + conf->partition_delim = NULL; + } + return 0; +} + +static int 
snprint_def_partition_delim(struct config *conf, char *buff, + int len, const void *data) +{ + if (default_partition_delim == NULL || conf->partition_delim != NULL) + return print_str(buff, len, conf->partition_delim); + else + return print_str(buff, len, UNSET_PARTITION_DELIM); +} + +static const char * const find_multipaths_optvals[] = { + [FIND_MULTIPATHS_OFF] = "off", + [FIND_MULTIPATHS_ON] = "on", + [FIND_MULTIPATHS_STRICT] = "strict", + [FIND_MULTIPATHS_GREEDY] = "greedy", + [FIND_MULTIPATHS_SMART] = "smart", +}; + +static int +def_find_multipaths_handler(struct config *conf, vector strvec) +{ + char *buff; + int i; + + if (set_yes_no_undef(strvec, &conf->find_multipaths) == 0 && + conf->find_multipaths != FIND_MULTIPATHS_UNDEF) + return 0; + + buff = set_value(strvec); + if (!buff) + return 1; + + for (i = FIND_MULTIPATHS_OFF; i < __FIND_MULTIPATHS_LAST; i++) { + if (find_multipaths_optvals[i] != NULL && + !strcmp(buff, find_multipaths_optvals[i])) { + conf->find_multipaths = i; + break; + } + } + + if (conf->find_multipaths == YNU_UNDEF) { + condlog(0, "illegal value for find_multipaths: %s", buff); + conf->find_multipaths = DEFAULT_FIND_MULTIPATHS; + } + + FREE(buff); + return 0; +} + +static int +snprint_def_find_multipaths(struct config *conf, char *buff, int len, + const void *data) +{ + return print_str(buff, len, + find_multipaths_optvals[conf->find_multipaths]); +} + +declare_def_handler(selector, set_str) +declare_def_snprint_defstr(selector, print_str, DEFAULT_SELECTOR) +declare_hw_handler(selector, set_str) +declare_hw_snprint(selector, print_str) +declare_ovr_handler(selector, set_str) +declare_ovr_snprint(selector, print_str) +declare_mp_handler(selector, set_str) +declare_mp_snprint(selector, print_str) + +static int snprint_uid_attrs(struct config *conf, char *buff, int len, + const void *dummy) +{ + char *p = buff; + int n, j; + const char *att; + + vector_foreach_slot(&conf->uid_attrs, att, j) { + n = snprintf(p, len, "%s%s", j == 0 ? 
"" : " ", att); + if (n >= len) + return (p - buff) + n; + p += n; + len -= n; + } + return p - buff; +} + +static int uid_attrs_handler(struct config *conf, vector strvec) +{ + char *val; + + vector_reset(&conf->uid_attrs); + val = set_value(strvec); + if (!val) + return 1; + if (parse_uid_attrs(val, conf)) + condlog(1, "error parsing uid_attrs: \"%s\"", val); + condlog(3, "parsed %d uid_attrs", VECTOR_SIZE(&conf->uid_attrs)); + FREE(val); + return 0; +} + +declare_def_handler(uid_attribute, set_str) +declare_def_snprint_defstr(uid_attribute, print_str, DEFAULT_UID_ATTRIBUTE) +declare_ovr_handler(uid_attribute, set_str) +declare_ovr_snprint(uid_attribute, print_str) +declare_hw_handler(uid_attribute, set_str) +declare_hw_snprint(uid_attribute, print_str) + +declare_def_handler(getuid, set_str) +declare_def_snprint(getuid, print_str) +declare_ovr_handler(getuid, set_str) +declare_ovr_snprint(getuid, print_str) +declare_hw_handler(getuid, set_str) +declare_hw_snprint(getuid, print_str) + +declare_def_handler(prio_name, set_str) +declare_def_snprint_defstr(prio_name, print_str, DEFAULT_PRIO) +declare_ovr_handler(prio_name, set_str) +declare_ovr_snprint(prio_name, print_str) +declare_hw_handler(prio_name, set_str) +declare_hw_snprint(prio_name, print_str) +declare_mp_handler(prio_name, set_str) +declare_mp_snprint(prio_name, print_str) + +declare_def_handler(alias_prefix, set_str) +declare_def_snprint_defstr(alias_prefix, print_str, DEFAULT_ALIAS_PREFIX) +declare_ovr_handler(alias_prefix, set_str) +declare_ovr_snprint(alias_prefix, print_str) +declare_hw_handler(alias_prefix, set_str) +declare_hw_snprint(alias_prefix, print_str) + +declare_def_handler(prio_args, set_str) +declare_def_snprint_defstr(prio_args, print_str, DEFAULT_PRIO_ARGS) +declare_ovr_handler(prio_args, set_str) +declare_ovr_snprint(prio_args, print_str) +declare_hw_handler(prio_args, set_str) +declare_hw_snprint(prio_args, print_str) +declare_mp_handler(prio_args, set_str) 
+declare_mp_snprint(prio_args, print_str) + +declare_def_handler(features, set_str) +declare_def_snprint_defstr(features, print_str, DEFAULT_FEATURES) +declare_ovr_handler(features, set_str) +declare_ovr_snprint(features, print_str) +declare_hw_handler(features, set_str) +declare_hw_snprint(features, print_str) +declare_mp_handler(features, set_str) +declare_mp_snprint(features, print_str) + +declare_def_handler(checker_name, set_str) +declare_def_snprint_defstr(checker_name, print_str, DEFAULT_CHECKER) +declare_ovr_handler(checker_name, set_str) +declare_ovr_snprint(checker_name, print_str) +declare_hw_handler(checker_name, set_str) +declare_hw_snprint(checker_name, print_str) + +declare_def_handler(minio, set_int) +declare_def_snprint_defint(minio, print_int, DEFAULT_MINIO) +declare_ovr_handler(minio, set_int) +declare_ovr_snprint(minio, print_nonzero) +declare_hw_handler(minio, set_int) +declare_hw_snprint(minio, print_nonzero) +declare_mp_handler(minio, set_int) +declare_mp_snprint(minio, print_nonzero) + +declare_def_handler(minio_rq, set_int) +declare_def_snprint_defint(minio_rq, print_int, DEFAULT_MINIO_RQ) +declare_ovr_handler(minio_rq, set_int) +declare_ovr_snprint(minio_rq, print_nonzero) +declare_hw_handler(minio_rq, set_int) +declare_hw_snprint(minio_rq, print_nonzero) +declare_mp_handler(minio_rq, set_int) +declare_mp_snprint(minio_rq, print_nonzero) + +declare_def_handler(queue_without_daemon, set_yes_no) +static int +snprint_def_queue_without_daemon (struct config *conf, + char * buff, int len, const void * data) +{ + switch (conf->queue_without_daemon) { + case QUE_NO_DAEMON_OFF: + return snprintf(buff, len, "\"no\""); + case QUE_NO_DAEMON_ON: + return snprintf(buff, len, "\"yes\""); + case QUE_NO_DAEMON_FORCE: + return snprintf(buff, len, "\"forced\""); + } + return 0; +} + +declare_def_handler(checker_timeout, set_int) +declare_def_snprint(checker_timeout, print_nonzero) + +declare_def_handler(flush_on_last_del, set_yes_no_undef) 
+declare_def_snprint_defint(flush_on_last_del, print_yes_no_undef, DEFAULT_FLUSH) +declare_ovr_handler(flush_on_last_del, set_yes_no_undef) +declare_ovr_snprint(flush_on_last_del, print_yes_no_undef) +declare_hw_handler(flush_on_last_del, set_yes_no_undef) +declare_hw_snprint(flush_on_last_del, print_yes_no_undef) +declare_mp_handler(flush_on_last_del, set_yes_no_undef) +declare_mp_snprint(flush_on_last_del, print_yes_no_undef) + +declare_def_handler(user_friendly_names, set_yes_no_undef) +declare_def_snprint_defint(user_friendly_names, print_yes_no_undef, + DEFAULT_USER_FRIENDLY_NAMES) +declare_ovr_handler(user_friendly_names, set_yes_no_undef) +declare_ovr_snprint(user_friendly_names, print_yes_no_undef) +declare_hw_handler(user_friendly_names, set_yes_no_undef) +declare_hw_snprint(user_friendly_names, print_yes_no_undef) +declare_mp_handler(user_friendly_names, set_yes_no_undef) +declare_mp_snprint(user_friendly_names, print_yes_no_undef) + +declare_def_handler(bindings_file, set_str) +declare_def_snprint(bindings_file, print_str) + +declare_def_handler(wwids_file, set_str) +declare_def_snprint(wwids_file, print_str) + +declare_def_handler(prkeys_file, set_str) +declare_def_snprint(prkeys_file, print_str) + +declare_def_handler(retain_hwhandler, set_yes_no_undef) +declare_def_snprint_defint(retain_hwhandler, print_yes_no_undef, + DEFAULT_RETAIN_HWHANDLER) +declare_ovr_handler(retain_hwhandler, set_yes_no_undef) +declare_ovr_snprint(retain_hwhandler, print_yes_no_undef) +declare_hw_handler(retain_hwhandler, set_yes_no_undef) +declare_hw_snprint(retain_hwhandler, print_yes_no_undef) + +declare_def_handler(detect_prio, set_yes_no_undef) +declare_def_snprint_defint(detect_prio, print_yes_no_undef, + DEFAULT_DETECT_PRIO) +declare_ovr_handler(detect_prio, set_yes_no_undef) +declare_ovr_snprint(detect_prio, print_yes_no_undef) +declare_hw_handler(detect_prio, set_yes_no_undef) +declare_hw_snprint(detect_prio, print_yes_no_undef) + +declare_def_handler(detect_checker, 
set_yes_no_undef) +declare_def_snprint_defint(detect_checker, print_yes_no_undef, + DEFAULT_DETECT_CHECKER) +declare_ovr_handler(detect_checker, set_yes_no_undef) +declare_ovr_snprint(detect_checker, print_yes_no_undef) +declare_hw_handler(detect_checker, set_yes_no_undef) +declare_hw_snprint(detect_checker, print_yes_no_undef) + +declare_def_handler(force_sync, set_yes_no) +declare_def_snprint(force_sync, print_yes_no) + +declare_def_handler(deferred_remove, set_yes_no_undef) +declare_def_snprint_defint(deferred_remove, print_yes_no_undef, + DEFAULT_DEFERRED_REMOVE) +declare_ovr_handler(deferred_remove, set_yes_no_undef) +declare_ovr_snprint(deferred_remove, print_yes_no_undef) +declare_hw_handler(deferred_remove, set_yes_no_undef) +declare_hw_snprint(deferred_remove, print_yes_no_undef) +declare_mp_handler(deferred_remove, set_yes_no_undef) +declare_mp_snprint(deferred_remove, print_yes_no_undef) + +declare_def_handler(retrigger_tries, set_int) +declare_def_snprint(retrigger_tries, print_int) + +declare_def_handler(retrigger_delay, set_int) +declare_def_snprint(retrigger_delay, print_int) + +declare_def_handler(uev_wait_timeout, set_int) +declare_def_snprint(uev_wait_timeout, print_int) + +declare_def_handler(strict_timing, set_yes_no) +declare_def_snprint(strict_timing, print_yes_no) + +declare_def_handler(skip_kpartx, set_yes_no_undef) +declare_def_snprint_defint(skip_kpartx, print_yes_no_undef, + DEFAULT_SKIP_KPARTX) +declare_ovr_handler(skip_kpartx, set_yes_no_undef) +declare_ovr_snprint(skip_kpartx, print_yes_no_undef) +declare_hw_handler(skip_kpartx, set_yes_no_undef) +declare_hw_snprint(skip_kpartx, print_yes_no_undef) +declare_mp_handler(skip_kpartx, set_yes_no_undef) +declare_mp_snprint(skip_kpartx, print_yes_no_undef) +static int def_disable_changed_wwids_handler(struct config *conf, vector strvec) +{ + return 0; +} +static int snprint_def_disable_changed_wwids(struct config *conf, char *buff, + int len, const void *data) +{ + return print_ignored(buff, 
len);
+}
+
+declare_def_handler(remove_retries, set_int)
+declare_def_snprint(remove_retries, print_int)
+
+declare_def_handler(max_sectors_kb, set_int)
+declare_def_snprint(max_sectors_kb, print_nonzero)
+declare_ovr_handler(max_sectors_kb, set_int)
+declare_ovr_snprint(max_sectors_kb, print_nonzero)
+declare_hw_handler(max_sectors_kb, set_int)
+declare_hw_snprint(max_sectors_kb, print_nonzero)
+declare_mp_handler(max_sectors_kb, set_int)
+declare_mp_snprint(max_sectors_kb, print_nonzero)
+
+declare_def_handler(find_multipaths_timeout, set_int)
+declare_def_snprint_defint(find_multipaths_timeout, print_int,
+			   DEFAULT_FIND_MULTIPATHS_TIMEOUT)
+
+declare_def_handler(enable_foreign, set_str)
+declare_def_snprint_defstr(enable_foreign, print_str,
+			   DEFAULT_ENABLE_FOREIGN)
+
+/*
+ * "config_dir" keyword: stored in conf->config_dir via set_str(), but
+ * silently ignored (returns 0) once conf->processed_main_config is set.
+ */
+static int
+def_config_dir_handler(struct config *conf, vector strvec)
+{
+	/* this is only valid in the main config file */
+	if (conf->processed_main_config)
+		return 0;
+	return set_str(strvec, &conf->config_dir);
+}
+declare_def_snprint(config_dir, print_str)
+
+/*
+ * Generators for options that carry an "is set" bit in attribute_flags
+ * next to their value (mode, uid, gid).
+ *
+ * The *_handler macros call function(strvec, &<obj>->option,
+ * &<obj>->attribute_flags); the *_snprint macros call
+ * function(buff, len, <obj>->option, <obj>->attribute_flags).
+ * <obj> is conf for the "def" variants and a struct mpentry for the
+ * "mp" variants (last slot of conf->mptable when parsing, the data
+ * argument when printing).
+ */
+#define declare_def_attr_handler(option, function)			\
+static int								\
+def_ ## option ## _handler (struct config *conf, vector strvec)	\
+{									\
+	return function (strvec, &conf->option, &conf->attribute_flags);\
+}
+
+#define declare_def_attr_snprint(option, function)			\
+static int								\
+snprint_def_ ## option (struct config *conf, char * buff, int len,	\
+			const void * data)				\
+{									\
+	return function (buff, len, conf->option,			\
+			 conf->attribute_flags);			\
+}
+
+#define declare_mp_attr_handler(option, function)			\
+static int								\
+mp_ ## option ## _handler (struct config *conf, vector strvec)		\
+{									\
+	struct mpentry * mpe = VECTOR_LAST_SLOT(conf->mptable);		\
+	if (!mpe)							\
+		return 1;						\
+	return function (strvec, &mpe->option, &mpe->attribute_flags);	\
+}
+
+#define declare_mp_attr_snprint(option, function)			\
+static int								\
+snprint_mp_ ## option (struct config *conf, char * buff, int len,	\
+		       const void * data)				\
+{									\
+	const struct mpentry * mpe = (const struct mpentry *)data;	\
+	return function (buff, len, mpe->option,			\
+			 mpe->attribute_flags);				\
+}
+
+/*
+ * Parse an octal permission value into *ptr (a mode_t).
+ *
+ * The value and the ATTR_MODE bit in *flags are only updated when the
+ * text scans as octal and is <= 0777; anything else is ignored.
+ * Returns 1 only when set_value() yields no buffer, 0 otherwise.
+ */
+static int
+set_mode(vector strvec, void *ptr, int *flags)
+{
+	mode_t mode;
+	mode_t *mode_ptr = (mode_t *)ptr;
+	char *buff;
+
+	buff = set_value(strvec);
+
+	if (!buff)
+		return 1;
+
+	if (sscanf(buff, "%o", &mode) == 1 && mode <= 0777) {
+		*flags |= (1 << ATTR_MODE);
+		*mode_ptr = mode;
+	}
+
+	FREE(buff);
+	return 0;
+}
+
+/*
+ * Parse a user into *ptr (a uid_t) and set the ATTR_UID bit in *flags.
+ *
+ * The text is first looked up as a user name with getpwnam_r(); if no
+ * entry is found it is scanned as a decimal number.  Text that is
+ * neither leaves *ptr and *flags untouched.
+ * Returns 1 only when set_value() yields no buffer, 0 otherwise.
+ */
+static int
+set_uid(vector strvec, void *ptr, int *flags)
+{
+	uid_t uid;
+	uid_t *uid_ptr = (uid_t *)ptr;
+	char *buff;
+	char passwd_buf[1024];
+	struct passwd info, *found;
+
+	buff = set_value(strvec);
+	if (!buff)
+		return 1;
+	if (getpwnam_r(buff, &info, passwd_buf, 1024, &found) == 0 && found) {
+		*flags |= (1 << ATTR_UID);
+		*uid_ptr = info.pw_uid;
+	}
+	else if (sscanf(buff, "%u", &uid) == 1){
+		*flags |= (1 << ATTR_UID);
+		*uid_ptr = uid;
+	}
+
+	FREE(buff);
+	return 0;
+}
+
+/*
+ * Parse a group into *ptr (a gid_t) and set the ATTR_GID bit in *flags.
+ *
+ * Same shape as set_uid(): name lookup first, decimal number second.
+ * NOTE(review): the name is resolved through the passwd database
+ * (getpwnam_r, pw_gid), i.e. it is treated as a user name whose primary
+ * group is used, not as a group name -- confirm this is intended.
+ */
+static int
+set_gid(vector strvec, void *ptr, int *flags)
+{
+	gid_t gid;
+	gid_t *gid_ptr = (gid_t *)ptr;
+	char *buff;
+	char passwd_buf[1024];
+	struct passwd info, *found;
+
+	buff = set_value(strvec);
+	if (!buff)
+		return 1;
+
+	if (getpwnam_r(buff, &info, passwd_buf, 1024, &found) == 0 && found) {
+		*flags |= (1 << ATTR_GID);
+		*gid_ptr = info.pw_gid;
+	}
+	else if (sscanf(buff, "%u", &gid) == 1){
+		*flags |= (1 << ATTR_GID);
+		*gid_ptr = gid;
+	}
+	FREE(buff);
+	return 0;
+}
+
+/*
+ * Print the mode as octal with a leading 0, matching the "%o" scan in
+ * set_mode().  Returns 0 (nothing written) when ATTR_MODE is not set.
+ */
+static int
+print_mode(char * buff, int len, long v, int flags)
+{
+	mode_t mode = (mode_t)v;
+	if ((flags & (1 << ATTR_MODE)) == 0)
+		return 0;
+	return snprintf(buff, len, "0%o", mode);
+}
+
+/*
+ * Print the uid in decimal.  Returns 0 (nothing written) when ATTR_UID
+ * is not set.
+ *
+ * set_uid() scans numeric input with "%u", so the value must be written
+ * in decimal for the printed form to read back as the same id.
+ */
+static int
+print_uid(char * buff, int len, long v, int flags)
+{
+	uid_t uid = (uid_t)v;
+	if ((flags & (1 << ATTR_UID)) == 0)
+		return 0;
+	return snprintf(buff, len, "%u", uid);
+}
+
+/*
+ * Print the gid in decimal, for the same reason as print_uid():
+ * set_gid() scans numeric input with "%u".  Returns 0 (nothing written)
+ * when ATTR_GID is not set.
+ */
+static int
+print_gid(char * buff, int len, long v, int flags)
+{
+	gid_t gid = (gid_t)v;
+	if ((flags & (1 << ATTR_GID)) == 0)
+		return 0;
+	return snprintf(buff, len, "%u", gid);
+}
+
+declare_def_attr_handler(mode, set_mode)
+declare_def_attr_snprint(mode, print_mode)
+declare_mp_attr_handler(mode, set_mode)
+declare_mp_attr_snprint(mode, print_mode)
+
+declare_def_attr_handler(uid, set_uid)
+declare_def_attr_snprint(uid, print_uid)
+declare_mp_attr_handler(uid, set_uid)
+declare_mp_attr_snprint(uid, print_uid)
+
+declare_def_attr_handler(gid, set_gid)
+declare_def_attr_snprint(gid, print_gid)
+declare_mp_attr_handler(gid, set_gid)
+declare_mp_attr_snprint(gid, print_gid)
+
+/*
+ * Parse fast_io_fail_tmo into *ptr (an int).
+ *
+ * "off" maps to MP_FAST_IO_FAIL_OFF and a literal 0 to
+ * MP_FAST_IO_FAIL_ZERO; text that is not a number, or a number below
+ * MP_FAST_IO_FAIL_ZERO, maps to MP_FAST_IO_FAIL_UNSET.  Any other
+ * number is stored as scanned.
+ * Returns 1 only when set_value() yields no buffer, 0 otherwise.
+ */
+static int
+set_fast_io_fail(vector strvec, void *ptr)
+{
+	char * buff;
+	int *int_ptr = (int *)ptr;
+
+	buff = set_value(strvec);
+	if (!buff)
+		return 1;
+
+	if (strcmp(buff, "off") == 0)
+		*int_ptr = MP_FAST_IO_FAIL_OFF;
+	else if (sscanf(buff, "%d", int_ptr) != 1 ||
+		 *int_ptr < MP_FAST_IO_FAIL_ZERO)
+		*int_ptr = MP_FAST_IO_FAIL_UNSET;
+	else if (*int_ptr == 0)
+		*int_ptr = MP_FAST_IO_FAIL_ZERO;
+
+	FREE(buff);
+	return 0;
+}
+
+/*
+ * Inverse of set_fast_io_fail(): nothing for UNSET, "off" (quoted) for
+ * OFF, 0 for ZERO, otherwise the number itself.
+ */
+int
+print_fast_io_fail(char * buff, int len, long v)
+{
+	if (v == MP_FAST_IO_FAIL_UNSET)
+		return 0;
+	if (v == MP_FAST_IO_FAIL_OFF)
+		return snprintf(buff, len, "\"off\"");
+	if (v == MP_FAST_IO_FAIL_ZERO)
+		return snprintf(buff, len, "0");
+	return snprintf(buff, len, "%ld", v);
+}
+
+declare_def_handler(fast_io_fail, set_fast_io_fail)
+declare_def_snprint_defint(fast_io_fail, print_fast_io_fail,
+			   DEFAULT_FAST_IO_FAIL)
+declare_ovr_handler(fast_io_fail, set_fast_io_fail)
+declare_ovr_snprint(fast_io_fail, print_fast_io_fail)
+declare_hw_handler(fast_io_fail, set_fast_io_fail)
+declare_hw_snprint(fast_io_fail, print_fast_io_fail)
+
+/*
+ * Parse dev_loss_tmo into *ptr (an unsigned int): "infinity" maps to
+ * MAX_DEV_LOSS_TMO, text that does not scan as unsigned maps to 0.
+ * Returns 1 only when set_value() yields no buffer, 0 otherwise.
+ */
+static int
+set_dev_loss(vector strvec, void *ptr)
+{
+	char * buff;
+	unsigned int *uint_ptr = (unsigned int *)ptr;
+
+	buff = set_value(strvec);
+	if (!buff)
+		return 1;
+
+	if (!strcmp(buff, "infinity"))
+		*uint_ptr = MAX_DEV_LOSS_TMO;
+	else if (sscanf(buff, "%u", uint_ptr) != 1)
+		*uint_ptr = 0;
+
+	FREE(buff);
+	return 0;
+}
+
+/*
+ * Print dev_loss_tmo: nothing for 0, "infinity" (quoted) for anything
+ * at or above MAX_DEV_LOSS_TMO, otherwise the number.
+ */
+int
+print_dev_loss(char * buff, int len, unsigned long v)
+{
+	if (!v)
+		return 0;
+	if (v >= MAX_DEV_LOSS_TMO)
+		return snprintf(buff, len, "\"infinity\"");
+	return snprintf(buff, len, "%lu", v);
+}
+
+declare_def_handler(dev_loss, set_dev_loss)
+declare_def_snprint(dev_loss, print_dev_loss)
+declare_ovr_handler(dev_loss, set_dev_loss)
+declare_ovr_snprint(dev_loss, print_dev_loss)
+declare_hw_handler(dev_loss, set_dev_loss)
+declare_hw_snprint(dev_loss, print_dev_loss)
+
+/*
+ * Store the id that get_pgpolicy_id() returns for the keyword's value
+ * in *ptr (an int).
+ */
+static int
+set_pgpolicy(vector strvec, void *ptr)
+{
+	char * buff;
+	int *int_ptr = (int *)ptr;
+
+	buff = set_value(strvec);
+	if (!buff)
+		return 1;
+
+	*int_ptr = get_pgpolicy_id(buff);
+	FREE(buff);
+
+	return 0;
+}
+
+/*
+ * Print the quoted policy name for pgpolicy; nothing when it is 0.
+ */
+int
+print_pgpolicy(char * buff, int len, long pgpolicy)
+{
+	char str[POLICY_NAME_SIZE];
+
+	if (!pgpolicy)
+		return 0;
+
+	get_pgpolicy_name(str, POLICY_NAME_SIZE, pgpolicy);
+
+	return snprintf(buff, len, "\"%s\"", str);
+}
+
+declare_def_handler(pgpolicy, set_pgpolicy)
+declare_def_snprint_defint(pgpolicy, print_pgpolicy, DEFAULT_PGPOLICY)
+declare_ovr_handler(pgpolicy, set_pgpolicy)
+declare_ovr_snprint(pgpolicy, print_pgpolicy)
+declare_hw_handler(pgpolicy, set_pgpolicy)
+declare_hw_snprint(pgpolicy, print_pgpolicy)
+declare_mp_handler(pgpolicy, set_pgpolicy)
+declare_mp_snprint(pgpolicy, print_pgpolicy)
+
+/*
+ * Read the integer in /proc/sys/fs/nr_open into *max_fds.
+ *
+ * Returns 0 on success.  Returns 1, after reporting on stderr, when the
+ * file cannot be opened or does not start with an integer; *max_fds is
+ * not written in that case.
+ */
+int
+get_sys_max_fds(int *max_fds)
+{
+	FILE *file;
+	int nr_open;
+	int ret = 1;
+
+	file = fopen("/proc/sys/fs/nr_open", "r");
+	if (!file) {
+		fprintf(stderr, "Cannot open /proc/sys/fs/nr_open : %s\n",
+			strerror(errno));
+		return 1;
+	}
+	if (fscanf(file, "%d", &nr_open) != 1) {
+		fprintf(stderr, "Cannot read max open fds from /proc/sys/fs/nr_open");
+		if (ferror(file))
+			fprintf(stderr, " : %s\n", strerror(errno));
+		else
+			fprintf(stderr, "\n");
+	} else {
+		*max_fds = nr_open;
+		ret = 0;
+	}
+	fclose(file);
+	return ret;
+}
+
+
+/*
+ * "max_fds" keyword: "max" selects the system limit, anything else is
+ * converted with atoi().  The result is clamped to the system limit.
+ *
+ * When the system limit cannot be read, 4096 is used instead.  That
+ * fallback is a successful outcome, so the handler returns 0 in that
+ * case too; it returns 1 only when set_value() yields no buffer.
+ */
+static int
+max_fds_handler(struct config *conf, vector strvec)
+{
+	char * buff;
+	int max_fds;
+
+	buff = set_value(strvec);
+
+	if (!buff)
+		return 1;
+
+	if (get_sys_max_fds(&max_fds)) {
+		/* Assume safe limit */
+		max_fds = 4096;
+	}
+	if (!strcmp(buff, "max"))
+		conf->max_fds = max_fds;
+	else
+		conf->max_fds = atoi(buff);
+
+	if (conf->max_fds > max_fds)
+		conf->max_fds = max_fds;
+
+	FREE(buff);
+
+	return 0;
+}
+
+/*
+ * Print conf->max_fds: nothing when it is 0, "max" (quoted) when the
+ * system limit is readable and the value reaches it, else the number.
+ */
+static int
+snprint_max_fds (struct config *conf, char * buff, int len, const void * data)
+{
+	int r = 0, max_fds;
+
+	if (!conf->max_fds)
+		return 0;
+
+	r = get_sys_max_fds(&max_fds);
+	if (!r && conf->max_fds >= max_fds)
+		return snprintf(buff, len, "\"max\"");
+	else
+		return snprintf(buff, len, "%d", conf->max_fds);
+}
+
+/*
+ * Parse rr_weight into *ptr (an int): "priorities" or "uniform".  Any
+ * other text leaves *ptr unchanged.
+ */
+static int
+set_rr_weight(vector strvec, void *ptr)
+{
+	int *int_ptr = (int *)ptr;
+	char * buff;
+
+	buff = set_value(strvec);
+
+	if (!buff)
+		return 1;
+
+	if (!strcmp(buff, "priorities"))
+		*int_ptr = RR_WEIGHT_PRIO;
+	else if (!strcmp(buff, "uniform"))
+		*int_ptr = RR_WEIGHT_NONE;
+
+	FREE(buff);
+
+	return 0;
+}
+
+/*
+ * Inverse of set_rr_weight(); prints nothing for 0 or an unknown value.
+ */
+int
+print_rr_weight (char * buff, int len, long v)
+{
+	if (!v)
+		return 0;
+	if (v == RR_WEIGHT_PRIO)
+		return snprintf(buff, len, "\"priorities\"");
+	if (v == RR_WEIGHT_NONE)
+		return snprintf(buff, len, "\"uniform\"");
+
+	return 0;
+}
+
+declare_def_handler(rr_weight, set_rr_weight)
+declare_def_snprint_defint(rr_weight, print_rr_weight, DEFAULT_RR_WEIGHT)
+declare_ovr_handler(rr_weight, set_rr_weight)
+declare_ovr_snprint(rr_weight, print_rr_weight)
+declare_hw_handler(rr_weight, set_rr_weight)
+declare_hw_snprint(rr_weight, print_rr_weight)
+declare_mp_handler(rr_weight, set_rr_weight)
+declare_mp_snprint(rr_weight, print_rr_weight)
+
+/*
+ * Parse failback into *ptr (an int).  The three keywords are stored as
+ * the negated FAILBACK_* constants; any other text goes through atoi().
+ */
+static int
+set_pgfailback(vector strvec, void *ptr)
+{
+	int *int_ptr = (int *)ptr;
+	char * buff;
+
+	buff = set_value(strvec);
+	if (!buff)
+		return 1;
+
+	if (strlen(buff) == 6 && !strcmp(buff, "manual"))
+		*int_ptr = -FAILBACK_MANUAL;
+	else if (strlen(buff) == 9 && !strcmp(buff, "immediate"))
+		*int_ptr = -FAILBACK_IMMEDIATE;
+	else if (strlen(buff) == 10 && !strcmp(buff, "followover"))
+		*int_ptr = -FAILBACK_FOLLOWOVER;
+	else
+		*int_ptr = atoi(buff);
+
+	FREE(buff);
+
+	return 0;
+}
+
+/*
+ * Inverse of set_pgfailback(); prints nothing for FAILBACK_UNDEF.
+ */
+int
+print_pgfailback (char * buff, int len, long v)
+{
+	switch(v) {
+	case  FAILBACK_UNDEF:
+		return 0;
+	case -FAILBACK_MANUAL:
+		return snprintf(buff, len, "\"manual\"");
+	case -FAILBACK_IMMEDIATE:
+		return snprintf(buff, len, "\"immediate\"");
+	case -FAILBACK_FOLLOWOVER:
+		return snprintf(buff, len, "\"followover\"");
+	default:
+		return snprintf(buff, len, "%li", v);
+	}
+}
+
+declare_def_handler(pgfailback, set_pgfailback)
+declare_def_snprint_defint(pgfailback, print_pgfailback, DEFAULT_FAILBACK)
+declare_ovr_handler(pgfailback, set_pgfailback)
+declare_ovr_snprint(pgfailback, print_pgfailback)
+declare_hw_handler(pgfailback, set_pgfailback)
+declare_hw_snprint(pgfailback, print_pgfailback)
+declare_mp_handler(pgfailback, set_pgfailback)
+declare_mp_snprint(pgfailback, print_pgfailback)
+
+/*
+ * Parse no_path_retry into *ptr (an int): "fail" or "0" map to
+ * NO_PATH_RETRY_FAIL, "queue" to NO_PATH_RETRY_QUEUE; otherwise the
+ * atoi() result is stored, replaced by NO_PATH_RETRY_UNDEF when < 1.
+ */
+static int
+no_path_retry_helper(vector strvec, void *ptr)
+{
+	int *int_ptr = (int *)ptr;
+	char * buff;
+
+	buff = set_value(strvec);
+	if (!buff)
+		return 1;
+
+	if (!strcmp(buff, "fail") || !strcmp(buff, "0"))
+		*int_ptr = NO_PATH_RETRY_FAIL;
+	else if (!strcmp(buff, "queue"))
+		*int_ptr = NO_PATH_RETRY_QUEUE;
+	else if ((*int_ptr = atoi(buff)) < 1)
+		*int_ptr = NO_PATH_RETRY_UNDEF;
+
+	FREE(buff);
+	return 0;
+}
+
+/*
+ * Inverse of no_path_retry_helper(); prints nothing for
+ * NO_PATH_RETRY_UNDEF.
+ */
+int
+print_no_path_retry(char * buff, int len, long v)
+{
+	switch(v) {
+	case NO_PATH_RETRY_UNDEF:
+		return 0;
+	case NO_PATH_RETRY_FAIL:
+		return snprintf(buff, len, "\"fail\"");
+	case NO_PATH_RETRY_QUEUE:
+		return snprintf(buff, len, "\"queue\"");
+	default:
+		return snprintf(buff, len, "%li", v);
+	}
+}
+
+declare_def_handler(no_path_retry, no_path_retry_helper)
+declare_def_snprint(no_path_retry, print_no_path_retry)
+declare_ovr_handler(no_path_retry, no_path_retry_helper)
+declare_ovr_snprint(no_path_retry, print_no_path_retry)
+declare_hw_handler(no_path_retry, no_path_retry_helper)
+declare_hw_snprint(no_path_retry, print_no_path_retry)
+declare_mp_handler(no_path_retry, no_path_retry_helper)
+declare_mp_snprint(no_path_retry, print_no_path_retry)
+
+/*
+ * "log_checker_err" keyword: "once" or "always".  Any other text leaves
+ * conf->log_checker_err unchanged.
+ */
+static int
+def_log_checker_err_handler(struct config *conf, vector strvec)
+{
+	char * buff;
+
+	buff = set_value(strvec);
+
+	if (!buff)
+		return 1;
+
+	if (strlen(buff) == 4 && !strcmp(buff, "once"))
+		conf->log_checker_err = LOG_CHKR_ERR_ONCE;
+	else if (strlen(buff) == 6 && !strcmp(buff, "always"))
+		conf->log_checker_err = LOG_CHKR_ERR_ALWAYS;
+
+	/* released with FREE() like every other set_value() buffer here */
+	FREE(buff);
+	return 0;
+}
+
+/*
+ * Print conf->log_checker_err: "once" for LOG_CHKR_ERR_ONCE, "always"
+ * for every other value (unquoted).
+ */
+static int
+snprint_def_log_checker_err (struct config *conf, char * buff, int len,
+			     const void * data)
+{
+	if (conf->log_checker_err == LOG_CHKR_ERR_ONCE)
+		return snprintf(buff, len, "once");
+	return snprintf(buff, len, "always");
+}
+
+/*
+ * Parse reservation_key.
+ *
+ * "file" selects PRKEY_SOURCE_FILE and clears the key and flags.  Any
+ * other text is handed to parse_prkey_flags(); on success the source
+ * becomes PRKEY_SOURCE_CONF and the parsed key and flags are stored.
+ * Returns 1 when set_value() yields no buffer or parse_prkey_flags()
+ * fails, 0 otherwise.
+ */
+static int
+set_reservation_key(vector strvec, struct be64 *be64_ptr, uint8_t *flags_ptr,
+		    int *source_ptr)
+{
+	char *buff;
+	uint64_t prkey;
+	uint8_t sa_flags;
+
+	buff = set_value(strvec);
+	if (!buff)
+		return 1;
+
+	if (strcmp(buff, "file") == 0) {
+		*source_ptr = PRKEY_SOURCE_FILE;
+		*flags_ptr = 0;
+		put_be64(*be64_ptr, 0);
+		FREE(buff);
+		return 0;
+	}
+
+	if (parse_prkey_flags(buff, &prkey, &sa_flags) != 0) {
+		FREE(buff);
+		return 1;
+	}
+	*source_ptr = PRKEY_SOURCE_CONF;
+	*flags_ptr = sa_flags;
+	put_be64(*be64_ptr, prkey);
+	FREE(buff);
+	return 0;
+}
+
+/*
+ * Print a reservation key: nothing for PRKEY_SOURCE_NONE, "file" for
+ * PRKEY_SOURCE_FILE, otherwise the key in hex, followed by ":aptpl"
+ * when a bit of MPATH_F_APTPL_MASK is set in flags.
+ */
+int
+print_reservation_key(char * buff, int len, struct be64 key, uint8_t flags,
+		      int source)
+{
+	char *flagstr = "";
+	if (source == PRKEY_SOURCE_NONE)
+		return 0;
+	if (source == PRKEY_SOURCE_FILE)
+		return snprintf(buff, len, "file");
+	if (flags & MPATH_F_APTPL_MASK)
+		flagstr = ":aptpl";
+	return snprintf(buff, len, "0x%" PRIx64 "%s", get_be64(key),
+			flagstr);
+}
+
+static int
+def_reservation_key_handler(struct config *conf, vector strvec)
+{
+	return set_reservation_key(strvec, &conf->reservation_key,
+				   &conf->sa_flags,
+				   &conf->prkey_source);
+}
+
+static int
+snprint_def_reservation_key (struct config *conf, char * buff, int len,
+			     const void * data)
+{
+	return print_reservation_key(buff, len, conf->reservation_key,
+				     conf->sa_flags,
+				     conf->prkey_source);
+}
+
+static int
+mp_reservation_key_handler(struct config *conf, vector strvec)
+{
+	struct mpentry * mpe = VECTOR_LAST_SLOT(conf->mptable);
+	if (!mpe)
+		return 1;
+	return set_reservation_key(strvec, &mpe->reservation_key,
+				   &mpe->sa_flags,
+				   &mpe->prkey_source);
+}
+
+static int
+snprint_mp_reservation_key (struct config *conf, char * buff, int len,
+			    const void * data)
+{
+	const struct mpentry * mpe = (const struct mpentry *)data;
+	return print_reservation_key(buff, len, mpe->reservation_key,
+				     mpe->sa_flags,
+				     mpe->prkey_source);
+}
+
+/*
+ * Parse an "off or positive count" option into *ptr (an int): "no" or
+ * "0" map to NU_NO; otherwise the atoi() result is stored, replaced by
+ * NU_UNDEF when < 1.
+ */
+static int
+set_off_int_undef(vector strvec, void *ptr)
+{
+	int *int_ptr = (int *)ptr;
+	char * buff;
+
+	buff = set_value(strvec);
+	if (!buff)
+		return 1;
+
+	if (!strcmp(buff, "no") || !strcmp(buff, "0"))
+		*int_ptr = NU_NO;
+	else if ((*int_ptr = atoi(buff)) < 1)
+		*int_ptr = NU_UNDEF;
+
+	FREE(buff);
+	return 0;
+}
+
+/*
+ * Inverse of set_off_int_undef(); prints nothing for NU_UNDEF.
+ */
+int
+print_off_int_undef(char * buff, int len, long v)
+{
+	switch(v) {
+	case NU_UNDEF:
+		return 0;
+	case NU_NO:
+		return snprintf(buff, len, "\"no\"");
+	default:
+		return snprintf(buff, len, "%li", v);
+	}
+}
+
+declare_def_handler(delay_watch_checks, set_off_int_undef)
+declare_def_snprint_defint(delay_watch_checks, print_off_int_undef,
+			   DEFAULT_DELAY_CHECKS)
+declare_ovr_handler(delay_watch_checks, set_off_int_undef)
+declare_ovr_snprint(delay_watch_checks, print_off_int_undef)
+declare_hw_handler(delay_watch_checks, set_off_int_undef)
+declare_hw_snprint(delay_watch_checks, print_off_int_undef)
+declare_mp_handler(delay_watch_checks, set_off_int_undef)
+declare_mp_snprint(delay_watch_checks, print_off_int_undef)
+declare_def_handler(delay_wait_checks, set_off_int_undef)
+declare_def_snprint_defint(delay_wait_checks, print_off_int_undef,
+			   DEFAULT_DELAY_CHECKS)
+declare_ovr_handler(delay_wait_checks, set_off_int_undef)
+declare_ovr_snprint(delay_wait_checks, print_off_int_undef)
+declare_hw_handler(delay_wait_checks, set_off_int_undef)
+declare_hw_snprint(delay_wait_checks, print_off_int_undef)
+declare_mp_handler(delay_wait_checks, set_off_int_undef)
+declare_mp_snprint(delay_wait_checks, print_off_int_undef)
+declare_def_handler(san_path_err_threshold, set_off_int_undef)
+declare_def_snprint_defint(san_path_err_threshold, print_off_int_undef,
+			   DEFAULT_ERR_CHECKS)
+declare_ovr_handler(san_path_err_threshold, set_off_int_undef)
+declare_ovr_snprint(san_path_err_threshold, print_off_int_undef)
+declare_hw_handler(san_path_err_threshold, set_off_int_undef)
+declare_hw_snprint(san_path_err_threshold, print_off_int_undef)
+declare_mp_handler(san_path_err_threshold, set_off_int_undef)
+declare_mp_snprint(san_path_err_threshold, print_off_int_undef)
+declare_def_handler(san_path_err_forget_rate, set_off_int_undef)
+declare_def_snprint_defint(san_path_err_forget_rate, print_off_int_undef,
+			   DEFAULT_ERR_CHECKS)
+declare_ovr_handler(san_path_err_forget_rate, set_off_int_undef)
+declare_ovr_snprint(san_path_err_forget_rate, print_off_int_undef)
+declare_hw_handler(san_path_err_forget_rate, set_off_int_undef)
+declare_hw_snprint(san_path_err_forget_rate, print_off_int_undef)
+declare_mp_handler(san_path_err_forget_rate, set_off_int_undef)
+declare_mp_snprint(san_path_err_forget_rate, print_off_int_undef)
+declare_def_handler(san_path_err_recovery_time, set_off_int_undef)
+declare_def_snprint_defint(san_path_err_recovery_time, print_off_int_undef,
+			   DEFAULT_ERR_CHECKS)
+declare_ovr_handler(san_path_err_recovery_time, set_off_int_undef)
+declare_ovr_snprint(san_path_err_recovery_time, print_off_int_undef)
+declare_hw_handler(san_path_err_recovery_time, set_off_int_undef)
+declare_hw_snprint(san_path_err_recovery_time, print_off_int_undef)
+declare_mp_handler(san_path_err_recovery_time, set_off_int_undef)
+declare_mp_snprint(san_path_err_recovery_time, print_off_int_undef)
+declare_def_handler(marginal_path_err_sample_time, set_off_int_undef)
+declare_def_snprint_defint(marginal_path_err_sample_time, print_off_int_undef,
+			   DEFAULT_ERR_CHECKS)
+declare_ovr_handler(marginal_path_err_sample_time, set_off_int_undef)
+declare_ovr_snprint(marginal_path_err_sample_time, print_off_int_undef)
+declare_hw_handler(marginal_path_err_sample_time, set_off_int_undef)
+declare_hw_snprint(marginal_path_err_sample_time, print_off_int_undef)
+declare_mp_handler(marginal_path_err_sample_time, set_off_int_undef)
+declare_mp_snprint(marginal_path_err_sample_time, print_off_int_undef)
+declare_def_handler(marginal_path_err_rate_threshold, set_off_int_undef)
+declare_def_snprint_defint(marginal_path_err_rate_threshold, print_off_int_undef,
+			   DEFAULT_ERR_CHECKS)
+declare_ovr_handler(marginal_path_err_rate_threshold, set_off_int_undef)
+declare_ovr_snprint(marginal_path_err_rate_threshold, print_off_int_undef)
+declare_hw_handler(marginal_path_err_rate_threshold, set_off_int_undef)
+declare_hw_snprint(marginal_path_err_rate_threshold, print_off_int_undef)
+declare_mp_handler(marginal_path_err_rate_threshold, set_off_int_undef)
+declare_mp_snprint(marginal_path_err_rate_threshold, print_off_int_undef)
+declare_def_handler(marginal_path_err_recheck_gap_time, set_off_int_undef)
+declare_def_snprint_defint(marginal_path_err_recheck_gap_time, print_off_int_undef,
+			   DEFAULT_ERR_CHECKS)
+declare_ovr_handler(marginal_path_err_recheck_gap_time, set_off_int_undef)
+declare_ovr_snprint(marginal_path_err_recheck_gap_time, print_off_int_undef)
+declare_hw_handler(marginal_path_err_recheck_gap_time, set_off_int_undef)
+declare_hw_snprint(marginal_path_err_recheck_gap_time, print_off_int_undef)
+declare_mp_handler(marginal_path_err_recheck_gap_time, set_off_int_undef)
+declare_mp_snprint(marginal_path_err_recheck_gap_time, print_off_int_undef)
+declare_def_handler(marginal_path_double_failed_time, set_off_int_undef)
+declare_def_snprint_defint(marginal_path_double_failed_time, print_off_int_undef,
+			   DEFAULT_ERR_CHECKS)
+declare_ovr_handler(marginal_path_double_failed_time, set_off_int_undef)
+declare_ovr_snprint(marginal_path_double_failed_time, print_off_int_undef)
+declare_hw_handler(marginal_path_double_failed_time, set_off_int_undef)
+declare_hw_snprint(marginal_path_double_failed_time, print_off_int_undef)
+declare_mp_handler(marginal_path_double_failed_time, set_off_int_undef)
+declare_mp_snprint(marginal_path_double_failed_time, print_off_int_undef)
+
+declare_def_handler(ghost_delay, set_off_int_undef)
+declare_def_snprint(ghost_delay, print_off_int_undef)
+declare_ovr_handler(ghost_delay, set_off_int_undef)
+declare_ovr_snprint(ghost_delay, print_off_int_undef)
+declare_hw_handler(ghost_delay, set_off_int_undef)
+declare_hw_snprint(ghost_delay, print_off_int_undef)
+declare_mp_handler(ghost_delay, set_off_int_undef)
+declare_mp_snprint(ghost_delay, print_off_int_undef)
+
+declare_def_handler(all_tg_pt, set_yes_no_undef)
+declare_def_snprint_defint(all_tg_pt, print_yes_no_undef, DEFAULT_ALL_TG_PT)
+declare_ovr_handler(all_tg_pt, set_yes_no_undef)
+declare_ovr_snprint(all_tg_pt, print_yes_no_undef)
+declare_hw_handler(all_tg_pt, set_yes_no_undef)
+declare_hw_snprint(all_tg_pt, print_yes_no_undef)
+
+declare_def_handler(marginal_pathgroups, set_yes_no)
+declare_def_snprint(marginal_pathgroups, print_yes_no)
+
+/*
+ * "uxsock_timeout" keyword: values that do not scan as unsigned, or
+ * that are not above DEFAULT_REPLY_TIMEOUT, are replaced by
+ * DEFAULT_REPLY_TIMEOUT.
+ */
+static int
+def_uxsock_timeout_handler(struct config *conf, vector strvec)
+{
+	unsigned int uxsock_timeout;
+	char *buff;
+
+	buff = set_value(strvec);
+	if (!buff)
+		return 1;
+
+	if (sscanf(buff, "%u", &uxsock_timeout) == 1 &&
+	    uxsock_timeout > DEFAULT_REPLY_TIMEOUT)
+		conf->uxsock_timeout = uxsock_timeout;
+	else
+		conf->uxsock_timeout = DEFAULT_REPLY_TIMEOUT;
+
+	/* released with FREE() like every other set_value() buffer here */
+	FREE(buff);
+	return 0;
+}
+
+/*
+ * "vpd_vendor" keyword of a device section: store the index of the
+ * vpd_vendor_pages[] entry whose name equals the value in the last
+ * hwtable entry, or 0 when no name matches.
+ */
+static int
+hw_vpd_vendor_handler(struct config *conf, vector strvec)
+{
+	int i;
+	char *buff;
+
+	struct hwentry * hwe = VECTOR_LAST_SLOT(conf->hwtable);
+	if (!hwe)
+		return 1;
+
+	buff = set_value(strvec);
+	if (!buff)
+		return 1;
+	for (i = 0; i < VPD_VP_ARRAY_SIZE; i++) {
+		if (strcmp(buff, vpd_vendor_pages[i].name) == 0) {
+			hwe->vpd_vendor_id = i;
+			goto out;
+		}
+	}
+	hwe->vpd_vendor_id = 0;
+out:
+	FREE(buff);
+	return 0;
+}
+
+/*
+ * Print the vpd_vendor_pages[] name for hwe->vpd_vendor_id; nothing
+ * when the id is 0 or outside the table.
+ */
+static int
+snprint_hw_vpd_vendor(struct config *conf, char * buff, int len,
+		      const void * data)
+{
+	const struct hwentry * hwe = (const struct hwentry *)data;
+
+	if (hwe->vpd_vendor_id > 0 && hwe->vpd_vendor_id < VPD_VP_ARRAY_SIZE)
+		return snprintf(buff, len, "%s",
+				vpd_vendor_pages[hwe->vpd_vendor_id].name);
+	return 0;
+}
+
+/*
+ * blacklist block handlers
+ */
+/* Allocate any of the five blist_* vectors that do not exist yet;
+ * returns 1 when one of them is still missing afterwards. */
+static int
+blacklist_handler(struct config *conf, vector strvec)
+{
+	if (!conf->blist_devnode)
+		conf->blist_devnode = vector_alloc();
+	if (!conf->blist_wwid)
+		conf->blist_wwid = vector_alloc();
+	if (!conf->blist_device)
+		conf->blist_device = vector_alloc();
+	if (!conf->blist_property)
+		conf->blist_property = vector_alloc();
+	if (!conf->blist_protocol)
+		conf->blist_protocol = vector_alloc();
+
+	if (!conf->blist_devnode || !conf->blist_wwid ||
+	    !conf->blist_device || !conf->blist_property ||
+	    !conf->blist_protocol)
+		return 1;
+
+	return 0;
+}
+
+/* Same as blacklist_handler(), for the five elist_* vectors. */
+static int
+blacklist_exceptions_handler(struct config *conf, vector strvec)
+{
+	if (!conf->elist_devnode)
+		conf->elist_devnode = vector_alloc();
+	if (!conf->elist_wwid)
+		conf->elist_wwid = vector_alloc();
+	if (!conf->elist_device)
+		conf->elist_device = vector_alloc();
+	if (!conf->elist_property)
+		conf->elist_property = vector_alloc();
+	if (!conf->elist_protocol)
+		conf->elist_protocol = vector_alloc();
+
+	if (!conf->elist_devnode || !conf->elist_wwid ||
+	    !conf->elist_device || !conf->elist_property ||
+	    !conf->elist_protocol)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Generate ble_<option>_handler(): hand the keyword's value to
+ * store_ble() for the conf-><option> vector.  The buffer is not freed
+ * here (presumably store_ble() takes ownership -- confirm).
+ */
+#define declare_ble_handler(option)					\
+static int								\
+ble_ ## option ## _handler (struct config *conf, vector strvec)	\
+{									\
+	char * buff;							\
+									\
+	if (!conf->option)						\
+		return 1;						\
+									\
+	buff = set_value(strvec);					\
+	if (!buff)							\
+		return 1;						\
+									\
+	return store_ble(conf->option, buff, ORIGIN_CONFIG);		\
+}
+
+/*
+ * Generate ble_<option>_<name>_handler(): hand the keyword's value to
+ * set_ble_device() for the conf-><option> vector.  vend and prod select
+ * which of the two arguments receives the value; the other is NULL.
+ * The buffer is not freed here (presumably set_ble_device() takes
+ * ownership -- confirm).
+ */
+#define declare_ble_device_handler(name, option, vend, prod)		\
+static int								\
+ble_ ## option ## _ ## name ## _handler (struct config *conf, vector strvec) \
+{									\
+	char * buff;							\
+									\
+	if (!conf->option)						\
+		return 1;						\
+									\
+	buff = set_value(strvec);					\
+	if (!buff)							\
+		return 1;						\
+									\
+	return set_ble_device(conf->option, vend, prod, ORIGIN_CONFIG); \
+}
+
+declare_ble_handler(blist_devnode)
+declare_ble_handler(elist_devnode)
+declare_ble_handler(blist_wwid)
+declare_ble_handler(elist_wwid)
+declare_ble_handler(blist_property)
+declare_ble_handler(elist_property)
+declare_ble_handler(blist_protocol)
+declare_ble_handler(elist_protocol)
+
+static int
+snprint_def_uxsock_timeout(struct config *conf, char * buff, int len,
+			   const void * data)
+{
+	return snprintf(buff, len, "%u", conf->uxsock_timeout);
+}
+
+/* Print the quoted string of a struct blentry passed as data. */
+static int
+snprint_ble_simple (struct config *conf, char * buff, int len,
+		    const void * data)
+{
+	const struct blentry * ble = (const struct blentry *)data;
+
+	return snprintf(buff, len, "\"%s\"", ble->str);
+}
+
+static int
+ble_device_handler(struct config *conf, vector strvec)
+{
+	return alloc_ble_device(conf->blist_device);
+}
+
+static int
+ble_except_device_handler(struct config *conf, vector strvec)
+{
+	return alloc_ble_device(conf->elist_device);
+}
+
+declare_ble_device_handler(vendor, blist_device, buff, NULL)
+declare_ble_device_handler(vendor, elist_device, buff, NULL)
+declare_ble_device_handler(product, blist_device, NULL, buff)
+declare_ble_device_handler(product, elist_device, NULL, buff)
+
+/*
+ * Print the quoted vendor of a struct blentry_device passed as data.
+ *
+ * The product handlers above pass NULL as the vendor, so an entry may
+ * apparently have no vendor string; in that case return 0 (nothing
+ * written) instead of handing NULL to "%s".
+ */
+static int
+snprint_bled_vendor (struct config *conf, char * buff, int len,
+		     const void * data)
+{
+	const struct blentry_device * bled =
+		(const struct blentry_device *)data;
+
+	if (!bled->vendor)
+		return 0;
+	return snprintf(buff, len, "\"%s\"", bled->vendor);
+}
+
+/*
+ * Print the quoted product of a struct blentry_device passed as data.
+ *
+ * The vendor handlers above pass NULL as the product, so an entry may
+ * apparently have no product string; in that case return 0 (nothing
+ * written) instead of handing NULL to "%s".
+ */
+static int
+snprint_bled_product (struct config *conf, char * buff, int len,
+		      const void * data)
+{
+	const struct blentry_device * bled =
+		(const struct blentry_device *)data;
+
+	if (!bled->product)
+		return 0;
+	return snprintf(buff, len, "\"%s\"", bled->product);
+}
+
+/*
+ * devices block handlers
+ */
+/* Allocate conf->hwtable if it does not exist yet; 1 on failure. */
+static int
+devices_handler(struct config *conf, vector strvec)
+{
+	if (!conf->hwtable)
+		conf->hwtable = vector_alloc();
+
+	if (!conf->hwtable)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Start of a "device" section: allocate a hwentry and append it to
+ * conf->hwtable.  The entry is released again when no slot can be
+ * allocated for it.
+ */
+static int
+device_handler(struct config *conf, vector strvec)
+{
+	struct hwentry * hwe;
+
+	hwe = alloc_hwe();
+
+	if (!hwe)
+		return 1;
+
+	if (!vector_alloc_slot(conf->hwtable)) {
+		free_hwe(hwe);
+		return 1;
+	}
+	vector_set_slot(conf->hwtable, hwe);
+
+	return 0;
+}
+
+declare_hw_handler(vendor, set_str)
+declare_hw_snprint(vendor, print_str)
+
+declare_hw_handler(product, set_str)
+declare_hw_snprint(product, print_str)
+
+declare_hw_handler(revision, set_str)
+declare_hw_snprint(revision, print_str)
+
+declare_hw_handler(bl_product, set_str)
+declare_hw_snprint(bl_product, print_str)
+
+declare_hw_handler(hwhandler, set_str)
+declare_hw_snprint(hwhandler, print_str)
+
+/*
+ * overrides handlers
+ */
+/* Allocate conf->overrides if it does not exist yet; 1 on failure. */
+static int
+overrides_handler(struct config *conf, vector strvec)
+{
+	if (!conf->overrides)
+		conf->overrides = alloc_hwe();
+
+	if (!conf->overrides)
+		return 1;
+
+	return 0;
+}
+
+
+
+/*
+ * multipaths block handlers
+ */
+/* Allocate conf->mptable if it does not exist yet; 1 on failure. */
+static int
+multipaths_handler(struct config *conf, vector strvec)
+{
+	if (!conf->mptable)
+		conf->mptable = vector_alloc();
+
+	if (!conf->mptable)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Start of a "multipath" section: allocate an mpentry and append it to
+ * conf->mptable.  The entry is released again when no slot can be
+ * allocated for it.
+ */
+static int
+multipath_handler(struct config *conf, vector strvec)
+{
+	struct mpentry * mpe;
+
+	mpe = alloc_mpe();
+
+	if (!mpe)
+		return 1;
+
+	if (!vector_alloc_slot(conf->mptable)) {
+		free_mpe(mpe);
+		return 1;
+	}
+	vector_set_slot(conf->mptable, mpe);
+
+	return 0;
+}
+
+declare_mp_handler(wwid, set_str)
+declare_mp_snprint(wwid, print_str)
+
+declare_mp_handler(alias, set_str)
+declare_mp_snprint(alias, print_str)
+
+/*
+ * deprecated handlers
+ */
+
+/* Read the keyword's value and discard it. */
+static int
+deprecated_handler(struct config *conf, vector strvec)
+{
+	char * buff;
+
+	buff = set_value(strvec);
+
+	if (!buff)
+		return 1;
+
+	FREE(buff);
+	return 0;
+}
+
+/* Deprecated keywords print nothing. */
+static int
+snprint_deprecated (struct config *conf, char * buff, int len,
+		    const void * data)
+{
+	return 0;
+}
+
+#define __deprecated
+
+/*
+ * If you add or remove a keyword also update multipath/multipath.conf.5
+ */
+void
+init_keywords(vector keywords)
+{
+	install_keyword_root("defaults", NULL);
+	install_keyword("verbosity", &def_verbosity_handler, &snprint_def_verbosity);
+	install_keyword("polling_interval", &checkint_handler, &snprint_def_checkint);
+	install_keyword("max_polling_interval", &def_max_checkint_handler, &snprint_def_max_checkint);
+	install_keyword("reassign_maps", &def_reassign_maps_handler, &snprint_def_reassign_maps);
+	install_keyword("multipath_dir", &def_multipath_dir_handler, &snprint_def_multipath_dir);
+	install_keyword("path_selector", &def_selector_handler, &snprint_def_selector);
+	install_keyword("path_grouping_policy", &def_pgpolicy_handler, &snprint_def_pgpolicy);
+	install_keyword("uid_attrs", &uid_attrs_handler, &snprint_uid_attrs);
+	install_keyword("uid_attribute", &def_uid_attribute_handler, &snprint_def_uid_attribute);
+	install_keyword("getuid_callout", &def_getuid_handler, &snprint_def_getuid);
+	install_keyword("prio", &def_prio_name_handler, &snprint_def_prio_name);
+	install_keyword("prio_args", &def_prio_args_handler, &snprint_def_prio_args);
+	install_keyword("features", &def_features_handler, &snprint_def_features);
+	install_keyword("path_checker", &def_checker_name_handler, &snprint_def_checker_name);
+	install_keyword("checker", &def_checker_name_handler, NULL);
+	install_keyword("alias_prefix", &def_alias_prefix_handler, &snprint_def_alias_prefix);
+	install_keyword("failback", &def_pgfailback_handler, &snprint_def_pgfailback);
+	install_keyword("rr_min_io", &def_minio_handler, &snprint_def_minio);
+	install_keyword("rr_min_io_rq", &def_minio_rq_handler, &snprint_def_minio_rq);
+	install_keyword("max_fds", &max_fds_handler, &snprint_max_fds);
+	install_keyword("rr_weight",
&def_rr_weight_handler, &snprint_def_rr_weight); + install_keyword("no_path_retry", &def_no_path_retry_handler, &snprint_def_no_path_retry); + install_keyword("queue_without_daemon", &def_queue_without_daemon_handler, &snprint_def_queue_without_daemon); + install_keyword("checker_timeout", &def_checker_timeout_handler, &snprint_def_checker_timeout); + install_keyword("pg_timeout", &deprecated_handler, &snprint_deprecated); + install_keyword("flush_on_last_del", &def_flush_on_last_del_handler, &snprint_def_flush_on_last_del); + install_keyword("user_friendly_names", &def_user_friendly_names_handler, &snprint_def_user_friendly_names); + install_keyword("mode", &def_mode_handler, &snprint_def_mode); + install_keyword("uid", &def_uid_handler, &snprint_def_uid); + install_keyword("gid", &def_gid_handler, &snprint_def_gid); + install_keyword("fast_io_fail_tmo", &def_fast_io_fail_handler, &snprint_def_fast_io_fail); + install_keyword("dev_loss_tmo", &def_dev_loss_handler, &snprint_def_dev_loss); + install_keyword("bindings_file", &def_bindings_file_handler, &snprint_def_bindings_file); + install_keyword("wwids_file", &def_wwids_file_handler, &snprint_def_wwids_file); + install_keyword("prkeys_file", &def_prkeys_file_handler, &snprint_def_prkeys_file); + install_keyword("log_checker_err", &def_log_checker_err_handler, &snprint_def_log_checker_err); + install_keyword("reservation_key", &def_reservation_key_handler, &snprint_def_reservation_key); + install_keyword("all_tg_pt", &def_all_tg_pt_handler, &snprint_def_all_tg_pt); + install_keyword("retain_attached_hw_handler", &def_retain_hwhandler_handler, &snprint_def_retain_hwhandler); + install_keyword("detect_prio", &def_detect_prio_handler, &snprint_def_detect_prio); + install_keyword("detect_checker", &def_detect_checker_handler, &snprint_def_detect_checker); + install_keyword("force_sync", &def_force_sync_handler, &snprint_def_force_sync); + install_keyword("strict_timing", &def_strict_timing_handler, 
&snprint_def_strict_timing); + install_keyword("deferred_remove", &def_deferred_remove_handler, &snprint_def_deferred_remove); + install_keyword("partition_delimiter", &def_partition_delim_handler, &snprint_def_partition_delim); + install_keyword("config_dir", &def_config_dir_handler, &snprint_def_config_dir); + install_keyword("delay_watch_checks", &def_delay_watch_checks_handler, &snprint_def_delay_watch_checks); + install_keyword("delay_wait_checks", &def_delay_wait_checks_handler, &snprint_def_delay_wait_checks); + install_keyword("san_path_err_threshold", &def_san_path_err_threshold_handler, &snprint_def_san_path_err_threshold); + install_keyword("san_path_err_forget_rate", &def_san_path_err_forget_rate_handler, &snprint_def_san_path_err_forget_rate); + install_keyword("san_path_err_recovery_time", &def_san_path_err_recovery_time_handler, &snprint_def_san_path_err_recovery_time); + install_keyword("marginal_path_err_sample_time", &def_marginal_path_err_sample_time_handler, &snprint_def_marginal_path_err_sample_time); + install_keyword("marginal_path_err_rate_threshold", &def_marginal_path_err_rate_threshold_handler, &snprint_def_marginal_path_err_rate_threshold); + install_keyword("marginal_path_err_recheck_gap_time", &def_marginal_path_err_recheck_gap_time_handler, &snprint_def_marginal_path_err_recheck_gap_time); + install_keyword("marginal_path_double_failed_time", &def_marginal_path_double_failed_time_handler, &snprint_def_marginal_path_double_failed_time); + + install_keyword("find_multipaths", &def_find_multipaths_handler, &snprint_def_find_multipaths); + install_keyword("uxsock_timeout", &def_uxsock_timeout_handler, &snprint_def_uxsock_timeout); + install_keyword("retrigger_tries", &def_retrigger_tries_handler, &snprint_def_retrigger_tries); + install_keyword("retrigger_delay", &def_retrigger_delay_handler, &snprint_def_retrigger_delay); + install_keyword("missing_uev_wait_timeout", &def_uev_wait_timeout_handler, &snprint_def_uev_wait_timeout); + 
install_keyword("skip_kpartx", &def_skip_kpartx_handler, &snprint_def_skip_kpartx); + install_keyword("disable_changed_wwids", &def_disable_changed_wwids_handler, &snprint_def_disable_changed_wwids); + install_keyword("remove_retries", &def_remove_retries_handler, &snprint_def_remove_retries); + install_keyword("max_sectors_kb", &def_max_sectors_kb_handler, &snprint_def_max_sectors_kb); + install_keyword("ghost_delay", &def_ghost_delay_handler, &snprint_def_ghost_delay); + install_keyword("find_multipaths_timeout", + &def_find_multipaths_timeout_handler, + &snprint_def_find_multipaths_timeout); + install_keyword("enable_foreign", &def_enable_foreign_handler, + &snprint_def_enable_foreign); + install_keyword("marginal_pathgroups", &def_marginal_pathgroups_handler, &snprint_def_marginal_pathgroups); + __deprecated install_keyword("default_selector", &def_selector_handler, NULL); + __deprecated install_keyword("default_path_grouping_policy", &def_pgpolicy_handler, NULL); + __deprecated install_keyword("default_uid_attribute", &def_uid_attribute_handler, NULL); + __deprecated install_keyword("default_getuid_callout", &def_getuid_handler, NULL); + __deprecated install_keyword("default_features", &def_features_handler, NULL); + __deprecated install_keyword("default_path_checker", &def_checker_name_handler, NULL); + + install_keyword_root("blacklist", &blacklist_handler); + install_keyword_multi("devnode", &ble_blist_devnode_handler, &snprint_ble_simple); + install_keyword_multi("wwid", &ble_blist_wwid_handler, &snprint_ble_simple); + install_keyword_multi("property", &ble_blist_property_handler, &snprint_ble_simple); + install_keyword_multi("protocol", &ble_blist_protocol_handler, &snprint_ble_simple); + install_keyword_multi("device", &ble_device_handler, NULL); + install_sublevel(); + install_keyword("vendor", &ble_blist_device_vendor_handler, &snprint_bled_vendor); + install_keyword("product", &ble_blist_device_product_handler, &snprint_bled_product); + 
install_sublevel_end(); + install_keyword_root("blacklist_exceptions", &blacklist_exceptions_handler); + install_keyword_multi("devnode", &ble_elist_devnode_handler, &snprint_ble_simple); + install_keyword_multi("wwid", &ble_elist_wwid_handler, &snprint_ble_simple); + install_keyword_multi("property", &ble_elist_property_handler, &snprint_ble_simple); + install_keyword_multi("protocol", &ble_elist_protocol_handler, &snprint_ble_simple); + install_keyword_multi("device", &ble_except_device_handler, NULL); + install_sublevel(); + install_keyword("vendor", &ble_elist_device_vendor_handler, &snprint_bled_vendor); + install_keyword("product", &ble_elist_device_product_handler, &snprint_bled_product); + install_sublevel_end(); + +#if 0 + __deprecated install_keyword_root("devnode_blacklist", &blacklist_handler); + __deprecated install_keyword("devnode", &ble_devnode_handler, &snprint_ble_simple); + __deprecated install_keyword("wwid", &ble_wwid_handler, &snprint_ble_simple); + __deprecated install_keyword("device", &ble_device_handler, NULL); + __deprecated install_sublevel(); + __deprecated install_keyword("vendor", &ble_vendor_handler, &snprint_bled_vendor); + __deprecated install_keyword("product", &ble_product_handler, &snprint_bled_product); + __deprecated install_sublevel_end(); +#endif +/* + * If you add or remove a "device subsection" keyword also update + * multipath/multipath.conf.5 and the TEMPLATE in libmultipath/hwtable.c + */ + install_keyword_root("devices", &devices_handler); + install_keyword_multi("device", &device_handler, NULL); + install_sublevel(); + install_keyword("vendor", &hw_vendor_handler, &snprint_hw_vendor); + install_keyword("product", &hw_product_handler, &snprint_hw_product); + install_keyword("revision", &hw_revision_handler, &snprint_hw_revision); + install_keyword("product_blacklist", &hw_bl_product_handler, &snprint_hw_bl_product); + install_keyword("path_grouping_policy", &hw_pgpolicy_handler, &snprint_hw_pgpolicy); + 
install_keyword("uid_attribute", &hw_uid_attribute_handler, &snprint_hw_uid_attribute); + install_keyword("getuid_callout", &hw_getuid_handler, &snprint_hw_getuid); + install_keyword("path_selector", &hw_selector_handler, &snprint_hw_selector); + install_keyword("path_checker", &hw_checker_name_handler, &snprint_hw_checker_name); + install_keyword("checker", &hw_checker_name_handler, NULL); + install_keyword("alias_prefix", &hw_alias_prefix_handler, &snprint_hw_alias_prefix); + install_keyword("features", &hw_features_handler, &snprint_hw_features); + install_keyword("hardware_handler", &hw_hwhandler_handler, &snprint_hw_hwhandler); + install_keyword("prio", &hw_prio_name_handler, &snprint_hw_prio_name); + install_keyword("prio_args", &hw_prio_args_handler, &snprint_hw_prio_args); + install_keyword("failback", &hw_pgfailback_handler, &snprint_hw_pgfailback); + install_keyword("rr_weight", &hw_rr_weight_handler, &snprint_hw_rr_weight); + install_keyword("no_path_retry", &hw_no_path_retry_handler, &snprint_hw_no_path_retry); + install_keyword("rr_min_io", &hw_minio_handler, &snprint_hw_minio); + install_keyword("rr_min_io_rq", &hw_minio_rq_handler, &snprint_hw_minio_rq); + install_keyword("pg_timeout", &deprecated_handler, &snprint_deprecated); + install_keyword("flush_on_last_del", &hw_flush_on_last_del_handler, &snprint_hw_flush_on_last_del); + install_keyword("fast_io_fail_tmo", &hw_fast_io_fail_handler, &snprint_hw_fast_io_fail); + install_keyword("dev_loss_tmo", &hw_dev_loss_handler, &snprint_hw_dev_loss); + install_keyword("user_friendly_names", &hw_user_friendly_names_handler, &snprint_hw_user_friendly_names); + install_keyword("retain_attached_hw_handler", &hw_retain_hwhandler_handler, &snprint_hw_retain_hwhandler); + install_keyword("detect_prio", &hw_detect_prio_handler, &snprint_hw_detect_prio); + install_keyword("detect_checker", &hw_detect_checker_handler, &snprint_hw_detect_checker); + install_keyword("deferred_remove", &hw_deferred_remove_handler, 
&snprint_hw_deferred_remove); + install_keyword("delay_watch_checks", &hw_delay_watch_checks_handler, &snprint_hw_delay_watch_checks); + install_keyword("delay_wait_checks", &hw_delay_wait_checks_handler, &snprint_hw_delay_wait_checks); + install_keyword("san_path_err_threshold", &hw_san_path_err_threshold_handler, &snprint_hw_san_path_err_threshold); + install_keyword("san_path_err_forget_rate", &hw_san_path_err_forget_rate_handler, &snprint_hw_san_path_err_forget_rate); + install_keyword("san_path_err_recovery_time", &hw_san_path_err_recovery_time_handler, &snprint_hw_san_path_err_recovery_time); + install_keyword("marginal_path_err_sample_time", &hw_marginal_path_err_sample_time_handler, &snprint_hw_marginal_path_err_sample_time); + install_keyword("marginal_path_err_rate_threshold", &hw_marginal_path_err_rate_threshold_handler, &snprint_hw_marginal_path_err_rate_threshold); + install_keyword("marginal_path_err_recheck_gap_time", &hw_marginal_path_err_recheck_gap_time_handler, &snprint_hw_marginal_path_err_recheck_gap_time); + install_keyword("marginal_path_double_failed_time", &hw_marginal_path_double_failed_time_handler, &snprint_hw_marginal_path_double_failed_time); + install_keyword("skip_kpartx", &hw_skip_kpartx_handler, &snprint_hw_skip_kpartx); + install_keyword("max_sectors_kb", &hw_max_sectors_kb_handler, &snprint_hw_max_sectors_kb); + install_keyword("ghost_delay", &hw_ghost_delay_handler, &snprint_hw_ghost_delay); + install_keyword("all_tg_pt", &hw_all_tg_pt_handler, &snprint_hw_all_tg_pt); + install_keyword("vpd_vendor", &hw_vpd_vendor_handler, &snprint_hw_vpd_vendor); + install_sublevel_end(); + + install_keyword_root("overrides", &overrides_handler); + install_keyword("path_grouping_policy", &ovr_pgpolicy_handler, &snprint_ovr_pgpolicy); + install_keyword("uid_attribute", &ovr_uid_attribute_handler, &snprint_ovr_uid_attribute); + install_keyword("getuid_callout", &ovr_getuid_handler, &snprint_ovr_getuid); + install_keyword("path_selector", 
&ovr_selector_handler, &snprint_ovr_selector); + install_keyword("path_checker", &ovr_checker_name_handler, &snprint_ovr_checker_name); + install_keyword("checker", &ovr_checker_name_handler, NULL); + install_keyword("alias_prefix", &ovr_alias_prefix_handler, &snprint_ovr_alias_prefix); + install_keyword("features", &ovr_features_handler, &snprint_ovr_features); + install_keyword("prio", &ovr_prio_name_handler, &snprint_ovr_prio_name); + install_keyword("prio_args", &ovr_prio_args_handler, &snprint_ovr_prio_args); + install_keyword("failback", &ovr_pgfailback_handler, &snprint_ovr_pgfailback); + install_keyword("rr_weight", &ovr_rr_weight_handler, &snprint_ovr_rr_weight); + install_keyword("no_path_retry", &ovr_no_path_retry_handler, &snprint_ovr_no_path_retry); + install_keyword("rr_min_io", &ovr_minio_handler, &snprint_ovr_minio); + install_keyword("rr_min_io_rq", &ovr_minio_rq_handler, &snprint_ovr_minio_rq); + install_keyword("flush_on_last_del", &ovr_flush_on_last_del_handler, &snprint_ovr_flush_on_last_del); + install_keyword("fast_io_fail_tmo", &ovr_fast_io_fail_handler, &snprint_ovr_fast_io_fail); + install_keyword("dev_loss_tmo", &ovr_dev_loss_handler, &snprint_ovr_dev_loss); + install_keyword("user_friendly_names", &ovr_user_friendly_names_handler, &snprint_ovr_user_friendly_names); + install_keyword("retain_attached_hw_handler", &ovr_retain_hwhandler_handler, &snprint_ovr_retain_hwhandler); + install_keyword("detect_prio", &ovr_detect_prio_handler, &snprint_ovr_detect_prio); + install_keyword("detect_checker", &ovr_detect_checker_handler, &snprint_ovr_detect_checker); + install_keyword("deferred_remove", &ovr_deferred_remove_handler, &snprint_ovr_deferred_remove); + install_keyword("delay_watch_checks", &ovr_delay_watch_checks_handler, &snprint_ovr_delay_watch_checks); + install_keyword("delay_wait_checks", &ovr_delay_wait_checks_handler, &snprint_ovr_delay_wait_checks); + install_keyword("san_path_err_threshold", &ovr_san_path_err_threshold_handler, 
&snprint_ovr_san_path_err_threshold); + install_keyword("san_path_err_forget_rate", &ovr_san_path_err_forget_rate_handler, &snprint_ovr_san_path_err_forget_rate); + install_keyword("san_path_err_recovery_time", &ovr_san_path_err_recovery_time_handler, &snprint_ovr_san_path_err_recovery_time); + install_keyword("marginal_path_err_sample_time", &ovr_marginal_path_err_sample_time_handler, &snprint_ovr_marginal_path_err_sample_time); + install_keyword("marginal_path_err_rate_threshold", &ovr_marginal_path_err_rate_threshold_handler, &snprint_ovr_marginal_path_err_rate_threshold); + install_keyword("marginal_path_err_recheck_gap_time", &ovr_marginal_path_err_recheck_gap_time_handler, &snprint_ovr_marginal_path_err_recheck_gap_time); + install_keyword("marginal_path_double_failed_time", &ovr_marginal_path_double_failed_time_handler, &snprint_ovr_marginal_path_double_failed_time); + + install_keyword("skip_kpartx", &ovr_skip_kpartx_handler, &snprint_ovr_skip_kpartx); + install_keyword("max_sectors_kb", &ovr_max_sectors_kb_handler, &snprint_ovr_max_sectors_kb); + install_keyword("ghost_delay", &ovr_ghost_delay_handler, &snprint_ovr_ghost_delay); + install_keyword("all_tg_pt", &ovr_all_tg_pt_handler, &snprint_ovr_all_tg_pt); + + install_keyword_root("multipaths", &multipaths_handler); + install_keyword_multi("multipath", &multipath_handler, NULL); + install_sublevel(); + install_keyword("wwid", &mp_wwid_handler, &snprint_mp_wwid); + install_keyword("alias", &mp_alias_handler, &snprint_mp_alias); + install_keyword("path_grouping_policy", &mp_pgpolicy_handler, &snprint_mp_pgpolicy); + install_keyword("path_selector", &mp_selector_handler, &snprint_mp_selector); + install_keyword("prio", &mp_prio_name_handler, &snprint_mp_prio_name); + install_keyword("prio_args", &mp_prio_args_handler, &snprint_mp_prio_args); + install_keyword("failback", &mp_pgfailback_handler, &snprint_mp_pgfailback); + install_keyword("rr_weight", &mp_rr_weight_handler, &snprint_mp_rr_weight); + 
install_keyword("no_path_retry", &mp_no_path_retry_handler, &snprint_mp_no_path_retry); + install_keyword("rr_min_io", &mp_minio_handler, &snprint_mp_minio); + install_keyword("rr_min_io_rq", &mp_minio_rq_handler, &snprint_mp_minio_rq); + install_keyword("pg_timeout", &deprecated_handler, &snprint_deprecated); + install_keyword("flush_on_last_del", &mp_flush_on_last_del_handler, &snprint_mp_flush_on_last_del); + install_keyword("features", &mp_features_handler, &snprint_mp_features); + install_keyword("mode", &mp_mode_handler, &snprint_mp_mode); + install_keyword("uid", &mp_uid_handler, &snprint_mp_uid); + install_keyword("gid", &mp_gid_handler, &snprint_mp_gid); + install_keyword("reservation_key", &mp_reservation_key_handler, &snprint_mp_reservation_key); + install_keyword("user_friendly_names", &mp_user_friendly_names_handler, &snprint_mp_user_friendly_names); + install_keyword("deferred_remove", &mp_deferred_remove_handler, &snprint_mp_deferred_remove); + install_keyword("delay_watch_checks", &mp_delay_watch_checks_handler, &snprint_mp_delay_watch_checks); + install_keyword("delay_wait_checks", &mp_delay_wait_checks_handler, &snprint_mp_delay_wait_checks); + install_keyword("san_path_err_threshold", &mp_san_path_err_threshold_handler, &snprint_mp_san_path_err_threshold); + install_keyword("san_path_err_forget_rate", &mp_san_path_err_forget_rate_handler, &snprint_mp_san_path_err_forget_rate); + install_keyword("san_path_err_recovery_time", &mp_san_path_err_recovery_time_handler, &snprint_mp_san_path_err_recovery_time); + install_keyword("marginal_path_err_sample_time", &mp_marginal_path_err_sample_time_handler, &snprint_mp_marginal_path_err_sample_time); + install_keyword("marginal_path_err_rate_threshold", &mp_marginal_path_err_rate_threshold_handler, &snprint_mp_marginal_path_err_rate_threshold); + install_keyword("marginal_path_err_recheck_gap_time", &mp_marginal_path_err_recheck_gap_time_handler, &snprint_mp_marginal_path_err_recheck_gap_time); + 
install_keyword("marginal_path_double_failed_time", &mp_marginal_path_double_failed_time_handler, &snprint_mp_marginal_path_double_failed_time); + install_keyword("skip_kpartx", &mp_skip_kpartx_handler, &snprint_mp_skip_kpartx); + install_keyword("max_sectors_kb", &mp_max_sectors_kb_handler, &snprint_mp_max_sectors_kb); + install_keyword("ghost_delay", &mp_ghost_delay_handler, &snprint_mp_ghost_delay); + install_sublevel_end(); +} diff --git a/libmultipath/dict.h b/libmultipath/dict.h new file mode 100644 index 0000000..a40ac66 --- /dev/null +++ b/libmultipath/dict.h @@ -0,0 +1,21 @@ +#ifndef _DICT_H +#define _DICT_H + +#ifndef _VECTOR_H +#include "vector.h" +#endif + +#include "byteorder.h" + +void init_keywords(vector keywords); +int get_sys_max_fds(int *); +int print_rr_weight(char *buff, int len, long v); +int print_pgfailback(char *buff, int len, long v); +int print_pgpolicy(char *buff, int len, long v); +int print_no_path_retry(char *buff, int len, long v); +int print_fast_io_fail(char *buff, int len, long v); +int print_dev_loss(char *buff, int len, unsigned long v); +int print_reservation_key(char * buff, int len, struct be64 key, uint8_t + flags, int source); +int print_off_int_undef(char *buff, int len, long v); +#endif /* _DICT_H */ diff --git a/libmultipath/discovery.c b/libmultipath/discovery.c new file mode 100644 index 0000000..ee3290c --- /dev/null +++ b/libmultipath/discovery.c @@ -0,0 +1,2206 @@ +/* + * Copyright (c) 2004, 2005, 2006 Christophe Varoqui + * Copyright (c) 2005 Stefan Bader, IBM + * Copyright (c) 2005 Mike Anderson + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "checkers.h" +#include "vector.h" +#include "memory.h" +#include "util.h" +#include "structs.h" +#include "config.h" +#include "blacklist.h" +#include "callout.h" +#include "debug.h" +#include "propsel.h" +#include "sg_include.h" +#include "sysfs.h" +#include "discovery.h" +#include "prio.h" 
+#include "defaults.h" +#include "unaligned.h" +#include "prioritizers/alua_rtpg.h" +#include "foreign.h" + +struct vpd_vendor_page vpd_vendor_pages[VPD_VP_ARRAY_SIZE] = { + [VPD_VP_UNDEF] = { 0x00, "undef" }, + [VPD_VP_HP3PAR] = { 0xc0, "hp3par" }, +}; + +int +alloc_path_with_pathinfo (struct config *conf, struct udev_device *udevice, + const char *wwid, int flag, struct path **pp_ptr) +{ + int err = PATHINFO_FAILED; + struct path * pp; + const char * devname; + + if (pp_ptr) + *pp_ptr = NULL; + + devname = udev_device_get_sysname(udevice); + if (!devname) + return PATHINFO_FAILED; + + pp = alloc_path(); + + if (!pp) + return PATHINFO_FAILED; + + if (wwid) + strlcpy(pp->wwid, wwid, sizeof(pp->wwid)); + + if (safe_sprintf(pp->dev, "%s", devname)) { + condlog(0, "pp->dev too small"); + } else { + pp->udev = udev_device_ref(udevice); + err = pathinfo(pp, conf, flag | DI_BLACKLIST); + } + + if (err || !pp_ptr) + free_path(pp); + else if (pp_ptr) + *pp_ptr = pp; + return err; +} + +int +store_pathinfo (vector pathvec, struct config *conf, + struct udev_device *udevice, int flag, struct path **pp_ptr) +{ + int err = PATHINFO_FAILED; + struct path * pp; + const char * devname; + + if (pp_ptr) + *pp_ptr = NULL; + + devname = udev_device_get_sysname(udevice); + if (!devname) + return PATHINFO_FAILED; + + pp = alloc_path(); + + if (!pp) + return PATHINFO_FAILED; + + if(safe_sprintf(pp->dev, "%s", devname)) { + condlog(0, "pp->dev too small"); + goto out; + } + pp->udev = udev_device_ref(udevice); + err = pathinfo(pp, conf, flag); + if (err) + goto out; + + err = store_path(pathvec, pp); + if (err) + goto out; + pp->checkint = conf->checkint; + +out: + if (err) + free_path(pp); + else if (pp_ptr) + *pp_ptr = pp; + return err; +} + +static int +path_discover (vector pathvec, struct config * conf, + struct udev_device *udevice, int flag) +{ + struct path * pp; + const char * devname; + + devname = udev_device_get_sysname(udevice); + if (!devname) + return PATHINFO_FAILED; + + 
pp = find_path_by_dev(pathvec, devname); + if (!pp) { + char devt[BLK_DEV_SIZE]; + dev_t devnum = udev_device_get_devnum(udevice); + + snprintf(devt, BLK_DEV_SIZE, "%d:%d", + major(devnum), minor(devnum)); + pp = find_path_by_devt(pathvec, devt); + if (!pp) + return store_pathinfo(pathvec, conf, + udevice, flag | DI_BLACKLIST, + NULL); + } + return pathinfo(pp, conf, flag); +} + +static void cleanup_udev_enumerate_ptr(void *arg) +{ + struct udev_enumerate *ue; + + if (!arg) + return; + ue = *((struct udev_enumerate**) arg); + if (ue) + (void)udev_enumerate_unref(ue); +} + +static void cleanup_udev_device_ptr(void *arg) +{ + struct udev_device *ud; + + if (!arg) + return; + ud = *((struct udev_device**) arg); + if (ud) + (void)udev_device_unref(ud); +} + +int +path_discovery (vector pathvec, int flag) +{ + struct udev_enumerate *udev_iter = NULL; + struct udev_list_entry *entry; + struct udev_device *udevice = NULL; + struct config *conf; + int num_paths = 0, total_paths = 0, ret; + + pthread_cleanup_push(cleanup_udev_enumerate_ptr, &udev_iter); + pthread_cleanup_push(cleanup_udev_device_ptr, &udevice); + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + + udev_iter = udev_enumerate_new(udev); + if (!udev_iter) { + ret = -ENOMEM; + goto out; + } + + if (udev_enumerate_add_match_subsystem(udev_iter, "block") < 0 || + udev_enumerate_add_match_is_initialized(udev_iter) < 0 || + udev_enumerate_scan_devices(udev_iter) < 0) { + condlog(1, "%s: error setting up udev_enumerate: %m", __func__); + ret = -1; + goto out; + } + + udev_list_entry_foreach(entry, + udev_enumerate_get_list_entry(udev_iter)) { + const char *devtype; + const char *devpath; + + devpath = udev_list_entry_get_name(entry); + condlog(4, "Discover device %s", devpath); + udevice = udev_device_new_from_syspath(udev, devpath); + if (!udevice) { + condlog(4, "%s: no udev information", devpath); + continue; + } + devtype = udev_device_get_devtype(udevice); + if(devtype && 
!strncmp(devtype, "disk", 4)) { + total_paths++; + if (path_discover(pathvec, conf, + udevice, flag) == PATHINFO_OK) + num_paths++; + } + udevice = udev_device_unref(udevice); + } + ret = total_paths - num_paths; + condlog(4, "Discovered %d/%d paths", num_paths, total_paths); +out: + pthread_cleanup_pop(1); + pthread_cleanup_pop(1); + pthread_cleanup_pop(1); + return ret; +} + +#define declare_sysfs_get_str(fname) \ +ssize_t \ +sysfs_get_##fname (struct udev_device * udev, char * buff, size_t len) \ +{ \ + size_t l; \ + const char * attr; \ + const char * devname; \ + \ + if (!udev) \ + return -ENOSYS; \ + \ + devname = udev_device_get_sysname(udev); \ + \ + attr = udev_device_get_sysattr_value(udev, #fname); \ + if (!attr) { \ + condlog(3, "%s: attribute %s not found in sysfs", \ + devname, #fname); \ + return -ENXIO; \ + } \ + for (l = strlen(attr); l >= 1 && isspace(attr[l-1]); l--); \ + if (l > len) { \ + condlog(3, "%s: overflow in attribute %s", \ + devname, #fname); \ + return -EINVAL; \ + } \ + strlcpy(buff, attr, len); \ + return strchop(buff); \ +} + +declare_sysfs_get_str(devtype); +declare_sysfs_get_str(vendor); +declare_sysfs_get_str(model); +declare_sysfs_get_str(rev); + +static ssize_t +sysfs_get_binary (struct udev_device * udev, const char *attrname, + unsigned char *buff, size_t len) +{ + ssize_t attr_len; + const char * devname; + + if (!udev) { + condlog(3, "No udev device given\n"); + return -ENOSYS; + } + + devname = udev_device_get_sysname(udev); + attr_len = sysfs_bin_attr_get_value(udev, attrname, buff, len); + if (attr_len < 0) { + condlog(3, "%s: attribute %s not found in sysfs", + devname, attrname); + return attr_len; + } + return attr_len; +} + +ssize_t sysfs_get_vpd(struct udev_device * udev, unsigned char pg, + unsigned char *buff, size_t len) +{ + char attrname[9]; + + snprintf(attrname, sizeof(attrname), "vpd_pg%02x", pg); + return sysfs_get_binary(udev, attrname, buff, len); +} + +ssize_t sysfs_get_inquiry(struct udev_device * 
udev, + unsigned char *buff, size_t len) +{ + return sysfs_get_binary(udev, "inquiry", buff, len); +} + +int +sysfs_get_timeout(const struct path *pp, unsigned int *timeout) +{ + const char *attr = NULL; + const char *subsys; + struct udev_device *parent; + char *eptr; + unsigned long t; + + if (!pp->udev || pp->bus != SYSFS_BUS_SCSI) + return -ENOSYS; + + parent = pp->udev; + while (parent) { + subsys = udev_device_get_subsystem(parent); + attr = udev_device_get_sysattr_value(parent, "timeout"); + if (subsys && attr) + break; + parent = udev_device_get_parent(parent); + } + if (!attr) { + condlog(3, "%s: No timeout value in sysfs", pp->dev); + return -ENXIO; + } + + t = strtoul(attr, &eptr, 0); + if (attr == eptr || t == ULONG_MAX) { + condlog(3, "%s: Cannot parse timeout attribute '%s'", + pp->dev, attr); + return -EINVAL; + } + if (t > UINT_MAX) { + condlog(3, "%s: Overflow in timeout value '%s'", + pp->dev, attr); + return -ERANGE; + } + *timeout = t; + + return 1; +} + +static int +sysfs_get_tgt_nodename(struct path *pp, char *node) +{ + const char *tgtname, *value; + struct udev_device *parent, *tgtdev; + int host, channel, tgtid = -1; + + parent = udev_device_get_parent_with_subsystem_devtype(pp->udev, "scsi", "scsi_device"); + if (!parent) + return 1; + /* Check for SAS */ + value = udev_device_get_sysattr_value(parent, "sas_address"); + if (value) { + tgtdev = udev_device_get_parent(parent); + while (tgtdev) { + tgtname = udev_device_get_sysname(tgtdev); + if (sscanf(tgtname, "end_device-%d:%d", + &host, &tgtid) == 2) + break; + tgtdev = udev_device_get_parent(tgtdev); + tgtid = -1; + } + if (tgtid >= 0) { + pp->sg_id.proto_id = SCSI_PROTOCOL_SAS; + pp->sg_id.transport_id = tgtid; + strlcpy(node, value, NODE_NAME_SIZE); + return 0; + } + } + + /* Check for USB */ + tgtdev = udev_device_get_parent(parent); + while (tgtdev) { + value = udev_device_get_subsystem(tgtdev); + if (value && !strcmp(value, "usb")) { + pp->sg_id.proto_id = SCSI_PROTOCOL_UNSPEC; + 
tgtname = udev_device_get_sysname(tgtdev); + strlcpy(node, tgtname, NODE_NAME_SIZE); + condlog(3, "%s: skip USB device %s", pp->dev, node); + return 1; + } + tgtdev = udev_device_get_parent(tgtdev); + } + parent = udev_device_get_parent_with_subsystem_devtype(pp->udev, "scsi", "scsi_target"); + if (!parent) + return 1; + /* Check for FibreChannel */ + tgtdev = udev_device_get_parent(parent); + value = udev_device_get_sysname(tgtdev); + if (sscanf(value, "rport-%d:%d-%d", + &host, &channel, &tgtid) == 3) { + tgtdev = udev_device_new_from_subsystem_sysname(udev, + "fc_remote_ports", value); + if (tgtdev) { + condlog(3, "SCSI target %d:%d:%d -> " + "FC rport %d:%d-%d", + pp->sg_id.host_no, pp->sg_id.channel, + pp->sg_id.scsi_id, host, channel, + tgtid); + value = udev_device_get_sysattr_value(tgtdev, + "node_name"); + if (value) { + pp->sg_id.proto_id = SCSI_PROTOCOL_FCP; + pp->sg_id.transport_id = tgtid; + strlcpy(node, value, NODE_NAME_SIZE); + udev_device_unref(tgtdev); + return 0; + } else + udev_device_unref(tgtdev); + } + } + + /* Check for iSCSI */ + parent = pp->udev; + tgtname = NULL; + while (parent) { + tgtname = udev_device_get_sysname(parent); + if (tgtname && sscanf(tgtname , "session%d", &tgtid) == 1) + break; + parent = udev_device_get_parent(parent); + tgtname = NULL; + tgtid = -1; + } + if (parent && tgtname) { + tgtdev = udev_device_new_from_subsystem_sysname(udev, + "iscsi_session", tgtname); + if (tgtdev) { + const char *value; + + value = udev_device_get_sysattr_value(tgtdev, "targetname"); + if (value) { + pp->sg_id.proto_id = SCSI_PROTOCOL_ISCSI; + pp->sg_id.transport_id = tgtid; + strlcpy(node, value, NODE_NAME_SIZE); + udev_device_unref(tgtdev); + return 0; + } + else + udev_device_unref(tgtdev); + } + } + /* Check for libata */ + parent = pp->udev; + tgtname = NULL; + while (parent) { + tgtname = udev_device_get_sysname(parent); + if (tgtname && sscanf(tgtname, "ata%d", &tgtid) == 1) + break; + parent = udev_device_get_parent(parent); + 
tgtname = NULL; + } + if (tgtname) { + pp->sg_id.proto_id = SCSI_PROTOCOL_ATA; + pp->sg_id.transport_id = tgtid; + snprintf(node, NODE_NAME_SIZE, "ata-%d.00", tgtid); + return 0; + } + /* Unknown SCSI transport. Keep fingers crossed */ + pp->sg_id.proto_id = SCSI_PROTOCOL_UNSPEC; + return 0; +} + +int sysfs_get_host_adapter_name(const struct path *pp, char *adapter_name) +{ + int proto_id; + + if (!pp || !adapter_name) + return 1; + + proto_id = pp->sg_id.proto_id; + + if (proto_id != SCSI_PROTOCOL_FCP && + proto_id != SCSI_PROTOCOL_SAS && + proto_id != SCSI_PROTOCOL_ISCSI && + proto_id != SCSI_PROTOCOL_SRP) { + return 1; + } + /* iscsi doesn't have adapter info in sysfs + * get ip_address for grouping paths + */ + if (pp->sg_id.proto_id == SCSI_PROTOCOL_ISCSI) + return sysfs_get_iscsi_ip_address(pp, adapter_name); + + /* fetch adapter pci name for other protocols + */ + return sysfs_get_host_pci_name(pp, adapter_name); +} + +int sysfs_get_host_pci_name(const struct path *pp, char *pci_name) +{ + struct udev_device *hostdev, *parent; + char host_name[HOST_NAME_LEN]; + const char *driver_name, *value; + + if (!pp || !pci_name) + return 1; + + sprintf(host_name, "host%d", pp->sg_id.host_no); + hostdev = udev_device_new_from_subsystem_sysname(udev, + "scsi_host", host_name); + if (!hostdev) + return 1; + + parent = udev_device_get_parent(hostdev); + while (parent) { + driver_name = udev_device_get_driver(parent); + if (!driver_name) { + parent = udev_device_get_parent(parent); + continue; + } + if (!strcmp(driver_name, "pcieport")) + break; + parent = udev_device_get_parent(parent); + } + if (parent) { + /* pci_device found + */ + value = udev_device_get_sysname(parent); + + strncpy(pci_name, value, SLOT_NAME_SIZE); + udev_device_unref(hostdev); + return 0; + } + udev_device_unref(hostdev); + return 1; +} + +int sysfs_get_iscsi_ip_address(const struct path *pp, char *ip_address) +{ + struct udev_device *hostdev; + char host_name[HOST_NAME_LEN]; + const char *value; + 
+ sprintf(host_name, "host%d", pp->sg_id.host_no); + hostdev = udev_device_new_from_subsystem_sysname(udev, + "iscsi_host", host_name); + if (hostdev) { + value = udev_device_get_sysattr_value(hostdev, + "ipaddress"); + if (value) { + strncpy(ip_address, value, SLOT_NAME_SIZE); + udev_device_unref(hostdev); + return 0; + } else + udev_device_unref(hostdev); + } + return 1; +} + +int +sysfs_get_asymmetric_access_state(struct path *pp, char *buff, int buflen) +{ + struct udev_device *parent = pp->udev; + char value[16], *eptr; + unsigned long preferred; + + while (parent) { + const char *subsys = udev_device_get_subsystem(parent); + if (subsys && !strncmp(subsys, "scsi", 4)) + break; + parent = udev_device_get_parent(parent); + } + + if (!parent) + return -1; + + if (sysfs_attr_get_value(parent, "access_state", buff, buflen) <= 0) + return -1; + + if (sysfs_attr_get_value(parent, "preferred_path", value, 16) <= 0) + return 0; + + preferred = strtoul(value, &eptr, 0); + if (value == eptr || preferred == ULONG_MAX) { + /* Parse error, ignore */ + return 0; + } + return !!preferred; +} + +static void +sysfs_set_rport_tmo(struct multipath *mpp, struct path *pp) +{ + struct udev_device *rport_dev = NULL; + char value[16], *eptr; + char rport_id[32]; + unsigned long long tmo = 0; + int ret; + + sprintf(rport_id, "rport-%d:%d-%d", + pp->sg_id.host_no, pp->sg_id.channel, pp->sg_id.transport_id); + rport_dev = udev_device_new_from_subsystem_sysname(udev, + "fc_remote_ports", rport_id); + if (!rport_dev) { + condlog(1, "%s: No fc_remote_port device for '%s'", pp->dev, + rport_id); + return; + } + condlog(4, "target%d:%d:%d -> %s", pp->sg_id.host_no, + pp->sg_id.channel, pp->sg_id.scsi_id, rport_id); + + /* + * read the current dev_loss_tmo value from sysfs + */ + ret = sysfs_attr_get_value(rport_dev, "dev_loss_tmo", value, 16); + if (ret <= 0) { + condlog(0, "%s: failed to read dev_loss_tmo value, " + "error %d", rport_id, -ret); + goto out; + } + tmo = strtoull(value, &eptr, 
0); + if (value == eptr || tmo == ULLONG_MAX) { + condlog(0, "%s: Cannot parse dev_loss_tmo " + "attribute '%s'", rport_id, value); + goto out; + } + + /* + * This is tricky. + * dev_loss_tmo will be limited to 600 if fast_io_fail + * is _not_ set. + * fast_io_fail will be limited by the current dev_loss_tmo + * setting. + * So to get everything right we first need to increase + * dev_loss_tmo to the fast_io_fail setting (if present), + * then set fast_io_fail, and _then_ set dev_loss_tmo + * to the correct value. + */ + if (mpp->fast_io_fail != MP_FAST_IO_FAIL_UNSET && + mpp->fast_io_fail != MP_FAST_IO_FAIL_ZERO && + mpp->fast_io_fail != MP_FAST_IO_FAIL_OFF) { + /* Check if we need to temporarily increase dev_loss_tmo */ + if ((unsigned int)mpp->fast_io_fail >= tmo) { + /* Increase dev_loss_tmo temporarily */ + snprintf(value, sizeof(value), "%u", + (unsigned int)mpp->fast_io_fail + 1); + ret = sysfs_attr_set_value(rport_dev, "dev_loss_tmo", + value, strlen(value)); + if (ret <= 0) { + if (ret == -EBUSY) + condlog(3, "%s: rport blocked", + rport_id); + else + condlog(0, "%s: failed to set " + "dev_loss_tmo to %s, error %d", + rport_id, value, -ret); + goto out; + } + } + } else if (mpp->dev_loss > DEFAULT_DEV_LOSS_TMO && + mpp->no_path_retry != NO_PATH_RETRY_QUEUE) { + condlog(3, "%s: limiting dev_loss_tmo to %d, since " + "fast_io_fail is not set", + rport_id, DEFAULT_DEV_LOSS_TMO); + mpp->dev_loss = DEFAULT_DEV_LOSS_TMO; + } + if (mpp->fast_io_fail != MP_FAST_IO_FAIL_UNSET) { + if (mpp->fast_io_fail == MP_FAST_IO_FAIL_OFF) + sprintf(value, "off"); + else if (mpp->fast_io_fail == MP_FAST_IO_FAIL_ZERO) + sprintf(value, "0"); + else + snprintf(value, 16, "%u", mpp->fast_io_fail); + ret = sysfs_attr_set_value(rport_dev, "fast_io_fail_tmo", + value, strlen(value)); + if (ret <= 0) { + if (ret == -EBUSY) + condlog(3, "%s: rport blocked", rport_id); + else + condlog(0, "%s: failed to set fast_io_fail_tmo to %s, error %d", + rport_id, value, -ret); + } + } + if 
(mpp->dev_loss > 0) { + snprintf(value, 16, "%u", mpp->dev_loss); + ret = sysfs_attr_set_value(rport_dev, "dev_loss_tmo", + value, strlen(value)); + if (ret <= 0) { + if (ret == -EBUSY) + condlog(3, "%s: rport blocked", rport_id); + else + condlog(0, "%s: failed to set dev_loss_tmo to %s, error %d", + rport_id, value, -ret); + } + } +out: + udev_device_unref(rport_dev); +} + +static void +sysfs_set_session_tmo(struct multipath *mpp, struct path *pp) +{ + struct udev_device *session_dev = NULL; + char session_id[64]; + char value[11]; + + sprintf(session_id, "session%d", pp->sg_id.transport_id); + session_dev = udev_device_new_from_subsystem_sysname(udev, + "iscsi_session", session_id); + if (!session_dev) { + condlog(1, "%s: No iscsi session for '%s'", pp->dev, + session_id); + return; + } + condlog(4, "target%d:%d:%d -> %s", pp->sg_id.host_no, + pp->sg_id.channel, pp->sg_id.scsi_id, session_id); + + if (mpp->dev_loss) { + condlog(3, "%s: ignoring dev_loss_tmo on iSCSI", pp->dev); + } + if (mpp->fast_io_fail != MP_FAST_IO_FAIL_UNSET) { + if (mpp->fast_io_fail == MP_FAST_IO_FAIL_OFF) { + condlog(3, "%s: can't switch off fast_io_fail_tmo " + "on iSCSI", pp->dev); + } else if (mpp->fast_io_fail == MP_FAST_IO_FAIL_ZERO) { + condlog(3, "%s: can't set fast_io_fail_tmo to '0'" + "on iSCSI", pp->dev); + } else { + snprintf(value, 11, "%u", mpp->fast_io_fail); + if (sysfs_attr_set_value(session_dev, "recovery_tmo", + value, strlen(value)) <= 0) { + condlog(3, "%s: Failed to set recovery_tmo, " + " error %d", pp->dev, errno); + } + } + } + udev_device_unref(session_dev); + return; +} + +static void +sysfs_set_nexus_loss_tmo(struct multipath *mpp, struct path *pp) +{ + struct udev_device *sas_dev = NULL; + char end_dev_id[64]; + char value[11]; + + sprintf(end_dev_id, "end_device-%d:%d", + pp->sg_id.host_no, pp->sg_id.transport_id); + sas_dev = udev_device_new_from_subsystem_sysname(udev, + "sas_end_device", end_dev_id); + if (!sas_dev) { + condlog(1, "%s: No SAS end device 
for '%s'", pp->dev, + end_dev_id); + return; + } + condlog(4, "target%d:%d:%d -> %s", pp->sg_id.host_no, + pp->sg_id.channel, pp->sg_id.scsi_id, end_dev_id); + + if (mpp->dev_loss) { + snprintf(value, 11, "%u", mpp->dev_loss); + if (sysfs_attr_set_value(sas_dev, "I_T_nexus_loss_timeout", + value, strlen(value)) <= 0) + condlog(3, "%s: failed to update " + "I_T Nexus loss timeout, error %d", + pp->dev, errno); + } + udev_device_unref(sas_dev); + return; +} + +int +sysfs_set_scsi_tmo (struct multipath *mpp, unsigned int checkint) +{ + struct path *pp; + int i; + unsigned int dev_loss_tmo = mpp->dev_loss; + + if (mpp->no_path_retry > 0) { + uint64_t no_path_retry_tmo = + (uint64_t)mpp->no_path_retry * checkint; + + if (no_path_retry_tmo > MAX_DEV_LOSS_TMO) + no_path_retry_tmo = MAX_DEV_LOSS_TMO; + if (no_path_retry_tmo > dev_loss_tmo) + dev_loss_tmo = no_path_retry_tmo; + condlog(3, "%s: update dev_loss_tmo to %u", + mpp->alias, dev_loss_tmo); + } else if (mpp->no_path_retry == NO_PATH_RETRY_QUEUE) { + dev_loss_tmo = MAX_DEV_LOSS_TMO; + condlog(3, "%s: update dev_loss_tmo to %u", + mpp->alias, dev_loss_tmo); + } + mpp->dev_loss = dev_loss_tmo; + if (mpp->dev_loss && mpp->fast_io_fail > 0 && + (unsigned int)mpp->fast_io_fail >= mpp->dev_loss) { + condlog(3, "%s: turning off fast_io_fail (%d is not smaller than dev_loss_tmo)", + mpp->alias, mpp->fast_io_fail); + mpp->fast_io_fail = MP_FAST_IO_FAIL_OFF; + } + if (!mpp->dev_loss && mpp->fast_io_fail == MP_FAST_IO_FAIL_UNSET) + return 0; + + vector_foreach_slot(mpp->paths, pp, i) { + if (pp->sg_id.proto_id == SCSI_PROTOCOL_FCP) + sysfs_set_rport_tmo(mpp, pp); + if (pp->sg_id.proto_id == SCSI_PROTOCOL_ISCSI) + sysfs_set_session_tmo(mpp, pp); + if (pp->sg_id.proto_id == SCSI_PROTOCOL_SAS) + sysfs_set_nexus_loss_tmo(mpp, pp); + } + return 0; +} + +int +do_inq(int sg_fd, int cmddt, int evpd, unsigned int pg_op, + void *resp, int mx_resp_len) +{ + unsigned char inqCmdBlk[INQUIRY_CMDLEN] = + { INQUIRY_CMD, 0, 0, 0, 0, 0 }; + 
unsigned char sense_b[SENSE_BUFF_LEN]; + struct sg_io_hdr io_hdr; + + if (cmddt) + inqCmdBlk[1] |= 2; + if (evpd) + inqCmdBlk[1] |= 1; + inqCmdBlk[2] = (unsigned char) pg_op; + inqCmdBlk[3] = (unsigned char)((mx_resp_len >> 8) & 0xff); + inqCmdBlk[4] = (unsigned char) (mx_resp_len & 0xff); + memset(&io_hdr, 0, sizeof (struct sg_io_hdr)); + memset(sense_b, 0, SENSE_BUFF_LEN); + io_hdr.interface_id = 'S'; + io_hdr.cmd_len = sizeof (inqCmdBlk); + io_hdr.mx_sb_len = sizeof (sense_b); + io_hdr.dxfer_direction = SG_DXFER_FROM_DEV; + io_hdr.dxfer_len = mx_resp_len; + io_hdr.dxferp = resp; + io_hdr.cmdp = inqCmdBlk; + io_hdr.sbp = sense_b; + io_hdr.timeout = DEF_TIMEOUT * 1000; + + if (ioctl(sg_fd, SG_IO, &io_hdr) < 0) + return -1; + + /* treat SG_ERR here to get rid of sg_err.[ch] */ + io_hdr.status &= 0x7e; + if ((0 == io_hdr.status) && (0 == io_hdr.host_status) && + (0 == io_hdr.driver_status)) + return 0; + if ((SCSI_CHECK_CONDITION == io_hdr.status) || + (SCSI_COMMAND_TERMINATED == io_hdr.status) || + (SG_ERR_DRIVER_SENSE == (0xf & io_hdr.driver_status))) { + if (io_hdr.sbp && (io_hdr.sb_len_wr > 2)) { + int sense_key; + unsigned char * sense_buffer = io_hdr.sbp; + if (sense_buffer[0] & 0x2) + sense_key = sense_buffer[1] & 0xf; + else + sense_key = sense_buffer[2] & 0xf; + if(RECOVERED_ERROR == sense_key) + return 0; + } + } + return -1; +} + +static int +get_serial (char * str, int maxlen, int fd) +{ + int len = 0; + char buff[MX_ALLOC_LEN + 1] = {0}; + + if (fd < 0) + return 1; + + if (0 == do_inq(fd, 0, 1, 0x80, buff, MX_ALLOC_LEN)) { + len = buff[3]; + if (len >= maxlen) + return 1; + if (len > 0) { + memcpy(str, buff + 4, len); + str[len] = '\0'; + } + return 0; + } + return 1; +} + +/* + * Side effect: sets pp->tpgs if it could be determined. + * If ALUA calls fail because paths are unreachable, pp->tpgs remains unchanged. 
+ */ +static void +detect_alua(struct path * pp) +{ + int ret; + int tpgs; + unsigned int timeout; + + if (sysfs_get_timeout(pp, &timeout) <= 0) + timeout = DEF_TIMEOUT; + + tpgs = get_target_port_group_support(pp, timeout); + if (tpgs == -RTPG_INQUIRY_FAILED) + return; + else if (tpgs <= 0) { + pp->tpgs = TPGS_NONE; + return; + } + + if (pp->fd == -1 || pp->offline) + return; + + ret = get_target_port_group(pp, timeout); + if (ret < 0 || get_asymmetric_access_state(pp, ret, timeout) < 0) { + int state; + + if (ret == -RTPG_INQUIRY_FAILED) + return; + + state = path_offline(pp); + if (state == PATH_DOWN || state == PATH_PENDING) + return; + + pp->tpgs = TPGS_NONE; + return; + } + pp->tpgs = tpgs; +} + +int path_get_tpgs(struct path *pp) +{ + if (pp->tpgs == TPGS_UNDEF) + detect_alua(pp); + return pp->tpgs; +} + +#define DEFAULT_SGIO_LEN 254 + +/* Query VPD page @pg. Returns number of INQUIRY bytes + upon success and -1 upon failure. */ +static int +sgio_get_vpd (unsigned char * buff, int maxlen, int fd, int pg) +{ + int len = DEFAULT_SGIO_LEN; + int rlen; + + if (fd < 0) { + errno = EBADF; + return -1; + } +retry: + if (0 == do_inq(fd, 0, 1, pg, buff, len)) { + rlen = get_unaligned_be16(&buff[2]) + 4; + if (rlen <= len || len >= maxlen) + return rlen; + len = (rlen < maxlen)? 
/*
 * Derive a WWID string from a Device Identification VPD page (0x83).
 *
 * @in:      raw page, starting with the 4-byte page header
 * @in_len:  number of valid bytes in @in
 * @out:     buffer receiving the NUL-terminated WWID
 * @out_len: size of @out
 *
 * Among the designators with association "LUN", the one with the highest
 * priority is chosen (see the per-type comments below) and formatted as a
 * type digit followed by the identifier.
 *
 * Returns the length of the string written to @out, 0 if @out has no room
 * for even one digit, -ENODATA if no usable designator was found, and
 * -EINVAL if a SCSI name string designator is shorter than its prefix.
 */
static int
parse_vpd_pg83(const unsigned char *in, size_t in_len,
	       char *out, size_t out_len)
{
	const unsigned char *end = in + in_len;
	const unsigned char *d;
	const unsigned char *vpd = NULL;
	size_t len, vpd_len, i;
	int vpd_type, prio = -1, naa_prio;

	d = in + 4;
	/* a designator is a 4-byte header followed by d[3] payload bytes */
	while (d < end && end - d >= 4) {
		/* stop at a designator that claims more than the page holds */
		if (d[3] > end - d - 4)
			break;
		/* Select 'association: LUN' */
		if ((d[1] & 0x30) != 0) {
			d += d[3] + 4;
			continue;
		}
		switch (d[1] & 0xf) {
		case 0x3:
			/* NAA: Prio 5 */
			if (d[3] == 0)
				break; /* no NAA byte to look at */
			switch (d[4] >> 4) {
			case 6:
				/* IEEE Registered Extended: Prio 8 */
				naa_prio = 8;
				break;
			case 5:
				/* IEEE Registered: Prio 7 */
				naa_prio = 7;
				break;
			case 2:
				/* IEEE Extended: Prio 6 */
				naa_prio = 6;
				break;
			case 3:
				/* IEEE Locally assigned: Prio 1 */
				naa_prio = 1;
				break;
			default:
				/* Default: no priority */
				naa_prio = -1;
				break;
			}
			if (prio < naa_prio) {
				prio = naa_prio;
				vpd = d;
			}
			break;
		case 0x8:
			/* SCSI Name: Prio 4 */
			/*
			 * Skip unknown name formats with "break", not
			 * "continue": the latter would bypass the pointer
			 * advance below and loop forever on this designator.
			 */
			if (d[3] < 4 ||
			    (memcmp(d + 4, "eui.", 4) &&
			     memcmp(d + 4, "naa.", 4) &&
			     memcmp(d + 4, "iqn.", 4)))
				break;
			if (prio < 4) {
				prio = 4;
				vpd = d;
			}
			break;
		case 0x2:
			/* EUI-64: Prio 3 */
			if (prio < 3) {
				prio = 3;
				vpd = d;
			}
			break;
		case 0x1:
			/* T-10 Vendor ID: Prio 2 */
			if (prio < 2) {
				prio = 2;
				vpd = d;
			}
			break;
		}
		d += d[3] + 4;
	}

	if (prio <= 0)
		return -ENODATA;
	/* Need space at least for one digit */
	else if (out_len <= 1)
		return 0;

	len = 0;
	vpd_type = vpd[1] & 0xf;
	vpd_len = vpd[3];
	vpd += 4;
	if (vpd_type == 0x2 || vpd_type == 0x3) {
		/* binary identifier: type digit + two hex digits per byte */
		len = sprintf(out, "%d", vpd_type);
		if (2 * vpd_len >= out_len - len) {
			condlog(1, "%s: WWID overflow, type %d, %zu/%zu bytes required",
				__func__, vpd_type,
				2 * vpd_len + len + 1, out_len);
			vpd_len = (out_len - len - 1) / 2;
		}
		for (i = 0; i < vpd_len; i++)
			len += sprintf(out + len,
				       "%02x", vpd[i]);
	} else if (vpd_type == 0x8 && vpd_len < 4) {
		condlog(1, "%s: VPD length %zu too small for designator type 8",
			__func__, vpd_len);
		return -EINVAL;
	} else if (vpd_type == 0x8) {
		/* SCSI name string: map the prefix to the matching type digit */
		if (!memcmp("eui.", vpd, 4))
			out[0] = '2';
		else if (!memcmp("naa.", vpd, 4))
			out[0] = '3';
		else
			out[0] = '8';

		vpd += 4;
		len = vpd_len - 4;
		/* drop padding NULs, keeping one as terminator */
		while (len > 2 && vpd[len - 2] == '\0')
			--len;
		if (len > out_len - 1) {
			condlog(1, "%s: WWID overflow, type 8/%c, %zu/%zu bytes required",
				__func__, out[0], len + 1, out_len);
			len = out_len - 1;
		}

		if (out[0] == '8')
			for (i = 0; i < len; ++i)
				out[1 + i] = vpd[i];
		else
			for (i = 0; i < len; ++i)
				out[1 + i] = tolower(vpd[i]);

		/* designator should be 0-terminated, but let's make sure */
		out[len] = '\0';

	} else if (vpd_type == 0x1) {
		const unsigned char *p;
		size_t p_len;

		/* T10 vendor ID: runs of blanks are replaced by one '_' */
		out[0] = '1';
		len = 1;
		while ((p = memchr(vpd, ' ', vpd_len))) {
			p_len = p - vpd;
			if (len + p_len > out_len - 1) {
				condlog(1, "%s: WWID overflow, type 1, %zu/%zu bytes required",
					__func__, len + p_len, out_len);
				p_len = out_len - len - 1;
			}
			memcpy(out + len, vpd, p_len);
			len += p_len;
			if (len >= out_len - 1) {
				out[len] = '\0';
				break;
			}
			out[len] = '_';
			len ++;
			if (len >= out_len - 1) {
				out[len] = '\0';
				break;
			}
			vpd = p;
			vpd_len -= p_len;
			/* never step past the end of the designator */
			while (vpd_len > 0 && *vpd == ' ') {
				vpd++;
				vpd_len --;
			}
		}
		p_len = vpd_len;
		if (p_len > 0 && len < out_len - 1) {
			if (len + p_len > out_len - 1) {
				condlog(1, "%s: WWID overflow, type 1, %zu/%zu bytes required",
					__func__, len + p_len + 1, out_len);
				p_len = out_len - len - 1;
			}
			memcpy(out + len, vpd, p_len);
			len += p_len;
			out[len] = '\0';
		}
		if (len > 1 && out[len - 1] == '_') {
			out[len - 1] = '\0';
			len--;
		}
	}
	return len;
}
+ return len; +} + +int +get_vpd_sgio (int fd, int pg, int vend_id, char * str, int maxlen) +{ + int len, buff_len; + unsigned char buff[4096]; + + memset(buff, 0x0, 4096); + if (sgio_get_vpd(buff, 4096, fd, pg) < 0) { + int lvl = pg == 0x80 || pg == 0x83 ? 3 : 4; + + condlog(lvl, "failed to issue vpd inquiry for pg%02x", + pg); + return -errno; + } + + if (buff[1] != pg) { + condlog(3, "vpd pg%02x error, invalid vpd page %02x", + pg, buff[1]); + return -ENODATA; + } + buff_len = get_unaligned_be16(&buff[2]) + 4; + if (buff_len > 4096) { + condlog(3, "vpd pg%02x page truncated", pg); + buff_len = 4096; + } + if (pg == 0x80) + len = parse_vpd_pg80(buff, str, maxlen); + else if (pg == 0x83) + len = parse_vpd_pg83(buff, buff_len, str, maxlen); + else if (pg == 0xc9 && maxlen >= 8) { + if (buff_len < 8) + len = -ENODATA; + else { + len = (buff_len <= maxlen)? buff_len : maxlen; + memcpy (str, buff, len); + } + } else if (pg == 0xc0 && vend_id == VPD_VP_HP3PAR) + len = parse_vpd_c0_hp3par(buff, buff_len, str, maxlen); + else + len = -ENOSYS; + + return len; +} + +static int +scsi_sysfs_pathinfo (struct path * pp, vector hwtable) +{ + struct udev_device *parent; + const char *attr_path = NULL; + + parent = pp->udev; + while (parent) { + const char *subsys = udev_device_get_subsystem(parent); + if (subsys && !strncmp(subsys, "scsi", 4)) { + attr_path = udev_device_get_sysname(parent); + if (!attr_path) + break; + if (sscanf(attr_path, "%i:%i:%i:%i", + &pp->sg_id.host_no, + &pp->sg_id.channel, + &pp->sg_id.scsi_id, + &pp->sg_id.lun) == 4) + break; + } + parent = udev_device_get_parent(parent); + } + if (!attr_path || pp->sg_id.host_no == -1) + return PATHINFO_FAILED; + + if (sysfs_get_vendor(parent, pp->vendor_id, SCSI_VENDOR_SIZE) <= 0) + return PATHINFO_FAILED;; + + condlog(3, "%s: vendor = %s", pp->dev, pp->vendor_id); + + if (sysfs_get_model(parent, pp->product_id, PATH_PRODUCT_SIZE) <= 0) + return PATHINFO_FAILED;; + + condlog(3, "%s: product = %s", pp->dev, 
pp->product_id); + + if (sysfs_get_rev(parent, pp->rev, PATH_REV_SIZE) < 0) + return PATHINFO_FAILED;; + + condlog(3, "%s: rev = %s", pp->dev, pp->rev); + + /* + * set the hwe configlet pointer + */ + find_hwe(hwtable, pp->vendor_id, pp->product_id, pp->rev, pp->hwe); + + /* + * host / bus / target / lun + */ + condlog(3, "%s: h:b:t:l = %i:%i:%i:%i", + pp->dev, + pp->sg_id.host_no, + pp->sg_id.channel, + pp->sg_id.scsi_id, + pp->sg_id.lun); + + /* + * target node name + */ + if(sysfs_get_tgt_nodename(pp, pp->tgt_node_name)) + return PATHINFO_FAILED; + + condlog(3, "%s: tgt_node_name = %s", + pp->dev, pp->tgt_node_name); + + return PATHINFO_OK; +} + +static int +nvme_sysfs_pathinfo (struct path * pp, vector hwtable) +{ + struct udev_device *parent; + const char *attr_path = NULL; + const char *attr; + + attr_path = udev_device_get_sysname(pp->udev); + if (!attr_path) + return PATHINFO_FAILED; + + if (sscanf(attr_path, "nvme%dn%d", + &pp->sg_id.host_no, + &pp->sg_id.scsi_id) != 2) + return PATHINFO_FAILED; + + parent = udev_device_get_parent_with_subsystem_devtype(pp->udev, + "nvme", NULL); + if (!parent) + return PATHINFO_SKIPPED; + + attr = udev_device_get_sysattr_value(pp->udev, "nsid"); + pp->sg_id.lun = attr ? atoi(attr) : 0; + + attr = udev_device_get_sysattr_value(parent, "cntlid"); + pp->sg_id.channel = attr ? 
atoi(attr) : 0; + + snprintf(pp->vendor_id, SCSI_VENDOR_SIZE, "NVME"); + snprintf(pp->product_id, PATH_PRODUCT_SIZE, "%s", + udev_device_get_sysattr_value(parent, "model")); + snprintf(pp->serial, SERIAL_SIZE, "%s", + udev_device_get_sysattr_value(parent, "serial")); + snprintf(pp->rev, PATH_REV_SIZE, "%s", + udev_device_get_sysattr_value(parent, "firmware_rev")); + + condlog(3, "%s: vendor = %s", pp->dev, pp->vendor_id); + condlog(3, "%s: product = %s", pp->dev, pp->product_id); + condlog(3, "%s: serial = %s", pp->dev, pp->serial); + condlog(3, "%s: rev = %s", pp->dev, pp->rev); + + find_hwe(hwtable, pp->vendor_id, pp->product_id, NULL, pp->hwe); + + return PATHINFO_OK; +} + +static int +ccw_sysfs_pathinfo (struct path * pp, vector hwtable) +{ + struct udev_device *parent; + char attr_buff[NAME_SIZE]; + const char *attr_path; + + parent = pp->udev; + while (parent) { + const char *subsys = udev_device_get_subsystem(parent); + if (subsys && !strncmp(subsys, "ccw", 3)) + break; + parent = udev_device_get_parent(parent); + } + if (!parent) + return PATHINFO_FAILED; + + sprintf(pp->vendor_id, "IBM"); + + condlog(3, "%s: vendor = %s", pp->dev, pp->vendor_id); + + if (sysfs_get_devtype(parent, attr_buff, FILE_NAME_SIZE) <= 0) + return PATHINFO_FAILED; + + if (!strncmp(attr_buff, "3370", 4)) { + sprintf(pp->product_id,"S/390 DASD FBA"); + } else if (!strncmp(attr_buff, "9336", 4)) { + sprintf(pp->product_id,"S/390 DASD FBA"); + } else { + sprintf(pp->product_id,"S/390 DASD ECKD"); + } + + condlog(3, "%s: product = %s", pp->dev, pp->product_id); + + /* + * set the hwe configlet pointer + */ + find_hwe(hwtable, pp->vendor_id, pp->product_id, NULL, pp->hwe); + + /* + * host / bus / target / lun + */ + attr_path = udev_device_get_sysname(parent); + pp->sg_id.lun = 0; + if (sscanf(attr_path, "%i.%i.%x", + &pp->sg_id.host_no, + &pp->sg_id.channel, + &pp->sg_id.scsi_id) == 3) { + condlog(3, "%s: h:b:t:l = %i:%i:%i:%i", + pp->dev, + pp->sg_id.host_no, + pp->sg_id.channel, + 
pp->sg_id.scsi_id, + pp->sg_id.lun); + } + + return PATHINFO_OK; +} + +static int +cciss_sysfs_pathinfo (struct path * pp, vector hwtable) +{ + const char * attr_path = NULL; + struct udev_device *parent; + + parent = pp->udev; + while (parent) { + const char *subsys = udev_device_get_subsystem(parent); + if (subsys && !strncmp(subsys, "cciss", 5)) { + attr_path = udev_device_get_sysname(parent); + if (!attr_path) + break; + if (sscanf(attr_path, "c%id%i", + &pp->sg_id.host_no, + &pp->sg_id.scsi_id) == 2) + break; + } + parent = udev_device_get_parent(parent); + } + if (!attr_path || pp->sg_id.host_no == -1) + return PATHINFO_FAILED; + + if (sysfs_get_vendor(parent, pp->vendor_id, SCSI_VENDOR_SIZE) <= 0) + return PATHINFO_FAILED; + + condlog(3, "%s: vendor = %s", pp->dev, pp->vendor_id); + + if (sysfs_get_model(parent, pp->product_id, PATH_PRODUCT_SIZE) <= 0) + return PATHINFO_FAILED; + + condlog(3, "%s: product = %s", pp->dev, pp->product_id); + + if (sysfs_get_rev(parent, pp->rev, PATH_REV_SIZE) <= 0) + return PATHINFO_FAILED; + + condlog(3, "%s: rev = %s", pp->dev, pp->rev); + + /* + * set the hwe configlet pointer + */ + find_hwe(hwtable, pp->vendor_id, pp->product_id, pp->rev, pp->hwe); + + /* + * host / bus / target / lun + */ + pp->sg_id.lun = 0; + pp->sg_id.channel = 0; + condlog(3, "%s: h:b:t:l = %i:%i:%i:%i", + pp->dev, + pp->sg_id.host_no, + pp->sg_id.channel, + pp->sg_id.scsi_id, + pp->sg_id.lun); + + return PATHINFO_OK; +} + +static int +common_sysfs_pathinfo (struct path * pp) +{ + dev_t devt; + + if (!pp) + return PATHINFO_FAILED; + + if (!pp->udev) { + condlog(4, "%s: udev not initialised", pp->dev); + return PATHINFO_FAILED; + } + devt = udev_device_get_devnum(pp->udev); + snprintf(pp->dev_t, BLK_DEV_SIZE, "%d:%d", major(devt), minor(devt)); + + condlog(4, "%s: dev_t = %s", pp->dev, pp->dev_t); + + if (sysfs_get_size(pp, &pp->size)) + return PATHINFO_FAILED; + + condlog(3, "%s: size = %llu", pp->dev, pp->size); + + return PATHINFO_OK; +} + +int 
+path_offline (struct path * pp) +{ + struct udev_device * parent; + char buff[SCSI_STATE_SIZE]; + int err; + const char *subsys_type; + + if (pp->bus == SYSFS_BUS_SCSI) { + subsys_type = "scsi"; + } + else if (pp->bus == SYSFS_BUS_NVME) { + subsys_type = "nvme"; + } + else { + return PATH_UP; + } + + parent = pp->udev; + while (parent) { + const char *subsys = udev_device_get_subsystem(parent); + if (subsys && !strncmp(subsys, subsys_type, 4)) + break; + parent = udev_device_get_parent(parent); + } + + if (!parent) { + condlog(1, "%s: failed to get sysfs information", pp->dev); + return PATH_REMOVED; + } + + memset(buff, 0x0, SCSI_STATE_SIZE); + err = sysfs_attr_get_value(parent, "state", buff, SCSI_STATE_SIZE); + if (err <= 0) { + if (err == -ENXIO) + return PATH_REMOVED; + else + return PATH_DOWN; + } + + + condlog(4, "%s: path state = %s", pp->dev, buff); + + if (pp->bus == SYSFS_BUS_SCSI) { + if (!strncmp(buff, "offline", 7)) { + pp->offline = 1; + return PATH_DOWN; + } + pp->offline = 0; + if (!strncmp(buff, "blocked", 7) || + !strncmp(buff, "quiesce", 7)) + return PATH_PENDING; + else if (!strncmp(buff, "running", 7)) + return PATH_UP; + + } + else if (pp->bus == SYSFS_BUS_NVME) { + if (!strncmp(buff, "dead", 4)) { + pp->offline = 1; + return PATH_DOWN; + } + pp->offline = 0; + if (!strncmp(buff, "new", 3) || + !strncmp(buff, "deleting", 8)) + return PATH_PENDING; + else if (!strncmp(buff, "live", 4)) + return PATH_UP; + } + + return PATH_DOWN; +} + +int +sysfs_pathinfo(struct path * pp, vector hwtable) +{ + int r = common_sysfs_pathinfo(pp); + + if (r != PATHINFO_OK) + return r; + + pp->bus = SYSFS_BUS_UNDEF; + if (!strncmp(pp->dev,"cciss",5)) + pp->bus = SYSFS_BUS_CCISS; + if (!strncmp(pp->dev,"dasd", 4)) + pp->bus = SYSFS_BUS_CCW; + if (!strncmp(pp->dev,"sd", 2)) + pp->bus = SYSFS_BUS_SCSI; + if (!strncmp(pp->dev,"nvme", 4)) + pp->bus = SYSFS_BUS_NVME; + + switch (pp->bus) { + case SYSFS_BUS_SCSI: + return scsi_sysfs_pathinfo(pp, hwtable); + case 
SYSFS_BUS_CCW: + return ccw_sysfs_pathinfo(pp, hwtable); + case SYSFS_BUS_CCISS: + return cciss_sysfs_pathinfo(pp, hwtable); + case SYSFS_BUS_NVME: + return nvme_sysfs_pathinfo(pp, hwtable); + case SYSFS_BUS_UNDEF: + default: + return PATHINFO_OK; + } +} + +static void +scsi_ioctl_pathinfo (struct path * pp, int mask) +{ + struct udev_device *parent; + const char *attr_path = NULL; + int vpd_id; + + if (!(mask & DI_SERIAL)) + return; + + select_vpd_vendor_id(pp); + vpd_id = pp->vpd_vendor_id; + + if (vpd_id != VPD_VP_UNDEF) { + char vpd_data[VPD_DATA_SIZE] = {0}; + + if (get_vpd_sgio(pp->fd, vpd_vendor_pages[vpd_id].pg, vpd_id, + vpd_data, sizeof(vpd_data)) < 0) + condlog(3, "%s: failed to get extra vpd data", pp->dev); + else { + vpd_data[VPD_DATA_SIZE - 1] = '\0'; + if (pp->vpd_data) + free(pp->vpd_data); + pp->vpd_data = strdup(vpd_data); + if (!pp->vpd_data) + condlog(0, "%s: failed to allocate space for vpd data", pp->dev); + } + } + + parent = pp->udev; + while (parent) { + const char *subsys = udev_device_get_subsystem(parent); + if (subsys && !strncmp(subsys, "scsi", 4)) { + attr_path = udev_device_get_sysname(parent); + if (!attr_path) + break; + if (sscanf(attr_path, "%i:%i:%i:%i", + &pp->sg_id.host_no, + &pp->sg_id.channel, + &pp->sg_id.scsi_id, + &pp->sg_id.lun) == 4) + break; + } + parent = udev_device_get_parent(parent); + } + if (!attr_path || pp->sg_id.host_no == -1) + return; + + if (get_vpd_sysfs(parent, 0x80, pp->serial, SERIAL_SIZE) <= 0) { + if (get_serial(pp->serial, SERIAL_SIZE, pp->fd)) { + condlog(3, "%s: fail to get serial", pp->dev); + return; + } + } + + condlog(3, "%s: serial = %s", pp->dev, pp->serial); + return; +} + +static void +cciss_ioctl_pathinfo(struct path *pp) +{ + get_serial(pp->serial, SERIAL_SIZE, pp->fd); + condlog(3, "%s: serial = %s", pp->dev, pp->serial); +} + +int +get_state (struct path * pp, struct config *conf, int daemon, int oldstate) +{ + struct checker * c = &pp->checker; + int state; + + if 
(!checker_selected(c)) { + if (daemon) { + if (pathinfo(pp, conf, DI_SYSFS) != PATHINFO_OK) { + condlog(3, "%s: couldn't get sysfs pathinfo", + pp->dev); + return PATH_UNCHECKED; + } + } + select_detect_checker(conf, pp); + select_checker(conf, pp); + if (!checker_selected(c)) { + condlog(3, "%s: No checker selected", pp->dev); + return PATH_UNCHECKED; + } + checker_set_fd(c, pp->fd); + if (checker_init(c, pp->mpp?&pp->mpp->mpcontext:NULL)) { + checker_clear(c); + condlog(3, "%s: checker init failed", pp->dev); + return PATH_UNCHECKED; + } + } + if (pp->mpp && !c->mpcontext) + checker_mp_init(c, &pp->mpp->mpcontext); + checker_clear_message(c); + if (conf->force_sync == 0) + checker_set_async(c); + else + checker_set_sync(c); + if (!conf->checker_timeout && + sysfs_get_timeout(pp, &(c->timeout)) <= 0) + c->timeout = DEF_TIMEOUT; + state = checker_check(c, oldstate); + condlog(3, "%s: %s state = %s", pp->dev, + checker_name(c), checker_state_name(state)); + if (state != PATH_UP && state != PATH_GHOST && + strlen(checker_message(c))) + condlog(3, "%s: %s checker%s", + pp->dev, checker_name(c), checker_message(c)); + return state; +} + +static int +get_prio (struct path * pp, int timeout) +{ + struct prio * p; + struct config *conf; + int old_prio; + + if (!pp) + return 0; + + p = &pp->prio; + if (!prio_selected(p)) { + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + select_detect_prio(conf, pp); + select_prio(conf, pp); + pthread_cleanup_pop(1); + if (!prio_selected(p)) { + condlog(3, "%s: no prio selected", pp->dev); + pp->priority = PRIO_UNDEF; + return 1; + } + } + old_prio = pp->priority; + pp->priority = prio_getprio(p, pp, timeout); + if (pp->priority < 0) { + /* this changes pp->offline, but why not */ + int state = path_offline(pp); + + if (state == PATH_DOWN || state == PATH_PENDING) { + pp->priority = old_prio; + condlog(3, "%s: %s prio error in state %d, keeping prio = %d", + pp->dev, prio_name(p), state, pp->priority); 
/*
 * Remove the run of "00" pairs (hex encoding of 0 bytes) that ends right
 * before @end from the string of length *@len starting at @start.
 *
 * Only whole pairs are removed, and never anything before @start. The rest
 * of the string, including its terminating NUL, is moved down and *@len is
 * reduced by the number of characters removed.
 *
 * Returns the new position of the character @end pointed to.
 */
static char *
skip_zeroes_backward(char* start, size_t *len, char *end)
{
	char *dst = end;
	size_t removed;
	size_t tail;

	for (; dst - start >= 2 && dst[-1] == '0' && dst[-2] == '0'; dst -= 2)
		;

	removed = (size_t)(end - dst);
	if (removed == 0)
		return dst;

	/* everything from @end through the terminating NUL */
	tail = (size_t)(start + *len + 1 - end);
	memmove(dst, end, tail);
	*len -= removed;

	return dst;
}
+ */ +static int +fix_broken_nvme_wwid(struct path *pp, const char *value, size_t size) +{ + static const char _nvme[] = "nvme."; + size_t len, i; + char mangled[256]; + char *p; + + len = strlen(value); + if (len >= sizeof(mangled)) + return 0; + + /* Check that value starts with "nvme.%04x-" */ + if (memcmp(value, _nvme, sizeof(_nvme) - 1) || value[9] != '-') + return 0; + for (i = 5; i < 9; i++) + if (!isxdigit(value[i])) + return 0; + + memcpy(mangled, value, len + 1); + + /* search end of "model" part and strip trailing '00' */ + p = memrchr(mangled, '-', len); + if (p == NULL) + return 0; + + p = skip_zeroes_backward(mangled, &len, p); + + /* search end of "serial" part */ + p = memrchr(mangled, '-', p - mangled); + if (p == NULL || memrchr(mangled, '-', p - mangled) != mangled + 9) + /* We expect exactly 3 '-' in the value */ + return 0; + + p = skip_zeroes_backward(mangled, &len, p); + if (len >= size) + return 0; + + memcpy(pp->wwid, mangled, len + 1); + condlog(2, "%s: over-long WWID shortened to %s", pp->dev, pp->wwid); + return len; +} + +static int +get_udev_uid(struct path * pp, char *uid_attribute, struct udev_device *udev) +{ + ssize_t len; + const char *value; + + value = udev_device_get_property_value(udev, uid_attribute); + if (!value || strlen(value) == 0) + value = getenv(uid_attribute); + if (value && strlen(value)) { + len = strlcpy(pp->wwid, value, WWID_SIZE); + if (len >= WWID_SIZE) { + len = fix_broken_nvme_wwid(pp, value, WWID_SIZE); + if (len > 0) + return len; + condlog(0, "%s: wwid overflow", pp->dev); + len = WWID_SIZE; + } + } else { + condlog(3, "%s: no %s attribute", pp->dev, + uid_attribute); + len = -EINVAL; + } + return len; +} + +static int +get_vpd_uid(struct path * pp) +{ + struct udev_device *parent = pp->udev; + + while (parent) { + const char *subsys = udev_device_get_subsystem(parent); + if (subsys && !strncmp(subsys, "scsi", 4)) + break; + parent = udev_device_get_parent(parent); + } + + if (!parent) + return -EINVAL; + 
+ return get_vpd_sysfs(parent, 0x83, pp->wwid, WWID_SIZE); +} + +static ssize_t uid_fallback(struct path *pp, int path_state, + const char **origin) +{ + ssize_t len = -1; + + if (pp->bus == SYSFS_BUS_SCSI) { + len = get_vpd_uid(pp); + *origin = "sysfs"; + if (len < 0 && path_state == PATH_UP) { + condlog(1, "%s: failed to get sysfs uid: %s", + pp->dev, strerror(-len)); + len = get_vpd_sgio(pp->fd, 0x83, 0, pp->wwid, + WWID_SIZE); + *origin = "sgio"; + } + } else if (pp->bus == SYSFS_BUS_NVME) { + char value[256]; + len = sysfs_attr_get_value(pp->udev, "wwid", value, + sizeof(value)); + if (len <= 0) + return -1; + len = strlcpy(pp->wwid, value, WWID_SIZE); + if (len >= WWID_SIZE) { + len = fix_broken_nvme_wwid(pp, value, + WWID_SIZE); + if (len > 0) + return len; + condlog(0, "%s: wwid overflow", pp->dev); + len = WWID_SIZE; + } + *origin = "sysfs"; + } + return len; +} + +static bool has_uid_fallback(struct path *pp) +{ + /* + * Falling back to direct WWID determination is dangerous + * if uid_attribute is set to something non-standard. + * Allow it only if it's either the default, or if udev + * has been disabled by setting 'uid_attribute ""'. 
+ */ + if (!pp->uid_attribute) + return false; + return ((pp->bus == SYSFS_BUS_SCSI && + (!strcmp(pp->uid_attribute, DEFAULT_UID_ATTRIBUTE) || + !strcmp(pp->uid_attribute, ""))) || + (pp->bus == SYSFS_BUS_NVME && + (!strcmp(pp->uid_attribute, DEFAULT_NVME_UID_ATTRIBUTE) || + !strcmp(pp->uid_attribute, "")))); +} + +int +get_uid (struct path * pp, int path_state, struct udev_device *udev, + int allow_fallback) +{ + char *c; + const char *origin = "unknown"; + ssize_t len = 0; + struct config *conf; + int used_fallback = 0; + + if (!pp->uid_attribute && !pp->getuid) { + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + select_getuid(conf, pp); + pthread_cleanup_pop(1); + } + + memset(pp->wwid, 0, WWID_SIZE); + if (pp->getuid) { + char buff[CALLOUT_MAX_SIZE]; + + /* Use 'getuid' callout, deprecated */ + condlog(1, "%s: using deprecated getuid callout", pp->dev); + if (path_state != PATH_UP) { + condlog(3, "%s: path inaccessible", pp->dev); + len = -EWOULDBLOCK; + } else if (apply_format(pp->getuid, &buff[0], pp)) { + condlog(0, "error formatting uid callout command"); + len = -EINVAL; + } else if (execute_program(buff, pp->wwid, WWID_SIZE)) { + condlog(3, "error calling out %s", buff); + len = -EIO; + } else + len = strlen(pp->wwid); + origin = "callout"; + } else { + bool udev_available = udev && pp->uid_attribute + && *pp->uid_attribute; + + if (udev_available) { + len = get_udev_uid(pp, pp->uid_attribute, udev); + if (len <= 0) + condlog(1, + "%s: failed to get udev uid: %s", + pp->dev, strerror(-len)); + else + origin = "udev"; + } + if ((!udev_available || (len <= 0 && allow_fallback)) + && has_uid_fallback(pp)) { + used_fallback = 1; + len = uid_fallback(pp, path_state, &origin); + } + } + if ( len < 0 ) { + condlog(1, "%s: failed to get %s uid: %s", + pp->dev, origin, strerror(-len)); + memset(pp->wwid, 0x0, WWID_SIZE); + return 1; + } else { + /* Strip any trailing blanks */ + c = strchr(pp->wwid, '\0'); + c--; + while (c && 
/*
 * Fill in the parts of @pp that are selected by the DI_* bits in @mask.
 *
 * Returns:
 *   PATHINFO_FAILED  - @pp or @conf is NULL.
 *   PATHINFO_SKIPPED - the device is hidden, claimed by a foreign library,
 *                      rejected by one of the blacklist filters, or has no
 *                      WWID while its bus is SYSFS_BUS_UNDEF.
 *   PATHINFO_OK      - everything else. This includes the "blank" exit
 *                      (path removed, or device node could not be opened),
 *                      where the path is only marked PATH_DOWN.
 * A non-OK result from sysfs_pathinfo() is passed through unchanged.
 *
 * Two different states are tracked below: path_state is the result of
 * path_offline(), while pp->state / pp->chkrstate hold the result of
 * get_state().
 */
int pathinfo(struct path *pp, struct config *conf, int mask)
{
	int path_state;

	if (!pp || !conf)
		return PATHINFO_FAILED;

	/*
	 * For behavior backward-compatibility with multipathd,
	 * the blacklisting by filter_property|devnode() is not
	 * limited by DI_BLACKLIST and occurs before this debug
	 * message with the mask value.
	 */
	if (pp->udev) {
		const char *hidden =
			udev_device_get_sysattr_value(pp->udev, "hidden");

		/* sysfs attribute "hidden" == "1": skip the device */
		if (hidden && !strcmp(hidden, "1")) {
			condlog(4, "%s: hidden", pp->dev);
			return PATHINFO_SKIPPED;
		}
		if (is_claimed_by_foreign(pp->udev) ||
		    filter_property(conf, pp->udev, 4, pp->uid_attribute) > 0)
			return PATHINFO_SKIPPED;
	}

	if (filter_devnode(conf->blist_devnode,
			   conf->elist_devnode,
			   pp->dev) > 0)
		return PATHINFO_SKIPPED;

	condlog(4, "%s: mask = 0x%x", pp->dev, mask);

	/*
	 * Sanity check: we need the device number to
	 * avoid inconsistent information in
	 * find_path_by_dev()/find_path_by_devt()
	 */
	if (!strlen(pp->dev_t) && !(mask & DI_SYSFS)) {
		condlog(1, "%s: empty device number", pp->dev);
		/* force the sysfs pass below even though it was not requested */
		mask |= DI_SYSFS;
	}

	/*
	 * fetch info available in sysfs
	 */
	if (mask & DI_SYSFS) {
		int rc = sysfs_pathinfo(pp, conf->hwtable);

		if (rc != PATHINFO_OK)
			return rc;
	}

	/*
	 * Device and protocol blacklists are only applied when the sysfs
	 * pass ran in this call (DI_SYSFS set).
	 */
	if (mask & DI_BLACKLIST && mask & DI_SYSFS) {
		if (filter_device(conf->blist_device, conf->elist_device,
				  pp->vendor_id, pp->product_id, pp->dev) > 0 ||
		    filter_protocol(conf->blist_protocol, conf->elist_protocol,
				    pp) > 0)
			return PATHINFO_SKIPPED;
	}

	path_state = path_offline(pp);
	if (path_state == PATH_REMOVED)
		goto blank;
	else if (mask & DI_NOIO) {
		if (mask & DI_CHECKER)
			/*
			 * Avoid any IO on the device itself.
			 * simply use the path_offline() return as its state
			 */
			pp->chkrstate = pp->state = path_state;
		return PATHINFO_OK;
	}

	/*
	 * fetch info not available through sysfs
	 */
	if (pp->fd < 0)
		pp->fd = open(udev_device_get_devnode(pp->udev), O_RDONLY);

	if (pp->fd < 0) {
		condlog(4, "Couldn't open node for %s: %s",
			pp->dev, strerror(errno));
		goto blank;
	}

	if (mask & DI_SERIAL)
		get_geometry(pp);

	if (path_state == PATH_UP && pp->bus == SYSFS_BUS_SCSI)
		scsi_ioctl_pathinfo(pp, mask);

	if (pp->bus == SYSFS_BUS_CCISS && mask & DI_SERIAL)
		cciss_ioctl_pathinfo(pp);

	if (mask & DI_CHECKER) {
		if (path_state == PATH_UP) {
			int newstate = get_state(pp, conf, 0, path_state);
			/*
			 * A PATH_PENDING result replaces the stored state
			 * only when there is no earlier result to keep
			 * (PATH_UNCHECKED or PATH_WILD).
			 */
			if (newstate != PATH_PENDING ||
			    pp->state == PATH_UNCHECKED ||
			    pp->state == PATH_WILD)
				pp->chkrstate = pp->state = newstate;
			/* chkrstate keeps PATH_TIMEOUT; state reports it as down */
			if (pp->state == PATH_TIMEOUT)
				pp->state = PATH_DOWN;
			if (pp->state == PATH_UP && !pp->size) {
				condlog(3, "%s: device size is 0, "
					"path unusable", pp->dev);
				pp->state = PATH_GHOST;
			}
		} else {
			condlog(3, "%s: path inaccessible", pp->dev);
			pp->chkrstate = pp->state = path_state;
		}
	}

	if ((mask & DI_WWID) && !strlen(pp->wwid)) {
		/*
		 * The last argument is get_uid()'s allow_fallback flag: it
		 * becomes true once retriggers has reached retrigger_tries.
		 */
		get_uid(pp, path_state, pp->udev,
			(pp->retriggers >= conf->retrigger_tries));
		if (!strlen(pp->wwid)) {
			if (pp->bus == SYSFS_BUS_UNDEF)
				return PATHINFO_SKIPPED;
			/*
			 * Still no WWID: flag the path INIT_MISSING_UDEV and
			 * load tick with retrigger_delay (presumably a retry
			 * countdown consumed elsewhere - TODO confirm).
			 */
			if (pp->initialized != INIT_FAILED) {
				pp->initialized = INIT_MISSING_UDEV;
				pp->tick = conf->retrigger_delay;
			}
			return PATHINFO_OK;
		}
		else
			pp->tick = 1;
	}

	if (mask & DI_BLACKLIST && mask & DI_WWID) {
		if (filter_wwid(conf->blist_wwid, conf->elist_wwid,
				pp->wwid, pp->dev) > 0) {
			return PATHINFO_SKIPPED;
		}
	}

	/*
	 * Retrieve path priority, even for PATH_DOWN paths if it has never
	 * been successfully obtained before. If path is down don't try
	 * for too long.
	 *
	 * NOTE(review): "PATH_DOWN" above refers to pp->state (the checker
	 * result). The outer condition still requires path_state, i.e. the
	 * path_offline() result, to be PATH_UP.
	 */
	if ((mask & DI_PRIO) && path_state == PATH_UP && strlen(pp->wwid)) {
		if (pp->state != PATH_DOWN || pp->priority == PRIO_UNDEF) {
			get_prio(pp, (pp->state != PATH_DOWN)?
				 (conf->checker_timeout * 1000) : 10);
		}
	}

	/* only a call that requested every DI_ALL bit marks the path INIT_OK */
	if ((mask & DI_ALL) == DI_ALL)
		pp->initialized = INIT_OK;
	return PATHINFO_OK;

blank:
	/*
	 * Recoverable error, for example faulty or offline path
	 */
	pp->chkrstate = pp->state = PATH_DOWN;
	/* the WWID is wiped only for paths that are INIT_NEW or INIT_FAILED */
	if (pp->initialized == INIT_NEW || pp->initialized == INIT_FAILED)
		memset(pp->wwid, 0, WWID_SIZE);

	return PATHINFO_OK;
}
sysfs_set_scsi_tmo (struct multipath *mpp, unsigned int checkint); +int sysfs_get_timeout(const struct path *pp, unsigned int *timeout); +int sysfs_get_host_pci_name(const struct path *pp, char *pci_name); +int sysfs_get_iscsi_ip_address(const struct path *pp, char *ip_address); +int sysfs_get_host_adapter_name(const struct path *pp, + char *adapter_name); +ssize_t sysfs_get_vpd (struct udev_device *udev, unsigned char pg, + unsigned char *buff, size_t len); +ssize_t sysfs_get_inquiry(struct udev_device *udev, + unsigned char *buff, size_t len); +int sysfs_get_asymmetric_access_state(struct path *pp, + char *buff, int buflen); +int get_uid(struct path * pp, int path_state, struct udev_device *udev, + int allow_fallback); + +/* + * discovery bitmask + */ +enum discovery_mode { + __DI_SYSFS, + __DI_SERIAL, + __DI_CHECKER, + __DI_PRIO, + __DI_WWID, + __DI_BLACKLIST, + __DI_NOIO, +}; + +#define DI_SYSFS (1 << __DI_SYSFS) +#define DI_SERIAL (1 << __DI_SERIAL) +#define DI_CHECKER (1 << __DI_CHECKER) +#define DI_PRIO (1 << __DI_PRIO) +#define DI_WWID (1 << __DI_WWID) +#define DI_BLACKLIST (1 << __DI_BLACKLIST) +#define DI_NOIO (1 << __DI_NOIO) /* Avoid IO on the device */ + +#define DI_ALL (DI_SYSFS | DI_SERIAL | DI_CHECKER | DI_PRIO | \ + DI_WWID) + +#endif /* DISCOVERY_H */ diff --git a/libmultipath/dm-generic.c b/libmultipath/dm-generic.c new file mode 100644 index 0000000..1b42fa0 --- /dev/null +++ b/libmultipath/dm-generic.c @@ -0,0 +1,70 @@ +/* + Copyright (c) 2018 Martin Wilck, SUSE Linux GmbH + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + */ + +#include +#include +#include "generic.h" +#include "dm-generic.h" +#include "structs.h" +#include "structs_vec.h" +#include "config.h" +#include "print.h" + +static const struct _vector* +dm_mp_get_pgs(const struct gen_multipath *gmp) +{ + return vector_convert(NULL, gen_multipath_to_dm(gmp)->pg, + struct pathgroup, dm_pathgroup_to_gen); +} + +static void dm_mp_rel_pgs(__attribute__((unused)) + const struct gen_multipath *gmp, + const struct _vector* v) +{ + vector_free_const(v); +} + +static const struct _vector* +dm_pg_get_paths(const struct gen_pathgroup *gpg) +{ + return vector_convert(NULL, gen_pathgroup_to_dm(gpg)->paths, + struct path, dm_path_to_gen); +} + +static void dm_mp_rel_paths(__attribute__((unused)) + const struct gen_pathgroup *gpg, + const struct _vector* v) +{ + vector_free_const(v); +} + +const struct gen_multipath_ops dm_gen_multipath_ops = { + .get_pathgroups = dm_mp_get_pgs, + .rel_pathgroups = dm_mp_rel_pgs, + .snprint = snprint_multipath_attr, + .style = snprint_multipath_style, +}; + +const struct gen_pathgroup_ops dm_gen_pathgroup_ops = { + .get_paths = dm_pg_get_paths, + .rel_paths = dm_mp_rel_paths, + .snprint = snprint_pathgroup_attr, +}; + +const struct gen_path_ops dm_gen_path_ops = { + .snprint = snprint_path_attr, +}; diff --git a/libmultipath/dm-generic.h b/libmultipath/dm-generic.h new file mode 100644 index 0000000..986429f --- /dev/null +++ b/libmultipath/dm-generic.h @@ -0,0 +1,39 @@ +/* + Copyright (c) 2018 Martin Wilck, SUSE Linux GmbH + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
/*
 * Append a single blank followed by @word to the heap string *dst.
 *
 * The buffer is grown with REALLOC(). On success *dst points to the
 * (possibly moved) string and 0 is returned. If the reallocation fails,
 * the previous buffer is released, *dst is left NULL and 1 is returned.
 */
static int
merge_words(char **dst, char *word)
{
	char *old = *dst;
	char *tail;
	int old_len = strlen(old);
	/* old text + ' ' + word + terminating NUL */
	int new_size = old_len + strlen(word) + 2;

	*dst = REALLOC(old, new_size);
	if (*dst == NULL) {
		free(old);
		return 1;
	}

	tail = *dst + old_len;
	*tail++ = ' ';
	/* the count covers strlen(word) + 1, so the NUL is copied as well */
	strncpy(tail, word, new_size - old_len - 1);

	return 0;
}
args...) \ +({ \ + int ret; \ + \ + ret = snprintf(p, end - p, ##args); \ + if (ret < 0) { \ + condlog(0, "%s: conversion error", mp->alias); \ + goto err; \ + } \ + p += ret; \ + if (p >= end) { \ + condlog(0, "%s: params too small", mp->alias); \ + goto err; \ + } \ +}) + +/* + * Transforms the path group vector into a proper device map string + */ +int +assemble_map (struct multipath * mp, char * params, int len) +{ + int i, j; + int minio; + int nr_priority_groups, initial_pg_nr; + char * p, * f; + const char *const end = params + len; + char no_path_retry[] = "queue_if_no_path"; + char retain_hwhandler[] = "retain_attached_hw_handler"; + struct pathgroup * pgp; + struct path * pp; + + minio = mp->minio; + p = params; + + nr_priority_groups = VECTOR_SIZE(mp->pg); + initial_pg_nr = (nr_priority_groups ? mp->bestpg : 0); + + if (mp->no_path_retry != NO_PATH_RETRY_UNDEF && + mp->no_path_retry != NO_PATH_RETRY_FAIL) { + add_feature(&mp->features, no_path_retry); + } + if (mp->retain_hwhandler == RETAIN_HWHANDLER_ON && + get_linux_version_code() < KERNEL_VERSION(4, 3, 0)) + add_feature(&mp->features, retain_hwhandler); + + f = STRDUP(mp->features); + + APPEND(p, end, "%s %s %i %i", f, mp->hwhandler, nr_priority_groups, + initial_pg_nr); + + vector_foreach_slot (mp->pg, pgp, i) { + pgp = VECTOR_SLOT(mp->pg, i); + APPEND(p, end, " %s %i 1", mp->selector, + VECTOR_SIZE(pgp->paths)); + + vector_foreach_slot (pgp->paths, pp, j) { + int tmp_minio = minio; + + if (mp->rr_weight == RR_WEIGHT_PRIO + && pp->priority > 0) + tmp_minio = minio * pp->priority; + if (!strlen(pp->dev_t) ) { + condlog(0, "dev_t not set for '%s'", pp->dev); + goto err; + } + APPEND(p, end, " %s %d", pp->dev_t, tmp_minio); + } + } + + FREE(f); + condlog(4, "%s: assembled map [%s]", mp->alias, params); + return 0; + +err: + FREE(f); + return 1; +} + +#undef APPEND + +int disassemble_map(vector pathvec, char *params, struct multipath *mpp, + int is_daemon) +{ + char * word; + char * p; + int i, j, k; + 
int num_features = 0; + int num_hwhandler = 0; + int num_pg = 0; + int num_pg_args = 0; + int num_paths = 0; + int num_paths_args = 0; + int def_minio = 0; + struct path * pp; + struct pathgroup * pgp; + + p = params; + + condlog(4, "%s: disassemble map [%s]", mpp->alias, params); + + /* + * features + */ + p += get_word(p, &mpp->features); + + if (!mpp->features) + return 1; + + num_features = atoi(mpp->features); + + for (i = 0; i < num_features; i++) { + p += get_word(p, &word); + + if (!word) + return 1; + + if (merge_words(&mpp->features, word)) { + FREE(word); + return 1; + } + + FREE(word); + } + + /* + * hwhandler + */ + p += get_word(p, &mpp->hwhandler); + + if (!mpp->hwhandler) + return 1; + + num_hwhandler = atoi(mpp->hwhandler); + + for (i = 0; i < num_hwhandler; i++) { + p += get_word(p, &word); + + if (!word) + return 1; + + if (merge_words(&mpp->hwhandler, word)) { + FREE(word); + return 1; + } + FREE(word); + } + + /* + * nb of path groups + */ + p += get_word(p, &word); + + if (!word) + return 1; + + num_pg = atoi(word); + FREE(word); + + if (num_pg > 0) { + if (!mpp->pg) { + mpp->pg = vector_alloc(); + if (!mpp->pg) + return 1; + } + } else { + free_pgvec(mpp->pg, KEEP_PATHS); + mpp->pg = NULL; + } + + /* + * first pg to try + */ + p += get_word(p, &word); + + if (!word) + goto out; + + mpp->nextpg = atoi(word); + FREE(word); + + for (i = 0; i < num_pg; i++) { + /* + * selector + */ + + if (!mpp->selector) { + p += get_word(p, &mpp->selector); + + if (!mpp->selector) + goto out; + + /* + * selector args + */ + p += get_word(p, &word); + + if (!word) + goto out; + + num_pg_args = atoi(word); + + if (merge_words(&mpp->selector, word)) + goto out1; + FREE(word); + } else { + p += get_word(p, NULL); + p += get_word(p, NULL); + } + + for (j = 0; j < num_pg_args; j++) + p += get_word(p, NULL); + + /* + * paths + */ + pgp = alloc_pathgroup(); + + if (!pgp) + goto out; + + if (add_pathgroup(mpp, pgp)) + goto out; + + p += get_word(p, &word); + + if 
(!word) + goto out; + + num_paths = atoi(word); + FREE(word); + + p += get_word(p, &word); + + if (!word) + goto out; + + num_paths_args = atoi(word); + FREE(word); + + for (j = 0; j < num_paths; j++) { + char devname[FILE_NAME_SIZE]; + + pp = NULL; + p += get_word(p, &word); + + if (!word) + goto out; + + if (devt2devname(devname, FILE_NAME_SIZE, word)) { + condlog(2, "%s: cannot find block device", + word); + devname[0] = '\0'; + } + + if (pathvec) { + if (strlen(devname)) + pp = find_path_by_dev(pathvec, devname); + else + pp = find_path_by_devt(pathvec, word); + } + + if (!pp) { + pp = alloc_path(); + + if (!pp) + goto out1; + + strlcpy(pp->dev_t, word, BLK_DEV_SIZE); + strlcpy(pp->dev, devname, FILE_NAME_SIZE); + if (strlen(mpp->wwid)) { + strlcpy(pp->wwid, mpp->wwid, + WWID_SIZE); + } + /* Only call this in multipath client mode */ + if (!is_daemon && store_path(pathvec, pp)) + goto out1; + } else { + if (!strlen(pp->wwid) && + strlen(mpp->wwid)) + strlcpy(pp->wwid, mpp->wwid, + WWID_SIZE); + } + FREE(word); + + if (store_path(pgp->paths, pp)) + goto out; + + /* + * Update wwid for multipaths which are not setup + * in the get_dm_mpvec() code path + */ + if (!strlen(mpp->wwid)) + strlcpy(mpp->wwid, pp->wwid, WWID_SIZE); + + /* + * Update wwid for paths which may not have been + * active at the time the getuid callout was run + */ + else if (!strlen(pp->wwid)) + strlcpy(pp->wwid, mpp->wwid, WWID_SIZE); + + /* + * Do not allow in-use patch to change wwid + */ + else if (strcmp(pp->wwid, mpp->wwid) != 0) { + condlog(0, "%s: path wwid appears to have changed. 
Using map wwid.\n", pp->dev_t); + strlcpy(pp->wwid, mpp->wwid, WWID_SIZE); + } + + pgp->id ^= (long)pp; + pp->pgindex = i + 1; + + for (k = 0; k < num_paths_args; k++) + if (k == 0) { + p += get_word(p, &word); + def_minio = atoi(word); + FREE(word); + + if (!strncmp(mpp->selector, + "round-robin", 11)) { + + if (mpp->rr_weight == RR_WEIGHT_PRIO + && pp->priority > 0) + def_minio /= pp->priority; + + } + + if (def_minio != mpp->minio) + mpp->minio = def_minio; + } + else + p += get_word(p, NULL); + + } + } + return 0; +out1: + FREE(word); +out: + free_pgvec(mpp->pg, KEEP_PATHS); + mpp->pg = NULL; + return 1; +} + +int disassemble_status(char *params, struct multipath *mpp) +{ + char * word; + char * p; + int i, j, k; + int num_feature_args; + int num_hwhandler_args; + int num_pg; + int num_pg_args; + int num_paths; + int def_minio = 0; + struct path * pp; + struct pathgroup * pgp; + + p = params; + + condlog(4, "%s: disassemble status [%s]", mpp->alias, params); + + /* + * features + */ + p += get_word(p, &word); + + if (!word) + return 1; + + num_feature_args = atoi(word); + FREE(word); + + for (i = 0; i < num_feature_args; i++) { + if (i == 1) { + p += get_word(p, &word); + + if (!word) + return 1; + + mpp->queuedio = atoi(word); + FREE(word); + continue; + } + /* unknown */ + p += get_word(p, NULL); + } + /* + * hwhandler + */ + p += get_word(p, &word); + + if (!word) + return 1; + + num_hwhandler_args = atoi(word); + FREE(word); + + for (i = 0; i < num_hwhandler_args; i++) + p += get_word(p, NULL); + + /* + * nb of path groups + */ + p += get_word(p, &word); + + if (!word) + return 1; + + num_pg = atoi(word); + FREE(word); + + if (num_pg == 0) + return 0; + + /* + * next pg to try + */ + p += get_word(p, NULL); + + if (VECTOR_SIZE(mpp->pg) < num_pg) + return 1; + + for (i = 0; i < num_pg; i++) { + pgp = VECTOR_SLOT(mpp->pg, i); + /* + * PG status + */ + p += get_word(p, &word); + + if (!word) + return 1; + + switch (*word) { + case 'D': + pgp->status = 
PGSTATE_DISABLED; + break; + case 'A': + pgp->status = PGSTATE_ACTIVE; + break; + case 'E': + pgp->status = PGSTATE_ENABLED; + break; + default: + pgp->status = PGSTATE_UNDEF; + break; + } + FREE(word); + + /* + * PG Status (discarded, would be '0' anyway) + */ + p += get_word(p, NULL); + + p += get_word(p, &word); + + if (!word) + return 1; + + num_paths = atoi(word); + FREE(word); + + p += get_word(p, &word); + + if (!word) + return 1; + + num_pg_args = atoi(word); + FREE(word); + + if (VECTOR_SIZE(pgp->paths) < num_paths) + return 1; + + for (j = 0; j < num_paths; j++) { + pp = VECTOR_SLOT(pgp->paths, j); + /* + * path + */ + p += get_word(p, NULL); + + /* + * path status + */ + p += get_word(p, &word); + + if (!word) + return 1; + + switch (*word) { + case 'F': + pp->dmstate = PSTATE_FAILED; + break; + case 'A': + pp->dmstate = PSTATE_ACTIVE; + break; + default: + break; + } + FREE(word); + /* + * fail count + */ + p += get_word(p, &word); + + if (!word) + return 1; + + pp->failcount = atoi(word); + FREE(word); + + /* + * selector args + */ + for (k = 0; k < num_pg_args; k++) { + if (!strncmp(mpp->selector, + "least-pending", 13)) { + p += get_word(p, &word); + if (sscanf(word,"%d:*d", + &def_minio) == 1 && + def_minio != mpp->minio) + mpp->minio = def_minio; + } else + p += get_word(p, NULL); + } + } + } + return 0; +} diff --git a/libmultipath/dmparser.h b/libmultipath/dmparser.h new file mode 100644 index 0000000..e1badb0 --- /dev/null +++ b/libmultipath/dmparser.h @@ -0,0 +1,3 @@ +int assemble_map (struct multipath *, char *, int); +int disassemble_map (vector, char *, struct multipath *, int); +int disassemble_status (char *, struct multipath *); diff --git a/libmultipath/file.c b/libmultipath/file.c new file mode 100644 index 0000000..72f1d24 --- /dev/null +++ b/libmultipath/file.c @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2005 Christophe Varoqui + * Copyright (c) 2005 Benjamin Marzinski, Redhat + */ +#include +#include +#include +#include +#include 
/*
 * Create every directory component that precedes the last '/' of @str,
 * using @dir_mode for directories that have to be made.
 *
 * The component after the final slash is treated as a file name and is
 * not created. Directories that already exist (EEXIST) are accepted.
 *
 * Returns 0 on success, -1 if @str cannot be duplicated or a mkdir()
 * fails for any reason other than EEXIST.
 */
int
ensure_directories_exist(const char *str, mode_t dir_mode)
{
	int rc = 0;
	char *slash;
	char *copy = strdup(str);

	if (copy == NULL) {
		condlog(0, "Cannot copy file pathname %s : %s",
			str, strerror(errno));
		return -1;
	}

	/* step over the leading run of slashes */
	slash = copy;
	while (*slash == '/')
		slash++;

	/* cut the copy at each later slash and create that prefix */
	for (slash = strchr(slash, '/'); slash != NULL;
	     slash = strchr(slash + 1, '/')) {
		*slash = '\0';
		if (mkdir(copy, dir_mode) == 0) {
			condlog(3, "Created dir [%s]", copy);
		} else if (errno != EEXIST) {
			condlog(0, "Cannot make directory [%s] : %s",
				copy, strerror(errno));
			rc = -1;
			break;
		}
		*slash = '/';
	}

	free(copy);
	return rc;
}
" + " trying readonly", file); + fd = open(file, O_RDONLY); + if (fd < 0) { + condlog(0, "Cannot open file [%s] " + "readonly : %s", file, strerror(errno)); + return -1; + } + } + else { + condlog(0, "Cannot open file [%s] : %s", file, + strerror(errno)); + return -1; + } + } + if (*can_write && lock_file(fd, file) < 0) + goto fail; + + memset(&s, 0, sizeof(s)); + if (fstat(fd, &s) < 0){ + condlog(0, "Cannot stat file %s : %s", file, strerror(errno)); + goto fail; + } + if (s.st_size == 0) { + if (*can_write == 0) + goto fail; + /* If file is empty, write the header */ + int len = strlen(header); + + if (write(fd, header, len) != len) { + condlog(0, + "Cannot write header to file %s : %s", file, + strerror(errno)); + /* cleanup partially written header */ + if (ftruncate(fd, 0)) + condlog(0, "Cannot truncate header : %s", + strerror(errno)); + goto fail; + } + fsync(fd); + condlog(3, "Initialized new file [%s]", file); + } + + return fd; + +fail: + close(fd); + return -1; +} diff --git a/libmultipath/file.h b/libmultipath/file.h new file mode 100644 index 0000000..3c75c90 --- /dev/null +++ b/libmultipath/file.h @@ -0,0 +1,14 @@ +/* + * Copyright (c) 2010 Benjamin Marzinski, Redhat + */ + +#ifndef _FILE_H +#define _FILE_H + +#include + +#define FILE_TIMEOUT 30 +int ensure_directories_exist(const char *str, mode_t dir_mode); +int open_file(const char *file, int *can_write, const char *header); + +#endif /* _FILE_H */ diff --git a/libmultipath/foreign.c b/libmultipath/foreign.c new file mode 100644 index 0000000..0159a83 --- /dev/null +++ b/libmultipath/foreign.c @@ -0,0 +1,650 @@ +/* + Copyright (c) 2018 Martin Wilck, SUSE Linux GmbH + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "vector.h" +#include "debug.h" +#include "util.h" +#include "foreign.h" +#include "structs.h" +#include "structs_vec.h" +#include "print.h" + +static vector foreigns; + +/* This protects vector foreigns */ +static pthread_rwlock_t foreign_lock = PTHREAD_RWLOCK_INITIALIZER; + +static void rdlock_foreigns(void) +{ + pthread_rwlock_rdlock(&foreign_lock); +} + +static void wrlock_foreigns(void) +{ + pthread_rwlock_wrlock(&foreign_lock); +} + +static void unlock_foreigns(__attribute__((unused)) void *unused) +{ + pthread_rwlock_unlock(&foreign_lock); +} + +#define get_dlsym(foreign, sym, lbl) \ + do { \ + foreign->sym = dlsym(foreign->handle, #sym); \ + if (foreign->sym == NULL) { \ + condlog(0, "%s: symbol \"%s\" not found in \"%s\"", \ + __func__, #sym, foreign->name); \ + goto lbl; \ + } \ + } while(0) + +static void free_foreign(struct foreign *fgn) +{ + struct context *ctx; + + if (fgn == NULL) + return; + + ctx = fgn->context; + fgn->context = NULL; + if (ctx != NULL) + fgn->cleanup(ctx); + + if (fgn->handle != NULL) + dlclose(fgn->handle); + free(fgn); +} + +void _cleanup_foreign(void) +{ + struct foreign *fgn; + int i; + + if (foreigns == NULL) + return; + + vector_foreach_slot_backwards(foreigns, fgn, i) { + vector_del_slot(foreigns, i); + free_foreign(fgn); + } + vector_free(foreigns); + foreigns = NULL; +} + +void cleanup_foreign(void) +{ + wrlock_foreigns(); + _cleanup_foreign(); + unlock_foreigns(NULL); +} + +static const char foreign_pattern[] = "libforeign-*.so"; + +static 
/*
 * Cleanup handler for a heap-allocated, compiled regex.
 *
 * @arg is the address of a "regex_t *" variable. If both @arg and the
 * pointer it refers to are non-NULL, the regex is released with
 * regfree(), its storage is freed, and the variable is reset to NULL so
 * that a second invocation is a no-op.
 */
static void free_pre(void *arg)
{
	regex_t **slot = arg;
	regex_t *re;

	if (slot == NULL)
		return;

	re = *slot;
	if (re == NULL)
		return;

	regfree(re);
	free(re);
	*slot = NULL;
}
+ if (enable_re != NULL) { + int ret = regexec(enable_re, fgn->name, 0, NULL, 0); + + if (ret == REG_NOMATCH) { + condlog(3, "%s: foreign library \"%s\" is not enabled", + __func__, fgn->name); + free(fgn); + continue; + } else if (ret != 0) + /* assume it matches */ + condlog(2, "%s: error %d in regexec() for %s", + __func__, ret, fgn->name); + } + + snprintf(pathbuf, sizeof(pathbuf), "%s/%s", multipath_dir, fn); + fgn->handle = dlopen(pathbuf, RTLD_NOW|RTLD_LOCAL); + msg = dlerror(); + if (fgn->handle == NULL) { + condlog(1, "%s: failed to dlopen %s: %s", __func__, + pathbuf, msg); + goto dl_err; + } + + get_dlsym(fgn, init, dl_err); + get_dlsym(fgn, cleanup, dl_err); + get_dlsym(fgn, add, dl_err); + get_dlsym(fgn, change, dl_err); + get_dlsym(fgn, delete, dl_err); + get_dlsym(fgn, delete_all, dl_err); + get_dlsym(fgn, check, dl_err); + get_dlsym(fgn, lock, dl_err); + get_dlsym(fgn, unlock, dl_err); + get_dlsym(fgn, get_multipaths, dl_err); + get_dlsym(fgn, release_multipaths, dl_err); + get_dlsym(fgn, get_paths, dl_err); + get_dlsym(fgn, release_paths, dl_err); + + fgn->context = fgn->init(LIBMP_FOREIGN_API, fgn->name); + if (fgn->context == NULL) { + condlog(0, "%s: init() failed for %s", __func__, fn); + goto dl_err; + } + + if (vector_alloc_slot(foreigns) == NULL) { + goto dl_err; + } + + vector_set_slot(foreigns, fgn); + condlog(3, "foreign library \"%s\" loaded successfully", + fgn->name); + + continue; + + dl_err: + free_foreign(fgn); + } + r = 0; + pthread_cleanup_pop(1); /* free_scandir_result */ +out_free_pre: + pthread_cleanup_pop(1); /* free_pre */ + return r; +} + +int init_foreign(const char *multipath_dir, const char *enable) +{ + int ret; + + wrlock_foreigns(); + + if (foreigns != NULL) { + unlock_foreigns(NULL); + condlog(0, "%s: already initialized", __func__); + return -EEXIST; + } + + pthread_cleanup_push(unlock_foreigns, NULL); + ret = _init_foreign(multipath_dir, enable); + pthread_cleanup_pop(1); + + return ret; +} + +int add_foreign(struct 
udev_device *udev) +{ + struct foreign *fgn; + dev_t dt; + int j; + int r = FOREIGN_IGNORED; + + if (udev == NULL) { + condlog(1, "%s called with NULL udev", __func__); + return FOREIGN_ERR; + } + + rdlock_foreigns(); + if (foreigns == NULL) { + unlock_foreigns(NULL); + return FOREIGN_ERR; + } + pthread_cleanup_push(unlock_foreigns, NULL); + + dt = udev_device_get_devnum(udev); + vector_foreach_slot(foreigns, fgn, j) { + r = fgn->add(fgn->context, udev); + + if (r == FOREIGN_CLAIMED) { + condlog(3, "%s: foreign \"%s\" claims device %d:%d", + __func__, fgn->name, major(dt), minor(dt)); + break; + } else if (r == FOREIGN_OK) { + condlog(4, "%s: foreign \"%s\" owns device %d:%d", + __func__, fgn->name, major(dt), minor(dt)); + break; + } else if (r != FOREIGN_IGNORED) { + condlog(1, "%s: unexpected return value %d from \"%s\"", + __func__, r, fgn->name); + } + } + + pthread_cleanup_pop(1); + return r; +} + +int change_foreign(struct udev_device *udev) +{ + struct foreign *fgn; + int j; + dev_t dt; + int r = FOREIGN_IGNORED; + + if (udev == NULL) { + condlog(1, "%s called with NULL udev", __func__); + return FOREIGN_ERR; + } + + rdlock_foreigns(); + if (foreigns == NULL) { + unlock_foreigns(NULL); + return FOREIGN_ERR; + } + pthread_cleanup_push(unlock_foreigns, NULL); + + dt = udev_device_get_devnum(udev); + vector_foreach_slot(foreigns, fgn, j) { + r = fgn->change(fgn->context, udev); + + if (r == FOREIGN_OK) { + condlog(4, "%s: foreign \"%s\" completed %d:%d", + __func__, fgn->name, major(dt), minor(dt)); + break; + } else if (r != FOREIGN_IGNORED) { + condlog(1, "%s: unexpected return value %d from \"%s\"", + __func__, r, fgn->name); + } + } + + pthread_cleanup_pop(1); + return r; +} + +int delete_foreign(struct udev_device *udev) +{ + struct foreign *fgn; + int j; + dev_t dt; + int r = FOREIGN_IGNORED; + + if (udev == NULL) { + condlog(1, "%s called with NULL udev", __func__); + return FOREIGN_ERR; + } + + rdlock_foreigns(); + if (foreigns == NULL) { + 
unlock_foreigns(NULL); + return FOREIGN_ERR; + } + pthread_cleanup_push(unlock_foreigns, NULL); + + dt = udev_device_get_devnum(udev); + vector_foreach_slot(foreigns, fgn, j) { + r = fgn->delete(fgn->context, udev); + + if (r == FOREIGN_OK) { + condlog(3, "%s: foreign \"%s\" deleted device %d:%d", + __func__, fgn->name, major(dt), minor(dt)); + break; + } else if (r != FOREIGN_IGNORED) { + condlog(1, "%s: unexpected return value %d from \"%s\"", + __func__, r, fgn->name); + } + } + + pthread_cleanup_pop(1); + return r; +} + +int delete_all_foreign(void) +{ + struct foreign *fgn; + int j; + + rdlock_foreigns(); + if (foreigns == NULL) { + unlock_foreigns(NULL); + return FOREIGN_ERR; + } + pthread_cleanup_push(unlock_foreigns, NULL); + + vector_foreach_slot(foreigns, fgn, j) { + int r; + + r = fgn->delete_all(fgn->context); + if (r != FOREIGN_IGNORED && r != FOREIGN_OK) { + condlog(1, "%s: unexpected return value %d from \"%s\"", + __func__, r, fgn->name); + } + } + + pthread_cleanup_pop(1); + return FOREIGN_OK; +} + +void check_foreign(void) +{ + struct foreign *fgn; + int j; + + rdlock_foreigns(); + if (foreigns == NULL) { + unlock_foreigns(NULL); + return; + } + pthread_cleanup_push(unlock_foreigns, NULL); + + vector_foreach_slot(foreigns, fgn, j) { + fgn->check(fgn->context); + } + + pthread_cleanup_pop(1); +} + +/* Call this after get_path_layout */ +void foreign_path_layout(void) +{ + struct foreign *fgn; + int i; + + rdlock_foreigns(); + if (foreigns == NULL) { + unlock_foreigns(NULL); + return; + } + pthread_cleanup_push(unlock_foreigns, NULL); + + vector_foreach_slot(foreigns, fgn, i) { + const struct _vector *vec; + + fgn->lock(fgn->context); + pthread_cleanup_push(fgn->unlock, fgn->context); + + vec = fgn->get_paths(fgn->context); + if (vec != NULL) { + _get_path_layout(vec, LAYOUT_RESET_NOT); + } + fgn->release_paths(fgn->context, vec); + + pthread_cleanup_pop(1); + } + + pthread_cleanup_pop(1); +} + +/* Call this after get_multipath_layout */ +void 
foreign_multipath_layout(void)
+{
+	struct foreign *fgn;
+	int i;
+
+	rdlock_foreigns();
+	if (foreigns == NULL) {
+		unlock_foreigns(NULL);
+		return;
+	}
+	pthread_cleanup_push(unlock_foreigns, NULL);
+
+	vector_foreach_slot(foreigns, fgn, i) {
+		const struct _vector *vec;
+
+		fgn->lock(fgn->context);
+		pthread_cleanup_push(fgn->unlock, fgn->context);
+
+		vec = fgn->get_multipaths(fgn->context);
+		if (vec != NULL) {
+			_get_multipath_layout(vec, LAYOUT_RESET_NOT);
+		}
+		fgn->release_multipaths(fgn->context, vec);
+
+		pthread_cleanup_pop(1);
+	}
+
+	pthread_cleanup_pop(1);
+}
+
+/*
+ * Print the topology of all maps owned by foreign libraries into buf
+ * (at most len bytes). Printing stops as soon as the buffer is full.
+ * Returns the number of characters written, or 0 if no foreign
+ * libraries are loaded.
+ */
+int snprint_foreign_topology(char *buf, int len, int verbosity)
+{
+	struct foreign *fgn;
+	int i;
+	char *c = buf;
+
+	rdlock_foreigns();
+	if (foreigns == NULL) {
+		unlock_foreigns(NULL);
+		return 0;
+	}
+	pthread_cleanup_push(unlock_foreigns, NULL);
+
+	vector_foreach_slot(foreigns, fgn, i) {
+		const struct _vector *vec;
+		const struct gen_multipath *gm;
+		int j;
+
+		fgn->lock(fgn->context);
+		pthread_cleanup_push(fgn->unlock, fgn->context);
+
+		vec = fgn->get_multipaths(fgn->context);
+		if (vec != NULL) {
+			vector_foreach_slot(vec, gm, j) {
+
+				c += _snprint_multipath_topology(gm, c,
+								 buf + len - c,
+								 verbosity);
+				if (c >= buf + len - 1)
+					break;
+			}
+		}
+		fgn->release_multipaths(fgn->context, vec);
+		pthread_cleanup_pop(1);
+
+		/*
+		 * Buffer full: stop iterating. This check must come after
+		 * pthread_cleanup_pop(); leaving a push/pop pair with
+		 * "break" is undefined and would skip release_multipaths().
+		 */
+		if (c >= buf + len - 1)
+			break;
+	}
+
+	pthread_cleanup_pop(1);
+	return c - buf;
+}
+
+/*
+ * Print the topology of all foreign maps to stdout.
+ * The output buffer is doubled until the whole topology fits. If the
+ * buffer cannot be grown, the contents of the last buffer are printed
+ * as they are. If the initial allocation fails, nothing is printed.
+ */
+void print_foreign_topology(int verbosity)
+{
+	int buflen = MAX_LINE_LEN * MAX_LINES;
+	char *buf = NULL, *tmp = NULL;
+
+	buf = malloc(buflen);
+	if (buf == NULL)
+		return;
+	buf[0] = '\0';
+	while (buf != NULL) {
+		char *c = buf;
+
+		c += snprint_foreign_topology(buf, buflen,
+						   verbosity);
+		if (c < buf + buflen - 1)
+			break;
+
+		buflen *= 2;
+		tmp = buf;
+		buf = realloc(buf, buflen);
+	}
+
+	/* realloc() failed: fall back to the previous, still valid buffer */
+	if (buf == NULL && tmp != NULL)
+		buf = tmp;
+
+	if (buf != NULL) {
+		printf("%s", buf);
+		free(buf);
+	}
+}
+
+/*
+ * Print all paths owned by foreign libraries into buf (at most len
+ * bytes), formatted according to style. Printing stops as soon as the
+ * buffer is full. Returns the number of characters written, or 0 if no
+ * foreign libraries are loaded.
+ */
+int snprint_foreign_paths(char *buf, int len, const char *style, int pretty)
+{
+	struct foreign *fgn;
+	int i;
+	char *c = buf;
+
+	rdlock_foreigns();
+	if (foreigns == NULL) {
+		unlock_foreigns(NULL);
+		return 0;
+	}
+	pthread_cleanup_push(unlock_foreigns, NULL);
+
+	vector_foreach_slot(foreigns, fgn, i) {
+		const struct _vector *vec;
+		const struct gen_path *gp;
+		int j;
+
+		fgn->lock(fgn->context);
+		pthread_cleanup_push(fgn->unlock, fgn->context);
+
+		vec = fgn->get_paths(fgn->context);
+		if (vec != NULL) {
+			vector_foreach_slot(vec, gp, j) {
+				c += _snprint_path(gp, c, buf + len - c,
+						   style, pretty);
+				if (c >= buf + len - 1)
+					break;
+			}
+		}
+		fgn->release_paths(fgn->context, vec);
+		pthread_cleanup_pop(1);
+
+		/* buffer full: stop, but only after the push/pop pair */
+		if (c >= buf + len - 1)
+			break;
+	}
+
+	pthread_cleanup_pop(1);
+	return c - buf;
+}
+
+/*
+ * Print all maps owned by foreign libraries into buf (at most len
+ * bytes), formatted according to style. Printing stops as soon as the
+ * buffer is full. Returns the number of characters written, or 0 if no
+ * foreign libraries are loaded.
+ */
+int snprint_foreign_multipaths(char *buf, int len,
+			       const char *style, int pretty)
+{
+	struct foreign *fgn;
+	int i;
+	char *c = buf;
+
+	rdlock_foreigns();
+	if (foreigns == NULL) {
+		unlock_foreigns(NULL);
+		return 0;
+	}
+	pthread_cleanup_push(unlock_foreigns, NULL);
+
+	vector_foreach_slot(foreigns, fgn, i) {
+		const struct _vector *vec;
+		const struct gen_multipath *gm;
+		int j;
+
+		fgn->lock(fgn->context);
+		pthread_cleanup_push(fgn->unlock, fgn->context);
+
+		vec = fgn->get_multipaths(fgn->context);
+		if (vec != NULL) {
+			vector_foreach_slot(vec, gm, j) {
+				c += _snprint_multipath(gm, c, buf + len - c,
+							style, pretty);
+				if (c >= buf + len - 1)
+					break;
+			}
+		}
+		fgn->release_multipaths(fgn->context, vec);
+		pthread_cleanup_pop(1);
+
+		/* buffer full: stop, but only after the push/pop pair */
+		if (c >= buf + len - 1)
+			break;
+	}
+
+	pthread_cleanup_pop(1);
+	return c - buf;
+}
diff --git a/libmultipath/foreign.h b/libmultipath/foreign.h
new file mode 100644
index 0000000..acd3360
--- /dev/null
+++ b/libmultipath/foreign.h
@@ -0,0 +1,321 @@
+/*
+  Copyright (c) 2018 Martin Wilck, SUSE Linux GmbH
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License
+  as published by the Free Software Foundation; either version 2
+  of the License, or (at
your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ +#ifndef _FOREIGN_H +#define _FOREIGN_H +#include +#include + +#define LIBMP_FOREIGN_API ((1 << 8) | 0) + +struct context; + +/* return codes of functions below returning "int" */ +enum foreign_retcode { + FOREIGN_OK, + FOREIGN_CLAIMED, + FOREIGN_IGNORED, + FOREIGN_UNCLAIMED, + FOREIGN_NODEV, + FOREIGN_ERR, + __LAST_FOREIGN_RETCODE, +}; + +/** + * Foreign multipath library API + * Foreign libraries must implement the following methods. + */ +struct foreign { + /** + * method: init(api, name) + * Initialize foreign library, and check API compatibility + * return pointer to opaque internal data strucure if successful, + * NULL otherwise. + * + * @param[in] api: API version + * @param[in] name: name to use for references to self in log messages, + * doesn't need to be strdup'd + * @returns context pointer to use in future method calls. + */ + struct context* (*init)(unsigned int api, const char *name); + + /** + * method: cleanup(context) + * Free data structures used by foreign library, including + * context itself. + * + * @param[in] context foreign library context. This shouldn't be + * referenced any more after calling cleanup(). + */ + void (*cleanup)(struct context *); + + /** + * method: add(context, udev) + * This is called during path detection, and for udev ADD events. 
+ * + * @param[in] context foreign library context + * @param[in] udev udev device to add + * @returns status code + * @retval FOREIGN_CLAIMED: device newly claimed + * @retval FOREIGN_OK: device already registered, no action taken + * @retval FOREIGN_IGNORED: device is ignored, no action taken + * @retval FOREIGN_ERR: an error occurred (e.g. out-of-memory) + */ + int (*add)(struct context *, struct udev_device *); + + /** + * method: change + * This is called on udev CHANGE events. + * + * @param[in] context foreign library context + * @param[in] udev udev device that has generated the event + * @returns status code + * @retval FOREIGN_OK: event processed + * @retval FOREIGN_IGNORED: the device is ignored + * @retval FOREIGN_ERR: an error occurred (e.g. out-of-memory) + * + * Note: theoretically it can happen that the status of a foreign device + * (claimed vs. not claimed) changes in a change event. + * Supporting this correctly would require big efforts. For now, we + * don't support it. "multipathd reconfigure" starts foreign device + * detection from scratch and should be able to handle this situation. + */ + int (*change)(struct context *, struct udev_device *); + + /** + * method: delete + * This is called on udev DELETE events. + * + * @param[in] context foreign library context + * @param[in] udev udev device that has generated the event and + * should be deleted + * @returns status code + * @retval FOREIGN_OK: processed correctly (device deleted) + * @retval FOREIGN_IGNORED: device wasn't registered internally + * @retval FOREIGN_ERR: error occurred. + */ + int (*delete)(struct context *, struct udev_device *); + + /** + * method: delete_all + * This is called if multipathd reconfigures itself. 
+ * Deletes all registered devices (maps and paths) + * + * @param[in] context foreign library context + * @returns status code + * @retval FOREIGN_OK: processed correctly + * @retval FOREIGN_IGNORED: nothing to delete + * @retval FOREIGN_ERR: error occurred + */ + int (*delete_all)(struct context*); + + /** + * method: check + * This is called from multipathd's checker loop. + * + * Check status of managed devices, update internal status, and print + * log messages if appropriate. + * @param[in] context foreign library context + */ + void (*check)(struct context *); + + /** + * lock internal data structures. + * @param[in] ctx: foreign context + */ + void (*lock)(struct context *ctx); + + /** + * unlock internal data structures. + * @param[in] ctx: foreign context (void* in order to use the function + * as argument to pthread_cleanup_push()) + */ + void (*unlock)(void *ctx); + + /** + * method: get_multipaths(context) + * Returned vector must be freed by calling release_multipaths(). + * Lock must be held until release_multipaths() is called. + * + * @param[in] context foreign library context + * @returns a vector of "struct gen_multipath*" with the map devices + * belonging to this library (see generic.h). + */ + const struct _vector* (*get_multipaths)(const struct context *); + + /** + * method: release_multipaths(context, mpvec) + * release data structures obtained with get_multipaths (if any) + * + * @param[in] ctx the foreign context + * @param[in] mpvec the vector allocated with get_multipaths() + */ + void (*release_multipaths)(const struct context *ctx, + const struct _vector* mpvec); + + /** + * method: get_paths + * Returned vector must be freed by calling release_paths(). + * Lock must be held until release_paths() is called. 
+ * + * @param[in] context foreign library context + * @returns a vector of "struct gen_path*" with the path devices + * belonging to this library (see generic.h) + */ + const struct _vector* (*get_paths)(const struct context *); + + /** + * release data structures obtained with get_multipaths (if any) + * + * @param[in] ctx the foreign context + * @param[in] ppvec the vector allocated with get_paths() + */ + void (*release_paths)(const struct context *ctx, + const struct _vector* ppvec); + + void *handle; + struct context *context; + const char name[0]; +}; + +/** + * init_foreign(dir) + * load and initialize foreign multipath libraries in dir (libforeign-*.so). + * @param dir: directory to search + * @param enable: regex to match foreign library name ("*" above) against + * @returns: 0 on success, negative value on failure. + */ +int init_foreign(const char *multipath_dir, const char *enable); + +/** + * cleanup_foreign(dir) + * cleanup and free all data structures owned by foreign libraries + */ +void cleanup_foreign(void); + +/** + * add_foreign(udev) + * check if a device belongs to any foreign library. + * calls add() for all known foreign libs, in the order registered, + * until the first one returns FOREIGN_CLAIMED or FOREIGN_OK. + * @param udev: udev device to check + * @returns: status code + * @retval FOREIGN_CLAIMED: newly claimed by a foreign lib + * @retval FOREIGN_OK: already claimed by a foreign lib + * @retval FOREIGN_IGNORED: ignored by all foreign libs + * @retval FOREIGN_ERR: an error occurred + */ +int add_foreign(struct udev_device *); + +/** + * change_foreign(udev) + * Notify foreign libraries of an udev CHANGE event + * @param udev: udev device to check + * @returns: status code (see change() method above). 
+ */ +int change_foreign(struct udev_device *); + +/** + * delete_foreign(udev) + * @param udev: udev device being removed + * @returns: status code (see remove() above) + */ +int delete_foreign(struct udev_device *); + +/** + * delete_all_foreign() + * call delete_all() for all foreign libraries + * @returns: status code (see delete_all() above) + */ +int delete_all_foreign(void); + +/** + * check_foreign() + * call check() (see above) for all foreign libraries + */ +void check_foreign(void); + +/** + * foreign_path_layout() + * call this before printing paths, after get_path_layout(), to determine + * output field width. + */ +void foreign_path_layout(void); + +/** + * foreign_multipath_layout() + * call this before printing maps, after get_multipath_layout(), to determine + * output field width. + */ +void foreign_multipath_layout(void); + +/** + * snprint_foreign_topology(buf, len, verbosity); + * prints topology information from foreign libraries into buffer, + * '\0' - terminated. + * @param buf: output buffer + * @param len: size of output buffer + * @param verbosity: verbosity level + * @returns: number of printed characters excluding trailing '\0'. + */ +int snprint_foreign_topology(char *buf, int len, int verbosity); + +/** + * snprint_foreign_paths(buf, len, style, pad); + * prints formatted path information from foreign libraries into buffer, + * '\0' - terminated. + * @param buf: output buffer + * @param len: size of output buffer + * @param style: format string + * @param pad: whether to pad field width + * @returns: number of printed characters excluding trailing '\0'. + */ +int snprint_foreign_paths(char *buf, int len, const char *style, int pad); + +/** + * snprint_foreign_multipaths(buf, len, style, pad); + * prints formatted map information from foreign libraries into buffer, + * '\0' - terminated. 
+ * @param buf: output buffer + * @param len: size of output buffer + * @param style: format string + * @param pad: whether to pad field width + * @returns: number of printed characters excluding trailing '\0'. + */ +int snprint_foreign_multipaths(char *buf, int len, + const char *style, int pretty); + +/** + * print_foreign_topology(v) + * print foreign topology to stdout + * @param verbosity: verbosity level + */ +void print_foreign_topology(int verbosity); + +/** + * is_claimed_by_foreign(ud) + * @param udev: udev device + * @returns: true if device is (newly or already) claimed by a foreign lib + */ +static inline bool +is_claimed_by_foreign(struct udev_device *ud) +{ + int rc = add_foreign(ud); + + return (rc == FOREIGN_CLAIMED || rc == FOREIGN_OK); +} + +#endif /* _FOREIGN_H */ diff --git a/libmultipath/foreign/Makefile b/libmultipath/foreign/Makefile new file mode 100644 index 0000000..fae58a0 --- /dev/null +++ b/libmultipath/foreign/Makefile @@ -0,0 +1,31 @@ +# +# Copyright (C) 2003 Christophe Varoqui, +# +TOPDIR=../.. +include ../../Makefile.inc + +CFLAGS += $(LIB_CFLAGS) -I.. 
-I$(nvmedir) + +LIBS = libforeign-nvme.so + +all: $(LIBS) + +libforeign-%.so: %.o + $(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^ + +install: + $(INSTALL_PROGRAM) -m 755 $(LIBS) $(DESTDIR)$(libdir) + +uninstall: + for file in $(LIBS); do $(RM) $(DESTDIR)$(libdir)/$$file; done + +clean: dep_clean + $(RM) core *.a *.o *.gz *.so + +OBJS := $(LIBS:libforeign-%.so=%.o) +.SECONDARY: $(OBJS) + +include $(wildcard $(OBJS:.o=.d)) + +dep_clean: + $(RM) $(OBJS:.o=.d) diff --git a/libmultipath/foreign/nvme.c b/libmultipath/foreign/nvme.c new file mode 100644 index 0000000..09cdddf --- /dev/null +++ b/libmultipath/foreign/nvme.c @@ -0,0 +1,955 @@ +/* + Copyright (c) 2018 Martin Wilck, SUSE Linux GmbH + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "nvme-lib.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "util.h" +#include "vector.h" +#include "generic.h" +#include "foreign.h" +#include "debug.h" +#include "structs.h" +#include "sysfs.h" + +static const char nvme_vendor[] = "NVMe"; +static const char N_A[] = "n/a"; +const char *THIS; + +struct nvme_map; +struct nvme_pathgroup { + struct gen_pathgroup gen; + struct _vector pathvec; +}; + +struct nvme_path { + struct gen_path gen; + struct udev_device *udev; + struct udev_device *ctl; + struct nvme_map *map; + bool seen; + /* + * The kernel works in failover mode. 
+ * Each path has a separate path group. + */ + struct nvme_pathgroup pg; +}; + +struct nvme_map { + struct gen_multipath gen; + struct udev_device *udev; + struct udev_device *subsys; + dev_t devt; + struct _vector pgvec; + int nr_live; + int ana_supported; +}; + +#define NAME_LEN 64 /* buffer length for temp attributes */ +#define const_gen_mp_to_nvme(g) ((const struct nvme_map*)(g)) +#define gen_mp_to_nvme(g) ((struct nvme_map*)(g)) +#define nvme_mp_to_gen(n) &((n)->gen) +#define const_gen_pg_to_nvme(g) ((const struct nvme_pathgroup*)(g)) +#define gen_pg_to_nvme(g) ((struct nvme_pathgroup*)(g)) +#define nvme_pg_to_gen(n) &((n)->gen) +#define const_gen_path_to_nvme(g) ((const struct nvme_path*)(g)) +#define gen_path_to_nvme(g) ((struct nvme_path*)(g)) +#define nvme_path_to_gen(n) &((n)->gen) +#define nvme_pg_to_path(x) (VECTOR_SLOT(&((x)->pathvec), 0)) +#define nvme_path_to_pg(x) &((x)->pg) + +static void cleanup_nvme_path(struct nvme_path *path) +{ + condlog(5, "%s: %p %p", __func__, path, path->udev); + if (path->udev) + udev_device_unref(path->udev); + vector_reset(&path->pg.pathvec); + + /* ctl is implicitly referenced by udev, no need to unref */ + free(path); +} + +static void cleanup_nvme_map(struct nvme_map *map) +{ + struct nvme_pathgroup *pg; + struct nvme_path *path; + int i; + + vector_foreach_slot_backwards(&map->pgvec, pg, i) { + path = nvme_pg_to_path(pg); + condlog(5, "%s: %d %p", __func__, i, path); + cleanup_nvme_path(path); + vector_del_slot(&map->pgvec, i); + } + vector_reset(&map->pgvec); + if (map->udev) + udev_device_unref(map->udev); + /* subsys is implicitly referenced by udev, no need to unref */ + free(map); +} + +static const struct _vector* +nvme_mp_get_pgs(const struct gen_multipath *gmp) { + const struct nvme_map *nvme = const_gen_mp_to_nvme(gmp); + + /* This is all used under the lock, no need to copy */ + return &nvme->pgvec; +} + +static void +nvme_mp_rel_pgs(__attribute__((unused)) const struct gen_multipath *gmp, + 
__attribute__((unused)) const struct _vector *v)
+{
+	/* empty */
+}
+
+/* Strip trailing blanks from str in place */
+static void rstrip(char *str)
+{
+	int n;
+
+	for (n = strlen(str) - 1; n >= 0 && str[n] == ' '; n--);
+	str[n+1] = '\0';
+}
+
+/*
+ * Print one property of an NVMe map into buff (at most len bytes).
+ * Supported wildcards:
+ *   d  sysname of the map device
+ *   n  "<subsysnqn>:nsid.<nsid>"
+ *   w  wwid
+ *   N  number of live paths
+ *   S  size attribute
+ *   v  vendor (constant "NVMe")
+ *   s  "<vendor>,<model>,<firmware_rev>"
+ *   p  model
+ *   e  firmware_rev
+ *   r  "ro" or "rw", from the "ro" sysfs attribute
+ *   G  name of this foreign library
+ *   h  "ANA" if ANA is supported
+ * Anything else, or a property that can't be determined, prints "n/a".
+ * Returns the snprintf() return value.
+ */
+static int snprint_nvme_map(const struct gen_multipath *gmp,
+			    char *buff, int len, char wildcard)
+{
+	const struct nvme_map *nvm = const_gen_mp_to_nvme(gmp);
+	char fld[NAME_LEN];
+	const char *val;
+
+	switch (wildcard) {
+	case 'd':
+		return snprintf(buff, len, "%s",
+				udev_device_get_sysname(nvm->udev));
+	case 'n':
+		return snprintf(buff, len, "%s:nsid.%s",
+				udev_device_get_sysattr_value(nvm->subsys,
+							      "subsysnqn"),
+				udev_device_get_sysattr_value(nvm->udev,
+							      "nsid"));
+	case 'w':
+		return snprintf(buff, len, "%s",
+				udev_device_get_sysattr_value(nvm->udev,
+							      "wwid"));
+	case 'N':
+		return snprintf(buff, len, "%u", nvm->nr_live);
+	case 'S':
+		return snprintf(buff, len, "%s",
+				udev_device_get_sysattr_value(nvm->udev,
+							      "size"));
+	case 'v':
+		return snprintf(buff, len, "%s", nvme_vendor);
+	case 's':
+	case 'p':
+		snprintf(fld, sizeof(fld), "%s",
+			 udev_device_get_sysattr_value(nvm->subsys,
+						       "model"));
+		rstrip(fld);
+		if (wildcard == 'p')
+			return snprintf(buff, len, "%s", fld);
+		return snprintf(buff, len, "%s,%s,%s", nvme_vendor, fld,
+				udev_device_get_sysattr_value(nvm->subsys,
+							      "firmware_rev"));
+	case 'e':
+		return snprintf(buff, len, "%s",
+				udev_device_get_sysattr_value(nvm->subsys,
+							      "firmware_rev"));
+	case 'r':
+		val = udev_device_get_sysattr_value(nvm->udev, "ro");
+		/* attribute not readable: report "n/a" */
+		if (val == NULL)
+			break;
+		/* the attribute is text, compare with the character '1' */
+		if (val[0] == '1')
+			return snprintf(buff, len, "%s", "ro");
+		else
+			return snprintf(buff, len, "%s", "rw");
+	case 'G':
+		return snprintf(buff, len, "%s", THIS);
+	case 'h':
+		if (nvm->ana_supported == YNU_YES)
+			return snprintf(buff, len, "ANA");
+		/* fall through */
+	default:
+		break;
+	}
+
+	return snprintf(buff, len, "%s", N_A);
+}
+
+static const struct _vector*
+nvme_pg_get_paths(const struct gen_pathgroup *gpg) {
+	const struct nvme_pathgroup *gp = const_gen_pg_to_nvme(gpg);
+
+	/* This is all used
under the lock, no need to copy */ + return &gp->pathvec; +} + +static void +nvme_pg_rel_paths(__attribute__((unused)) const struct gen_pathgroup *gpg, + __attribute__((unused)) const struct _vector *v) +{ + /* empty */ +} + +static int snprint_hcil(const struct nvme_path *np, char *buf, int len) +{ + unsigned int nvmeid, ctlid, nsid; + int rc; + const char *sysname = udev_device_get_sysname(np->udev); + + rc = sscanf(sysname, "nvme%uc%un%u", &nvmeid, &ctlid, &nsid); + if (rc != 3) { + condlog(1, "%s: failed to scan %s", __func__, sysname); + rc = snprintf(buf, len, "(ERR:%s)", sysname); + } else + rc = snprintf(buf, len, "%u:%u:%u", nvmeid, ctlid, nsid); + return (rc < len ? rc : len); +} + +static int snprint_nvme_path(const struct gen_path *gp, + char *buff, int len, char wildcard) +{ + const struct nvme_path *np = const_gen_path_to_nvme(gp); + dev_t devt; + char fld[NAME_LEN]; + struct udev_device *pci; + + switch (wildcard) { + case 'w': + return snprintf(buff, len, "%s", + udev_device_get_sysattr_value(np->udev, + "wwid")); + case 'd': + return snprintf(buff, len, "%s", + udev_device_get_sysname(np->udev)); + case 'i': + return snprint_hcil(np, buff, len); + case 'D': + devt = udev_device_get_devnum(np->udev); + return snprintf(buff, len, "%u:%u", major(devt), minor(devt)); + case 'o': + if (sysfs_attr_get_value(np->ctl, "state", + fld, sizeof(fld)) > 0) + return snprintf(buff, len, "%s", fld); + break; + case 'T': + if (sysfs_attr_get_value(np->udev, "ana_state", fld, + sizeof(fld)) > 0) + return snprintf(buff, len, "%s", fld); + break; + case 'p': + if (sysfs_attr_get_value(np->udev, "ana_state", fld, + sizeof(fld)) > 0) { + rstrip(fld); + if (!strcmp(fld, "optimized")) + return snprintf(buff, len, "%d", 50); + else if (!strcmp(fld, "non-optimized")) + return snprintf(buff, len, "%d", 10); + else + return snprintf(buff, len, "%d", 0); + } + break; + case 's': + snprintf(fld, sizeof(fld), "%s", + udev_device_get_sysattr_value(np->ctl, + "model")); + 
rstrip(fld); + return snprintf(buff, len, "%s,%s,%s", nvme_vendor, fld, + udev_device_get_sysattr_value(np->ctl, + "firmware_rev")); + case 'S': + return snprintf(buff, len, "%s", + udev_device_get_sysattr_value(np->udev, + "size")); + case 'z': + return snprintf(buff, len, "%s", + udev_device_get_sysattr_value(np->ctl, + "serial")); + case 'm': + return snprintf(buff, len, "%s", + udev_device_get_sysname(np->map->udev)); + case 'N': + case 'R': + return snprintf(buff, len, "%s:%s", + udev_device_get_sysattr_value(np->ctl, + "transport"), + udev_device_get_sysattr_value(np->ctl, + "address")); + case 'G': + return snprintf(buff, len, "[%s]", THIS); + case 'a': + pci = udev_device_get_parent_with_subsystem_devtype(np->ctl, + "pci", + NULL); + if (pci != NULL) + return snprintf(buff, len, "PCI:%s", + udev_device_get_sysname(pci)); + /* fall through */ + default: + break; + } + return snprintf(buff, len, "%s", N_A); + return 0; +} + +static int snprint_nvme_pg(const struct gen_pathgroup *gmp, + char *buff, int len, char wildcard) +{ + const struct nvme_pathgroup *pg = const_gen_pg_to_nvme(gmp); + const struct nvme_path *path = nvme_pg_to_path(pg); + + switch (wildcard) { + case 't': + return snprint_nvme_path(nvme_path_to_gen(path), + buff, len, 'T'); + case 'p': + return snprint_nvme_path(nvme_path_to_gen(path), + buff, len, 'p'); + default: + return snprintf(buff, len, N_A); + } +} + +static int nvme_style(__attribute__((unused)) const struct gen_multipath* gm, + char *buf, int len, + __attribute__((unused)) int verbosity) +{ + int n = snprintf(buf, len, "%%w [%%G]:%%d %%s"); + + return (n < len ? 
n : len - 1); +} + +static const struct gen_multipath_ops nvme_map_ops = { + .get_pathgroups = nvme_mp_get_pgs, + .rel_pathgroups = nvme_mp_rel_pgs, + .style = nvme_style, + .snprint = snprint_nvme_map, +}; + +static const struct gen_pathgroup_ops nvme_pg_ops __attribute__((unused)) = { + .get_paths = nvme_pg_get_paths, + .rel_paths = nvme_pg_rel_paths, + .snprint = snprint_nvme_pg, +}; + +static const struct gen_path_ops nvme_path_ops __attribute__((unused)) = { + .snprint = snprint_nvme_path, +}; + +struct context { + pthread_mutex_t mutex; + vector mpvec; + struct udev *udev; +}; + +void lock(struct context *ctx) +{ + pthread_mutex_lock(&ctx->mutex); +} + +void unlock(void *arg) +{ + struct context *ctx = arg; + + pthread_mutex_unlock(&ctx->mutex); +} + +static int _delete_all(struct context *ctx) +{ + struct nvme_map *nm; + int n = VECTOR_SIZE(ctx->mpvec), i; + + if (n == 0) + return FOREIGN_IGNORED; + + vector_foreach_slot_backwards(ctx->mpvec, nm, i) { + vector_del_slot(ctx->mpvec, i); + cleanup_nvme_map(nm); + } + return FOREIGN_OK; +} + +int delete_all(struct context *ctx) +{ + int rc; + + condlog(5, "%s called for \"%s\"", __func__, THIS); + + lock(ctx); + pthread_cleanup_push(unlock, ctx); + rc = _delete_all(ctx); + pthread_cleanup_pop(1); + + return rc; +} + +void cleanup(struct context *ctx) +{ + (void)delete_all(ctx); + + lock(ctx); + /* + * Locking is not strictly necessary here, locking in foreign.c + * makes sure that no other code is called with this ctx any more. + * But this should make static checkers feel better. 
+ */ + pthread_cleanup_push(unlock, ctx); + if (ctx->udev) + udev_unref(ctx->udev); + if (ctx->mpvec) + vector_free(ctx->mpvec); + ctx->mpvec = NULL; + ctx->udev = NULL; + pthread_cleanup_pop(1); + pthread_mutex_destroy(&ctx->mutex); + + free(ctx); +} + +struct context *init(unsigned int api, const char *name) +{ + struct context *ctx; + + if (api > LIBMP_FOREIGN_API) { + condlog(0, "%s: api version mismatch: %08x > %08x\n", + __func__, api, LIBMP_FOREIGN_API); + return NULL; + } + + if ((ctx = calloc(1, sizeof(*ctx)))== NULL) + return NULL; + + pthread_mutex_init(&ctx->mutex, NULL); + + ctx->udev = udev_new(); + if (ctx->udev == NULL) + goto err; + + ctx->mpvec = vector_alloc(); + if (ctx->mpvec == NULL) + goto err; + + THIS = name; + return ctx; +err: + cleanup(ctx); + return NULL; +} + +static struct nvme_map *_find_nvme_map_by_devt(const struct context *ctx, + dev_t devt) +{ + struct nvme_map *nm; + int i; + + if (ctx->mpvec == NULL) + return NULL; + + vector_foreach_slot(ctx->mpvec, nm, i) { + if (nm->devt == devt) + return nm; + } + + return NULL; +} + +static struct nvme_path * +_find_path_by_syspath(struct nvme_map *map, const char *syspath) +{ + struct nvme_pathgroup *pg; + char real[PATH_MAX]; + const char *ppath; + int i; + + ppath = realpath(syspath, real); + if (ppath == NULL) { + condlog(1, "%s: %s: error in realpath", __func__, THIS); + ppath = syspath; + } + + vector_foreach_slot(&map->pgvec, pg, i) { + struct nvme_path *path = nvme_pg_to_path(pg); + + if (!strcmp(ppath, + udev_device_get_syspath(path->udev))) + return path; + } + condlog(4, "%s: %s: %s not found", __func__, THIS, ppath); + return NULL; +} + +static void _udev_device_unref(void *p) +{ + udev_device_unref(p); +} + +static void _udev_enumerate_unref(void *p) +{ + udev_enumerate_unref(p); +} + +static int _dirent_controller(const struct dirent *di) +{ + static const char nvme_prefix[] = "nvme"; + const char *p; + +#ifdef _DIRENT_HAVE_D_TYPE + if (di->d_type != DT_LNK) + return 0; 
+#endif
+	if (strncmp(di->d_name, nvme_prefix, sizeof(nvme_prefix) - 1))
+		return 0;
+	p = di->d_name + sizeof(nvme_prefix) - 1;
+	/* <ctype.h> functions need a value representable as unsigned char */
+	if (*p == '\0' || !isdigit((unsigned char)*p))
+		return 0;
+	for (++p; *p != '\0'; ++p)
+		if (!isdigit((unsigned char)*p))
+			return 0;
+	return 1;
+}
+
+/*
+ * Find the block device for a given nvme controller.
+ * Returns a new udev_device reference for the first child of ctrl in
+ * the "block" subsystem with devtype "disk", or NULL if there is none
+ * or the enumeration fails. The caller must unref the returned device.
+ */
+struct udev_device *get_ctrl_blkdev(const struct context *ctx,
+				    struct udev_device *ctrl)
+{
+	struct udev_list_entry *item;
+	struct udev_device *blkdev = NULL;
+	struct udev_enumerate *enm = udev_enumerate_new(ctx->udev);
+
+	if (enm == NULL)
+		return NULL;
+
+	pthread_cleanup_push(_udev_enumerate_unref, enm);
+	if (udev_enumerate_add_match_parent(enm, ctrl) < 0)
+		goto out;
+	if (udev_enumerate_add_match_subsystem(enm, "block"))
+		goto out;
+
+	if (udev_enumerate_scan_devices(enm) < 0) {
+		condlog(1, "%s: %s: error enumerating devices", __func__, THIS);
+		goto out;
+	}
+
+	for (item = udev_enumerate_get_list_entry(enm);
+	     item != NULL;
+	     item = udev_list_entry_get_next(item)) {
+		struct udev_device *tmp;
+		const char *devtype;
+
+		tmp = udev_device_new_from_syspath(ctx->udev,
+					   udev_list_entry_get_name(item));
+		if (tmp == NULL)
+			continue;
+		/* devtype may be NULL, don't hand that to strcmp() */
+		devtype = udev_device_get_devtype(tmp);
+		if (devtype != NULL && !strcmp(devtype, "disk")) {
+			blkdev = tmp;
+			break;
+		} else
+			udev_device_unref(tmp);
+	}
+
+	if (blkdev == NULL)
+		condlog(1, "%s: %s: failed to get blockdev for %s",
+			__func__, THIS, udev_device_get_sysname(ctrl));
+	else
+		condlog(5, "%s: %s: got %s", __func__, THIS,
+			udev_device_get_sysname(blkdev));
+out:
+	pthread_cleanup_pop(1);
+	return blkdev;
+}
+
+static void test_ana_support(struct nvme_map *map, struct udev_device *ctl)
+{
+	const char *dev_t;
+	char sys_path[64];
+	long fd;
+	int rc;
+
+	if (map->ana_supported != YNU_UNDEF)
+		return;
+
+	dev_t = udev_device_get_sysattr_value(ctl, "dev");
+	if (safe_sprintf(sys_path, "/dev/char/%s", dev_t))
+		return;
+
+	fd = open(sys_path, O_RDONLY);
+	if (fd == -1) {
+		condlog(2, "%s: error opening %s", __func__, sys_path);
+		return;
+	}
+
+	pthread_cleanup_push(close_fd, (void *)fd);
+	rc = nvme_id_ctrl_ana(fd, NULL);
+	if (rc < 0)
+		condlog(2, "%s: error in nvme_id_ctrl: %s", __func__,
+			strerror(errno));
+	else {
+		map->ana_supported = (rc == 1 ? YNU_YES : YNU_NO);
+		condlog(3, "%s: NVMe ctrl %s: ANA %s supported", __func__, dev_t,
+			rc == 1 ? "is" : "is not");
+	}
+	pthread_cleanup_pop(1);
+}
+
+/*
+ * (Re)scan the controllers of the NVMe subsystem that map belongs to.
+ *
+ * All known paths are first marked as not seen. The subsystem's sysfs
+ * directory is then scanned for controller entries. For each
+ * controller whose block device is not yet known, a new path (with its
+ * own single-path path group) is added to the map. Paths that were not
+ * seen are removed, and map->nr_live is recalculated from the "state"
+ * attribute of the remaining controllers.
+ */
+static void _find_controllers(struct context *ctx, struct nvme_map *map)
+{
+	char pathbuf[PATH_MAX], realbuf[PATH_MAX];
+	struct dirent **di = NULL;
+	struct scandir_result sr;
+	struct udev_device *subsys;
+	struct nvme_pathgroup *pg;
+	struct nvme_path *path;
+	int r, i, n;
+
+	if (map == NULL || map->udev == NULL)
+		return;
+
+	vector_foreach_slot(&map->pgvec, pg, i) {
+		path = nvme_pg_to_path(pg);
+		path->seen = false;
+	}
+
+	subsys = udev_device_get_parent_with_subsystem_devtype(map->udev,
+							       "nvme-subsystem",
+							       NULL);
+	if (subsys == NULL) {
+		condlog(1, "%s: %s: BUG: no NVME subsys for %s", __func__, THIS,
+			udev_device_get_sysname(map->udev));
+		return;
+	}
+
+	n = snprintf(pathbuf, sizeof(pathbuf), "%s",
+		     udev_device_get_syspath(subsys));
+	/* n is used as offset into pathbuf below, it must be in range */
+	if (n < 0 || (size_t)n >= sizeof(pathbuf)) {
+		condlog(1, "%s: %s: subsystem path too long for %s",
+			__func__, THIS, udev_device_get_sysname(map->udev));
+		return;
+	}
+	r = scandir(pathbuf, &di, _dirent_controller, alphasort);
+
+	if (r == 0) {
+		condlog(3, "%s: %s: no controllers for %s", __func__, THIS,
+			udev_device_get_sysname(map->udev));
+		/* scandir() may have allocated an empty list; di is NULL otherwise */
+		free(di);
+		return;
+	} else if (r < 0) {
+		condlog(1, "%s: %s: error %d scanning controllers of %s",
+			__func__, THIS, errno,
+			udev_device_get_sysname(map->udev));
+		return;
+	}
+
+	sr.di = di;
+	sr.n = r;
+	pthread_cleanup_push_cast(free_scandir_result, &sr);
+	for (i = 0; i < r; i++) {
+		char *fn = di[i]->d_name;
+		struct udev_device *ctrl, *udev;
+
+		if (safe_snprintf(pathbuf + n, sizeof(pathbuf) - n, "/%s", fn))
+			continue;
+		if (realpath(pathbuf, realbuf) == NULL) {
+			condlog(3, "%s: %s: realpath: %s", __func__, THIS,
+				strerror(errno));
+			continue;
+		}
+		condlog(4, "%s: %s: found %s", __func__, THIS, realbuf);
+
+		ctrl = udev_device_new_from_syspath(ctx->udev, realbuf);
+		if (ctrl == NULL) {
+			condlog(1, "%s: %s: failed to get udev device for %s",
+				__func__, THIS, realbuf);
+			continue;
+		}
+
+		pthread_cleanup_push(_udev_device_unref, ctrl);
+		udev = get_ctrl_blkdev(ctx, ctrl);
+		/*
+		 * We give up the reference to the nvme device here and get
+		 * it back from the child below.
+		 * This way we don't need to worry about unreffing it.
+		 */
+		pthread_cleanup_pop(1);
+
+		if (udev == NULL)
+			continue;
+
+		path = _find_path_by_syspath(map,
+					     udev_device_get_syspath(udev));
+		if (path != NULL) {
+			path->seen = true;
+			condlog(4, "%s: %s already known",
+				__func__, fn);
+			/*
+			 * The known path has its own udev device; drop
+			 * the reference get_ctrl_blkdev() handed to us.
+			 */
+			udev_device_unref(udev);
+			continue;
+		}
+
+		path = calloc(1, sizeof(*path));
+		if (path == NULL) {
+			/* nobody takes ownership of udev, don't leak it */
+			udev_device_unref(udev);
+			continue;
+		}
+
+		path->gen.ops = &nvme_path_ops;
+		/* from here on, cleanup_nvme_path() unrefs udev */
+		path->udev = udev;
+		path->seen = true;
+		path->map = map;
+		path->ctl = udev_device_get_parent_with_subsystem_devtype
+			(udev, "nvme", NULL);
+		if (path->ctl == NULL) {
+			condlog(1, "%s: %s: failed to get controller for %s",
+				__func__, THIS, fn);
+			cleanup_nvme_path(path);
+			continue;
+		}
+		test_ana_support(map, path->ctl);
+
+		path->pg.gen.ops = &nvme_pg_ops;
+		if (vector_alloc_slot(&path->pg.pathvec) == NULL) {
+			cleanup_nvme_path(path);
+			continue;
+		}
+		vector_set_slot(&path->pg.pathvec, path);
+		if (vector_alloc_slot(&map->pgvec) == NULL) {
+			cleanup_nvme_path(path);
+			continue;
+		}
+		vector_set_slot(&map->pgvec, &path->pg);
+		condlog(3, "%s: %s: new path %s added to %s",
+			__func__, THIS, udev_device_get_sysname(udev),
+			udev_device_get_sysname(map->udev));
+	}
+	pthread_cleanup_pop(1);
+
+	map->nr_live = 0;
+	vector_foreach_slot_backwards(&map->pgvec, pg, i) {
+		path = nvme_pg_to_path(pg);
+		if (!path->seen) {
+			condlog(1, "path %d not found in %s any more",
+				i, udev_device_get_sysname(map->udev));
+			vector_del_slot(&map->pgvec, i);
+			cleanup_nvme_path(path);
+		} else {
+			static const char live_state[] = "live";
+			char state[16];
+
+			if ((sysfs_attr_get_value(path->ctl, "state", state,
+						  sizeof(state)) > 0) &&
+			    !strncmp(state, live_state, sizeof(live_state) - 1))
+				map->nr_live++;
+		}
+	}
+	condlog(3, "%s: %s: map %s has %d/%d live paths", __func__, THIS,
+		udev_device_get_sysname(map->udev), map->nr_live,
+		VECTOR_SIZE(&map->pgvec));
+}
+
+static int _add_map(struct context *ctx, struct udev_device *ud,
+		    struct udev_device *subsys)
+{
+	dev_t devt = udev_device_get_devnum(ud);
+	struct nvme_map *map;
+
+	if (_find_nvme_map_by_devt(ctx, devt) != NULL)
+		return FOREIGN_OK;
+
+	map = calloc(1, sizeof(*map));
+	if (map == NULL)
+		return FOREIGN_ERR;
+
+	map->devt = devt;
+	map->udev = udev_device_ref(ud);
+	/*
+	 * subsys is implicitly referenced by map->udev,
+	 * no need to take a reference here.
+	 */
+	map->subsys = subsys;
+	map->gen.ops = &nvme_map_ops;
+
+	if (vector_alloc_slot(ctx->mpvec) == NULL) {
+		cleanup_nvme_map(map);
+		return FOREIGN_ERR;
+	}
+	vector_set_slot(ctx->mpvec, map);
+	_find_controllers(ctx, map);
+
+	return FOREIGN_CLAIMED;
+}
+
+/*
+ * add() method of the foreign API.
+ * Returns FOREIGN_ERR if ud is NULL, FOREIGN_IGNORED if ud is not a
+ * "disk" device or has no "nvme-subsystem" parent, and otherwise the
+ * result of _add_map(): FOREIGN_CLAIMED for a newly added map,
+ * FOREIGN_OK for an already known one, FOREIGN_ERR on allocation
+ * failure.
+ */
+int add(struct context *ctx, struct udev_device *ud)
+{
+	struct udev_device *subsys;
+	const char *devtype;
+	int rc;
+
+	condlog(5, "%s called for \"%s\"", __func__, THIS);
+
+	if (ud == NULL)
+		return FOREIGN_ERR;
+	/* devtype may be NULL, don't hand that to strcmp() */
+	devtype = udev_device_get_devtype(ud);
+	if (devtype == NULL || strcmp("disk", devtype))
+		return FOREIGN_IGNORED;
+
+	subsys = udev_device_get_parent_with_subsystem_devtype(ud,
+							       "nvme-subsystem",
+							       NULL);
+	if (subsys == NULL)
+		return FOREIGN_IGNORED;
+
+	lock(ctx);
+	pthread_cleanup_push(unlock, ctx);
+	rc = _add_map(ctx, ud, subsys);
+	pthread_cleanup_pop(1);
+
+	if (rc == FOREIGN_CLAIMED)
+		condlog(3, "%s: %s: added map %s", __func__, THIS,
+			udev_device_get_sysname(ud));
+	else if (rc != FOREIGN_OK)
+		condlog(1, "%s: %s: retcode %d adding %s",
+			__func__, THIS, rc, udev_device_get_sysname(ud));
+
+	return rc;
+}
+
+int change(__attribute__((unused)) struct context *ctx,
+	   __attribute__((unused)) struct udev_device *ud)
+{
+	condlog(5, "%s called for \"%s\"", __func__, THIS);
+	return FOREIGN_IGNORED;
+}
+
+static int _delete_map(struct context *ctx, struct udev_device *ud)
+{
+	int k;
+	struct nvme_map *map;
+
dev_t devt = udev_device_get_devnum(ud); + + map = _find_nvme_map_by_devt(ctx, devt); + if (map ==NULL) + return FOREIGN_IGNORED; + + k = find_slot(ctx->mpvec, map); + if (k == -1) + return FOREIGN_ERR; + else + vector_del_slot(ctx->mpvec, k); + + cleanup_nvme_map(map); + + return FOREIGN_OK; +} + +int delete(struct context *ctx, struct udev_device *ud) +{ + int rc; + + condlog(5, "%s called for \"%s\"", __func__, THIS); + + if (ud == NULL) + return FOREIGN_ERR; + + lock(ctx); + pthread_cleanup_push(unlock, ctx); + rc = _delete_map(ctx, ud); + pthread_cleanup_pop(1); + + if (rc == FOREIGN_OK) + condlog(3, "%s: %s: map %s deleted", __func__, THIS, + udev_device_get_sysname(ud)); + else if (rc != FOREIGN_IGNORED) + condlog(1, "%s: %s: retcode %d deleting map %s", __func__, + THIS, rc, udev_device_get_sysname(ud)); + + return rc; +} + +void _check(struct context *ctx) +{ + struct gen_multipath *gm; + int i; + + vector_foreach_slot(ctx->mpvec, gm, i) { + struct nvme_map *map = gen_mp_to_nvme(gm); + + _find_controllers(ctx, map); + } +} + +void check(struct context *ctx) +{ + condlog(4, "%s called for \"%s\"", __func__, THIS); + lock(ctx); + pthread_cleanup_push(unlock, ctx); + _check(ctx); + pthread_cleanup_pop(1); + return; +} + +/* + * It's safe to pass our internal pointer, this is only used under the lock. + */ +const struct _vector *get_multipaths(const struct context *ctx) +{ + condlog(5, "%s called for \"%s\"", __func__, THIS); + return ctx->mpvec; +} + +void release_multipaths(__attribute__((unused)) const struct context *ctx, + __attribute__((unused)) const struct _vector *mpvec) +{ + condlog(5, "%s called for \"%s\"", __func__, THIS); + /* NOP */ +} + +/* + * It's safe to pass our internal pointer, this is only used under the lock. 
+ */ +const struct _vector * get_paths(const struct context *ctx) +{ + vector paths = NULL; + const struct gen_multipath *gm; + int i; + + condlog(5, "%s called for \"%s\"", __func__, THIS); + vector_foreach_slot(ctx->mpvec, gm, i) { + const struct nvme_map *nm = const_gen_mp_to_nvme(gm); + paths = vector_convert(paths, &nm->pgvec, + struct nvme_pathgroup, nvme_pg_to_path); + } + return paths; +} + +void release_paths(__attribute__((unused)) const struct context *ctx, + const struct _vector *mpvec) +{ + condlog(5, "%s called for \"%s\"", __func__, THIS); + vector_free_const(mpvec); +} + +/* compile-time check whether all methods are present and correctly typed */ +#define _METHOD_INIT(x) .x = x +static struct foreign __methods __attribute__((unused)) = { + _METHOD_INIT(init), + _METHOD_INIT(cleanup), + _METHOD_INIT(change), + _METHOD_INIT(delete), + _METHOD_INIT(delete_all), + _METHOD_INIT(check), + _METHOD_INIT(lock), + _METHOD_INIT(unlock), + _METHOD_INIT(get_multipaths), + _METHOD_INIT(release_multipaths), + _METHOD_INIT(get_paths), + _METHOD_INIT(release_paths), +}; diff --git a/libmultipath/generic.c b/libmultipath/generic.c new file mode 100644 index 0000000..5f03b9e --- /dev/null +++ b/libmultipath/generic.c @@ -0,0 +1,37 @@ +/* + Copyright (c) 2018 Martin Wilck, SUSE Linux GmbH + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+ */ + + +#include +#include "generic.h" +#include "structs.h" + +int generic_style(const struct gen_multipath* gm, + char *buf, int len, __attribute__((unused)) int verbosity) +{ + char alias_buf[WWID_SIZE]; + char wwid_buf[WWID_SIZE]; + int n = 0; + + gm->ops->snprint(gm, alias_buf, sizeof(alias_buf), 'n'); + gm->ops->snprint(gm, wwid_buf, sizeof(wwid_buf), 'w'); + + n += snprintf(buf, len, "%%n %s[%%G]:%%d %%s", + strcmp(alias_buf, wwid_buf) ? "(%w) " : ""); + + return (n < len ? n : len - 1); +} diff --git a/libmultipath/generic.h b/libmultipath/generic.h new file mode 100644 index 0000000..6346ffe --- /dev/null +++ b/libmultipath/generic.h @@ -0,0 +1,134 @@ +/* + Copyright (c) 2018 Martin Wilck, SUSE Linux GmbH + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + */ +#ifndef _GENERIC_H +#define _GENERIC_H +#include "vector.h" + +struct gen_multipath; +struct gen_pathgroup; +struct gen_path; + +/** + * Methods implemented for gen_multipath "objects" + */ +struct gen_multipath_ops { + /** + * method: get_pathgroups(gmp) + * caller is responsible to returned data using rel_pathgroups() + * caller is also responsible to lock the gmp (directly or indirectly) + * while working with the return value. 
+ * @param gmp: generic multipath object to act on + * @returns a vector of const struct gen_pathgroup* + */ + const struct _vector* (*get_pathgroups)(const struct gen_multipath*); + /** + * method: rel_pathgroups(gmp, v) + * free data allocated by get_pathgroups(), if any + * @param gmp: generic multipath object to act on + * @param v the value returned by get_pathgroups() + */ + void (*rel_pathgroups)(const struct gen_multipath*, + const struct _vector*); + /** + * method: snprint(gmp, buf, len, wildcard) + * prints the property of the multipath map matching + * the passed-in wildcard character into "buf", + * 0-terminated, no more than "len" characters including trailing '\0'. + * + * @param gmp: generic multipath object to act on + * @param buf: output buffer + * @param buflen: buffer size + * @param wildcard: the multipath wildcard (see print.c) + * @returns the number of characters printed (without trailing '\0'). + */ + int (*snprint)(const struct gen_multipath*, + char *buf, int len, char wildcard); + /** + * method: style(gmp, buf, len, verbosity) + * returns the format string to be used for the multipath object, + * defined with the wildcards as defined in print.c + * generic_style() should work well in most cases. 
+ * @param gmp: generic multipath object to act on + * @param buf: output buffer + * @param buflen: buffer size + * @param verbosity: verbosity level + * @returns number of format chars printed + */ + int (*style)(const struct gen_multipath*, + char *buf, int len, int verbosity); +}; + +/** + * Methods implemented for gen_pathgroup "objects" + */ +struct gen_pathgroup_ops { + /** + * method: get_paths(gpg) + * caller is responsible to returned data using rel_paths() + * @param gpg: generic pathgroup object to act on + * @returns a vector of const struct gen_path* + */ + const struct _vector* (*get_paths)(const struct gen_pathgroup*); + /** + * method: rel_paths(gpg, v) + * free data allocated by get_paths(), if any + * @param gmp: generic pathgroup object to act on + * @param v the value returned by get_paths() + */ + void (*rel_paths)(const struct gen_pathgroup*, const struct _vector*); + /** + * Method snprint() + * see gen_multipath_ops->snprint() above + */ + int (*snprint)(const struct gen_pathgroup*, + char *buf, int len, char wildcard); +}; + +struct gen_path_ops { + /** + * Method snprint() + * see gen_multipath_ops->snprint() above + */ + int (*snprint)(const struct gen_path*, + char *buf, int len, char wildcard); +}; + +struct gen_multipath { + const struct gen_multipath_ops *ops; +}; + +struct gen_pathgroup { + const struct gen_pathgroup_ops *ops; +}; + +struct gen_path { + const struct gen_path_ops *ops; +}; + +/** + * Helper functions for setting up the various generic_X_ops + */ + +/** + * generic_style() + * A simple style() method (see above) that should fit most + * foreign libraries. 
+ */ +int generic_style(const struct gen_multipath*, + char *buf, int len, int verbosity); + +#endif /* _GENERIC_H */ diff --git a/libmultipath/hwtable.c b/libmultipath/hwtable.c new file mode 100644 index 0000000..d1fcfdb --- /dev/null +++ b/libmultipath/hwtable.c @@ -0,0 +1,1285 @@ +#include + +#include "checkers.h" +#include "vector.h" +#include "defaults.h" +#include "structs.h" +#include "config.h" +#include "pgpolicies.h" +#include "prio.h" +#include "hwtable.h" + +/* + * Tuning suggestions on these parameters should go to + * dm-devel@redhat.com (subscribers-only, see README) + * + * You are welcome to claim maintainership over a controller + * family. Please mail the currently enlisted maintainer and + * the upstream package maintainer. + * + * Please, use the TEMPLATE below to add new hardware. + * + * WARNING: + * + * Devices with a proprietary handler must also be included in + * the kernel side. Currently at drivers/scsi/scsi_dh.c + * + * Moreover, if a device needs a special treatment by the SCSI + * subsystem it should be included in drivers/scsi/scsi_devinfo.c + */ + +#if 0 + /* + * Copy this TEMPLATE to add new hardware. + * + * Keep only mandatory(.vendor and .product) and modified attributes. + * Attributes with default values must be removed. + * .vendor, .product, .revision and .bl_product are POSIX Extended regex. 
+ * + * COMPANY_NAME + * + * Maintainer: NAME + */ + { + /* Product Name */ + .vendor = "VENDOR", + .product = "PRODUCT", + .revision = "REVISION", + .bl_product = "BL_PRODUCT", + .pgpolicy = FAILOVER, + .uid_attribute = "ID_SERIAL", + .selector = "service-time 0", + .checker_name = TUR, + .alias_prefix = "mpath", + .features = "0", + .hwhandler = "0", + .prio_name = PRIO_CONST, + .prio_args = "", + .pgfailback = -FAILBACK_MANUAL, + .rr_weight = RR_WEIGHT_NONE, + .no_path_retry = NO_PATH_RETRY_UNDEF, + .minio = 1000, + .minio_rq = 1, + .flush_on_last_del = FLUSH_DISABLED, + .user_friendly_names = USER_FRIENDLY_NAMES_OFF, + .fast_io_fail = 5, + .dev_loss = 600, + .retain_hwhandler = RETAIN_HWHANDLER_ON, + .detect_prio = DETECT_PRIO_ON, + .detect_checker = DETECT_CHECKER_ON, + .deferred_remove = DEFERRED_REMOVE_OFF, + .delay_watch_checks = DELAY_CHECKS_OFF, + .delay_wait_checks = DELAY_CHECKS_OFF, + .skip_kpartx = SKIP_KPARTX_OFF, + .max_sectors_kb = MAX_SECTORS_KB_UNDEF, + .ghost_delay = GHOST_DELAY_OFF, + }, +#endif + +static struct hwentry default_hw[] = { + /* + * Generic NVMe devices + * + * Due to the parsing logic in find_hwe(), generic entries + * have to be put on top of this list, and more specific ones + * below. 
+ */ + { + /* Generic NVMe */ + .vendor = "NVME", + .product = ".*", + .uid_attribute = DEFAULT_NVME_UID_ATTRIBUTE, + .checker_name = NONE, + .retain_hwhandler = RETAIN_HWHANDLER_OFF, + }, + /* + * Apple + * + * Maintainer: Shyam Sundar + */ + { + /* Xserve RAID */ + .vendor = "APPLE", + .product = "Xserve RAID", + .pgpolicy = MULTIBUS, + }, + /* + * HPE + */ + { + /* 3PAR / Primera */ + .vendor = "3PARdata", + .product = "VV", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .hwhandler = "1 alua", + .prio_name = PRIO_ALUA, + .no_path_retry = 18, + .fast_io_fail = 10, + .dev_loss = MAX_DEV_LOSS_TMO, + .vpd_vendor_id = VPD_VP_HP3PAR, + }, + { + /* RA8000 / ESA12000 */ + .vendor = "DEC", + .product = "HSG80", + .no_path_retry = NO_PATH_RETRY_QUEUE, + .hwhandler = "1 hp_sw", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = HP_SW, + .prio_name = PRIO_HP_SW, + }, + { + /* VIRTUAL ARRAY 7400 */ + .vendor = "HP", + .product = "A6189A", + .pgpolicy = MULTIBUS, + .no_path_retry = 12, + }, + { + /* MSA 1000/1500 and EVA 3000/5000, with old firmware */ + .vendor = "(COMPAQ|HP)", + .product = "(MSA|HSV)1[01]0", + .hwhandler = "1 hp_sw", + .pgpolicy = GROUP_BY_PRIO, + .no_path_retry = 12, + .checker_name = HP_SW, + .prio_name = PRIO_HP_SW, + }, + { + /* MSA 1000/1500 with new firmware */ + .vendor = "(COMPAQ|HP)", + .product = "MSA VOLUME", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 12, + .prio_name = PRIO_ALUA, + }, + { + /* EVA 3000/5000 with new firmware, EVA 4000/6000/8000 */ + .vendor = "(COMPAQ|HP)", + .product = "(HSV1[01]1|HSV2[01]0|HSV3[046]0|HSV4[05]0)", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 12, + .prio_name = PRIO_ALUA, + }, + { + /* MSA2000 family with old firmware */ + .vendor = "HP", + .product = "(MSA2[02]12fc|MSA2012i)", + .pgpolicy = MULTIBUS, + .no_path_retry = 18, + }, + { + /* MSA2000 family with new firmware */ + .vendor = "HP", + .product = 
"(MSA2012sa|MSA23(12|24)(fc|i|sa)|MSA2000s VOLUME)", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 18, + .prio_name = PRIO_ALUA, + }, + { + /* MSA 1040, 1050, 2040 and 2050 families */ + .vendor = "HP", + .product = "MSA [12]0[45]0 SA[NS]", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 18, + .prio_name = PRIO_ALUA, + }, + { + /* SAN Virtualization Services Platform */ + .vendor = "HP", + .product = "HSVX700", + .hwhandler = "1 alua", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 12, + .prio_name = PRIO_ALUA, + }, + { + /* Smart Array */ + .vendor = "HP", + .product = "LOGICAL VOLUME", + .pgpolicy = MULTIBUS, + .no_path_retry = 12, + }, + { + /* P2000 family */ + .vendor = "HP", + .product = "(P2000 G3 FC|P2000G3 FC/iSCSI|P2000 G3 SAS|P2000 G3 iSCSI)", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 18, + .prio_name = PRIO_ALUA, + }, + { + /* StoreVirtual 4000 and 3200 families */ + .vendor = "LEFTHAND", + .product = "(P4000|iSCSIDisk|FCDISK)", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 18, + .prio_name = PRIO_ALUA, + }, + { + /* Nimble Storage */ + .vendor = "Nimble", + .product = "Server", + .hwhandler = "1 alua", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, + .no_path_retry = NO_PATH_RETRY_QUEUE, + }, + /* SGI */ + { + /* Total Performance 9100 */ + .vendor = "SGI", + .product = "TP9100", + .pgpolicy = MULTIBUS, + }, + { + /* Total Performance family */ + .vendor = "SGI", + .product = "TP9[3457]00", + .bl_product = "Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + { + /* (RDAC) InfiniteStorage */ + .vendor = "SGI", + .product = "IS", + .bl_product = 
"Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + { + /* (DDN) InfiniteStorage */ + .vendor = "SGI", + .product = "^DD[46]A-", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, + .no_path_retry = 30, + }, + /* + * DataDirect Networks + */ + { + /* SAN DataDirector */ + .vendor = "DDN", + .product = "SAN DataDirector", + .pgpolicy = MULTIBUS, + }, + { + /* EF3010 */ + .vendor = "DDN", + .product = "^EF3010", + .pgpolicy = MULTIBUS, + .no_path_retry = 30, + }, + { + /* EF3015 / S2A and SFA families */ + .vendor = "DDN", + .product = "^(EF3015|S2A|SFA)", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, + .no_path_retry = 30, + }, + { + /* + * Nexenta COMSTAR + * + * Maintainer: Yacine Kheddache + */ + .vendor = "NEXENTA", + .product = "COMSTAR", + .pgpolicy = GROUP_BY_SERIAL, + .no_path_retry = 30, + }, + { + /* Tegile IntelliFlash */ + .vendor = "TEGILE", + .product = "(ZEBI-(FC|ISCSI)|INTELLIFLASH)", + .hwhandler = "1 alua", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, + .no_path_retry = 10, + }, + /* + * Dell EMC + */ + { + /* Symmetrix / DMX / VMAX / PowerMax */ + .vendor = "EMC", + .product = "SYMMETRIX", + .pgpolicy = MULTIBUS, + .no_path_retry = 6, + }, + { + /* DGC CLARiiON CX/AX / VNX and Unity */ + .vendor = "^DGC", + .product = "^(RAID|DISK|VRAID)", + .bl_product = "LUNZ", + .hwhandler = "1 emc", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = (300 / DEFAULT_CHECKINT), + .checker_name = EMC_CLARIION, + .prio_name = PRIO_EMC, + }, + { + /* Invista / VPLEX */ + .vendor = "EMC", + .product = "Invista", + .bl_product = "LUNZ", + .pgpolicy = MULTIBUS, + .no_path_retry = 5, + }, + { + /* XtremIO */ + .vendor = "XtremIO", + 
.product = "XtremApp", + .pgpolicy = MULTIBUS, + }, + { + /* + * SC Series, formerly Compellent + * + * Maintainer: Sean McGinnis + */ + .vendor = "COMPELNT", + .product = "Compellent Vol", + .pgpolicy = MULTIBUS, + .no_path_retry = NO_PATH_RETRY_QUEUE, + }, + { + /* MD Series */ + .vendor = "DELL", + .product = "^MD3", + .bl_product = "Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + { + /* EMC PowerMax NVMe */ + .vendor = "NVME", + .product = "^EMC PowerMax_", + .pgpolicy = MULTIBUS, + }, + /* + * Fujitsu + */ + { + /* CentricStor Virtual Tape */ + .vendor = "FSC", + .product = "CentricStor", + .pgpolicy = GROUP_BY_SERIAL, + }, + { + /* ETERNUS family */ + .vendor = "FUJITSU", + .product = "ETERNUS_DX(H|L|M|400|8000)", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 10, + .prio_name = PRIO_ALUA, + }, + { + /* FibreCAT S80 */ + .vendor = "(EUROLOGC|EuroLogc)", + .product = "FC2502", + .pgpolicy = MULTIBUS, + }, + { + /* ETERNUS 2000, 3000 and 4000 */ + .vendor = "FUJITSU", + .product = "E[234]000", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 10, + .prio_name = PRIO_ALUA, + }, + { + /* ETERNUS 6000 and 8000 */ + .vendor = "FUJITSU", + .product = "E[68]000", + .pgpolicy = MULTIBUS, + .no_path_retry = 10, + }, + /* + * Hitachi Vantara + * + * Maintainer: Matthias Rudolph + */ + { + /* USP-V, HUS VM, VSP, VSP G1X00 and VSP GX00 families / HP XP */ + .vendor = "(HITACHI|HP)", + .product = "^OPEN-", + .pgpolicy = MULTIBUS, + }, + { + /* AMS other than AMS 2000 */ + .vendor = "HITACHI", + .product = "^DF", + .no_path_retry = NO_PATH_RETRY_QUEUE, + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_HDS, + }, + { + /* AMS 2000 and HUS 100 families */ + .vendor = "HITACHI", + .product = 
"^DF600F", + .pgpolicy = MULTIBUS, + }, + /* + * IBM + * + * Maintainer: Hannes Reinecke + */ + { + /* ProFibre 4000R */ + .vendor = "IBM", + .product = "ProFibre 4000R", + .pgpolicy = MULTIBUS, + }, + { + /* DS4300 / FAStT600 */ + .vendor = "IBM", + .product = "^1722-600", + .bl_product = "Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + { + /* DS4100 / FAStT100 */ + .vendor = "IBM", + .product = "^1724", + .bl_product = "Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + { + /* DS3000 / DS3200 / DS3300 / DS3400 / Boot DS */ + .vendor = "IBM", + .product = "^1726", + .bl_product = "Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + { + /* DS4400 / DS4500 / FAStT700 / FAStT900 */ + .vendor = "IBM", + .product = "^1742", + .bl_product = "Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + { + /* DS3500 / DS3512 / DS3524 */ + .vendor = "IBM", + .product = "^1746", + .bl_product = "Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + { + /* DCS3860 */ + .vendor = "IBM", + .product = "^1813", + .bl_product = "Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 
pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + { + /* DS3950 / DS4200 / DS4700 / DS5020 */ + .vendor = "IBM", + .product = "^1814", + .bl_product = "Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + { + /* DS4800 */ + .vendor = "IBM", + .product = "^1815", + .bl_product = "Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + { + /* DS5000 / DS5100 / DS5300 / DCS3700 */ + .vendor = "IBM", + .product = "^1818", + .bl_product = "Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + { + /* Netfinity Fibre Channel RAID Controller Unit */ + .vendor = "IBM", + .product = "^3526", + .bl_product = "Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + { + /* FAStT200 and FAStT500 */ + .vendor = "IBM", + .product = "^(3542|3552)", + .bl_product = "Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + { + /* Enterprise Storage Server(ESS) / Shark family */ + .vendor = "IBM", + .product = "^2105", + .no_path_retry = NO_PATH_RETRY_QUEUE, + .pgpolicy = MULTIBUS, + }, + { + /* DS6000 / DS6800 */ + .vendor = "IBM", + .product = 
"^1750500", + .no_path_retry = NO_PATH_RETRY_QUEUE, + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, + }, + { + /* DS8000 family */ + .vendor = "IBM", + .product = "^2107900", + .no_path_retry = NO_PATH_RETRY_QUEUE, + .pgpolicy = MULTIBUS, + }, + { + /* Storwize family / SAN Volume Controller / Flex System V7000 / FlashSystem V840/V9000/9100 */ + .vendor = "IBM", + .product = "^2145", + .no_path_retry = NO_PATH_RETRY_QUEUE, + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, + }, + { + /* PAV DASD ECKD */ + .vendor = "IBM", + .product = "S/390 DASD ECKD", + .bl_product = "S/390", + .uid_attribute = "ID_UID", + .no_path_retry = NO_PATH_RETRY_QUEUE, + .pgpolicy = MULTIBUS, + .checker_name = DIRECTIO, + }, + { + /* PAV DASD FBA */ + .vendor = "IBM", + .product = "S/390 DASD FBA", + .bl_product = "S/390", + .uid_attribute = "ID_UID", + .no_path_retry = NO_PATH_RETRY_QUEUE, + .pgpolicy = MULTIBUS, + .checker_name = DIRECTIO, + }, + { + /* Power RAID */ + .vendor = "IBM", + .product = "^IPR", + .no_path_retry = NO_PATH_RETRY_QUEUE, + .hwhandler = "1 alua", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, + }, + { + /* SAS RAID Controller Module (RSSM) */ + .vendor = "IBM", + .product = "1820N00", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = NO_PATH_RETRY_QUEUE, + .prio_name = PRIO_ALUA, + }, + { + /* XIV Storage System / FlashSystem A9000/A9000R */ + .vendor = "(XIV|IBM)", + .product = "(NEXTRA|2810XIV)", + .no_path_retry = NO_PATH_RETRY_QUEUE, + .pgpolicy = MULTIBUS, + }, + { + /* TMS RamSan / FlashSystem 710/720/810/820/840/900 */ + .vendor = "(TMS|IBM)", + .product = "(RamSan|FlashSystem)", + .pgpolicy = MULTIBUS, + }, + { + /* (DDN) DCS9900, SONAS 2851-DR1 */ + .vendor = "IBM", + .product = "^(DCS9900|2851)", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, + 
.no_path_retry = 30, + }, + /* + * IBM Power Virtual SCSI Devices + * + * Maintainer: Brian King + */ + { + /* AIX VDASD */ + .vendor = "AIX", + .product = "VDASD", + .pgpolicy = MULTIBUS, + .no_path_retry = (300 / DEFAULT_CHECKINT), + }, + { + /* 3303 NVDISK */ + .vendor = "IBM", + .product = "3303[ ]+NVDISK", + .no_path_retry = (300 / DEFAULT_CHECKINT), + }, + { + /* AIX NVDISK */ + .vendor = "AIX", + .product = "NVDISK", + .hwhandler = "1 alua", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = (300 / DEFAULT_CHECKINT), + .prio_name = PRIO_ALUA, + }, + /* + * Lenovo + */ + { + /* + * DE Series + * + * Maintainer: NetApp RDAC team + */ + .vendor = "LENOVO", + .product = "DE_Series", + .bl_product = "Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + /* + * NetApp + */ + { + /* + * ONTAP family + * + * Maintainer: Martin George + */ + .vendor = "NETAPP", + .product = "LUN", + .features = "2 pg_init_retries 50", + .no_path_retry = NO_PATH_RETRY_QUEUE, + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .flush_on_last_del = FLUSH_ENABLED, + .dev_loss = MAX_DEV_LOSS_TMO, + .prio_name = PRIO_ONTAP, + .user_friendly_names = USER_FRIENDLY_NAMES_OFF, + }, + { + /* + * SANtricity(RDAC) family + * + * Maintainer: NetApp RDAC team + */ + .vendor = "(NETAPP|LSI|ENGENIO)", + .product = "INF-01-00", + .bl_product = "Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + { + /* + * SolidFir family + * + * Maintainer: PJ Waskiewicz + */ + .vendor = "SolidFir", + .product = "SSD SAN", + .pgpolicy = MULTIBUS, + .no_path_retry = 24, + }, + { + /* + * NVMe-FC namespace devices: MULTIBUS, 
queueing preferred + * + * The hwtable is searched backwards, so place this after "Generic NVMe" + */ + .vendor = "NVME", + .product = "^NetApp ONTAP Controller", + .pgpolicy = MULTIBUS, + .no_path_retry = NO_PATH_RETRY_QUEUE, + }, + /* + * NEC + */ + { + /* M-Series */ + .vendor = "NEC", + .product = "DISK ARRAY", + .hwhandler = "1 alua", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, + }, + /* + * Oracle + */ + /* + * Pillar Data / Oracle FS + * + * Maintainer: Srinivasan Ramani + */ + { + /* Axiom */ + .vendor = "^Pillar", + .product = "^Axiom", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, + }, + { + /* FS */ + .vendor = "^Oracle", + .product = "^Oracle FS", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, + }, + /* Sun - StorageTek */ + { + /* B210, B220, B240 and B280 */ + .vendor = "STK", + .product = "BladeCtlr", + .bl_product = "Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + { + /* 9176, D173, D178, D210, D220, D240 and D280 */ + .vendor = "STK", + .product = "OPENstorage", + .bl_product = "Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + { + /* 6540 */ + .vendor = "STK", + .product = "FLEXLINE 380", + .bl_product = "Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + { + /* (Dot Hill) 3120, 3310, 3320, 3510 and 3511 */ + .vendor = "SUN", + .product = "StorEdge 3", + .pgpolicy = MULTIBUS, + }, + { + /* 
6580 and 6780 */ + .vendor = "SUN", + .product = "STK6580_6780", + .bl_product = "Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + { + /* 6130 / 6140 */ + .vendor = "SUN", + .product = "CSM[12]00_R", + .bl_product = "Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + { + /* 2500 / 2510 / 2530 / 2540 */ + .vendor = "SUN", + .product = "LCSM100_[IEFS]", + .bl_product = "Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + { + /* 6180 */ + .vendor = "SUN", + .product = "SUN_6180", + .bl_product = "Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + { + /* ArrayStorage */ + .vendor = "SUN", + .product = "ArrayStorage", + .bl_product = "Universal Xport", + .pgpolicy = GROUP_BY_PRIO, + .checker_name = RDAC, + .features = "2 pg_init_retries 50", + .hwhandler = "1 rdac", + .prio_name = PRIO_RDAC, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 30, + }, + { + /* ZFS Storage Appliances */ + .vendor = "SUN", + .product = "(Sun Storage|ZFS Storage|COMSTAR)", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, + .no_path_retry = 30, + }, + /* + * Pivot3 + * + * Maintainer: Bart Brooks + */ + { + /* Raige */ + .vendor = "PIVOT3", + .product = "RAIGE VOLUME", + .no_path_retry = NO_PATH_RETRY_QUEUE, + .pgpolicy = MULTIBUS, + }, + { + /* NexGen 
/ vSTAC */ + .vendor = "(NexGen|Pivot3)", + .product = "(TierStore|vSTAC)", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, + .no_path_retry = NO_PATH_RETRY_QUEUE, + }, + /* + * Intel + */ + { + /* Multi-Flex */ + .vendor = "(Intel|INTEL)", + .product = "Multi-Flex", + .bl_product = "VTrak V-LUN", + .hwhandler = "1 alua", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = NO_PATH_RETRY_QUEUE, + .prio_name = PRIO_ALUA, + }, + /* + * Linux-IO Target + */ + { + /* Linux-IO Target */ + .vendor = "(LIO-ORG|SUSE)", + .product = "RBD", + .hwhandler = "1 alua", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = 12, + .prio_name = PRIO_ALUA, + }, + /* + * DataCore + */ + { + /* SANmelody */ + .vendor = "DataCore", + .product = "SANmelody", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = NO_PATH_RETRY_QUEUE, + .prio_name = PRIO_ALUA, + }, + { + /* SANsymphony */ + .vendor = "DataCore", + .product = "Virtual Disk", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .no_path_retry = NO_PATH_RETRY_QUEUE, + .prio_name = PRIO_ALUA, + }, + /* + * Pure Storage + */ + { + /* FlashArray */ + .vendor = "PURE", + .product = "FlashArray", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .hwhandler = "1 alua", + .prio_name = PRIO_ALUA, + .fast_io_fail = 10, + .max_sectors_kb = 4096, + }, + /* + * Huawei + */ + { + /* OceanStor V3 */ + .vendor = "HUAWEI", + .product = "XSG1", + .pgpolicy = GROUP_BY_PRIO, + .prio_name = PRIO_ALUA, + }, + /* + * Kove + */ + { + /* XPD */ + .vendor = "KOVE", + .product = "XPD", + .pgpolicy = MULTIBUS, + }, + /* + * Infinidat + * + * Maintainer: Arnon Yaari + */ + { + /* InfiniBox */ + .vendor = "NFINIDAT", + .product = "InfiniBox", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = 30, + .prio_name = PRIO_ALUA, + .selector = "round-robin 0", + .rr_weight = RR_WEIGHT_PRIO, + 
.no_path_retry = NO_PATH_RETRY_FAIL, + .minio = 1, + .minio_rq = 1, + .flush_on_last_del = FLUSH_ENABLED, + .fast_io_fail = 15, + .dev_loss = 15, + }, + /* + * Kaminario + */ + { + /* K2 */ + .vendor = "KMNRIO", + .product = "K2", + .pgpolicy = MULTIBUS, + }, + /* + * Imation/Nexsan + */ + { + /* E-Series */ + .vendor = "NEXSAN", + .product = "NXS-B0", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, + .no_path_retry = 15, + }, + { + /* SATABeast / SATABoy */ + .vendor = "NEXSAN", + .product = "SATAB", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, + .no_path_retry = 15, + }, + { + /* NST / UNITY */ + .vendor = "Nexsan", + .product = "(NestOS|NST5000)", + .hwhandler = "1 alua", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, + .no_path_retry = 30, + }, + /* + * Violin Systems + */ + { + /* 3000 / 6000 Series */ + .vendor = "VIOLIN", + .product = "SAN ARRAY$", + .pgpolicy = GROUP_BY_SERIAL, + .no_path_retry = 30, + }, + { + /* 3000 / 6000 Series - ALUA mode */ + .vendor = "VIOLIN", + .product = "SAN ARRAY ALUA", + .hwhandler = "1 alua", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, + .no_path_retry = 30, + }, + { + /* FSP 7000 family */ + .vendor = "VIOLIN", + .product = "CONCERTO ARRAY", + .pgpolicy = MULTIBUS, + .no_path_retry = 30, + }, + /* Xiotech */ + { + /* Intelligent Storage Elements family */ + .vendor = "(XIOTECH|XIOtech)", + .product = "ISE", + .pgpolicy = MULTIBUS, + .no_path_retry = 12, + }, + { + /* iglu blaze family */ + .vendor = "(XIOTECH|XIOtech)", + .product = "IGLU DISK", + .pgpolicy = MULTIBUS, + .no_path_retry = 30, + }, + { + /* Magnitude family */ + .vendor = "(XIOTECH|XIOtech)", + .product = "Magnitude", + .pgpolicy = MULTIBUS, + .no_path_retry = 30, + }, + /* + * Promise Technology + */ + { + /* VTrak family */ + .vendor = "Promise", + .product = "VTrak", + 
.bl_product = "VTrak V-LUN", + .hwhandler = "1 alua", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, + .no_path_retry = 30, + }, + { + /* Vess family */ + .vendor = "Promise", + .product = "Vess", + .bl_product = "Vess V-LUN", + .hwhandler = "1 alua", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, + .no_path_retry = 30, + }, + /* + * Infortrend Technology + */ + { + /* EonStor / ESVA */ + .vendor = "^IFT", + .product = ".*", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, + .no_path_retry = 30, + }, + /* + * Seagate Technology (Dot Hill Systems) + */ + { + /* SANnet family */ + .vendor = "DotHill", + .product = "SANnet", + .pgpolicy = MULTIBUS, + .no_path_retry = 30, + }, + { + /* R/Evolution family */ + .vendor = "DotHill", + .product = "R/Evo", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, + .no_path_retry = 30, + }, + { + /* AssuredSAN family */ + .vendor = "DotHill", + .product = "^DH", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, + .no_path_retry = 30, + }, + /* + * AccelStor + */ + { + /* NeoSapphire */ + .vendor = "AStor", + .product = "NeoSapphire", + .pgpolicy = MULTIBUS, + .no_path_retry = 30, + }, + /* + * INSPUR + */ + { + /* AS5300/AS5500 G2 */ + .vendor = "INSPUR", + .product = "MCS", + .pgpolicy = GROUP_BY_PRIO, + .pgfailback = -FAILBACK_IMMEDIATE, + .prio_name = PRIO_ALUA, + }, + /* + * EOL + */ + { + /* NULL */ + .vendor = NULL, + .product = NULL, + }, +}; + +int setup_default_hwtable(vector hw) +{ + int r = 0; + struct hwentry * hwe = default_hw; + + while (hwe->vendor) { + r += store_hwe(hw, hwe); + hwe++; + } + return r; +} diff --git a/libmultipath/hwtable.h b/libmultipath/hwtable.h new file mode 100644 index 0000000..13c5701 --- /dev/null +++ b/libmultipath/hwtable.h @@ -0,0 +1,6 @@ +#ifndef _HWTABLE_H +#define 
_HWTABLE_H + +int setup_default_hwtable (vector hw); + +#endif /* _HWTABLE_H */ diff --git a/libmultipath/io_err_stat.c b/libmultipath/io_err_stat.c new file mode 100644 index 0000000..1b9cd6c --- /dev/null +++ b/libmultipath/io_err_stat.c @@ -0,0 +1,790 @@ +/* + * (C) Copyright HUAWEI Technology Corp. 2017, All Rights Reserved. + * + * io_err_stat.c + * version 1.0 + * + * IO error stream statistic process for path failure event from kernel + * + * Author(s): Guan Junxiong 2017 + * + * This file is released under the GPL version 2, or any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vector.h" +#include "memory.h" +#include "checkers.h" +#include "config.h" +#include "structs.h" +#include "structs_vec.h" +#include "devmapper.h" +#include "debug.h" +#include "lock.h" +#include "time-util.h" +#include "io_err_stat.h" + +#define IOTIMEOUT_SEC 60 +#define TIMEOUT_NO_IO_NSEC 10000000 /*10ms = 10000000ns*/ +#define FLAKY_PATHFAIL_THRESHOLD 2 +#define CONCUR_NR_EVENT 32 + +#define PATH_IO_ERR_IN_CHECKING -1 +#define PATH_IO_ERR_WAITING_TO_CHECK -2 + +#define io_err_stat_log(prio, fmt, args...) 
\ + condlog(prio, "io error statistic: " fmt, ##args) + + +struct io_err_stat_pathvec { + pthread_mutex_t mutex; + vector pathvec; +}; + +struct dio_ctx { + struct timespec io_starttime; + unsigned int blksize; + void *buf; + struct iocb io; +}; + +struct io_err_stat_path { + char devname[FILE_NAME_SIZE]; + int fd; + struct dio_ctx *dio_ctx_array; + int io_err_nr; + int io_nr; + struct timespec start_time; + + int total_time; + int err_rate_threshold; +}; + +pthread_t io_err_stat_thr; +pthread_attr_t io_err_stat_attr; + +static pthread_mutex_t io_err_thread_lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t io_err_thread_cond = PTHREAD_COND_INITIALIZER; +static int io_err_thread_running = 0; + +static struct io_err_stat_pathvec *paths; +struct vectors *vecs; +io_context_t ioctx; + +static void cancel_inflight_io(struct io_err_stat_path *pp); + +static void rcu_unregister(__attribute__((unused)) void *param) +{ + rcu_unregister_thread(); +} + +struct io_err_stat_path *find_err_path_by_dev(vector pathvec, char *dev) +{ + int i; + struct io_err_stat_path *pp; + + if (!pathvec) + return NULL; + vector_foreach_slot(pathvec, pp, i) + if (!strcmp(pp->devname, dev)) + return pp; + + io_err_stat_log(4, "%s: not found in check queue", dev); + + return NULL; +} + +static int init_each_dio_ctx(struct dio_ctx *ct, int blksize, + unsigned long pgsize) +{ + ct->blksize = blksize; + if (posix_memalign(&ct->buf, pgsize, blksize)) + return 1; + memset(ct->buf, 0, blksize); + ct->io_starttime.tv_sec = 0; + ct->io_starttime.tv_nsec = 0; + + return 0; +} + +static void deinit_each_dio_ctx(struct dio_ctx *ct) +{ + if (ct->buf) + free(ct->buf); +} + +static int setup_directio_ctx(struct io_err_stat_path *p) +{ + unsigned long pgsize = getpagesize(); + char fpath[PATH_MAX]; + unsigned int blksize = 0; + int i; + + if (snprintf(fpath, PATH_MAX, "/dev/%s", p->devname) >= PATH_MAX) + return 1; + if (p->fd < 0) + p->fd = open(fpath, O_RDONLY | O_DIRECT); + if (p->fd < 0) + return 1; + + 
p->dio_ctx_array = MALLOC(sizeof(struct dio_ctx) * CONCUR_NR_EVENT); + if (!p->dio_ctx_array) + goto fail_close; + + if (ioctl(p->fd, BLKBSZGET, &blksize) < 0) { + io_err_stat_log(4, "%s:cannot get blocksize, set default 512", + p->devname); + blksize = 512; + } + if (!blksize) + goto free_pdctx; + + for (i = 0; i < CONCUR_NR_EVENT; i++) { + if (init_each_dio_ctx(p->dio_ctx_array + i, blksize, pgsize)) + goto deinit; + } + return 0; + +deinit: + for (i = 0; i < CONCUR_NR_EVENT; i++) + deinit_each_dio_ctx(p->dio_ctx_array + i); +free_pdctx: + FREE(p->dio_ctx_array); +fail_close: + close(p->fd); + + return 1; +} + +static void destroy_directio_ctx(struct io_err_stat_path *p) +{ + int i; + + if (!p || !p->dio_ctx_array) + return; + cancel_inflight_io(p); + + for (i = 0; i < CONCUR_NR_EVENT; i++) + deinit_each_dio_ctx(p->dio_ctx_array + i); + FREE(p->dio_ctx_array); + + if (p->fd >= 0) /* -1 marks "not open"; descriptor 0 is a valid fd */ + close(p->fd); +} + +static struct io_err_stat_path *alloc_io_err_stat_path(void) +{ + struct io_err_stat_path *p; + + p = (struct io_err_stat_path *)MALLOC(sizeof(*p)); + if (!p) + return NULL; + + memset(p->devname, 0, sizeof(p->devname)); + p->io_err_nr = 0; + p->io_nr = 0; + p->total_time = 0; + p->start_time.tv_sec = 0; + p->start_time.tv_nsec = 0; + p->err_rate_threshold = 0; + p->fd = -1; + + return p; +} + +static void free_io_err_stat_path(struct io_err_stat_path *p) +{ + FREE(p); +} + +static struct io_err_stat_pathvec *alloc_pathvec(void) +{ + struct io_err_stat_pathvec *p; + int r; + + p = (struct io_err_stat_pathvec *)MALLOC(sizeof(*p)); + if (!p) + return NULL; + p->pathvec = vector_alloc(); + if (!p->pathvec) + goto out_free_struct_pathvec; + r = pthread_mutex_init(&p->mutex, NULL); + if (r) + goto out_free_member_pathvec; + + return p; + +out_free_member_pathvec: + vector_free(p->pathvec); +out_free_struct_pathvec: + FREE(p); + return NULL; +} + +static void free_io_err_pathvec(struct io_err_stat_pathvec *p) +{ + struct io_err_stat_path *path; + int i; + + if (!p) +
return; + pthread_mutex_destroy(&p->mutex); + if (p->pathvec) { /* release every queued path, then the vector itself */ + vector_foreach_slot(p->pathvec, path, i) { + destroy_directio_ctx(path); + free_io_err_stat_path(path); + } + vector_free(p->pathvec); + } + FREE(p); +} + +/* + * return value + * 0: enqueue OK + * 1: fails because of internal error + */ +static int enqueue_io_err_stat_by_path(struct path *path) +{ + struct io_err_stat_path *p; + + pthread_mutex_lock(&paths->mutex); + p = find_err_path_by_dev(paths->pathvec, path->dev); + if (p) { + pthread_mutex_unlock(&paths->mutex); + return 0; + } + pthread_mutex_unlock(&paths->mutex); + + p = alloc_io_err_stat_path(); + if (!p) + return 1; + + memcpy(p->devname, path->dev, sizeof(p->devname)); + p->total_time = path->mpp->marginal_path_err_sample_time; + p->err_rate_threshold = path->mpp->marginal_path_err_rate_threshold; + + if (setup_directio_ctx(p)) + goto free_ioerr_path; + pthread_mutex_lock(&paths->mutex); + if (!vector_alloc_slot(paths->pathvec)) + goto unlock_destroy; + vector_set_slot(paths->pathvec, p); + pthread_mutex_unlock(&paths->mutex); + + io_err_stat_log(2, "%s: enqueue path %s to check", + path->mpp->alias, path->dev); + return 0; + +unlock_destroy: + pthread_mutex_unlock(&paths->mutex); + destroy_directio_ctx(p); +free_ioerr_path: + free_io_err_stat_path(p); + + return 1; +} + +int io_err_stat_handle_pathfail(struct path *path) +{ + struct timespec curr_time; + + if (uatomic_read(&io_err_thread_running) == 0) + return 1; + + if (path->io_err_disable_reinstate) { + io_err_stat_log(3, "%s: reinstate is already disabled", + path->dev); + return 1; + } + if (path->io_err_pathfail_cnt < 0) + return 1; + + if (!path->mpp) + return 1; + if (path->mpp->marginal_path_double_failed_time <= 0 || + path->mpp->marginal_path_err_sample_time <= 0 || + path->mpp->marginal_path_err_recheck_gap_time <= 0 || + path->mpp->marginal_path_err_rate_threshold < 0) { + io_err_stat_log(4, "%s: parameter not set", path->mpp->alias); + return 1; + } + if
(path->mpp->marginal_path_err_sample_time < (2 * IOTIMEOUT_SEC)) { + io_err_stat_log(2, "%s: marginal_path_err_sample_time should not less than %d", + path->mpp->alias, 2 * IOTIMEOUT_SEC); + return 1; + } + /* + * The test should only be started for paths that have failed + * repeatedly in a certain time frame, so that we have reason + * to assume they're flaky. Without bother the admin to configure + * the repeated count threshold and time frame, we assume a path + * which fails at least twice within 60 seconds is flaky. + */ + if (clock_gettime(CLOCK_MONOTONIC, &curr_time) != 0) + return 1; + if (path->io_err_pathfail_cnt == 0) { + path->io_err_pathfail_cnt++; + path->io_err_pathfail_starttime = curr_time.tv_sec; + io_err_stat_log(5, "%s: start path flakiness pre-checking", + path->dev); + return 0; + } + if ((curr_time.tv_sec - path->io_err_pathfail_starttime) > + path->mpp->marginal_path_double_failed_time) { + path->io_err_pathfail_cnt = 0; + path->io_err_pathfail_starttime = curr_time.tv_sec; + io_err_stat_log(5, "%s: restart path flakiness pre-checking", + path->dev); + } + path->io_err_pathfail_cnt++; + if (path->io_err_pathfail_cnt >= FLAKY_PATHFAIL_THRESHOLD) { + path->io_err_disable_reinstate = 1; + path->io_err_pathfail_cnt = PATH_IO_ERR_WAITING_TO_CHECK; + /* enqueue path as soon as it comes up */ + path->io_err_dis_reinstate_time = 0; + if (path->state != PATH_DOWN) { + struct config *conf; + int oldstate = path->state; + unsigned int checkint; + + conf = get_multipath_config(); + checkint = conf->checkint; + put_multipath_config(conf); + io_err_stat_log(2, "%s: mark as failed", path->dev); + path->mpp->stat_path_failures++; + path->state = PATH_DOWN; + path->dmstate = PSTATE_FAILED; + if (oldstate == PATH_UP || oldstate == PATH_GHOST) + update_queue_mode_del_path(path->mpp); + if (path->tick > checkint) + path->tick = checkint; + } + } + + return 0; +} + +int need_io_err_check(struct path *pp) +{ + struct timespec curr_time; + int r; + + if 
(uatomic_read(&io_err_thread_running) == 0) + return 0; + if (count_active_paths(pp->mpp) <= 0) { + io_err_stat_log(2, "%s: recover path early", pp->dev); + goto recover; + } + if (pp->io_err_pathfail_cnt != PATH_IO_ERR_WAITING_TO_CHECK) + return 1; + if (clock_gettime(CLOCK_MONOTONIC, &curr_time) != 0 || + (curr_time.tv_sec - pp->io_err_dis_reinstate_time) > + pp->mpp->marginal_path_err_recheck_gap_time) { + io_err_stat_log(4, "%s: reschedule checking after %d seconds", + pp->dev, + pp->mpp->marginal_path_err_recheck_gap_time); + r = enqueue_io_err_stat_by_path(pp); + /* + * Enqueue fails because of internal error. + * In this case , we recover this path + * Or else, return 1 to set path state to PATH_SHAKY + */ + if (r == 1) { + io_err_stat_log(3, "%s: enqueue fails, to recover", + pp->dev); + goto recover; + } else + pp->io_err_pathfail_cnt = PATH_IO_ERR_IN_CHECKING; + } + + return 1; + +recover: + pp->io_err_pathfail_cnt = 0; + pp->io_err_disable_reinstate = 0; + return 0; +} + +static int delete_io_err_stat_by_addr(struct io_err_stat_path *p) +{ + int i; + + i = find_slot(paths->pathvec, p); + if (i != -1) + vector_del_slot(paths->pathvec, i); + + destroy_directio_ctx(p); + free_io_err_stat_path(p); + + return 0; +} + +static void account_async_io_state(struct io_err_stat_path *pp, int rc) +{ + switch (rc) { + case PATH_DOWN: + case PATH_TIMEOUT: + pp->io_err_nr++; + break; + case PATH_UNCHECKED: + case PATH_UP: + case PATH_PENDING: + break; + default: + break; + } +} + +static int poll_io_err_stat(struct vectors *vecs, struct io_err_stat_path *pp) +{ + struct timespec currtime, difftime; + struct path *path; + double err_rate; + + if (clock_gettime(CLOCK_MONOTONIC, &currtime) != 0) + return 1; + timespecsub(&currtime, &pp->start_time, &difftime); + if (difftime.tv_sec < pp->total_time) + return 0; + + io_err_stat_log(4, "%s: check end", pp->devname); + + err_rate = pp->io_nr == 0 ? 
0 : (pp->io_err_nr * 1000.0f) / pp->io_nr; + io_err_stat_log(3, "%s: IO error rate (%.1f/1000)", + pp->devname, err_rate); + pthread_cleanup_push(cleanup_lock, &vecs->lock); + lock(&vecs->lock); + pthread_testcancel(); + path = find_path_by_dev(vecs->pathvec, pp->devname); + if (!path) { + io_err_stat_log(4, "path %s not found'", pp->devname); + } else if (err_rate <= pp->err_rate_threshold) { + path->io_err_pathfail_cnt = 0; + path->io_err_disable_reinstate = 0; + io_err_stat_log(3, "%s: (%d/%d) good to enable reinstating", + pp->devname, pp->io_err_nr, pp->io_nr); + /* + * schedule path check as soon as possible to + * update path state. Do NOT reinstate dm path here + */ + path->tick = 1; + + } else if (path->mpp && count_active_paths(path->mpp) > 0) { + io_err_stat_log(3, "%s: keep failing the dm path %s", + path->mpp->alias, path->dev); + path->io_err_pathfail_cnt = PATH_IO_ERR_WAITING_TO_CHECK; + path->io_err_disable_reinstate = 1; + path->io_err_dis_reinstate_time = currtime.tv_sec; + io_err_stat_log(3, "%s: disable reinstating of %s", + path->mpp->alias, path->dev); + } else { + path->io_err_pathfail_cnt = 0; + path->io_err_disable_reinstate = 0; + io_err_stat_log(3, "%s: there is orphan path, enable reinstating", + pp->devname); + } + lock_cleanup_pop(vecs->lock); + + delete_io_err_stat_by_addr(pp); + + return 0; +} + +static int send_each_async_io(struct dio_ctx *ct, int fd, char *dev) +{ + int rc = -1; + + if (ct->io_starttime.tv_nsec == 0 && + ct->io_starttime.tv_sec == 0) { + struct iocb *ios[1] = { &ct->io }; + + if (clock_gettime(CLOCK_MONOTONIC, &ct->io_starttime) != 0) { + ct->io_starttime.tv_sec = 0; + ct->io_starttime.tv_nsec = 0; + return rc; + } + io_prep_pread(&ct->io, fd, ct->buf, ct->blksize, 0); + if (io_submit(ioctx, 1, ios) != 1) { + io_err_stat_log(5, "%s: io_submit error %i", + dev, errno); + return rc; + } + rc = 0; + } + + return rc; +} + +static void send_batch_async_ios(struct io_err_stat_path *pp) +{ + int i; + struct dio_ctx *ct; 
+ struct timespec currtime, difftime; + + if (clock_gettime(CLOCK_MONOTONIC, &currtime) != 0) + return; + /* + * Give a free time for all IO to complete or timeout + */ + if (pp->start_time.tv_sec != 0) { + timespecsub(&currtime, &pp->start_time, &difftime); + if (difftime.tv_sec + IOTIMEOUT_SEC >= pp->total_time) + return; + } + + for (i = 0; i < CONCUR_NR_EVENT; i++) { + ct = pp->dio_ctx_array + i; + if (!send_each_async_io(ct, pp->fd, pp->devname)) + pp->io_nr++; + } + if (pp->start_time.tv_sec == 0 && pp->start_time.tv_nsec == 0 && + clock_gettime(CLOCK_MONOTONIC, &pp->start_time)) { + pp->start_time.tv_sec = 0; + pp->start_time.tv_nsec = 0; + } +} + +static int try_to_cancel_timeout_io(struct dio_ctx *ct, struct timespec *t, + char *dev) +{ + struct timespec difftime; + struct io_event event; + int rc = PATH_UNCHECKED; + int r; + + if (ct->io_starttime.tv_sec == 0) + return rc; + timespecsub(t, &ct->io_starttime, &difftime); + if (difftime.tv_sec > IOTIMEOUT_SEC) { + struct iocb *ios[1] = { &ct->io }; + + io_err_stat_log(5, "%s: abort check on timeout", dev); + r = io_cancel(ioctx, ios[0], &event); + if (r) + io_err_stat_log(5, "%s: io_cancel error %i", + dev, errno); + ct->io_starttime.tv_sec = 0; + ct->io_starttime.tv_nsec = 0; + rc = PATH_TIMEOUT; + } else { + rc = PATH_PENDING; + } + + return rc; +} + +static void poll_async_io_timeout(void) +{ + struct io_err_stat_path *pp; + struct timespec curr_time; + int rc = PATH_UNCHECKED; + int i, j; + + if (clock_gettime(CLOCK_MONOTONIC, &curr_time) != 0) + return; + vector_foreach_slot(paths->pathvec, pp, i) { + for (j = 0; j < CONCUR_NR_EVENT; j++) { + rc = try_to_cancel_timeout_io(pp->dio_ctx_array + j, + &curr_time, pp->devname); + account_async_io_state(pp, rc); + } + } +} + +static void cancel_inflight_io(struct io_err_stat_path *pp) +{ + struct io_event event; + int i, r; + + for (i = 0; i < CONCUR_NR_EVENT; i++) { + struct dio_ctx *ct = pp->dio_ctx_array + i; + struct iocb *ios[1] = { &ct->io }; + + if 
(ct->io_starttime.tv_sec == 0 + && ct->io_starttime.tv_nsec == 0) + continue; + io_err_stat_log(5, "%s: abort infligh io", + pp->devname); + r = io_cancel(ioctx, ios[0], &event); + if (r) + io_err_stat_log(5, "%s: io_cancel error %d, %i", + pp->devname, r, errno); + ct->io_starttime.tv_sec = 0; + ct->io_starttime.tv_nsec = 0; + } +} + +static inline int handle_done_dio_ctx(struct dio_ctx *ct, struct io_event *ev) +{ + ct->io_starttime.tv_sec = 0; + ct->io_starttime.tv_nsec = 0; + return (ev->res == ct->blksize) ? PATH_UP : PATH_DOWN; +} + +static void handle_async_io_done_event(struct io_event *io_evt) +{ + struct io_err_stat_path *pp; + struct dio_ctx *ct; + int rc = PATH_UNCHECKED; + int i, j; + + vector_foreach_slot(paths->pathvec, pp, i) { + for (j = 0; j < CONCUR_NR_EVENT; j++) { + ct = pp->dio_ctx_array + j; + if (&ct->io == io_evt->obj) { + rc = handle_done_dio_ctx(ct, io_evt); + account_async_io_state(pp, rc); + return; + } + } + } +} + +static void process_async_ios_event(int timeout_nsecs, char *dev) +{ + struct io_event events[CONCUR_NR_EVENT]; + int i, n; + struct timespec timeout = { .tv_nsec = timeout_nsecs }; + + errno = 0; + n = io_getevents(ioctx, 1L, CONCUR_NR_EVENT, events, &timeout); + if (n < 0) { /* libaio returns a negated error number and does not set errno */ + io_err_stat_log(3, "%s: async io events returned %d (errno=%s)", + dev, n, strerror(-n)); + } else { + for (i = 0; i < n; i++) + handle_async_io_done_event(&events[i]); + } +} + +static void service_paths(void) +{ + struct io_err_stat_path *pp; + int i; + + pthread_mutex_lock(&paths->mutex); + vector_foreach_slot(paths->pathvec, pp, i) { + send_batch_async_ios(pp); + process_async_ios_event(TIMEOUT_NO_IO_NSEC, pp->devname); + poll_async_io_timeout(); + poll_io_err_stat(vecs, pp); + } + pthread_mutex_unlock(&paths->mutex); +} + +static void cleanup_unlock(void *arg) +{ + pthread_mutex_unlock((pthread_mutex_t*) arg); +} + +static void cleanup_exited(__attribute__((unused)) void *arg) +{ + uatomic_set(&io_err_thread_running, 0); +} + +static void
*io_err_stat_loop(void *data) +{ + sigset_t set; + + vecs = (struct vectors *)data; + pthread_cleanup_push(rcu_unregister, NULL); + rcu_register_thread(); + + pthread_cleanup_push(cleanup_exited, NULL); + + sigfillset(&set); + sigdelset(&set, SIGUSR2); + + mlockall(MCL_CURRENT | MCL_FUTURE); + + pthread_mutex_lock(&io_err_thread_lock); + uatomic_set(&io_err_thread_running, 1); + pthread_cond_broadcast(&io_err_thread_cond); + pthread_mutex_unlock(&io_err_thread_lock); + + while (1) { + struct timespec ts; + + service_paths(); + + ts.tv_sec = 0; + ts.tv_nsec = 100 * 1000 * 1000; + /* + * pselect() with no fds, a timeout, and a sigmask: + * sleep for 100ms and react on SIGUSR2. + */ + pselect(1, NULL, NULL, NULL, &ts, &set); + } + + pthread_cleanup_pop(1); + pthread_cleanup_pop(1); + return NULL; +} + +int start_io_err_stat_thread(void *data) +{ + int ret; + + if (uatomic_read(&io_err_thread_running) == 1) + return 0; + + if (io_setup(CONCUR_NR_EVENT, &ioctx) != 0) { + io_err_stat_log(4, "io_setup failed"); + return 1; + } + paths = alloc_pathvec(); + if (!paths) + goto destroy_ctx; + + pthread_mutex_lock(&io_err_thread_lock); + pthread_cleanup_push(cleanup_unlock, &io_err_thread_lock); + + ret = pthread_create(&io_err_stat_thr, &io_err_stat_attr, + io_err_stat_loop, data); + + while (!ret && !uatomic_read(&io_err_thread_running) && + pthread_cond_wait(&io_err_thread_cond, + &io_err_thread_lock) == 0); + + pthread_cleanup_pop(1); + + if (ret) { + io_err_stat_log(0, "cannot create io_error statistic thread"); + goto out_free; + } + + io_err_stat_log(2, "io_error statistic thread started"); + return 0; + +out_free: + free_io_err_pathvec(paths); +destroy_ctx: + io_destroy(ioctx); + io_err_stat_log(0, "failed to start io_error statistic thread"); + return 1; +} + +void stop_io_err_stat_thread(void) +{ + if (io_err_stat_thr == (pthread_t)0) + return; + + if (uatomic_read(&io_err_thread_running) == 1) + pthread_cancel(io_err_stat_thr); + + pthread_join(io_err_stat_thr, 
NULL); + free_io_err_pathvec(paths); + io_destroy(ioctx); +} diff --git a/libmultipath/io_err_stat.h b/libmultipath/io_err_stat.h new file mode 100644 index 0000000..53d6d7d --- /dev/null +++ b/libmultipath/io_err_stat.h @@ -0,0 +1,15 @@ +#ifndef _IO_ERR_STAT_H +#define _IO_ERR_STAT_H + +#include "vector.h" +#include "lock.h" + + +extern pthread_attr_t io_err_stat_attr; + +int start_io_err_stat_thread(void *data); +void stop_io_err_stat_thread(void); +int io_err_stat_handle_pathfail(struct path *path); +int need_io_err_check(struct path *pp); + +#endif /* _IO_ERR_STAT_H */ diff --git a/libmultipath/list.h b/libmultipath/list.h new file mode 100644 index 0000000..ced021f --- /dev/null +++ b/libmultipath/list.h @@ -0,0 +1,365 @@ +/* + * Copied from the Linux kernel source tree, version 2.6.0-test1. + * + * Licensed under the GPL v2 as per the whole kernel source tree. + * + */ + +#ifndef _LIST_H +#define _LIST_H + +#include + +/** + * container_of - cast a member of a structure out to the containing structure + * + * @ptr: the pointer to the member. + * @type: the type of the container struct this is embedded in. + * @member: the name of the member within the struct. + * + */ +#define container_of_const(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (const type *)( (const char *)__mptr - offsetof(type,member) );}) + +#define container_of(ptr, type, member) ({ \ + typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) + +/* + * These are non-NULL pointers that will result in page faults + * under normal circumstances, used to verify that nobody uses + * non-initialized list entries. + */ +#define LIST_POISON1 ((void *) 0x00100100) +#define LIST_POISON2 ((void *) 0x00200200) + +/* + * Simple doubly linked list implementation. 
+ * + * Some of the internal functions ("__xxx") are useful when + * manipulating whole lists rather than single entries, as + * sometimes we already know the next/prev entries and we can + * generate better code by using them directly rather than + * using the generic single-entry routines. + */ + +struct list_head { + struct list_head *next, *prev; +}; + +#define LIST_HEAD_INIT(name) { &(name), &(name) } + +#define LIST_HEAD(name) \ + struct list_head name = LIST_HEAD_INIT(name) + +#define INIT_LIST_HEAD(ptr) do { \ + (ptr)->next = (ptr); (ptr)->prev = (ptr); \ +} while (0) + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} + +/** + * list_add - add a new entry + * @new: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + */ +static inline void list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} + +/** + * list_add_tail - add a new entry + * @new: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + */ +static inline void list_add_tail(struct list_head *new, struct list_head *head) +{ + __list_add(new, head->prev, head); +} + +/* + * Delete a list entry by making the prev/next entries + * point to each other. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_del(struct list_head * prev, struct list_head * next) +{ + next->prev = prev; + prev->next = next; +} + +/** + * list_del - deletes entry from list. 
+ * @entry: the element to delete from the list. + * Note: list_empty on entry does not return true after this, the entry is + * in an undefined state. + */ +static inline void list_del(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->next = LIST_POISON1; + entry->prev = LIST_POISON2; +} + +/** + * list_del_init - deletes entry from list and reinitialize it. + * @entry: the element to delete from the list. + */ +static inline void list_del_init(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + INIT_LIST_HEAD(entry); +} + +/** + * list_move - delete from one list and add as another's head + * @list: the entry to move + * @head: the head that will precede our entry + */ +static inline void list_move(struct list_head *list, struct list_head *head) +{ + __list_del(list->prev, list->next); + list_add(list, head); +} + +/** + * list_move_tail - delete from one list and add as another's tail + * @list: the entry to move + * @head: the head that will follow our entry + */ +static inline void list_move_tail(struct list_head *list, + struct list_head *head) +{ + __list_del(list->prev, list->next); + list_add_tail(list, head); +} + +/** + * list_empty - tests whether a list is empty + * @head: the list to test. + */ +static inline int list_empty(struct list_head *head) +{ + return head->next == head; +} + +static inline void __list_splice(const struct list_head *list, + struct list_head *prev, + struct list_head *next) +{ + struct list_head *first = list->next; + struct list_head *last = list->prev; + + first->prev = prev; + prev->next = first; + + last->next = next; + next->prev = last; +} + +/** + * list_splice - join two lists + * @list: the new list to add. + * @head: the place to add it in the first list. 
+ */ +static inline void list_splice(struct list_head *list, struct list_head *head) +{ + if (!list_empty(list)) + __list_splice(list, head, head->next); +} + +/** + * list_splice_tail - join two lists, each list being a queue + * @list: the new list to add. + * @head: the place to add it in the first list. + */ +static inline void list_splice_tail(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) + __list_splice(list, head->prev, head); +} + +/** + * list_splice_init - join two lists and reinitialise the emptied list. + * @list: the new list to add. + * @head: the place to add it in the first list. + * + * The list at @list is reinitialised + */ +static inline void list_splice_init(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) { + __list_splice(list, head, head->next); + INIT_LIST_HEAD(list); + } +} + +/** + * list_splice_tail_init - join two lists and reinitialise the emptied list + * @list: the new list to add. + * @head: the place to add it in the first list. + * + * Each of the lists is a queue. + * The list at @list is reinitialised + */ +static inline void list_splice_tail_init(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) { + __list_splice(list, head->prev, head); + INIT_LIST_HEAD(list); + } +} + +/** + * list_entry - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + */ +#define list_entry(ptr, type, member) \ + container_of(ptr, type, member) + +/** + * list_for_each - iterate over a list + * @pos: the &struct list_head to use as a loop counter. + * @head: the head for your list. + */ +#define list_for_each(pos, head) \ + for (pos = (head)->next; pos != (head); \ + pos = pos->next) + +/** + * __list_for_each - iterate over a list + * @pos: the &struct list_head to use as a loop counter. + * @head: the head for your list. 
+ * + * This variant differs from list_for_each() in that it's the + * simplest possible list iteration code. + * Use this for code that knows the list to be very short (empty + * or 1 entry) most of the time. + */ +#define __list_for_each(pos, head) \ + for (pos = (head)->next; pos != (head); pos = pos->next) + +/** + * list_for_each_prev - iterate over a list backwards + * @pos: the &struct list_head to use as a loop counter. + * @head: the head for your list. + */ +#define list_for_each_prev(pos, head) \ + for (pos = (head)->prev; pos != (head); pos = pos->prev) + +/** + * list_for_each_safe - iterate over a list safe against removal of list entry + * @pos: the &struct list_head to use as a loop counter. + * @n: another &struct list_head to use as temporary storage + * @head: the head for your list. + */ +#define list_for_each_safe(pos, n, head) \ + for (pos = (head)->next, n = pos->next; pos != (head); \ + pos = n, n = pos->next) + +/** + * list_for_each_entry - iterate over list of given type + * @pos: the type * to use as a loop counter. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry(pos, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) + +/** + * list_for_each_entry_reverse - iterate backwards over list of given type. + * @pos: the type * to use as a loop counter. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry_reverse(pos, head, member) \ + for (pos = list_entry((head)->prev, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry(pos->member.prev, typeof(*pos), member)) + +/** + * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry + * @pos: the type * to use as a loop counter. 
+ * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry_safe(pos, n, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member), \ + n = list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, typeof(*n), member)) + +/** + * list_for_each_entry_reverse_safe - iterate backwards over list of given type safe against removal of list entry + * @pos: the type * to use as a loop counter. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry_reverse_safe(pos, n, head, member) \ + for (pos = list_entry((head)->prev, typeof(*pos), member), \ + n = list_entry(pos->member.prev, typeof(*pos), member);\ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.prev, typeof(*n), member)) + +/** + * list_for_some_entry_safe - iterate list from the given begin node to the given end node safe against removal of list entry + * @pos: the type * to use as a loop counter. + * @n: another type * to use as temporary storage + * @from: the begin node of the iteration. + * @to: the end node of the iteration. + * @member: the name of the list_struct within the struct. + */ +#define list_for_some_entry_safe(pos, n, from, to, member) \ + for (pos = list_entry((from)->next, typeof(*pos), member), \ + n = list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (to); \ + pos = n, n = list_entry(n->member.next, typeof(*n), member)) + +/** + * list_for_some_entry_reverse_safe - iterate backwards list from the given begin node to the given end node safe against removal of list entry + * @pos: the type * to use as a loop counter. + * @n: another type * to use as temporary storage + * @from: the begin node of the iteration. 
+ * @to: the end node of the iteration. + * @member: the name of the list_struct within the struct. + */ +#define list_for_some_entry_reverse_safe(pos, n, from, to, member) \ + for (pos = list_entry((from)->prev, typeof(*pos), member), \ + n = list_entry(pos->member.prev, typeof(*pos), member); \ + &pos->member != (to); \ + pos = n, n = list_entry(n->member.prev, typeof(*n), member)) + +#endif /* _LIST_H */ diff --git a/libmultipath/lock.c b/libmultipath/lock.c new file mode 100644 index 0000000..72c70e3 --- /dev/null +++ b/libmultipath/lock.c @@ -0,0 +1,8 @@ +#include "lock.h" + +void cleanup_lock (void * data) +{ + struct mutex_lock *lock = data; + + unlock(lock); +} diff --git a/libmultipath/lock.h b/libmultipath/lock.h new file mode 100644 index 0000000..a170efe --- /dev/null +++ b/libmultipath/lock.h @@ -0,0 +1,29 @@ +#ifndef _LOCK_H +#define _LOCK_H + +#include + +struct mutex_lock { + pthread_mutex_t mutex; +}; + +static inline void lock(struct mutex_lock *a) +{ + pthread_mutex_lock(&a->mutex); +} + +static inline int timedlock(struct mutex_lock *a, struct timespec *tmo) +{ + return pthread_mutex_timedlock(&a->mutex, tmo); +} + +static inline void unlock(struct mutex_lock *a) +{ + pthread_mutex_unlock(&a->mutex); +} + +#define lock_cleanup_pop(a) pthread_cleanup_pop(1) + +void cleanup_lock (void * data); + +#endif /* _LOCK_H */ diff --git a/libmultipath/log.c b/libmultipath/log.c new file mode 100644 index 0000000..debd36d --- /dev/null +++ b/libmultipath/log.c @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2005 Christophe Varoqui + * Copyright (c) 2005 Benjamin Marzinski, Redhat + * Copyright (c) 2005 Jun'ichi Nomura, NEC + */ +#include +#include +#include +#include +#include +#include + +#include "memory.h" +#include "log.h" + +#define ALIGN(len, s) (((len)+(s)-1)/(s)*(s)) + +struct logarea* la; + +#if LOGDBG +static void dump_logarea (void) +{ + struct logmsg * msg; + + logdbg(stderr, "\n==== area: start addr = %p, end addr = %p ====\n", + la->start, la->end); + 
logdbg(stderr, "|addr |next |prio|msg\n"); + + for (msg = (struct logmsg *)la->head; (void *)msg != la->tail; + msg = msg->next) + logdbg(stderr, "|%p |%p |%i |%s\n", (void *)msg, msg->next, + msg->prio, (char *)&msg->str); + + logdbg(stderr, "|%p |%p |%i |%s\n", (void *)msg, msg->next, + msg->prio, (char *)&msg->str); + + logdbg(stderr, "\n\n"); +} +#endif + +static int logarea_init (int size) +{ + logdbg(stderr,"enter logarea_init\n"); + la = (struct logarea *)MALLOC(sizeof(struct logarea)); + + if (!la) + return 1; + + if (size < MAX_MSG_SIZE) + size = DEFAULT_AREA_SIZE; + + la->start = MALLOC(size); + if (!la->start) { + FREE(la); + return 1; + } + memset(la->start, 0, size); + + la->empty = 1; + la->end = la->start + size; + la->head = la->start; + la->tail = la->start; + + la->buff = MALLOC(MAX_MSG_SIZE + sizeof(struct logmsg)); + + if (!la->buff) { + FREE(la->start); + FREE(la); + return 1; + } + return 0; + +} + +int log_init(char *program_name, int size) +{ + logdbg(stderr,"enter log_init\n"); + openlog(program_name, 0, LOG_DAEMON); + + if (logarea_init(size)) + return 1; + + return 0; +} + +void free_logarea (void) +{ + FREE(la->start); + FREE(la->buff); + FREE(la); + return; +} + +void log_close (void) +{ + free_logarea(); + closelog(); + + return; +} + +void log_reset (char *program_name) +{ + closelog(); + tzset(); + openlog(program_name, 0, LOG_DAEMON); +} + +int log_enqueue (int prio, const char * fmt, va_list ap) +{ + int len, fwd; + char buff[MAX_MSG_SIZE]; + struct logmsg * msg; + struct logmsg * lastmsg; + + lastmsg = (struct logmsg *)la->tail; + + if (!la->empty) { + fwd = sizeof(struct logmsg) + + strlen((char *)&lastmsg->str) * sizeof(char) + 1; + la->tail += ALIGN(fwd, sizeof(void *)); + } + vsnprintf(buff, MAX_MSG_SIZE, fmt, ap); + len = ALIGN(sizeof(struct logmsg) + strlen(buff) * sizeof(char) + 1, + sizeof(void *)); + + /* not enough space on tail : rewind */ + if (la->head <= la->tail && len > (la->end - la->tail)) { + logdbg(stderr, 
"enqueue: rewind tail to %p\n", la->tail); + if (la->head == la->start ) { + logdbg(stderr, "enqueue: can not rewind tail, drop msg\n"); + la->tail = lastmsg; + return 1; /* can't reuse */ + } + la->tail = la->start; + + if (la->empty) + la->head = la->start; + } + + /* not enough space on head : drop msg */ + if (la->head > la->tail && len >= (la->head - la->tail)) { + logdbg(stderr, "enqueue: log area overrun, drop msg\n"); + + if (!la->empty) + la->tail = lastmsg; + + return 1; + } + + /* ok, we can stage the msg in the area */ + la->empty = 0; + msg = (struct logmsg *)la->tail; + msg->prio = prio; + memcpy((void *)&msg->str, buff, strlen(buff) + 1); + lastmsg->next = la->tail; + msg->next = la->head; + + logdbg(stderr, "enqueue: %p, %p, %i, %s\n", (void *)msg, msg->next, + msg->prio, (char *)&msg->str); + +#if LOGDBG + dump_logarea(); +#endif + return 0; +} + +int log_dequeue (void * buff) +{ + struct logmsg * src = (struct logmsg *)la->head; + struct logmsg * dst = (struct logmsg *)buff; + struct logmsg * lst = (struct logmsg *)la->tail; + + if (la->empty) + return 1; + + int len = strlen((char *)&src->str) * sizeof(char) + + sizeof(struct logmsg) + 1; + + dst->prio = src->prio; + memcpy(dst, src, len); + + if (la->tail == la->head) + la->empty = 1; /* we purge the last logmsg */ + else { + la->head = src->next; + lst->next = la->head; + } + logdbg(stderr, "dequeue: %p, %p, %i, %s\n", + (void *)src, src->next, src->prio, (char *)&src->str); + + memset((void *)src, 0, len); + + return 0; +} + +/* + * this one can block under memory pressure + */ +void log_syslog (void * buff) +{ + struct logmsg * msg = (struct logmsg *)buff; + + syslog(msg->prio, "%s", (char *)&msg->str); +} diff --git a/libmultipath/log.h b/libmultipath/log.h new file mode 100644 index 0000000..d2448f6 --- /dev/null +++ b/libmultipath/log.h @@ -0,0 +1,44 @@ +#ifndef LOG_H +#define LOG_H + +#define DEFAULT_AREA_SIZE 16384 +#define MAX_MSG_SIZE 256 + +#ifndef LOGLEVEL +#define LOGLEVEL 5 +#endif 
+ +#if LOGDBG +#define logdbg(file, fmt, args...) fprintf(file, fmt, ##args) +#else +#define logdbg(file, fmt, args...) do {} while (0) +#endif + +struct logmsg { + short int prio; + void * next; + char str[0]; +}; + +struct logarea { + int empty; + void * head; + void * tail; + void * start; + void * end; + char * buff; +}; + +extern struct logarea* la; + +int log_init (char * progname, int size); +void log_close (void); +void log_reset (char * progname); +int log_enqueue (int prio, const char * fmt, va_list ap) + __attribute__((format(printf, 2, 0))); +int log_dequeue (void *); +void log_syslog (void *); +void dump_logmsg (void *); +void free_logarea (void); + +#endif /* LOG_H */ diff --git a/libmultipath/log_pthread.c b/libmultipath/log_pthread.c new file mode 100644 index 0000000..15baef8 --- /dev/null +++ b/libmultipath/log_pthread.c @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2005 Christophe Varoqui + */ +#include +#include +#include +#include +#include +#include + +#include "memory.h" + +#include "log_pthread.h" +#include "log.h" +#include "lock.h" + +static pthread_t log_thr; + +static pthread_mutex_t logq_lock; +static pthread_mutex_t logev_lock; +static pthread_cond_t logev_cond; + +static int logq_running; +static int log_messages_pending; + +void log_safe (int prio, const char * fmt, va_list ap) +{ + if (prio > LOG_DEBUG) + prio = LOG_DEBUG; + + if (log_thr == (pthread_t)0) { + vsyslog(prio, fmt, ap); + return; + } + + pthread_mutex_lock(&logq_lock); + log_enqueue(prio, fmt, ap); + pthread_mutex_unlock(&logq_lock); + + pthread_mutex_lock(&logev_lock); + log_messages_pending = 1; + pthread_cond_signal(&logev_cond); + pthread_mutex_unlock(&logev_lock); +} + +static void flush_logqueue (void) +{ + int empty; + + do { + pthread_mutex_lock(&logq_lock); + empty = log_dequeue(la->buff); + pthread_mutex_unlock(&logq_lock); + if (!empty) + log_syslog(la->buff); + } while (empty == 0); +} + +static void * log_thread (__attribute__((unused)) void * et) +{ + int 
running;
+
+	pthread_mutex_lock(&logev_lock);
+	logq_running = 1;
+	pthread_mutex_unlock(&logev_lock);
+
+	mlockall(MCL_CURRENT | MCL_FUTURE);
+	logdbg(stderr,"enter log_thread\n");
+
+	while (1) {
+		pthread_mutex_lock(&logev_lock);
+		while (logq_running && !log_messages_pending) /* re-check: wakeups may be spurious */
+			pthread_cond_wait(&logev_cond, &logev_lock);
+		log_messages_pending = 0;
+		running = logq_running;
+		pthread_mutex_unlock(&logev_lock);
+		if (!running)
+			break;
+		flush_logqueue();
+	}
+	return NULL;
+}
+
+void log_thread_start (pthread_attr_t *attr)
+{
+	logdbg(stderr,"enter log_thread_start\n");
+
+	pthread_mutex_init(&logq_lock, NULL);
+	pthread_mutex_init(&logev_lock, NULL);
+	pthread_cond_init(&logev_cond, NULL);
+
+	if (log_init("multipathd", 0)) {
+		fprintf(stderr,"can't initialize log buffer\n");
+		exit(1);
+	}
+	if (pthread_create(&log_thr, attr, log_thread, NULL)) {
+		fprintf(stderr,"can't start log thread\n");
+		exit(1);
+	}
+
+	return;
+}
+
+void log_thread_reset (void)
+{
+	logdbg(stderr,"resetting log\n");
+
+	pthread_mutex_lock(&logq_lock);
+	log_reset("multipathd");
+	pthread_mutex_unlock(&logq_lock);
+}
+
+void log_thread_stop (void)
+{
+	logdbg(stderr,"enter log_thread_stop\n");
+
+	pthread_mutex_lock(&logev_lock);
+	logq_running = 0;
+	pthread_cond_signal(&logev_cond);
+	pthread_mutex_unlock(&logev_lock);
+
+	pthread_mutex_lock(&logq_lock);
+	pthread_cancel(log_thr); /* NOTE(review): no cleanup handler; a cancel inside cond_wait leaves logev_lock held */
+	pthread_mutex_unlock(&logq_lock);
+	pthread_join(log_thr, NULL);
+	log_thr = (pthread_t)0;
+
+	flush_logqueue();
+
+	pthread_mutex_destroy(&logq_lock);
+	pthread_mutex_destroy(&logev_lock);
+	pthread_cond_destroy(&logev_cond);
+
+	log_close();
+}
diff --git a/libmultipath/log_pthread.h b/libmultipath/log_pthread.h
new file mode 100644
index 0000000..810ac92
--- /dev/null
+++ b/libmultipath/log_pthread.h
@@ -0,0 +1,12 @@
+#ifndef _LOG_PTHREAD_H
+#define _LOG_PTHREAD_H
+
+#include
+
+void log_safe(int prio, const char * fmt, va_list ap)
+	__attribute__((format(printf, 2, 0)));
+void
log_thread_start(pthread_attr_t *attr); +void log_thread_reset (void); +void log_thread_stop(void); + +#endif /* _LOG_PTHREAD_H */ diff --git a/libmultipath/memory.c b/libmultipath/memory.c new file mode 100644 index 0000000..7514642 --- /dev/null +++ b/libmultipath/memory.c @@ -0,0 +1,444 @@ +/* + * Part: Memory management framework. This framework is used to + * find any memory leak. + * + * Version: $Id: memory.c,v 1.1.11 2005/03/01 01:22:13 acassen Exp $ + * + * Authors: Alexandre Cassen, + * Jan Holmberg, + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Copyright (C) 2001-2005 Alexandre Cassen, + */ + +#include +#include "memory.h" + +/* + * Memory management. in debug mode, + * help finding eventual memory leak. + * Allocation memory types manipulated are : + * + * +type+--------meaning--------+ + * ! 0 ! Free slot ! + * ! 1 ! Overrun ! + * ! 2 ! free null ! + * ! 3 ! realloc null ! + * ! 4 ! Not previus allocated ! + * ! 8 ! Last free list ! + * ! 9 ! Allocated ! + * +----+-----------------------+ + * + * global variable debug bit 9 ( 512 ) used to + * flag some memory error. 
+ * + */ + +#ifdef _DEBUG_ + +typedef struct { + int type; + int line; + char *func; + char *file; + void *ptr; + unsigned long size; + long csum; +} MEMCHECK; + +/* Last free pointers */ +static MEMCHECK free_list[256]; + +static MEMCHECK alloc_list[MAX_ALLOC_LIST]; +static int number_alloc_list = 0; +static int n = 0; /* Alloc list pointer */ +static int f = 0; /* Free list pointer */ + +void * +dbg_malloc(unsigned long size, char *file, char *function, int line) +{ + void *buf; + int i = 0; + long check; + + buf = zalloc(size + sizeof (long)); + + check = 0xa5a5 + size; + *(long *) ((char *) buf + size) = check; + + while (i < number_alloc_list) { + if (alloc_list[i].type == 0) + break; + i++; + } + + if (i == number_alloc_list) + number_alloc_list++; + + assert(number_alloc_list < MAX_ALLOC_LIST); + + alloc_list[i].ptr = buf; + alloc_list[i].size = size; + alloc_list[i].file = file; + alloc_list[i].func = function; + alloc_list[i].line = line; + alloc_list[i].csum = check; + alloc_list[i].type = 9; + + if (debug & 1) + printf("zalloc[%3d:%3d], %p, %4ld at %s, %3d, %s\n", + i, number_alloc_list, buf, size, file, line, + function); + + n++; + return buf; +} + +char * +dbg_strdup(char *str, char *file, char *function, int line) +{ + void *buf; + int i = 0; + long check; + long size; + + size = strlen(str) + 1; + buf = zalloc(size + sizeof (long)); + strcat(buf, str); + + check = 0xa5a5 + size; + *(long *) ((char *) buf + size) = check; + + while (i < number_alloc_list) { + if (alloc_list[i].type == 0) + break; + i++; + } + + if (i == number_alloc_list) + number_alloc_list++; + + assert(number_alloc_list < MAX_ALLOC_LIST); + + alloc_list[i].ptr = buf; + alloc_list[i].size = size; + alloc_list[i].file = file; + alloc_list[i].func = function; + alloc_list[i].line = line; + alloc_list[i].csum = check; + alloc_list[i].type = 9; + + if (debug & 1) + printf("strdup[%3d:%3d], %p, %4ld at %s, %3d, %s\n", + i, number_alloc_list, buf, size, file, line, + function); + + n++; 
+ return buf; +} + + + +/* Display a buffer into a HEXA formatted output */ +static void +dump_buffer(char *buff, int count) +{ + int i, j, c; + int printnext = 1; + + if (count % 16) + c = count + (16 - count % 16); + else + c = count; + + for (i = 0; i < c; i++) { + if (printnext) { + printnext--; + printf("%.4x ", i & 0xffff); + } + if (i < count) + printf("%3.2x", buff[i] & 0xff); + else + printf(" "); + if (!((i + 1) % 8)) { + if ((i + 1) % 16) + printf(" -"); + else { + printf(" "); + for (j = i - 15; j <= i; j++) + if (j < count) { + if ((buff[j] & 0xff) >= 0x20 + && (buff[j] & 0xff) <= 0x7e) + printf("%c", + buff[j] & 0xff); + else + printf("."); + } else + printf(" "); + printf("\n"); + printnext = 1; + } + } + } +} + +int +dbg_free(void *buffer, char *file, char *function, int line) +{ + int i = 0; + void *buf; + + /* If nullpointer remember */ + if (buffer == NULL) { + i = number_alloc_list++; + + assert(number_alloc_list < MAX_ALLOC_LIST); + + alloc_list[i].ptr = buffer; + alloc_list[i].size = 0; + alloc_list[i].file = file; + alloc_list[i].func = function; + alloc_list[i].line = line; + alloc_list[i].type = 2; + if (debug & 1) + printf("free NULL in %s, %3d, %s\n", file, + line, function); + + debug |= 512; /* Memory Error detect */ + + return n; + } else + buf = buffer; + + while (i < number_alloc_list) { + if (alloc_list[i].type == 9 && alloc_list[i].ptr == buf) { + if (* + ((long *) ((char *) alloc_list[i].ptr + + alloc_list[i].size)) == + alloc_list[i].csum) + alloc_list[i].type = 0; /* Release */ + else { + alloc_list[i].type = 1; /* Overrun */ + if (debug & 1) { + printf("free corrupt, buffer overrun [%3d:%3d], %p, %4ld at %s, %3d, %s\n", + i, number_alloc_list, + buf, alloc_list[i].size, file, + line, function); + dump_buffer(alloc_list[i].ptr, + alloc_list[i].size + sizeof (long)); + printf("Check_sum\n"); + dump_buffer((char *) &alloc_list[i].csum, + sizeof(long)); + + debug |= 512; /* Memory Error detect */ + } + } + break; + } + i++; + } + + 
/* Not found */ + if (i == number_alloc_list) { + printf("Free ERROR %p\n", buffer); + number_alloc_list++; + + assert(number_alloc_list < MAX_ALLOC_LIST); + + alloc_list[i].ptr = buf; + alloc_list[i].size = 0; + alloc_list[i].file = file; + alloc_list[i].func = function; + alloc_list[i].line = line; + alloc_list[i].type = 4; + debug |= 512; + + return n; + } + + if (buffer != NULL) + xfree(buffer); + + if (debug & 1) + printf("free [%3d:%3d], %p, %4ld at %s, %3d, %s\n", + i, number_alloc_list, buf, + alloc_list[i].size, file, line, function); + + free_list[f].file = file; + free_list[f].line = line; + free_list[f].func = function; + free_list[f].ptr = buffer; + free_list[f].type = 8; + free_list[f].csum = i; /* Using this field for row id */ + + f++; + f &= 255; + n--; + + return n; +} + +void +dbg_free_final(char *banner) +{ + unsigned int sum = 0, overrun = 0, badptr = 0; + int i, j; + i = 0; + + printf("\n---[ Memory dump for (%s)]---\n\n", banner); + + while (i < number_alloc_list) { + switch (alloc_list[i].type) { + case 3: + badptr++; + printf + ("null pointer to realloc(nil,%ld)! at %s, %3d, %s\n", + alloc_list[i].size, alloc_list[i].file, + alloc_list[i].line, alloc_list[i].func); + break; + case 4: + badptr++; + printf + ("pointer not found in table to free(%p) [%3d:%3d], at %s, %3d, %s\n", + alloc_list[i].ptr, i, number_alloc_list, + alloc_list[i].file, alloc_list[i].line, + alloc_list[i].func); + for (j = 0; j < 256; j++) + if (free_list[j].ptr == alloc_list[i].ptr) + if (free_list[j].type == 8) + printf + (" -> pointer already released at [%3d:%3d], at %s, %3d, %s\n", + (int) free_list[j].csum, + number_alloc_list, + free_list[j].file, + free_list[j].line, + free_list[j].func); + break; + case 2: + badptr++; + printf("null pointer to free(nil)! 
at %s, %3d, %s\n", + alloc_list[i].file, alloc_list[i].line, + alloc_list[i].func); + break; + case 1: + overrun++; + printf("%p [%3d:%3d], %4ld buffer overrun!:\n", + alloc_list[i].ptr, i, number_alloc_list, + alloc_list[i].size); + printf(" --> source of malloc: %s, %3d, %s\n", + alloc_list[i].file, alloc_list[i].line, + alloc_list[i].func); + break; + case 9: + sum += alloc_list[i].size; + printf("%p [%3d:%3d], %4ld not released!:\n", + alloc_list[i].ptr, i, number_alloc_list, + alloc_list[i].size); + printf(" --> source of malloc: %s, %3d, %s\n", + alloc_list[i].file, alloc_list[i].line, + alloc_list[i].func); + break; + } + i++; + } + + printf("\n\n---[ Memory dump summary for (%s) ]---\n", banner); + printf("Total number of bytes not freed...: %d\n", sum); + printf("Number of entries not freed.......: %d\n", n); + printf("Maximum allocated entries.........: %d\n", number_alloc_list); + printf("Number of bad entries.............: %d\n", badptr); + printf("Number of buffer overrun..........: %d\n\n", overrun); + + if (sum || n || badptr || overrun) + printf("=> Program seems to have some memory problem !!!\n\n"); + else + printf("=> Program seems to be memory allocation safe...\n\n"); +} + +void * +dbg_realloc(void *buffer, unsigned long size, char *file, char *function, + int line) +{ + int i = 0; + void *buf, *buf2; + long check; + + if (buffer == NULL) { + printf("realloc %p %s, %3d %s\n", buffer, file, line, function); + i = number_alloc_list++; + + assert(number_alloc_list < MAX_ALLOC_LIST); + + alloc_list[i].ptr = NULL; + alloc_list[i].size = 0; + alloc_list[i].file = file; + alloc_list[i].func = function; + alloc_list[i].line = line; + alloc_list[i].type = 3; + return dbg_malloc(size, file, function, line); + } + + buf = buffer; + + while (i < number_alloc_list) { + if (alloc_list[i].ptr == buf) { + buf = alloc_list[i].ptr; + break; + } + i++; + } + + /* not found */ + if (i == number_alloc_list) { + printf("realloc ERROR no matching zalloc %p \n", 
buffer); + number_alloc_list++; + + assert(number_alloc_list < MAX_ALLOC_LIST); + + alloc_list[i].ptr = buf; + alloc_list[i].size = 0; + alloc_list[i].file = file; + alloc_list[i].func = function; + alloc_list[i].line = line; + alloc_list[i].type = 9; + debug |= 512; /* Memory Error detect */ + return NULL; + } + + buf2 = ((char *) buf) + alloc_list[i].size; + + if (*(long *) (buf2) != alloc_list[i].csum) { + alloc_list[i].type = 1; + debug |= 512; /* Memory Error detect */ + } + buf = realloc(buffer, size + sizeof (long)); + + check = 0xa5a5 + size; + *(long *) ((char *) buf + size) = check; + alloc_list[i].csum = check; + + if (debug & 1) + printf("realloc [%3d:%3d] %p, %4ld %s %d %s -> %p %4ld %s %d %s\n", + i, number_alloc_list, alloc_list[i].ptr, + alloc_list[i].size, alloc_list[i].file, alloc_list[i].line, alloc_list[i].func, + buf, size, file, line, function); + + alloc_list[i].ptr = buf; + alloc_list[i].size = size; + alloc_list[i].file = file; + alloc_list[i].line = line; + alloc_list[i].func = function; + + return buf; +} + +#endif diff --git a/libmultipath/memory.h b/libmultipath/memory.h new file mode 100644 index 0000000..a3c478e --- /dev/null +++ b/libmultipath/memory.h @@ -0,0 +1,66 @@ +/* + * Part: memory.c include file. + * + * Version: $Id: memory.h,v 1.1.11 2005/03/01 01:22:13 acassen Exp $ + * + * Authors: Alexandre Cassen, + * Jan Holmberg, + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ *
+ * Copyright (C) 2001-2005 Alexandre Cassen,
+ */
+
+#ifndef _MEMORY_H
+#define _MEMORY_H
+
+/* system includes */
+#include
+#include
+#include
+#include
+
+/* Local defines */
+#ifdef _DEBUG_
+
+int debug; /* NOTE(review): defined (not extern-declared) in a header; fails to link with -fno-common */
+
+#define MAX_ALLOC_LIST 2048
+
+#define MALLOC(n) ( dbg_malloc((n), \
+		(__FILE__), (char *)(__FUNCTION__), (__LINE__)) )
+#define FREE(b) ( dbg_free((b), \
+		(__FILE__), (char *)(__FUNCTION__), (__LINE__)) )
+#define REALLOC(b,n) ( dbg_realloc((b), (n), \
+		(__FILE__), (char *)(__FUNCTION__), (__LINE__)) )
+#define STRDUP(n) ( dbg_strdup((n), \
+		(__FILE__), (char *)(__FUNCTION__), (__LINE__)) )
+
+/* Memory debug prototypes defs */
+extern void *dbg_malloc(unsigned long, char *, char *, int);
+extern int dbg_free(void *, char *, char *, int);
+extern void *dbg_realloc(void *, unsigned long, char *, char *, int);
+extern char *dbg_strdup(char *, char *, char *, int);
+extern void dbg_free_final(char *);
+
+#else
+
+#define MALLOC(n) (calloc(1,(n)))
+#define FREE(p) do { free(p); (p) = NULL; } while(0)
+#define REALLOC(p,n) (realloc((p),(n)))
+#define STRDUP(n) (strdup(n))
+
+#endif
+
+/* Common defines; do/while(0) makes FREE_PTR a single statement, safe before "else" */
+#define FREE_PTR(P) do { if ((P)) FREE((P)); } while (0)
+
+#endif
diff --git a/libmultipath/nvme-lib.c b/libmultipath/nvme-lib.c
new file mode 100644
index 0000000..f30e769
--- /dev/null
+++ b/libmultipath/nvme-lib.c
@@ -0,0 +1,49 @@
+#include
+/* avoid inclusion of standard API */
+#define _NVME_LIB_C 1
+#include "nvme-lib.h"
+#include "nvme-ioctl.c"
+#include "debug.h"
+
+int log_nvme_errcode(int err, const char *dev, const char *msg)
+{
+	if (err > 0)
+		condlog(3, "%s: %s: NVMe status %d", dev, msg, err);
+	else if (err < 0)
+		condlog(3, "%s: %s: %s", dev, msg, strerror(errno));
+	return err;
+}
+
+int libmp_nvme_get_nsid(int fd)
+{
+	return nvme_get_nsid(fd);
+}
+
+int libmp_nvme_identify_ctrl(int fd, struct nvme_id_ctrl *ctrl)
+{
+	return nvme_identify_ctrl(fd, ctrl);
+}
+
+int libmp_nvme_identify_ns(int fd, __u32 nsid, bool present,
+			   struct nvme_id_ns *ns)
+{
+
return nvme_identify_ns(fd, nsid, present, ns);
+}
+
+int libmp_nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo)
+{
+	return nvme_ana_log(fd, ana_log, ana_log_len, rgo);
+}
+
+int nvme_id_ctrl_ana(int fd, struct nvme_id_ctrl *ctrl)
+{
+	int rc;
+	struct nvme_id_ctrl c;
+
+	rc = nvme_identify_ctrl(fd, &c);
+	if (rc != 0) /* rc > 0 is an NVMe status: c was not filled in, never read it */
+		return rc < 0 ? rc : -1;
+	if (ctrl)
+		*ctrl = c;
+	return c.cmic & (1 << 3) ? 1 : 0; /* 1 if CMIC bit 3 is set (see nvme-lib.h), else 0 */
+}
diff --git a/libmultipath/nvme-lib.h b/libmultipath/nvme-lib.h
new file mode 100644
index 0000000..448dd99
--- /dev/null
+++ b/libmultipath/nvme-lib.h
@@ -0,0 +1,39 @@
+#ifndef NVME_LIB_H
+#define NVME_LIB_H
+
+#include "nvme.h"
+
+int log_nvme_errcode(int err, const char *dev, const char *msg);
+int libmp_nvme_get_nsid(int fd);
+int libmp_nvme_identify_ctrl(int fd, struct nvme_id_ctrl *ctrl);
+int libmp_nvme_identify_ns(int fd, __u32 nsid, bool present,
+			   struct nvme_id_ns *ns);
+int libmp_nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo);
+/*
+ * Identify controller, and return true if ANA is supported
+ * ctrl will be filled in if controller is identified, even w/o ANA
+ * ctrl may be NULL
+ */
+int nvme_id_ctrl_ana(int fd, struct nvme_id_ctrl *ctrl);
+
+#ifndef _NVME_LIB_C
+/*
+ * In all files except nvme-lib.c, the nvme functions can be called
+ * by their usual name.
+ */ +#define nvme_get_nsid libmp_nvme_get_nsid +#define nvme_identify_ctrl libmp_nvme_identify_ctrl +#define nvme_identify_ns libmp_nvme_identify_ns +#define nvme_ana_log libmp_nvme_ana_log +/* + * Undefine these to avoid clashes with libmultipath's byteorder.h + */ +#undef cpu_to_le16 +#undef cpu_to_le32 +#undef cpu_to_le64 +#undef le16_to_cpu +#undef le32_to_cpu +#undef le64_to_cpu +#endif + +#endif /* NVME_LIB_H */ diff --git a/libmultipath/nvme/argconfig.h b/libmultipath/nvme/argconfig.h new file mode 100644 index 0000000..adb192b --- /dev/null +++ b/libmultipath/nvme/argconfig.h @@ -0,0 +1,99 @@ +//////////////////////////////////////////////////////////////////////// +// +// Copyright 2014 PMC-Sierra, Inc. +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+// +//////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////// +// +// Author: Logan Gunthorpe +// Logan Gunthorpe +// +// Date: Oct 23 2014 +// +// Description: +// Header file for argconfig.c +// +//////////////////////////////////////////////////////////////////////// + +#ifndef argconfig_H +#define argconfig_H + +#include +#include +#include + +enum argconfig_types { + CFG_NONE, + CFG_STRING, + CFG_INT, + CFG_SIZE, + CFG_LONG, + CFG_LONG_SUFFIX, + CFG_DOUBLE, + CFG_BOOL, + CFG_BYTE, + CFG_SHORT, + CFG_POSITIVE, + CFG_INCREMENT, + CFG_SUBOPTS, + CFG_FILE_A, + CFG_FILE_W, + CFG_FILE_R, + CFG_FILE_AP, + CFG_FILE_WP, + CFG_FILE_RP, +}; + +struct argconfig_commandline_options { + const char *option; + const char short_option; + const char *meta; + enum argconfig_types config_type; + void *default_value; + int argument_type; + const char *help; +}; + +#define CFG_MAX_SUBOPTS 500 +#define MAX_HELP_FUNC 20 + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void argconfig_help_func(); +void argconfig_append_usage(const char *str); +void argconfig_print_help(const char *program_desc, + const struct argconfig_commandline_options *options); +int argconfig_parse(int argc, char *argv[], const char *program_desc, + const struct argconfig_commandline_options *options, + void *config_out, size_t config_size); +int argconfig_parse_subopt_string(char *string, char **options, + size_t max_options); +unsigned argconfig_parse_comma_sep_array(char *string, int *ret, + unsigned max_length); +unsigned argconfig_parse_comma_sep_array_long(char *string, + unsigned long long *ret, + unsigned max_length); +void argconfig_register_help_func(argconfig_help_func * f); + +void print_word_wrapped(const char *s, int indent, int start); +#ifdef __cplusplus +} +#endif +#endif diff --git a/libmultipath/nvme/json.h b/libmultipath/nvme/json.h new file mode 100644 index 0000000..c4ea531 --- /dev/null +++ 
b/libmultipath/nvme/json.h @@ -0,0 +1,87 @@ +#ifndef __JSON__H +#define __JSON__H + +struct json_object; +struct json_array; +struct json_pair; + +#define JSON_TYPE_STRING 0 +#define JSON_TYPE_INTEGER 1 +#define JSON_TYPE_FLOAT 2 +#define JSON_TYPE_OBJECT 3 +#define JSON_TYPE_ARRAY 4 +#define JSON_TYPE_UINT 5 +#define JSON_PARENT_TYPE_PAIR 0 +#define JSON_PARENT_TYPE_ARRAY 1 +struct json_value { + int type; + union { + long long integer_number; + unsigned long long uint_number; + long double float_number; + char *string; + struct json_object *object; + struct json_array *array; + }; + int parent_type; + union { + struct json_pair *parent_pair; + struct json_array *parent_array; + }; +}; + +struct json_array { + struct json_value **values; + int value_cnt; + struct json_value *parent; +}; + +struct json_object { + struct json_pair **pairs; + int pair_cnt; + struct json_value *parent; +}; + +struct json_pair { + char *name; + struct json_value *value; + struct json_object *parent; +}; + +struct json_object *json_create_object(void); +struct json_array *json_create_array(void); + +void json_free_object(struct json_object *obj); + +int json_object_add_value_type(struct json_object *obj, const char *name, int type, ...); +#define json_object_add_value_int(obj, name, val) \ + json_object_add_value_type((obj), name, JSON_TYPE_INTEGER, (long long) (val)) +#define json_object_add_value_uint(obj, name, val) \ + json_object_add_value_type((obj), name, JSON_TYPE_UINT, (unsigned long long) (val)) +#define json_object_add_value_float(obj, name, val) \ + json_object_add_value_type((obj), name, JSON_TYPE_FLOAT, (val)) +#define json_object_add_value_string(obj, name, val) \ + json_object_add_value_type((obj), name, JSON_TYPE_STRING, (val)) +#define json_object_add_value_object(obj, name, val) \ + json_object_add_value_type((obj), name, JSON_TYPE_OBJECT, (val)) +#define json_object_add_value_array(obj, name, val) \ + json_object_add_value_type((obj), name, JSON_TYPE_ARRAY, (val)) 
+int json_array_add_value_type(struct json_array *array, int type, ...); +#define json_array_add_value_int(obj, val) \ + json_array_add_value_type((obj), JSON_TYPE_INTEGER, (long long) (val)) +#define json_array_add_value_uint(obj, val) \ + json_array_add_value_type((obj), JSON_TYPE_UINT, (unsigned long long) (val)) +#define json_array_add_value_float(obj, val) \ + json_array_add_value_type((obj), JSON_TYPE_FLOAT, (val)) +#define json_array_add_value_string(obj, val) \ + json_array_add_value_type((obj), JSON_TYPE_STRING, (val)) +#define json_array_add_value_object(obj, val) \ + json_array_add_value_type((obj), JSON_TYPE_OBJECT, (val)) +#define json_array_add_value_array(obj, val) \ + json_array_add_value_type((obj), JSON_TYPE_ARRAY, (val)) + +#define json_array_last_value_object(obj) \ + ((obj)->values[(obj)->value_cnt - 1]->object) + +void json_print_object(struct json_object *obj, void *); +#endif diff --git a/libmultipath/nvme/linux/nvme.h b/libmultipath/nvme/linux/nvme.h new file mode 100644 index 0000000..a697554 --- /dev/null +++ b/libmultipath/nvme/linux/nvme.h @@ -0,0 +1,1528 @@ +/* + * Definitions for the NVM Express interface + * Copyright (c) 2011-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details.
+ */ +#ifndef _LINUX_NVME_H +#define _LINUX_NVME_H + +#include <linux/types.h> +#include <linux/uuid.h> + +/* NQN names in commands fields specified one size */ +#define NVMF_NQN_FIELD_LEN 256 + +/* However the max length of a qualified name is another size */ +#define NVMF_NQN_SIZE 223 + +#define NVMF_TRSVCID_SIZE 32 +#define NVMF_TRADDR_SIZE 256 +#define NVMF_TSAS_SIZE 256 + +#define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery" + +#define NVME_RDMA_IP_PORT 4420 + +#define NVME_NSID_ALL 0xffffffff + +enum nvme_subsys_type { + NVME_NQN_DISC = 1, /* Discovery type target subsystem */ + NVME_NQN_NVME = 2, /* NVME type target subsystem */ +}; + +/* Address Family codes for Discovery Log Page entry ADRFAM field */ +enum { + NVMF_ADDR_FAMILY_PCI = 0, /* PCIe */ + NVMF_ADDR_FAMILY_IP4 = 1, /* IP4 */ + NVMF_ADDR_FAMILY_IP6 = 2, /* IP6 */ + NVMF_ADDR_FAMILY_IB = 3, /* InfiniBand */ + NVMF_ADDR_FAMILY_FC = 4, /* Fibre Channel */ +}; + +/* Transport Type codes for Discovery Log Page entry TRTYPE field */ +enum { + NVMF_TRTYPE_RDMA = 1, /* RDMA */ + NVMF_TRTYPE_FC = 2, /* Fibre Channel */ + NVMF_TRTYPE_TCP = 3, /* TCP */ + NVMF_TRTYPE_LOOP = 254, /* Reserved for host usage */ + NVMF_TRTYPE_MAX, +}; + +/* Transport Requirements codes for Discovery Log Page entry TREQ field */ +enum { + NVMF_TREQ_NOT_SPECIFIED = 0, /* Not specified */ + NVMF_TREQ_REQUIRED = 1, /* Required */ + NVMF_TREQ_NOT_REQUIRED = 2, /* Not Required */ + NVMF_TREQ_DISABLE_SQFLOW = (1 << 2), /* SQ flow control disable supported */ +}; + +/* RDMA QP Service Type codes for Discovery Log Page entry TSAS + * RDMA_QPTYPE field + */ +enum { + NVMF_RDMA_QPTYPE_CONNECTED = 1, /* Reliable Connected */ + NVMF_RDMA_QPTYPE_DATAGRAM = 2, /* Reliable Datagram */ +}; + +/* RDMA Provider Type codes for Discovery Log Page entry TSAS + * RDMA_PRTYPE field + */ +enum { + NVMF_RDMA_PRTYPE_NOT_SPECIFIED = 1, /* No Provider Specified */ + NVMF_RDMA_PRTYPE_IB = 2, /* InfiniBand */ + NVMF_RDMA_PRTYPE_ROCE = 3, /* InfiniBand RoCE */ +
NVMF_RDMA_PRTYPE_ROCEV2 = 4, /* InfiniBand RoCEV2 */ + NVMF_RDMA_PRTYPE_IWARP = 5, /* IWARP */ +}; + +/* RDMA Connection Management Service Type codes for Discovery Log Page + * entry TSAS RDMA_CMS field + */ +enum { + NVMF_RDMA_CMS_RDMA_CM = 1, /* Sockets based endpoint addressing */ +}; + +/* TCP port security type for Discovery Log Page entry TSAS + */ +enum { + NVMF_TCP_SECTYPE_NONE = 0, /* No Security */ + NVMF_TCP_SECTYPE_TLS = 1, /* Transport Layer Security */ +}; + +#define NVME_AQ_DEPTH 32 +#define NVME_NR_AEN_COMMANDS 1 +#define NVME_AQ_BLK_MQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS) + +/* + * Subtract one to leave an empty queue entry for 'Full Queue' condition. See + * NVM-Express 1.2 specification, section 4.1.2. + */ +#define NVME_AQ_MQ_TAG_DEPTH (NVME_AQ_BLK_MQ_DEPTH - 1) + +enum { + NVME_REG_CAP = 0x0000, /* Controller Capabilities */ + NVME_REG_VS = 0x0008, /* Version */ + NVME_REG_INTMS = 0x000c, /* Interrupt Mask Set */ + NVME_REG_INTMC = 0x0010, /* Interrupt Mask Clear */ + NVME_REG_CC = 0x0014, /* Controller Configuration */ + NVME_REG_CSTS = 0x001c, /* Controller Status */ + NVME_REG_NSSR = 0x0020, /* NVM Subsystem Reset */ + NVME_REG_AQA = 0x0024, /* Admin Queue Attributes */ + NVME_REG_ASQ = 0x0028, /* Admin SQ Base Address */ + NVME_REG_ACQ = 0x0030, /* Admin CQ Base Address */ + NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */ + NVME_REG_CMBSZ = 0x003c, /* Controller Memory Buffer Size */ + NVME_REG_BPINFO = 0x0040, /* Boot Partition Information */ + NVME_REG_BPRSEL = 0x0044, /* Boot Partition Read Select */ + NVME_REG_BPMBL = 0x0048, /* Boot Partition Memory Buffer Location */ + NVME_REG_PMRCAP = 0x0e00, /* Persistent Memory Capabilities */ + NVME_REG_PMRCTL = 0x0e04, /* Persistent Memory Region Control */ + NVME_REG_PMRSTS = 0x0e08, /* Persistent Memory Region Status */ + NVME_REG_DBS = 0x1000, /* SQ 0 Tail Doorbell */ +}; + +#define NVME_CAP_MQES(cap) ((cap) & 0xffff) +#define NVME_CAP_TIMEOUT(cap) (((cap) >> 24) & 
0xff) +#define NVME_CAP_STRIDE(cap) (((cap) >> 32) & 0xf) +#define NVME_CAP_NSSRC(cap) (((cap) >> 36) & 0x1) +#define NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf) +#define NVME_CAP_MPSMAX(cap) (((cap) >> 52) & 0xf) + +#define NVME_CMB_BIR(cmbloc) ((cmbloc) & 0x7) +#define NVME_CMB_OFST(cmbloc) (((cmbloc) >> 12) & 0xfffff) +#define NVME_CMB_SZ(cmbsz) (((cmbsz) >> 12) & 0xfffff) +#define NVME_CMB_SZU(cmbsz) (((cmbsz) >> 8) & 0xf) + +#define NVME_CMB_WDS(cmbsz) ((cmbsz) & 0x10) +#define NVME_CMB_RDS(cmbsz) ((cmbsz) & 0x8) +#define NVME_CMB_LISTS(cmbsz) ((cmbsz) & 0x4) +#define NVME_CMB_CQS(cmbsz) ((cmbsz) & 0x2) +#define NVME_CMB_SQS(cmbsz) ((cmbsz) & 0x1) + +/* + * Submission and Completion Queue Entry Sizes for the NVM command set. + * (In bytes and specified as a power of two (2^n)). + */ +#define NVME_NVM_IOSQES 6 +#define NVME_NVM_IOCQES 4 + +enum { + NVME_CC_ENABLE = 1 << 0, + NVME_CC_CSS_NVM = 0 << 4, + NVME_CC_EN_SHIFT = 0, + NVME_CC_CSS_SHIFT = 4, + NVME_CC_MPS_SHIFT = 7, + NVME_CC_AMS_SHIFT = 11, + NVME_CC_SHN_SHIFT = 14, + NVME_CC_IOSQES_SHIFT = 16, + NVME_CC_IOCQES_SHIFT = 20, + NVME_CC_AMS_RR = 0 << NVME_CC_AMS_SHIFT, + NVME_CC_AMS_WRRU = 1 << NVME_CC_AMS_SHIFT, + NVME_CC_AMS_VS = 7 << NVME_CC_AMS_SHIFT, + NVME_CC_SHN_NONE = 0 << NVME_CC_SHN_SHIFT, + NVME_CC_SHN_NORMAL = 1 << NVME_CC_SHN_SHIFT, + NVME_CC_SHN_ABRUPT = 2 << NVME_CC_SHN_SHIFT, + NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT, + NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT, + NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT, + NVME_CSTS_RDY = 1 << 0, + NVME_CSTS_CFS = 1 << 1, + NVME_CSTS_NSSRO = 1 << 4, + NVME_CSTS_PP = 1 << 5, + NVME_CSTS_SHST_NORMAL = 0 << 2, + NVME_CSTS_SHST_OCCUR = 1 << 2, + NVME_CSTS_SHST_CMPLT = 2 << 2, + NVME_CSTS_SHST_MASK = 3 << 2, +}; + +struct nvme_id_power_state { + __le16 max_power; /* centiwatts */ + __u8 rsvd2; + __u8 flags; + __le32 entry_lat; /* microseconds */ + __le32 exit_lat; /* microseconds */ + __u8 read_tput; + __u8 read_lat; + __u8 
write_tput; + __u8 write_lat; + __le16 idle_power; + __u8 idle_scale; + __u8 rsvd19; + __le16 active_power; + __u8 active_work_scale; + __u8 rsvd23[9]; +}; + +enum { + NVME_PS_FLAGS_MAX_POWER_SCALE = 1 << 0, + NVME_PS_FLAGS_NON_OP_STATE = 1 << 1, +}; + +struct nvme_id_ctrl { + __le16 vid; + __le16 ssvid; + char sn[20]; + char mn[40]; + char fr[8]; + __u8 rab; + __u8 ieee[3]; + __u8 cmic; + __u8 mdts; + __le16 cntlid; + __le32 ver; + __le32 rtd3r; + __le32 rtd3e; + __le32 oaes; + __le32 ctratt; + __le16 rrls; + __u8 rsvd102[26]; + __le16 crdt1; + __le16 crdt2; + __le16 crdt3; + __u8 rsvd134[122]; + __le16 oacs; + __u8 acl; + __u8 aerl; + __u8 frmw; + __u8 lpa; + __u8 elpe; + __u8 npss; + __u8 avscc; + __u8 apsta; + __le16 wctemp; + __le16 cctemp; + __le16 mtfa; + __le32 hmpre; + __le32 hmmin; + __u8 tnvmcap[16]; + __u8 unvmcap[16]; + __le32 rpmbs; + __le16 edstt; + __u8 dsto; + __u8 fwug; + __le16 kas; + __le16 hctma; + __le16 mntmt; + __le16 mxtmt; + __le32 sanicap; + __le32 hmminds; + __le16 hmmaxd; + __le16 nsetidmax; + __u8 rsvd340[2]; + __u8 anatt; + __u8 anacap; + __le32 anagrpmax; + __le32 nanagrpid; + __u8 rsvd352[160]; + __u8 sqes; + __u8 cqes; + __le16 maxcmd; + __le32 nn; + __le16 oncs; + __le16 fuses; + __u8 fna; + __u8 vwc; + __le16 awun; + __le16 awupf; + __u8 nvscc; + __u8 nwpc; + __le16 acwu; + __u8 rsvd534[2]; + __le32 sgls; + __le32 mnan; + __u8 rsvd544[224]; + char subnqn[256]; + __u8 rsvd1024[768]; + __le32 ioccsz; + __le32 iorcsz; + __le16 icdoff; + __u8 ctrattr; + __u8 msdbd; + __u8 rsvd1804[244]; + struct nvme_id_power_state psd[32]; + __u8 vs[1024]; +}; + +enum { + NVME_CTRL_ONCS_COMPARE = 1 << 0, + NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1, + NVME_CTRL_ONCS_DSM = 1 << 2, + NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3, + NVME_CTRL_ONCS_TIMESTAMP = 1 << 6, + NVME_CTRL_VWC_PRESENT = 1 << 0, + NVME_CTRL_OACS_SEC_SUPP = 1 << 0, + NVME_CTRL_OACS_DIRECTIVES = 1 << 5, + NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8, + NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1, + 
NVME_CTRL_CTRATT_128_ID = 1 << 0, + NVME_CTRL_CTRATT_NON_OP_PSP = 1 << 1, + NVME_CTRL_CTRATT_NVM_SETS = 1 << 2, + NVME_CTRL_CTRATT_READ_RECV_LVLS = 1 << 3, + NVME_CTRL_CTRATT_ENDURANCE_GROUPS = 1 << 4, + NVME_CTRL_CTRATT_PREDICTABLE_LAT = 1 << 5, + NVME_CTRL_CTRATT_NAMESPACE_GRANULARITY = 1 << 7, + NVME_CTRL_CTRATT_UUID_LIST = 1 << 9, +}; + +struct nvme_lbaf { + __le16 ms; + __u8 ds; + __u8 rp; +}; + +struct nvme_id_ns { + __le64 nsze; + __le64 ncap; + __le64 nuse; + __u8 nsfeat; + __u8 nlbaf; + __u8 flbas; + __u8 mc; + __u8 dpc; + __u8 dps; + __u8 nmic; + __u8 rescap; + __u8 fpi; + __u8 dlfeat; + __le16 nawun; + __le16 nawupf; + __le16 nacwu; + __le16 nabsn; + __le16 nabo; + __le16 nabspf; + __le16 noiob; + __u8 nvmcap[16]; + __le16 npwg; + __le16 npwa; + __le16 npdg; + __le16 npda; + __le16 nows; + __u8 rsvd74[18]; + __le32 anagrpid; + __u8 rsvd96[3]; + __u8 nsattr; + __le16 nvmsetid; + __le16 endgid; + __u8 nguid[16]; + __u8 eui64[8]; + struct nvme_lbaf lbaf[16]; + __u8 rsvd192[192]; + __u8 vs[3712]; +}; + +enum { + NVME_ID_CNS_NS = 0x00, + NVME_ID_CNS_CTRL = 0x01, + NVME_ID_CNS_NS_ACTIVE_LIST = 0x02, + NVME_ID_CNS_NS_DESC_LIST = 0x03, + NVME_ID_CNS_NVMSET_LIST = 0x04, + NVME_ID_CNS_NS_PRESENT_LIST = 0x10, + NVME_ID_CNS_NS_PRESENT = 0x11, + NVME_ID_CNS_CTRL_NS_LIST = 0x12, + NVME_ID_CNS_CTRL_LIST = 0x13, + NVME_ID_CNS_SCNDRY_CTRL_LIST = 0x15, + NVME_ID_CNS_NS_GRANULARITY = 0x16, + NVME_ID_CNS_UUID_LIST = 0x17, +}; + +enum { + NVME_DIR_IDENTIFY = 0x00, + NVME_DIR_STREAMS = 0x01, + NVME_DIR_SND_ID_OP_ENABLE = 0x01, + NVME_DIR_SND_ST_OP_REL_ID = 0x01, + NVME_DIR_SND_ST_OP_REL_RSC = 0x02, + NVME_DIR_RCV_ID_OP_PARAM = 0x01, + NVME_DIR_RCV_ST_OP_PARAM = 0x01, + NVME_DIR_RCV_ST_OP_STATUS = 0x02, + NVME_DIR_RCV_ST_OP_RESOURCE = 0x03, + NVME_DIR_ENDIR = 0x01, +}; + +enum { + NVME_NS_FEAT_THIN = 1 << 0, + NVME_NS_FLBAS_LBA_MASK = 0xf, + NVME_NS_FLBAS_META_EXT = 0x10, + NVME_LBAF_RP_BEST = 0, + NVME_LBAF_RP_BETTER = 1, + NVME_LBAF_RP_GOOD = 2, + NVME_LBAF_RP_DEGRADED = 3, 
+ NVME_NS_DPC_PI_LAST = 1 << 4, + NVME_NS_DPC_PI_FIRST = 1 << 3, + NVME_NS_DPC_PI_TYPE3 = 1 << 2, + NVME_NS_DPC_PI_TYPE2 = 1 << 1, + NVME_NS_DPC_PI_TYPE1 = 1 << 0, + NVME_NS_DPS_PI_FIRST = 1 << 3, + NVME_NS_DPS_PI_MASK = 0x7, + NVME_NS_DPS_PI_TYPE1 = 1, + NVME_NS_DPS_PI_TYPE2 = 2, + NVME_NS_DPS_PI_TYPE3 = 3, +}; + +struct nvme_ns_id_desc { + __u8 nidt; + __u8 nidl; + __le16 reserved; +}; + +#define NVME_NIDT_EUI64_LEN 8 +#define NVME_NIDT_NGUID_LEN 16 +#define NVME_NIDT_UUID_LEN 16 + +enum { + NVME_NIDT_EUI64 = 0x01, + NVME_NIDT_NGUID = 0x02, + NVME_NIDT_UUID = 0x03, +}; + +#define NVME_MAX_NVMSET 31 + +struct nvme_nvmset_attr_entry { + __le16 id; + __le16 endurance_group_id; + __u8 rsvd4[4]; + __le32 random_4k_read_typical; + __le32 opt_write_size; + __u8 total_nvmset_cap[16]; + __u8 unalloc_nvmset_cap[16]; + __u8 rsvd48[80]; +}; + +struct nvme_id_nvmset { + __u8 nid; + __u8 rsvd1[127]; + struct nvme_nvmset_attr_entry ent[NVME_MAX_NVMSET]; +}; + +struct nvme_id_ns_granularity_list_entry { + __le64 namespace_size_granularity; + __le64 namespace_capacity_granularity; +}; + +struct nvme_id_ns_granularity_list { + __le32 attributes; + __u8 num_descriptors; + __u8 rsvd[27]; + struct nvme_id_ns_granularity_list_entry entry[16]; +}; + +#define NVME_MAX_UUID_ENTRIES 128 +struct nvme_id_uuid_list_entry { + __u8 header; + __u8 rsvd1[15]; + __u8 uuid[16]; +}; + +struct nvme_id_uuid_list { + struct nvme_id_uuid_list_entry entry[NVME_MAX_UUID_ENTRIES]; +}; + +/** + * struct nvme_telemetry_log_page_hdr - structure for telemetry log page + * @lpi: Log page identifier + * @iee_oui: IEEE OUI Identifier + * @dalb1: Data area 1 last block + * @dalb2: Data area 2 last block + * @dalb3: Data area 3 last block + * @ctrlavail: Controller initiated data available + * @ctrldgn: Controller initiated telemetry Data Generation Number + * @rsnident: Reason Identifier + * @telemetry_dataarea: Contains telemetry data block + * + * This structure can be used for both telemetry host-initiated log 
page + * and controller-initiated log page. + */ +struct nvme_telemetry_log_page_hdr { + __u8 lpi; + __u8 rsvd[4]; + __u8 iee_oui[3]; + __le16 dalb1; + __le16 dalb2; + __le16 dalb3; + __u8 rsvd1[368]; + __u8 ctrlavail; + __u8 ctrldgn; + __u8 rsnident[128]; + __u8 telemetry_dataarea[0]; +}; + +struct nvme_endurance_group_log { + __u32 rsvd0; + __u8 avl_spare_threshold; + __u8 percent_used; + __u8 rsvd6[26]; + __u8 endurance_estimate[16]; + __u8 data_units_read[16]; + __u8 data_units_written[16]; + __u8 media_units_written[16]; + __u8 rsvd96[416]; +}; + +struct nvme_smart_log { + __u8 critical_warning; + __u8 temperature[2]; + __u8 avail_spare; + __u8 spare_thresh; + __u8 percent_used; + __u8 rsvd6[26]; + __u8 data_units_read[16]; + __u8 data_units_written[16]; + __u8 host_reads[16]; + __u8 host_writes[16]; + __u8 ctrl_busy_time[16]; + __u8 power_cycles[16]; + __u8 power_on_hours[16]; + __u8 unsafe_shutdowns[16]; + __u8 media_errors[16]; + __u8 num_err_log_entries[16]; + __le32 warning_temp_time; + __le32 critical_comp_time; + __le16 temp_sensor[8]; + __le32 thm_temp1_trans_count; + __le32 thm_temp2_trans_count; + __le32 thm_temp1_total_time; + __le32 thm_temp2_total_time; + __u8 rsvd232[280]; +}; + +struct nvme_self_test_res { + __u8 device_self_test_status; + __u8 segment_num; + __u8 valid_diagnostic_info; + __u8 rsvd; + __le64 power_on_hours; + __le32 nsid; + __le64 failing_lba; + __u8 status_code_type; + __u8 status_code; + __u8 vendor_specific[2]; +} __attribute__((packed)); + +struct nvme_self_test_log { + __u8 crnt_dev_selftest_oprn; + __u8 crnt_dev_selftest_compln; + __u8 rsvd[2]; + struct nvme_self_test_res result[20]; +} __attribute__((packed)); + +struct nvme_fw_slot_info_log { + __u8 afi; + __u8 rsvd1[7]; + __le64 frs[7]; + __u8 rsvd64[448]; +}; + +struct nvme_lba_status_desc { + __u64 dslba; + __u32 nlb; + __u8 rsvd_12; + __u8 status; + __u8 rsvd_15_14[2]; +}; + +struct nvme_lba_status { + __u32 nlsd; + __u8 cmpc; + __u8 rsvd_7_5[3]; + struct 
nvme_lba_status_desc descs[0]; +}; + +/* NVMe Namespace Write Protect State */ +enum { + NVME_NS_NO_WRITE_PROTECT = 0, + NVME_NS_WRITE_PROTECT, + NVME_NS_WRITE_PROTECT_POWER_CYCLE, + NVME_NS_WRITE_PROTECT_PERMANENT, +}; + +#define NVME_MAX_CHANGED_NAMESPACES 1024 + +struct nvme_changed_ns_list_log { + __le32 log[NVME_MAX_CHANGED_NAMESPACES]; +}; + +enum { + NVME_CMD_EFFECTS_CSUPP = 1 << 0, + NVME_CMD_EFFECTS_LBCC = 1 << 1, + NVME_CMD_EFFECTS_NCC = 1 << 2, + NVME_CMD_EFFECTS_NIC = 1 << 3, + NVME_CMD_EFFECTS_CCC = 1 << 4, + NVME_CMD_EFFECTS_CSE_MASK = 3 << 16, + NVME_CMD_EFFECTS_UUID_SEL = 1 << 19, +}; + +struct nvme_effects_log { + __le32 acs[256]; + __le32 iocs[256]; + __u8 resv[2048]; +}; + +enum nvme_ana_state { + NVME_ANA_OPTIMIZED = 0x01, + NVME_ANA_NONOPTIMIZED = 0x02, + NVME_ANA_INACCESSIBLE = 0x03, + NVME_ANA_PERSISTENT_LOSS = 0x04, + NVME_ANA_CHANGE = 0x0f, +}; + +struct nvme_ana_group_desc { + __le32 grpid; + __le32 nnsids; + __le64 chgcnt; + __u8 state; + __u8 rsvd17[15]; + __le32 nsids[]; +}; + +/* flag for the log specific field of the ANA log */ +#define NVME_ANA_LOG_RGO (1 << 0) + +struct nvme_ana_rsp_hdr { + __le64 chgcnt; + __le16 ngrps; + __le16 rsvd10[3]; +}; + +enum { + NVME_SMART_CRIT_SPARE = 1 << 0, + NVME_SMART_CRIT_TEMPERATURE = 1 << 1, + NVME_SMART_CRIT_RELIABILITY = 1 << 2, + NVME_SMART_CRIT_MEDIA = 1 << 3, + NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4, +}; + +enum { + NVME_AER_ERROR = 0, + NVME_AER_SMART = 1, + NVME_AER_CSS = 6, + NVME_AER_VS = 7, +}; + +struct nvme_lba_range_type { + __u8 type; + __u8 attributes; + __u8 rsvd2[14]; + __u64 slba; + __u64 nlb; + __u8 guid[16]; + __u8 rsvd48[16]; +}; + +enum { + NVME_LBART_TYPE_FS = 0x01, + NVME_LBART_TYPE_RAID = 0x02, + NVME_LBART_TYPE_CACHE = 0x03, + NVME_LBART_TYPE_SWAP = 0x04, + + NVME_LBART_ATTRIB_TEMP = 1 << 0, + NVME_LBART_ATTRIB_HIDE = 1 << 1, +}; + +/* Predictable Latency Mode - Deterministic Threshold Configuration Data */ +struct nvme_plm_config { + __le16 enable_event; + __u8 
rsvd2[30]; + __le64 dtwin_reads_thresh; + __le64 dtwin_writes_thresh; + __le64 dtwin_time_thresh; + __u8 rsvd56[456]; +}; + +struct nvme_reservation_status { + __le32 gen; + __u8 rtype; + __u8 regctl[2]; + __u8 resv5[2]; + __u8 ptpls; + __u8 resv10[13]; + struct { + __le16 cntlid; + __u8 rcsts; + __u8 resv3[5]; + __le64 hostid; + __le64 rkey; + } regctl_ds[]; +}; + +struct nvme_reservation_status_ext { + __le32 gen; + __u8 rtype; + __u8 regctl[2]; + __u8 resv5[2]; + __u8 ptpls; + __u8 resv10[14]; + __u8 resv24[40]; + struct { + __le16 cntlid; + __u8 rcsts; + __u8 resv3[5]; + __le64 rkey; + __u8 hostid[16]; + __u8 resv32[32]; + } regctl_eds[]; +}; + +enum nvme_async_event_type { + NVME_AER_TYPE_ERROR = 0, + NVME_AER_TYPE_SMART = 1, + NVME_AER_TYPE_NOTICE = 2, +}; + +/* I/O commands */ + +enum nvme_opcode { + nvme_cmd_flush = 0x00, + nvme_cmd_write = 0x01, + nvme_cmd_read = 0x02, + nvme_cmd_write_uncor = 0x04, + nvme_cmd_compare = 0x05, + nvme_cmd_write_zeroes = 0x08, + nvme_cmd_dsm = 0x09, + nvme_cmd_verify = 0x0c, + nvme_cmd_resv_register = 0x0d, + nvme_cmd_resv_report = 0x0e, + nvme_cmd_resv_acquire = 0x11, + nvme_cmd_resv_release = 0x15, +}; + +/* + * Descriptor subtype - lower 4 bits of nvme_(keyed_)sgl_desc identifier + * + * @NVME_SGL_FMT_ADDRESS: absolute address of the data block + * @NVME_SGL_FMT_OFFSET: relative offset of the in-capsule data block + * @NVME_SGL_FMT_TRANSPORT_A: transport defined format, value 0xA + * @NVME_SGL_FMT_INVALIDATE: RDMA transport specific remote invalidation + * request subtype + */ +enum { + NVME_SGL_FMT_ADDRESS = 0x00, + NVME_SGL_FMT_OFFSET = 0x01, + NVME_SGL_FMT_TRANSPORT_A = 0x0A, + NVME_SGL_FMT_INVALIDATE = 0x0f, +}; + +/* + * Descriptor type - upper 4 bits of nvme_(keyed_)sgl_desc identifier + * + * For struct nvme_sgl_desc: + * @NVME_SGL_FMT_DATA_DESC: data block descriptor + * @NVME_SGL_FMT_SEG_DESC: sgl segment descriptor + * @NVME_SGL_FMT_LAST_SEG_DESC: last sgl segment descriptor + * + * For struct 
nvme_keyed_sgl_desc: + * @NVME_KEY_SGL_FMT_DATA_DESC: keyed data block descriptor + * + * Transport-specific SGL types: + * @NVME_TRANSPORT_SGL_DATA_DESC: Transport SGL data block descriptor + */ +enum { + NVME_SGL_FMT_DATA_DESC = 0x00, + NVME_SGL_FMT_SEG_DESC = 0x02, + NVME_SGL_FMT_LAST_SEG_DESC = 0x03, + NVME_KEY_SGL_FMT_DATA_DESC = 0x04, + NVME_TRANSPORT_SGL_DATA_DESC = 0x05, +}; + +struct nvme_sgl_desc { + __le64 addr; + __le32 length; + __u8 rsvd[3]; + __u8 type; +}; + +struct nvme_keyed_sgl_desc { + __le64 addr; + __u8 length[3]; + __u8 key[4]; + __u8 type; +}; + +union nvme_data_ptr { + struct { + __le64 prp1; + __le64 prp2; + }; + struct nvme_sgl_desc sgl; + struct nvme_keyed_sgl_desc ksgl; +}; + +/* + * Lowest two bits of our flags field (FUSE field in the spec): + * + * @NVME_CMD_FUSE_FIRST: Fused Operation, first command + * @NVME_CMD_FUSE_SECOND: Fused Operation, second command + * + * Highest two bits in our flags field (PSDT field in the spec): + * + * @NVME_CMD_SGL_METABUF: Use SGLs for this transfer, + * If used, MPTR contains addr of single physical buffer (byte aligned). + * @NVME_CMD_SGL_METASEG: Use SGLs for this transfer, + * If used, MPTR contains an address of an SGL segment containing + * exactly 1 SGL descriptor (qword aligned).
+ */ +enum { + NVME_CMD_FUSE_FIRST = (1 << 0), + NVME_CMD_FUSE_SECOND = (1 << 1), + + NVME_CMD_SGL_METABUF = (1 << 6), + NVME_CMD_SGL_METASEG = (1 << 7), + NVME_CMD_SGL_ALL = NVME_CMD_SGL_METABUF | NVME_CMD_SGL_METASEG, +}; + +struct nvme_common_command { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __le32 cdw2[2]; + __le64 metadata; + union nvme_data_ptr dptr; + __le32 cdw10[6]; +}; + +struct nvme_rw_command { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2; + __le64 metadata; + union nvme_data_ptr dptr; + __le64 slba; + __le16 length; + __le16 control; + __le32 dsmgmt; + __le32 reftag; + __le16 apptag; + __le16 appmask; +}; + +enum { + NVME_RW_LR = 1 << 15, + NVME_RW_FUA = 1 << 14, + NVME_RW_DEAC = 1 << 9, + NVME_RW_DSM_FREQ_UNSPEC = 0, + NVME_RW_DSM_FREQ_TYPICAL = 1, + NVME_RW_DSM_FREQ_RARE = 2, + NVME_RW_DSM_FREQ_READS = 3, + NVME_RW_DSM_FREQ_WRITES = 4, + NVME_RW_DSM_FREQ_RW = 5, + NVME_RW_DSM_FREQ_ONCE = 6, + NVME_RW_DSM_FREQ_PREFETCH = 7, + NVME_RW_DSM_FREQ_TEMP = 8, + NVME_RW_DSM_LATENCY_NONE = 0 << 4, + NVME_RW_DSM_LATENCY_IDLE = 1 << 4, + NVME_RW_DSM_LATENCY_NORM = 2 << 4, + NVME_RW_DSM_LATENCY_LOW = 3 << 4, + NVME_RW_DSM_SEQ_REQ = 1 << 6, + NVME_RW_DSM_COMPRESSED = 1 << 7, + NVME_RW_PRINFO_PRCHK_REF = 1 << 10, + NVME_RW_PRINFO_PRCHK_APP = 1 << 11, + NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, + NVME_RW_PRINFO_PRACT = 1 << 13, + NVME_RW_DTYPE_STREAMS = 1 << 4, +}; + +struct nvme_dsm_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + union nvme_data_ptr dptr; + __le32 nr; + __le32 attributes; + __u32 rsvd12[4]; +}; + +enum { + NVME_DSMGMT_IDR = 1 << 0, + NVME_DSMGMT_IDW = 1 << 1, + NVME_DSMGMT_AD = 1 << 2, +}; + +#define NVME_DSM_MAX_RANGES 256 + +struct nvme_dsm_range { + __le32 cattr; + __le32 nlb; + __le64 slba; +}; + +struct nvme_write_zeroes_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2; + __le64 metadata; + union nvme_data_ptr 
dptr; + __le64 slba; + __le16 length; + __le16 control; + __le32 dsmgmt; + __le32 reftag; + __le16 apptag; + __le16 appmask; +}; + +/* Features */ + +struct nvme_feat_auto_pst { + __le64 entries[32]; +}; + +enum { + NVME_HOST_MEM_ENABLE = (1 << 0), + NVME_HOST_MEM_RETURN = (1 << 1), +}; + +/* Admin commands */ + +enum nvme_admin_opcode { + nvme_admin_delete_sq = 0x00, + nvme_admin_create_sq = 0x01, + nvme_admin_get_log_page = 0x02, + nvme_admin_delete_cq = 0x04, + nvme_admin_create_cq = 0x05, + nvme_admin_identify = 0x06, + nvme_admin_abort_cmd = 0x08, + nvme_admin_set_features = 0x09, + nvme_admin_get_features = 0x0a, + nvme_admin_async_event = 0x0c, + nvme_admin_ns_mgmt = 0x0d, + nvme_admin_activate_fw = 0x10, + nvme_admin_download_fw = 0x11, + nvme_admin_dev_self_test = 0x14, + nvme_admin_ns_attach = 0x15, + nvme_admin_keep_alive = 0x18, + nvme_admin_directive_send = 0x19, + nvme_admin_directive_recv = 0x1a, + nvme_admin_virtual_mgmt = 0x1c, + nvme_admin_nvme_mi_send = 0x1d, + nvme_admin_nvme_mi_recv = 0x1e, + nvme_admin_dbbuf = 0x7C, + nvme_admin_format_nvm = 0x80, + nvme_admin_security_send = 0x81, + nvme_admin_security_recv = 0x82, + nvme_admin_sanitize_nvm = 0x84, + nvme_admin_get_lba_status = 0x86, +}; + +enum { + NVME_QUEUE_PHYS_CONTIG = (1 << 0), + NVME_CQ_IRQ_ENABLED = (1 << 1), + NVME_SQ_PRIO_URGENT = (0 << 1), + NVME_SQ_PRIO_HIGH = (1 << 1), + NVME_SQ_PRIO_MEDIUM = (2 << 1), + NVME_SQ_PRIO_LOW = (3 << 1), + NVME_FEAT_ARBITRATION = 0x01, + NVME_FEAT_POWER_MGMT = 0x02, + NVME_FEAT_LBA_RANGE = 0x03, + NVME_FEAT_TEMP_THRESH = 0x04, + NVME_FEAT_ERR_RECOVERY = 0x05, + NVME_FEAT_VOLATILE_WC = 0x06, + NVME_FEAT_NUM_QUEUES = 0x07, + NVME_FEAT_IRQ_COALESCE = 0x08, + NVME_FEAT_IRQ_CONFIG = 0x09, + NVME_FEAT_WRITE_ATOMIC = 0x0a, + NVME_FEAT_ASYNC_EVENT = 0x0b, + NVME_FEAT_AUTO_PST = 0x0c, + NVME_FEAT_HOST_MEM_BUF = 0x0d, + NVME_FEAT_TIMESTAMP = 0x0e, + NVME_FEAT_KATO = 0x0f, + NVME_FEAT_HCTM = 0X10, + NVME_FEAT_NOPSC = 0X11, + NVME_FEAT_RRL = 0x12, + 
NVME_FEAT_PLM_CONFIG = 0x13, + NVME_FEAT_PLM_WINDOW = 0x14, + NVME_FEAT_HOST_BEHAVIOR = 0x16, + NVME_FEAT_SANITIZE = 0x17, + NVME_FEAT_SW_PROGRESS = 0x80, + NVME_FEAT_HOST_ID = 0x81, + NVME_FEAT_RESV_MASK = 0x82, + NVME_FEAT_RESV_PERSIST = 0x83, + NVME_FEAT_WRITE_PROTECT = 0x84, + NVME_LOG_ERROR = 0x01, + NVME_LOG_SMART = 0x02, + NVME_LOG_FW_SLOT = 0x03, + NVME_LOG_CHANGED_NS = 0x04, + NVME_LOG_CMD_EFFECTS = 0x05, + NVME_LOG_DEVICE_SELF_TEST = 0x06, + NVME_LOG_TELEMETRY_HOST = 0x07, + NVME_LOG_TELEMETRY_CTRL = 0x08, + NVME_LOG_ENDURANCE_GROUP = 0x09, + NVME_LOG_ANA = 0x0c, + NVME_LOG_DISC = 0x70, + NVME_LOG_RESERVATION = 0x80, + NVME_LOG_SANITIZE = 0x81, + NVME_FWACT_REPL = (0 << 3), + NVME_FWACT_REPL_ACTV = (1 << 3), + NVME_FWACT_ACTV = (2 << 3), +}; + +enum { + NVME_NO_LOG_LSP = 0x0, + NVME_NO_LOG_LPO = 0x0, + NVME_LOG_ANA_LSP_RGO = 0x1, + NVME_TELEM_LSP_CREATE = 0x1, +}; + +/* Sanitize and Sanitize Monitor/Log */ +enum { + /* Sanitize */ + NVME_SANITIZE_NO_DEALLOC = 0x00000200, + NVME_SANITIZE_OIPBP = 0x00000100, + NVME_SANITIZE_OWPASS_SHIFT = 0x00000004, + NVME_SANITIZE_AUSE = 0x00000008, + NVME_SANITIZE_ACT_CRYPTO_ERASE = 0x00000004, + NVME_SANITIZE_ACT_OVERWRITE = 0x00000003, + NVME_SANITIZE_ACT_BLOCK_ERASE = 0x00000002, + NVME_SANITIZE_ACT_EXIT = 0x00000001, + + /* Sanitize Monitor/Log */ + NVME_SANITIZE_LOG_DATA_LEN = 0x0014, + NVME_SANITIZE_LOG_GLOBAL_DATA_ERASED = 0x0100, + NVME_SANITIZE_LOG_NUM_CMPLTED_PASS_MASK = 0x00F8, + NVME_SANITIZE_LOG_STATUS_MASK = 0x0007, + NVME_SANITIZE_LOG_NEVER_SANITIZED = 0x0000, + NVME_SANITIZE_LOG_COMPLETED_SUCCESS = 0x0001, + NVME_SANITIZE_LOG_IN_PROGESS = 0x0002, + NVME_SANITIZE_LOG_COMPLETED_FAILED = 0x0003, + NVME_SANITIZE_LOG_ND_COMPLETED_SUCCESS = 0x0004, +}; + +enum { + /* Self-test log Validation bits */ + NVME_SELF_TEST_VALID_NSID = 1 << 0, + NVME_SELF_TEST_VALID_FLBA = 1 << 1, + NVME_SELF_TEST_VALID_SCT = 1 << 2, + NVME_SELF_TEST_VALID_SC = 1 << 3, + NVME_SELF_TEST_REPORTS = 20, +}; + +struct nvme_identify { + 
__u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + union nvme_data_ptr dptr; + __u8 cns; + __u8 rsvd3; + __le16 ctrlid; + __u32 rsvd11[5]; +}; + +#define NVME_IDENTIFY_DATA_SIZE 4096 + +struct nvme_features { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + union nvme_data_ptr dptr; + __le32 fid; + __le32 dword11; + __le32 dword12; + __le32 dword13; + __le32 dword14; + __le32 dword15; +}; + +struct nvme_host_mem_buf_desc { + __le64 addr; + __le32 size; + __u32 rsvd; +}; + +struct nvme_create_cq { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[5]; + __le64 prp1; + __u64 rsvd8; + __le16 cqid; + __le16 qsize; + __le16 cq_flags; + __le16 irq_vector; + __u32 rsvd12[4]; +}; + +struct nvme_create_sq { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[5]; + __le64 prp1; + __u64 rsvd8; + __le16 sqid; + __le16 qsize; + __le16 sq_flags; + __le16 cqid; + __u32 rsvd12[4]; +}; + +struct nvme_delete_queue { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[9]; + __le16 qid; + __u16 rsvd10; + __u32 rsvd11[5]; +}; + +struct nvme_abort_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[9]; + __le16 sqid; + __u16 cid; + __u32 rsvd11[5]; +}; + +struct nvme_download_firmware { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[5]; + union nvme_data_ptr dptr; + __le32 numd; + __le32 offset; + __u32 rsvd12[4]; +}; + +struct nvme_format_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[4]; + __le32 cdw10; + __u32 rsvd11[5]; +}; + +struct nvme_get_log_page_command { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + union nvme_data_ptr dptr; + __u8 lid; + __u8 lsp; + __le16 numdl; + __le16 numdu; + __u16 rsvd11; + __le32 lpol; + __le32 lpou; + __u32 rsvd14[2]; +}; + +struct nvme_directive_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + union 
nvme_data_ptr dptr; + __le32 numd; + __u8 doper; + __u8 dtype; + __le16 dspec; + __u8 endir; + __u8 tdtype; + __u16 rsvd15; + + __u32 rsvd16[3]; +}; + +/* Sanitize Log Page */ +struct nvme_sanitize_log_page { + __le16 progress; + __le16 status; + __le32 cdw10_info; + __le32 est_ovrwrt_time; + __le32 est_blk_erase_time; + __le32 est_crypto_erase_time; + __le32 est_ovrwrt_time_with_no_deallocate; + __le32 est_blk_erase_time_with_no_deallocate; + __le32 est_crypto_erase_time_with_no_deallocate; +}; + +/* + * Fabrics subcommands. + */ +enum nvmf_fabrics_opcode { + nvme_fabrics_command = 0x7f, +}; + +enum nvmf_capsule_command { + nvme_fabrics_type_property_set = 0x00, + nvme_fabrics_type_connect = 0x01, + nvme_fabrics_type_property_get = 0x04, +}; + +struct nvmf_common_command { + __u8 opcode; + __u8 resv1; + __u16 command_id; + __u8 fctype; + __u8 resv2[35]; + __u8 ts[24]; +}; + +/* + * The legal cntlid range a NVMe Target will provide. + * Note that cntlid of value 0 is considered illegal in the fabrics world. + * Devices based on earlier specs did not have the subsystem concept; + * therefore, those devices had their cntlid value set to 0 as a result. 
+ */ +#define NVME_CNTLID_MIN 1 +#define NVME_CNTLID_MAX 0xffef +#define NVME_CNTLID_DYNAMIC 0xffff + +#define MAX_DISC_LOGS 255 + +/* Discovery log page entry */ +struct nvmf_disc_rsp_page_entry { + __u8 trtype; + __u8 adrfam; + __u8 subtype; + __u8 treq; + __le16 portid; + __le16 cntlid; + __le16 asqsz; + __u8 resv8[22]; + char trsvcid[NVMF_TRSVCID_SIZE]; + __u8 resv64[192]; + char subnqn[NVMF_NQN_FIELD_LEN]; + char traddr[NVMF_TRADDR_SIZE]; + union tsas { + char common[NVMF_TSAS_SIZE]; + struct rdma { + __u8 qptype; + __u8 prtype; + __u8 cms; + __u8 resv3[5]; + __u16 pkey; + __u8 resv10[246]; + } rdma; + struct tcp { + __u8 sectype; + } tcp; + } tsas; +}; + +/* Discovery log page header */ +struct nvmf_disc_rsp_page_hdr { + __le64 genctr; + __le64 numrec; + __le16 recfmt; + __u8 resv14[1006]; + struct nvmf_disc_rsp_page_entry entries[0]; +}; + +struct nvmf_connect_command { + __u8 opcode; + __u8 resv1; + __u16 command_id; + __u8 fctype; + __u8 resv2[19]; + union nvme_data_ptr dptr; + __le16 recfmt; + __le16 qid; + __le16 sqsize; + __u8 cattr; + __u8 resv3; + __le32 kato; + __u8 resv4[12]; +}; + +struct nvmf_connect_data { + uuid_t hostid; + __le16 cntlid; + char resv4[238]; + char subsysnqn[NVMF_NQN_FIELD_LEN]; + char hostnqn[NVMF_NQN_FIELD_LEN]; + char resv5[256]; +}; + +struct nvmf_property_set_command { + __u8 opcode; + __u8 resv1; + __u16 command_id; + __u8 fctype; + __u8 resv2[35]; + __u8 attrib; + __u8 resv3[3]; + __le32 offset; + __le64 value; + __u8 resv4[8]; +}; + +struct nvmf_property_get_command { + __u8 opcode; + __u8 resv1; + __u16 command_id; + __u8 fctype; + __u8 resv2[35]; + __u8 attrib; + __u8 resv3[3]; + __le32 offset; + __u8 resv4[16]; +}; + +struct nvme_dbbuf { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[5]; + __le64 prp1; + __le64 prp2; + __u32 rsvd12[6]; +}; + +struct streams_directive_params { + __le16 msl; + __le16 nssa; + __le16 nsso; + __u8 rsvd[10]; + __le32 sws; + __le16 sgs; + __le16 nsa; + __le16 nso; + __u8 
rsvd2[6]; +}; + +struct nvme_command { + union { + struct nvme_common_command common; + struct nvme_rw_command rw; + struct nvme_identify identify; + struct nvme_features features; + struct nvme_create_cq create_cq; + struct nvme_create_sq create_sq; + struct nvme_delete_queue delete_queue; + struct nvme_download_firmware dlfw; + struct nvme_format_cmd format; + struct nvme_dsm_cmd dsm; + struct nvme_write_zeroes_cmd write_zeroes; + struct nvme_abort_cmd abort; + struct nvme_get_log_page_command get_log_page; + struct nvmf_common_command fabrics; + struct nvmf_connect_command connect; + struct nvmf_property_set_command prop_set; + struct nvmf_property_get_command prop_get; + struct nvme_dbbuf dbbuf; + struct nvme_directive_cmd directive; + }; +}; + +static inline bool nvme_is_write(struct nvme_command *cmd) +{ + /* + * What a mess... + * + * Why can't we simply have a Fabrics In and Fabrics out command? + */ + if (unlikely(cmd->common.opcode == nvme_fabrics_command)) + return cmd->fabrics.fctype & 1; /* all fabrics commands share one opcode, so bit 0 of fctype is tested instead */ + return cmd->common.opcode & 1; /* every other command: bit 0 of the opcode itself */ +} + +enum { + NVME_SCT_GENERIC = 0x0, + NVME_SCT_CMD_SPECIFIC = 0x1, + NVME_SCT_MEDIA = 0x2, +}; + +enum { + /* + * Generic Command Status: + */ + NVME_SC_SUCCESS = 0x0, + NVME_SC_INVALID_OPCODE = 0x1, + NVME_SC_INVALID_FIELD = 0x2, + NVME_SC_CMDID_CONFLICT = 0x3, + NVME_SC_DATA_XFER_ERROR = 0x4, + NVME_SC_POWER_LOSS = 0x5, + NVME_SC_INTERNAL = 0x6, + NVME_SC_ABORT_REQ = 0x7, + NVME_SC_ABORT_QUEUE = 0x8, + NVME_SC_FUSED_FAIL = 0x9, + NVME_SC_FUSED_MISSING = 0xa, + NVME_SC_INVALID_NS = 0xb, + NVME_SC_CMD_SEQ_ERROR = 0xc, + NVME_SC_SGL_INVALID_LAST = 0xd, + NVME_SC_SGL_INVALID_COUNT = 0xe, + NVME_SC_SGL_INVALID_DATA = 0xf, + NVME_SC_SGL_INVALID_METADATA = 0x10, + NVME_SC_SGL_INVALID_TYPE = 0x11, + + NVME_SC_SGL_INVALID_OFFSET = 0x16, + NVME_SC_SGL_INVALID_SUBTYPE = 0x17, + + NVME_SC_SANITIZE_FAILED = 0x1C, + NVME_SC_SANITIZE_IN_PROGRESS = 0x1D, + + NVME_SC_NS_WRITE_PROTECTED = 0x20, + NVME_SC_CMD_INTERRUPTED = 0x21, + + NVME_SC_LBA_RANGE
= 0x80, + NVME_SC_CAP_EXCEEDED = 0x81, + NVME_SC_NS_NOT_READY = 0x82, + NVME_SC_RESERVATION_CONFLICT = 0x83, + + /* + * Command Specific Status: + */ + NVME_SC_CQ_INVALID = 0x100, + NVME_SC_QID_INVALID = 0x101, + NVME_SC_QUEUE_SIZE = 0x102, + NVME_SC_ABORT_LIMIT = 0x103, + NVME_SC_ABORT_MISSING = 0x104, + NVME_SC_ASYNC_LIMIT = 0x105, + NVME_SC_FIRMWARE_SLOT = 0x106, + NVME_SC_FIRMWARE_IMAGE = 0x107, + NVME_SC_INVALID_VECTOR = 0x108, + NVME_SC_INVALID_LOG_PAGE = 0x109, + NVME_SC_INVALID_FORMAT = 0x10a, + NVME_SC_FW_NEEDS_CONV_RESET = 0x10b, + NVME_SC_INVALID_QUEUE = 0x10c, + NVME_SC_FEATURE_NOT_SAVEABLE = 0x10d, + NVME_SC_FEATURE_NOT_CHANGEABLE = 0x10e, + NVME_SC_FEATURE_NOT_PER_NS = 0x10f, + NVME_SC_FW_NEEDS_SUBSYS_RESET = 0x110, + NVME_SC_FW_NEEDS_RESET = 0x111, + NVME_SC_FW_NEEDS_MAX_TIME = 0x112, + NVME_SC_FW_ACTIVATE_PROHIBITED = 0x113, + NVME_SC_OVERLAPPING_RANGE = 0x114, + NVME_SC_NS_INSUFFICIENT_CAP = 0x115, + NVME_SC_NS_ID_UNAVAILABLE = 0x116, + NVME_SC_NS_ALREADY_ATTACHED = 0x118, + NVME_SC_NS_IS_PRIVATE = 0x119, + NVME_SC_NS_NOT_ATTACHED = 0x11a, + NVME_SC_THIN_PROV_NOT_SUPP = 0x11b, + NVME_SC_CTRL_LIST_INVALID = 0x11c, + NVME_SC_BP_WRITE_PROHIBITED = 0x11e, + NVME_SC_PMR_SAN_PROHIBITED = 0x123, + + /* + * I/O Command Set Specific - NVM commands: + */ + NVME_SC_BAD_ATTRIBUTES = 0x180, + NVME_SC_INVALID_PI = 0x181, + NVME_SC_READ_ONLY = 0x182, + NVME_SC_ONCS_NOT_SUPPORTED = 0x183, + + /* + * I/O Command Set Specific - Fabrics commands: + */ + NVME_SC_CONNECT_FORMAT = 0x180, + NVME_SC_CONNECT_CTRL_BUSY = 0x181, + NVME_SC_CONNECT_INVALID_PARAM = 0x182, + NVME_SC_CONNECT_RESTART_DISC = 0x183, + NVME_SC_CONNECT_INVALID_HOST = 0x184, + + NVME_SC_DISCOVERY_RESTART = 0x190, + NVME_SC_AUTH_REQUIRED = 0x191, + + /* + * Media and Data Integrity Errors: + */ + NVME_SC_WRITE_FAULT = 0x280, + NVME_SC_READ_ERROR = 0x281, + NVME_SC_GUARD_CHECK = 0x282, + NVME_SC_APPTAG_CHECK = 0x283, + NVME_SC_REFTAG_CHECK = 0x284, + NVME_SC_COMPARE_FAILED = 0x285, + 
NVME_SC_ACCESS_DENIED = 0x286, + NVME_SC_UNWRITTEN_BLOCK = 0x287, + + /* + * Path-related Errors: + */ + NVME_SC_ANA_PERSISTENT_LOSS = 0x301, + NVME_SC_ANA_INACCESSIBLE = 0x302, + NVME_SC_ANA_TRANSITION = 0x303, + + NVME_SC_CRD = 0x1800, + NVME_SC_DNR = 0x4000, +}; + +struct nvme_completion { + /* + * Used by Admin and Fabrics commands to return data: + */ + union nvme_result { + __le16 u16; + __le32 u32; + __le64 u64; + } result; + __le16 sq_head; /* how much of this queue may be reclaimed */ + __le16 sq_id; /* submission queue that generated this entry */ + __u16 command_id; /* of the command which completed */ + __le16 status; /* did the command fail, and if so, why? */ +}; + +#define NVME_VS(major, minor, tertiary) \ + (((major) << 16) | ((minor) << 8) | (tertiary)) + +#define NVME_MAJOR(ver) ((ver) >> 16) +#define NVME_MINOR(ver) (((ver) >> 8) & 0xff) +#define NVME_TERTIARY(ver) ((ver) & 0xff) + +#endif /* _LINUX_NVME_H */ diff --git a/libmultipath/nvme/linux/nvme_ioctl.h b/libmultipath/nvme/linux/nvme_ioctl.h new file mode 100644 index 0000000..d25a532 --- /dev/null +++ b/libmultipath/nvme/linux/nvme_ioctl.h @@ -0,0 +1,67 @@ +/* + * Definitions for the NVM Express ioctl interface + * Copyright (c) 2011-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#ifndef _UAPI_LINUX_NVME_IOCTL_H +#define _UAPI_LINUX_NVME_IOCTL_H + +#include +#include + +struct nvme_user_io { + __u8 opcode; + __u8 flags; + __u16 control; + __u16 nblocks; + __u16 rsvd; + __u64 metadata; + __u64 addr; + __u64 slba; + __u32 dsmgmt; + __u32 reftag; + __u16 apptag; + __u16 appmask; +}; + +struct nvme_passthru_cmd { + __u8 opcode; + __u8 flags; + __u16 rsvd1; + __u32 nsid; + __u32 cdw2; + __u32 cdw3; + __u64 metadata; + __u64 addr; + __u32 metadata_len; + __u32 data_len; + __u32 cdw10; + __u32 cdw11; + __u32 cdw12; + __u32 cdw13; + __u32 cdw14; + __u32 cdw15; + __u32 timeout_ms; + __u32 result; +}; + +#define nvme_admin_cmd nvme_passthru_cmd + +#define NVME_IOCTL_ID _IO('N', 0x40) +#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd) +#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io) +#define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd) +#define NVME_IOCTL_RESET _IO('N', 0x44) +#define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45) +#define NVME_IOCTL_RESCAN _IO('N', 0x46) + +#endif /* _UAPI_LINUX_NVME_IOCTL_H */ diff --git a/libmultipath/nvme/nvme-ioctl.c b/libmultipath/nvme/nvme-ioctl.c new file mode 100644 index 0000000..6959976 --- /dev/null +++ b/libmultipath/nvme/nvme-ioctl.c @@ -0,0 +1,928 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nvme-ioctl.h" + +static int nvme_verify_chr(int fd) +{ + static struct stat nvme_stat; + int err = fstat(fd, &nvme_stat); + + if (err < 0) { + perror("fstat"); + return errno; + } + if (!S_ISCHR(nvme_stat.st_mode)) { + fprintf(stderr, + "Error: requesting reset on non-controller handle\n"); + return ENOTBLK; + } + return 0; +} + +int nvme_subsystem_reset(int fd) +{ + int ret; + + ret = nvme_verify_chr(fd); + if (ret) + return ret; + return ioctl(fd, NVME_IOCTL_SUBSYS_RESET); +} + +int nvme_reset_controller(int fd) +{ + int ret; + + 
ret = nvme_verify_chr(fd);
+	if (ret)
+		return ret;
+	return ioctl(fd, NVME_IOCTL_RESET);
+}
+
+/*
+ * nvme_ns_rescan() - issue NVME_IOCTL_RESCAN on @fd.
+ *
+ * Returns the non-zero result of nvme_verify_chr() when that check rejects
+ * @fd, otherwise whatever ioctl() returns.
+ */
+int nvme_ns_rescan(int fd)
+{
+	int ret;
+
+	ret = nvme_verify_chr(fd);
+	if (ret)
+		return ret;
+	return ioctl(fd, NVME_IOCTL_RESCAN);
+}
+
+/*
+ * nvme_get_nsid() - query a block device through NVME_IOCTL_ID.
+ *
+ * Returns -errno when fstat() fails, -ENOTBLK (errno is set as well) when
+ * @fd is not a block device, otherwise the ioctl() result.
+ */
+int nvme_get_nsid(int fd)
+{
+	/*
+	 * Automatic storage on purpose: a static buffer would be shared by
+	 * every caller, so concurrent calls would race on it.
+	 */
+	struct stat nvme_stat;
+	int err = fstat(fd, &nvme_stat);
+
+	if (err < 0)
+		return -errno;
+
+	if (!S_ISBLK(nvme_stat.st_mode)) {
+		fprintf(stderr,
+			"Error: requesting namespace-id from non-block device\n");
+		errno = ENOTBLK;
+		return -errno;
+	}
+	return ioctl(fd, NVME_IOCTL_ID);
+}
+
+/* Hand @cmd to the driver using the caller-selected ioctl request. */
+int nvme_submit_passthru(int fd, unsigned long ioctl_cmd,
+			 struct nvme_passthru_cmd *cmd)
+{
+	return ioctl(fd, ioctl_cmd, cmd);
+}
+
+/* Submit @cmd with NVME_IOCTL_ADMIN_CMD. */
+static int nvme_submit_admin_passthru(int fd, struct nvme_passthru_cmd *cmd)
+{
+	return ioctl(fd, NVME_IOCTL_ADMIN_CMD, cmd);
+}
+
+/* Submit @cmd with NVME_IOCTL_IO_CMD. */
+static int nvme_submit_io_passthru(int fd, struct nvme_passthru_cmd *cmd)
+{
+	return ioctl(fd, NVME_IOCTL_IO_CMD, cmd);
+}
+
+/*
+ * nvme_passthru() - build a struct nvme_passthru_cmd from the individual
+ * fields and submit it with @ioctl_cmd.
+ *
+ * When the submission returns 0 and @result is non-NULL, the command's
+ * result field is copied to *@result. Returns the submission result.
+ */
+int nvme_passthru(int fd, unsigned long ioctl_cmd, __u8 opcode,
+		  __u8 flags, __u16 rsvd,
+		  __u32 nsid, __u32 cdw2, __u32 cdw3, __u32 cdw10, __u32 cdw11,
+		  __u32 cdw12, __u32 cdw13, __u32 cdw14, __u32 cdw15,
+		  __u32 data_len, void *data, __u32 metadata_len,
+		  void *metadata, __u32 timeout_ms, __u32 *result)
+{
+	struct nvme_passthru_cmd cmd = {
+		.opcode = opcode,
+		.flags = flags,
+		.rsvd1 = rsvd,
+		.nsid = nsid,
+		.cdw2 = cdw2,
+		.cdw3 = cdw3,
+		.metadata = (__u64)(uintptr_t) metadata,
+		.addr = (__u64)(uintptr_t) data,
+		.metadata_len = metadata_len,
+		.data_len = data_len,
+		.cdw10 = cdw10,
+		.cdw11 = cdw11,
+		.cdw12 = cdw12,
+		.cdw13 = cdw13,
+		.cdw14 = cdw14,
+		.cdw15 = cdw15,
+		.timeout_ms = timeout_ms,
+		.result = 0,
+	};
+	int err;
+
+	err = nvme_submit_passthru(fd, ioctl_cmd, &cmd);
+	if (!err && result)
+		*result = cmd.result;
+	return err;
+}
+
+int nvme_io(int fd, __u8 opcode, __u64 slba, __u16 nblocks, __u16 control,
+	    __u32 dsmgmt, __u32 reftag, __u16 apptag, __u16 appmask, void *data,
+	    void *metadata)
+{
+	struct nvme_user_io io = {
+		.opcode = opcode,
+		.flags = 0,
+		.control = control,
+		.nblocks = nblocks,
+		.rsvd = 0,
+		.metadata = (__u64)(uintptr_t) metadata,
+		.addr = (__u64)(uintptr_t) data,
+		.slba = slba,
+		.dsmgmt = dsmgmt,
+		.reftag = reftag,
+		.appmask = appmask,
+		.apptag = apptag,
+	};
+	return ioctl(fd, NVME_IOCTL_SUBMIT_IO, &io);
+}
+
+/* Submit nvme_cmd_read through nvme_io(); returns nvme_io()'s result. */
+int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
+	      __u32 reftag, __u16 apptag, __u16 appmask, void *data,
+	      void *metadata)
+{
+	return nvme_io(fd, nvme_cmd_read, slba, nblocks, control, dsmgmt,
+		       reftag, apptag, appmask, data, metadata);
+}
+
+/* Submit nvme_cmd_write through nvme_io(); returns nvme_io()'s result. */
+int nvme_write(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
+	       __u32 reftag, __u16 apptag, __u16 appmask, void *data,
+	       void *metadata)
+{
+	return nvme_io(fd, nvme_cmd_write, slba, nblocks, control, dsmgmt,
+		       reftag, apptag, appmask, data, metadata);
+}
+
+/* Submit nvme_cmd_compare through nvme_io(); returns nvme_io()'s result. */
+int nvme_compare(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
+		 __u32 reftag, __u16 apptag, __u16 appmask, void *data,
+		 void *metadata)
+{
+	return nvme_io(fd, nvme_cmd_compare, slba, nblocks, control, dsmgmt,
+		       reftag, apptag, appmask, data, metadata);
+}
+
+/*
+ * nvme_verify() - submit nvme_cmd_verify as an I/O passthru command.
+ *
+ * @slba is split over cdw10 (low 32 bits) and cdw11 (high 32 bits);
+ * @control and @appmask occupy the upper halves of cdw12 and cdw15.
+ * Returns the result of the NVME_IOCTL_IO_CMD ioctl.
+ */
+int nvme_verify(int fd, __u32 nsid, __u64 slba, __u16 nblocks,
+		__u16 control, __u32 reftag, __u16 apptag, __u16 appmask)
+{
+	/*
+	 * The __u16 operands are widened to __u32 before shifting: promoted
+	 * to int, a value >= 0x8000 shifted left by 16 overflows, which is
+	 * undefined behaviour.
+	 */
+	struct nvme_passthru_cmd cmd = {
+		.opcode = nvme_cmd_verify,
+		.nsid = nsid,
+		.cdw10 = slba & 0xffffffff,
+		.cdw11 = slba >> 32,
+		.cdw12 = nblocks | ((__u32)control << 16),
+		.cdw14 = reftag,
+		.cdw15 = apptag | ((__u32)appmask << 16),
+	};
+
+	return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/*
+ * nvme_passthru_io() - nvme_passthru() fixed to NVME_IOCTL_IO_CMD, with the
+ * result pointer passed as NULL.
+ */
+int nvme_passthru_io(int fd, __u8 opcode, __u8 flags, __u16 rsvd,
+		     __u32 nsid, __u32 cdw2, __u32 cdw3, __u32 cdw10,
+		     __u32 cdw11, __u32 cdw12, __u32 cdw13, __u32 cdw14,
+		     __u32 cdw15, __u32 data_len, void *data,
+		     __u32 metadata_len, void *metadata, __u32 timeout_ms)
+{
+	return nvme_passthru(fd, NVME_IOCTL_IO_CMD, opcode, flags, rsvd, nsid,
+			     cdw2, cdw3, cdw10, cdw11, cdw12, cdw13, cdw14,
+			     cdw15,
data_len, data, metadata_len, metadata,
+			     timeout_ms, NULL);
+}
+
+/*
+ * nvme_write_zeros() - submit nvme_cmd_write_zeroes as an I/O passthru
+ * command. @slba is split over cdw10/cdw11; @control and @appmask occupy
+ * the upper halves of cdw12 and cdw15.
+ */
+int nvme_write_zeros(int fd, __u32 nsid, __u64 slba, __u16 nlb,
+		     __u16 control, __u32 reftag, __u16 apptag, __u16 appmask)
+{
+	/*
+	 * Widen the __u16 operands before shifting: as promoted ints, values
+	 * >= 0x8000 shifted left by 16 overflow (undefined behaviour).
+	 */
+	struct nvme_passthru_cmd cmd = {
+		.opcode = nvme_cmd_write_zeroes,
+		.nsid = nsid,
+		.cdw10 = slba & 0xffffffff,
+		.cdw11 = slba >> 32,
+		.cdw12 = nlb | ((__u32)control << 16),
+		.cdw14 = reftag,
+		.cdw15 = apptag | ((__u32)appmask << 16),
+	};
+
+	return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/* Submit nvme_cmd_write_uncor for @nlb (cdw12) blocks starting at @slba. */
+int nvme_write_uncorrectable(int fd, __u32 nsid, __u64 slba, __u16 nlb)
+{
+	struct nvme_passthru_cmd cmd = {
+		.opcode = nvme_cmd_write_uncor,
+		.nsid = nsid,
+		.cdw10 = slba & 0xffffffff,
+		.cdw11 = slba >> 32,
+		.cdw12 = nlb,
+	};
+
+	return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/* Submit nvme_cmd_flush for @nsid. */
+int nvme_flush(int fd, __u32 nsid)
+{
+	struct nvme_passthru_cmd cmd = {
+		.opcode = nvme_cmd_flush,
+		.nsid = nsid,
+	};
+
+	return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/*
+ * nvme_dsm() - submit nvme_cmd_dsm with @nr_ranges entries from @dsm.
+ * cdw10 carries @nr_ranges - 1 and data_len covers the whole array.
+ */
+int nvme_dsm(int fd, __u32 nsid, __u32 cdw11, struct nvme_dsm_range *dsm,
+	     __u16 nr_ranges)
+{
+	struct nvme_passthru_cmd cmd = {
+		.opcode = nvme_cmd_dsm,
+		.nsid = nsid,
+		.addr = (__u64)(uintptr_t) dsm,
+		.data_len = nr_ranges * sizeof(*dsm),
+		.cdw10 = nr_ranges - 1,
+		.cdw11 = cdw11,
+	};
+
+	return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/*
+ * nvme_setup_dsm_range() - allocate @nr_ranges range descriptors and fill
+ * them from the three parallel arrays, converting each value to
+ * little-endian.
+ *
+ * Returns the malloc()ed array (the caller frees it), or NULL after
+ * printing a message when the allocation fails.
+ */
+struct nvme_dsm_range *nvme_setup_dsm_range(__u32 *ctx_attrs, __u32 *llbas,
+					    __u64 *slbas, __u16 nr_ranges)
+{
+	int i;
+	struct nvme_dsm_range *dsm = malloc(nr_ranges * sizeof(*dsm));
+
+	if (!dsm) {
+		fprintf(stderr, "malloc: %s\n", strerror(errno));
+		return NULL;
+	}
+	for (i = 0; i < nr_ranges; i++) {
+		dsm[i].cattr = cpu_to_le32(ctx_attrs[i]);
+		dsm[i].nlb = cpu_to_le32(llbas[i]);
+		dsm[i].slba = cpu_to_le64(slbas[i]);
+	}
+	return dsm;
+}
+
+int nvme_resv_acquire(int fd, __u32 nsid, __u8 rtype, __u8 racqa,
+		      bool iekey, __u64 crkey, __u64 nrkey)
+{
+	__le64 payload[2] = { cpu_to_le64(crkey), cpu_to_le64(nrkey) };
+	__u32 cdw10 = (racqa & 0x7) | (iekey ?
1 << 3 : 0) | rtype << 8;
+	struct nvme_passthru_cmd cmd = {
+		.opcode = nvme_cmd_resv_acquire,
+		.nsid = nsid,
+		.cdw10 = cdw10,
+		.addr = (__u64)(uintptr_t) (payload),
+		.data_len = sizeof(payload),
+	};
+
+	return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/*
+ * nvme_resv_register() - submit nvme_cmd_resv_register.
+ *
+ * The payload is { crkey, nrkey } in little-endian. cdw10 packs @rrega
+ * (low 3 bits), @iekey (bit 3) and @cptpl (shifted to bit 30).
+ */
+int nvme_resv_register(int fd, __u32 nsid, __u8 rrega, __u8 cptpl,
+		       bool iekey, __u64 crkey, __u64 nrkey)
+{
+	__le64 payload[2] = { cpu_to_le64(crkey), cpu_to_le64(nrkey) };
+	/*
+	 * cptpl is widened first: as a promoted int, 2 << 30 and 3 << 30
+	 * overflow, which is undefined behaviour.
+	 */
+	__u32 cdw10 = (rrega & 0x7) | (iekey ? 1 << 3 : 0) |
+		      (__u32)cptpl << 30;
+
+	struct nvme_passthru_cmd cmd = {
+		.opcode = nvme_cmd_resv_register,
+		.nsid = nsid,
+		.cdw10 = cdw10,
+		.addr = (__u64)(uintptr_t) (payload),
+		.data_len = sizeof(payload),
+	};
+
+	return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/*
+ * nvme_resv_release() - submit nvme_cmd_resv_release with a one-element
+ * payload { crkey }. cdw10 packs @rrela (low 3 bits), @iekey (bit 3) and
+ * @rtype (shifted to bit 8).
+ */
+int nvme_resv_release(int fd, __u32 nsid, __u8 rtype, __u8 rrela,
+		      bool iekey, __u64 crkey)
+{
+	__le64 payload[1] = { cpu_to_le64(crkey) };
+	__u32 cdw10 = (rrela & 0x7) | (iekey ? 1 << 3 : 0) | rtype << 8;
+
+	struct nvme_passthru_cmd cmd = {
+		.opcode = nvme_cmd_resv_release,
+		.nsid = nsid,
+		.cdw10 = cdw10,
+		.addr = (__u64)(uintptr_t) (payload),
+		.data_len = sizeof(payload),
+	};
+
+	return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/*
+ * nvme_resv_report() - submit nvme_cmd_resv_report. @numd goes to cdw10
+ * unchanged; the transfer length is (@numd + 1) * 4 bytes.
+ */
+int nvme_resv_report(int fd, __u32 nsid, __u32 numd, __u32 cdw11, void *data)
+{
+	struct nvme_passthru_cmd cmd = {
+		.opcode = nvme_cmd_resv_report,
+		.nsid = nsid,
+		.cdw10 = numd,
+		.cdw11 = cdw11,
+		.addr = (__u64)(uintptr_t) data,
+		.data_len = (numd + 1) << 2,
+	};
+
+	return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/*
+ * nvme_identify13() - submit nvme_admin_identify with caller-supplied cdw10
+ * and cdw11; the transfer length is always NVME_IDENTIFY_DATA_SIZE.
+ */
+int nvme_identify13(int fd, __u32 nsid, __u32 cdw10, __u32 cdw11, void *data)
+{
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_identify,
+		.nsid = nsid,
+		.addr = (__u64)(uintptr_t) data,
+		.data_len = NVME_IDENTIFY_DATA_SIZE,
+		.cdw10 = cdw10,
+		.cdw11 = cdw11,
+	};
+
+	return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+/* nvme_identify13() with cdw11 fixed to 0. */
+int nvme_identify(int fd, __u32 nsid, __u32 cdw10, void *data)
+{
+	return nvme_identify13(fd, nsid, cdw10, 0, data);
+}
+
+int nvme_identify_ctrl(int fd, void
*data)
+{
+	return nvme_identify(fd, 0, 1, data);
+}
+
+/*
+ * Identify with CNS NVME_ID_CNS_NS_PRESENT when @present is set, otherwise
+ * NVME_ID_CNS_NS.
+ */
+int nvme_identify_ns(int fd, __u32 nsid, bool present, void *data)
+{
+	int cns = present ? NVME_ID_CNS_NS_PRESENT : NVME_ID_CNS_NS;
+
+	return nvme_identify(fd, nsid, cns, data);
+}
+
+/*
+ * Identify with CNS NVME_ID_CNS_NS_PRESENT_LIST when @all is set, otherwise
+ * NVME_ID_CNS_NS_ACTIVE_LIST.
+ */
+int nvme_identify_ns_list(int fd, __u32 nsid, bool all, void *data)
+{
+	int cns = all ? NVME_ID_CNS_NS_PRESENT_LIST : NVME_ID_CNS_NS_ACTIVE_LIST;
+
+	return nvme_identify(fd, nsid, cns, data);
+}
+
+/*
+ * nvme_identify_ctrl_list() - Identify with CNS NVME_ID_CNS_CTRL_NS_LIST for
+ * a non-zero @nsid, otherwise NVME_ID_CNS_CTRL_LIST. @cntid is placed in the
+ * upper 16 bits of cdw10.
+ */
+int nvme_identify_ctrl_list(int fd, __u32 nsid, __u16 cntid, void *data)
+{
+	int cns = nsid ? NVME_ID_CNS_CTRL_NS_LIST : NVME_ID_CNS_CTRL_LIST;
+
+	/*
+	 * Widen cntid before shifting; as a promoted int a value >= 0x8000
+	 * shifted left by 16 overflows (undefined behaviour).
+	 */
+	return nvme_identify(fd, nsid, ((__u32)cntid << 16) | cns, data);
+}
+
+/*
+ * Identify with CNS NVME_ID_CNS_SCNDRY_CTRL_LIST; @cntid is placed in the
+ * upper 16 bits of cdw10 (widened first, see nvme_identify_ctrl_list()).
+ */
+int nvme_identify_secondary_ctrl_list(int fd, __u32 nsid, __u16 cntid, void *data)
+{
+	return nvme_identify(fd, nsid,
+			     ((__u32)cntid << 16) | NVME_ID_CNS_SCNDRY_CTRL_LIST,
+			     data);
+}
+
+/* Identify with CNS NVME_ID_CNS_NS_DESC_LIST. */
+int nvme_identify_ns_descs(int fd, __u32 nsid, void *data)
+{
+
+	return nvme_identify(fd, nsid, NVME_ID_CNS_NS_DESC_LIST, data);
+}
+
+/* Identify with CNS NVME_ID_CNS_NVMSET_LIST; @nvmset_id is passed as cdw11. */
+int nvme_identify_nvmset(int fd, __u16 nvmset_id, void *data)
+{
+	return nvme_identify13(fd, 0, NVME_ID_CNS_NVMSET_LIST, nvmset_id, data);
+}
+
+/* Identify with CNS NVME_ID_CNS_NS_GRANULARITY. */
+int nvme_identify_ns_granularity(int fd, void *data)
+{
+	return nvme_identify13(fd, 0, NVME_ID_CNS_NS_GRANULARITY, 0, data);
+}
+
+/* Identify with CNS NVME_ID_CNS_UUID_LIST. */
+int nvme_identify_uuid(int fd, void *data)
+{
+	return nvme_identify(fd, 0, NVME_ID_CNS_UUID_LIST, data);
+}
+
+/*
+ * nvme_get_log14() - submit nvme_admin_get_log_page.
+ *
+ * The dword count (@data_len / 4 - 1) is split into a low half (cdw10 bits
+ * 31:16) and a high half (cdw11 bits 15:0); @lpo is split over cdw12/cdw13.
+ */
+int nvme_get_log14(int fd, __u32 nsid, __u8 log_id, __u8 lsp, __u64 lpo,
+		   __u16 lsi, bool rae, __u8 uuid_ix, __u32 data_len, void *data)
+{
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_get_log_page,
+		.nsid = nsid,
+		.addr = (__u64)(uintptr_t) data,
+		.data_len = data_len,
+	};
+	__u32 numd = (data_len >> 2) - 1;
+	__u16 numdu = numd >> 16, numdl = numd & 0xffff;
+
+	/* numdl is widened so that the shift cannot overflow a signed int. */
+	cmd.cdw10 = log_id | ((__u32)numdl << 16) | (rae ?
1 << 15 : 0);
+	if (lsp)
+		cmd.cdw10 |= lsp << 8;
+
+	/* lsi is widened so that the shift cannot overflow a signed int. */
+	cmd.cdw11 = numdu | ((__u32)lsi << 16);
+	cmd.cdw12 = lpo;
+	cmd.cdw13 = (lpo >> 32);
+	cmd.cdw14 = uuid_ix;
+
+	return nvme_submit_admin_passthru(fd, &cmd);
+
+}
+
+/*
+ * nvme_get_log() - read @data_len bytes of log page @log_id into @data in
+ * chunks of at most 4096 bytes.
+ *
+ * Returns 0 once every chunk has been read, or the first non-zero result of
+ * nvme_get_log13().
+ */
+int nvme_get_log(int fd, __u32 nsid, __u8 log_id, bool rae,
+		 __u32 data_len, void *data)
+{
+	/* Byte pointer: arithmetic on void * is a GNU extension, not ISO C. */
+	char *ptr = data;
+	__u32 offset = 0, xfer_len = data_len;
+	int ret;
+
+	/*
+	 * 4k is the smallest possible transfer unit, so by
+	 * restricting ourselves to 4k transfers we avoid having
+	 * to check the MDTS value of the controller.
+	 */
+	do {
+		xfer_len = data_len - offset;
+		if (xfer_len > 4096)
+			xfer_len = 4096;
+
+		ret = nvme_get_log13(fd, nsid, log_id, NVME_NO_LOG_LSP,
+				     offset, 0, rae, xfer_len, ptr);
+		if (ret)
+			return ret;
+
+		offset += xfer_len;
+		ptr += xfer_len;
+	} while (offset < data_len);
+
+	return 0;
+}
+
+/*
+ * nvme_get_telemetry_log() - read one telemetry log chunk at @offset.
+ *
+ * @ctrl_init selects NVME_LOG_TELEMETRY_CTRL; otherwise the host log
+ * NVME_LOG_TELEMETRY_HOST is read, with NVME_TELEM_LSP_CREATE as lsp when
+ * @generate_report is set.
+ */
+int nvme_get_telemetry_log(int fd, void *lp, int generate_report,
+			   int ctrl_init, size_t log_page_size, __u64 offset)
+{
+	if (ctrl_init)
+		return nvme_get_log13(fd, NVME_NSID_ALL, NVME_LOG_TELEMETRY_CTRL,
+				      NVME_NO_LOG_LSP, offset,
+				      0, 1, log_page_size, lp);
+	if (generate_report)
+		return nvme_get_log13(fd, NVME_NSID_ALL, NVME_LOG_TELEMETRY_HOST,
+				      NVME_TELEM_LSP_CREATE, offset,
+				      0, 1, log_page_size, lp);
+	else
+		return nvme_get_log13(fd, NVME_NSID_ALL, NVME_LOG_TELEMETRY_HOST,
+				      NVME_NO_LOG_LSP, offset,
+				      0, 1, log_page_size, lp);
+}
+
+/* Read NVME_LOG_FW_SLOT into *fw_log. */
+int nvme_fw_log(int fd, struct nvme_firmware_log_page *fw_log)
+{
+	return nvme_get_log(fd, NVME_NSID_ALL, NVME_LOG_FW_SLOT, true,
+			    sizeof(*fw_log), fw_log);
+}
+
+/* Read NVME_LOG_CHANGED_NS into changed_ns_list_log->log. */
+int nvme_changed_ns_list_log(int fd, struct nvme_changed_ns_list_log *changed_ns_list_log)
+{
+	return nvme_get_log(fd, 0, NVME_LOG_CHANGED_NS, true,
+			    sizeof(changed_ns_list_log->log),
+			    changed_ns_list_log->log);
+}
+
+/* Read @entries error log entries (NVME_LOG_ERROR) into @err_log. */
+int nvme_error_log(int fd, int entries, struct nvme_error_log_page *err_log)
+{
+	return nvme_get_log(fd, NVME_NSID_ALL, NVME_LOG_ERROR, false,
+			    entries * sizeof(*err_log), err_log);
+}
+
+int nvme_endurance_log(int fd,
__u16 group_id, struct nvme_endurance_group_log *endurance_log) +{ + return nvme_get_log13(fd, 0, NVME_LOG_ENDURANCE_GROUP, 0, 0, group_id, 0, + sizeof(*endurance_log), endurance_log); +} +/* + * Read NVME_LOG_SMART for nsid into *smart_log through nvme_get_log(), + * passing rae = false. Returns nvme_get_log()'s result. + */ +int nvme_smart_log(int fd, __u32 nsid, struct nvme_smart_log *smart_log) +{ + return nvme_get_log(fd, nsid, NVME_LOG_SMART, false, + sizeof(*smart_log), smart_log); +} +/* + * Read ana_log_len bytes of NVME_LOG_ANA into ana_log with one + * nvme_get_log13() call; rgo is passed in the lsp position, the log page + * offset is 0 and rae is true. + */ +int nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo) +{ + __u64 lpo = 0; + + return nvme_get_log13(fd, NVME_NSID_ALL, NVME_LOG_ANA, rgo, lpo, 0, + true, ana_log_len, ana_log); +} +/* + * Read NVME_LOG_DEVICE_SELF_TEST into *self_test_log through nvme_get_log(). + */ +int nvme_self_test_log(int fd, struct nvme_self_test_log *self_test_log) +{ + return nvme_get_log(fd, NVME_NSID_ALL, NVME_LOG_DEVICE_SELF_TEST, false, + sizeof(*self_test_log), self_test_log); +} +/* + * Read NVME_LOG_CMD_EFFECTS into *effects_log through nvme_get_log(). + */ +int nvme_effects_log(int fd, struct nvme_effects_log_page *effects_log) +{ + return nvme_get_log(fd, NVME_NSID_ALL, NVME_LOG_CMD_EFFECTS, false, + sizeof(*effects_log), effects_log); +} +/* + * Read size bytes of NVME_LOG_DISC into log through nvme_get_log(), using + * namespace id 0. + */ +int nvme_discovery_log(int fd, struct nvmf_disc_rsp_page_hdr *log, __u32 size) +{ + return nvme_get_log(fd, 0, NVME_LOG_DISC, false, size, log); +} +/* + * Read NVME_LOG_SANITIZE into *sanitize_log through nvme_get_log(), using + * namespace id 0. + */ +int nvme_sanitize_log(int fd, struct nvme_sanitize_log_page *sanitize_log) +{ + return nvme_get_log(fd, 0, NVME_LOG_SANITIZE, false, + sizeof(*sanitize_log), sanitize_log); +} +/* + * nvme_feature() - submit an admin command with the given opcode, cdw10, + * cdw11, cdw12 and data buffer. When the submission returns 0 and result is + * non-NULL, the command's result field is copied to *result. Returns the + * submission result. + */ +int nvme_feature(int fd, __u8 opcode, __u32 nsid, __u32 cdw10, __u32 cdw11, + __u32 cdw12, __u32 data_len, void *data, __u32 *result) +{ + struct nvme_admin_cmd cmd = { + .opcode = opcode, + .nsid = nsid, + .cdw10 = cdw10, + .cdw11 = cdw11, + .cdw12 = cdw12, + .addr = (__u64)(uintptr_t) data, + .data_len = data_len, + }; + int err; + + err = nvme_submit_admin_passthru(fd, &cmd); + if (!err && result) + *result = cmd.result; + return err; +} + +int nvme_set_feature(int fd, __u32 nsid, __u8 fid, __u32 value, __u32 cdw12, + bool save, __u32 data_len, void *data, __u32 *result) +{ + __u32 cdw10 = fid | (save ? 
1U << 31 : 0);
+
+	/*
+	 * The save flag above is built from an unsigned constant: shifting a
+	 * signed 1 into bit 31 overflows int, which is undefined behaviour.
+	 * @value travels as cdw11.
+	 */
+	return nvme_feature(fd, nvme_admin_set_features, nsid, cdw10, value,
+			    cdw12, data_len, data, result);
+}
+
+
+/*
+ * Perform the opposite operation of the byte-swapping code at the start of the
+ * kernel function nvme_user_cmd().
+ *
+ * *pcmd is zeroed first, so metadata, addr, the length fields, timeout_ms
+ * and result stay 0; every other field is copied from ncmd->common and
+ * converted from little-endian to CPU order where it is wider than a byte.
+ */
+static void nvme_to_passthru_cmd(struct nvme_passthru_cmd *pcmd,
+				 const struct nvme_command *ncmd)
+{
+	assert(sizeof(*ncmd) < sizeof(*pcmd));
+	memset(pcmd, 0, sizeof(*pcmd));
+	pcmd->opcode = ncmd->common.opcode;
+	pcmd->flags = ncmd->common.flags;
+	pcmd->rsvd1 = ncmd->common.command_id;
+	pcmd->nsid = le32_to_cpu(ncmd->common.nsid);
+	pcmd->cdw2 = le32_to_cpu(ncmd->common.cdw2[0]);
+	pcmd->cdw3 = le32_to_cpu(ncmd->common.cdw2[1]);
+	/* Skip metadata and addr */
+	pcmd->cdw10 = le32_to_cpu(ncmd->common.cdw10[0]);
+	pcmd->cdw11 = le32_to_cpu(ncmd->common.cdw10[1]);
+	pcmd->cdw12 = le32_to_cpu(ncmd->common.cdw10[2]);
+	pcmd->cdw13 = le32_to_cpu(ncmd->common.cdw10[3]);
+	pcmd->cdw14 = le32_to_cpu(ncmd->common.cdw10[4]);
+	pcmd->cdw15 = le32_to_cpu(ncmd->common.cdw10[5]);
+}
+
+/*
+ * nvme_get_property() - read the controller property at @offset with a
+ * Fabrics Property Get command sent through the admin passthru ioctl.
+ *
+ * On success (return value 0) *@value receives pcmd.result, which is a
+ * 32-bit field. Returns the submission result.
+ */
+int nvme_get_property(int fd, int offset, uint64_t *value)
+{
+	struct nvme_passthru_cmd pcmd;
+	struct nvmf_property_get_command pg = {
+		.opcode = nvme_fabrics_command,
+		.fctype = nvme_fabrics_type_property_get,
+		.offset = cpu_to_le32(offset),
+		.attrib = is_64bit_reg(offset),
+	};
+	struct nvme_command gcmd;
+	int err;
+
+	gcmd.prop_get = pg;
+	nvme_to_passthru_cmd(&pcmd, &gcmd);
+	err = nvme_submit_admin_passthru(fd, &pcmd);
+	if (!err) {
+		/*
+		 * nvme_submit_admin_passthru() stores the lower 32 bits
+		 * of the property value in pcmd.result using CPU endianness.
+		 */
+		*value = pcmd.result;
+	}
+	return err;
+}
+
+/*
+ * nvme_get_properties() - read the registers from NVME_REG_CAP through
+ * NVME_REG_CMBSZ into a freshly allocated, page-sized buffer.
+ *
+ * The buffer is pre-filled with 0xff. A register whose read fails with
+ * status NVME_SC_INVALID_FIELD is stored as all ones and does not count as
+ * an error.
+ *
+ * On success returns 0 and *@pbar owns the buffer (the caller frees it).
+ * Returns -ENOMEM when the allocation fails. On any other error the buffer
+ * is released, *@pbar is reset to NULL so that no dangling pointer is
+ * handed back, and the failing nvme_get_property() result is returned.
+ */
+int nvme_get_properties(int fd, void **pbar)
+{
+	int offset;
+	uint64_t value;
+	int err = 0;
+	int size = getpagesize();
+	/* Byte pointer: arithmetic on void * is a GNU extension, not ISO C. */
+	char *bar;
+
+	bar = malloc(size);
+	*pbar = bar;
+	if (!bar) {
+		fprintf(stderr, "malloc: %s\n", strerror(errno));
+		return -ENOMEM;
+	}
+
+	memset(bar, 0xff, size);
+	for (offset = NVME_REG_CAP; offset <= NVME_REG_CMBSZ;) {
+		err = nvme_get_property(fd, offset, &value);
+		if (err > 0 && (err & 0xff) == NVME_SC_INVALID_FIELD) {
+			err = 0;
+			value = -1;
+		} else if (err) {
+			free(bar);
+			*pbar = NULL;
+			break;
+		}
+		if (is_64bit_reg(offset)) {
+			*(uint64_t *)(bar + offset) = value;
+			offset += 8;
+		} else {
+			*(uint32_t *)(bar + offset) = value;
+			offset += 4;
+		}
+	}
+
+	return err;
+}
+
+/*
+ * nvme_set_property() - write @value to the controller property at @offset
+ * with a Fabrics Property Set command sent through the admin passthru ioctl.
+ */
+int nvme_set_property(int fd, int offset, uint64_t value)
+{
+	struct nvmf_property_set_command ps = {
+		.opcode = nvme_fabrics_command,
+		.fctype = nvme_fabrics_type_property_set,
+		.offset = cpu_to_le32(offset),
+		.value = cpu_to_le64(value),
+		.attrib = is_64bit_reg(offset),
+	};
+	struct nvme_command scmd;
+	struct nvme_passthru_cmd pcmd;
+
+	scmd.prop_set = ps;
+	nvme_to_passthru_cmd(&pcmd, &scmd);
+	return nvme_submit_admin_passthru(fd, &pcmd);
+}
+
+/*
+ * nvme_get_feature() - nvme_feature() with nvme_admin_get_features; cdw10
+ * packs @fid (low byte) and @sel (shifted to bit 8), cdw12 is 0.
+ */
+int nvme_get_feature(int fd, __u32 nsid, __u8 fid, __u8 sel, __u32 cdw11,
+		     __u32 data_len, void *data, __u32 *result)
+{
+	__u32 cdw10 = fid | sel << 8;
+
+	return nvme_feature(fd, nvme_admin_get_features, nsid, cdw10, cdw11,
+			    0, data_len, data, result);
+}
+
+/*
+ * nvme_format() - submit nvme_admin_format_nvm; cdw10 packs @lbaf, @ms
+ * (bit 4), @pi (bit 5), @pil (bit 8) and @ses (bit 9). @timeout is passed
+ * as timeout_ms.
+ */
+int nvme_format(int fd, __u32 nsid, __u8 lbaf, __u8 ses, __u8 pi,
+		__u8 pil, __u8 ms, __u32 timeout)
+{
+	__u32 cdw10 = lbaf | ms << 4 | pi << 5 | pil << 8 | ses << 9;
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_format_nvm,
+		.nsid = nsid,
+		.cdw10 = cdw10,
+		.timeout_ms = timeout,
+	};
+
+	return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+int nvme_ns_create(int fd, __u64 nsze, __u64 ncap, __u8 flbas,
+		   __u8 dps, __u8 nmic, __u32 timeout, __u32 *result)
+{
+	struct nvme_id_ns ns = {
+		.nsze = cpu_to_le64(nsze),
+
.ncap = cpu_to_le64(ncap),
+		.flbas = flbas,
+		.dps = dps,
+		.nmic = nmic,
+	};
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_ns_mgmt,
+		.addr = (__u64)(uintptr_t) ((void *)&ns),
+		.cdw10 = 0,
+		.data_len = 0x1000,
+		.timeout_ms = timeout,
+	};
+	int err;
+
+	err = nvme_submit_admin_passthru(fd, &cmd);
+	if (!err && result)
+		*result = cmd.result;
+	return err;
+}
+
+/* Submit nvme_admin_ns_mgmt with cdw10 = 1 for @nsid. */
+int nvme_ns_delete(int fd, __u32 nsid, __u32 timeout)
+{
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_ns_mgmt,
+		.nsid = nsid,
+		.cdw10 = 1,
+		.timeout_ms = timeout,
+	};
+
+	return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+/*
+ * nvme_ns_attachment() - submit nvme_admin_ns_attach for @nsid with the
+ * @num_ctrls controller ids in @ctrlist.
+ *
+ * cdw10 is 0 when @attach is true and 1 otherwise. The controller list is
+ * built little-endian in a zeroed 4 KiB stack buffer.
+ *
+ * Returns -EINVAL (errno is set as well) when @num_ctrls does not fit in
+ * that buffer, otherwise the submission result.
+ */
+int nvme_ns_attachment(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist,
+		       bool attach)
+{
+	int i;
+	__u8 buf[0x1000];
+	struct nvme_controller_list *cntlist =
+					(struct nvme_controller_list *)buf;
+	/*
+	 * Identifier slots that fit in buf after the count field. Assumes the
+	 * count precedes the identifiers without padding -- TODO confirm
+	 * against struct nvme_controller_list.
+	 */
+	const size_t max_ctrls = (sizeof(buf) - sizeof(cntlist->num)) /
+				 sizeof(cntlist->identifier[0]);
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_ns_attach,
+		.nsid = nsid,
+		.addr = (__u64)(uintptr_t) cntlist,
+		.cdw10 = attach ? 0 : 1,
+		.data_len = 0x1000,
+	};
+
+	/* Refuse lists that would be written past the end of buf. */
+	if (num_ctrls > max_ctrls) {
+		errno = EINVAL;
+		return -errno;
+	}
+
+	memset(buf, 0, sizeof(buf));
+	cntlist->num = cpu_to_le16(num_ctrls);
+	for (i = 0; i < num_ctrls; i++)
+		cntlist->identifier[i] = cpu_to_le16(ctrlist[i]);
+
+	return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+/* nvme_ns_attachment() with attach = true. */
+int nvme_ns_attach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist)
+{
+	return nvme_ns_attachment(fd, nsid, num_ctrls, ctrlist, true);
+}
+
+/* nvme_ns_attachment() with attach = false. */
+int nvme_ns_detach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist)
+{
+	return nvme_ns_attachment(fd, nsid, num_ctrls, ctrlist, false);
+}
+
+/*
+ * nvme_fw_download() - submit nvme_admin_download_fw; cdw10 is the dword
+ * count minus one (@data_len / 4 - 1) and cdw11 the dword offset
+ * (@offset / 4).
+ */
+int nvme_fw_download(int fd, __u32 offset, __u32 data_len, void *data)
+{
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_download_fw,
+		.addr = (__u64)(uintptr_t) data,
+		.data_len = data_len,
+		.cdw10 = (data_len >> 2) - 1,
+		.cdw11 = offset >> 2,
+	};
+
+	return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+/*
+ * nvme_fw_commit() - submit nvme_admin_activate_fw; cdw10 packs @slot (low
+ * bits), @action (bit 3) and @bpid (bit 31). @bpid is widened to __u32
+ * first because shifting a promoted int into bit 31 is undefined behaviour.
+ */
+int nvme_fw_commit(int fd, __u8 slot, __u8 action, __u8 bpid)
+{
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_activate_fw,
+		.cdw10 = ((__u32)bpid << 31) | (action <<
3) | slot,
+	};
+
+	return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+/*
+ * nvme_sec_send() - submit nvme_admin_security_send.
+ *
+ * cdw10 packs @secp (bits 31:24), @spsp (bits 23:8) and @nssf (bits 7:0);
+ * @tl goes to cdw11. When the submission returns 0 and @result is non-NULL,
+ * the command's result field is copied to *@result.
+ */
+int nvme_sec_send(int fd, __u32 nsid, __u8 nssf, __u16 spsp,
+		  __u8 secp, __u32 tl, __u32 data_len, void *data, __u32 *result)
+{
+	/*
+	 * secp is widened before shifting: as a promoted int a value >= 0x80
+	 * shifted left by 24 overflows, which is undefined behaviour.
+	 */
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_security_send,
+		.addr = (__u64)(uintptr_t) data,
+		.data_len = data_len,
+		.nsid = nsid,
+		.cdw10 = (__u32)secp << 24 | spsp << 8 | nssf,
+		.cdw11 = tl,
+	};
+	int err;
+
+	err = nvme_submit_admin_passthru(fd, &cmd);
+	if (!err && result)
+		*result = cmd.result;
+	return err;
+}
+
+/*
+ * nvme_sec_recv() - submit nvme_admin_security_recv; same cdw10 layout as
+ * nvme_sec_send(), with @al in cdw11.
+ */
+int nvme_sec_recv(int fd, __u32 nsid, __u8 nssf, __u16 spsp,
+		  __u8 secp, __u32 al, __u32 data_len, void *data, __u32 *result)
+{
+	/* secp is widened before shifting, see nvme_sec_send(). */
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_security_recv,
+		.nsid = nsid,
+		.cdw10 = (__u32)secp << 24 | spsp << 8 | nssf,
+		.cdw11 = al,
+		.addr = (__u64)(uintptr_t) data,
+		.data_len = data_len,
+	};
+	int err;
+
+	err = nvme_submit_admin_passthru(fd, &cmd);
+	if (!err && result)
+		*result = cmd.result;
+	return err;
+}
+
+/*
+ * nvme_get_lba_status() - submit nvme_admin_get_lba_status.
+ *
+ * @slba is split over cdw10/cdw11, @mndw goes to cdw12 and cdw13 packs
+ * @atype (bits 31:24) with @rl (low 16 bits).
+ *
+ * NOTE(review): data_len is left at 0 although addr points at @data, so no
+ * transfer length is announced for the buffer -- confirm whether a length
+ * derived from @mndw is intended here.
+ */
+int nvme_get_lba_status(int fd, __u64 slba, __u32 mndw, __u8 atype, __u16 rl,
+			void *data)
+{
+	/* atype is widened before shifting, see nvme_sec_send(). */
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_get_lba_status,
+		.addr = (__u64)(uintptr_t) data,
+		.cdw10 = slba & 0xffffffff,
+		.cdw11 = slba >> 32,
+		.cdw12 = mndw,
+		.cdw13 = ((__u32)atype << 24) | rl,
+	};
+
+	return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+int nvme_dir_send(int fd, __u32 nsid, __u16 dspec, __u8 dtype, __u8 doper,
+		  __u32 data_len, __u32 dw12, void *data, __u32 *result)
+{
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_directive_send,
+		.addr = (__u64)(uintptr_t) data,
+		.data_len = data_len,
+		.nsid = nsid,
+		.cdw10 = data_len?
(data_len >> 2) - 1 : 0,
+		/* dspec is widened so that the shift cannot overflow int. */
+		.cdw11 = (__u32)dspec << 16 | dtype << 8 | doper,
+		.cdw12 = dw12,
+	};
+	int err;
+
+	err = nvme_submit_admin_passthru(fd, &cmd);
+	if (!err && result)
+		*result = cmd.result;
+	return err;
+}
+
+/*
+ * nvme_dir_recv() - submit nvme_admin_directive_recv.
+ *
+ * cdw10 is the dword count minus one (0 when @data_len is 0); cdw11 packs
+ * @dspec (bits 31:16), @dtype (bits 15:8) and @doper (bits 7:0). When the
+ * submission returns 0 and @result is non-NULL, the command's result field
+ * is copied to *@result.
+ */
+int nvme_dir_recv(int fd, __u32 nsid, __u16 dspec, __u8 dtype, __u8 doper,
+		  __u32 data_len, __u32 dw12, void *data, __u32 *result)
+{
+	/*
+	 * dspec is widened before shifting: as a promoted int a value
+	 * >= 0x8000 shifted left by 16 overflows (undefined behaviour).
+	 */
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_directive_recv,
+		.addr = (__u64)(uintptr_t) data,
+		.data_len = data_len,
+		.nsid = nsid,
+		.cdw10 = data_len? (data_len >> 2) - 1 : 0,
+		.cdw11 = (__u32)dspec << 16 | dtype << 8 | doper,
+		.cdw12 = dw12,
+	};
+	int err;
+
+	err = nvme_submit_admin_passthru(fd, &cmd);
+	if (!err && result)
+		*result = cmd.result;
+	return err;
+}
+
+/*
+ * nvme_sanitize() - submit nvme_admin_sanitize_nvm; cdw10 packs
+ * @no_dealloc (bit 9), @oipbp (bit 8), @owpass (at
+ * NVME_SANITIZE_OWPASS_SHIFT), @ause (bit 3) and @sanact (low bits);
+ * @ovrpat goes to cdw11.
+ */
+int nvme_sanitize(int fd, __u8 sanact, __u8 ause, __u8 owpass, __u8 oipbp,
+		  __u8 no_dealloc, __u32 ovrpat)
+{
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_sanitize_nvm,
+		.cdw10 = no_dealloc << 9 | oipbp << 8 |
+			 owpass << NVME_SANITIZE_OWPASS_SHIFT |
+			 ause << 3 | sanact,
+		.cdw11 = ovrpat,
+	};
+
+	return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+/* Submit nvme_admin_dev_self_test for @nsid with the given cdw10. */
+int nvme_self_test_start(int fd, __u32 nsid, __u32 cdw10)
+{
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_dev_self_test,
+		.nsid = nsid,
+		.cdw10 = cdw10,
+	};
+
+	return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+/*
+ * nvme_virtual_mgmt() - submit nvme_admin_virtual_mgmt with the given cdw10
+ * and cdw11. When the submission returns 0 and @result is non-NULL, the
+ * command's result field is copied to *@result.
+ */
+int nvme_virtual_mgmt(int fd, __u32 cdw10, __u32 cdw11, __u32 *result)
+{
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_virtual_mgmt,
+		.cdw10 = cdw10,
+		.cdw11 = cdw11,
+	};
+	int err;
+
+	err = nvme_submit_admin_passthru(fd, &cmd);
+	if (!err && result)
+		*result = cmd.result;
+
+	return err;
+}
diff --git a/libmultipath/nvme/nvme-ioctl.h b/libmultipath/nvme/nvme-ioctl.h
new file mode 100644
index 0000000..565f764
--- /dev/null
+++ b/libmultipath/nvme/nvme-ioctl.h
@@ -0,0 +1,158 @@
+#ifndef _NVME_LIB_H
+#define _NVME_LIB_H
+
+#include
+#include
+#include "linux/nvme_ioctl.h"
+#include "nvme.h"
+
+#define NVME_IOCTL_TIMEOUT 120000 /* in milliseconds */
+
+int nvme_get_nsid(int fd); + +/* Generic passthrough */ +int nvme_submit_passthru(int fd, unsigned long ioctl_cmd, + struct nvme_passthru_cmd *cmd); + +int nvme_passthru(int fd, unsigned long ioctl_cmd, __u8 opcode, __u8 flags, + __u16 rsvd, __u32 nsid, __u32 cdw2, __u32 cdw3, + __u32 cdw10, __u32 cdw11, __u32 cdw12, + __u32 cdw13, __u32 cdw14, __u32 cdw15, + __u32 data_len, void *data, __u32 metadata_len, + void *metadata, __u32 timeout_ms, __u32 *result); + +/* NVME_SUBMIT_IO */ +int nvme_io(int fd, __u8 opcode, __u64 slba, __u16 nblocks, __u16 control, + __u32 dsmgmt, __u32 reftag, __u16 apptag, + __u16 appmask, void *data, void *metadata); + +int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control, + __u32 dsmgmt, __u32 reftag, __u16 apptag, + __u16 appmask, void *data, void *metadata); + +int nvme_write(int fd, __u64 slba, __u16 nblocks, __u16 control, + __u32 dsmgmt, __u32 reftag, __u16 apptag, + __u16 appmask, void *data, void *metadata); + +int nvme_compare(int fd, __u64 slba, __u16 nblocks, __u16 control, + __u32 dsmgmt, __u32 reftag, __u16 apptag, + __u16 appmask, void *data, void *metadata); + +int nvme_verify(int fd, __u32 nsid, __u64 slba, __u16 nblocks, + __u16 control, __u32 reftag, __u16 apptag, __u16 appmask); + +/* NVME_IO_CMD */ +int nvme_passthru_io(int fd, __u8 opcode, __u8 flags, __u16 rsvd, + __u32 nsid, __u32 cdw2, __u32 cdw3, + __u32 cdw10, __u32 cdw11, __u32 cdw12, + __u32 cdw13, __u32 cdw14, __u32 cdw15, + __u32 data_len, void *data, __u32 metadata_len, + void *metadata, __u32 timeout); + +int nvme_write_zeros(int fd, __u32 nsid, __u64 slba, __u16 nlb, + __u16 control, __u32 reftag, __u16 apptag, __u16 appmask); + +int nvme_write_uncorrectable(int fd, __u32 nsid, __u64 slba, __u16 nlb); + +int nvme_flush(int fd, __u32 nsid); + +int nvme_dsm(int fd, __u32 nsid, __u32 cdw11, struct nvme_dsm_range *dsm, + __u16 nr_ranges); +struct nvme_dsm_range *nvme_setup_dsm_range(__u32 *ctx_attrs, + __u32 *llbas, __u64 *slbas, + __u16 nr_ranges); 
+ +int nvme_resv_acquire(int fd, __u32 nsid, __u8 rtype, __u8 racqa, + bool iekey, __u64 crkey, __u64 nrkey); +int nvme_resv_register(int fd, __u32 nsid, __u8 rrega, __u8 cptpl, + bool iekey, __u64 crkey, __u64 nrkey); +int nvme_resv_release(int fd, __u32 nsid, __u8 rtype, __u8 rrela, + bool iekey, __u64 crkey); +int nvme_resv_report(int fd, __u32 nsid, __u32 numd, __u32 cdw11, void *data); + +int nvme_identify13(int fd, __u32 nsid, __u32 cdw10, __u32 cdw11, void *data); +int nvme_identify(int fd, __u32 nsid, __u32 cdw10, void *data); +int nvme_identify_ctrl(int fd, void *data); +int nvme_identify_ns(int fd, __u32 nsid, bool present, void *data); +int nvme_identify_ns_list(int fd, __u32 nsid, bool all, void *data); +int nvme_identify_ctrl_list(int fd, __u32 nsid, __u16 cntid, void *data); +int nvme_identify_ns_descs(int fd, __u32 nsid, void *data); +int nvme_identify_nvmset(int fd, __u16 nvmset_id, void *data); +int nvme_identify_uuid(int fd, void *data); +int nvme_identify_secondary_ctrl_list(int fd, __u32 nsid, __u16 cntid, void *data); +int nvme_identify_ns_granularity(int fd, void *data); +int nvme_get_log(int fd, __u32 nsid, __u8 log_id, bool rae, + __u32 data_len, void *data); +int nvme_get_log14(int fd, __u32 nsid, __u8 log_id, __u8 lsp, __u64 lpo, + __u16 group_id, bool rae, __u8 uuid_ix, + __u32 data_len, void *data); + +static inline int nvme_get_log13(int fd, __u32 nsid, __u8 log_id, __u8 lsp, + __u64 lpo, __u16 lsi, bool rae, __u32 data_len, + void *data) +{ + return nvme_get_log14(fd, nsid, log_id, lsp, lpo, lsi, rae, 0, + data_len, data); +} + +int nvme_get_telemetry_log(int fd, void *lp, int generate_report, + int ctrl_gen, size_t log_page_size, __u64 offset); +int nvme_fw_log(int fd, struct nvme_firmware_log_page *fw_log); +int nvme_changed_ns_list_log(int fd, + struct nvme_changed_ns_list_log *changed_ns_list_log); +int nvme_error_log(int fd, int entries, struct nvme_error_log_page *err_log); +int nvme_smart_log(int fd, __u32 nsid, struct 
nvme_smart_log *smart_log); +int nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo); +int nvme_effects_log(int fd, struct nvme_effects_log_page *effects_log); +int nvme_discovery_log(int fd, struct nvmf_disc_rsp_page_hdr *log, __u32 size); +int nvme_sanitize_log(int fd, struct nvme_sanitize_log_page *sanitize_log); +int nvme_endurance_log(int fd, __u16 group_id, + struct nvme_endurance_group_log *endurance_log); + +int nvme_feature(int fd, __u8 opcode, __u32 nsid, __u32 cdw10, + __u32 cdw11, __u32 cdw12, __u32 data_len, void *data, + __u32 *result); +int nvme_set_feature(int fd, __u32 nsid, __u8 fid, __u32 value, __u32 cdw12, + bool save, __u32 data_len, void *data, __u32 *result); +int nvme_get_feature(int fd, __u32 nsid, __u8 fid, __u8 sel, + __u32 cdw11, __u32 data_len, void *data, __u32 *result); + +int nvme_format(int fd, __u32 nsid, __u8 lbaf, __u8 ses, __u8 pi, + __u8 pil, __u8 ms, __u32 timeout); + +int nvme_ns_create(int fd, __u64 nsze, __u64 ncap, __u8 flbas, + __u8 dps, __u8 nmic, __u32 timeout, __u32 *result); +int nvme_ns_delete(int fd, __u32 nsid, __u32 timeout); + +int nvme_ns_attachment(int fd, __u32 nsid, __u16 num_ctrls, + __u16 *ctrlist, bool attach); +int nvme_ns_attach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist); +int nvme_ns_detach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist); + +int nvme_fw_download(int fd, __u32 offset, __u32 data_len, void *data); +int nvme_fw_commit(int fd, __u8 slot, __u8 action, __u8 bpid); + +int nvme_sec_send(int fd, __u32 nsid, __u8 nssf, __u16 spsp, + __u8 secp, __u32 tl, __u32 data_len, void *data, __u32 *result); +int nvme_sec_recv(int fd, __u32 nsid, __u8 nssf, __u16 spsp, + __u8 secp, __u32 al, __u32 data_len, void *data, __u32 *result); + +int nvme_subsystem_reset(int fd); +int nvme_reset_controller(int fd); +int nvme_ns_rescan(int fd); + +int nvme_get_lba_status(int fd, __u64 slba, __u32 mndw, __u8 atype, __u16 rl, + void *data); +int nvme_dir_send(int fd, __u32 nsid, 
__u16 dspec, __u8 dtype, __u8 doper, + __u32 data_len, __u32 dw12, void *data, __u32 *result); +int nvme_dir_recv(int fd, __u32 nsid, __u16 dspec, __u8 dtype, __u8 doper, + __u32 data_len, __u32 dw12, void *data, __u32 *result); +int nvme_get_properties(int fd, void **pbar); +int nvme_set_property(int fd, int offset, uint64_t value); +int nvme_get_property(int fd, int offset, uint64_t *value); +int nvme_sanitize(int fd, __u8 sanact, __u8 ause, __u8 owpass, __u8 oipbp, + __u8 no_dealloc, __u32 ovrpat); +int nvme_self_test_start(int fd, __u32 nsid, __u32 cdw10); +int nvme_self_test_log(int fd, struct nvme_self_test_log *self_test_log); +int nvme_virtual_mgmt(int fd, __u32 cdw10, __u32 cdw11, __u32 *result); +#endif /* _NVME_LIB_H */ diff --git a/libmultipath/nvme/nvme.h b/libmultipath/nvme/nvme.h new file mode 100644 index 0000000..7e0278b --- /dev/null +++ b/libmultipath/nvme/nvme.h @@ -0,0 +1,240 @@ +/* + * Definitions for the NVM Express interface + * Copyright (c) 2011-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */
+
+#ifndef _NVME_H
+#define _NVME_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <endian.h>
+#include "plugin.h"
+#include "json.h"
+
+/* Hint stub: expands to the (parenthesized) condition itself. */
+#define unlikely(x) (x)
+
+#ifdef LIBUUID
+#include <uuid/uuid.h>
+#else
+/* Fallback 16-byte UUID container used when libuuid is not available. */
+typedef struct {
+	uint8_t b[16];
+} uuid_t;
+#endif
+
+#include "linux/nvme.h"
+
+/* Two tables of 256 little-endian words plus padding: 4096 bytes in total. */
+struct nvme_effects_log_page {
+	__le32 acs[256];
+	__le32 iocs[256];
+	__u8   resv[2048];
+};
+
+/* One error log entry; the fields add up to 64 bytes. */
+struct nvme_error_log_page {
+	__le64	error_count;
+	__le16	sqid;
+	__le16	cmdid;
+	__le16	status_field;
+	__le16	parm_error_location;
+	__le64	lba;
+	__le32	nsid;
+	__u8	vs;
+	__u8	resv[3];
+	__le64	cs;
+	__u8	resv2[24];
+};
+
+/* Firmware log page; the fields add up to 512 bytes. */
+struct nvme_firmware_log_page {
+	__u8	afi;
+	__u8	resv[7];
+	__u64	frs[7];
+	__u8	resv2[448];
+};
+
+/* idle and active power scales occupy the last 2 bits of the field */
+#define POWER_SCALE(s) ((s) >> 6)
+
+/* Four 32-bit words followed by padding up to 4096 bytes. */
+struct nvme_host_mem_buffer {
+	__u32			hsize;
+	__u32			hmdlal;
+	__u32			hmdlau;
+	__u32			hmdlec;
+	__u8			rsvd16[4080];
+};
+
+struct nvme_auto_pst {
+	__u32	data;
+	__u32	rsvd32;
+};
+
+/* 6-byte timestamp value followed by an attribute byte and a reserved byte. */
+struct nvme_timestamp {
+	__u8 timestamp[6];
+	__u8 attr;
+	__u8 rsvd;
+};
+
+/* Counted list: a little-endian count followed by a flexible array. */
+struct nvme_controller_list {
+	__le16 num;
+	__le16 identifier[];
+};
+
+/* One 32-byte entry of struct nvme_secondary_controllers_list. */
+struct nvme_secondary_controller_entry {
+	__le16 scid;	/* Secondary Controller Identifier */
+	__le16 pcid;	/* Primary Controller Identifier */
+	__u8   scs;	/* Secondary Controller State */
+	__u8   rsvd5[3];
+	__le16 vfn;	/* Virtual Function Number */
+	__le16 nvq;	/* Number of VQ Flexible Resources Assigned */
+	__le16 nvi;	/* Number of VI Flexible Resources Assigned */
+	__u8   rsvd14[18];
+};
+
+/* 32-byte header followed by 127 entries of 32 bytes: 4096 bytes in total. */
+struct nvme_secondary_controllers_list {
+	__u8   num;
+	__u8   rsvd[31];
+	struct nvme_secondary_controller_entry sc_entry[127];
+};
+
+/* 8 bytes; several sub-fields are packed into each member (see the names). */
+struct nvme_bar_cap {
+	__u16	mqes;
+	__u8	ams_cqr;
+	__u8	to;
+	__u16	bps_css_nssrs_dstrd;
+	__u8	mpsmax_mpsmin;
+	__u8	rsvd_pmrs;
+};
+
+/* __force is only meaningful to the checker; it expands to nothing otherwise. */
+#ifdef __CHECKER__
+#define __force       __attribute__((force))
+#else
+#define __force
+#endif
+
+/*
+ * Byte-order helpers: thin typed wrappers around htoleNN()/leNNtoh() that
+ * convert between CPU order and the __leNN types used by the structures.
+ */
+static inline __le16 cpu_to_le16(uint16_t x)
+{
+	return (__force __le16)htole16(x);
+}
+static inline __le32 cpu_to_le32(uint32_t x)
+{
+	return (__force __le32)htole32(x);
+}
+static inline __le64 cpu_to_le64(uint64_t x)
+{
+	return (__force __le64)htole64(x);
+}
+
+static inline uint16_t le16_to_cpu(__le16 x)
+{
+	return le16toh((__force __u16)x);
+}
+static inline uint32_t le32_to_cpu(__le32 x)
+{
+	return le32toh((__force __u32)x);
+}
+static inline uint64_t le64_to_cpu(__le64 x)
+{
+	return le64toh((__force __u64)x);
+}
+
+#define MAX_LIST_ITEMS 256
+/* One listed device node together with its identify data. */
+struct list_item {
+	char                node[1024];
+	struct nvme_id_ctrl ctrl;
+	int                 nsid;
+	struct nvme_id_ns   ns;
+	unsigned            block;
+};
+
+/* String attributes describing one controller. */
+struct ctrl_list_item {
+	char *name;
+	char *address;
+	char *transport;
+	char *state;
+	char *ana_state;
+	char *subsysnqn;
+	char *traddr;
+	char *trsvcid;
+	char *host_traddr;
+};
+
+/* One subsystem and its array of nctrls controllers. */
+struct subsys_list_item {
+	char *name;
+	char *subsysnqn;
+	int nctrls;
+	struct ctrl_list_item *ctrls;
+};
+
+/* Output format selectors. */
+enum {
+	NORMAL,
+	JSON,
+	BINARY,
+};
+
+/* Connection parameters a controller is matched against. */
+struct connect_args {
+	char *subsysnqn;
+	char *transport;
+	char *traddr;
+	char *trsvcid;
+	char *host_traddr;
+};
+
+#define SYS_NVME		"/sys/class/nvme"
+
+bool ctrl_matches_connectargs(char *name, struct connect_args *args);
+char *find_ctrl_with_connectargs(struct connect_args *args);
+char *__parse_connect_arg(char *conargs, const char delim, const char *fieldnm);
+
+extern const char *conarg_nqn;
+extern const char *conarg_transport;
+extern const char *conarg_traddr;
+extern const char *conarg_trsvcid;
+extern const char *conarg_host_traddr;
+
+void register_extension(struct plugin *plugin);
+
+#include "argconfig.h"
+int parse_and_open(int argc, char **argv, const char *desc,
+	const struct argconfig_commandline_options *clo, void *cfg, size_t size);
+
+extern const char *devicename;
+
+int __id_ctrl(int argc, char **argv, struct command *cmd, struct plugin *plugin, void (*vs)(__u8 *vs, struct json_object *root));
+int	validate_output_format(char *format);
+
+struct subsys_list_item *get_subsys_list(int *subcnt, char *subsysnqn, __u32 nsid);
+void free_subsys_list(struct subsys_list_item *slist, int n);
+char *nvme_char_from_block(char *block);
+
+/*
+ * is_64bit_reg - It checks whether given offset of the controller register is
+ *		  64bit or not.
+ * @offset: offset of controller register field in bytes
+ *
+ * It gives true if given offset is 64bit register, otherwise it returns false.
+ *
+ * Notes: This function does not care about transport so that the offset is
+ * not going to be checked inside of this function for the unsupported fields
+ * in a specific transport. For example, BPMBL(Boot Partition Memory Buffer
+ * Location) register is not supported by fabrics, but it can be checked here.
+ */
+static inline bool is_64bit_reg(__u32 offset)
+{
+	return offset == NVME_REG_CAP ||
+	       offset == NVME_REG_ASQ ||
+	       offset == NVME_REG_ACQ ||
+	       offset == NVME_REG_BPMBL;
+}
+
+#endif /* _NVME_H */
diff --git a/libmultipath/nvme/plugin.h b/libmultipath/nvme/plugin.h
new file mode 100644
index 0000000..91079fb
--- /dev/null
+++ b/libmultipath/nvme/plugin.h
@@ -0,0 +1,36 @@
+#ifndef PLUGIN_H
+#define PLUGIN_H
+
+#include <stdbool.h>
+
+struct program {
+	const char *name;
+	const char *version;
+	const char *usage;
+	const char *desc;
+	const char *more;
+	struct command **commands;	/* array of command pointers */
+	struct plugin *extensions;	/* plugins attached to this program */
+};
+
+struct plugin {
+	const char *name;
+	const char *desc;
+	struct command **commands;	/* array of command pointers */
+	struct program *parent;		/* owning program */
+	struct plugin *next;		/* link to another plugin */
+	struct plugin *tail;
+};
+
+struct command {
+	char *name;
+	char *help;
+	int (*fn)(int argc, char **argv, struct command *command, struct plugin *plugin);	/* entry point */
+	char *alias;
+};
+
+void usage(struct plugin *plugin);
+void general_help(struct plugin *plugin);
+int handle_plugin(int argc, char **argv, struct plugin *plugin);
+
+#endif
diff --git a/libmultipath/parser.c b/libmultipath/parser.c
new file mode 100644
index 0000000..d478b17
--- /dev/null
+++ b/libmultipath/parser.c
@@ -0,0 +1,603 @@
+/*
+ * Part:        Configuration file parser/reader.
Place into the dynamic
+ *              data structure representation the conf file
+ *
+ * Version:     $Id: parser.c,v 1.0.3 2003/05/11 02:28:03 acassen Exp $
+ *
+ * Author:      Alexandre Cassen, <acassen@linux-vs.org>
+ *
+ *              This program is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *              See the GNU General Public License for more details.
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ */
+
+#include <syslog.h>
+#include <errno.h>
+
+#include "vector.h"
+#include "config.h"
+#include "parser.h"
+#include "memory.h"
+#include "debug.h"
+
+/* local vars */
+static int sublevel = 0;	/* nesting depth used by _install_keyword() */
+static int line_nr;		/* line counter of the file being parsed */
+
+/*
+ * Allocate a keyword entry and append it to @keywords.
+ * @string is stored by pointer, not copied.
+ * Returns 0 on success, 1 if either allocation fails.
+ */
+int
+keyword_alloc(vector keywords, char *string,
+	      int (*handler) (struct config *, vector),
+	      int (*print) (struct config *, char *, int, const void*),
+	      int unique)
+{
+	struct keyword *keyword;
+
+	keyword = MALLOC(sizeof (*keyword));
+
+	if (!keyword)
+		return 1;
+
+	if (!vector_alloc_slot(keywords)) {
+		FREE(keyword);
+		return 1;
+	}
+	keyword->string = string;
+	keyword->handler = handler;
+	keyword->print = print;
+	/* _install_keyword() tests ->sub, so do not rely on the allocator */
+	keyword->sub = NULL;
+	keyword->unique = unique;
+
+	vector_set_slot(keywords, keyword);
+
+	return 0;
+}
+
+/* Enter one more nesting level for subsequent _install_keyword() calls. */
+void
+install_sublevel(void)
+{
+	sublevel++;
+}
+
+/* Leave the nesting level entered by install_sublevel(). */
+void
+install_sublevel_end(void)
+{
+	sublevel--;
+}
+
+/*
+ * Add a sub-keyword below the most recently installed keyword, descending
+ * "sublevel" levels through the last entry of each sub vector.
+ * Returns 0 on success, 1 if there is no parent or an allocation fails.
+ */
+int
+_install_keyword(vector keywords, char *string,
+		 int (*handler) (struct config *, vector),
+		 int (*print) (struct config *, char *, int, const void*),
+		 int unique)
+{
+	int i = 0;
+	struct keyword *keyword;
+
+	/* fetch last keyword */
+	keyword = VECTOR_LAST_SLOT(keywords);
+	if (!keyword)
+		return 1;
+
+	/* position to last sub level */
+	for (i = 0; i < sublevel; i++) {
+		keyword = VECTOR_LAST_SLOT(keyword->sub);
+		if (!keyword)
+			return 1;
+	}
+
+	/* First sub level allocation */
+	if (!keyword->sub)
+		keyword->sub = vector_alloc();
+
+	if (!keyword->sub)
+		return 1;
+
+	/* add new sub keyword */
+	return keyword_alloc(keyword->sub, string, handler, print, unique);
+}
+
+/* Recursively free a keyword vector and every keyword (and sub vector) in it. */
+void
+free_keywords(vector keywords)
+{
+	struct keyword *keyword;
+	int i;
+
+	if (!keywords)
+		return;
+
+	for (i = 0; i < VECTOR_SIZE(keywords); i++) {
+		keyword = VECTOR_SLOT(keywords, i);
+		if (keyword->sub)
+			free_keywords(keyword->sub);
+		FREE(keyword);
+	}
+	vector_free(keywords);
+}
+
+/*
+ * Depth-first search for the keyword called @name. The search runs over @v,
+ * or over @keywords when @v is NULL. Returns NULL when nothing matches.
+ */
+struct keyword *
+find_keyword(vector keywords, vector v, char * name)
+{
+	struct keyword *keyword;
+	int i;
+
+	if (!name || !keywords)
+		return NULL;
+
+	if (!v)
+		v = keywords;
+
+	for (i = 0; i < VECTOR_SIZE(v); i++) {
+		keyword = VECTOR_SLOT(v, i);
+		if (!strcmp(keyword->string, name))
+			return keyword;
+		if (keyword->sub) {
+			keyword = find_keyword(keywords, keyword->sub, name);
+			if (keyword)
+				return keyword;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Expand @fmt into @buff (at most @len bytes): "%k" becomes the keyword name,
+ * "%v" the output of the keyword's print callback, everything else is copied.
+ * Returns the number of bytes produced, or 0 (with buff[0] cleared) as soon
+ * as the print callback reports that there is no value.
+ */
+int
+snprint_keyword(char *buff, int len, char *fmt, struct keyword *kw,
+		const void *data)
+{
+	int r;
+	int fwd = 0;
+	char *f = fmt;
+	struct config *conf;
+
+	if (!kw || !kw->print)
+		return 0;
+
+	do {
+		if (fwd == len || *f == '\0')
+			break;
+		if (*f != '%') {
+			*(buff + fwd) = *f;
+			fwd++;
+			continue;
+		}
+		f++;
+		switch(*f) {
+		case 'k':
+			fwd += snprintf(buff + fwd, len - fwd, "%s", kw->string);
+			break;
+		case 'v':
+			conf = get_multipath_config();
+			pthread_cleanup_push(put_multipath_config, conf);
+			r = kw->print(conf, buff + fwd, len - fwd, data);
+			pthread_cleanup_pop(1);
+			if (!r) { /* no output if no value */
+				buff[0] = '\0';
+				return 0;
+			}
+			fwd += r;
+			break;
+		}
+		/* snprintf() reports the untruncated length; clamp to the buffer */
+		if (fwd > len)
+			fwd = len;
+	} while (*f++);
+	return fwd;
+}
+
+/* Token stored in a strvec for a double quote: it cannot occur as real text. */
+static const char quote_marker[] = { '\0', '"', '\0' };
+bool is_quote(const char* token)
+{
+	return !memcmp(token, quote_marker, sizeof(quote_marker));
+}
+
+/*
+ * Token separators: whitespace and any non-ASCII byte. The cast keeps the
+ * <ctype.h> argument in the range those functions are defined for.
+ */
+static int
+is_separator(char c)
+{
+	unsigned char uc = (unsigned char) c;
+
+	return isspace(uc) || !isascii(uc);
+}
+
+/*
+ * Split one configuration line into a vector of newly allocated tokens.
+ * '{' and '}' are tokens of their own, a double quote is stored as
+ * quote_marker, and "" inside a quoted string stands for a literal quote.
+ * '!' and '#' start a comment. Returns NULL for blank or comment-only lines
+ * and on allocation failure; otherwise the caller owns the vector and its
+ * tokens.
+ */
+vector
+alloc_strvec(char *string)
+{
+	char *cp, *start, *token;
+	int toklen;
+	int in_string;
+	vector strvec;
+
+	if (!string)
+		return NULL;
+
+	cp = string;
+
+	/* Skip white spaces */
+	while (is_separator(*cp))
+		cp++;
+
+	/* Return if there is only white spaces */
+	if (*cp == '\0')
+		return NULL;
+
+	/* Return if string begin with a comment */
+	if (*cp == '!' || *cp == '#')
+		return NULL;
+
+	/* Create a vector and alloc each command piece */
+	strvec = vector_alloc();
+
+	if (!strvec)
+		return NULL;
+
+	in_string = 0;
+	while (1) {
+		int two_quotes = 0;
+
+		if (!vector_alloc_slot(strvec))
+			goto out;
+
+		start = cp;
+		if (*cp == '"' && !(in_string && *(cp + 1) == '"')) {
+			cp++;
+			token = MALLOC(sizeof(quote_marker));
+
+			if (!token)
+				goto out;
+
+			memcpy(token, quote_marker, sizeof(quote_marker));
+			in_string = !in_string;
+		} else if (!in_string && (*cp == '{' || *cp == '}')) {
+			token = MALLOC(2);
+
+			if (!token)
+				goto out;
+
+			*(token) = *cp;
+			*(token + 1) = '\0';
+			cp++;
+		} else {
+
+		move_on:
+			while ((in_string ||
+				(!is_separator(*cp) &&
+				 *cp != '!' && *cp != '#' && *cp != '{' &&
+				 *cp != '}')) && *cp != '\0' && *cp != '"')
+				cp++;
+
+			/* Two consecutive double quotes - don't end string */
+			if (in_string && *cp == '"') {
+				if (*(cp + 1) == '"') {
+					two_quotes = 1;
+					cp += 2;
+					goto move_on;
+				}
+			}
+
+			toklen = cp - start;
+			token = MALLOC(toklen + 1);
+
+			if (!token)
+				goto out;
+
+			memcpy(token, start, toklen);
+			*(token + toklen) = '\0';
+
+			/* Replace "" by " */
+			if (two_quotes) {
+				char *qq = strstr(token, "\"\"");
+				while (qq != NULL) {
+					memmove(qq + 1, qq + 2,
+						toklen + 1 - (qq + 2 - token));
+					qq = strstr(qq + 1, "\"\"");
+				}
+			}
+		}
+		vector_set_slot(strvec, token);
+
+		while (!in_string && is_separator(*cp))
+			cp++;
+		if (*cp == '\0' || *cp == '!' || *cp == '#')
+			return strvec;
+	}
+out:
+	/* release the tokens collected so far, not just the vector */
+	free_strvec(strvec);
+	return NULL;
+}
+
+/*
+ * Read one line into @buf and cut it at the first CR/LF.
+ * Returns 1 when a line was read, 0 at end of file or on error.
+ */
+static int
+read_line(FILE *stream, char *buf, int size)
+{
+	char *p;
+
+	if (fgets(buf, size, stream) == NULL)
+		return 0;
+	strtok_r(buf, "\n\r", &p);
+	return 1;
+}
+
+/*
+ * Build the value of an option line as a newly allocated string.
+ * An unquoted value is the second token; a quoted value is the tokens
+ * between the quote markers joined by single spaces (possibly empty).
+ * Returns NULL, after logging, when the value is missing or empty or when
+ * memory runs out.
+ */
+void *
+set_value(vector strvec)
+{
+	char *str = VECTOR_SLOT(strvec, 1);
+	size_t size;
+	int i = 0;
+	int len = 0;
+	char *alloc = NULL;
+	char *tmp;
+
+	if (!str) {
+		condlog(0, "option '%s' missing value",
+			(char *)VECTOR_SLOT(strvec, 0));
+		return NULL;
+	}
+	if (!is_quote(str)) {
+		size = strlen(str);
+		if (size == 0) {
+			condlog(0, "option '%s' has empty value",
+				(char *)VECTOR_SLOT(strvec, 0));
+			return NULL;
+		}
+		alloc = MALLOC(sizeof (char) * (size + 1));
+		if (!alloc)
+			goto oom;
+		/* size + 1: copy the terminator too */
+		memcpy(alloc, str, size + 1);
+		return alloc;
+	}
+	/* Even empty quotes counts as a value (An empty string) */
+	alloc = MALLOC(sizeof (char));
+	if (!alloc)
+		goto oom;
+	*alloc = '\0';
+	for (i = 2; i < VECTOR_SIZE(strvec); i++) {
+		str = VECTOR_SLOT(strvec, i);
+		if (!str) {
+			FREE(alloc);
+			condlog(0, "parse error for option '%s'",
+				(char *)VECTOR_SLOT(strvec, 0));
+			return NULL;
+		}
+		if (is_quote(str))
+			break;
+		tmp = alloc;
+		/* The first +1 is for the NULL byte. The rest are for the
+		 * spaces between words */
+		len += strlen(str) + 1;
+		alloc = REALLOC(alloc, sizeof (char) * len);
+		if (!alloc) {
+			FREE(tmp);
+			goto oom;
+		}
+		if (*alloc != '\0')
+			strncat(alloc, " ", 1);
+		strncat(alloc, str, len - strlen(alloc) - 1);
+	}
+	return alloc;
+oom:
+	condlog(0, "can't allocate memory for option '%s'",
+		(char *)VECTOR_SLOT(strvec, 0));
+	return NULL;
+}
+
+/* non-recursive configuration stream handler */
+static int kw_level = 0;
+
+/*
+ * Remember @str in @uniques, logging a warning if it is already there.
+ * Returns 0 normally (a duplicate is only warned about), 1 on allocation
+ * failure.
+ */
+int warn_on_duplicates(vector uniques, char *str, char *file)
+{
+	char *tmp;
+	int i;
+
+	vector_foreach_slot(uniques, tmp, i) {
+		if (!strcmp(str, tmp)) {
+			condlog(1, "%s line %d, duplicate keyword: %s",
+				file, line_nr, str);
+			return 0;
+		}
+	}
+	tmp = strdup(str);
+	if (!tmp)
+		return 1;
+	if (!vector_alloc_slot(uniques)) {
+		free(tmp);
+		return 1;
+	}
+	vector_set_slot(uniques, tmp);
+	return 0;
+}
+
+/* Free the strdup()ed names collected by warn_on_duplicates(). */
+void free_uniques(vector uniques)
+{
+	char *tmp;
+	int i;
+
+	vector_foreach_slot(uniques, tmp, i)
+		free(tmp);
+	vector_free(uniques);
+}
+
+/* Non-zero if @str names a section that must be followed by '{'. */
+int
+is_sublevel_keyword(char *str)
+{
+	return (strcmp(str, "defaults") == 0 || strcmp(str, "blacklist") == 0 ||
+		strcmp(str, "blacklist_exceptions") == 0 ||
+		strcmp(str, "devices") == 0 || strcmp(str, "device") == 0 ||
+		strcmp(str, "multipaths") == 0 ||
+		strcmp(str, "multipath") == 0);
+}
+
+/*
+ * Sanity-check one tokenized line and log anything suspicious.
+ * Returns -1 if the line must be skipped, 0 if it can be processed
+ * (possibly after a warning).
+ */
+int
+validate_config_strvec(vector strvec, char *file)
+{
+	char *str;
+	int i;
+
+	str = VECTOR_SLOT(strvec, 0);
+	if (str == NULL) {
+		condlog(0, "can't parse option on line %d of %s",
+			line_nr, file);
+		return -1;
+	}
+	if (*str == '}') {
+		if (VECTOR_SIZE(strvec) > 1)
+			condlog(0, "ignoring extra data starting with '%s' on line %d of %s", (char *)VECTOR_SLOT(strvec, 1), line_nr, file);
+		return 0;
+	}
+	if (*str == '{') {
+		condlog(0, "invalid keyword '%s' on line %d of %s",
+			str, line_nr, file);
+		return -1;
+	}
+	if (is_sublevel_keyword(str)) {
+		str = VECTOR_SLOT(strvec, 1);
+		if (str == NULL)
+			condlog(0, "missing '{' on line %d of %s",
+				line_nr, file);
+		else if (*str != '{')
+			condlog(0, "expecting '{' on line %d of %s. found '%s'",
+				line_nr, file, str);
+		else if (VECTOR_SIZE(strvec) > 2)
+			condlog(0, "ignoring extra data starting with '%s' on line %d of %s", (char *)VECTOR_SLOT(strvec, 2), line_nr, file);
+		return 0;
+	}
+	str = VECTOR_SLOT(strvec, 1);
+	if (str == NULL) {
+		condlog(0, "missing value for option '%s' on line %d of %s",
+			(char *)VECTOR_SLOT(strvec, 0), line_nr, file);
+		return -1;
+	}
+	if (!is_quote(str)) {
+		if (VECTOR_SIZE(strvec) > 2)
+			condlog(0, "ignoring extra data starting with '%s' on line %d of %s", (char *)VECTOR_SLOT(strvec, 2), line_nr, file);
+		return 0;
+	}
+	/* quoted value: look for the closing quote marker */
+	for (i = 2; i < VECTOR_SIZE(strvec); i++) {
+		str = VECTOR_SLOT(strvec, i);
+		if (str == NULL) {
+			condlog(0, "can't parse value on line %d of %s",
+				line_nr, file);
+			return -1;
+		}
+		if (is_quote(str)) {
+			if (VECTOR_SIZE(strvec) > i + 1)
+				condlog(0, "ignoring extra data starting with '%s' on line %d of %s", (char *)VECTOR_SLOT(strvec, (i + 1)), line_nr, file);
+			return 0;
+		}
+	}
+	condlog(0, "missing closing quotes on line %d of %s",
+		line_nr, file);
+	return 0;
+}
+
+/*
+ * Parse lines from @stream against @keywords until end of file or, inside a
+ * section, until the closing '}'. Handlers of matching keywords are called
+ * and sections recurse into the keyword's sub vector.
+ * Returns the sum of the handler results (0 when everything succeeded).
+ */
+static int
+process_stream(struct config *conf, FILE *stream, vector keywords, char *file)
+{
+	int i;
+	int r = 0, t;
+	struct keyword *keyword;
+	char *str;
+	char *buf;
+	vector strvec;
+	vector uniques;
+
+	uniques = vector_alloc();
+	if (!uniques)
+		return 1;
+
+	buf = MALLOC(MAXBUF);
+
+	if (!buf) {
+		vector_free(uniques);
+		return 1;
+	}
+
+	while (read_line(stream, buf, MAXBUF)) {
+		line_nr++;
+		strvec = alloc_strvec(buf);
+		if (!strvec)
+			continue;
+
+		if (validate_config_strvec(strvec, file) != 0) {
+			free_strvec(strvec);
+			continue;
+		}
+
+		str = VECTOR_SLOT(strvec, 0);
+
+		if (!strcmp(str, EOB)) {
+			if (kw_level > 0) {
+				free_strvec(strvec);
+				break;
+			}
+			condlog(0, "unmatched '%s' at line %d of %s",
+				EOB, line_nr, file);
+		}
+
+		for (i = 0; i < VECTOR_SIZE(keywords); i++) {
+			keyword = VECTOR_SLOT(keywords, i);
+
+			if (!strcmp(keyword->string, str)) {
+				if (keyword->unique &&
+				    warn_on_duplicates(uniques, str, file)) {
+					r = 1;
+					free_strvec(strvec);
+					goto out;
+				}
+				if (keyword->handler) {
+					t = (*keyword->handler) (conf, strvec);
+					r += t;
+					if (t)
+						condlog(1, "%s line %d, parsing failed: %s",
+							file, line_nr, buf);
+				}
+
+				if (keyword->sub) {
+					kw_level++;
+					r += process_stream(conf, stream,
+							    keyword->sub, file);
+					kw_level--;
+				}
+				break;
+			}
+		}
+		if (i >= VECTOR_SIZE(keywords))
+			condlog(1, "%s line %d, invalid keyword: %s",
+				file, line_nr, str);
+
+		free_strvec(strvec);
+	}
+
+out:
+	FREE(buf);
+	free_uniques(uniques);
+	return r;
+}
+
+/* Data initialization */
+/*
+ * Parse configuration file @file with the keywords installed in @conf.
+ * Returns 0 on success and non-zero if the file cannot be opened, no
+ * keywords are installed, or a handler failed.
+ */
+int
+process_file(struct config *conf, char *file)
+{
+	int r;
+	FILE *stream;
+
+	if (!conf->keywords) {
+		condlog(0, "No keywords allocated");
+		return 1;
+	}
+	stream = fopen(file, "r");
+	if (!stream) {
+		condlog(0, "couldn't open configuration file '%s': %s",
+			file, strerror(errno));
+		return 1;
+	}
+
+	/* Stream handling */
+	line_nr = 0;
+	r = process_stream(conf, stream, conf->keywords, file);
+	fclose(stream);
+
+	return r;
+}
diff --git a/libmultipath/parser.h b/libmultipath/parser.h
new file mode 100644
index 0000000..62906e9
--- /dev/null
+++ b/libmultipath/parser.h
@@ -0,0 +1,86 @@
+/*
+ * Soft:        Keepalived is a failover program for the LVS project
+ *              <www.linuxvirtualserver.org>. It monitor & manipulate
+ *              a loadbalanced server pool using multi-layer checks.
+ *
+ * Part:        cfreader.c include file.
+ *
+ * Version:     $Id: parser.h,v 1.0.3 2003/05/11 02:28:03 acassen Exp $
+ *
+ * Author:      Alexandre Cassen, <acassen@linux-vs.org>
+ *
+ *              This program is distributed in the hope that it will be useful,
+ *              but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *              MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *              See the GNU General Public License for more details.
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _PARSER_H
+#define _PARSER_H
+
+/* system includes */
+#include <sys/types.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <syslog.h>
+#include <ctype.h>
+
+/* local includes */
+#include "vector.h"
+#include "config.h"
+
+/* Global definitions */
+#define EOB  "}"
+#define MAXBUF	1024
+
+/* keyword definition */
+struct keyword {
+	char *string;					/* keyword name */
+	int (*handler) (struct config *, vector);	/* called with the tokenized line */
+	int (*print) (struct config *, char *, int, const void *);	/* value printer */
+	vector sub;					/* nested keywords, or NULL */
+	int unique;					/* non-zero: warn on duplicates */
+};
+
+/* Reloading helpers */
+#define SET_RELOAD      (reload = 1)
+#define UNSET_RELOAD    (reload = 0)
+#define RELOAD_DELAY    5
+
+/* iterator helper */
+#define iterate_sub_keywords(k,p,i) \
+	for (i = 0; i < (k)->sub->allocated && ((p) = (k)->sub->slot[i]); i++)
+
+/* Prototypes */
+extern int keyword_alloc(vector keywords, char *string,
+			 int (*handler) (struct config *, vector),
+			 int (*print) (struct config *, char *, int,
+				       const void *),
+			 int unique);
+#define install_keyword_root(str, h) keyword_alloc(keywords, str, h, NULL, 1)
+extern void install_sublevel(void);
+extern void install_sublevel_end(void);
+extern int _install_keyword(vector keywords, char *string,
+			    int (*handler) (struct config *, vector),
+			    int (*print) (struct config *, char *, int,
+					  const void *),
+			    int unique);
+#define install_keyword(str, vec, pri) _install_keyword(keywords, str, vec, pri, 1)
+#define install_keyword_multi(str, vec, pri) _install_keyword(keywords, str, vec, pri, 0)
+extern void dump_keywords(vector keydump, int level);
+extern void free_keywords(vector keywords);
+extern vector alloc_strvec(char *string);
+extern void *set_value(vector strvec);
+extern int process_file(struct config *conf, char *conf_file);
+extern struct keyword * find_keyword(vector keywords, vector v, char * name);
+int snprint_keyword(char *buff, int len, char *fmt, struct keyword *kw,
+		    const void *data);
+bool is_quote(const char* token);
+
+#endif
diff --git a/libmultipath/pgpolicies.c b/libmultipath/pgpolicies.c
new file mode 100644
index 0000000..02cafdc
--- /dev/null
+++ b/libmultipath/pgpolicies.c
@@ -0,0 +1,338 @@
+/*
+ * Copyright (c) 2004, 2005 Christophe Varoqui
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+
+#include "checkers.h"
+#include "util.h"
+#include "memory.h"
+#include "vector.h"
+#include "structs.h"
+#include "pgpolicies.h"
+#include "switchgroup.h"
+
+/*
+ * Map a policy name to its enum iopolicies value.
+ * Only the leading characters are compared (strncmp), so any string that
+ * starts with a known name matches it. Returns IOPOLICY_UNDEF otherwise.
+ */
+int get_pgpolicy_id(char * str)
+{
+	if (0 == strncmp(str, "failover", 8))
+		return FAILOVER;
+	if (0 == strncmp(str, "multibus", 8))
+		return MULTIBUS;
+	if (0 == strncmp(str, "group_by_serial", 15))
+		return GROUP_BY_SERIAL;
+	if (0 == strncmp(str, "group_by_prio", 13))
+		return GROUP_BY_PRIO;
+	if (0 == strncmp(str, "group_by_node_name", 18))
+		return GROUP_BY_NODE_NAME;
+
+	return IOPOLICY_UNDEF;
+}
+
+/*
+ * Write the name of policy @id into @buff ("undefined" for unknown ids).
+ * Returns what snprintf() returns.
+ */
+int get_pgpolicy_name(char * buff, int len, int id)
+{
+	char * s;
+
+	switch (id) {
+	case FAILOVER:
+		s = "failover";
+		break;
+	case MULTIBUS:
+		s = "multibus";
+		break;
+	case GROUP_BY_SERIAL:
+		s = "group_by_serial";
+		break;
+	case GROUP_BY_PRIO:
+		s = "group_by_prio";
+		break;
+	case GROUP_BY_NODE_NAME:
+		s = "group_by_node_name";
+		break;
+	default:
+		s = "undefined";
+		break;
+	}
+	return snprintf(buff, len, "%s", s);
+}
+
+
+/*
+ * Insertion-sort mp->pg in place: lower "marginal" first, then higher
+ * priority, then more enabled paths. Each group's priority is refreshed
+ * with path_group_prio_update() before it is placed.
+ */
+void
+sort_pathgroups (struct multipath *mp) {
+	int i, j;
+	struct pathgroup * pgp1, * pgp2;
+
+	if (!mp->pg)
+		return;
+
+	vector_foreach_slot(mp->pg, pgp1, i) {
+		path_group_prio_update(pgp1);
+		/* walk back to the first group that must stay ahead of pgp1 */
+		for (j = i - 1; j >= 0; j--) {
+			pgp2 = VECTOR_SLOT(mp->pg, j);
+			if (!pgp2)
+				continue;
+			if (pgp2->marginal < pgp1->marginal ||
+			    (pgp2->marginal == pgp1->marginal &&
+			     (pgp2->priority > pgp1->priority ||
+			      (pgp2->priority == pgp1->priority &&
+			       pgp2->enabled_paths >= pgp1->enabled_paths)))) {
+				vector_move_up(mp->pg, i, j + 1);
+				break;
+			}
+		}
+		/* nothing ranks ahead of pgp1: it goes to the front */
+		if (j < 0 && i != 0)
+			vector_move_up(mp->pg, i, 0);
+	}
+}
+
+/*
+ * Split @paths into newly allocated *normal_p and *marginal_p vectors.
+ * Returns 0 on success. Returns -1, with both outputs NULL, when the paths
+ * are not a mix of both kinds or when an allocation fails.
+ */
+static int
+split_marginal_paths(vector paths, vector *normal_p, vector *marginal_p)
+{
+	int i;
+	int has_marginal = 0;
+	int has_normal = 0;
+	struct path *pp;
+	vector normal = NULL;
+	vector marginal = NULL;
+
+	*normal_p = *marginal_p = NULL;
+	vector_foreach_slot(paths, pp, i) {
+		if (pp->marginal)
+			has_marginal = 1;
+		else
+			has_normal = 1;
+	}
+
+	if (!has_marginal || !has_normal)
+		return -1;
+
+	normal = vector_alloc();
+	marginal = vector_alloc();
+	if (!normal || !marginal)
+		goto fail;
+
+	vector_foreach_slot(paths, pp, i) {
+		if (pp->marginal) {
+			if (store_path(marginal, pp))
+				goto fail;
+		}
+		else {
+			if (store_path(normal, pp))
+				goto fail;
+		}
+	}
+	*normal_p = normal;
+	*marginal_p = marginal;
+	return 0;
+fail:
+	vector_free(normal);
+	vector_free(marginal);
+	return -1;
+}
+
+/*
+ * Distribute mp->paths into path groups using mp->pgpolicyfn. When
+ * @marginal_pathgroups is set and the paths are mixed, normal and marginal
+ * paths are grouped separately. On success mp->paths is freed and 0 is
+ * returned; on failure mp->pg is freed and 1 is returned.
+ */
+int group_paths(struct multipath *mp, int marginal_pathgroups)
+{
+	vector normal, marginal;
+
+	if (!mp->pg)
+		mp->pg = vector_alloc();
+	if (!mp->pg)
+		return 1;
+
+	if (VECTOR_SIZE(mp->paths) == 0)
+		goto out;
+	if (!mp->pgpolicyfn)
+		goto fail;
+
+	if (!marginal_pathgroups ||
+	    split_marginal_paths(mp->paths, &normal, &marginal) != 0) {
+		if (mp->pgpolicyfn(mp, mp->paths) != 0)
+			goto fail;
+	} else {
+		if (mp->pgpolicyfn(mp, normal) != 0)
+			goto fail_marginal;
+		if (mp->pgpolicyfn(mp, marginal) != 0)
+			goto fail_marginal;
+		vector_free(normal);
+		vector_free(marginal);
+	}
+	sort_pathgroups(mp);
+out:
+	vector_free(mp->paths);
+	mp->paths = NULL;
+	return 0;
+fail_marginal:
+	vector_free(normal);
+	vector_free(marginal);
+fail:
+	vector_free(mp->pg);
+	mp->pg = NULL;
+	return 1;
+}
+
+typedef bool (path_match_fn)(struct path *pp1, struct path *pp2);
+
+/* True if both paths have the same tgt_node_name. */
+bool
+node_names_match(struct path *pp1, struct path *pp2)
+{
+	return (strncmp(pp1->tgt_node_name, pp2->tgt_node_name,
+			NODE_NAME_SIZE) == 0);
+}
+
+/* True if both paths have the same serial. */
+bool
+serials_match(struct path *pp1, struct path *pp2)
+{
+	return (strncmp(pp1->serial, pp2->serial, SERIAL_SIZE) == 0);
+}
+
+/* True if both paths have the same priority. */
+bool
+prios_match(struct path *pp1, struct path *pp2)
+{
+	return (pp1->priority == pp2->priority);
+}
+
+/*
+ * Create one path group per equivalence class of @paths under
+ * @path_match_fn, in order of first appearance, and add them to @mp.
+ * Returns 0 on success (an empty @paths adds nothing). On failure all path
+ * groups of @mp are freed, mp->pg is set to NULL and 1 is returned.
+ */
+int group_by_match(struct multipath * mp, vector paths,
+		   bool (*path_match_fn)(struct path *, struct path *))
+{
+	int i, j;
+	int npaths = VECTOR_SIZE(paths);
+	int * bitmap;
+	struct path * pp;
+	struct pathgroup * pgp;
+	struct path * pp2;
+
+	/* nothing to group; also avoids a zero-sized allocation */
+	if (npaths == 0)
+		return 0;
+
+	/* init the bitmap: bitmap[i] != 0 once path i sits in a group */
+	bitmap = MALLOC(npaths * sizeof (*bitmap));
+
+	if (!bitmap)
+		goto out;
+
+	memset(bitmap, 0, npaths * sizeof (*bitmap));
+
+	for (i = 0; i < npaths; i++) {
+
+		if (bitmap[i])
+			continue;
+
+		pp = VECTOR_SLOT(paths, i);
+
+		/* here, we really got a new pg */
+		pgp = alloc_pathgroup();
+
+		if (!pgp)
+			goto out1;
+
+		if (add_pathgroup(mp, pgp))
+			goto out2;
+
+		/* feed the first path */
+		if (store_path(pgp->paths, pp))
+			goto out1;
+
+		bitmap[i] = 1;
+
+		for (j = i + 1; j < npaths; j++) {
+
+			if (bitmap[j])
+				continue;
+
+			pp2 = VECTOR_SLOT(paths, j);
+
+			if (path_match_fn(pp, pp2)) {
+				if (store_path(pgp->paths, pp2))
+					goto out1;
+
+				bitmap[j] = 1;
+			}
+		}
+	}
+	FREE(bitmap);
+	return 0;
+out2:
+	/* pgp never made it into mp->pg, so free it separately */
+	free_pathgroup(pgp, KEEP_PATHS);
+out1:
+	FREE(bitmap);
+out:
+	free_pgvec(mp->pg, KEEP_PATHS);
+	mp->pg = NULL;
+	return 1;
+}
+
+/*
+ * One path group per unique tgt_node_name present in the path vector
+ */
+int group_by_node_name(struct multipath * mp, vector paths)
+{
+	return group_by_match(mp, paths, node_names_match);
+}
+
+/*
+ * One path group per unique serial number present in the path vector
+ */
+int group_by_serial(struct multipath * mp, vector paths)
+{
+	return group_by_match(mp, paths, serials_match);
+}
+
+/*
+ * One path group per priority present in the path vector
+ */
+int group_by_prio(struct multipath *mp, vector paths)
+{
+	return group_by_match(mp, paths, prios_match);
+}
+
+/*
+ * One path group per path (the function set for the failover policy is not
+ * visible here). Returns 0 on success; on failure frees mp->pg and
+ * returns 1.
+ */
+int one_path_per_group(struct multipath *mp, vector paths)
+{
+	int i;
+	struct path * pp;
+	struct pathgroup * pgp;
+
+	for (i = 0; i < VECTOR_SIZE(paths); i++) {
+		pp = VECTOR_SLOT(paths, i);
+		pgp = alloc_pathgroup();
+
+		if (!pgp)
+			goto out;
+
+		if (add_pathgroup(mp, pgp))
+			goto out1;
+
+		if (store_path(pgp->paths, pp))
+			goto out;
+	}
+	return 0;
+out1:
+	free_pathgroup(pgp, KEEP_PATHS);
+out:
+	free_pgvec(mp->pg, KEEP_PATHS);
+	mp->pg = NULL;
+	return 1;
+}
+
+/*
+ * A single path group holding every path. Returns 0 on success; on failure
+ * frees mp->pg and returns 1.
+ */
+int one_group(struct multipath *mp, vector paths)	/* aka multibus */
+{
+	int i;
+	struct path * pp;
+	struct pathgroup * pgp;
+
+	pgp = alloc_pathgroup();
+
+	if (!pgp)
+		goto out;
+
+	if (add_pathgroup(mp, pgp))
+		goto out1;
+
+	for (i = 0; i < VECTOR_SIZE(paths); i++) {
+		pp = VECTOR_SLOT(paths, i);
+
+		if (store_path(pgp->paths, pp))
+			goto out;
+	}
+	return 0;
+out1:
+	free_pathgroup(pgp, KEEP_PATHS);
+out:
+	free_pgvec(mp->pg, KEEP_PATHS);
+	mp->pg = NULL;
+	return 1;
+}
diff --git a/libmultipath/pgpolicies.h b/libmultipath/pgpolicies.h
new file mode 100644
index 0000000..1592761
--- /dev/null
+++ b/libmultipath/pgpolicies.h
@@ -0,0 +1,34 @@
+#ifndef _PGPOLICIES_H
+#define _PGPOLICIES_H
+
+#if 0
+#ifndef _MAIN_H
+#include "main.h"
+#endif
+#endif
+
+#define POLICY_NAME_SIZE 32
+
+/* Storage controllers capabilities */
+enum iopolicies {
+	IOPOLICY_UNDEF,
+	FAILOVER,
+	MULTIBUS,
+	GROUP_BY_SERIAL,
+	GROUP_BY_PRIO,
+	GROUP_BY_NODE_NAME
+};
+
+int get_pgpolicy_id(char *);
+int get_pgpolicy_name (char *, int, int);
+int group_paths(struct multipath *, int);
+/*
+ * policies
+ */
+int one_path_per_group(struct multipath *, vector);
+int one_group(struct multipath *, vector);
+int group_by_serial(struct multipath *, vector);
+int group_by_prio(struct multipath *, vector);
+int group_by_node_name(struct multipath *, vector);
+
+#endif
diff --git a/libmultipath/print.c b/libmultipath/print.c
new file mode 100644
index 0000000..b944ef3
--- /dev/null
+++ b/libmultipath/print.c
@@ -0,0 +1,2132 @@
+/*
+ * Copyright (c) 2005 Christophe Varoqui
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "checkers.h"
+#include "vector.h"
+#include "structs.h"
+#include "structs_vec.h"
+#include "dmparser.h"
+#include "config.h"
+#include "configure.h"
+#include "pgpolicies.h"
+#include "print.h"
+#include "defaults.h"
+#include "parser.h"
+#include "blacklist.h"
+#include "switchgroup.h"
+#include "devmapper.h"
+#include "uevent.h"
+#include "debug.h"
+#include "discovery.h"
+#include "util.h"
+/* TAIL, NOPAD, PAD and PRINT expand to code using the caller's locals: line, len, c (cursor), s (pad start) and fwd. */
+#define MAX(x,y) (((x) > (y)) ? (x) : (y))
+#define MIN(x,y) (((x) > (y)) ? (y) : (x))
+#define TAIL (line + len - 1 - c)
+#define NOPAD s = c
+#define PAD(x) \
+do { \
+	while (c < (s + x) && (c < (line + len - 1))) \
+		*c++ = ' '; \
+	s = c; \
+} while (0)
+/* Finish a line: overwrite the character before the cursor with '\n' and NUL-terminate; a cursor at or past line + len is pulled back to the last byte first. */
+static char *
+__endline(char *line, size_t len, char *c)
+{
+	if (c > line) {
+		if (c >= line + len)
+			c = line + len - 1;
+		*(c - 1) = '\n';
+		*c = '\0';
+	}
+	return c;
+}
+/* PRINT: snprintf() at var and advance the cursor c by the result, capped at size when the output was truncated. */
+#define PRINT(var, size, format, args...) \
+do { \
+	fwd = snprintf(var, size, format, ##args); \
+	c += (fwd >= size) ? size : fwd; \
+} while (0)
+
+/*
+ * information printing helpers
+ */
+static int
+snprint_str (char * buff, size_t len, const char * str)
+{
+	return snprintf(buff, len, "%s", str);
+}
+
+static int
+snprint_int (char * buff, size_t len, int val)
+{
+	return snprintf(buff, len, "%i", val);
+}
+
+static int
+snprint_uint (char * buff, size_t len, unsigned int val)
+{
+	return snprintf(buff, len, "%u", val);
+}
+/* Print size with a K/M/G/T/P suffix; the input is halved first (presumably 512-byte sectors -> KB, TODO confirm). One decimal is shown for values below 10. */
+static int
+snprint_size (char * buff, size_t len, unsigned long long size)
+{
+	float s = (float)(size >> 1); /* start with KB */
+	char units[] = {'K','M','G','T','P'};
+	char *u = units;
+
+	while (s >= 1024 && *u != 'P') {	/* 'P' is the largest unit: stop scaling there */
+		s = s / 1024;
+		u++;
+	}
+
+	return snprintf(buff, len, "%.*f%c", s < 10, s, *u);	/* precision is 1 when s < 10, else 0 */
+}
+
+/*
+ * multipath info printing functions
+ */
+static int
+snprint_name (char * buff, size_t len, const struct multipath * mpp)
+{
+	if (mpp->alias)
+		return snprintf(buff, len, "%s", mpp->alias);
+	else
+		return snprintf(buff, len, "%s", mpp->wwid);	/* no alias set: fall back to the WWID */
+}
+/* Print "dm-<minor>" from the device-mapper info, or "undef" when mpp->dmi is not set. */
+static int
+snprint_sysfs (char * buff, size_t len, const struct multipath * mpp)
+{
+	if (mpp->dmi)
+		return snprintf(buff,
len, "dm-%i", mpp->dmi->minor);
+	else
+		return snprintf(buff, len, "undef");
+}
+/* Print "ro" or "rw" from the device-mapper info, or "undef" when mpp->dmi is not set. */
+static int
+snprint_ro (char * buff, size_t len, const struct multipath * mpp)
+{
+	if (!mpp->dmi)
+		return snprintf(buff, len, "undef");
+	if (mpp->dmi->read_only)
+		return snprintf(buff, len, "ro");
+	else
+		return snprintf(buff, len, "rw");
+}
+
+/*
+ * Print a progress bar followed by " cur/total".
+ *
+ * When total > 0 the bar consists of PROGRESS_LEN * cur / total 'X'
+ * characters followed by '.' characters for the remainder; otherwise
+ * only the numeric part is printed.
+ *
+ * At most len bytes are written and the result is always
+ * NUL-terminated when len > 0. If the buffer fills up while the bar is
+ * drawn, the numeric part is dropped. Returns the number of characters
+ * stored, not counting the terminator.
+ */
+static int
+snprint_progress (char * buff, size_t len, int cur, int total)
+{
+	size_t pos = 0;
+
+	if (len == 0)
+		return 0;
+
+	if (total > 0) {
+		int i = PROGRESS_LEN * cur / total;
+		int j = PROGRESS_LEN - i;
+
+		/* "pos + 1 < len" always keeps one byte for the terminator */
+		while (i-- > 0 && pos + 1 < len)
+			buff[pos++] = 'X';
+
+		while (j-- > 0 && pos + 1 < len)
+			buff[pos++] = '.';
+	}
+	buff[pos] = '\0';
+
+	if (pos + 1 < len) {
+		int n = snprintf(buff + pos, len - pos, " %i/%i", cur, total);
+
+		/* snprintf() returns the untruncated length: clamp it */
+		if (n > 0)
+			pos += ((size_t)n < len - pos) ? (size_t)n : len - pos - 1;
+	}
+
+	return (int)pos;
+}
+/* Print the failback setting: "immediate", "followover", "-" when no failback is pending, else a progress bar of failback_tick over pgfailback. */
+static int
+snprint_failback (char * buff, size_t len, const struct multipath * mpp)
+{
+	if (mpp->pgfailback == -FAILBACK_IMMEDIATE)
+		return snprintf(buff, len, "immediate");
+	if (mpp->pgfailback == -FAILBACK_FOLLOWOVER)
+		return snprintf(buff, len, "followover");
+
+	if (!mpp->failback_tick)
+		return snprintf(buff, len, "-");
+	else
+		return snprint_progress(buff, len, mpp->failback_tick,
+					mpp->pgfailback);
+}
+/* Print the queueing state derived from no_path_retry and retry_tick; returns 0 without writing for values not handled below. */
+static int
+snprint_queueing (char * buff, size_t len, const struct multipath * mpp)
+{
+	if (mpp->no_path_retry == NO_PATH_RETRY_FAIL)
+		return snprintf(buff, len, "off");
+	else if (mpp->no_path_retry == NO_PATH_RETRY_QUEUE)
+		return snprintf(buff, len, "on");
+	else if (mpp->no_path_retry == NO_PATH_RETRY_UNDEF)
+		return snprintf(buff, len, "-");
+	else if (mpp->no_path_retry > 0) {
+		if (mpp->retry_tick > 0)
+
+			return snprintf(buff, len, "%i sec",
+					mpp->retry_tick);
+		else if (mpp->retry_tick == 0 && count_active_paths(mpp) > 0)
+			return snprintf(buff, len, "%i chk",
+					mpp->no_path_retry);
+		else
+			return snprintf(buff, len, "off");
+	}
+	return 0;
+}
+
+static int
+snprint_nb_paths (char * buff, size_t
len, const struct multipath * mpp)
+{
+	return snprint_int(buff, len, count_active_paths(mpp));
+}
+/* Print "suspend" when the device-mapper info marks the map as suspended, "active" otherwise (also when mpp->dmi is not set). */
+static int
+snprint_dm_map_state (char * buff, size_t len, const struct multipath * mpp)
+{
+	if (mpp->dmi && mpp->dmi->suspended)
+		return snprintf(buff, len, "suspend");
+	else
+		return snprintf(buff, len, "active");
+}
+/* Print mpp->size through snprint_size(). */
+static int
+snprint_multipath_size (char * buff, size_t len, const struct multipath * mpp)
+{
+	return snprint_size(buff, len, mpp->size);
+}
+/* Print the map's features string. */
+static int
+snprint_features (char * buff, size_t len, const struct multipath * mpp)
+{
+	return snprint_str(buff, len, mpp->features);
+}
+/* Print the map's hardware handler string. */
+static int
+snprint_hwhandler (char * buff, size_t len, const struct multipath * mpp)
+{
+	return snprint_str(buff, len, mpp->hwhandler);
+}
+/* The following helpers print the map's stat_* counters as unsigned integers. */
+static int
+snprint_path_faults (char * buff, size_t len, const struct multipath * mpp)
+{
+	return snprint_uint(buff, len, mpp->stat_path_failures);
+}
+
+static int
+snprint_switch_grp (char * buff, size_t len, const struct multipath * mpp)
+{
+	return snprint_uint(buff, len, mpp->stat_switchgroup);
+}
+
+static int
+snprint_map_loads (char * buff, size_t len, const struct multipath * mpp)
+{
+	return snprint_uint(buff, len, mpp->stat_map_loads);
+}
+
+static int
+snprint_total_q_time (char * buff, size_t len, const struct multipath * mpp)
+{
+	return snprint_uint(buff, len, mpp->stat_total_queueing_time);
+}
+
+static int
+snprint_q_timeouts (char * buff, size_t len, const struct multipath * mpp)
+{
+	return snprint_uint(buff, len, mpp->stat_queueing_timeouts);
+}
+
+static int
+snprint_map_failures (char * buff, size_t len, const struct multipath * mpp)
+{
+	return snprint_uint(buff, len, mpp->stat_map_failures);
+}
+/* Print the map's WWID. */
+static int
+snprint_multipath_uuid (char * buff, size_t len, const struct multipath * mpp)
+{
+	return snprint_str(buff, len, mpp->wwid);
+}
+/* Print "vendor,product" of the first path that has both strings set, or "##,##" when there is none. */
+static int
+snprint_multipath_vpr (char * buff, size_t len, const struct multipath * mpp)
+{
+	struct pathgroup * pgp;
+	struct path * pp;
+	int i, j;
+
+
	vector_foreach_slot(mpp->pg, pgp, i) {
+		vector_foreach_slot(pgp->paths, pp, j) {
+			if (strlen(pp->vendor_id) && strlen(pp->product_id))
+				return snprintf(buff, len, "%s,%s",
+						pp->vendor_id, pp->product_id);
+		}
+	}
+	return snprintf(buff, len, "##,##");
+}
+
+/* Print the vendor id of the first path that has one, or "##". */
+static int
+snprint_multipath_vend (char * buff, size_t len, const struct multipath * mpp)
+{
+	struct pathgroup * pgp;
+	struct path * pp;
+	int i, j;
+
+	vector_foreach_slot(mpp->pg, pgp, i) {
+		vector_foreach_slot(pgp->paths, pp, j) {
+			if (strlen(pp->vendor_id))
+				return snprintf(buff, len, "%s", pp->vendor_id);
+		}
+	}
+	return snprintf(buff, len, "##");
+}
+/* Print the product id of the first path that has one, or "##". */
+static int
+snprint_multipath_prod (char * buff, size_t len, const struct multipath * mpp)
+{
+	struct pathgroup * pgp;
+	struct path * pp;
+	int i, j;
+
+	vector_foreach_slot(mpp->pg, pgp, i) {
+		vector_foreach_slot(pgp->paths, pp, j) {
+			if (strlen(pp->product_id))
+				return snprintf(buff, len, "%s", pp->product_id);
+		}
+	}
+	return snprintf(buff, len, "##");
+}
+/* Print the revision of the first path that has one, or "##". */
+static int
+snprint_multipath_rev (char * buff, size_t len, const struct multipath * mpp)
+{
+	struct pathgroup * pgp;
+	struct path * pp;
+	int i, j;
+
+	vector_foreach_slot(mpp->pg, pgp, i) {
+		vector_foreach_slot(pgp->paths, pp, j) {
+			if (strlen(pp->rev))
+				return snprintf(buff, len, "%s", pp->rev);
+		}
+	}
+	return snprintf(buff, len, "##");
+}
+/* The "foreign" column always prints "--" here; the map argument is unused. */
+static int
+snprint_multipath_foreign (char * buff, size_t len,
+			   __attribute__((unused)) const struct multipath * pp)
+{
+	return snprintf(buff, len, "%s", "--");
+}
+/* Print the ACT_*_STR string for mpp->action; other actions return 0 and leave buff untouched. */
+static int
+snprint_action (char * buff, size_t len, const struct multipath * mpp)
+{
+	switch (mpp->action) {
+	case ACT_REJECT:
+		return snprint_str(buff, len, ACT_REJECT_STR);
+	case ACT_RENAME:
+		return snprint_str(buff, len, ACT_RENAME_STR);
+	case ACT_RELOAD:
+		return snprint_str(buff, len, ACT_RELOAD_STR);
+	case ACT_CREATE:
+		return snprint_str(buff, len, ACT_CREATE_STR);
+	case ACT_SWITCHPG:
+		return snprint_str(buff, len, ACT_SWITCHPG_STR);
+	default:
+
return 0;
+	}
+}
+/* Print the vpd_data of the first path that has it set, or "[undef]". */
+static int
+snprint_multipath_vpd_data(char * buff, size_t len,
+			   const struct multipath * mpp)
+{
+	struct pathgroup * pgp;
+	struct path * pp;
+	int i, j;
+
+	vector_foreach_slot(mpp->pg, pgp, i)
+		vector_foreach_slot(pgp->paths, pp, j)
+			if (pp->vpd_data)
+				return snprintf(buff, len, "%s", pp->vpd_data);
+	return snprintf(buff, len, "[undef]");
+}
+
+/*
+ * path info printing functions
+ */
+static int
+snprint_path_uuid (char * buff, size_t len, const struct path * pp)
+{
+	return snprint_str(buff, len, pp->wwid);
+}
+/* Print "host:channel:id:lun" from pp->sg_id; "#:#:#:#" for a NULL path or a negative host number. */
+static int
+snprint_hcil (char * buff, size_t len, const struct path * pp)
+{
+	if (!pp || pp->sg_id.host_no < 0)
+		return snprintf(buff, len, "#:#:#:#");
+
+	return snprintf(buff, len, "%i:%i:%i:%i",
+			pp->sg_id.host_no,
+			pp->sg_id.channel,
+			pp->sg_id.scsi_id,
+			pp->sg_id.lun);
+}
+/* Print the device name, or "-" for a NULL path or an empty name. */
+static int
+snprint_dev (char * buff, size_t len, const struct path * pp)
+{
+	if (!pp || !strlen(pp->dev))
+		return snprintf(buff, len, "-");
+	else
+		return snprint_str(buff, len, pp->dev);
+}
+/* Print pp->dev_t, or "#:#" for a NULL path or an empty pp->dev (the test is on dev, not dev_t). */
+static int
+snprint_dev_t (char * buff, size_t len, const struct path * pp)
+{
+	if (!pp || !strlen(pp->dev))
+		return snprintf(buff, len, "#:#");
+	else
+		return snprint_str(buff, len, pp->dev_t);
+}
+/* Print "offline"/"running" from pp->offline; "unknown" for a NULL path or a path without a map. */
+static int
+snprint_offline (char * buff, size_t len, const struct path * pp)
+{
+	if (!pp || !pp->mpp)
+		return snprintf(buff, len, "unknown");
+	else if (pp->offline)
+		return snprintf(buff, len, "offline");
+	else
+		return snprintf(buff, len, "running");
+}
+/* Print a name for the checker state pp->state; "undef" for a NULL path, a path without a map, or an unlisted state. */
+static int
+snprint_chk_state (char * buff, size_t len, const struct path * pp)
+{
+	if (!pp || !pp->mpp)
+		return snprintf(buff, len, "undef");
+
+	switch (pp->state) {
+	case PATH_UP:
+		return snprintf(buff, len, "ready");
+	case PATH_DOWN:
+		return snprintf(buff, len, "faulty");
+	case PATH_SHAKY:
+		return snprintf(buff, len, "shaky");
+	case PATH_GHOST:
+		return snprintf(buff, len, "ghost");
+	case PATH_PENDING:
+		return snprintf(buff, len, "i/o pending");
+	case PATH_TIMEOUT:
+		return
snprintf(buff, len, "i/o timeout");
+	case PATH_DELAYED:
+		return snprintf(buff, len, "delayed");
+	default:
+		return snprintf(buff, len, "undef");
+	}
+}
+/* Print "active"/"failed" from pp->dmstate; "undef" for a NULL path or any other state. */
+static int
+snprint_dm_path_state (char * buff, size_t len, const struct path * pp)
+{
+	if (!pp)
+		return snprintf(buff, len, "undef");
+
+	switch (pp->dmstate) {
+	case PSTATE_ACTIVE:
+		return snprintf(buff, len, "active");
+	case PSTATE_FAILED:
+		return snprintf(buff, len, "failed");
+	default:
+		return snprintf(buff, len, "undef");
+	}
+}
+/* Print "vendor,product" of the path. NOTE(review): unlike its neighbours this does not test pp for NULL. */
+static int
+snprint_vpr (char * buff, size_t len, const struct path * pp)
+{
+	return snprintf(buff, len, "%s,%s",
+			pp->vendor_id, pp->product_id);
+}
+/* Print a progress bar of pp->tick over pp->checkint, or "orphan" for a NULL path or a path without a map. */
+static int
+snprint_next_check (char * buff, size_t len, const struct path * pp)
+{
+	if (!pp || !pp->mpp)
+		return snprintf(buff, len, "orphan");
+
+	return snprint_progress(buff, len, pp->tick, pp->checkint);
+}
+/* Print the path priority, or -1 for a NULL path. */
+static int
+snprint_pri (char * buff, size_t len, const struct path * pp)
+{
+	return snprint_int(buff, len, pp ? pp->priority : -1);
+}
+/* Print the selector of the map owning the path group; an unset selector prints as an empty string. */
+static int
+snprint_pg_selector (char * buff, size_t len, const struct pathgroup * pgp)
+{
+	const char *s = pgp->mpp->selector;
+
+	return snprint_str(buff, len, s ?
s : "");
+}
+/* Print the path group priority. */
+static int
+snprint_pg_pri (char * buff, size_t len, const struct pathgroup * pgp)
+{
+	return snprint_int(buff, len, pgp->priority);
+}
+/* Print "enabled", "disabled" or "active" from pgp->status; "undef" for anything else. */
+static int
+snprint_pg_state (char * buff, size_t len, const struct pathgroup * pgp)
+{
+	switch (pgp->status) {
+	case PGSTATE_ENABLED:
+		return snprintf(buff, len, "enabled");
+	case PGSTATE_DISABLED:
+		return snprintf(buff, len, "disabled");
+	case PGSTATE_ACTIVE:
+		return snprintf(buff, len, "active");
+	default:
+		return snprintf(buff, len, "undef");
+	}
+}
+/* Print "marginal" or "normal" from pgp->marginal. */
+static int
+snprint_pg_marginal (char * buff, size_t len, const struct pathgroup * pgp)
+{
+	if (pgp->marginal)
+		return snprintf(buff, len, "marginal");
+	return snprintf(buff, len, "normal");
+}
+/* Print pp->size through snprint_size(). */
+static int
+snprint_path_size (char * buff, size_t len, const struct path * pp)
+{
+	return snprint_size(buff, len, pp->size);
+}
+/* Print the path's serial string. */
+int
+snprint_path_serial (char * buff, size_t len, const struct path * pp)
+{
+	return snprint_str(buff, len, pp->serial);
+}
+/* Print the alias of the owning map; "[orphan]" without a map, "[unknown]" when the map has no alias. */
+static int
+snprint_path_mpp (char * buff, size_t len, const struct path * pp)
+{
+	if (!pp->mpp)
+		return snprintf(buff, len, "[orphan]");
+	if (!pp->mpp->alias)
+		return snprintf(buff, len, "[unknown]");
+	return snprint_str(buff, len, pp->mpp->alias);
+}
+
+/*
+ * Print the sysfs attribute attr of the fc_host device that belongs to
+ * the path's SCSI host. Prints "[undef]" for paths whose protocol is
+ * not FCP and "[unknown]" when the fc_host device or the attribute
+ * cannot be obtained.
+ */
+static int
+snprint_host_attr (char * buff, size_t len, const struct path * pp, char *attr)
+{
+	struct udev_device *host_dev = NULL;
+	char host_id[32];
+	const char *value = NULL;
+	int ret;
+
+	if (pp->sg_id.proto_id != SCSI_PROTOCOL_FCP)
+		return snprintf(buff, len, "[undef]");
+	/* bounded formatting: never write past host_id */
+	snprintf(host_id, sizeof(host_id), "host%d", pp->sg_id.host_no);
+	host_dev = udev_device_new_from_subsystem_sysname(udev, "fc_host",
+							  host_id);
+	if (!host_dev) {
+		condlog(1, "%s: No fc_host device for '%s'", pp->dev, host_id);
+		goto out;
+	}
+	value = udev_device_get_sysattr_value(host_dev, attr);
+	/* copy the value out before the device reference is dropped */
+	if (value)
+		ret = snprint_str(buff, len, value);
+	udev_device_unref(host_dev);
+out:
+	if (!value)
+		ret = snprintf(buff, len, "[unknown]");
+	return ret;
+}
+
+int
+snprint_host_wwnn (char * buff, size_t len, const struct path * pp)
+{
+	return snprint_host_attr(buff, len, pp, "node_name");
+}
+/* Print the "port_name" attribute of the path's fc_host device. */
+int
+snprint_host_wwpn (char * buff, size_t len, const struct path * pp)
+{
+	return snprint_host_attr(buff, len, pp, "port_name");
+}
+
+/*
+ * Print the "port_name" attribute of the fc_remote_ports device named
+ * "rport-<host>:<channel>-<transport id>". Prints "[undef]" for paths
+ * whose protocol is not FCP and "[unknown]" when the device or the
+ * attribute cannot be obtained.
+ */
+int
+snprint_tgt_wwpn (char * buff, size_t len, const struct path * pp)
+{
+	struct udev_device *rport_dev = NULL;
+	char rport_id[32];
+	const char *value = NULL;
+	int ret;
+
+	if (pp->sg_id.proto_id != SCSI_PROTOCOL_FCP)
+		return snprintf(buff, len, "[undef]");
+	/*
+	 * Three "%d" fields can need more than sizeof(rport_id) bytes;
+	 * snprintf() truncates instead of overflowing, and a truncated
+	 * name simply fails the lookup below.
+	 */
+	snprintf(rport_id, sizeof(rport_id), "rport-%d:%d-%d",
+		 pp->sg_id.host_no, pp->sg_id.channel, pp->sg_id.transport_id);
+	rport_dev = udev_device_new_from_subsystem_sysname(udev,
+				"fc_remote_ports", rport_id);
+	if (!rport_dev) {
+		condlog(1, "%s: No fc_remote_port device for '%s'", pp->dev,
+			rport_id);
+		goto out;
+	}
+	value = udev_device_get_sysattr_value(rport_dev, "port_name");
+	/* copy the value out before the device reference is dropped */
+	if (value)
+		ret = snprint_str(buff, len, value);
+	udev_device_unref(rport_dev);
+out:
+	if (!value)
+		ret = snprintf(buff, len, "[unknown]");
+	return ret;
+}
+
+/* Print the target node name stored in the path, or "[undef]" when it is empty. */
+int
+snprint_tgt_wwnn (char * buff, size_t len, const struct path * pp)
+{
+	if (pp->tgt_node_name[0] == '\0')
+		return snprintf(buff, len, "[undef]");
+	return snprint_str(buff, len, pp->tgt_node_name);
+}
+/* Print the name returned by sysfs_get_host_adapter_name(), or "[undef]" when that call reports failure. */
+static int
+snprint_host_adapter (char * buff, size_t len, const struct path * pp)
+{
+	char adapter[SLOT_NAME_SIZE];
+
+	if (sysfs_get_host_adapter_name(pp, adapter))
+		return snprintf(buff, len, "[undef]");
+	return snprint_str(buff, len, adapter);
+}
+/* Print the name of the path's checker. */
+static int
+snprint_path_checker (char * buff, size_t len, const struct path * pp)
+{
+	const struct checker * c = &pp->checker;
+	return snprint_str(buff, len, checker_name(c));
+}
+/* The "foreign" column always prints "--" here; the path argument is unused. */
+static int
+snprint_path_foreign (char * buff, size_t len,
+		      __attribute__((unused)) const struct path * pp)
+{
+	return snprintf(buff, len, "%s", "--");
+}
+
+static int
+snprint_path_failures(char * buff, size_t len, const struct path * pp)
+{
+	return
snprint_int(buff, len, pp->failcount);
+}
+
+/* Print the path's transport as "scsi:<proto>" for SCSI paths, or the
+ * bus name ("ccw", "cciss", "nvme", "undef") otherwise.
+ * if you add a protocol string bigger than "scsi:unspec" you must
+ * also change PROTOCOL_BUF_SIZE */
+int
+snprint_path_protocol(char * buff, size_t len, const struct path * pp)
+{
+	switch (pp->bus) {
+	case SYSFS_BUS_SCSI:
+		switch (pp->sg_id.proto_id) {
+		case SCSI_PROTOCOL_FCP:
+			return snprintf(buff, len, "scsi:fcp");
+		case SCSI_PROTOCOL_SPI:
+			return snprintf(buff, len, "scsi:spi");
+		case SCSI_PROTOCOL_SSA:
+			return snprintf(buff, len, "scsi:ssa");
+		case SCSI_PROTOCOL_SBP:
+			return snprintf(buff, len, "scsi:sbp");
+		case SCSI_PROTOCOL_SRP:
+			return snprintf(buff, len, "scsi:srp");
+		case SCSI_PROTOCOL_ISCSI:
+			return snprintf(buff, len, "scsi:iscsi");
+		case SCSI_PROTOCOL_SAS:
+			return snprintf(buff, len, "scsi:sas");
+		case SCSI_PROTOCOL_ADT:
+			return snprintf(buff, len, "scsi:adt");
+		case SCSI_PROTOCOL_ATA:
+			return snprintf(buff, len, "scsi:ata");
+		case SCSI_PROTOCOL_UNSPEC:
+		default:
+			return snprintf(buff, len, "scsi:unspec");
+		}
+	case SYSFS_BUS_CCW:
+		return snprintf(buff, len, "ccw");
+	case SYSFS_BUS_CCISS:
+		return snprintf(buff, len, "cciss");
+	case SYSFS_BUS_NVME:
+		return snprintf(buff, len, "nvme");
+	case SYSFS_BUS_UNDEF:
+	default:
+		return snprintf(buff, len, "undef");
+	}
+}
+/* Print "marginal" or "normal" from pp->marginal. */
+int
+snprint_path_marginal(char * buff, size_t len, const struct path * pp)
+{
+	if (pp->marginal)
+		return snprintf(buff, len, "marginal");
+	return snprintf(buff, len, "normal");
+}
+/* Print the path's vpd_data, or "[undef]" when it is not set. */
+static int
+snprint_path_vpd_data(char * buff, size_t len, const struct path * pp)
+{
+	if (pp->vpd_data)
+		return snprintf(buff, len, "%s", pp->vpd_data);
+	return snprintf(buff, len, "[undef]");
+}
+/* Multipath format table: wildcard character, column header, column width (starts at 0), print function. Ends with a NULL header. */
+struct multipath_data mpd[] = {
+	{'n', "name", 0, snprint_name},
+	{'w', "uuid", 0, snprint_multipath_uuid},
+	{'d', "sysfs", 0, snprint_sysfs},
+	{'F', "failback", 0, snprint_failback},
+	{'Q', "queueing", 0, snprint_queueing},
+	{'N', "paths", 0, snprint_nb_paths},
+	{'r', "write_prot", 0, snprint_ro},
+	{'t', "dm-st", 0,
snprint_dm_map_state},
+	{'S', "size", 0, snprint_multipath_size},
+	{'f', "features", 0, snprint_features},
+	{'x', "failures", 0, snprint_map_failures},
+	{'h', "hwhandler", 0, snprint_hwhandler},
+	{'A', "action", 0, snprint_action},
+	{'0', "path_faults", 0, snprint_path_faults},
+	{'1', "switch_grp", 0, snprint_switch_grp},
+	{'2', "map_loads", 0, snprint_map_loads},
+	{'3', "total_q_time", 0, snprint_total_q_time},
+	{'4', "q_timeouts", 0, snprint_q_timeouts},
+	{'s', "vend/prod/rev", 0, snprint_multipath_vpr},
+	{'v', "vend", 0, snprint_multipath_vend},
+	{'p', "prod", 0, snprint_multipath_prod},
+	{'e', "rev", 0, snprint_multipath_rev},
+	{'G', "foreign", 0, snprint_multipath_foreign},
+	{'g', "vpd page data", 0, snprint_multipath_vpd_data},
+	{0, NULL, 0 , NULL}	/* end marker: lookups stop at the NULL header */
+};
+/* Path format table: wildcard character, column header, column width (starts at 0), print function. Ends with a NULL header. */
+struct path_data pd[] = {
+	{'w', "uuid", 0, snprint_path_uuid},
+	{'i', "hcil", 0, snprint_hcil},
+	{'d', "dev", 0, snprint_dev},
+	{'D', "dev_t", 0, snprint_dev_t},
+	{'t', "dm_st", 0, snprint_dm_path_state},
+	{'o', "dev_st", 0, snprint_offline},
+	{'T', "chk_st", 0, snprint_chk_state},
+	{'s', "vend/prod/rev", 0, snprint_vpr},
+	{'c', "checker", 0, snprint_path_checker},
+	{'C', "next_check", 0, snprint_next_check},
+	{'p', "pri", 0, snprint_pri},
+	{'S', "size", 0, snprint_path_size},
+	{'z', "serial", 0, snprint_path_serial},
+	{'M', "marginal_st", 0, snprint_path_marginal},
+	{'m', "multipath", 0, snprint_path_mpp},
+	{'N', "host WWNN", 0, snprint_host_wwnn},
+	{'n', "target WWNN", 0, snprint_tgt_wwnn},
+	{'R', "host WWPN", 0, snprint_host_wwpn},
+	{'r', "target WWPN", 0, snprint_tgt_wwpn},
+	{'a', "host adapter", 0, snprint_host_adapter},
+	{'G', "foreign", 0, snprint_path_foreign},
+	{'g', "vpd page data", 0, snprint_path_vpd_data},
+	{'0', "failures", 0, snprint_path_failures},
+	{'P', "protocol", 0, snprint_path_protocol},
+	{0, NULL, 0 , NULL}	/* end marker: lookups stop at the NULL header */
+};
+/* Path group format table, same layout as the two tables above. */
+struct pathgroup_data pgd[] = {
+	{'s', "selector", 0, snprint_pg_selector},
+	{'p', "pri", 0, snprint_pg_pri},
+	{'t', "dm_st", 0,
snprint_pg_state},
+	{'M', "marginal_st", 0, snprint_pg_marginal},
+	{0, NULL, 0 , NULL}
+};
+
+/*
+ * Write a listing of all multipath, path and pathgroup format
+ * wildcards ("%<char> <header>" per line) into buff.
+ *
+ * Returns the number of characters written, or len as soon as the
+ * output no longer fits. The running offset is checked after every
+ * write so that "len - fwd" can never become negative (which would be
+ * converted to a huge size_t by snprintf()).
+ */
+int
+snprint_wildcards (char * buff, int len)
+{
+	int i, fwd = 0;
+
+	fwd += snprintf(buff + fwd, len - fwd, "multipath format wildcards:\n");
+	if (fwd >= len)
+		return len;
+	for (i = 0; mpd[i].header; i++) {
+		fwd += snprintf(buff + fwd, len - fwd, "%%%c %s\n",
+				mpd[i].wildcard, mpd[i].header);
+		if (fwd >= len)
+			return len;
+	}
+	fwd += snprintf(buff + fwd, len - fwd, "\npath format wildcards:\n");
+	if (fwd >= len)
+		return len;
+	for (i = 0; pd[i].header; i++) {
+		fwd += snprintf(buff + fwd, len - fwd, "%%%c %s\n",
+				pd[i].wildcard, pd[i].header);
+		if (fwd >= len)
+			return len;
+	}
+	fwd += snprintf(buff + fwd, len - fwd, "\npathgroup format wildcards:\n");
+	if (fwd >= len)
+		return len;
+	for (i = 0; pgd[i].header; i++) {
+		fwd += snprintf(buff + fwd, len - fwd, "%%%c %s\n",
+				pgd[i].wildcard, pgd[i].header);
+		if (fwd >= len)
+			return len;
+	}
+	return fwd;
+}
+/* Compute the path column widths for pathvec; a non-zero header starts each width at the header length instead of 0. */
+void
+get_path_layout(vector pathvec, int header)
+{
+	vector gpvec = vector_convert(NULL, pathvec, struct path,
+				      dm_path_to_gen);
+	_get_path_layout(gpvec,
+			 header ? LAYOUT_RESET_HEADER : LAYOUT_RESET_ZERO);
+	vector_free(gpvec);
+}
+/* Reset *width to strlen(header) or to 0 depending on reset; any other reset value leaves it unchanged. */
+static void
+reset_width(unsigned int *width, enum layout_reset reset, const char *header)
+{
+	switch (reset) {
+	case LAYOUT_RESET_HEADER:
+		*width = strlen(header);
+		break;
+	case LAYOUT_RESET_ZERO:
+		*width = 0;
+		break;
+	default:
+		/* don't reset */
+		break;
+	}
+}
+
+/*
+ * For every entry of pd[], reset the column width according to reset
+ * and then grow it to the longest value printed for any path in gpvec.
+ * A NULL gpvec only performs the reset.
+ */
+void
+_get_path_layout (const struct _vector *gpvec, enum layout_reset reset)
+{
+	int i, j;
+	char buff[MAX_FIELD_LEN];
+	const struct gen_path *gp;
+
+	for (j = 0; pd[j].header; j++) {
+
+		reset_width(&pd[j].width, reset, pd[j].header);
+
+		if (gpvec == NULL)
+			continue;
+
+		vector_foreach_slot (gpvec, gp, i) {
+			/*
+			 * The callback may return without writing anything;
+			 * start from an empty string so that strlen() never
+			 * reads uninitialized or stale data.
+			 */
+			buff[0] = '\0';
+			gp->ops->snprint(gp, buff, MAX_FIELD_LEN,
+					 pd[j].wildcard);
+			pd[j].width = MAX(pd[j].width, strlen(buff));
+		}
+	}
+}
+/* Set the width of every multipath column back to 0. */
+static void
+reset_multipath_layout (void)
+{
+	int i;
+
+	for (i = 0; mpd[i].header; i++)
+		mpd[i].width = 0;
+}
+
+void
+get_multipath_layout (vector mpvec, int header) {
+	vector gmvec = vector_convert(NULL, mpvec, struct multipath,
+				      dm_multipath_to_gen);
+
_get_multipath_layout(gmvec, + header ? LAYOUT_RESET_HEADER : LAYOUT_RESET_ZERO); + vector_free(gmvec); +} + +void +_get_multipath_layout (const struct _vector *gmvec, + enum layout_reset reset) +{ + int i, j; + char buff[MAX_FIELD_LEN]; + const struct gen_multipath * gm; + + for (j = 0; mpd[j].header; j++) { + + reset_width(&mpd[j].width, reset, mpd[j].header); + + if (gmvec == NULL) + continue; + + vector_foreach_slot (gmvec, gm, i) { + gm->ops->snprint(gm, buff, MAX_FIELD_LEN, + mpd[j].wildcard); + mpd[j].width = MAX(mpd[j].width, strlen(buff)); + } + condlog(4, "%s: width %d", mpd[j].header, mpd[j].width); + } +} + +static struct multipath_data * +mpd_lookup(char wildcard) +{ + int i; + + for (i = 0; mpd[i].header; i++) + if (mpd[i].wildcard == wildcard) + return &mpd[i]; + + return NULL; +} + +int snprint_multipath_attr(const struct gen_multipath* gm, + char *buf, int len, char wildcard) +{ + const struct multipath *mpp = gen_multipath_to_dm(gm); + struct multipath_data *mpd = mpd_lookup(wildcard); + + if (mpd == NULL) + return 0; + return mpd->snprint(buf, len, mpp); +} + +static struct path_data * +pd_lookup(char wildcard) +{ + int i; + + for (i = 0; pd[i].header; i++) + if (pd[i].wildcard == wildcard) + return &pd[i]; + + return NULL; +} + +int snprint_path_attr(const struct gen_path* gp, + char *buf, int len, char wildcard) +{ + const struct path *pp = gen_path_to_dm(gp); + struct path_data *pd = pd_lookup(wildcard); + + if (pd == NULL) + return 0; + return pd->snprint(buf, len, pp); +} + +static struct pathgroup_data * +pgd_lookup(char wildcard) +{ + int i; + + for (i = 0; pgd[i].header; i++) + if (pgd[i].wildcard == wildcard) + return &pgd[i]; + + return NULL; +} + +int snprint_pathgroup_attr(const struct gen_pathgroup* gpg, + char *buf, int len, char wildcard) +{ + const struct pathgroup *pg = gen_pathgroup_to_dm(gpg); + struct pathgroup_data *pdg = pgd_lookup(wildcard); + + if (pdg == NULL) + return 0; + return pdg->snprint(buf, len, pg); +} + +int 
+snprint_multipath_header (char * line, int len, const char * format)
+{
+	char * c = line;   /* line cursor */
+	char * s = line;   /* for padding */
+	const char * f = format; /* format string cursor */
+	int fwd;
+	struct multipath_data * data;
+
+	do {
+		if (TAIL <= 0)	/* no room left in line */
+			break;
+
+		if (*f != '%') {
+			*c++ = *f;	/* literal character; also copies the final NUL, which __endline() turns into '\n' */
+			NOPAD;
+			continue;
+		}
+		f++;
+
+		if (!(data = mpd_lookup(*f)))
+			continue; /* unknown wildcard */
+
+		PRINT(c, TAIL, "%s", data->header);
+		PAD(data->width);
+	} while (*f++);
+
+	__endline(line, len, c);
+	return (c - line);
+}
+/* Expand format for one map into line: each %<wildcard> is replaced by the value printed by gmp->ops->snprint, padded to the column width when pad is non-zero. Returns the cursor offset in line. */
+int
+_snprint_multipath (const struct gen_multipath * gmp,
+		    char * line, int len, const char * format, int pad)
+{
+	char * c = line;   /* line cursor */
+	char * s = line;   /* for padding */
+	const char * f = format; /* format string cursor */
+	int fwd;
+	struct multipath_data * data;
+	char buff[MAX_FIELD_LEN] = {};
+
+	do {
+		if (TAIL <= 0)
+			break;
+
+		if (*f != '%') {
+			*c++ = *f;
+			NOPAD;
+			continue;
+		}
+		f++;
+
+		if (!(data = mpd_lookup(*f)))
+			continue;
+
+		gmp->ops->snprint(gmp, buff, MAX_FIELD_LEN, *f);
+		PRINT(c, TAIL, "%s", buff);
+		if (pad)
+			PAD(data->width);
+		buff[0] = '\0';	/* a print function that writes nothing must not repeat the previous value */
+	} while (*f++);
+
+	__endline(line, len, c);
+	return (c - line);
+}
+/* Same as snprint_multipath_header(), using the path table pd[]. */
+int
+snprint_path_header (char * line, int len, const char * format)
+{
+	char * c = line;   /* line cursor */
+	char * s = line;   /* for padding */
+	const char * f = format; /* format string cursor */
+	int fwd;
+	struct path_data * data;
+
+	do {
+		if (TAIL <= 0)
+			break;
+
+		if (*f != '%') {
+			*c++ = *f;
+			NOPAD;
+			continue;
+		}
+		f++;
+
+		if (!(data = pd_lookup(*f)))
+			continue; /* unknown wildcard */
+
+		PRINT(c, TAIL, "%s", data->header);
+		PAD(data->width);
+	} while (*f++);
+
+	__endline(line, len, c);
+	return (c - line);
+}
+
+int
+_snprint_path (const struct gen_path * gp, char * line, int len,
+	       const char * format, int pad)
+{
+	char * c = line;   /* line cursor */
+	char * s = line;   /* for padding */
+	const char * f = format; /* format string cursor */
+
int fwd; + struct path_data * data; + char buff[MAX_FIELD_LEN]; + + do { + if (TAIL <= 0) + break; + + if (*f != '%') { + *c++ = *f; + NOPAD; + continue; + } + f++; + + if (!(data = pd_lookup(*f))) + continue; + + gp->ops->snprint(gp, buff, MAX_FIELD_LEN, *f); + PRINT(c, TAIL, "%s", buff); + if (pad) + PAD(data->width); + } while (*f++); + + __endline(line, len, c); + return (c - line); +} + +int +_snprint_pathgroup (const struct gen_pathgroup * ggp, char * line, int len, + char * format) +{ + char * c = line; /* line cursor */ + char * s = line; /* for padding */ + char * f = format; /* format string cursor */ + int fwd; + struct pathgroup_data * data; + char buff[MAX_FIELD_LEN]; + + do { + if (TAIL <= 0) + break; + + if (*f != '%') { + *c++ = *f; + NOPAD; + continue; + } + f++; + + if (!(data = pgd_lookup(*f))) + continue; + + ggp->ops->snprint(ggp, buff, MAX_FIELD_LEN, *f); + PRINT(c, TAIL, "%s", buff); + PAD(data->width); + } while (*f++); + + __endline(line, len, c); + return (c - line); +} +#define snprint_pathgroup(line, len, fmt, pgp) \ + _snprint_pathgroup(dm_pathgroup_to_gen(pgp), line, len, fmt) + +void _print_multipath_topology(const struct gen_multipath *gmp, int verbosity) +{ + int resize; + char *buff = NULL; + char *old = NULL; + int len, maxlen = MAX_LINE_LEN * MAX_LINES; + + buff = MALLOC(maxlen); + do { + if (!buff) { + if (old) + FREE(old); + condlog(0, "couldn't allocate memory for list: %s\n", + strerror(errno)); + return; + } + + len = _snprint_multipath_topology(gmp, buff, maxlen, verbosity); + resize = (len == maxlen - 1); + + if (resize) { + maxlen *= 2; + old = buff; + buff = REALLOC(buff, maxlen); + } + } while (resize); + printf("%s", buff); + FREE(buff); +} + +int +snprint_multipath_style(const struct gen_multipath *gmp, char *style, int len, + int verbosity) +{ + int n; + const struct multipath *mpp = gen_multipath_to_dm(gmp); + bool need_action = (verbosity > 1 && + mpp->action != ACT_NOTHING && + mpp->action != ACT_UNDEF && + 
mpp->action != ACT_IMPOSSIBLE); + bool need_wwid = (strncmp(mpp->alias, mpp->wwid, WWID_SIZE)); + + n = snprintf(style, len, "%s%s%s%s", + need_action ? "%A: " : "", "%n", + need_wwid ? " (%w)" : "", " %d %s"); + return MIN(n, len - 1); +} + +int _snprint_multipath_topology(const struct gen_multipath *gmp, + char *buff, int len, int verbosity) +{ + int j, i, fwd = 0; + const struct _vector *pgvec; + const struct gen_pathgroup *gpg; + char style[64]; + char * c = style; + char fmt[64]; + char * f; + + if (verbosity <= 0) + return fwd; + + reset_multipath_layout(); + + if (verbosity == 1) + return _snprint_multipath(gmp, buff, len, "%n", 1); + + if(isatty(1)) + c += sprintf(c, "%c[%dm", 0x1B, 1); /* bold on */ + + c += gmp->ops->style(gmp, c, sizeof(style) - (c - style), + verbosity); + if(isatty(1)) + c += sprintf(c, "%c[%dm", 0x1B, 0); /* bold off */ + + fwd += _snprint_multipath(gmp, buff + fwd, len - fwd, style, 1); + if (fwd >= len) + return len; + fwd += _snprint_multipath(gmp, buff + fwd, len - fwd, + PRINT_MAP_PROPS, 1); + if (fwd >= len) + return len; + + pgvec = gmp->ops->get_pathgroups(gmp); + if (pgvec == NULL) + return fwd; + + vector_foreach_slot (pgvec, gpg, j) { + const struct _vector *pathvec; + struct gen_path *gp; + + f=fmt; + + if (j + 1 < VECTOR_SIZE(pgvec)) { + strcpy(f, "|-+- " PRINT_PG_INDENT); + } else + strcpy(f, "`-+- " PRINT_PG_INDENT); + fwd += _snprint_pathgroup(gpg, buff + fwd, len - fwd, fmt); + + if (fwd >= len) { + fwd = len; + break; + } + + pathvec = gpg->ops->get_paths(gpg); + if (pathvec == NULL) + continue; + + vector_foreach_slot (pathvec, gp, i) { + f=fmt; + if (*f != '|') + *f=' '; + f++; + if (i + 1 < VECTOR_SIZE(pathvec)) + strcpy(f, " |- " PRINT_PATH_INDENT); + else + strcpy(f, " `- " PRINT_PATH_INDENT); + fwd += _snprint_path(gp, buff + fwd, len - fwd, fmt, 1); + if (fwd >= len) { + fwd = len; + break; + } + } + gpg->ops->rel_paths(gpg, pathvec); + + if (fwd == len) + break; + } + gmp->ops->rel_pathgroups(gmp, pgvec); + 
	return fwd;
+}
+
+/* Write indent copies of PRINT_JSON_INDENT followed by json_str; the returned offset may reach or exceed len when the output was cut. */
+static int
+snprint_json (char * buff, int len, int indent, char *json_str)
+{
+	int fwd = 0, i;
+
+	for (i = 0; i < indent; i++) {
+		fwd += snprintf(buff + fwd, len - fwd, PRINT_JSON_INDENT);
+		if (fwd >= len)
+			return fwd;
+	}
+
+	fwd += snprintf(buff + fwd, len - fwd, "%s", json_str);
+	return fwd;
+}
+/* Write the opening element followed by the version entry built from the PRINT_JSON_*_VERSION macros. */
+static int
+snprint_json_header (char * buff, int len)
+{
+	int fwd = 0;
+
+	fwd +=  snprint_json(buff, len, 0, PRINT_JSON_START_ELEM);
+	if (fwd >= len)
+		return fwd;
+
+	fwd +=  snprintf(buff + fwd, len - fwd, PRINT_JSON_START_VERSION,
+			PRINT_JSON_MAJOR_VERSION, PRINT_JSON_MINOR_VERSION);
+	return fwd;
+}
+/* Write an indented element terminator: PRINT_JSON_END_LAST_ELEM when last == 1, PRINT_JSON_END_ELEM otherwise. */
+static int
+snprint_json_elem_footer (char * buff, int len, int indent, int last)
+{
+	int fwd = 0, i;
+
+	for (i = 0; i < indent; i++) {
+		fwd += snprintf(buff + fwd, len - fwd, PRINT_JSON_INDENT);
+		if (fwd >= len)
+			return fwd;
+	}
+
+	if (last == 1)
+		fwd += snprintf(buff + fwd, len - fwd, "%s", PRINT_JSON_END_LAST_ELEM);
+	else
+		fwd += snprintf(buff + fwd, len - fwd, "%s", PRINT_JSON_END_ELEM);
+	return fwd;
+}
+/* Write one map with its path groups and their paths as JSON; last selects the terminator of the map element. Returns as soon as the offset reaches len. */
+static int
+snprint_multipath_fields_json (char * buff, int len,
+		const struct multipath * mpp, int last)
+{
+	int i, j, fwd = 0;
+	struct path *pp;
+	struct pathgroup *pgp;
+
+	fwd += snprint_multipath(buff, len, PRINT_JSON_MAP, mpp, 0);
+	if (fwd >= len)
+		return fwd;
+
+	fwd += snprint_json(buff + fwd, len - fwd, 2, PRINT_JSON_START_GROUPS);
+	if (fwd >= len)
+		return fwd;
+
+	vector_foreach_slot (mpp->pg, pgp, i) {
+
+		fwd += snprint_pathgroup(buff + fwd, len - fwd, PRINT_JSON_GROUP, pgp);
+		if (fwd >= len)
+			return fwd;
+
+		fwd += snprintf(buff + fwd, len - fwd, PRINT_JSON_GROUP_NUM, i + 1);	/* group numbers start at 1 */
+		if (fwd >= len)
+			return fwd;
+
+		fwd += snprint_json(buff + fwd, len - fwd, 3, PRINT_JSON_START_PATHS);
+		if (fwd >= len)
+			return fwd;
+
+		vector_foreach_slot (pgp->paths, pp, j) {
+			fwd += snprint_path(buff + fwd, len - fwd, PRINT_JSON_PATH, pp, 0);
+			if (fwd >= len)
+				return fwd;
+
+			fwd += snprint_json_elem_footer(buff +
fwd,
+					len - fwd, 3, j + 1 == VECTOR_SIZE(pgp->paths));
+			if (fwd >= len)
+				return fwd;
+		}
+		fwd += snprint_json(buff + fwd, len - fwd, 0, PRINT_JSON_END_ARRAY);
+		if (fwd >= len)
+			return fwd;
+
+		fwd +=  snprint_json_elem_footer(buff + fwd,
+				len - fwd, 2, i + 1 == VECTOR_SIZE(mpp->pg));
+		if (fwd >= len)
+			return fwd;
+	}
+
+	fwd += snprint_json(buff + fwd, len - fwd, 0, PRINT_JSON_END_ARRAY);
+	if (fwd >= len)
+		return fwd;
+
+	fwd += snprint_json_elem_footer(buff + fwd, len - fwd, 1, last);
+	return fwd;
+}
+/* Write a complete JSON document describing the single map mpp; returns the length written, or len when the buffer was filled. */
+int
+snprint_multipath_map_json (char * buff, int len, const struct multipath * mpp)
+{
+	int fwd = 0;
+
+	fwd +=  snprint_json_header(buff, len);
+	if (fwd >= len)
+		return len;
+
+	fwd +=  snprint_json(buff + fwd, len - fwd, 0, PRINT_JSON_START_MAP);
+	if (fwd >= len)
+		return len;
+
+	fwd += snprint_multipath_fields_json(buff + fwd, len - fwd, mpp, 1);
+	if (fwd >= len)
+		return len;
+
+	fwd +=  snprint_json(buff + fwd, len - fwd, 0, "\n");
+	if (fwd >= len)
+		return len;
+
+	fwd +=  snprint_json(buff + fwd, len - fwd, 0, PRINT_JSON_END_LAST);
+	if (fwd >= len)
+		return len;
+	return fwd;
+}
+/* Write a complete JSON document listing every map in vecs->mpvec; returns the length written, or len when the buffer was filled. */
+int
+snprint_multipath_topology_json (char * buff, int len, const struct vectors * vecs)
+{
+	int i, fwd = 0;
+	struct multipath * mpp;
+
+	fwd +=  snprint_json_header(buff, len);
+	if (fwd >= len)
+		return len;
+
+	fwd +=  snprint_json(buff + fwd, len - fwd, 1, PRINT_JSON_START_MAPS);
+	if (fwd >= len)
+		return len;
+
+	vector_foreach_slot(vecs->mpvec, mpp, i) {
+		fwd += snprint_multipath_fields_json(buff + fwd, len - fwd,
+				mpp, i + 1 == VECTOR_SIZE(vecs->mpvec));	/* flag the final map */
+		if (fwd >= len)
+			return len;
+	}
+
+	fwd +=  snprint_json(buff + fwd, len - fwd, 0, PRINT_JSON_END_ARRAY);
+	if (fwd >= len)
+		return len;
+
+	fwd +=  snprint_json(buff + fwd, len - fwd, 0, PRINT_JSON_END_LAST);
+	if (fwd >= len)
+		return len;
+	return fwd;
+}
+/* Write one "device { ... }" section for hwe, with one line per sub-keyword of "devices"/"device". Returns 0 when those keywords are not registered, len when the buffer was filled. */
+static int
+snprint_hwentry (const struct config *conf,
+		 char * buff, int len, const struct hwentry * hwe)
+{
+	int i;
+	int fwd = 0;
+	struct
keyword * kw; + struct keyword * rootkw; + + rootkw = find_keyword(conf->keywords, NULL, "devices"); + + if (!rootkw || !rootkw->sub) + return 0; + + rootkw = find_keyword(conf->keywords, rootkw->sub, "device"); + + if (!rootkw) + return 0; + + fwd += snprintf(buff + fwd, len - fwd, "\tdevice {\n"); + if (fwd >= len) + return len; + iterate_sub_keywords(rootkw, kw, i) { + fwd += snprint_keyword(buff + fwd, len - fwd, "\t\t%k %v\n", + kw, hwe); + if (fwd >= len) + return len; + } + fwd += snprintf(buff + fwd, len - fwd, "\t}\n"); + if (fwd >= len) + return len; + return fwd; +} + +static int snprint_hwtable(const struct config *conf, + char *buff, int len, + const struct _vector *hwtable) +{ + int fwd = 0; + int i; + struct hwentry * hwe; + struct keyword * rootkw; + + rootkw = find_keyword(conf->keywords, NULL, "devices"); + if (!rootkw) + return 0; + + fwd += snprintf(buff + fwd, len - fwd, "devices {\n"); + if (fwd >= len) + return len; + vector_foreach_slot (hwtable, hwe, i) { + fwd += snprint_hwentry(conf, buff + fwd, len - fwd, hwe); + if (fwd >= len) + return len; + } + fwd += snprintf(buff + fwd, len - fwd, "}\n"); + if (fwd >= len) + return len; + return fwd; +} + +static int +snprint_mpentry (const struct config *conf, char * buff, int len, + const struct mpentry * mpe, const struct _vector *mpvec) +{ + int i; + int fwd = 0; + struct keyword * kw; + struct keyword * rootkw; + struct multipath *mpp = NULL; + + if (mpvec != NULL && (mpp = find_mp_by_wwid(mpvec, mpe->wwid)) == NULL) + return 0; + + rootkw = find_keyword(conf->keywords, NULL, "multipath"); + if (!rootkw) + return 0; + + fwd += snprintf(buff + fwd, len - fwd, "\tmultipath {\n"); + if (fwd >= len) + return len; + iterate_sub_keywords(rootkw, kw, i) { + fwd += snprint_keyword(buff + fwd, len - fwd, "\t\t%k %v\n", + kw, mpe); + if (fwd >= len) + return len; + } + /* + * This mpp doesn't have alias defined. Add the alias in a comment. 
+ */ + if (mpp != NULL && strcmp(mpp->alias, mpp->wwid)) { + fwd += snprintf(buff + fwd, len - fwd, "\t\t# alias \"%s\"\n", + mpp->alias); + if (fwd >= len) + return len; + } + fwd += snprintf(buff + fwd, len - fwd, "\t}\n"); + if (fwd >= len) + return len; + return fwd; +} + +static int snprint_mptable(const struct config *conf, + char *buff, int len, const struct _vector *mpvec) +{ + int fwd = 0; + int i; + struct mpentry * mpe; + struct keyword * rootkw; + + rootkw = find_keyword(conf->keywords, NULL, "multipaths"); + if (!rootkw) + return 0; + + fwd += snprintf(buff + fwd, len - fwd, "multipaths {\n"); + if (fwd >= len) + return len; + vector_foreach_slot (conf->mptable, mpe, i) { + fwd += snprint_mpentry(conf, buff + fwd, len - fwd, mpe, mpvec); + if (fwd >= len) + return len; + } + if (mpvec != NULL) { + struct multipath *mpp; + + vector_foreach_slot(mpvec, mpp, i) { + if (find_mpe(conf->mptable, mpp->wwid) != NULL) + continue; + + fwd += snprintf(buff + fwd, len - fwd, + "\tmultipath {\n"); + if (fwd >= len) + return len; + fwd += snprintf(buff + fwd, len - fwd, + "\t\twwid \"%s\"\n", mpp->wwid); + if (fwd >= len) + return len; + /* + * This mpp doesn't have alias defined in + * multipath.conf - otherwise find_mpe would have + * found it. Add the alias in a comment. 
+ */ + if (strcmp(mpp->alias, mpp->wwid)) { + fwd += snprintf(buff + fwd, len - fwd, + "\t\t# alias \"%s\"\n", + mpp->alias); + if (fwd >= len) + return len; + } + fwd += snprintf(buff + fwd, len - fwd, "\t}\n"); + if (fwd >= len) + return len; + } + } + fwd += snprintf(buff + fwd, len - fwd, "}\n"); + if (fwd >= len) + return len; + return fwd; +} + +static int snprint_overrides(const struct config *conf, char * buff, int len, + const struct hwentry *overrides) +{ + int fwd = 0; + int i; + struct keyword *rootkw; + struct keyword *kw; + + rootkw = find_keyword(conf->keywords, NULL, "overrides"); + if (!rootkw) + return 0; + + fwd += snprintf(buff + fwd, len - fwd, "overrides {\n"); + if (fwd >= len) + return len; + if (!overrides) + goto out; + iterate_sub_keywords(rootkw, kw, i) { + fwd += snprint_keyword(buff + fwd, len - fwd, "\t%k %v\n", + kw, NULL); + if (fwd >= len) + return len; + } +out: + fwd += snprintf(buff + fwd, len - fwd, "}\n"); + if (fwd >= len) + return len; + return fwd; +} + +static int snprint_defaults(const struct config *conf, char *buff, int len) +{ + int fwd = 0; + int i; + struct keyword *rootkw; + struct keyword *kw; + + rootkw = find_keyword(conf->keywords, NULL, "defaults"); + if (!rootkw) + return 0; + + fwd += snprintf(buff + fwd, len - fwd, "defaults {\n"); + if (fwd >= len) + return len; + + iterate_sub_keywords(rootkw, kw, i) { + fwd += snprint_keyword(buff + fwd, len - fwd, "\t%k %v\n", + kw, NULL); + if (fwd >= len) + return len; + } + fwd += snprintf(buff + fwd, len - fwd, "}\n"); + if (fwd >= len) + return len; + return fwd; +} + +static int +snprint_blacklist_group (char *buff, int len, int *fwd, vector *vec) +{ + int threshold = MAX_LINE_LEN; + struct blentry * ble; + int pos; + int i; + + pos = *fwd; + if (!VECTOR_SIZE(*vec)) { + if ((len - pos - threshold) <= 0) + return 0; + pos += snprintf(buff + pos, len - pos, " \n"); + } else vector_foreach_slot (*vec, ble, i) { + if ((len - pos - threshold) <= 0) + return 0; + if 
(ble->origin == ORIGIN_CONFIG) + pos += snprintf(buff + pos, len - pos, " (config file rule) "); + else if (ble->origin == ORIGIN_DEFAULT) + pos += snprintf(buff + pos, len - pos, " (default rule) "); + pos += snprintf(buff + pos, len - pos, "%s\n", ble->str); + } + + *fwd = pos; + return pos; +} + +static int +snprint_blacklist_devgroup (char *buff, int len, int *fwd, vector *vec) +{ + int threshold = MAX_LINE_LEN; + struct blentry_device * bled; + int pos; + int i; + + pos = *fwd; + if (!VECTOR_SIZE(*vec)) { + if ((len - pos - threshold) <= 0) + return 0; + pos += snprintf(buff + pos, len - pos, " \n"); + } else vector_foreach_slot (*vec, bled, i) { + if ((len - pos - threshold) <= 0) + return 0; + if (bled->origin == ORIGIN_CONFIG) + pos += snprintf(buff + pos, len - pos, " (config file rule) "); + else if (bled->origin == ORIGIN_DEFAULT) + pos += snprintf(buff + pos, len - pos, " (default rule) "); + pos += snprintf(buff + pos, len - pos, "%s:%s\n", bled->vendor, bled->product); + } + + *fwd = pos; + return pos; +} + +int snprint_blacklist_report(struct config *conf, char *buff, int len) +{ + int threshold = MAX_LINE_LEN; + int fwd = 0; + + if ((len - fwd - threshold) <= 0) + return len; + fwd += snprintf(buff + fwd, len - fwd, "device node rules:\n" + "- blacklist:\n"); + if (!snprint_blacklist_group(buff, len, &fwd, &conf->blist_devnode)) + return len; + + if ((len - fwd - threshold) <= 0) + return len; + fwd += snprintf(buff + fwd, len - fwd, "- exceptions:\n"); + if (snprint_blacklist_group(buff, len, &fwd, &conf->elist_devnode) == 0) + return len; + + if ((len - fwd - threshold) <= 0) + return len; + fwd += snprintf(buff + fwd, len - fwd, "udev property rules:\n" + "- blacklist:\n"); + if (!snprint_blacklist_group(buff, len, &fwd, &conf->blist_property)) + return len; + + if ((len - fwd - threshold) <= 0) + return len; + fwd += snprintf(buff + fwd, len - fwd, "- exceptions:\n"); + if (snprint_blacklist_group(buff, len, &fwd, &conf->elist_property) == 0) + 
return len; + + if ((len - fwd - threshold) <= 0) + return len; + fwd += snprintf(buff + fwd, len - fwd, "protocol rules:\n" + "- blacklist:\n"); + if (!snprint_blacklist_group(buff, len, &fwd, &conf->blist_protocol)) + return len; + + if ((len - fwd - threshold) <= 0) + return len; + fwd += snprintf(buff + fwd, len - fwd, "- exceptions:\n"); + if (snprint_blacklist_group(buff, len, &fwd, &conf->elist_protocol) == 0) + return len; + + if ((len - fwd - threshold) <= 0) + return len; + fwd += snprintf(buff + fwd, len - fwd, "wwid rules:\n" + "- blacklist:\n"); + if (snprint_blacklist_group(buff, len, &fwd, &conf->blist_wwid) == 0) + return len; + + if ((len - fwd - threshold) <= 0) + return len; + fwd += snprintf(buff + fwd, len - fwd, "- exceptions:\n"); + if (snprint_blacklist_group(buff, len, &fwd, &conf->elist_wwid) == 0) + return len; + + if ((len - fwd - threshold) <= 0) + return len; + fwd += snprintf(buff + fwd, len - fwd, "device rules:\n" + "- blacklist:\n"); + if (snprint_blacklist_devgroup(buff, len, &fwd, &conf->blist_device) == 0) + return len; + + if ((len - fwd - threshold) <= 0) + return len; + fwd += snprintf(buff + fwd, len - fwd, "- exceptions:\n"); + if (snprint_blacklist_devgroup(buff, len, &fwd, &conf->elist_device) == 0) + return len; + + if (fwd > len) + return len; + return fwd; +} + +static int snprint_blacklist(const struct config *conf, char *buff, int len) +{ + int i; + struct blentry * ble; + struct blentry_device * bled; + int fwd = 0; + struct keyword *rootkw; + struct keyword *kw; + + rootkw = find_keyword(conf->keywords, NULL, "blacklist"); + if (!rootkw) + return 0; + + fwd += snprintf(buff + fwd, len - fwd, "blacklist {\n"); + if (fwd >= len) + return len; + + vector_foreach_slot (conf->blist_devnode, ble, i) { + kw = find_keyword(conf->keywords, rootkw->sub, "devnode"); + if (!kw) + return 0; + fwd += snprint_keyword(buff + fwd, len - fwd, "\t%k %v\n", + kw, ble); + if (fwd >= len) + return len; + } + vector_foreach_slot 
(conf->blist_wwid, ble, i) { + kw = find_keyword(conf->keywords, rootkw->sub, "wwid"); + if (!kw) + return 0; + fwd += snprint_keyword(buff + fwd, len - fwd, "\t%k %v\n", + kw, ble); + if (fwd >= len) + return len; + } + vector_foreach_slot (conf->blist_property, ble, i) { + kw = find_keyword(conf->keywords, rootkw->sub, "property"); + if (!kw) + return 0; + fwd += snprint_keyword(buff + fwd, len - fwd, "\t%k %v\n", + kw, ble); + if (fwd >= len) + return len; + } + vector_foreach_slot (conf->blist_protocol, ble, i) { + kw = find_keyword(conf->keywords, rootkw->sub, "protocol"); + if (!kw) + return 0; + fwd += snprint_keyword(buff + fwd, len - fwd, "\t%k %v\n", + kw, ble); + if (fwd >= len) + return len; + } + rootkw = find_keyword(conf->keywords, rootkw->sub, "device"); + if (!rootkw) + return 0; + + vector_foreach_slot (conf->blist_device, bled, i) { + fwd += snprintf(buff + fwd, len - fwd, "\tdevice {\n"); + if (fwd >= len) + return len; + kw = find_keyword(conf->keywords, rootkw->sub, "vendor"); + if (!kw) + return 0; + fwd += snprint_keyword(buff + fwd, len - fwd, "\t\t%k %v\n", + kw, bled); + if (fwd >= len) + return len; + kw = find_keyword(conf->keywords, rootkw->sub, "product"); + if (!kw) + return 0; + fwd += snprint_keyword(buff + fwd, len - fwd, "\t\t%k %v\n", + kw, bled); + if (fwd >= len) + return len; + fwd += snprintf(buff + fwd, len - fwd, "\t}\n"); + if (fwd >= len) + return len; + } + fwd += snprintf(buff + fwd, len - fwd, "}\n"); + if (fwd >= len) + return len; + return fwd; +} + +static int snprint_blacklist_except(const struct config *conf, + char *buff, int len) +{ + int i; + struct blentry * ele; + struct blentry_device * eled; + int fwd = 0; + struct keyword *rootkw; + struct keyword *kw; + + rootkw = find_keyword(conf->keywords, NULL, "blacklist_exceptions"); + if (!rootkw) + return 0; + + fwd += snprintf(buff + fwd, len - fwd, "blacklist_exceptions {\n"); + if (fwd >= len) + return len; + + vector_foreach_slot (conf->elist_devnode, ele, i) 
/*
 * Build the textual configuration dump in a freshly allocated buffer.
 *
 * The sections are printed in this order: defaults, blacklist,
 * blacklist_exceptions, devices, overrides and, when conf->mptable or
 * mpvec has entries, multipaths.
 *
 * conf:    configuration to print
 * len:     if non-NULL, receives the number of bytes written on success
 * hwtable: device table to print; conf->hwtable is used when this is NULL
 * mpvec:   passed through to snprint_mptable(); may be NULL
 *
 * Returns the buffer on success, or NULL if allocation fails or the
 * output does not fit even after the buffer has grown to the largest
 * size the loop allows. Nothing in this function releases the returned
 * buffer, so presumably the caller is expected to free it.
 */
char *snprint_config(const struct config *conf, int *len,
		     const struct _vector *hwtable, const struct _vector *mpvec)
{
	char *reply;
	/* built-in config is >20kB already */
	unsigned int maxlen = 32768;

	/* retry with a buffer twice as large whenever a section filled it */
	for (reply = NULL; maxlen <= UINT_MAX/2; maxlen *= 2) {
		char *c, *tmp = reply;

		reply = REALLOC(reply, maxlen);
		if (!reply) {
			/*
			 * NOTE(review): assumes REALLOC behaves like
			 * realloc(3) and leaves the old block valid on
			 * failure, hence the explicit release of tmp.
			 */
			if (tmp)
				free(tmp);
			return NULL;
		}

		/*
		 * c is the write cursor. The section printers return the
		 * remaining size when they run out of room, which moves c
		 * exactly to reply + maxlen.
		 */
		c = reply + snprint_defaults(conf, reply, maxlen);
		if ((c - reply) == maxlen)
			continue;

		c += snprint_blacklist(conf, c, reply + maxlen - c);
		if ((c - reply) == maxlen)
			continue;

		c += snprint_blacklist_except(conf, c, reply + maxlen - c);
		if ((c - reply) == maxlen)
			continue;

		c += snprint_hwtable(conf, c, reply + maxlen - c,
				     hwtable ? hwtable : conf->hwtable);
		if ((c - reply) == maxlen)
			continue;

		c += snprint_overrides(conf, c, reply + maxlen - c,
				       conf->overrides);
		if ((c - reply) == maxlen)
			continue;

		/* the multipaths section is omitted when it would be empty */
		if (VECTOR_SIZE(conf->mptable) > 0 ||
		    (mpvec != NULL && VECTOR_SIZE(mpvec) > 0))
			c += snprint_mptable(conf, c, reply + maxlen - c,
					     mpvec);

		/* everything fit: report the length and hand over the buffer */
		if ((c - reply) < maxlen) {
			if (len)
				*len = c - reply;
			return reply;
		}
	}

	/* gave up growing the buffer */
	free(reply);
	return NULL;
}
"True" : "False"); + + if (fwd >= len) + return len; + return fwd; +} + +int snprint_devices(struct config *conf, char * buff, int len, + const struct vectors *vecs) +{ + DIR *blkdir; + struct dirent *blkdev; + struct stat statbuf; + char devpath[PATH_MAX]; + int threshold = MAX_LINE_LEN; + int fwd = 0; + int r; + + struct path * pp; + + if (!(blkdir = opendir("/sys/block"))) + return 1; + + if ((len - fwd - threshold) <= 0) { + closedir(blkdir); + return len; + } + fwd += snprintf(buff + fwd, len - fwd, "available block devices:\n"); + + while ((blkdev = readdir(blkdir)) != NULL) { + if ((strcmp(blkdev->d_name,".") == 0) || + (strcmp(blkdev->d_name,"..") == 0)) + continue; + + if (safe_sprintf(devpath, "/sys/block/%s", blkdev->d_name)) + continue; + + if (stat(devpath, &statbuf) < 0) + continue; + + if (S_ISDIR(statbuf.st_mode) == 0) + continue; + + if ((len - fwd - threshold) <= 0) { + closedir(blkdir); + return len; + } + + fwd += snprintf(buff + fwd, len - fwd, " %s", + blkdev->d_name); + pp = find_path_by_dev(vecs->pathvec, blkdev->d_name); + if (!pp) { + r = filter_devnode(conf->blist_devnode, + conf->elist_devnode, blkdev->d_name); + if (r > 0) + fwd += snprintf(buff + fwd, len - fwd, + " devnode blacklisted, unmonitored"); + else if (r <= 0) + fwd += snprintf(buff + fwd, len - fwd, + " devnode whitelisted, unmonitored"); + } else + fwd += snprintf(buff + fwd, len - fwd, + " devnode whitelisted, monitored"); + fwd += snprintf(buff + fwd, len - fwd, "\n"); + } + closedir(blkdir); + + if (fwd >= len) + return len; + return fwd; +} + +/* + * stdout printing helpers + */ +void print_path(struct path *pp, char *style) +{ + char line[MAX_LINE_LEN]; + + memset(&line[0], 0, MAX_LINE_LEN); + snprint_path(&line[0], MAX_LINE_LEN, style, pp, 1); + printf("%s", line); +} + +void print_all_paths(vector pathvec, int banner) +{ + print_all_paths_custo(pathvec, banner, PRINT_PATH_LONG); +} + +void print_all_paths_custo(vector pathvec, int banner, char *fmt) +{ + int i; + 
struct path * pp; + char line[MAX_LINE_LEN]; + + if (!VECTOR_SIZE(pathvec)) { + if (banner) + fprintf(stdout, "===== no paths =====\n"); + return; + } + + if (banner) + fprintf(stdout, "===== paths list =====\n"); + + get_path_layout(pathvec, 1); + snprint_path_header(line, MAX_LINE_LEN, fmt); + fprintf(stdout, "%s", line); + + vector_foreach_slot (pathvec, pp, i) + print_path(pp, fmt); +} diff --git a/libmultipath/print.h b/libmultipath/print.h new file mode 100644 index 0000000..e8260d0 --- /dev/null +++ b/libmultipath/print.h @@ -0,0 +1,157 @@ +#ifndef _PRINT_H +#define _PRINT_H +#include "dm-generic.h" + +#define PRINT_PATH_LONG "%w %i %d %D %p %t %T %s %o" +#define PRINT_PATH_INDENT "%i %d %D %t %T %o" +#define PRINT_PATH_CHECKER "%i %d %D %p %t %T %o %C" +#define PRINT_MAP_STATUS "%n %F %Q %N %t %r" +#define PRINT_MAP_STATS "%n %0 %1 %2 %3 %4" +#define PRINT_MAP_NAMES "%n %d %w" +#define PRINT_MAP_PROPS "size=%S features='%f' hwhandler='%h' wp=%r" +#define PRINT_PG_INDENT "policy='%s' prio=%p status=%t" + +#define PRINT_JSON_MULTIPLIER 5 +#define PRINT_JSON_MAJOR_VERSION 0 +#define PRINT_JSON_MINOR_VERSION 1 +#define PRINT_JSON_START_VERSION " \"major_version\": %d,\n" \ + " \"minor_version\": %d,\n" +#define PRINT_JSON_START_ELEM "{\n" +#define PRINT_JSON_START_MAP " \"map\":" +#define PRINT_JSON_START_MAPS "\"maps\": [" +#define PRINT_JSON_START_PATHS "\"paths\": [" +#define PRINT_JSON_START_GROUPS "\"path_groups\": [" +#define PRINT_JSON_END_ELEM "}," +#define PRINT_JSON_END_LAST_ELEM "}" +#define PRINT_JSON_END_LAST "}\n" +#define PRINT_JSON_END_ARRAY "]\n" +#define PRINT_JSON_INDENT " " +#define PRINT_JSON_MAP "{\n" \ + " \"name\" : \"%n\",\n" \ + " \"uuid\" : \"%w\",\n" \ + " \"sysfs\" : \"%d\",\n" \ + " \"failback\" : \"%F\",\n" \ + " \"queueing\" : \"%Q\",\n" \ + " \"paths\" : %N,\n" \ + " \"write_prot\" : \"%r\",\n" \ + " \"dm_st\" : \"%t\",\n" \ + " \"features\" : \"%f\",\n" \ + " \"hwhandler\" : \"%h\",\n" \ + " \"action\" : \"%A\",\n" \ + " 
/*
 * Descriptor of one printable path attribute.
 * NOTE(review): the field roles below are inferred from the field names and
 * from the "%x" style format macros in this header - confirm against the
 * table in print.c that instantiates this struct.
 */
struct path_data {
	char wildcard;		/* presumably the character following '%' in a format string */
	char * header;		/* presumably the column title */
	unsigned int width;	/* presumably the column width used for alignment */
	/* formatter: writes the attribute of pp into buff, which holds len bytes */
	int (*snprint)(char * buff, size_t len, const struct path * pp);
};
snprint_path(buf, len, fmt, pp, v) \ + _snprint_path(dm_path_to_gen(pp), buf, len, fmt, v) +int _snprint_multipath (const struct gen_multipath *, char *, int, + const char *, int); +#define snprint_multipath(buf, len, fmt, mp, v) \ + _snprint_multipath(dm_multipath_to_gen(mp), buf, len, fmt, v) +int _snprint_multipath_topology (const struct gen_multipath *, char *, int, + int verbosity); +#define snprint_multipath_topology(buf, len, mpp, v) \ + _snprint_multipath_topology (dm_multipath_to_gen(mpp), buf, len, v) +int snprint_multipath_topology_json (char * buff, int len, + const struct vectors * vecs); +char *snprint_config(const struct config *conf, int *len, + const struct _vector *hwtable, + const struct _vector *mpvec); +int snprint_multipath_map_json (char * buff, int len, + const struct multipath * mpp); +int snprint_blacklist_report (struct config *, char *, int); +int snprint_wildcards (char *, int); +int snprint_status (char *, int, const struct vectors *); +int snprint_devices (struct config *, char *, int, const struct vectors *); +int snprint_path_serial (char *, size_t, const struct path *); +int snprint_host_wwnn (char *, size_t, const struct path *); +int snprint_host_wwpn (char *, size_t, const struct path *); +int snprint_tgt_wwnn (char *, size_t, const struct path *); +int snprint_tgt_wwpn (char *, size_t, const struct path *); +#define PROTOCOL_BUF_SIZE sizeof("scsi:unspec") +int snprint_path_protocol(char *, size_t, const struct path *); + +void _print_multipath_topology (const struct gen_multipath * gmp, + int verbosity); +#define print_multipath_topology(mpp, v) \ + _print_multipath_topology(dm_multipath_to_gen(mpp), v) + +void print_all_paths (vector pathvec, int banner); +void print_all_paths_custo (vector pathvec, int banner, char *fmt); + +int snprint_path_attr(const struct gen_path* gp, + char *buf, int len, char wildcard); +int snprint_pathgroup_attr(const struct gen_pathgroup* gpg, + char *buf, int len, char wildcard); +int 
/*
 * Choose the timeout to use for a prioritizer call.
 *
 * A non-zero timeout_ms takes precedence; zero means "not set" and makes
 * the function fall back to default_timeout.
 */
unsigned int get_prio_timeout(unsigned int timeout_ms,
			      unsigned int default_timeout)
{
	return timeout_ms != 0 ? timeout_ms : default_timeout;
}
libname[LIB_PRIO_NAMELEN]; + struct stat stbuf; + struct prio * p; + char *errstr; + + p = alloc_prio(); + if (!p) + return NULL; + snprintf(p->name, PRIO_NAME_LEN, "%s", name); + snprintf(libname, LIB_PRIO_NAMELEN, "%s/libprio%s.so", + multipath_dir, name); + if (stat(libname,&stbuf) < 0) { + condlog(0,"Prioritizer '%s' not found in %s", + name, multipath_dir); + goto out; + } + condlog(3, "loading %s prioritizer", libname); + p->handle = dlopen(libname, RTLD_NOW); + if (!p->handle) { + if ((errstr = dlerror()) != NULL) + condlog(0, "A dynamic linking error occurred: (%s)", + errstr); + goto out; + } + p->getprio = (int (*)(struct path *, char *, unsigned int)) dlsym(p->handle, "getprio"); + errstr = dlerror(); + if (errstr != NULL) + condlog(0, "A dynamic linking error occurred: (%s)", errstr); + if (!p->getprio) + goto out; + list_add(&p->node, &prioritizers); + return p; +out: + free_prio(p); + return NULL; +} + +int prio_getprio (struct prio * p, struct path * pp, unsigned int timeout) +{ + return p->getprio(pp, p->args, timeout); +} + +int prio_selected (const struct prio * p) +{ + if (!p) + return 0; + return (p->getprio) ? 
1 : 0; +} + +const char * prio_name (const struct prio * p) +{ + return p->name; +} + +const char * prio_args (const struct prio * p) +{ + return p->args; +} + +void prio_get (char *multipath_dir, struct prio * dst, char * name, char * args) +{ + struct prio * src = NULL; + + if (!dst) + return; + + if (name && strlen(name)) { + src = prio_lookup(name); + if (!src) + src = add_prio(multipath_dir, name); + } + if (!src) { + dst->getprio = NULL; + return; + } + + strncpy(dst->name, src->name, PRIO_NAME_LEN); + if (args) + strlcpy(dst->args, args, PRIO_ARGS_LEN); + dst->getprio = src->getprio; + dst->handle = NULL; + + src->refcount++; +} + +void prio_put (struct prio * dst) +{ + struct prio * src; + + if (!dst || !dst->getprio) + return; + + src = prio_lookup(dst->name); + memset(dst, 0x0, sizeof(struct prio)); + free_prio(src); +} diff --git a/libmultipath/prio.h b/libmultipath/prio.h new file mode 100644 index 0000000..599d1d8 --- /dev/null +++ b/libmultipath/prio.h @@ -0,0 +1,72 @@ +#ifndef _PRIO_H +#define _PRIO_H + +/* + * knowing about path struct gives flexibility to prioritizers + */ +#include "checkers.h" +#include "vector.h" + +/* forward declaration to avoid circular dependency */ +struct path; + +#include "list.h" +#include "memory.h" +#include "defaults.h" + +/* + * Known prioritizers for use in hwtable.c + */ +#define PRIO_ALUA "alua" +#define PRIO_CONST "const" +#define PRIO_DATACORE "datacore" +#define PRIO_EMC "emc" +#define PRIO_HDS "hds" +#define PRIO_HP_SW "hp_sw" +#define PRIO_IET "iet" +#define PRIO_ONTAP "ontap" +#define PRIO_RANDOM "random" +#define PRIO_RDAC "rdac" +#define PRIO_WEIGHTED_PATH "weightedpath" +#define PRIO_SYSFS "sysfs" +#define PRIO_PATH_LATENCY "path_latency" +#define PRIO_ANA "ana" + +/* + * Value used to mark the fact prio was not defined + */ +#define PRIO_UNDEF -1 + +/* + * strings lengths + */ +#define LIB_PRIO_NAMELEN 255 +#define PRIO_NAME_LEN 16 +#define PRIO_ARGS_LEN 255 + +struct prio { + void *handle; + int 
refcount; + struct list_head node; + char name[PRIO_NAME_LEN]; + char args[PRIO_ARGS_LEN]; + int (*getprio)(struct path *, char *, unsigned int); +}; + +unsigned int get_prio_timeout(unsigned int checker_timeout, + unsigned int default_timeout); +int init_prio (char *); +void cleanup_prio (void); +struct prio * add_prio (char *, char *); +int prio_getprio (struct prio *, struct path *, unsigned int); +void prio_get (char *, struct prio *, char *, char *); +void prio_put (struct prio *); +int prio_selected (const struct prio *); +const char * prio_name (const struct prio *); +const char * prio_args (const struct prio *); +int prio_set_args (struct prio *, const char *); + +/* The only function exported by prioritizer dynamic libraries (.so) */ +int getprio(struct path *, char *, unsigned int); + +#endif /* _PRIO_H */ diff --git a/libmultipath/prioritizers/Makefile b/libmultipath/prioritizers/Makefile new file mode 100644 index 0000000..9d0fe03 --- /dev/null +++ b/libmultipath/prioritizers/Makefile @@ -0,0 +1,52 @@ +# +# Copyright (C) 2007 Christophe Varoqui, +# +include ../../Makefile.inc + +CFLAGS += $(LIB_CFLAGS) -I.. 
+ +# If you add or remove a prioritizer also update multipath/multipath.conf.5 +LIBS = \ + libprioalua.so \ + libprioconst.so \ + libpriodatacore.so \ + libprioemc.so \ + libpriohds.so \ + libpriohp_sw.so \ + libprioiet.so \ + libprioontap.so \ + libpriorandom.so \ + libpriordac.so \ + libprioweightedpath.so \ + libpriopath_latency.so \ + libpriosysfs.so + +ifneq ($(call check_file,/usr/include/linux/nvme_ioctl.h),0) + LIBS += libprioana.so + CFLAGS += -I../nvme +endif + +all: $(LIBS) + +libpriopath_latency.so: path_latency.o ../checkers/libsg.o + $(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^ -lm + +libprio%.so: %.o + $(CC) $(LDFLAGS) $(SHARED_FLAGS) -o $@ $^ + +install: $(LIBS) + $(INSTALL_PROGRAM) -m 755 libprio*.so $(DESTDIR)$(libdir) + +uninstall: + for file in $(LIBS); do $(RM) $(DESTDIR)$(libdir)/$$file; done + +clean: dep_clean + $(RM) core *.a *.o *.gz *.so + +OBJS = $(LIBS:libprio%.so=%.o) alua_rtpg.o +.SECONDARY: $(OBJS) + +include $(wildcard $(OBJS:.o=.d)) + +dep_clean: + $(RM) $(OBJS:.o=.d) diff --git a/libmultipath/prioritizers/alua.c b/libmultipath/prioritizers/alua.c new file mode 100644 index 0000000..0ab06e2 --- /dev/null +++ b/libmultipath/prioritizers/alua.c @@ -0,0 +1,147 @@ +/* + * (C) Copyright IBM Corp. 2004, 2005 All Rights Reserved. + * + * main.c + * + * Tool to make use of a SCSI-feature called Asymmetric Logical Unit Access. + * It determines the ALUA state of a device and prints a priority value to + * stdout. + * + * Author(s): Jan Kunigk + * S. Bader + * + * This file is released under the GPL. 
+ */ +#include + +#include "debug.h" +#include "prio.h" +#include "structs.h" + +#include "alua.h" + +#define ALUA_PRIO_NOT_SUPPORTED 1 +#define ALUA_PRIO_RTPG_FAILED 2 +#define ALUA_PRIO_GETAAS_FAILED 3 +#define ALUA_PRIO_TPGS_FAILED 4 +#define ALUA_PRIO_NO_INFORMATION 5 + +static const char * aas_string[] = { + [AAS_OPTIMIZED] = "active/optimized", + [AAS_NON_OPTIMIZED] = "active/non-optimized", + [AAS_STANDBY] = "standby", + [AAS_UNAVAILABLE] = "unavailable", + [AAS_LBA_DEPENDENT] = "logical block dependent", + [AAS_RESERVED] = "ARRAY BUG: invalid TPGs state!", + [AAS_OFFLINE] = "offline", + [AAS_TRANSITIONING] = "transitioning between states", +}; + +static const char *aas_print_string(int rc) +{ + rc &= 0x7f; + + if (rc & 0x70) + return aas_string[AAS_RESERVED]; + rc &= 0x0f; + if (rc > AAS_RESERVED && rc < AAS_OFFLINE) + return aas_string[AAS_RESERVED]; + else + return aas_string[rc]; +} + +int +get_alua_info(struct path * pp, unsigned int timeout) +{ + int rc; + int tpg; + + tpg = get_target_port_group(pp, timeout); + if (tpg < 0) { + rc = get_target_port_group_support(pp, timeout); + if (rc < 0) + return -ALUA_PRIO_TPGS_FAILED; + if (rc == TPGS_NONE) + return -ALUA_PRIO_NOT_SUPPORTED; + return -ALUA_PRIO_RTPG_FAILED; + } + condlog(3, "%s: reported target port group is %i", pp->dev, tpg); + rc = get_asymmetric_access_state(pp, tpg, timeout); + if (rc < 0) { + condlog(2, "%s: get_asymmetric_access_state returned %d", + __func__, rc); + return -ALUA_PRIO_GETAAS_FAILED; + } + + condlog(3, "%s: aas = %02x [%s]%s", pp->dev, rc, aas_print_string(rc), + (rc & 0x80) ? 
" [preferred]" : ""); + return rc; +} + +int get_exclusive_pref_arg(char *args) +{ + char *ptr; + + if (args == NULL) + return 0; + ptr = strstr(args, "exclusive_pref_bit"); + if (!ptr) + return 0; + if (ptr[18] != '\0' && ptr[18] != ' ' && ptr[18] != '\t') + return 0; + if (ptr != args && ptr[-1] != ' ' && ptr[-1] != '\t') + return 0; + return 1; +} + +int getprio (struct path * pp, char * args, unsigned int timeout) +{ + int rc; + int aas; + int priopath; + int exclusive_pref; + + if (pp->fd < 0) + return -ALUA_PRIO_NO_INFORMATION; + + exclusive_pref = get_exclusive_pref_arg(args); + rc = get_alua_info(pp, timeout); + if (rc >= 0) { + aas = (rc & 0x0f); + priopath = (rc & 0x80); + switch(aas) { + case AAS_OPTIMIZED: + rc = 50; + break; + case AAS_NON_OPTIMIZED: + rc = 10; + break; + case AAS_LBA_DEPENDENT: + rc = 5; + break; + case AAS_STANDBY: + rc = 1; + break; + default: + rc = 0; + } + if (priopath && (aas != AAS_OPTIMIZED || exclusive_pref)) + rc += 80; + } else { + switch(-rc) { + case ALUA_PRIO_NOT_SUPPORTED: + condlog(0, "%s: alua not supported", pp->dev); + break; + case ALUA_PRIO_RTPG_FAILED: + condlog(0, "%s: couldn't get target port group", pp->dev); + break; + case ALUA_PRIO_GETAAS_FAILED: + condlog(0, "%s: couldn't get asymmetric access state", pp->dev); + break; + case ALUA_PRIO_TPGS_FAILED: + condlog(3, "%s: couldn't get supported alua states", pp->dev); + break; + } + } + return rc; +} diff --git a/libmultipath/prioritizers/alua.h b/libmultipath/prioritizers/alua.h new file mode 100644 index 0000000..78a3d15 --- /dev/null +++ b/libmultipath/prioritizers/alua.h @@ -0,0 +1,9 @@ +#ifndef _ALUA_H +#define _ALUA_H + +#include "alua_rtpg.h" + +#define PRIO_ALUA "alua" +int prio_alua(struct path * pp); + +#endif diff --git a/libmultipath/prioritizers/alua_rtpg.c b/libmultipath/prioritizers/alua_rtpg.c new file mode 100644 index 0000000..bbf5aac --- /dev/null +++ b/libmultipath/prioritizers/alua_rtpg.c @@ -0,0 +1,428 @@ +/* + * (C) Copyright IBM Corp. 
2004, 2005 All Rights Reserved. + * + * rtpg.c + * + * Tool to make use of a SCSI-feature called Asymmetric Logical Unit Access. + * It determines the ALUA state of a device and prints a priority value to + * stdout. + * + * Author(s): Jan Kunigk + * S. Bader + * + * This file is released under the GPL. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#define __user +#include + +#include "../structs.h" +#include "../prio.h" +#include "../discovery.h" +#include "../unaligned.h" +#include "../debug.h" +#include "alua_rtpg.h" + +#define SENSE_BUFF_LEN 32 +#define SGIO_TIMEOUT 60000 + +#define PRINT_DEBUG(f, a...) \ + condlog(4, "alua: " f, ##a) + +/* + * Optionally print the commands sent and the data received as a hex dump. + */ +#if DEBUG > 0 +#if DEBUG_DUMPHEX > 0 +#define PRINT_HEX(p, l) print_hex(p, l) +void +print_hex(unsigned char *p, unsigned long len) +{ + int i; + + for(i = 0; i < len; i++) { /* 16 bytes per output row, each row prefixed with its offset */ + if (i % 16 == 0) + printf("%04x: ", i); + printf("%02x%s", p[i], (((i + 1) % 16) == 0) ? "\n" : " "); + } + printf("\n"); +} +#else +#define PRINT_HEX(p, l) +#endif +#else +#define PRINT_HEX(p, l) +#endif + +/* + * Returns 0 if the SCSI command either was successful or if an error was + * recovered, otherwise 1.
(definitions taken from sg_err.h) + */ +#define SCSI_CHECK_CONDITION 0x2 +#define SCSI_COMMAND_TERMINATED 0x22 +#define SG_ERR_DRIVER_SENSE 0x08 +#define RECOVERED_ERROR 0x01 +#define NOT_READY 0x2 +#define UNIT_ATTENTION 0x6 + +enum scsi_disposition { + SCSI_GOOD = 0, + SCSI_ERROR, + SCSI_RETRY, +}; + +static int +scsi_error(struct sg_io_hdr *hdr, int opcode) +{ + int sense_key, asc, ascq; + + /* Treat SG_ERR here to get rid of sg_err.[ch] */ + hdr->status &= 0x7e; + + if ( + (hdr->status == 0) && + (hdr->host_status == 0) && + (hdr->driver_status == 0) + ) { + return SCSI_GOOD; + } + + sense_key = asc = ascq = -1; + if ( + (hdr->status == SCSI_CHECK_CONDITION) || + (hdr->status == SCSI_COMMAND_TERMINATED) || + ((hdr->driver_status & 0xf) == SG_ERR_DRIVER_SENSE) + ) { + if (hdr->sbp && (hdr->sb_len_wr > 2)) { + unsigned char * sense_buffer = hdr->sbp; + + if (sense_buffer[0] & 0x2) { + sense_key = sense_buffer[1] & 0xf; + if (hdr->sb_len_wr > 3) + asc = sense_buffer[2]; + if (hdr->sb_len_wr > 4) + ascq = sense_buffer[3]; + } else { + sense_key = sense_buffer[2] & 0xf; + if (hdr->sb_len_wr > 13) + asc = sense_buffer[12]; + if (hdr->sb_len_wr > 14) + ascq = sense_buffer[13]; + } + + if (sense_key == RECOVERED_ERROR) + return SCSI_GOOD; + } + } + + PRINT_DEBUG("alua: SCSI error for command %02x: status %02x, sense %02x/%02x/%02x", + opcode, hdr->status, sense_key, asc, ascq); + + if (sense_key == UNIT_ATTENTION || sense_key == NOT_READY) + return SCSI_RETRY; + else + return SCSI_ERROR; +} + +/* + * Helper function to setup and run a SCSI inquiry command. 
+ */ +static int +do_inquiry_sg(int fd, int evpd, unsigned int codepage, + void *resp, int resplen, unsigned int timeout) +{ + struct inquiry_command cmd; + struct sg_io_hdr hdr; + unsigned char sense[SENSE_BUFF_LEN]; + int rc, retry_count = 3; + +retry: + memset(&cmd, 0, sizeof(cmd)); + cmd.op = OPERATION_CODE_INQUIRY; + if (evpd) { + inquiry_command_set_evpd(&cmd); + cmd.page = codepage; + } + put_unaligned_be16(resplen, cmd.length); + PRINT_HEX((unsigned char *) &cmd, sizeof(cmd)); + + memset(&hdr, 0, sizeof(hdr)); + hdr.interface_id = 'S'; + hdr.cmdp = (unsigned char *) &cmd; + hdr.cmd_len = sizeof(cmd); + hdr.dxfer_direction = SG_DXFER_FROM_DEV; + hdr.dxferp = resp; + hdr.dxfer_len = resplen; + hdr.sbp = sense; + hdr.mx_sb_len = sizeof(sense); + hdr.timeout = get_prio_timeout(timeout, SGIO_TIMEOUT); + + if (ioctl(fd, SG_IO, &hdr) < 0) { + PRINT_DEBUG("do_inquiry: IOCTL failed!"); + return -RTPG_INQUIRY_FAILED; + } + + rc = scsi_error(&hdr, OPERATION_CODE_INQUIRY); + if (rc == SCSI_ERROR) { + PRINT_DEBUG("do_inquiry: SCSI error!"); + return -RTPG_INQUIRY_FAILED; + } else if (rc == SCSI_RETRY) { + if (--retry_count >= 0) + goto retry; + PRINT_DEBUG("do_inquiry: retries exhausted!"); + return -RTPG_INQUIRY_FAILED; + } + PRINT_HEX((unsigned char *) resp, resplen); + + return 0; +} + +int do_inquiry(const struct path *pp, int evpd, unsigned int codepage, + void *resp, int resplen, unsigned int timeout) +{ + struct udev_device *ud; + + ud = udev_device_get_parent_with_subsystem_devtype(pp->udev, "scsi", + "scsi_device"); + if (ud != NULL) { + int rc; + + if (!evpd) + rc = sysfs_get_inquiry(ud, resp, resplen); + else + rc = sysfs_get_vpd(ud, codepage, resp, resplen); + + if (rc >= 0) { + PRINT_HEX((unsigned char *) resp, resplen); + return 0; + } + } + return do_inquiry_sg(pp->fd, evpd, codepage, resp, resplen, timeout); +} + +/* + * This function returns the support for target port groups by evaluating the + * data returned by the standard inquiry command. 
+ */ +int +get_target_port_group_support(const struct path *pp, unsigned int timeout) +{ + struct inquiry_data inq; + int rc; + + memset((unsigned char *)&inq, 0, sizeof(inq)); + rc = do_inquiry(pp, 0, 0x00, &inq, sizeof(inq), timeout); + if (!rc) { + rc = inquiry_data_get_tpgs(&inq); + } + + return rc; +} + +static int +get_sysfs_pg83(const struct path *pp, unsigned char *buff, int buflen) +{ + struct udev_device *parent = pp->udev; + + while (parent) { + const char *subsys = udev_device_get_subsystem(parent); + if (subsys && !strncmp(subsys, "scsi", 4)) + break; + parent = udev_device_get_parent(parent); + } + + if (!parent || sysfs_get_vpd(parent, 0x83, buff, buflen) <= 0) { + PRINT_DEBUG("failed to read sysfs vpd pg83"); + return -1; + } + return 0; +} + +int +get_target_port_group(const struct path * pp, unsigned int timeout) +{ + unsigned char *buf; + struct vpd83_data * vpd83; + struct vpd83_dscr * dscr; + int rc; + int buflen, scsi_buflen; + + buflen = 4096; /* initial size; grown below if the page header reports more */ + buf = (unsigned char *)malloc(buflen); + if (!buf) { + PRINT_DEBUG("malloc failed: could not allocate " + "%u bytes", buflen); + return -RTPG_RTPG_FAILED; + } + + memset(buf, 0, buflen); + + rc = get_sysfs_pg83(pp, buf, buflen); + + if (rc < 0) { /* sysfs read failed: fall back to an INQUIRY for VPD page 0x83 */ + rc = do_inquiry(pp, 1, 0x83, buf, buflen, timeout); + if (rc < 0) + goto out; + + scsi_buflen = get_unaligned_be16(&buf[2]) + 4; + /* Paranoia */ + if (scsi_buflen >= USHRT_MAX) + scsi_buflen = USHRT_MAX; + if (buflen < scsi_buflen) { + free(buf); + buf = (unsigned char *)malloc(scsi_buflen); + if (!buf) { + PRINT_DEBUG("malloc failed: could not allocate " + "%u bytes", scsi_buflen); + return -RTPG_RTPG_FAILED; + } + buflen = scsi_buflen; + memset(buf, 0, buflen); + rc = do_inquiry(pp, 1, 0x83, buf, buflen, timeout); + if (rc < 0) + goto out; + } + } + + vpd83 = (struct vpd83_data *) buf; + rc = -RTPG_NO_TPG_IDENTIFIER; + FOR_EACH_VPD83_DSCR(vpd83, dscr) { + if (vpd83_dscr_istype(dscr, IDTYPE_TARGET_PORT_GROUP)) { + struct vpd83_tpg_dscr *p; + if (rc !=
-RTPG_NO_TPG_IDENTIFIER) { + PRINT_DEBUG("get_target_port_group: more " + "than one TPG identifier found!"); + continue; + } + p = (struct vpd83_tpg_dscr *)dscr->data; + rc = get_unaligned_be16(p->tpg); + } + } + + if (rc == -RTPG_NO_TPG_IDENTIFIER) { + PRINT_DEBUG("get_target_port_group: " + "no TPG identifier found!"); + } +out: + free(buf); + return rc; +} + +int +do_rtpg(int fd, void* resp, long resplen, unsigned int timeout) +{ + struct rtpg_command cmd; + struct sg_io_hdr hdr; + unsigned char sense[SENSE_BUFF_LEN]; + int retry_count = 3, rc; + +retry: + memset(&cmd, 0, sizeof(cmd)); + cmd.op = OPERATION_CODE_RTPG; + rtpg_command_set_service_action(&cmd); + put_unaligned_be32(resplen, cmd.length); + PRINT_HEX((unsigned char *) &cmd, sizeof(cmd)); + + memset(&hdr, 0, sizeof(hdr)); + hdr.interface_id = 'S'; + hdr.cmdp = (unsigned char *) &cmd; + hdr.cmd_len = sizeof(cmd); + hdr.dxfer_direction = SG_DXFER_FROM_DEV; + hdr.dxferp = resp; + hdr.dxfer_len = resplen; + hdr.mx_sb_len = sizeof(sense); + hdr.sbp = sense; + hdr.timeout = get_prio_timeout(timeout, SGIO_TIMEOUT); + + if (ioctl(fd, SG_IO, &hdr) < 0) { + condlog(2, "%s: sg ioctl failed: %s", + __func__, strerror(errno)); + return -RTPG_RTPG_FAILED; + } + + rc = scsi_error(&hdr, OPERATION_CODE_RTPG); + if (rc == SCSI_ERROR) { + PRINT_DEBUG("do_rtpg: SCSI error!"); + return -RTPG_RTPG_FAILED; + } else if (rc == SCSI_RETRY) { + if (--retry_count >= 0) + goto retry; + PRINT_DEBUG("do_rtpg: retries exhausted!"); + return -RTPG_RTPG_FAILED; + } + PRINT_HEX(resp, resplen); + + return 0; +} + +int +get_asymmetric_access_state(const struct path *pp, unsigned int tpg, + unsigned int timeout) +{ + unsigned char *buf; + struct rtpg_data * tpgd; + struct rtpg_tpg_dscr * dscr; + int rc; + unsigned int buflen; + uint64_t scsi_buflen; + int fd = pp->fd; + + buflen = 4096; + buf = (unsigned char *)malloc(buflen); + if (!buf) { + PRINT_DEBUG ("malloc failed: could not allocate" + "%u bytes", buflen); + return 
-RTPG_RTPG_FAILED; + } + memset(buf, 0, buflen); + rc = do_rtpg(fd, buf, buflen, timeout); + if (rc < 0) { + PRINT_DEBUG("%s: do_rtpg returned %d", __func__, rc); + goto out; + } + scsi_buflen = get_unaligned_be32(&buf[0]) + 4; + if (scsi_buflen > UINT_MAX) + scsi_buflen = UINT_MAX; + if (buflen < scsi_buflen) { + free(buf); + buf = (unsigned char *)malloc(scsi_buflen); + if (!buf) { + PRINT_DEBUG("malloc failed: could not allocate %" + PRIu64 " bytes", scsi_buflen); + return -RTPG_RTPG_FAILED; + } + buflen = scsi_buflen; + memset(buf, 0, buflen); + rc = do_rtpg(fd, buf, buflen, timeout); + if (rc < 0) + goto out; + } + + tpgd = (struct rtpg_data *) buf; + rc = -RTPG_TPG_NOT_FOUND; + RTPG_FOR_EACH_PORT_GROUP(tpgd, dscr) { + if (get_unaligned_be16(dscr->tpg) == tpg) { + if (rc != -RTPG_TPG_NOT_FOUND) { + PRINT_DEBUG("get_asymmetric_access_state: " + "more than one entry with same port " + "group."); + } else { + condlog(5, "pref=%i", dscr->b0); + rc = rtpg_tpg_dscr_get_aas(dscr); + } + } + } + if (rc == -RTPG_TPG_NOT_FOUND) + condlog(2, "%s: port group %d not found", __func__, tpg); +out: + free(buf); + return rc; +} diff --git a/libmultipath/prioritizers/alua_rtpg.h b/libmultipath/prioritizers/alua_rtpg.h new file mode 100644 index 0000000..675709f --- /dev/null +++ b/libmultipath/prioritizers/alua_rtpg.h @@ -0,0 +1,30 @@ +/* + * (C) Copyright IBM Corp. 2004, 2005 All Rights Reserved. + * + * rtpg.h + * + * Tool to make use of a SCSI-feature called Asymmetric Logical Unit Access. + * It determines the ALUA state of a device and prints a priority value to + * stdout. + * + * Author(s): Jan Kunigk + * S. Bader + * + * This file is released under the GPL. 
+ */ +#ifndef __RTPG_H__ +#define __RTPG_H__ +#include "alua_spc3.h" + +#define RTPG_SUCCESS 0 +#define RTPG_INQUIRY_FAILED 1 +#define RTPG_NO_TPG_IDENTIFIER 2 +#define RTPG_RTPG_FAILED 3 +#define RTPG_TPG_NOT_FOUND 4 + +int get_target_port_group_support(const struct path *pp, unsigned int timeout); +int get_target_port_group(const struct path *pp, unsigned int timeout); +int get_asymmetric_access_state(const struct path *pp, + unsigned int tpg, unsigned int timeout); + +#endif /* __RTPG_H__ */ diff --git a/libmultipath/prioritizers/alua_spc3.h b/libmultipath/prioritizers/alua_spc3.h new file mode 100644 index 0000000..18b495e --- /dev/null +++ b/libmultipath/prioritizers/alua_spc3.h @@ -0,0 +1,295 @@ +/* + * (C) Copyright IBM Corp. 2004, 2005 All Rights Reserved. + * + * spc3.h + * + * Tool to make use of a SCSI-feature called Asymmetric Logical Unit Access. + * It determines the ALUA state of a device and prints a priority value to + * stdout. + * + * Author(s): Jan Kunigk + * S. Bader + * + * This file is released under the GPL. + */ +#ifndef __SPC3_H__ +#define __SPC3_H__ + +/*============================================================================= + * Definitions to support the standard inquiry command as defined in SPC-3. + * If the evpd (enable vital product data) bit is set the data that will be + * returned is selected by the page field. This field must be 0 if the evpd + * bit is not set. + *============================================================================= + */ +#define OPERATION_CODE_INQUIRY 0x12 + +struct inquiry_command { + unsigned char op; + unsigned char b1; /* xxxxxx.. = reserved */ + /* ......x. 
= obsolete */ + /* .......x = evpd */ + unsigned char page; + unsigned char length[2]; + unsigned char control; +} __attribute__((packed)); + +static inline void +inquiry_command_set_evpd(struct inquiry_command *ic) +{ + ic->b1 |= 1; +} + +/*----------------------------------------------------------------------------- + * Data returned by the standard inquiry command. + *----------------------------------------------------------------------------- + * + * Peripheral qualifier codes. + */ +#define PQ_CONNECTED 0x0 +#define PQ_DISCONNECTED 0x1 +#define PQ_UNSUPPORTED 0x3 + +/* Defined peripheral device types. */ +#define PDT_DIRECT_ACCESS 0x00 +#define PDT_SEQUENTIAL_ACCESS 0x01 +#define PDT_PRINTER 0x02 +#define PDT_PROCESSOR 0x03 +#define PDT_WRITE_ONCE 0x04 +#define PDT_CD_DVD 0x05 +#define PDT_SCANNER 0x06 +#define PDT_OPTICAL_MEMORY 0x07 +#define PDT_MEDIUM_CHANGER 0x08 +#define PDT_COMMUNICATIONS 0x09 +#define PDT_STORAGE_ARRAY_CONTROLLER 0x0c +#define PDT_ENCLOSURE_SERVICES 0x0d +#define PDT_SIMPLIFIED_DIRECT_ACCESS 0x0e +#define PDT_OPTICAL_CARD_READER_WRITER 0x0f +#define PDT_BRIDGE_CONTROLLER 0x10 +#define PDT_OBJECT_BASED 0x11 +#define PDT_AUTOMATION_INTERFACE 0x12 +#define PDT_LUN 0x1e +#define PDT_UNKNOWN 0x1f + +/* Defined version codes. */ +#define VERSION_NONE 0x00 +#define VERSION_SPC 0x03 +#define VERSION_SPC2 0x04 +#define VERSION_SPC3 0x05 + +/* Defined TPGS field values. */ +#define TPGS_UNDEF -1 +#define TPGS_NONE 0x0 +#define TPGS_IMPLICIT 0x1 +#define TPGS_EXPLICIT 0x2 +#define TPGS_BOTH 0x3 + +struct inquiry_data { + unsigned char b0; /* xxx..... = peripheral_qualifier */ + /* ...xxxxx = peripheral_device_type */ + unsigned char b1; /* x....... = removable medium */ + /* .xxxxxxx = reserved */ + unsigned char version; + unsigned char b3; /* xx...... = obsolete */ + /* ..x..... = normal aca supported */ + /* ...x.... = hirarchichal lun supp. 
*/ + /* ....xxxx = response format */ + /* 2 is spc-3 format */ + unsigned char length; + unsigned char b5; /* x....... = storage controller */ + /* component supported */ + /* .x...... = access controls coord. */ + /* ..xx.... = target port group supp.*/ + /* ....x... = third party copy supp. */ + /* .....xx. = reserved */ + /* .......x = protection info supp. */ + unsigned char b6; /* x....... = bque */ + /* .x...... = enclosure services sup.*/ + /* ..x..... = vs1 */ + /* ...x.... = multiport support */ + /* ....x... = medium changer */ + /* .....xx. = obsolete */ + /* .......x = add16 */ + unsigned char b7; /* xx...... = obsolete */ + /* ..x..... = wbus16 */ + /* ...x.... = sync */ + /* ....x... = linked commands supp. */ + /* .....x.. = obsolete */ + /* ......x. = command queue support */ + /* .......x = vs2 */ + unsigned char vendor_identification[8]; + unsigned char product_identification[16]; + unsigned char product_revision[4]; + unsigned char vendor_specific[20]; + unsigned char b56; /* xxxx.... = reserved */ + /* ....xx.. = clocking */ + /* ......x. = qas */ + /* .......x = ius */ + unsigned char reserved4; + unsigned char version_descriptor[8][2]; + unsigned char reserved5[22]; + unsigned char vendor_parameters[0]; +} __attribute__((packed)); + +static inline int +inquiry_data_get_tpgs(struct inquiry_data *id) +{ + return (id->b5 >> 4) & 3; +} + +/*----------------------------------------------------------------------------- + * Inquiry data returned when requesting vital product data page 0x83. 
+ *----------------------------------------------------------------------------- + */ +#define CODESET_BINARY 0x1 +#define CODESET_ACSII 0x2 +#define CODESET_UTF8 0x3 + +#define ASSOCIATION_UNIT 0x0 +#define ASSOCIATION_PORT 0x1 +#define ASSOCIATION_DEVICE 0x2 + +#define IDTYPE_VENDOR_SPECIFIC 0x0 +#define IDTYPE_T10_VENDOR_ID 0x1 +#define IDTYPE_EUI64 0x2 +#define IDTYPE_NAA 0x3 +#define IDTYPE_RELATIVE_TPG_ID 0x4 +#define IDTYPE_TARGET_PORT_GROUP 0x5 +#define IDTYPE_LUN_GROUP 0x6 +#define IDTYPE_MD5_LUN_ID 0x7 +#define IDTYPE_SCSI_NAME_STRING 0x8 + +struct vpd83_tpg_dscr { + unsigned char reserved1[2]; + unsigned char tpg[2]; +} __attribute__((packed)); + +struct vpd83_dscr { + unsigned char b0; /* xxxx.... = protocol id */ + /* ....xxxx = codeset */ + unsigned char b1; /* x....... = protocol id valid */ + /* .x...... = reserved */ + /* ..xx.... = association */ + /* ....xxxx = id type */ + unsigned char reserved2; + unsigned char length; /* size-4 */ + unsigned char data[0]; +} __attribute__((packed)); + +static inline int +vpd83_dscr_istype(struct vpd83_dscr *d, unsigned char type) +{ + return ((d->b1 & 0x0f) == type); /* id type is the low nibble of b1; a 3-bit mask can never match IDTYPE_SCSI_NAME_STRING (0x8) */ +} + +struct vpd83_data { + unsigned char b0; /* xxx..... = peripheral_qualifier */ + /* ...xxxxx = peripheral_device_type */ + unsigned char page_code; /* 0x83 */ + unsigned char length[2]; /* size-4 */ + struct vpd83_dscr data[0]; +} __attribute__((packed)); + +/*----------------------------------------------------------------------------- + * This macro should be used to walk through all identification descriptors + * defined in the code page 0x83. + * The argument p is a pointer to the code page 0x83 data and d is used to + * point to the current descriptor.
+ *----------------------------------------------------------------------------- + */ +#define FOR_EACH_VPD83_DSCR(p, d) \ + for( \ + d = p->data; \ + (((char *) d) - ((char *) p)) < \ + get_unaligned_be16(p->length); \ + d = (struct vpd83_dscr *) \ + ((char *) d + d->length + 4) \ + ) + +/*============================================================================= + * The following structures and macros are used to call the report target port + * groups command defined in SPC-3. + * This command is used to get information about the target port groups (which + * states are supported, which ports belong to this group, and so on) and the + * current state of each target port group. + *============================================================================= + */ +#define OPERATION_CODE_RTPG 0xa3 +#define SERVICE_ACTION_RTPG 0x0a + +struct rtpg_command { + unsigned char op; /* 0xa3 */ + unsigned char b1; /* xxx..... = reserved */ + /* ...xxxxx = service action (0x0a) */ + unsigned char reserved2[4]; + unsigned char length[4]; + unsigned char reserved3; + unsigned char control; +} __attribute__((packed)); + +static inline void +rtpg_command_set_service_action(struct rtpg_command *cmd) +{ + cmd->b1 = (cmd->b1 & 0xe0) | SERVICE_ACTION_RTPG; +} + +struct rtpg_tp_dscr { + unsigned char obsolete1[2]; + /* The Relative Target Port Identifier of a target port. */ + unsigned char rtpi[2]; +} __attribute__((packed)); + +#define AAS_OPTIMIZED 0x0 +#define AAS_NON_OPTIMIZED 0x1 +#define AAS_STANDBY 0x2 +#define AAS_UNAVAILABLE 0x3 +#define AAS_LBA_DEPENDENT 0x4 +#define AAS_RESERVED 0x5 +#define AAS_OFFLINE 0xe +#define AAS_TRANSITIONING 0xf + +#define TPG_STATUS_NONE 0x0 +#define TPG_STATUS_SET 0x1 +#define TPG_STATUS_IMPLICIT_CHANGE 0x2 + +struct rtpg_tpg_dscr { + unsigned char b0; /* x....... = pref(ered) port */ + /* .xxx.... = reserved */ + /* ....xxxx = asymetric access state */ + unsigned char b1; /* xxx..... = reserved */ + /* ...x.... 
= LBA dependent support */ + /* ....x... = unavailable support */ + /* .....x.. = standby support */ + /* ......x. = non-optimized support */ + /* .......x = optimized support */ + unsigned char tpg[2]; + unsigned char reserved3; + unsigned char status; + unsigned char vendor_unique; + unsigned char port_count; + struct rtpg_tp_dscr data[0]; +} __attribute__((packed)); + +static inline int +rtpg_tpg_dscr_get_aas(struct rtpg_tpg_dscr *d) +{ + return (d->b0 & 0x8f); +} + +struct rtpg_data { + unsigned char length[4]; /* size-4 */ + struct rtpg_tpg_dscr data[0]; +} __attribute__((packed)); + +#define RTPG_FOR_EACH_PORT_GROUP(p, g) \ + for( \ + g = &(p->data[0]); \ + (((char *) g) - ((char *) p)) < get_unaligned_be32(p->length); \ + g = (struct rtpg_tpg_dscr *) ( \ + ((char *) g) + \ + sizeof(struct rtpg_tpg_dscr) + \ + g->port_count * sizeof(struct rtpg_tp_dscr) \ + ) \ + ) + +#endif /* __SPC3_H__ */ diff --git a/libmultipath/prioritizers/ana.c b/libmultipath/prioritizers/ana.c new file mode 100644 index 0000000..b5c7873 --- /dev/null +++ b/libmultipath/prioritizers/ana.c @@ -0,0 +1,234 @@ +/* + * (C) Copyright HUAWEI Technology Corp. 2017 All Rights Reserved. + * + * ana.c + * Version 1.00 + * + * Tool to make use of a NVMe-feature called Asymmetric Namespace Access. + * It determines the ANA state of a device and prints a priority value to stdout. + * + * Author(s): Cheng Jike + * Li Jie + * + * This file is released under the GPL version 2, or any later version. 
+ */ +#include +#include +#include +#include +#include +#include + +#include "debug.h" +#include "nvme-lib.h" +#include "prio.h" +#include "util.h" +#include "structs.h" + +enum { + ANA_ERR_GETCTRL_FAILED = 1, + ANA_ERR_NOT_NVME, + ANA_ERR_NOT_SUPPORTED, + ANA_ERR_GETANAS_OVERFLOW, + ANA_ERR_GETANAS_NOTFOUND, + ANA_ERR_GETANALOG_FAILED, + ANA_ERR_GETNSID_FAILED, + ANA_ERR_GETNS_FAILED, + ANA_ERR_NO_MEMORY, + ANA_ERR_NO_INFORMATION, +}; + +static const char *ana_errmsg[] = { + [ANA_ERR_GETCTRL_FAILED] = "couldn't get ctrl info", + [ANA_ERR_NOT_NVME] = "not an NVMe device", + [ANA_ERR_NOT_SUPPORTED] = "ANA not supported", + [ANA_ERR_GETANAS_OVERFLOW] = "buffer overflow in ANA log", + [ANA_ERR_GETANAS_NOTFOUND] = "NSID or ANAGRPID not found", + [ANA_ERR_GETANALOG_FAILED] = "couldn't get ana log", + [ANA_ERR_GETNSID_FAILED] = "couldn't get NSID", + [ANA_ERR_GETNS_FAILED] = "couldn't get namespace info", + [ANA_ERR_NO_MEMORY] = "out of memory", + [ANA_ERR_NO_INFORMATION] = "invalid fd", +}; + +static const char *anas_string[] = { + [NVME_ANA_OPTIMIZED] = "ANA Optimized State", + [NVME_ANA_NONOPTIMIZED] = "ANA Non-Optimized State", + [NVME_ANA_INACCESSIBLE] = "ANA Inaccessible State", + [NVME_ANA_PERSISTENT_LOSS] = "ANA Persistent Loss State", + [NVME_ANA_CHANGE] = "ANA Change state", +}; + +static const char *aas_print_string(int rc) +{ + rc &= 0xff; + if (rc >= 0 && rc < (int)ARRAY_SIZE(anas_string) && + anas_string[rc] != NULL) + return anas_string[rc]; + + return "invalid ANA state"; +} + +static int get_ana_state(__u32 nsid, __u32 anagrpid, void *ana_log, + size_t ana_log_len) +{ + void *base = ana_log; + struct nvme_ana_rsp_hdr *hdr = base; + struct nvme_ana_group_desc *ana_desc; + size_t offset = sizeof(struct nvme_ana_rsp_hdr); + __u32 nr_nsids; + size_t nsid_buf_size; + int i; + unsigned int j; + + for (i = 0; i < le16_to_cpu(hdr->ngrps); i++) { + ana_desc = base + offset; + + offset += sizeof(*ana_desc); + if (offset > ana_log_len) + return 
-ANA_ERR_GETANAS_OVERFLOW; + + nr_nsids = le32_to_cpu(ana_desc->nnsids); + nsid_buf_size = nr_nsids * sizeof(__le32); + + offset += nsid_buf_size; + if (offset > ana_log_len) + return -ANA_ERR_GETANAS_OVERFLOW; + + for (j = 0; j < nr_nsids; j++) { + if (nsid == le32_to_cpu(ana_desc->nsids[j])) + return ana_desc->state; + } + + if (anagrpid != 0 && anagrpid == le32_to_cpu(ana_desc->grpid)) + return ana_desc->state; + + } + return -ANA_ERR_GETANAS_NOTFOUND; +} + +static int get_ana_info(struct path * pp) +{ + int rc; + __u32 nsid; + struct nvme_id_ctrl ctrl; + struct nvme_id_ns ns; + void *ana_log; + size_t ana_log_len; + bool is_anagrpid_const; + + rc = nvme_id_ctrl_ana(pp->fd, &ctrl); + if (rc < 0) { + log_nvme_errcode(rc, pp->dev, "nvme_identify_ctrl"); + return -ANA_ERR_GETCTRL_FAILED; + } else if (rc == 0) + return -ANA_ERR_NOT_SUPPORTED; + + nsid = nvme_get_nsid(pp->fd); + if ((int)nsid <= 0) { /* nsid is a __u32: compare as int so a negative return is caught */ + log_nvme_errcode((int)nsid, pp->dev, "nvme_get_nsid"); + return -ANA_ERR_GETNSID_FAILED; + } + is_anagrpid_const = ctrl.anacap & (1 << 6); + + /* + * Code copied from nvme-cli/nvme.c. We don't need to allocate an + * [nanagrpid*mnan] array of NSIDs because each NSID can occur at most + * in one ANA group. + */ + ana_log_len = sizeof(struct nvme_ana_rsp_hdr) + + le32_to_cpu(ctrl.nanagrpid) + * sizeof(struct nvme_ana_group_desc); + + if (is_anagrpid_const) { + rc = nvme_identify_ns(pp->fd, nsid, 0, &ns); + if (rc) { + log_nvme_errcode(rc, pp->dev, "nvme_identify_ns"); + return -ANA_ERR_GETNS_FAILED; + } + } else + ana_log_len += le32_to_cpu(ctrl.mnan) * sizeof(__le32); + + ana_log = malloc(ana_log_len); + if (!ana_log) + return -ANA_ERR_NO_MEMORY; + pthread_cleanup_push(free, ana_log); + rc = nvme_ana_log(pp->fd, ana_log, ana_log_len, + is_anagrpid_const ? NVME_ANA_LOG_RGO : 0); + if (rc) { + log_nvme_errcode(rc, pp->dev, "nvme_ana_log"); + rc = -ANA_ERR_GETANALOG_FAILED; + } else + rc = get_ana_state(nsid, + is_anagrpid_const ?
+ le32_to_cpu(ns.anagrpid) : 0, + ana_log, ana_log_len); + pthread_cleanup_pop(1); + if (rc >= 0) + condlog(4, "%s: ana state = %02x [%s]", pp->dev, rc, + aas_print_string(rc)); + return rc; +} + +/* + * Priorities modeled roughly after the ALUA model (alua.c/sysfs.c) + * Reference: ANA Base Protocol (NVMe TP 4004a, 11/13/2018). + * + * Differences: + * + * - The ANA base spec defines no implicit or explicit (STPG) state management. + * If a state is encountered that doesn't allow normal I/O (all except + * OPTIMIZED and NON_OPTIMIZED), we can't do anything but either wait for a + * Access State Change Notice (can't do that in multipathd as we don't receive + * those), or retry commands in regular time intervals until ANATT is expired + * (not implemented). Mapping UNAVAILABLE state to ALUA STANDBY is the best we + * can currently do. + * + * FIXME: Waiting for ANATT could be implemented with a "delayed failback" + * mechanism. The current "failback" method can't be used, as it would + * affect failback to every state, and here only failback to UNAVAILABLE + * should be delayed. + * + * - PERSISTENT_LOSS state is even below ALUA's UNAVAILABLE state. + * FIXME: According to the ANA TP, accessing paths in PERSISTENT_LOSS state + * in any way makes no sense (e.g. §8.19.6 - paths in this state shouldn't + * even be checked under "all paths down" conditions). Device mapper can, + * and will, select a PG for IO if it has non-failed paths, even if the + * PG has priority 0. We could avoid that only with an "ANA path checker". + * + * - ALUA has no CHANGE state. The ANA TP §8.18.3 / §8.19.4 suggests + * that CHANGE state should be treated in roughly the same way as + * INACCESSIBLE. Therefore we assign the same prio to it. + * + * - ALUA's LBA-dependent state has no ANA equivalent. 
+ */ + +int getprio(struct path *pp, __attribute__((unused)) char *args, + __attribute__((unused)) unsigned int timeout) +{ + int rc; + + if (pp->fd < 0) + rc = -ANA_ERR_NO_INFORMATION; + else + rc = get_ana_info(pp); + + switch (rc) { + case NVME_ANA_OPTIMIZED: + return 50; + case NVME_ANA_NONOPTIMIZED: + return 10; + case NVME_ANA_INACCESSIBLE: + case NVME_ANA_CHANGE: + return 1; + case NVME_ANA_PERSISTENT_LOSS: + return 0; + default: + break; + } + if (rc < 0 && -rc < (int)ARRAY_SIZE(ana_errmsg)) + condlog(2, "%s: ANA error: %s", pp->dev, ana_errmsg[-rc]); + else + condlog(1, "%s: invalid ANA rc code %d", pp->dev, rc); + return -1; +} diff --git a/libmultipath/prioritizers/const.c b/libmultipath/prioritizers/const.c new file mode 100644 index 0000000..059d859 --- /dev/null +++ b/libmultipath/prioritizers/const.c @@ -0,0 +1,10 @@ +#include + +#include "prio.h" + +int getprio(__attribute__((unused)) struct path * pp, + __attribute__((unused)) char * args, + __attribute__((unused)) unsigned int timeout) +{ + return 1; +} diff --git a/libmultipath/prioritizers/datacore.c b/libmultipath/prioritizers/datacore.c new file mode 100644 index 0000000..02dc2e2 --- /dev/null +++ b/libmultipath/prioritizers/datacore.c @@ -0,0 +1,105 @@ +/* + * (C) 2010 Christophe Varoqui + * (C) 2009 Dembach Goo Infromatik GmbH & Co KG + * Manon Goo + * + * datacore.c + * Version 0.9 + * + * This program was inspired by work from + * Matthias Rudolph + * + * This work is made available on the basis of the + * GPLv2 for detials see . 
+ * + * Manon Goo 2009 + * + * + */ + +#include +#include + +#include +#include "sg_include.h" +#include "debug.h" +#include "prio.h" +#include "structs.h" + +#define INQ_REPLY_LEN 255 +#define INQ_CMD_CODE 0x12 +#define INQ_CMD_LEN 6 + +#define dc_log(prio, msg) condlog(prio, "%s: datacore prio: " msg, dev) + +int datacore_prio (const char *dev, int sg_fd, char * args) +{ + int k; + char sdsname[32]; + unsigned char inqCmdBlk[INQ_CMD_LEN] = { INQ_CMD_CODE, 0, 0, 0, INQ_REPLY_LEN, 0 }; + unsigned char inqBuff[INQ_REPLY_LEN]; + unsigned char *inqBuffp = inqBuff; + unsigned char sense_buffer[32]; + sg_io_hdr_t io_hdr; + + int timeout = 2000; + char preferredsds_buff[255] = ""; + char * preferredsds = &preferredsds_buff[0]; + + if (!args) { + dc_log(0, "need prio_args with preferredsds set"); + return 0; + } + + if (sscanf(args, "timeout=%i preferredsds=%s", + &timeout, preferredsds) == 2) {} + else if (sscanf(args, "preferredsds=%s timeout=%i", + preferredsds, &timeout) == 2) {} + else if (sscanf(args, "preferredsds=%s", + preferredsds) == 1) {} + else { + dc_log(0, "unexpected prio_args format"); + return 0; + } + + // on error just return prio 0 + if (strlen(preferredsds) <= 1) { + dc_log(0, "prio args: preferredsds too short (1 character min)"); + return 0; + } + if ((timeout < 500) || (timeout > 20000)) { + dc_log(0, "prio args: timeout out of bounds [500:20000]"); + return 0; + } + if ((ioctl(sg_fd, SG_GET_VERSION_NUM, &k) < 0) || (k < 30000)) + return 0; + + memset (&io_hdr, 0, sizeof (sg_io_hdr_t)); + io_hdr.interface_id = 'S'; + io_hdr.cmd_len = sizeof (inqCmdBlk); + io_hdr.mx_sb_len = sizeof (sense_buffer); + io_hdr.dxfer_direction = SG_DXFER_FROM_DEV; + io_hdr.dxfer_len = INQ_REPLY_LEN; + io_hdr.dxferp = inqBuff; + io_hdr.cmdp = inqCmdBlk; + io_hdr.sbp = sense_buffer; + io_hdr.timeout = timeout; + + // on error just return prio 0 + if (ioctl(sg_fd, SG_IO, &io_hdr) < 0) + return 0; + if ((io_hdr.info & SG_INFO_OK_MASK) != SG_INFO_OK) + return 0; + + 
snprintf(sdsname, sizeof(sdsname), "%.16s", inqBuffp + 112); + + if (strstr(sdsname , preferredsds)) + return 1; + return 0; +} + +int getprio(struct path * pp, char * args, + __attribute__((unused)) unsigned int timeout) +{ + return datacore_prio(pp->dev, pp->fd, args); +} diff --git a/libmultipath/prioritizers/emc.c b/libmultipath/prioritizers/emc.c new file mode 100644 index 0000000..3b63cca --- /dev/null +++ b/libmultipath/prioritizers/emc.c @@ -0,0 +1,88 @@ +#include +#include +#include + +#include "sg_include.h" +#include "debug.h" +#include "prio.h" +#include "structs.h" + +#define INQUIRY_CMD 0x12 +#define INQUIRY_CMDLEN 6 + +#define pp_emc_log(prio, msg) condlog(prio, "%s: emc prio: " msg, dev) + +int emc_clariion_prio(const char *dev, int fd, unsigned int timeout) +{ + unsigned char sense_buffer[128]; + unsigned char sb[128]; + unsigned char inqCmdBlk[INQUIRY_CMDLEN] = {INQUIRY_CMD, 1, 0xC0, 0, + sizeof(sense_buffer), 0}; + struct sg_io_hdr io_hdr; + int ret = PRIO_UNDEF; + + memset(&io_hdr, 0, sizeof (struct sg_io_hdr)); + memset(&sense_buffer, 0, 128); + io_hdr.interface_id = 'S'; + io_hdr.cmd_len = sizeof (inqCmdBlk); + io_hdr.mx_sb_len = sizeof (sb); + io_hdr.dxfer_direction = SG_DXFER_FROM_DEV; + io_hdr.dxfer_len = sizeof (sense_buffer); + io_hdr.dxferp = sense_buffer; + io_hdr.cmdp = inqCmdBlk; + io_hdr.sbp = sb; + io_hdr.timeout = get_prio_timeout(timeout, 60000); + io_hdr.pack_id = 0; + if (ioctl(fd, SG_IO, &io_hdr) < 0) { + pp_emc_log(0, "sending query command failed"); + goto out; + } + if (io_hdr.info & SG_INFO_OK_MASK) { + pp_emc_log(0, "query command indicates error"); + goto out; + } + + if (/* Verify the code page - right page & revision */ + sense_buffer[1] != 0xc0 || sense_buffer[9] != 0x00) { + pp_emc_log(0, "path unit report page in unknown format"); + goto out; + } + + if ( /* Effective initiator type */ + sense_buffer[27] != 0x03 + /* + * Failover mode should be set to 1 (PNR failover mode) + * or 4 (ALUA failover mode). 
+ */ + || (((sense_buffer[28] & 0x07) != 0x04) && + ((sense_buffer[28] & 0x07) != 0x06)) + /* Arraycommpath should be set to 1 */ + || (sense_buffer[30] & 0x04) != 0x04) { + pp_emc_log(0, "path not correctly configured for failover"); + goto out; + } + + if ( /* LUN operations should indicate normal operations */ + sense_buffer[48] != 0x00) { + pp_emc_log(0, "path not available for normal operations"); + goto out; + } + + /* LUN state: unbound, bound, or owned */ + ret = sense_buffer[4]; + + /* Is the default owner equal to this path? */ + /* Note this will switch to the default priority group, even if + * it is not the currently active one. */ + if (sense_buffer[5] == sense_buffer[8]) + ret+=2; + +out: + return(ret); +} + +int getprio (struct path *pp, __attribute__((unused)) char *args, + unsigned int timeout) +{ + return emc_clariion_prio(pp->dev, pp->fd, timeout); +} diff --git a/libmultipath/prioritizers/hds.c b/libmultipath/prioritizers/hds.c new file mode 100644 index 0000000..88cac5f --- /dev/null +++ b/libmultipath/prioritizers/hds.c @@ -0,0 +1,175 @@ +/* + * (C) Copyright HDS GmbH 2006. All Rights Reserved. + * + * pp_hds_modular.c + * Version 2.00 + * + * Prioritizer for Device Mapper Multipath and HDS Storage + * + * Hitachis Modular Storage contains two controllers for redundancy. The + * Storage internal LUN (LDEV) will normally allocated via two paths to the + * server (one path per controller). For performance reasons should the server + * access to a LDEV only via one controller. The other path to the other + * controller is stand-by. It is also possible to allocate more as one path + * for a LDEV per controller. Here is active/active access allowed. The other + * paths via the other controller are stand-by. 
+ * + * This prioritizer checks with inquiry command the represented LDEV and + * Controller number and gives back a priority according to this scheme: + * + * CONTROLLER ODD and LDEV ODD: PRIORITY 1 + * CONTROLLER ODD and LDEV EVEN: PRIORITY 0 + * CONTROLLER EVEN and LDEV ODD: PRIORITY 0 + * CONTROLLER EVEN and LDEV EVEN: PRIORITY 1 + * + * In the storage you can define for each LDEV an owner controller. If the + * server makes IOs via the other controller the storage will switch the + * ownership automatically. In this case you can see in the storage that the + * current controller is different from the default controller, but this is + * absolutely no problem. + * + * With this prioritizer it is possible to establish a static load balancing. + * Half of the LUNs are accessed via one HBA/storage controller and the other + * half via the other HBA/storage controller. + * + * In cluster environments (RAC) it also guarantees that all cluster nodes have + * access to the LDEVs via the same controller. 
+ * + * You can run the prioritizer manually in verbose mode: + * # pp_hds_modular -v 8:224 + * VENDOR: HITACHI + * PRODUCT: DF600F-CM + * SERIAL: 0x0105 + * LDEV: 0x00C6 + * CTRL: 1 + * PORT: B + * CTRL ODD, LDEV EVEN, PRIO 0 + * + * To compile this source please execute # cc pp_hds_modular.c -o /sbin/mpath_prio_hds_modular + * + * Changes 2006-07-16: + * - Changed to forward declaration of functions + * - The switch-statement was changed to a logical expression + * - unlinking of the devpath now also occurs at the end of + * hds_modular_prio to avoid old /tmp/.pp_balance.%u.%u.devnode + * entries in /tmp-Directory + * - The for-statements for passing variables were changed to + * snprintf-commands in verbose mode + * Changes 2006-08-10: + * - Back to the old switch statements because the regular expression does + * not work under RHEL4 U3 i386 + * Changes 2007-06-27: + * - switched from major:minor argument to device node argument + * + * This file is released under the GPL. + * + */ + +#include +#include +#include +#include +#include +#include + +#include "sg_include.h" +#include "debug.h" +#include "prio.h" +#include "structs.h" + +#define INQ_REPLY_LEN 255 +#define INQ_CMD_CODE 0x12 +#define INQ_CMD_LEN 6 + +#define pp_hds_log(prio, fmt, args...) 
\ + condlog(prio, "%s: hds prio: " fmt, dev, ##args) + +int hds_modular_prio (const char *dev, int fd, unsigned int timeout) +{ + int k; + char vendor[9]; + char product[32]; + char serial[32]; + char ldev[32]; + char ctrl[32]; + char port[32]; + unsigned char inqCmdBlk[INQ_CMD_LEN] = { INQ_CMD_CODE, 0, 0, 0, INQ_REPLY_LEN, 0 }; + unsigned char inqBuff[INQ_REPLY_LEN]; + unsigned char *inqBuffp = inqBuff; + unsigned char sense_buffer[32]; + sg_io_hdr_t io_hdr; + + if ((ioctl (fd, SG_GET_VERSION_NUM, &k) < 0) || (k < 30000)) { + pp_hds_log(0, "can't use SG ioctl interface"); + return -1; + } + + memset (&io_hdr, 0, sizeof (sg_io_hdr_t)); + memset (inqBuff, 0, INQ_REPLY_LEN); + io_hdr.interface_id = 'S'; + io_hdr.cmd_len = sizeof (inqCmdBlk); + io_hdr.mx_sb_len = sizeof (sense_buffer); + io_hdr.dxfer_direction = SG_DXFER_FROM_DEV; + io_hdr.dxfer_len = INQ_REPLY_LEN; + io_hdr.dxferp = inqBuff; + io_hdr.cmdp = inqCmdBlk; + io_hdr.sbp = sense_buffer; + io_hdr.timeout = get_prio_timeout(timeout, 2000); /* TimeOut = 2 seconds */ + + if (ioctl (fd, SG_IO, &io_hdr) < 0) { + pp_hds_log(0, "SG_IO error"); + return -1; + } + if ((io_hdr.info & SG_INFO_OK_MASK) != SG_INFO_OK) { + pp_hds_log(0, "SCSI error"); + return -1; + } + + snprintf (vendor, 9, "%.8s", inqBuffp + 8); + snprintf (product, 17, "%.16s", inqBuffp + 16); + snprintf (serial, 5, "%.4s", inqBuffp + 40); + snprintf (ldev, 5, "%.4s", inqBuffp + 44); + snprintf (ctrl, 2, "%.1s", inqBuffp + 49); + snprintf (port, 2, "%.1s", inqBuffp + 50); + + pp_hds_log(4, "VENDOR: %s", vendor); + pp_hds_log(4, "PRODUCT: %s", product); + pp_hds_log(4, "SERIAL: 0x%s", serial); + pp_hds_log(4, "LDEV: 0x%s", ldev); + pp_hds_log(4, "CTRL: %s", ctrl); + pp_hds_log(4, "PORT: %s", port); + + switch (ctrl[0]) { + case '0': case '2': case '4': case '6': case '8': + switch (ldev[3]) { + case '0': case '2': case '4': case '6': case '8': case 'A': case 'C': case 'E': + pp_hds_log(4, "CTRL EVEN, LDEV EVEN, PRIO 1"); + return 1; + break; + case 
'1': case '3': case '5': case '7': case '9': case 'B': case 'D': case 'F': + pp_hds_log(4, "CTRL EVEN, LDEV ODD, PRIO 0"); + return 0; + break; + } + break; + case '1': case '3': case '5': case '7': case '9': + switch (ldev[3]) { + case '0': case '2': case '4': case '6': case '8': case 'A': case 'C': case 'E': + pp_hds_log(4, "CTRL ODD, LDEV EVEN, PRIO 0"); + return 0; + break; + case '1': case '3': case '5': case '7': case '9': case 'B': case 'D': case 'F': + pp_hds_log(4, "CTRL ODD, LDEV ODD, PRIO 1"); + return 1; + break; + } + break; + } + return -1; +} + +int getprio (struct path * pp, __attribute__((unused)) char *args, + unsigned int timeout) +{ + return hds_modular_prio(pp->dev, pp->fd, timeout); +} diff --git a/libmultipath/prioritizers/hp_sw.c b/libmultipath/prioritizers/hp_sw.c new file mode 100644 index 0000000..5b85ad2 --- /dev/null +++ b/libmultipath/prioritizers/hp_sw.c @@ -0,0 +1,102 @@ +/* + * Path priority checker for HP active/standby controller + * + * Check the path state and sort them into groups. + * There is actually a preferred path in the controller; + * we should ask HP on how to retrieve that information. + */ +#include +#include +#include +#include +#include +#include + +#include "sg_include.h" +#include "debug.h" +#include "prio.h" +#include "structs.h" + +#define TUR_CMD_LEN 6 +#define SCSI_CHECK_CONDITION 0x2 +#define SCSI_COMMAND_TERMINATED 0x22 +#define SG_ERR_DRIVER_SENSE 0x08 +#define RECOVERED_ERROR 0x01 +#define NOT_READY 0x02 +#define UNIT_ATTENTION 0x06 + +#define HP_PATH_ACTIVE 0x04 +#define HP_PATH_STANDBY 0x02 +#define HP_PATH_FAILED 0x00 + +#define pp_hp_sw_log(prio, fmt, args...) 
\ + condlog(prio, "%s: hp_sw prio: " fmt, dev, ##args) + +int hp_sw_prio(const char *dev, int fd, unsigned int timeout) +{ + unsigned char turCmdBlk[TUR_CMD_LEN] = { 0x00, 0, 0, 0, 0, 0 }; + unsigned char sb[128]; + struct sg_io_hdr io_hdr; + int ret = HP_PATH_FAILED; + + memset(&io_hdr, 0, sizeof (struct sg_io_hdr)); + io_hdr.interface_id = 'S'; + io_hdr.cmd_len = sizeof (turCmdBlk); + io_hdr.mx_sb_len = sizeof (sb); + io_hdr.dxfer_direction = SG_DXFER_NONE; + io_hdr.cmdp = turCmdBlk; + io_hdr.sbp = sb; + io_hdr.timeout = get_prio_timeout(timeout, 60000); + io_hdr.pack_id = 0; +retry: + if (ioctl(fd, SG_IO, &io_hdr) < 0) { + pp_hp_sw_log(0, "sending tur command failed"); + goto out; + } + io_hdr.status &= 0x7e; + if ((0 == io_hdr.status) && (0 == io_hdr.host_status) && + (0 == io_hdr.driver_status)) { + /* Command completed normally, path is active */ + ret = HP_PATH_ACTIVE; + } + + if ((SCSI_CHECK_CONDITION == io_hdr.status) || + (SCSI_COMMAND_TERMINATED == io_hdr.status) || + (SG_ERR_DRIVER_SENSE == (0xf & io_hdr.driver_status))) { + if (io_hdr.sbp && (io_hdr.sb_len_wr > 2)) { + int sense_key, asc, asq; + unsigned char * sense_buffer = io_hdr.sbp; + if (sense_buffer[0] & 0x2) { + sense_key = sense_buffer[1] & 0xf; + asc = sense_buffer[2]; + asq = sense_buffer[3]; + } else { + sense_key = sense_buffer[2] & 0xf; + asc = sense_buffer[12]; + asq = sense_buffer[13]; + } + if(RECOVERED_ERROR == sense_key) + ret = HP_PATH_ACTIVE; + if(NOT_READY == sense_key) { + if (asc == 0x04 && asq == 0x02) { + /* This is a standby path */ + ret = HP_PATH_STANDBY; + } + } + if(UNIT_ATTENTION == sense_key) { + if (asc == 0x29) { + /* Retry for device reset */ + goto retry; + } + } + } + } +out: + return(ret); +} + +int getprio (struct path *pp, __attribute__((unused)) char *args, + unsigned int timeout) +{ + return hp_sw_prio(pp->dev, pp->fd, timeout); +} diff --git a/libmultipath/prioritizers/iet.c b/libmultipath/prioritizers/iet.c new file mode 100644 index 0000000..e98773c --- 
/dev/null +++ b/libmultipath/prioritizers/iet.c @@ -0,0 +1,145 @@ +#include +#include +#include +#include +#include +#include +#include +#include "prio.h" +#include "debug.h" +#include +#include "structs.h" + +// +// This prioritizer suits iSCSI needs, makes it possible to prefer one path. +// +// (It's a bit of a misnomer since supports the client side [eg. open-iscsi] +// instead of just "iet".) +// +// Usage: +// prio "iet" +// prio_args "preferredip=10.11.12.13" +// +// Uses /dev/disk/by-path to find the IP of the device. +// Assigns prio 20 (high) to the preferred IP and prio 10 (low) to the rest. +// +// by Olivier Lambert +// + +#define dc_log(prio, msg) condlog(prio, "%s: iet prio: " msg, dev) +// +// name: find_regex +// @param string: string you want to search into +// @param regex: the pattern used +// @return result: string finded in string with regex, "none" if none +char *find_regex(char * string, char * regex) +{ + int err; + regex_t preg; + err = regcomp(&preg, regex, REG_EXTENDED); + + if (err == 0) { + int match; + size_t nmatch = 0; + regmatch_t *pmatch = NULL; + nmatch = preg.re_nsub; + pmatch = malloc(sizeof(*pmatch) * nmatch); + + if (pmatch) { + match = regexec(&preg, string, nmatch, pmatch, 0); + regfree(&preg); + + if (match == 0) { + char *result = NULL; + int start = pmatch[0].rm_so; + int end = pmatch[0].rm_eo; + size_t size = end - start; + result = malloc (sizeof(*result) * (size + 1)); + + if (result) { + strncpy(result, &string[start], size); + result[size] = '\0'; + free(pmatch); + return result; + } + } + free(pmatch); + } + } + return NULL; +} + +// +// name: inet_prio +// @param +// @return prio +int iet_prio(const char *dev, char * args) +{ + char preferredip_buff[255] = ""; + char *preferredip = &preferredip_buff[0]; + // Phase 1 : checks. If anyone fails, return prio 0. 
+ // check if args exists + if (!args) { + dc_log(0, "need prio_args with preferredip set"); + return 0; + } + // check if args format is OK + if (sscanf(args, "preferredip=%s", preferredip) ==1) {} + else { + dc_log(0, "unexpected prio_args format"); + return 0; + } + // check if ip is not too short + if (strlen(preferredip) <= 7) { + dc_log(0, "prio args: preferredip too short"); + return 0; + } + // Phase 2 : find device in /dev/disk/by-path to match device/ip + DIR *dir_p; + struct dirent *dir_entry_p; + enum { BUFFERSIZE = 1024 }; + char buffer[BUFFERSIZE]; + char fullpath[BUFFERSIZE] = "/dev/disk/by-path/"; + dir_p = opendir(fullpath); + + // loop to find device in /dev/disk/by-path + while( NULL != (dir_entry_p = readdir(dir_p))) { + if (dir_entry_p->d_name[0] != '.') { + char path[BUFFERSIZE] = "/dev/disk/by-path/"; + strcat(path,dir_entry_p->d_name); + ssize_t nchars = readlink(path, buffer, sizeof(buffer)-1); + if (nchars != -1) { + char *device; + buffer[nchars] = '\0'; + device = find_regex(buffer,"(sd[a-z]+)"); + // if device parsed is the right one + if (device!=NULL && strncmp(device, dev, strlen(device)) == 0) { + char *ip; + ip = find_regex(dir_entry_p->d_name,"([0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3})"); + // if prefferedip and ip fetched matches + if (ip!=NULL && strncmp(ip, preferredip, strlen(ip)) == 0) { + // high prio + free(ip); + free(device); + closedir(dir_p); + return 20; + } + free(ip); + } + free(device); + } + else { + printf("error\n"); + } + } + } + // nothing found, low prio + closedir(dir_p); + return 10; +} + +int getprio(struct path * pp, char * args, + __attribute__((unused)) unsigned int timeout) +{ + return iet_prio(pp->dev, args); +} diff --git a/libmultipath/prioritizers/ontap.c b/libmultipath/prioritizers/ontap.c new file mode 100644 index 0000000..262e69d --- /dev/null +++ b/libmultipath/prioritizers/ontap.c @@ -0,0 +1,248 @@ +/* + * Copyright 2005 Network Appliance, Inc., All Rights Reserved + * Author: David 
Wysochanski available at davidw@netapp.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License v2 for more details. + */ + +#include +#include +#include +#include +#include + +#include "sg_include.h" +#include "debug.h" +#include "prio.h" +#include "structs.h" +#include "unaligned.h" + +#define INQUIRY_CMD 0x12 +#define INQUIRY_CMDLEN 6 +#define DEFAULT_PRIOVAL 10 +#define RESULTS_MAX 256 +#define SG_TIMEOUT 60000 + +#define pp_ontap_log(prio, fmt, args...) \ + condlog(prio, "%s: ontap prio: " fmt, dev, ##args) + +static void dump_cdb(unsigned char *cdb, int size) +{ + int i; + char buf[10*5+1]; + char * p = &buf[0]; + + condlog(0, "- SCSI CDB: "); + for (i=0; imasked_status, + io_hdr->host_status, io_hdr->driver_status); + if (io_hdr->sb_len_wr > 0) { + condlog(0, "- SCSI sense data: "); + for (i=0; isb_len_wr; i++) { + p += snprintf(p, 128*(io_hdr->sb_len_wr-i), "0x%02x ", + io_hdr->sbp[i]); + } + condlog(0, "%s", buf); + } +} + +/* + * Returns: + * -1: error, errno set + * 0: success + */ +static int send_gva(const char *dev, int fd, unsigned char pg, + unsigned char *results, int *results_size, + unsigned int timeout) +{ + unsigned char sb[128]; + unsigned char cdb[10] = {0xc0, 0, 0x1, 0xa, 0x98, 0xa, + pg, sizeof(sb), 0, 0}; + struct sg_io_hdr io_hdr; + int ret = -1; + + memset(&io_hdr, 0, sizeof (struct sg_io_hdr)); + memset(results, 0, *results_size); + io_hdr.interface_id = 'S'; + io_hdr.cmd_len = sizeof (cdb); + io_hdr.mx_sb_len = sizeof (sb); + io_hdr.dxfer_direction = SG_DXFER_FROM_DEV; + io_hdr.dxfer_len = *results_size; + io_hdr.dxferp = results; + io_hdr.cmdp = cdb; + io_hdr.sbp = 
sb; + io_hdr.timeout = get_prio_timeout(timeout, SG_TIMEOUT); + io_hdr.pack_id = 0; + if (ioctl(fd, SG_IO, &io_hdr) < 0) { + pp_ontap_log(0, "SG_IO ioctl failed, errno=%d", errno); + dump_cdb(cdb, sizeof(cdb)); + goto out; + } + if (io_hdr.info & SG_INFO_OK_MASK) { + pp_ontap_log(0, "SCSI error"); + dump_cdb(cdb, sizeof(cdb)); + process_sg_error(&io_hdr); + goto out; + } + + if (results[4] != 0x0a || results[5] != 0x98 || + results[6] != 0x0a ||results[7] != 0x01) { + dump_cdb(cdb, sizeof(cdb)); + pp_ontap_log(0, "GVA return wrong format "); + pp_ontap_log(0, "results[4-7] = 0x%02x 0x%02x 0x%02x 0x%02x", + results[4], results[5], results[6], results[7]); + goto out; + } + ret = 0; +out: + return(ret); +} + +/* + * Returns: + * -1: Unable to obtain proxy info + * 0: Device _not_ proxy path + * 1: Device _is_ proxy path + */ +static int get_proxy(const char *dev, int fd, unsigned int timeout) +{ + unsigned char results[256]; + unsigned char sb[128]; + unsigned char cdb[INQUIRY_CMDLEN] = {INQUIRY_CMD, 1, 0xc1, 0, + sizeof(sb), 0}; + struct sg_io_hdr io_hdr; + int ret = -1; + + memset(&results, 0, sizeof (results)); + memset(&io_hdr, 0, sizeof (struct sg_io_hdr)); + io_hdr.interface_id = 'S'; + io_hdr.cmd_len = sizeof (cdb); + io_hdr.mx_sb_len = sizeof (sb); + io_hdr.dxfer_direction = SG_DXFER_FROM_DEV; + io_hdr.dxfer_len = sizeof (results); + io_hdr.dxferp = results; + io_hdr.cmdp = cdb; + io_hdr.sbp = sb; + io_hdr.timeout = get_prio_timeout(timeout, SG_TIMEOUT); + io_hdr.pack_id = 0; + if (ioctl(fd, SG_IO, &io_hdr) < 0) { + pp_ontap_log(0, "ioctl sending inquiry command failed, " + "errno=%d", errno); + dump_cdb(cdb, sizeof(cdb)); + goto out; + } + if (io_hdr.info & SG_INFO_OK_MASK) { + pp_ontap_log(0, "SCSI error"); + dump_cdb(cdb, sizeof(cdb)); + process_sg_error(&io_hdr); + goto out; + } + + if (results[1] != 0xc1 || results[8] != 0x0a || + results[9] != 0x98 || results[10] != 0x0a || + results[11] != 0x0 || results[12] != 0xc1 || + results[13] != 0x0) { + 
pp_ontap_log(0,"proxy info page in unknown format - "); + pp_ontap_log(0,"results[8-13]=0x%02x 0x%02x 0x%02x 0x%02x " + "0x%02x 0x%02x", + results[8], results[9], results[10], + results[11], results[12], results[13]); + dump_cdb(cdb, sizeof(cdb)); + goto out; + } + ret = (results[19] & 0x02) >> 1; + +out: + return(ret); +} + +/* + * Returns priority of device based on device info. + * + * 4: FCP non-proxy, FCP proxy unknown, or unable to determine protocol + * 3: iSCSI HBA + * 2: iSCSI software + * 1: FCP proxy + */ +static int ontap_prio(const char *dev, int fd, unsigned int timeout) +{ + unsigned char results[RESULTS_MAX]; + int results_size=RESULTS_MAX; + int rc; + int is_proxy; + int is_iscsi_software; + int is_iscsi_hardware; + int tot_len; + + is_iscsi_software = is_iscsi_hardware = is_proxy = 0; + + memset(&results, 0, sizeof (results)); + rc = send_gva(dev, fd, 0x41, results, &results_size, timeout); + if (rc >= 0) { + tot_len = get_unaligned_be32(&results[0]); + if (tot_len <= 8) { + goto try_fcp_proxy; + } + if (results[8] != 0x41) { + pp_ontap_log(0, "GVA page 0x41 error - " + "results[8] = 0x%x", results[8]); + goto try_fcp_proxy; + } + if ((strncmp((char *)&results[12], "ism_sw", 6) == 0) || + (strncmp((char *)&results[12], "iswt", 4) == 0)) { + is_iscsi_software = 1; + goto prio_select; + } + else if (strncmp((char *)&results[12], "ism_sn", 6) == 0) { + is_iscsi_hardware = 1; + goto prio_select; + } + } else { + return 0; + } + +try_fcp_proxy: + rc = get_proxy(dev, fd, timeout); + if (rc >= 0) { + is_proxy = rc; + } + +prio_select: + if (is_iscsi_hardware) { + return 3; + } else if (is_iscsi_software) { + return 2; + } else { + if (is_proxy) { + return 1; + } else { + /* Either non-proxy, or couldn't get proxy info */ + return 4; + } + } +} + +int getprio (struct path *pp, __attribute__((unused)) char *args, + unsigned int timeout) +{ + return ontap_prio(pp->dev, pp->fd, timeout); +} diff --git a/libmultipath/prioritizers/path_latency.c 
b/libmultipath/prioritizers/path_latency.c new file mode 100644 index 0000000..eeee01e --- /dev/null +++ b/libmultipath/prioritizers/path_latency.c @@ -0,0 +1,303 @@ +/* + * (C) Copyright HUAWEI Technology Corp. 2017, All Rights Reserved. + * + * path_latency.c + * + * Prioritizer for device mapper multipath, where the corresponding priority + * values of specific paths are provided by a latency algorithm. And the + * latency algorithm is dependent on arguments("io_num" and "base_num"). + * + * The principle of the algorithm as follows: + * 1. By sending a certain number "io_num" of read IOs to the current path + * continuously, the IOs' average latency can be calculated. + * 2. Max value and min value of average latency are constant. According to + * the average latency of each path and the "base_num" of logarithmic + * scale, the priority "rc" of each path can be provided. + * + * Author(s): Yang Feng + * Revised: Guan Junxiong + * + * This file is released under the GPL version 2, or any later version. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include + +#include "debug.h" +#include "prio.h" +#include "structs.h" +#include "util.h" +#include "time-util.h" + +#define pp_pl_log(prio, fmt, args...) condlog(prio, "path_latency prio: " fmt, ##args) + +#define MAX_IO_NUM 200 +#define MIN_IO_NUM 20 +#define DEF_IO_NUM 100 + +#define MAX_BASE_NUM 10 +#define MIN_BASE_NUM 1.1 +// This is 10**(1/4). 4 prio steps correspond to a factor of 10. +#define DEF_BASE_NUM 1.77827941004 + +#define MAX_AVG_LATENCY 100000000. /* Unit: us */ +#define MIN_AVG_LATENCY 1. 
/* Unit: us */ + +#define DEFAULT_PRIORITY 0 + +#define USEC_PER_SEC 1000000LL +#define NSEC_PER_USEC 1000LL + +#define DEF_BLK_SIZE 4096 + +static int prepare_directio_read(int fd, int *blksz, char **pbuf, + int *restore_flags) +{ + unsigned long pgsize = getpagesize(); + long flags; + + if (ioctl(fd, BLKBSZGET, blksz) < 0) { + pp_pl_log(3,"catnnot get blocksize, set default"); + *blksz = DEF_BLK_SIZE; + } + if (posix_memalign((void **)pbuf, pgsize, *blksz)) + return -1; + + flags = fcntl(fd, F_GETFL); + if (flags < 0) + goto free_out; + if (!(flags & O_DIRECT)) { + flags |= O_DIRECT; + if (fcntl(fd, F_SETFL, flags) < 0) + goto free_out; + *restore_flags = 1; + } + + return 0; + +free_out: + free(*pbuf); + + return -1; +} + +static void cleanup_directio_read(int fd, char *buf, int restore_flags) +{ + long flags; + + free(buf); + + if (!restore_flags) + return; + if ((flags = fcntl(fd, F_GETFL)) >= 0) { + int ret __attribute__ ((unused)); + flags &= ~O_DIRECT; + /* No point in checking for errors */ + ret = fcntl(fd, F_SETFL, flags); + } +} + +static int do_directio_read(int fd, unsigned int timeout, char *buf, int sz) +{ + fd_set read_fds; + struct timeval tm = { .tv_sec = timeout }; + int ret; + int num_read; + + if (lseek(fd, 0, SEEK_SET) == -1) + return -1; + FD_ZERO(&read_fds); + FD_SET(fd, &read_fds); + ret = select(fd+1, &read_fds, NULL, NULL, &tm); + if (ret <= 0) + return -1; + num_read = read(fd, buf, sz); + if (num_read != sz) + return -1; + + return 0; +} + +int check_args_valid(int io_num, double base_num) +{ + if ((io_num < MIN_IO_NUM) || (io_num > MAX_IO_NUM)) { + pp_pl_log(0, "args io_num is outside the valid range"); + return 0; + } + + if ((base_num < MIN_BASE_NUM) || (base_num > MAX_BASE_NUM)) { + pp_pl_log(0, "args base_num is outside the valid range"); + return 0; + } + + return 1; +} + +/* + * In multipath.conf, args form: io_num=n base_num=m. 
For example, args are + * "io_num=20 base_num=10", this function can get io_num value 20 and + * base_num value 10. + */ +static int get_ionum_and_basenum(char *args, int *ionum, double *basenum) +{ + char split_char[] = " \t"; + char *arg, *temp; + char *str, *str_inval; + int i; + int flag_io = 0, flag_base = 0; + + if ((args == NULL) || (ionum == NULL) || (basenum == NULL)) { + pp_pl_log(0, "args string is NULL"); + return 0; + } + + arg = temp = STRDUP(args); + if (!arg) + return 0; + + for (i = 0; i < 2; i++) { + str = get_next_string(&temp, split_char); + if (!str) + goto out; + if (!strncmp(str, "io_num=", 7) && strlen(str) > 7) { + *ionum = (int)strtoul(str + 7, &str_inval, 10); + if (str == str_inval) + goto out; + flag_io = 1; + } + else if (!strncmp(str, "base_num=", 9) && strlen(str) > 9) { + *basenum = strtod(str + 9, &str_inval); + if (str == str_inval) + goto out; + flag_base = 1; + } + } + + if (!flag_io || !flag_base) + goto out; + if (check_args_valid(*ionum, *basenum) == 0) + goto out; + + FREE(arg); + return 1; +out: + FREE(arg); + return 0; +} + +/* + * Do not scale the prioriy in a certain range such as [0, 1024] + * because scaling will eliminate the effect of base_num. 
+ */ +int calcPrio(double lg_avglatency, double lg_maxavglatency, + double lg_minavglatency) +{ + if (lg_avglatency <= lg_minavglatency) + return lg_maxavglatency - lg_minavglatency; + + if (lg_avglatency >= lg_maxavglatency) + return 0; + + return lg_maxavglatency - lg_avglatency; +} + +int getprio(struct path *pp, char *args, unsigned int timeout) +{ + int rc, temp; + int io_num = 0; + double base_num = 0; + double lg_avglatency, lg_maxavglatency, lg_minavglatency; + double standard_deviation; + double lg_toldelay = 0; + int blksize; + char *buf; + int restore_flags = 0; + double lg_base; + double sum_squares = 0; + + if (pp->fd < 0) + return -1; + + if (get_ionum_and_basenum(args, &io_num, &base_num) == 0) { + io_num = DEF_IO_NUM; + base_num = DEF_BASE_NUM; + pp_pl_log(0, "%s: fails to get path_latency args, set default:" + "io_num=%d base_num=%.3lf", + pp->dev, io_num, base_num); + } + + lg_base = log(base_num); + lg_maxavglatency = log(MAX_AVG_LATENCY) / lg_base; + lg_minavglatency = log(MIN_AVG_LATENCY) / lg_base; + + if (prepare_directio_read(pp->fd, &blksize, &buf, &restore_flags) < 0) + return PRIO_UNDEF; + + temp = io_num; + while (temp-- > 0) { + struct timespec tv_before, tv_after, tv_diff; + double diff, reldiff; + + (void)clock_gettime(CLOCK_MONOTONIC, &tv_before); + + if (do_directio_read(pp->fd, timeout, buf, blksize)) { + pp_pl_log(0, "%s: path down", pp->dev); + cleanup_directio_read(pp->fd, buf, restore_flags); + return -1; + } + + (void)clock_gettime(CLOCK_MONOTONIC, &tv_after); + + timespecsub(&tv_after, &tv_before, &tv_diff); + diff = tv_diff.tv_sec * 1000 * 1000 + tv_diff.tv_nsec / 1000; + + if (diff == 0) + /* + * Avoid taking log(0). + * This unlikely case is treated as minimum - + * the sums don't increase + */ + continue; + + /* we scale by lg_base here */ + reldiff = log(diff) / lg_base; + + /* + * We assume that the latency complies with Log-normal + * distribution. The logarithm of latency is in normal + * distribution. 
+ */ + lg_toldelay += reldiff; + sum_squares += reldiff * reldiff; + } + + cleanup_directio_read(pp->fd, buf, restore_flags); + + lg_avglatency = lg_toldelay / (long long)io_num; + + if (lg_avglatency > lg_maxavglatency) { + pp_pl_log(2, + "%s: average latency (%lld us) is outside the thresold (%lld us)", + pp->dev, (long long)pow(base_num, lg_avglatency), + (long long)MAX_AVG_LATENCY); + return DEFAULT_PRIORITY; + } + + standard_deviation = sqrt((sum_squares - lg_toldelay * lg_avglatency) + / (io_num - 1)); + + rc = calcPrio(lg_avglatency, lg_maxavglatency, lg_minavglatency); + + pp_pl_log(3, "%s: latency avg=%.2e uncertainty=%.1f prio=%d\n", + pp->dev, exp(lg_avglatency * lg_base), + exp(standard_deviation * lg_base), rc); + + return rc; +} diff --git a/libmultipath/prioritizers/random.c b/libmultipath/prioritizers/random.c new file mode 100644 index 0000000..b742ac2 --- /dev/null +++ b/libmultipath/prioritizers/random.c @@ -0,0 +1,17 @@ +#include +#include +#include +#include + +#include "prio.h" + +int getprio(__attribute__((unused)) struct path *pp, + __attribute__((unused)) char *args, + __attribute__((unused)) unsigned int timeout) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + srand((unsigned int)tv.tv_usec); + return 1+(int) (10.0*rand()/(RAND_MAX+1.0)); +} diff --git a/libmultipath/prioritizers/rdac.c b/libmultipath/prioritizers/rdac.c new file mode 100644 index 0000000..92a2fb8 --- /dev/null +++ b/libmultipath/prioritizers/rdac.c @@ -0,0 +1,98 @@ +#include +#include +#include + +#include "sg_include.h" +#include "debug.h" +#include "prio.h" +#include "structs.h" + +#define INQUIRY_CMD 0x12 +#define INQUIRY_CMDLEN 6 + +#define pp_rdac_log(prio, msg) condlog(prio, "%s: rdac prio: " msg, dev) + +int rdac_prio(const char *dev, int fd, unsigned int timeout) +{ + unsigned char sense_buffer[128]; + unsigned char sb[128]; + unsigned char inqCmdBlk[INQUIRY_CMDLEN] = {INQUIRY_CMD, 1, 0xC9, 0, + sizeof(sense_buffer), 0}; + struct sg_io_hdr io_hdr; + int ret 
= 0; + + memset(&io_hdr, 0, sizeof (struct sg_io_hdr)); + memset(sense_buffer, 0, 128); + io_hdr.interface_id = 'S'; + io_hdr.cmd_len = sizeof (inqCmdBlk); + io_hdr.mx_sb_len = sizeof (sb); + io_hdr.dxfer_direction = SG_DXFER_FROM_DEV; + io_hdr.dxfer_len = sizeof (sense_buffer); + io_hdr.dxferp = sense_buffer; + io_hdr.cmdp = inqCmdBlk; + io_hdr.sbp = sb; + io_hdr.timeout = get_prio_timeout(timeout, 60000); + io_hdr.pack_id = 0; + if (ioctl(fd, SG_IO, &io_hdr) < 0) { + pp_rdac_log(0, "sending inquiry command failed"); + goto out; + } + if (io_hdr.info & SG_INFO_OK_MASK) { + pp_rdac_log(0, "inquiry command indicates error"); + goto out; + } + + if (/* Verify the code page - right page & page identifier */ + sense_buffer[1] != 0xc9 || + sense_buffer[3] != 0x2c || + sense_buffer[4] != 'v' || + sense_buffer[5] != 'a' || + sense_buffer[6] != 'c' ) { + pp_rdac_log(0, "volume access control page in unknown format"); + goto out; + } + + if ( /* Current Volume Path Bit */ + ( sense_buffer[8] & 0x01) == 0x01 ) { + /* + * This volume was owned by the controller receiving + * the inquiry command. + */ + ret |= 0x02; + } + + /* Volume Preferred Path Priority */ + switch ( sense_buffer[9] & 0x0F ) { + case 0x01: + /* + * Access to this volume is most preferred through + * this path and other paths with this value. + */ + ret |= 0x04; + break; + case 0x02: + /* + * Access to this volume through this path is to be used + * as a secondary path. Typically this path would be used + * for fail-over situations. 
+ */ + ret |= 0x01; + break; + default: + /* Reserved values */ + break; + } + + /* For ioship mode set the bit 3 (00001000) */ + if ((sense_buffer[8] >> 5) & 0x01) + ret |= 0x08; + +out: + return(ret); +} + +int getprio (struct path *pp, __attribute__((unused)) char *args, + unsigned int timeout) +{ + return rdac_prio(pp->dev, pp->fd, timeout); +} diff --git a/libmultipath/prioritizers/sysfs.c b/libmultipath/prioritizers/sysfs.c new file mode 100644 index 0000000..a6feb42 --- /dev/null +++ b/libmultipath/prioritizers/sysfs.c @@ -0,0 +1,62 @@ +/* + * sysfs.c + * + * Copyright(c) 2016 Hannes Reinecke, SUSE Linux GmbH + */ + +#include + +#include "structs.h" +#include "discovery.h" +#include "prio.h" + +static const struct { + unsigned char value; + char *name; +} sysfs_access_state_map[] = { + { 50, "active/optimized" }, + { 10, "active/non-optimized" }, + { 5, "lba-dependent" }, + { 1, "standby" }, +}; + +int get_exclusive_pref_arg(char *args) +{ + char *ptr; + + if (args == NULL) + return 0; + ptr = strstr(args, "exclusive_pref_bit"); + if (!ptr) + return 0; + if (ptr[18] != '\0' && ptr[18] != ' ' && ptr[18] != '\t') + return 0; + if (ptr != args && ptr[-1] != ' ' && ptr[-1] != '\t') + return 0; + return 1; +} + +int getprio (struct path * pp, char *args, + __attribute__((unused)) unsigned int timeout) +{ + int prio = 0, rc, i; + char buff[512]; + int exclusive_pref; + + exclusive_pref = get_exclusive_pref_arg(args); + rc = sysfs_get_asymmetric_access_state(pp, buff, 512); + if (rc < 0) + return PRIO_UNDEF; + prio = 0; + for (i = 0; i < 4; i++) { + if (!strncmp(buff, sysfs_access_state_map[i].name, + strlen(sysfs_access_state_map[i].name))) { + prio = sysfs_access_state_map[i].value; + break; + } + } + if (rc > 0 && (prio != 50 || exclusive_pref)) + prio += 80; + + return prio; +} diff --git a/libmultipath/prioritizers/weightedpath.c b/libmultipath/prioritizers/weightedpath.c new file mode 100644 index 0000000..916970d --- /dev/null +++ 
b/libmultipath/prioritizers/weightedpath.c @@ -0,0 +1,150 @@ +/* + * + * (C) Copyright 2008 Hewlett-Packard Development Company, L.P + * + * This file is released under the GPL + */ + +/* + * Prioritizer for device mapper multipath, where specific paths and the + * corresponding priority values are provided as arguments. + * + * This prioritizer assigns the priority value provided in the configuration + * file based on the comparison made between the specified paths and the path + * instance for which this is called. + * Paths can be specified as a regular expression of devname of the path or + * as hbtl information of the path. + * + * Examples: + * prio "weightedpath hbtl 1:.:.:. 2 4:.:.:. 4" + * prio "weightedpath devname sda 10 sde 20" + * + * Returns zero as the default priority. + */ + +#include +#include + +#include "prio.h" +#include "weightedpath.h" +#include "config.h" +#include "structs.h" +#include "memory.h" +#include "debug.h" +#include +#include "structs_vec.h" +#include "print.h" +#include "util.h" + +#define CHECK_LEN \ +do { \ + if ((p - str) >= (len - 1)) { \ + condlog(0, "%s: %s - buffer size too small", pp->dev, pp->prio.name); \ + return -1; \ + } \ +} while(0) + +static int +build_serial_path(struct path *pp, char *str, int len) +{ + char *p = str; + + p += snprint_path_serial(p, str + len - p, pp); + CHECK_LEN; + return 0; +} + +static int +build_wwn_path(struct path *pp, char *str, int len) +{ + char *p = str; + + p += snprint_host_wwnn(p, str + len - p, pp); + CHECK_LEN; + p += snprintf(p, str + len - p, ":"); + CHECK_LEN; + p += snprint_host_wwpn(p, str + len - p, pp); + CHECK_LEN; + p += snprintf(p, str + len - p, ":"); + CHECK_LEN; + p += snprint_tgt_wwnn(p, str + len - p, pp); + CHECK_LEN; + p += snprintf(p, str + len - p, ":"); + CHECK_LEN; + p += snprint_tgt_wwpn(p, str + len - p, pp); + CHECK_LEN; + return 0; +} + +/* main priority routine */ +int prio_path_weight(struct path *pp, char *prio_args) +{ + char path[FILE_NAME_SIZE]; + 
char *arg; + char *temp, *regex, *prio; + char split_char[] = " \t"; + int priority = DEFAULT_PRIORITY, path_found = 0; + regex_t pathe; + + /* Return default priority if there is no argument */ + if (!prio_args) + return priority; + + arg = temp = STRDUP(prio_args); + + regex = get_next_string(&temp, split_char); + + /* Return default priority if the argument is not parseable */ + if (!regex) { + FREE(arg); + return priority; + } + + if (!strcmp(regex, HBTL)) { + sprintf(path, "%d:%d:%d:%d", pp->sg_id.host_no, + pp->sg_id.channel, pp->sg_id.scsi_id, pp->sg_id.lun); + } else if (!strcmp(regex, DEV_NAME)) { + strcpy(path, pp->dev); + } else if (!strcmp(regex, SERIAL)) { + if (build_serial_path(pp, path, FILE_NAME_SIZE) != 0) { + FREE(arg); + return priority; + } + } else if (!strcmp(regex, WWN)) { + if (build_wwn_path(pp, path, FILE_NAME_SIZE) != 0) { + FREE(arg); + return priority; + } + } else { + condlog(0, "%s: %s - Invalid arguments", pp->dev, + pp->prio.name); + FREE(arg); + return priority; + } + + while (!path_found) { + if (!temp) + break; + if (!(regex = get_next_string(&temp, split_char))) + break; + if (!(prio = get_next_string(&temp, split_char))) + break; + + if (!regcomp(&pathe, regex, REG_EXTENDED|REG_NOSUB)) { + if (!regexec(&pathe, path, 0, NULL, 0)) { + path_found = 1; + priority = atoi(prio); + } + regfree(&pathe); + } + } + + FREE(arg); + return priority; +} + +int getprio(struct path *pp, char *args, + __attribute__((unused)) unsigned int timeout) +{ + return prio_path_weight(pp, args); +} diff --git a/libmultipath/prioritizers/weightedpath.h b/libmultipath/prioritizers/weightedpath.h new file mode 100644 index 0000000..a1b268f --- /dev/null +++ b/libmultipath/prioritizers/weightedpath.h @@ -0,0 +1,13 @@ +#ifndef _WEIGHTED_PATH_H +#define _WEIGHTED_PATH_H + +#define PRIO_WEIGHTED_PATH "weightedpath" +#define HBTL "hbtl" +#define DEV_NAME "devname" +#define SERIAL "serial" +#define WWN "wwn" +#define DEFAULT_PRIORITY 0 + +int 
prio_path_weight(struct path *pp, char *prio_args); + +#endif diff --git a/libmultipath/prkey.c b/libmultipath/prkey.c new file mode 100644 index 0000000..d645f81 --- /dev/null +++ b/libmultipath/prkey.c @@ -0,0 +1,187 @@ +#include "structs.h" +#include "file.h" +#include "debug.h" +#include "config.h" +#include "util.h" +#include "propsel.h" +#include "prkey.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#define PRKEY_READ 0 +#define PRKEY_WRITE 1 + +static int do_prkey(int fd, char *wwid, char *keystr, int cmd) +{ + char buf[4097]; + char *ptr; + off_t start = 0; + int bytes; + + while (1) { + if (lseek(fd, start, SEEK_SET) < 0) { + condlog(0, "prkey file read lseek failed : %s", + strerror(errno)); + return 1; + } + bytes = read(fd, buf, 4096); + if (bytes < 0) { + if (errno == EINTR || errno == EAGAIN) + continue; + condlog(0, "failed to read from prkey file : %s", + strerror(errno)); + return 1; + } + if (!bytes) { + ptr = NULL; + break; + } + buf[bytes] = '\0'; + ptr = strstr(buf, wwid); + while (ptr) { + if (ptr == buf || *(ptr - 1) != ' ' || + *(ptr + strlen(wwid)) != '\n') + ptr = strstr(ptr + strlen(wwid), wwid); + else + break; + } + if (ptr) { + condlog(3, "found prkey for '%s'", wwid); + ptr[strlen(wwid)] = '\0'; + if (ptr - PRKEY_SIZE < buf || + (ptr - PRKEY_SIZE != buf && + *(ptr - PRKEY_SIZE - 1) != '\n')) { + condlog(0, "malformed prkey file line for wwid: '%s'", ptr); + return 1; + } + ptr = ptr - PRKEY_SIZE; + break; + } + ptr = strrchr(buf, '\n'); + if (ptr == NULL) { + condlog(4, "couldn't file newline, assuming end of file"); + break; + } + start = start + (ptr - buf) + 1; + } + if (cmd == PRKEY_READ) { + if (!ptr || *ptr == '#') + return 1; + memcpy(keystr, ptr, PRKEY_SIZE - 1); + keystr[PRKEY_SIZE - 1] = '\0'; + return 0; + } + if (!ptr && !keystr) + return 0; + if (ptr) { + if (lseek(fd, start + (ptr - buf), SEEK_SET) < 0) { + condlog(0, "prkey write lseek failed : %s", + strerror(errno)); + return 1; 
+ } + } + if (!keystr) { + if (safe_write(fd, "#", 1) < 0) { + condlog(0, "failed to write to prkey file : %s", + strerror(errno)); + return 1; + } + return 0; + } + if (!ptr) { + if (lseek(fd, 0, SEEK_END) < 0) { + condlog(0, "prkey write lseek failed : %s", + strerror(errno)); + return 1; + } + } + bytes = sprintf(buf, "%s %s\n", keystr, wwid); + if (safe_write(fd, buf, bytes) < 0) { + condlog(0, "failed to write to prkey file: %s", + strerror(errno)); + return 1; + } + return 0; +} + +int get_prkey(struct config *conf, struct multipath *mpp, uint64_t *prkey, + uint8_t *sa_flags) +{ + int fd; + int unused; + int ret = 1; + char keystr[PRKEY_SIZE]; + + if (!strlen(mpp->wwid)) + goto out; + + fd = open_file(conf->prkeys_file, &unused, PRKEYS_FILE_HEADER); + if (fd < 0) + goto out; + ret = do_prkey(fd, mpp->wwid, keystr, PRKEY_READ); + if (ret) + goto out_file; + *sa_flags = 0; + if (strchr(keystr, 'X')) + *sa_flags = MPATH_F_APTPL_MASK; + ret = !!parse_prkey(keystr, prkey); +out_file: + close(fd); +out: + return ret; +} + +int set_prkey(struct config *conf, struct multipath *mpp, uint64_t prkey, + uint8_t sa_flags) +{ + int fd; + int can_write = 1; + int ret = 1; + char keystr[PRKEY_SIZE]; + + if (!strlen(mpp->wwid)) + goto out; + + if (sa_flags & ~MPATH_F_APTPL_MASK) { + condlog(0, "unsupported pr flags, 0x%x", + sa_flags & ~MPATH_F_APTPL_MASK); + sa_flags &= MPATH_F_APTPL_MASK; + } + + fd = open_file(conf->prkeys_file, &can_write, PRKEYS_FILE_HEADER); + if (fd < 0) + goto out; + if (!can_write) { + condlog(0, "cannot set prkey, prkeys file is read-only"); + goto out_file; + } + if (prkey) { + /* using the capitalization of the 'x' is a hack, but + * it's unlikely that mpath_persist will support more options + * since sg_persist doesn't, and this lets us keep the + * same file format as before instead of needing to change + * the format of the prkeys file */ + if (sa_flags) + snprintf(keystr, PRKEY_SIZE, "0X%016" PRIx64, prkey); + else + snprintf(keystr, 
PRKEY_SIZE, "0x%016" PRIx64, prkey); + keystr[PRKEY_SIZE - 1] = '\0'; + ret = do_prkey(fd, mpp->wwid, keystr, PRKEY_WRITE); + } + else + ret = do_prkey(fd, mpp->wwid, NULL, PRKEY_WRITE); + if (ret == 0) + select_reservation_key(conf, mpp); + if (get_be64(mpp->reservation_key) != prkey) + ret = 1; +out_file: + close(fd); +out: + return ret; +} diff --git a/libmultipath/prkey.h b/libmultipath/prkey.h new file mode 100644 index 0000000..6739191 --- /dev/null +++ b/libmultipath/prkey.h @@ -0,0 +1,21 @@ +#ifndef _PRKEY_H +#define _PRKEY_H + +#include "structs.h" +#include + +#define PRKEYS_FILE_HEADER \ +"# Multipath persistent reservation keys, Version : 1.0\n" \ +"# NOTE: this file is automatically maintained by the multipathd program.\n" \ +"# You should not need to edit this file in normal circumstances.\n" \ +"#\n" \ +"# Format:\n" \ +"# prkey wwid\n" \ +"#\n" + +int set_prkey(struct config *conf, struct multipath *mpp, uint64_t prkey, + uint8_t sa_flags); +int get_prkey(struct config *conf, struct multipath *mpp, uint64_t *prkey, + uint8_t *sa_flags); + +#endif /* _PRKEY_H */ diff --git a/libmultipath/propsel.c b/libmultipath/propsel.c new file mode 100644 index 0000000..897e48c --- /dev/null +++ b/libmultipath/propsel.c @@ -0,0 +1,1237 @@ +/* + * Copyright (c) 2004, 2005 Christophe Varoqui + * Copyright (c) 2005 Benjamin Marzinski, Redhat + * Copyright (c) 2005 Kiyoshi Ueda, NEC + */ +#include + +#include "nvme-lib.h" +#include "checkers.h" +#include "memory.h" +#include "vector.h" +#include "structs.h" +#include "config.h" +#include "debug.h" +#include "pgpolicies.h" +#include "alias.h" +#include "defaults.h" +#include "devmapper.h" +#include "prio.h" +#include "discovery.h" +#include "dict.h" +#include "util.h" +#include "sysfs.h" +#include "prioritizers/alua_rtpg.h" +#include "prkey.h" +#include "propsel.h" +#include +#include + +pgpolicyfn *pgpolicies[] = { + NULL, + one_path_per_group, + one_group, + group_by_serial, + group_by_prio, + group_by_node_name +}; 
+ +#define do_set(var, src, dest, msg) \ +do { \ + if (src && src->var) { \ + dest = src->var; \ + origin = msg; \ + goto out; \ + } \ +} while(0) + +#define __do_set_from_vec(type, var, src, dest) \ +({ \ + type *_p; \ + bool _found = false; \ + int i; \ + \ + vector_foreach_slot(src, _p, i) { \ + if (_p->var) { \ + dest = _p->var; \ + _found = true; \ + break; \ + } \ + } \ + _found; \ +}) + +#define __do_set_from_hwe(var, src, dest) \ + __do_set_from_vec(struct hwentry, var, (src)->hwe, dest) + +#define do_set_from_hwe(var, src, dest, msg) \ + if (__do_set_from_hwe(var, src, dest)) { \ + origin = msg; \ + goto out; \ + } + +static const char default_origin[] = "(setting: multipath internal)"; +static const char hwe_origin[] = + "(setting: storage device configuration)"; +static const char multipaths_origin[] = + "(setting: multipath.conf multipaths section)"; +static const char conf_origin[] = + "(setting: multipath.conf defaults/devices section)"; +static const char overrides_origin[] = + "(setting: multipath.conf overrides section)"; +static const char cmdline_origin[] = + "(setting: multipath command line [-p] flag)"; +static const char autodetect_origin[] = + "(setting: storage device autodetected)"; +static const char marginal_path_origin[] = + "(setting: implied by marginal_path check)"; +static const char delay_watch_origin[] = + "(setting: implied by delay_watch_checks)"; +static const char delay_wait_origin[] = + "(setting: implied by delay_wait_checks)"; + +#define do_default(dest, value) \ +do { \ + dest = value; \ + origin = default_origin; \ +} while(0) + +#define mp_set_mpe(var) \ +do_set(var, mp->mpe, mp->var, multipaths_origin) +#define mp_set_hwe(var) \ +do_set_from_hwe(var, mp, mp->var, hwe_origin) +#define mp_set_ovr(var) \ +do_set(var, conf->overrides, mp->var, overrides_origin) +#define mp_set_conf(var) \ +do_set(var, conf, mp->var, conf_origin) +#define mp_set_default(var, value) \ +do_default(mp->var, value) + +#define pp_set_mpe(var) \ 
+do_set(var, mpe, pp->var, multipaths_origin) +#define pp_set_hwe(var) \ +do_set_from_hwe(var, pp, pp->var, hwe_origin) +#define pp_set_conf(var) \ +do_set(var, conf, pp->var, conf_origin) +#define pp_set_ovr(var) \ +do_set(var, conf->overrides, pp->var, overrides_origin) +#define pp_set_default(var, value) \ +do_default(pp->var, value) + +#define do_attr_set(var, src, shift, msg) \ +do { \ + if (src && (src->attribute_flags & (1 << shift))) { \ + mp->attribute_flags |= (1 << shift); \ + mp->var = src->var; \ + origin = msg; \ + goto out; \ + } \ +} while(0) + +#define set_attr_mpe(var, shift) \ +do_attr_set(var, mp->mpe, shift, "(setting: multipath.conf multipaths section)") +#define set_attr_conf(var, shift) \ +do_attr_set(var, conf, shift, "(setting: multipath.conf defaults/devices section)") + +#define do_prkey_set(src, msg) \ +do { \ + if (src && src->prkey_source != PRKEY_SOURCE_NONE) { \ + mp->prkey_source = src->prkey_source; \ + mp->reservation_key = src->reservation_key; \ + mp->sa_flags = src->sa_flags; \ + origin = msg; \ + goto out; \ + } \ +} while (0) + +int select_mode(struct config *conf, struct multipath *mp) +{ + const char *origin; + + set_attr_mpe(mode, ATTR_MODE); + set_attr_conf(mode, ATTR_MODE); + mp->attribute_flags &= ~(1 << ATTR_MODE); + return 0; +out: + condlog(3, "%s: mode = 0%o %s", mp->alias, mp->mode, origin); + return 0; +} + +int select_uid(struct config *conf, struct multipath *mp) +{ + const char *origin; + + set_attr_mpe(uid, ATTR_UID); + set_attr_conf(uid, ATTR_UID); + mp->attribute_flags &= ~(1 << ATTR_UID); + return 0; +out: + condlog(3, "%s: uid = 0%o %s", mp->alias, mp->uid, origin); + return 0; +} + +int select_gid(struct config *conf, struct multipath *mp) +{ + const char *origin; + + set_attr_mpe(gid, ATTR_GID); + set_attr_conf(gid, ATTR_GID); + mp->attribute_flags &= ~(1 << ATTR_GID); + return 0; +out: + condlog(3, "%s: gid = 0%o %s", mp->alias, mp->gid, origin); + return 0; +} + +/* + * selectors : + * traverse the 
configuration layers from most specific to most generic + * stop at first explicit setting found + */ +int select_rr_weight(struct config *conf, struct multipath * mp) +{ + const char *origin; + char buff[13]; + + mp_set_mpe(rr_weight); + mp_set_ovr(rr_weight); + mp_set_hwe(rr_weight); + mp_set_conf(rr_weight); + mp_set_default(rr_weight, DEFAULT_RR_WEIGHT); +out: + print_rr_weight(buff, 13, mp->rr_weight); + condlog(3, "%s: rr_weight = %s %s", mp->alias, buff, origin); + return 0; +} + +int select_pgfailback(struct config *conf, struct multipath * mp) +{ + const char *origin; + char buff[13]; + + mp_set_mpe(pgfailback); + mp_set_ovr(pgfailback); + mp_set_hwe(pgfailback); + mp_set_conf(pgfailback); + mp_set_default(pgfailback, DEFAULT_FAILBACK); +out: + print_pgfailback(buff, 13, mp->pgfailback); + condlog(3, "%s: failback = %s %s", mp->alias, buff, origin); + return 0; +} + +int select_pgpolicy(struct config *conf, struct multipath * mp) +{ + const char *origin; + char buff[POLICY_NAME_SIZE]; + + if (conf->pgpolicy_flag > 0) { + mp->pgpolicy = conf->pgpolicy_flag; + origin = cmdline_origin; + goto out; + } + mp_set_mpe(pgpolicy); + mp_set_ovr(pgpolicy); + mp_set_hwe(pgpolicy); + mp_set_conf(pgpolicy); + mp_set_default(pgpolicy, DEFAULT_PGPOLICY); +out: + mp->pgpolicyfn = pgpolicies[mp->pgpolicy]; + get_pgpolicy_name(buff, POLICY_NAME_SIZE, mp->pgpolicy); + condlog(3, "%s: path_grouping_policy = %s %s", mp->alias, buff, origin); + return 0; +} + +int select_selector(struct config *conf, struct multipath * mp) +{ + const char *origin; + + mp_set_mpe(selector); + mp_set_ovr(selector); + mp_set_hwe(selector); + mp_set_conf(selector); + mp_set_default(selector, DEFAULT_SELECTOR); +out: + mp->selector = STRDUP(mp->selector); + condlog(3, "%s: path_selector = \"%s\" %s", mp->alias, mp->selector, + origin); + return 0; +} + +static void +select_alias_prefix (struct config *conf, struct multipath * mp) +{ + const char *origin; + + mp_set_ovr(alias_prefix); + 
mp_set_hwe(alias_prefix); + mp_set_conf(alias_prefix); + mp_set_default(alias_prefix, DEFAULT_ALIAS_PREFIX); +out: + condlog(3, "%s: alias_prefix = %s %s", mp->wwid, mp->alias_prefix, + origin); +} + +static int +want_user_friendly_names(struct config *conf, struct multipath * mp) +{ + + const char *origin; + int user_friendly_names; + + do_set(user_friendly_names, mp->mpe, user_friendly_names, + multipaths_origin); + do_set(user_friendly_names, conf->overrides, user_friendly_names, + overrides_origin); + do_set_from_hwe(user_friendly_names, mp, user_friendly_names, + hwe_origin); + do_set(user_friendly_names, conf, user_friendly_names, + conf_origin); + do_default(user_friendly_names, DEFAULT_USER_FRIENDLY_NAMES); +out: + condlog(3, "%s: user_friendly_names = %s %s", mp->wwid, + (user_friendly_names == USER_FRIENDLY_NAMES_ON)? "yes" : "no", + origin); + return (user_friendly_names == USER_FRIENDLY_NAMES_ON); +} + +int select_alias(struct config *conf, struct multipath * mp) +{ + const char *origin = NULL; + + if (mp->mpe && mp->mpe->alias) { + mp->alias = STRDUP(mp->mpe->alias); + origin = multipaths_origin; + goto out; + } + + mp->alias = NULL; + if (!want_user_friendly_names(conf, mp)) + goto out; + + select_alias_prefix(conf, mp); + + if (strlen(mp->alias_old) > 0) { + mp->alias = use_existing_alias(mp->wwid, conf->bindings_file, + mp->alias_old, mp->alias_prefix, + conf->bindings_read_only); + memset (mp->alias_old, 0, WWID_SIZE); + origin = "(setting: using existing alias)"; + } + + if (mp->alias == NULL) { + mp->alias = get_user_friendly_alias(mp->wwid, + conf->bindings_file, mp->alias_prefix, conf->bindings_read_only); + origin = "(setting: user_friendly_name)"; + } +out: + if (mp->alias == NULL) { + mp->alias = STRDUP(mp->wwid); + origin = "(setting: default to WWID)"; + } + if (mp->alias) + condlog(3, "%s: alias = %s %s", mp->wwid, mp->alias, origin); + return mp->alias ? 
0 : 1; +} + +void reconcile_features_with_options(const char *id, char **features, int* no_path_retry, + int *retain_hwhandler) +{ + static const char q_i_n_p[] = "queue_if_no_path"; + static const char r_a_h_h[] = "retain_attached_hw_handler"; + char buff[12]; + + if (*features == NULL) + return; + if (id == NULL) + id = "UNKNOWN"; + + /* + * We only use no_path_retry internally. The "queue_if_no_path" + * device-mapper feature is derived from it when the map is loaded. + * For consistency, "queue_if_no_path" is removed from the + * internal libmultipath features string. + * For backward compatibility we allow 'features "1 queue_if_no_path"'; + * it's translated into "no_path_retry queue" here. + */ + if (strstr(*features, q_i_n_p)) { + condlog(0, "%s: option 'features \"1 %s\"' is deprecated, " + "please use 'no_path_retry queue' instead", + id, q_i_n_p); + if (*no_path_retry == NO_PATH_RETRY_UNDEF) { + *no_path_retry = NO_PATH_RETRY_QUEUE; + print_no_path_retry(buff, sizeof(buff), + *no_path_retry); + condlog(3, "%s: no_path_retry = %s (inherited setting from feature '%s')", + id, buff, q_i_n_p); + }; + /* Warn only if features string is overridden */ + if (*no_path_retry != NO_PATH_RETRY_QUEUE) { + print_no_path_retry(buff, sizeof(buff), + *no_path_retry); + condlog(2, "%s: ignoring feature '%s' because no_path_retry is set to '%s'", + id, q_i_n_p, buff); + } + remove_feature(features, q_i_n_p); + } + if (strstr(*features, r_a_h_h)) { + condlog(0, "%s: option 'features \"1 %s\"' is deprecated", + id, r_a_h_h); + if (*retain_hwhandler == RETAIN_HWHANDLER_UNDEF) { + condlog(3, "%s: %s = on (inherited setting from feature '%s')", + id, r_a_h_h, r_a_h_h); + *retain_hwhandler = RETAIN_HWHANDLER_ON; + } else if (*retain_hwhandler == RETAIN_HWHANDLER_OFF) + condlog(2, "%s: ignoring feature '%s' because %s is set to 'off'", + id, r_a_h_h, r_a_h_h); + remove_feature(features, r_a_h_h); + } +} + +int select_features(struct config *conf, struct multipath *mp) +{ + const 
char *origin; + + mp_set_mpe(features); + mp_set_ovr(features); + mp_set_hwe(features); + mp_set_conf(features); + mp_set_default(features, DEFAULT_FEATURES); +out: + mp->features = STRDUP(mp->features); + + reconcile_features_with_options(mp->alias, &mp->features, + &mp->no_path_retry, + &mp->retain_hwhandler); + condlog(3, "%s: features = \"%s\" %s", mp->alias, mp->features, origin); + return 0; +} + +static int get_dh_state(struct path *pp, char *value, size_t value_len) +{ + struct udev_device *ud; + + if (pp->udev == NULL) + return -1; + + ud = udev_device_get_parent_with_subsystem_devtype( + pp->udev, "scsi", "scsi_device"); + if (ud == NULL) + return -1; + + return sysfs_attr_get_value(ud, "dh_state", value, value_len); +} + +int select_hwhandler(struct config *conf, struct multipath *mp) +{ + const char *origin; + struct path *pp; + /* dh_state is no longer than "detached" */ + char handler[12]; + static char alua_name[] = "1 alua"; + static const char tpgs_origin[]= "(setting: autodetected from TPGS)"; + char *dh_state; + int i; + bool all_tpgs = true, one_tpgs = false; + + dh_state = &handler[2]; + + /* + * TPGS_UNDEF means that ALUA support couldn't be determined either way + * yet, probably because the path was always down. + * If at least one path does have TPGS support, and no path has + * TPGS_NONE, assume that TPGS would be supported by all paths if + * all were up. 
+ */ + vector_foreach_slot(mp->paths, pp, i) { + int tpgs = path_get_tpgs(pp); + + all_tpgs = all_tpgs && tpgs != TPGS_NONE; + one_tpgs = one_tpgs || + (tpgs != TPGS_NONE && tpgs != TPGS_UNDEF); + } + all_tpgs = all_tpgs && one_tpgs; + + if (mp->retain_hwhandler != RETAIN_HWHANDLER_OFF) { + vector_foreach_slot(mp->paths, pp, i) { + if (get_dh_state(pp, dh_state, sizeof(handler) - 2) > 0 + && strcmp(dh_state, "detached")) { + memcpy(handler, "1 ", 2); + mp->hwhandler = handler; + origin = "(setting: retained by kernel driver)"; + goto out; + } + } + } + + mp_set_hwe(hwhandler); + mp_set_conf(hwhandler); + mp_set_default(hwhandler, DEFAULT_HWHANDLER); +out: + if (all_tpgs && !strcmp(mp->hwhandler, DEFAULT_HWHANDLER) && + origin == default_origin) { + mp->hwhandler = alua_name; + origin = tpgs_origin; + } else if (!all_tpgs && !strcmp(mp->hwhandler, alua_name)) { + mp->hwhandler = DEFAULT_HWHANDLER; + origin = tpgs_origin; + } + mp->hwhandler = STRDUP(mp->hwhandler); + condlog(3, "%s: hardware_handler = \"%s\" %s", mp->alias, mp->hwhandler, + origin); + return 0; +} + +/* + * Current RDAC (NetApp E-Series) firmware relies + * on periodic REPORT TARGET PORT GROUPS for + * internal load balancing. + * Using the sysfs priority checker defeats this purpose. + * + * Moreover, NetApp would also prefer the RDAC checker over ALUA. 
+ * (https://www.redhat.com/archives/dm-devel/2017-September/msg00326.html) + */ +static int +check_rdac(struct path * pp) +{ + int len; + char buff[44]; + const char *checker_name; + + if (pp->bus != SYSFS_BUS_SCSI) + return 0; + /* Avoid ioctl if this is likely not an RDAC array */ + if (__do_set_from_hwe(checker_name, pp, checker_name) && + strcmp(checker_name, RDAC)) + return 0; + len = get_vpd_sgio(pp->fd, 0xC9, 0, buff, 44); + if (len <= 0) + return 0; + return !(memcmp(buff + 4, "vac1", 4)); +} + +int select_checker(struct config *conf, struct path *pp) +{ + const char *origin; + char *ckr_name; + struct checker * c = &pp->checker; + + if (pp->detect_checker == DETECT_CHECKER_ON) { + origin = autodetect_origin; + if (check_rdac(pp)) { + ckr_name = RDAC; + goto out; + } else if (path_get_tpgs(pp) != TPGS_NONE) { + ckr_name = TUR; + goto out; + } + } + do_set(checker_name, conf->overrides, ckr_name, overrides_origin); + do_set_from_hwe(checker_name, pp, ckr_name, hwe_origin); + do_set(checker_name, conf, ckr_name, conf_origin); + do_default(ckr_name, DEFAULT_CHECKER); +out: + checker_get(conf->multipath_dir, c, ckr_name); + condlog(3, "%s: path_checker = %s %s", pp->dev, + checker_name(c), origin); + if (conf->checker_timeout) { + c->timeout = conf->checker_timeout; + condlog(3, "%s: checker timeout = %u s %s", + pp->dev, c->timeout, conf_origin); + } + else if (sysfs_get_timeout(pp, &c->timeout) > 0) + condlog(3, "%s: checker timeout = %u s (setting: kernel sysfs)", + pp->dev, c->timeout); + else { + c->timeout = DEF_TIMEOUT; + condlog(3, "%s: checker timeout = %u s %s", + pp->dev, c->timeout, default_origin); + } + return 0; +} + +int select_getuid(struct config *conf, struct path *pp) +{ + const char *origin; + + pp->uid_attribute = get_uid_attribute_by_attrs(conf, pp->dev); + if (pp->uid_attribute) { + origin = "(setting: multipath.conf defaults section / uid_attrs)"; + goto out; + } + + pp_set_ovr(getuid); + pp_set_ovr(uid_attribute); + 
pp_set_hwe(getuid); + pp_set_hwe(uid_attribute); + pp_set_conf(getuid); + pp_set_conf(uid_attribute); + pp_set_default(uid_attribute, DEFAULT_UID_ATTRIBUTE); +out: + if (pp->uid_attribute) + condlog(3, "%s: uid_attribute = %s %s", pp->dev, + pp->uid_attribute, origin); + else if (pp->getuid) + condlog(3, "%s: getuid = \"%s\" %s", pp->dev, pp->getuid, + origin); + return 0; +} + +void +detect_prio(struct config *conf, struct path * pp) +{ + struct prio *p = &pp->prio; + char buff[512]; + char *default_prio; + int tpgs; + + switch(pp->bus) { + case SYSFS_BUS_NVME: + if (nvme_id_ctrl_ana(pp->fd, NULL) == 0) + return; + default_prio = PRIO_ANA; + break; + case SYSFS_BUS_SCSI: + tpgs = path_get_tpgs(pp); + if (tpgs == TPGS_NONE) + return; + if ((tpgs == TPGS_EXPLICIT || !check_rdac(pp)) && + sysfs_get_asymmetric_access_state(pp, buff, 512) >= 0) + default_prio = PRIO_SYSFS; + else + default_prio = PRIO_ALUA; + break; + default: + return; + } + prio_get(conf->multipath_dir, p, default_prio, DEFAULT_PRIO_ARGS); +} + +#define set_prio(dir, src, msg) \ +do { \ + if (src && src->prio_name) { \ + prio_get(dir, p, src->prio_name, src->prio_args); \ + origin = msg; \ + goto out; \ + } \ +} while(0) + +#define set_prio_from_vec(type, dir, src, msg, p) \ +do { \ + type *_p; \ + int i; \ + char *prio_name = NULL, *prio_args = NULL; \ + \ + vector_foreach_slot(src, _p, i) { \ + if (prio_name == NULL && _p->prio_name) \ + prio_name = _p->prio_name; \ + if (prio_args == NULL && _p->prio_args) \ + prio_args = _p->prio_args; \ + } \ + if (prio_name != NULL) { \ + prio_get(dir, p, prio_name, prio_args); \ + origin = msg; \ + goto out; \ + } \ +} while (0) + +int select_prio(struct config *conf, struct path *pp) +{ + const char *origin; + struct mpentry * mpe; + struct prio * p = &pp->prio; + int log_prio = 3; + + if (pp->detect_prio == DETECT_PRIO_ON) { + detect_prio(conf, pp); + if (prio_selected(p)) { + origin = autodetect_origin; + goto out; + } + } + mpe = find_mpe(conf->mptable, 
pp->wwid); + set_prio(conf->multipath_dir, mpe, multipaths_origin); + set_prio(conf->multipath_dir, conf->overrides, overrides_origin); + set_prio_from_vec(struct hwentry, conf->multipath_dir, + pp->hwe, hwe_origin, p); + set_prio(conf->multipath_dir, conf, conf_origin); + prio_get(conf->multipath_dir, p, DEFAULT_PRIO, DEFAULT_PRIO_ARGS); + origin = default_origin; +out: + /* + * fetch tpgs mode for alua, if it's not already obtained + */ + if (!strncmp(prio_name(p), PRIO_ALUA, PRIO_NAME_LEN)) { + int tpgs = path_get_tpgs(pp); + + if (tpgs == TPGS_NONE) { + prio_get(conf->multipath_dir, + p, DEFAULT_PRIO, DEFAULT_PRIO_ARGS); + origin = "(setting: emergency fallback - alua failed)"; + log_prio = 1; + } + } + condlog(log_prio, "%s: prio = %s %s", pp->dev, prio_name(p), origin); + condlog(3, "%s: prio args = \"%s\" %s", pp->dev, prio_args(p), origin); + return 0; +} + +int select_no_path_retry(struct config *conf, struct multipath *mp) +{ + const char *origin = NULL; + char buff[12]; + + if (mp->disable_queueing) { + condlog(0, "%s: queueing disabled", mp->alias); + mp->no_path_retry = NO_PATH_RETRY_FAIL; + return 0; + } + mp_set_mpe(no_path_retry); + mp_set_ovr(no_path_retry); + mp_set_hwe(no_path_retry); + mp_set_conf(no_path_retry); +out: + print_no_path_retry(buff, 12, mp->no_path_retry); + if (origin) + condlog(3, "%s: no_path_retry = %s %s", mp->alias, buff, + origin); + else + condlog(3, "%s: no_path_retry = undef %s", + mp->alias, default_origin); + return 0; +} + +int +select_minio_rq (struct config *conf, struct multipath * mp) +{ + const char *origin; + + do_set(minio_rq, mp->mpe, mp->minio, multipaths_origin); + do_set(minio_rq, conf->overrides, mp->minio, overrides_origin); + do_set_from_hwe(minio_rq, mp, mp->minio, hwe_origin); + do_set(minio_rq, conf, mp->minio, conf_origin); + do_default(mp->minio, DEFAULT_MINIO_RQ); +out: + condlog(3, "%s: minio = %i %s", mp->alias, mp->minio, origin); + return 0; +} + +int +select_minio_bio (struct config *conf, 
struct multipath * mp) +{ + const char *origin; + + mp_set_mpe(minio); + mp_set_ovr(minio); + mp_set_hwe(minio); + mp_set_conf(minio); + mp_set_default(minio, DEFAULT_MINIO); +out: + condlog(3, "%s: minio = %i %s", mp->alias, mp->minio, origin); + return 0; +} + +int select_minio(struct config *conf, struct multipath *mp) +{ + unsigned int minv_dmrq[3] = {1, 1, 0}; + + if (VERSION_GE(conf->version, minv_dmrq)) + return select_minio_rq(conf, mp); + else + return select_minio_bio(conf, mp); +} + +int select_fast_io_fail(struct config *conf, struct multipath *mp) +{ + const char *origin; + char buff[12]; + + mp_set_ovr(fast_io_fail); + mp_set_hwe(fast_io_fail); + mp_set_conf(fast_io_fail); + mp_set_default(fast_io_fail, DEFAULT_FAST_IO_FAIL); +out: + print_fast_io_fail(buff, 12, mp->fast_io_fail); + condlog(3, "%s: fast_io_fail_tmo = %s %s", mp->alias, buff, origin); + return 0; +} + +int select_dev_loss(struct config *conf, struct multipath *mp) +{ + const char *origin; + char buff[12]; + + mp_set_ovr(dev_loss); + mp_set_hwe(dev_loss); + mp_set_conf(dev_loss); + mp->dev_loss = 0; + return 0; +out: + print_dev_loss(buff, 12, mp->dev_loss); + condlog(3, "%s: dev_loss_tmo = %s %s", mp->alias, buff, origin); + return 0; +} + +int select_flush_on_last_del(struct config *conf, struct multipath *mp) +{ + const char *origin; + + mp_set_mpe(flush_on_last_del); + mp_set_ovr(flush_on_last_del); + mp_set_hwe(flush_on_last_del); + mp_set_conf(flush_on_last_del); + mp_set_default(flush_on_last_del, DEFAULT_FLUSH); +out: + condlog(3, "%s: flush_on_last_del = %s %s", mp->alias, + (mp->flush_on_last_del == FLUSH_ENABLED)? 
"yes" : "no", origin); + return 0; +} + +int select_reservation_key(struct config *conf, struct multipath *mp) +{ + const char *origin; + char buff[PRKEY_SIZE]; + char *from_file = ""; + uint64_t prkey = 0; + + do_prkey_set(mp->mpe, multipaths_origin); + do_prkey_set(conf, conf_origin); + put_be64(mp->reservation_key, 0); + mp->sa_flags = 0; + mp->prkey_source = PRKEY_SOURCE_NONE; + return 0; +out: + if (mp->prkey_source == PRKEY_SOURCE_FILE) { + from_file = " (from prkeys file)"; + if (get_prkey(conf, mp, &prkey, &mp->sa_flags) != 0) + put_be64(mp->reservation_key, 0); + else + put_be64(mp->reservation_key, prkey); + } + print_reservation_key(buff, PRKEY_SIZE, mp->reservation_key, + mp->sa_flags, mp->prkey_source); + condlog(3, "%s: reservation_key = %s %s%s", mp->alias, buff, origin, + from_file); + return 0; +} + +int select_retain_hwhandler(struct config *conf, struct multipath *mp) +{ + const char *origin; + unsigned int minv_dm_retain[3] = {1, 5, 0}; + + if (!VERSION_GE(conf->version, minv_dm_retain)) { + mp->retain_hwhandler = RETAIN_HWHANDLER_OFF; + origin = "(setting: WARNING, requires kernel dm-mpath version >= 1.5.0)"; + goto out; + } + if (get_linux_version_code() >= KERNEL_VERSION(4, 3, 0)) { + mp->retain_hwhandler = RETAIN_HWHANDLER_ON; + origin = "(setting: implied in kernel >= 4.3.0)"; + goto out; + } + mp_set_ovr(retain_hwhandler); + mp_set_hwe(retain_hwhandler); + mp_set_conf(retain_hwhandler); + mp_set_default(retain_hwhandler, DEFAULT_RETAIN_HWHANDLER); +out: + condlog(3, "%s: retain_attached_hw_handler = %s %s", mp->alias, + (mp->retain_hwhandler == RETAIN_HWHANDLER_ON)? "yes" : "no", + origin); + return 0; +} + +int select_detect_prio(struct config *conf, struct path *pp) +{ + const char *origin; + + pp_set_ovr(detect_prio); + pp_set_hwe(detect_prio); + pp_set_conf(detect_prio); + pp_set_default(detect_prio, DEFAULT_DETECT_PRIO); +out: + condlog(3, "%s: detect_prio = %s %s", pp->dev, + (pp->detect_prio == DETECT_PRIO_ON)? 
"yes" : "no", origin); + return 0; +} + +int select_detect_checker(struct config *conf, struct path *pp) +{ + const char *origin; + + pp_set_ovr(detect_checker); + pp_set_hwe(detect_checker); + pp_set_conf(detect_checker); + pp_set_default(detect_checker, DEFAULT_DETECT_CHECKER); +out: + condlog(3, "%s: detect_checker = %s %s", pp->dev, + (pp->detect_checker == DETECT_CHECKER_ON)? "yes" : "no", + origin); + return 0; +} + +int select_deferred_remove(struct config *conf, struct multipath *mp) +{ + const char *origin; + +#ifndef LIBDM_API_DEFERRED + mp->deferred_remove = DEFERRED_REMOVE_OFF; + origin = "(setting: WARNING, not compiled with support)"; + goto out; +#endif + if (mp->deferred_remove == DEFERRED_REMOVE_IN_PROGRESS) { + condlog(3, "%s: deferred remove in progress", mp->alias); + return 0; + } + mp_set_mpe(deferred_remove); + mp_set_ovr(deferred_remove); + mp_set_hwe(deferred_remove); + mp_set_conf(deferred_remove); + mp_set_default(deferred_remove, DEFAULT_DEFERRED_REMOVE); +out: + condlog(3, "%s: deferred_remove = %s %s", mp->alias, + (mp->deferred_remove == DEFERRED_REMOVE_ON)? 
"yes" : "no", + origin); + return 0; +} + +static inline int san_path_check_options_set(const struct multipath *mp) +{ + return mp->san_path_err_threshold > 0 || + mp->san_path_err_forget_rate > 0 || + mp->san_path_err_recovery_time > 0; +} + +static int +use_delay_watch_checks(struct config *conf, struct multipath *mp) +{ + int value = NU_UNDEF; + const char *origin = default_origin; + char buff[12]; + + do_set(delay_watch_checks, mp->mpe, value, multipaths_origin); + do_set(delay_watch_checks, conf->overrides, value, overrides_origin); + do_set_from_hwe(delay_watch_checks, mp, value, hwe_origin); + do_set(delay_watch_checks, conf, value, conf_origin); +out: + if (print_off_int_undef(buff, 12, value) != 0) + condlog(3, "%s: delay_watch_checks = %s %s", mp->alias, buff, + origin); + return value; +} + +static int +use_delay_wait_checks(struct config *conf, struct multipath *mp) +{ + int value = NU_UNDEF; + const char *origin = default_origin; + char buff[12]; + + do_set(delay_wait_checks, mp->mpe, value, multipaths_origin); + do_set(delay_wait_checks, conf->overrides, value, overrides_origin); + do_set_from_hwe(delay_wait_checks, mp, value, hwe_origin); + do_set(delay_wait_checks, conf, value, conf_origin); +out: + if (print_off_int_undef(buff, 12, value) != 0) + condlog(3, "%s: delay_wait_checks = %s %s", mp->alias, buff, + origin); + return value; +} + +int select_delay_checks(struct config *conf, struct multipath *mp) +{ + int watch_checks, wait_checks; + char buff[12]; + + watch_checks = use_delay_watch_checks(conf, mp); + wait_checks = use_delay_wait_checks(conf, mp); + if (watch_checks <= 0 && wait_checks <= 0) + return 0; + if (san_path_check_options_set(mp)) { + condlog(3, "%s: both marginal_path and delay_checks error detection options selected", mp->alias); + condlog(3, "%s: ignoring delay_checks options", mp->alias); + return 0; + } + mp->san_path_err_threshold = 1; + condlog(3, "%s: san_path_err_threshold = 1 %s", mp->alias, + (watch_checks > 0)? 
delay_watch_origin : delay_wait_origin); + if (watch_checks > 0) { + mp->san_path_err_forget_rate = watch_checks; + print_off_int_undef(buff, 12, mp->san_path_err_forget_rate); + condlog(3, "%s: san_path_err_forget_rate = %s %s", mp->alias, + buff, delay_watch_origin); + } + if (wait_checks > 0) { + mp->san_path_err_recovery_time = wait_checks * + conf->max_checkint; + print_off_int_undef(buff, 12, mp->san_path_err_recovery_time); + condlog(3, "%s: san_path_err_recovery_time = %s %s", mp->alias, + buff, delay_wait_origin); + } + return 0; +} + +static int san_path_deprecated_warned; +#define warn_san_path_deprecated(v, x) \ + do { \ + if (v->x > 0 && !san_path_deprecated_warned) { \ + san_path_deprecated_warned = 1; \ + condlog(1, "WARNING: option %s is deprecated, " \ + "please use marginal_path options instead", \ + #x); \ + } \ + } while(0) + +int select_san_path_err_threshold(struct config *conf, struct multipath *mp) +{ + const char *origin; + char buff[12]; + + if (marginal_path_check_enabled(mp)) { + mp->san_path_err_threshold = NU_NO; + origin = marginal_path_origin; + goto out; + } + mp_set_mpe(san_path_err_threshold); + mp_set_ovr(san_path_err_threshold); + mp_set_hwe(san_path_err_threshold); + mp_set_conf(san_path_err_threshold); + mp_set_default(san_path_err_threshold, DEFAULT_ERR_CHECKS); +out: + if (print_off_int_undef(buff, 12, mp->san_path_err_threshold) != 0) + condlog(3, "%s: san_path_err_threshold = %s %s", + mp->alias, buff, origin); + warn_san_path_deprecated(mp, san_path_err_threshold); + return 0; +} + +int select_san_path_err_forget_rate(struct config *conf, struct multipath *mp) +{ + const char *origin; + char buff[12]; + + if (marginal_path_check_enabled(mp)) { + mp->san_path_err_forget_rate = NU_NO; + origin = marginal_path_origin; + goto out; + } + mp_set_mpe(san_path_err_forget_rate); + mp_set_ovr(san_path_err_forget_rate); + mp_set_hwe(san_path_err_forget_rate); + mp_set_conf(san_path_err_forget_rate); + 
mp_set_default(san_path_err_forget_rate, DEFAULT_ERR_CHECKS); +out: + if (print_off_int_undef(buff, 12, mp->san_path_err_forget_rate) != 0) + condlog(3, "%s: san_path_err_forget_rate = %s %s", mp->alias, + buff, origin); + warn_san_path_deprecated(mp, san_path_err_forget_rate); + return 0; + +} + +int select_san_path_err_recovery_time(struct config *conf, struct multipath *mp) +{ + const char *origin; + char buff[12]; + + if (marginal_path_check_enabled(mp)) { + mp->san_path_err_recovery_time = NU_NO; + origin = marginal_path_origin; + goto out; + } + mp_set_mpe(san_path_err_recovery_time); + mp_set_ovr(san_path_err_recovery_time); + mp_set_hwe(san_path_err_recovery_time); + mp_set_conf(san_path_err_recovery_time); + mp_set_default(san_path_err_recovery_time, DEFAULT_ERR_CHECKS); +out: + if (print_off_int_undef(buff, 12, mp->san_path_err_recovery_time) != 0) + condlog(3, "%s: san_path_err_recovery_time = %s %s", mp->alias, + buff, origin); + warn_san_path_deprecated(mp, san_path_err_recovery_time); + return 0; + +} + +int select_marginal_path_err_sample_time(struct config *conf, struct multipath *mp) +{ + const char *origin; + char buff[12]; + + mp_set_mpe(marginal_path_err_sample_time); + mp_set_ovr(marginal_path_err_sample_time); + mp_set_hwe(marginal_path_err_sample_time); + mp_set_conf(marginal_path_err_sample_time); + mp_set_default(marginal_path_err_sample_time, DEFAULT_ERR_CHECKS); +out: + if (print_off_int_undef(buff, 12, mp->marginal_path_err_sample_time) + != 0) + condlog(3, "%s: marginal_path_err_sample_time = %s %s", + mp->alias, buff, origin); + return 0; +} + +int select_marginal_path_err_rate_threshold(struct config *conf, struct multipath *mp) +{ + const char *origin; + char buff[12]; + + mp_set_mpe(marginal_path_err_rate_threshold); + mp_set_ovr(marginal_path_err_rate_threshold); + mp_set_hwe(marginal_path_err_rate_threshold); + mp_set_conf(marginal_path_err_rate_threshold); + mp_set_default(marginal_path_err_rate_threshold, DEFAULT_ERR_CHECKS); 
+out: + if (print_off_int_undef(buff, 12, mp->marginal_path_err_rate_threshold) + != 0) + condlog(3, "%s: marginal_path_err_rate_threshold = %s %s", + mp->alias, buff, origin); + return 0; +} + +int select_marginal_path_err_recheck_gap_time(struct config *conf, struct multipath *mp) +{ + const char *origin; + char buff[12]; + + mp_set_mpe(marginal_path_err_recheck_gap_time); + mp_set_ovr(marginal_path_err_recheck_gap_time); + mp_set_hwe(marginal_path_err_recheck_gap_time); + mp_set_conf(marginal_path_err_recheck_gap_time); + mp_set_default(marginal_path_err_recheck_gap_time, DEFAULT_ERR_CHECKS); +out: + if (print_off_int_undef(buff, 12, + mp->marginal_path_err_recheck_gap_time) != 0) + condlog(3, "%s: marginal_path_err_recheck_gap_time = %s %s", + mp->alias, buff, origin); + return 0; +} + +int select_marginal_path_double_failed_time(struct config *conf, struct multipath *mp) +{ + const char *origin; + char buff[12]; + + mp_set_mpe(marginal_path_double_failed_time); + mp_set_ovr(marginal_path_double_failed_time); + mp_set_hwe(marginal_path_double_failed_time); + mp_set_conf(marginal_path_double_failed_time); + mp_set_default(marginal_path_double_failed_time, DEFAULT_ERR_CHECKS); +out: + if (print_off_int_undef(buff, 12, mp->marginal_path_double_failed_time) + != 0) + condlog(3, "%s: marginal_path_double_failed_time = %s %s", + mp->alias, buff, origin); + return 0; +} + +int select_skip_kpartx (struct config *conf, struct multipath * mp) +{ + const char *origin; + + mp_set_mpe(skip_kpartx); + mp_set_ovr(skip_kpartx); + mp_set_hwe(skip_kpartx); + mp_set_conf(skip_kpartx); + mp_set_default(skip_kpartx, DEFAULT_SKIP_KPARTX); +out: + condlog(3, "%s: skip_kpartx = %s %s", mp->alias, + (mp->skip_kpartx == SKIP_KPARTX_ON)? 
"yes" : "no", + origin); + return 0; +} + +int select_max_sectors_kb(struct config *conf, struct multipath * mp) +{ + const char *origin; + + mp_set_mpe(max_sectors_kb); + mp_set_ovr(max_sectors_kb); + mp_set_hwe(max_sectors_kb); + mp_set_conf(max_sectors_kb); + mp_set_default(max_sectors_kb, DEFAULT_MAX_SECTORS_KB); + /* + * In the default case, we will not modify max_sectors_kb in sysfs + * (see sysfs_set_max_sectors_kb()). + * Don't print a log message here to avoid user confusion. + */ + return 0; +out: + condlog(3, "%s: max_sectors_kb = %i %s", mp->alias, mp->max_sectors_kb, + origin); + return 0; +} + +int select_ghost_delay (struct config *conf, struct multipath * mp) +{ + const char *origin; + char buff[12]; + + mp_set_mpe(ghost_delay); + mp_set_ovr(ghost_delay); + mp_set_hwe(ghost_delay); + mp_set_conf(ghost_delay); + mp_set_default(ghost_delay, DEFAULT_GHOST_DELAY); +out: + if (print_off_int_undef(buff, 12, mp->ghost_delay) != 0) + condlog(3, "%s: ghost_delay = %s %s", mp->alias, buff, origin); + return 0; +} + +int select_find_multipaths_timeout(struct config *conf, struct path *pp) +{ + const char *origin; + + pp_set_conf(find_multipaths_timeout); + pp_set_default(find_multipaths_timeout, + DEFAULT_FIND_MULTIPATHS_TIMEOUT); +out: + /* + * If configured value is negative, and this "unknown" hardware + * (no hwentry), use very small timeout to avoid delays. 
+ */ + if (pp->find_multipaths_timeout < 0) { + pp->find_multipaths_timeout = -pp->find_multipaths_timeout; + if (!pp->hwe) { + pp->find_multipaths_timeout = + DEFAULT_UNKNOWN_FIND_MULTIPATHS_TIMEOUT; + origin = "(default for unknown hardware)"; + } + } + condlog(3, "%s: timeout for find_multipaths \"smart\" = %ds %s", + pp->dev, pp->find_multipaths_timeout, origin); + return 0; +} + +int select_all_tg_pt (struct config *conf, struct multipath * mp) +{ + const char *origin; + + mp_set_ovr(all_tg_pt); + mp_set_hwe(all_tg_pt); + mp_set_conf(all_tg_pt); + mp_set_default(all_tg_pt, DEFAULT_ALL_TG_PT); +out: + condlog(3, "%s: all_tg_pt = %s %s", mp->alias, + (mp->all_tg_pt == ALL_TG_PT_ON)? "yes" : "no", + origin); + return 0; +} + +int select_vpd_vendor_id (struct path *pp) +{ + const char *origin; + + pp_set_hwe(vpd_vendor_id); + pp_set_default(vpd_vendor_id, 0); +out: + if (pp->vpd_vendor_id < 0 || pp->vpd_vendor_id >= VPD_VP_ARRAY_SIZE) { + condlog(3, "%s: vpd_vendor_id = %d (invalid, setting to 0)", + pp->dev, pp->vpd_vendor_id); + pp->vpd_vendor_id = 0; + } + condlog(3, "%s: vpd_vendor_id = %d \"%s\" %s", pp->dev, + pp->vpd_vendor_id, vpd_vendor_pages[pp->vpd_vendor_id].name, + origin); + return 0; +} diff --git a/libmultipath/propsel.h b/libmultipath/propsel.h new file mode 100644 index 0000000..3d6edd8 --- /dev/null +++ b/libmultipath/propsel.h @@ -0,0 +1,40 @@ +int select_rr_weight (struct config *conf, struct multipath * mp); +int select_pgfailback (struct config *conf, struct multipath * mp); +int select_pgpolicy (struct config *conf, struct multipath * mp); +int select_selector (struct config *conf, struct multipath * mp); +int select_alias (struct config *conf, struct multipath * mp); +int select_features (struct config *conf, struct multipath * mp); +int select_hwhandler (struct config *conf, struct multipath * mp); +int select_checker(struct config *conf, struct path *pp); +int select_getuid (struct config *conf, struct path * pp); +int select_prio 
(struct config *conf, struct path * pp); +int select_find_multipaths_timeout(struct config *conf, struct path *pp); +int select_no_path_retry(struct config *conf, struct multipath *mp); +int select_flush_on_last_del(struct config *conf, struct multipath *mp); +int select_minio(struct config *conf, struct multipath *mp); +int select_mode(struct config *conf, struct multipath *mp); +int select_uid(struct config *conf, struct multipath *mp); +int select_gid(struct config *conf, struct multipath *mp); +int select_fast_io_fail(struct config *conf, struct multipath *mp); +int select_dev_loss(struct config *conf, struct multipath *mp); +int select_reservation_key(struct config *conf, struct multipath *mp); +int select_retain_hwhandler (struct config *conf, struct multipath * mp); +int select_detect_prio(struct config *conf, struct path * pp); +int select_detect_checker(struct config *conf, struct path * pp); +int select_deferred_remove(struct config *conf, struct multipath *mp); +int select_delay_checks(struct config *conf, struct multipath * mp); +int select_skip_kpartx (struct config *conf, struct multipath * mp); +int select_max_sectors_kb (struct config *conf, struct multipath * mp); +int select_san_path_err_forget_rate(struct config *conf, struct multipath *mp); +int select_san_path_err_threshold(struct config *conf, struct multipath *mp); +int select_san_path_err_recovery_time(struct config *conf, struct multipath *mp); +int select_marginal_path_err_sample_time(struct config *conf, struct multipath *mp); +int select_marginal_path_err_rate_threshold(struct config *conf, struct multipath *mp); +int select_marginal_path_err_recheck_gap_time(struct config *conf, struct multipath *mp); +int select_marginal_path_double_failed_time(struct config *conf, struct multipath *mp); +int select_ghost_delay(struct config *conf, struct multipath * mp); +void reconcile_features_with_options(const char *id, char **features, + int* no_path_retry, + int *retain_hwhandler); +int 
select_all_tg_pt (struct config *conf, struct multipath * mp); +int select_vpd_vendor_id (struct path *pp); diff --git a/libmultipath/sg_include.h b/libmultipath/sg_include.h new file mode 100644 index 0000000..750a13a --- /dev/null +++ b/libmultipath/sg_include.h @@ -0,0 +1,25 @@ +#define __user +#include + +#ifndef DID_OK +#define DID_OK 0x00 /* NO error */ +#define DID_NO_CONNECT 0x01 /* Couldn't connect before timeout period */ +#define DID_BUS_BUSY 0x02 /* BUS stayed busy through time out period */ +#define DID_TIME_OUT 0x03 /* TIMED OUT for other reason */ +#define DID_BAD_TARGET 0x04 /* BAD target. */ +#define DID_ABORT 0x05 /* Told to abort for some other reason */ +#define DID_PARITY 0x06 /* Parity error */ +#define DID_ERROR 0x07 /* Internal error */ +#define DID_RESET 0x08 /* Reset by somebody. */ +#define DID_BAD_INTR 0x09 /* Got an interrupt we weren't expecting. */ +#define DID_PASSTHROUGH 0x0a /* Force command past mid-layer */ +#define DID_SOFT_ERROR 0x0b /* The low level driver just wish a retry */ +#define DID_IMM_RETRY 0x0c /* Retry without decrementing retry count */ +#define DID_REQUEUE 0x0d /* Requeue command (no immediate retry) also + * without decrementing the retry count */ +#define DID_TRANSPORT_DISRUPTED 0x0e /* Transport error disrupted execution + * and the driver blocked the port to + * recover the link. 
Transport class will + * retry or fail IO */ +#define DID_TRANSPORT_FAILFAST 0x0f /* Transport class fastfailed the io */ +#endif diff --git a/libmultipath/structs.c b/libmultipath/structs.c new file mode 100644 index 0000000..2dd378c --- /dev/null +++ b/libmultipath/structs.c @@ -0,0 +1,704 @@ +/* + * Copyright (c) 2004, 2005 Christophe Varoqui + * Copyright (c) 2004 Stefan Bader, IBM + */ +#include +#include +#include +#include + +#include "checkers.h" +#include "memory.h" +#include "vector.h" +#include "util.h" +#include "structs.h" +#include "config.h" +#include "debug.h" +#include "structs_vec.h" +#include "blacklist.h" +#include "prio.h" +#include "prioritizers/alua_spc3.h" +#include "dm-generic.h" + +struct adapter_group * +alloc_adaptergroup(void) +{ + struct adapter_group *agp; + + agp = (struct adapter_group *)MALLOC(sizeof(struct adapter_group)); + + if (!agp) + return NULL; + + agp->host_groups = vector_alloc(); + if (!agp->host_groups) { + FREE(agp); + agp = NULL; + } + return agp; +} + +void free_adaptergroup(vector adapters) +{ + int i; + struct adapter_group *agp; + + vector_foreach_slot(adapters, agp, i) { + free_hostgroup(agp->host_groups); + FREE(agp); + } + vector_free(adapters); +} + +void free_hostgroup(vector hostgroups) +{ + int i; + struct host_group *hgp; + + if (!hostgroups) + return; + + vector_foreach_slot(hostgroups, hgp, i) { + vector_free(hgp->paths); + FREE(hgp); + } + vector_free(hostgroups); +} + +struct host_group * +alloc_hostgroup(void) +{ + struct host_group *hgp; + + hgp = (struct host_group *)MALLOC(sizeof(struct host_group)); + + if (!hgp) + return NULL; + + hgp->paths = vector_alloc(); + + if (!hgp->paths) { + FREE(hgp); + hgp = NULL; + } + return hgp; +} + +struct path * +alloc_path (void) +{ + struct path * pp; + + pp = (struct path *)MALLOC(sizeof(struct path)); + + if (pp) { + pp->sg_id.host_no = -1; + pp->sg_id.channel = -1; + pp->sg_id.scsi_id = -1; + pp->sg_id.lun = -1; + pp->sg_id.proto_id = SCSI_PROTOCOL_UNSPEC; + 
pp->fd = -1; + pp->tpgs = TPGS_UNDEF; + pp->priority = PRIO_UNDEF; + pp->checkint = CHECKINT_UNDEF; + checker_clear(&pp->checker); + dm_path_to_gen(pp)->ops = &dm_gen_path_ops; + pp->hwe = vector_alloc(); + if (pp->hwe == NULL) { + free(pp); + return NULL; + } + } + return pp; +} + +void +free_path (struct path * pp) +{ + if (!pp) + return; + + if (checker_selected(&pp->checker)) + checker_put(&pp->checker); + + if (prio_selected(&pp->prio)) + prio_put(&pp->prio); + + if (pp->fd >= 0) + close(pp->fd); + + if (pp->udev) { + udev_device_unref(pp->udev); + pp->udev = NULL; + } + if (pp->vpd_data) + free(pp->vpd_data); + + vector_free(pp->hwe); + + FREE(pp); +} + +void +free_pathvec (vector vec, enum free_path_mode free_paths) +{ + int i; + struct path * pp; + + if (!vec) + return; + + if (free_paths == FREE_PATHS) + vector_foreach_slot(vec, pp, i) + free_path(pp); + + vector_free(vec); +} + +struct pathgroup * +alloc_pathgroup (void) +{ + struct pathgroup * pgp; + + pgp = (struct pathgroup *)MALLOC(sizeof(struct pathgroup)); + + if (!pgp) + return NULL; + + pgp->paths = vector_alloc(); + + if (!pgp->paths) { + FREE(pgp); + return NULL; + } + + dm_pathgroup_to_gen(pgp)->ops = &dm_gen_pathgroup_ops; + return pgp; +} + +void +free_pathgroup (struct pathgroup * pgp, enum free_path_mode free_paths) +{ + if (!pgp) + return; + + free_pathvec(pgp->paths, free_paths); + FREE(pgp); +} + +void +free_pgvec (vector pgvec, enum free_path_mode free_paths) +{ + int i; + struct pathgroup * pgp; + + if (!pgvec) + return; + + vector_foreach_slot(pgvec, pgp, i) + free_pathgroup(pgp, free_paths); + + vector_free(pgvec); +} + +struct multipath * +alloc_multipath (void) +{ + struct multipath * mpp; + + mpp = (struct multipath *)MALLOC(sizeof(struct multipath)); + + if (mpp) { + mpp->bestpg = 1; + mpp->mpcontext = NULL; + mpp->no_path_retry = NO_PATH_RETRY_UNDEF; + mpp->fast_io_fail = MP_FAST_IO_FAIL_UNSET; + dm_multipath_to_gen(mpp)->ops = &dm_gen_multipath_ops; + } + return mpp; +} + +void 
free_multipath_attributes(struct multipath *mpp) +{ + if (!mpp) + return; + + if (mpp->selector) { + FREE(mpp->selector); + mpp->selector = NULL; + } + + if (mpp->features) { + FREE(mpp->features); + mpp->features = NULL; + } + + if (mpp->hwhandler) { + FREE(mpp->hwhandler); + mpp->hwhandler = NULL; + } +} + +void +free_multipath (struct multipath * mpp, enum free_path_mode free_paths) +{ + if (!mpp) + return; + + free_multipath_attributes(mpp); + + if (mpp->alias) { + FREE(mpp->alias); + mpp->alias = NULL; + } + + if (mpp->dmi) { + FREE(mpp->dmi); + mpp->dmi = NULL; + } + + free_pathvec(mpp->paths, free_paths); + free_pgvec(mpp->pg, free_paths); + FREE_PTR(mpp->mpcontext); + FREE(mpp); +} + +void +drop_multipath (vector mpvec, char * wwid, enum free_path_mode free_paths) +{ + int i; + struct multipath * mpp; + + if (!mpvec) + return; + + vector_foreach_slot (mpvec, mpp, i) { + if (!strncmp(mpp->wwid, wwid, WWID_SIZE)) { + free_multipath(mpp, free_paths); + vector_del_slot(mpvec, i); + return; + } + } +} + +void +free_multipathvec (vector mpvec, enum free_path_mode free_paths) +{ + int i; + struct multipath * mpp; + + if (!mpvec) + return; + + vector_foreach_slot (mpvec, mpp, i) + free_multipath(mpp, free_paths); + + vector_free(mpvec); +} + +int +store_path (vector pathvec, struct path * pp) +{ + int err = 0; + + if (!strlen(pp->dev_t)) { + condlog(2, "%s: Empty device number", pp->dev); + err++; + } + if (!strlen(pp->dev)) { + condlog(2, "%s: Empty device name", pp->dev_t); + err++; + } + + if (err > 1) + return 1; + + if (!vector_alloc_slot(pathvec)) + return 1; + + vector_set_slot(pathvec, pp); + + return 0; +} + +int add_pathgroup(struct multipath *mpp, struct pathgroup *pgp) +{ + if (!vector_alloc_slot(mpp->pg)) + return 1; + + vector_set_slot(mpp->pg, pgp); + + pgp->mpp = mpp; + return 0; +} + +int +store_hostgroup(vector hostgroupvec, struct host_group * hgp) +{ + if (!vector_alloc_slot(hostgroupvec)) + return 1; + + vector_set_slot(hostgroupvec, hgp); + 
return 0; +} + +int +store_adaptergroup(vector adapters, struct adapter_group * agp) +{ + if (!vector_alloc_slot(adapters)) + return 1; + + vector_set_slot(adapters, agp); + return 0; +} + +struct multipath * +find_mp_by_minor (const struct _vector *mpvec, unsigned int minor) +{ + int i; + struct multipath * mpp; + + if (!mpvec) + return NULL; + + vector_foreach_slot (mpvec, mpp, i) { + if (!mpp->dmi) + continue; + + if (mpp->dmi->minor == minor) + return mpp; + } + return NULL; +} + +struct multipath * +find_mp_by_wwid (const struct _vector *mpvec, const char * wwid) +{ + int i; + struct multipath * mpp; + + if (!mpvec) + return NULL; + + vector_foreach_slot (mpvec, mpp, i) + if (!strncmp(mpp->wwid, wwid, WWID_SIZE)) + return mpp; + + return NULL; +} + +struct multipath * +find_mp_by_alias (const struct _vector *mpvec, const char * alias) +{ + int i; + size_t len; + struct multipath * mpp; + + if (!mpvec) + return NULL; + + len = strlen(alias); + + if (!len) + return NULL; + + vector_foreach_slot (mpvec, mpp, i) { + if (strlen(mpp->alias) == len && + !strncmp(mpp->alias, alias, len)) + return mpp; + } + return NULL; +} + +struct multipath * +find_mp_by_str (const struct _vector *mpvec, const char * str) +{ + int minor; + + if (sscanf(str, "dm-%d", &minor) == 1) + return find_mp_by_minor(mpvec, minor); + else + return find_mp_by_alias(mpvec, str); +} + +struct path * +find_path_by_dev (const struct _vector *pathvec, const char * dev) +{ + int i; + struct path * pp; + + if (!pathvec) + return NULL; + + vector_foreach_slot (pathvec, pp, i) + if (!strcmp(pp->dev, dev)) + return pp; + + condlog(4, "%s: dev not found in pathvec", dev); + return NULL; +} + +struct path * +find_path_by_devt (const struct _vector *pathvec, const char * dev_t) +{ + int i; + struct path * pp; + + if (!pathvec) + return NULL; + + vector_foreach_slot (pathvec, pp, i) + if (!strcmp(pp->dev_t, dev_t)) + return pp; + + condlog(4, "%s: dev_t not found in pathvec", dev_t); + return NULL; +} + +int 
pathcountgr(const struct pathgroup *pgp, int state) +{ + struct path *pp; + int count = 0; + int i; + + vector_foreach_slot (pgp->paths, pp, i) + if ((pp->state == state) || (state == PATH_WILD)) + count++; + + return count; +} + +int pathcount(const struct multipath *mpp, int state) +{ + struct pathgroup *pgp; + int count = 0; + int i; + + if (mpp->pg) { + vector_foreach_slot (mpp->pg, pgp, i) + count += pathcountgr(pgp, state); + } + return count; +} + +int count_active_paths(const struct multipath *mpp) +{ + struct pathgroup *pgp; + struct path *pp; + int count = 0; + int i, j; + + if (!mpp->pg) + return 0; + + vector_foreach_slot (mpp->pg, pgp, i) { + vector_foreach_slot (pgp->paths, pp, j) { + if (pp->state == PATH_UP || pp->state == PATH_GHOST) + count++; + } + } + return count; +} + +int pathcmp(const struct pathgroup *pgp, const struct pathgroup *cpgp) +{ + int i, j; + struct path *pp, *cpp; + int pnum = 0, found = 0; + + vector_foreach_slot(pgp->paths, pp, i) { + pnum++; + vector_foreach_slot(cpgp->paths, cpp, j) { + if ((long)pp == (long)cpp) { + found++; + break; + } + } + } + + return pnum - found; +} + +struct path * +first_path (const struct multipath * mpp) +{ + struct pathgroup * pgp; + if (!mpp->pg) + return NULL; + pgp = VECTOR_SLOT(mpp->pg, 0); + + return pgp?VECTOR_SLOT(pgp->paths, 0):NULL; +} + +int add_feature(char **f, const char *n) +{ + int c = 0, d, l; + char *e, *t; + + if (!f) + return 1; + + /* Nothing to do */ + if (!n || *n == '0') + return 0; + + if (strchr(n, ' ') != NULL) { + condlog(0, "internal error: feature \"%s\" contains spaces", n); + return 1; + } + + /* default feature is null */ + if(!*f) + { + l = asprintf(&t, "1 %s", n); + if(l == -1) + return 1; + + *f = t; + return 0; + } + + /* Check if feature is already present */ + if (strstr(*f, n)) + return 0; + + /* Get feature count */ + c = strtoul(*f, &e, 10); + if (*f == e || (*e != ' ' && *e != '\0')) { + condlog(0, "parse error in feature string \"%s\"", *f); + return 1; 
+ } + + /* Add 1 digit and 1 space */ + l = strlen(e) + strlen(n) + 2; + + c++; + /* Check if we need more digits for feature count */ + for (d = c; d >= 10; d /= 10) + l++; + + t = MALLOC(l + 1); + if (!t) + return 1; + + /* e: old feature string with leading space, or "" */ + if (*e == ' ') + while (*(e + 1) == ' ') + e++; + + snprintf(t, l + 1, "%0d%s %s", c, e, n); + + FREE(*f); + *f = t; + + return 0; +} + +int remove_feature(char **f, const char *o) +{ + int c = 0, d, l; + char *e, *p, *n; + const char *q; + + if (!f || !*f) + return 1; + + /* Nothing to do */ + if (!o || *o == '\0') + return 0; + + /* Check if not present */ + if (!strstr(*f, o)) + return 0; + + /* Get feature count */ + c = strtoul(*f, &e, 10); + if (*f == e) + /* parse error */ + return 1; + + /* Normalize features */ + while (*o == ' ') { + o++; + } + /* Just spaces, return */ + if (*o == '\0') + return 0; + q = o + strlen(o); + while (*q == ' ') + q--; + d = (int)(q - o); + + /* Update feature count */ + c--; + q = o; + while (q[0] != '\0') { + if (q[0] == ' ' && q[1] != ' ' && q[1] != '\0') + c--; + q++; + } + + /* Quick exit if all features have been removed */ + if (c == 0) { + n = MALLOC(2); + if (!n) + return 1; + strcpy(n, "0"); + goto out; + } + + /* Search feature to be removed */ + e = strstr(*f, o); + if (!e) + /* Not found, return */ + return 0; + + /* Update feature count space */ + l = strlen(*f) - d; + n = MALLOC(l + 1); + if (!n) + return 1; + + /* Copy the feature count */ + sprintf(n, "%0d", c); + /* + * Copy existing features up to the feature + * about to be removed + */ + p = strchr(*f, ' '); + if (!p) { + /* Internal error, feature string inconsistent */ + FREE(n); + return 1; + } + while (*p == ' ') + p++; + p--; + if (e != p) { + do { + e--; + d++; + } while (*e == ' '); + e++; d--; + strncat(n, p, (size_t)(e - p)); + p += (size_t)(e - p); + } + /* Skip feature to be removed */ + p += d; + + /* Copy remaining features */ + if (strlen(p)) { + while (*p == ' ') + 
p++; + if (strlen(p)) { + p--; + strcat(n, p); + } + } + +out: + FREE(*f); + *f = n; + + return 0; +} diff --git a/libmultipath/structs.h b/libmultipath/structs.h new file mode 100644 index 0000000..9bd39eb --- /dev/null +++ b/libmultipath/structs.h @@ -0,0 +1,474 @@ +#ifndef _STRUCTS_H +#define _STRUCTS_H + +#include +#include +#include + +#include "prio.h" +#include "byteorder.h" +#include "generic.h" + +#define WWID_SIZE 128 +#define SERIAL_SIZE 128 +#define NODE_NAME_SIZE 224 +#define PATH_STR_SIZE 16 +#define PARAMS_SIZE 4096 +#define FILE_NAME_SIZE 256 +#define CALLOUT_MAX_SIZE 256 +#define BLK_DEV_SIZE 33 +#define PATH_SIZE 512 +#define NAME_SIZE 512 +#define HOST_NAME_LEN 16 +#define SLOT_NAME_SIZE 40 +#define PRKEY_SIZE 19 +#define VPD_DATA_SIZE 128 + +#define SCSI_VENDOR_SIZE 9 +#define SCSI_PRODUCT_SIZE 17 +#define SCSI_STATE_SIZE 19 +#define NVME_MODEL_SIZE 41 +#define NVME_REV_SIZE 9 + +/* This must be the maximum of SCSI and NVME sizes */ +#define PATH_PRODUCT_SIZE NVME_MODEL_SIZE +#define PATH_REV_SIZE NVME_REV_SIZE + +#define NO_PATH_RETRY_UNDEF 0 +#define NO_PATH_RETRY_FAIL -1 +#define NO_PATH_RETRY_QUEUE -2 + + +enum free_path_mode { + KEEP_PATHS, + FREE_PATHS +}; + +enum rr_weight_mode { + RR_WEIGHT_UNDEF, + RR_WEIGHT_NONE, + RR_WEIGHT_PRIO +}; + +enum failback_mode { + FAILBACK_UNDEF, + FAILBACK_MANUAL, + FAILBACK_IMMEDIATE, + FAILBACK_FOLLOWOVER +}; + +enum sysfs_buses { + SYSFS_BUS_UNDEF, + SYSFS_BUS_SCSI, + SYSFS_BUS_CCW, + SYSFS_BUS_CCISS, + SYSFS_BUS_NVME, +}; + +enum pathstates { + PSTATE_UNDEF, + PSTATE_FAILED, + PSTATE_ACTIVE +}; + +enum pgstates { + PGSTATE_UNDEF, + PGSTATE_ENABLED, + PGSTATE_DISABLED, + PGSTATE_ACTIVE +}; + +enum yes_no_states { + YN_NO, + YN_YES, +}; + +enum queue_without_daemon_states { + QUE_NO_DAEMON_OFF = YN_NO, + QUE_NO_DAEMON_ON = YN_YES, + QUE_NO_DAEMON_FORCE, +}; + +enum attribute_bits { + ATTR_UID, + ATTR_GID, + ATTR_MODE, +}; + +enum yes_no_undef_states { + YNU_UNDEF, + YNU_NO, + YNU_YES, +}; + +#define 
_FIND_MULTIPATHS_F (1 << 1) +#define _FIND_MULTIPATHS_I (1 << 2) +#define _FIND_MULTIPATHS_N (1 << 3) +/* + * _FIND_MULTIPATHS_F must have the same value as YNU_YES. + * Generate a compile time error if that isn't the case. + */ +extern char ___error1___[-(_FIND_MULTIPATHS_F != YNU_YES)]; + +#define find_multipaths_on(conf) \ + (!!((conf)->find_multipaths & _FIND_MULTIPATHS_F)) +#define ignore_wwids_on(conf) \ + (!!((conf)->find_multipaths & _FIND_MULTIPATHS_I)) +#define ignore_new_devs_on(conf) \ + (!!((conf)->find_multipaths & _FIND_MULTIPATHS_N)) + +enum find_multipaths_states { + FIND_MULTIPATHS_UNDEF = YNU_UNDEF, + FIND_MULTIPATHS_OFF = YNU_NO, + FIND_MULTIPATHS_ON = _FIND_MULTIPATHS_F, + FIND_MULTIPATHS_GREEDY = _FIND_MULTIPATHS_I, + FIND_MULTIPATHS_SMART = _FIND_MULTIPATHS_F|_FIND_MULTIPATHS_I, + FIND_MULTIPATHS_STRICT = _FIND_MULTIPATHS_F|_FIND_MULTIPATHS_N, + __FIND_MULTIPATHS_LAST, +}; + +enum flush_states { + FLUSH_UNDEF = YNU_UNDEF, + FLUSH_DISABLED = YNU_NO, + FLUSH_ENABLED = YNU_YES, +}; + +enum log_checker_err_states { + LOG_CHKR_ERR_ALWAYS, + LOG_CHKR_ERR_ONCE, +}; + +enum user_friendly_names_states { + USER_FRIENDLY_NAMES_UNDEF = YNU_UNDEF, + USER_FRIENDLY_NAMES_OFF = YNU_NO, + USER_FRIENDLY_NAMES_ON = YNU_YES, +}; + +enum retain_hwhandler_states { + RETAIN_HWHANDLER_UNDEF = YNU_UNDEF, + RETAIN_HWHANDLER_OFF = YNU_NO, + RETAIN_HWHANDLER_ON = YNU_YES, +}; + +enum detect_prio_states { + DETECT_PRIO_UNDEF = YNU_UNDEF, + DETECT_PRIO_OFF = YNU_NO, + DETECT_PRIO_ON = YNU_YES, +}; + +enum detect_checker_states { + DETECT_CHECKER_UNDEF = YNU_UNDEF, + DETECT_CHECKER_OFF = YNU_NO, + DETECT_CHECKER_ON = YNU_YES, +}; + +enum deferred_remove_states { + DEFERRED_REMOVE_UNDEF = YNU_UNDEF, + DEFERRED_REMOVE_OFF = YNU_NO, + DEFERRED_REMOVE_ON = YNU_YES, + DEFERRED_REMOVE_IN_PROGRESS, +}; + +enum skip_kpartx_states { + SKIP_KPARTX_UNDEF = YNU_UNDEF, + SKIP_KPARTX_OFF = YNU_NO, + SKIP_KPARTX_ON = YNU_YES, +}; + +enum max_sectors_kb_states { + MAX_SECTORS_KB_UNDEF = 
0, + MAX_SECTORS_KB_MIN = 4, /* can't be smaller than page size */ +}; + +enum scsi_protocol { + SCSI_PROTOCOL_FCP = 0, /* Fibre Channel */ + SCSI_PROTOCOL_SPI = 1, /* parallel SCSI */ + SCSI_PROTOCOL_SSA = 2, /* Serial Storage Architecture - Obsolete */ + SCSI_PROTOCOL_SBP = 3, /* firewire */ + SCSI_PROTOCOL_SRP = 4, /* Infiniband RDMA */ + SCSI_PROTOCOL_ISCSI = 5, + SCSI_PROTOCOL_SAS = 6, + SCSI_PROTOCOL_ADT = 7, /* Media Changers */ + SCSI_PROTOCOL_ATA = 8, + SCSI_PROTOCOL_UNSPEC = 0xf, /* No specific protocol */ +}; + +enum no_undef_states { + NU_NO = -1, + NU_UNDEF = 0, +}; + +enum ghost_delay_states { + GHOST_DELAY_OFF = NU_NO, + GHOST_DELAY_UNDEF = NU_UNDEF, +}; + +enum initialized_states { + INIT_NEW, + INIT_FAILED, + INIT_MISSING_UDEV, + INIT_REQUESTED_UDEV, + INIT_OK, +}; + +enum prkey_sources { + PRKEY_SOURCE_NONE, + PRKEY_SOURCE_CONF, + PRKEY_SOURCE_FILE, +}; + +enum all_tg_pt_states { + ALL_TG_PT_UNDEF = YNU_UNDEF, + ALL_TG_PT_OFF = YNU_NO, + ALL_TG_PT_ON = YNU_YES, +}; + +enum vpd_vendor_ids { + VPD_VP_UNDEF, + VPD_VP_HP3PAR, + VPD_VP_ARRAY_SIZE, /* This must remain the last entry */ +}; + +struct vpd_vendor_page { + int pg; + const char *name; +}; +extern struct vpd_vendor_page vpd_vendor_pages[VPD_VP_ARRAY_SIZE]; + +struct sg_id { + int host_no; + int channel; + int scsi_id; + int lun; + short h_cmd_per_lun; + short d_queue_depth; + enum scsi_protocol proto_id; + int transport_id; +}; + +# ifndef HDIO_GETGEO +# define HDIO_GETGEO 0x0301 /* get device geometry */ + +struct hd_geometry { + unsigned char heads; + unsigned char sectors; + unsigned short cylinders; + unsigned long start; +}; +#endif + +struct path { + char dev[FILE_NAME_SIZE]; + char dev_t[BLK_DEV_SIZE]; + struct udev_device *udev; + struct sg_id sg_id; + struct hd_geometry geom; + char wwid[WWID_SIZE]; + char vendor_id[SCSI_VENDOR_SIZE]; + char product_id[PATH_PRODUCT_SIZE]; + char rev[PATH_REV_SIZE]; + char serial[SERIAL_SIZE]; + char tgt_node_name[NODE_NAME_SIZE]; + char *vpd_data; + 
unsigned long long size; + unsigned int checkint; + unsigned int tick; + int bus; + int offline; + int state; + int dmstate; + int chkrstate; + int failcount; + int priority; + int pgindex; + int detect_prio; + int detect_checker; + int tpgs; + char * uid_attribute; + char * getuid; + struct prio prio; + struct checker checker; + struct multipath * mpp; + int fd; + int initialized; + int retriggers; + unsigned int path_failures; + time_t dis_reinstate_time; + int disable_reinstate; + int san_path_err_forget_rate; + time_t io_err_dis_reinstate_time; + int io_err_disable_reinstate; + int io_err_pathfail_cnt; + int io_err_pathfail_starttime; + int find_multipaths_timeout; + int marginal; + int vpd_vendor_id; + /* configlet pointers */ + vector hwe; + struct gen_path generic_path; +}; + +typedef int (pgpolicyfn) (struct multipath *, vector); + +struct multipath { + char wwid[WWID_SIZE]; + char alias_old[WWID_SIZE]; + int pgpolicy; + pgpolicyfn *pgpolicyfn; + int nextpg; + int bestpg; + int queuedio; + int action; + int wait_for_udev; + int uev_wait_tick; + int pgfailback; + int failback_tick; + int rr_weight; + int no_path_retry; /* number of retries after all paths are down */ + int retry_tick; /* remaining times for retries */ + int disable_queueing; + int minio; + int flush_on_last_del; + int attribute_flags; + int fast_io_fail; + int retain_hwhandler; + int deferred_remove; + bool in_recovery; + int san_path_err_threshold; + int san_path_err_forget_rate; + int san_path_err_recovery_time; + int marginal_path_err_sample_time; + int marginal_path_err_rate_threshold; + int marginal_path_err_recheck_gap_time; + int marginal_path_double_failed_time; + int skip_kpartx; + int max_sectors_kb; + int force_readonly; + int force_udev_reload; + int needs_paths_uevent; + int ghost_delay; + int ghost_delay_tick; + unsigned int dev_loss; + uid_t uid; + gid_t gid; + mode_t mode; + unsigned long long size; + vector paths; + vector pg; + struct dm_info * dmi; + + /* configlet 
pointers */ + char * alias; + char * alias_prefix; + char * selector; + char * features; + char * hwhandler; + struct mpentry * mpe; + vector hwe; + + /* threads */ + pthread_t waiter; + + /* stats */ + unsigned int stat_switchgroup; + unsigned int stat_path_failures; + unsigned int stat_map_loads; + unsigned int stat_total_queueing_time; + unsigned int stat_queueing_timeouts; + unsigned int stat_map_failures; + + /* checkers shared data */ + void * mpcontext; + + /* persistent management data*/ + int prkey_source; + struct be64 reservation_key; + uint8_t sa_flags; + unsigned char prflag; + int all_tg_pt; + struct gen_multipath generic_mp; +}; + +static inline int marginal_path_check_enabled(const struct multipath *mpp) +{ + return mpp->marginal_path_double_failed_time > 0 && + mpp->marginal_path_err_sample_time > 0 && + mpp->marginal_path_err_recheck_gap_time > 0 && + mpp->marginal_path_err_rate_threshold >= 0; +} + +static inline int san_path_check_enabled(const struct multipath *mpp) +{ + return mpp->san_path_err_threshold > 0 && + mpp->san_path_err_forget_rate > 0 && + mpp->san_path_err_recovery_time > 0; +} + +struct pathgroup { + long id; + int status; + int priority; + int enabled_paths; + int marginal; + vector paths; + struct multipath *mpp; + struct gen_pathgroup generic_pg; +}; + +struct adapter_group { + char adapter_name[SLOT_NAME_SIZE]; + struct pathgroup *pgp; + int num_hosts; + vector host_groups; + int next_host_index; +}; + +struct host_group { + int host_no; + int num_paths; + vector paths; +}; + +struct path * alloc_path (void); +struct pathgroup * alloc_pathgroup (void); +struct multipath * alloc_multipath (void); +void free_path (struct path *); +void free_pathvec (vector vec, enum free_path_mode free_paths); +void free_pathgroup (struct pathgroup * pgp, enum free_path_mode free_paths); +void free_pgvec (vector pgvec, enum free_path_mode free_paths); +void free_multipath (struct multipath *, enum free_path_mode free_paths); +void 
free_multipath_attributes (struct multipath *); +void drop_multipath (vector mpvec, char * wwid, enum free_path_mode free_paths); +void free_multipathvec (vector mpvec, enum free_path_mode free_paths); + +struct adapter_group * alloc_adaptergroup(void); +struct host_group * alloc_hostgroup(void); +void free_adaptergroup(vector adapters); +void free_hostgroup(vector hostgroups); + +int store_adaptergroup(vector adapters, struct adapter_group *agp); +int store_hostgroup(vector hostgroupvec, struct host_group *hgp); + +int store_path (vector pathvec, struct path * pp); +int add_pathgroup(struct multipath*, struct pathgroup *); + +struct multipath * find_mp_by_alias (const struct _vector *mp, const char *alias); +struct multipath * find_mp_by_wwid (const struct _vector *mp, const char *wwid); +struct multipath * find_mp_by_str (const struct _vector *mp, const char *wwid); +struct multipath * find_mp_by_minor (const struct _vector *mp, + unsigned int minor); + +struct path * find_path_by_devt (const struct _vector *pathvec, const char *devt); +struct path * find_path_by_dev (const struct _vector *pathvec, const char *dev); +struct path * first_path (const struct multipath *mpp); + +int pathcountgr (const struct pathgroup *, int); +int pathcount (const struct multipath *, int); +int count_active_paths(const struct multipath *); +int pathcmp (const struct pathgroup *, const struct pathgroup *); +int add_feature (char **, const char *); +int remove_feature (char **, const char *); + +extern char sysfs_path[PATH_SIZE]; + +#endif /* _STRUCTS_H */ diff --git a/libmultipath/structs_vec.c b/libmultipath/structs_vec.c new file mode 100644 index 0000000..3dbbaa0 --- /dev/null +++ b/libmultipath/structs_vec.c @@ -0,0 +1,550 @@ +#include +#include +#include + +#include "util.h" +#include "checkers.h" +#include "vector.h" +#include "defaults.h" +#include "debug.h" +#include "config.h" +#include "structs.h" +#include "structs_vec.h" +#include "sysfs.h" +#include "devmapper.h" 
+#include "dmparser.h" +#include "propsel.h" +#include "discovery.h" +#include "prio.h" +#include "configure.h" +#include "libdevmapper.h" +#include "io_err_stat.h" +#include "switchgroup.h" + +/* + * creates or updates mpp->paths reading mpp->pg + */ +int update_mpp_paths(struct multipath *mpp, vector pathvec) +{ + struct pathgroup * pgp; + struct path * pp; + int i,j; + + if (!mpp || !mpp->pg) + return 0; + + if (!mpp->paths && + !(mpp->paths = vector_alloc())) + return 1; + + vector_foreach_slot (mpp->pg, pgp, i) { + vector_foreach_slot (pgp->paths, pp, j) { + if (!find_path_by_devt(mpp->paths, pp->dev_t) && + (find_path_by_devt(pathvec, pp->dev_t)) && + store_path(mpp->paths, pp)) + return 1; + } + } + return 0; +} + +int adopt_paths(vector pathvec, struct multipath *mpp) +{ + int i, ret; + struct path * pp; + struct config *conf; + + if (!mpp) + return 0; + + if (update_mpp_paths(mpp, pathvec)) + return 1; + + vector_foreach_slot (pathvec, pp, i) { + if (!strncmp(mpp->wwid, pp->wwid, WWID_SIZE)) { + if (pp->size != 0 && mpp->size != 0 && + pp->size != mpp->size) { + condlog(3, "%s: size mismatch for %s, not adding path", + pp->dev, mpp->alias); + continue; + } + condlog(3, "%s: ownership set to %s", + pp->dev, mpp->alias); + pp->mpp = mpp; + + if (!mpp->paths && !(mpp->paths = vector_alloc())) + return 1; + + if (!find_path_by_dev(mpp->paths, pp->dev) && + store_path(mpp->paths, pp)) + return 1; + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + ret = pathinfo(pp, conf, + DI_PRIO | DI_CHECKER); + pthread_cleanup_pop(1); + if (ret) + return 1; + } + } + return 0; +} + +void orphan_path(struct path *pp, const char *reason) +{ + condlog(3, "%s: orphan path, %s", pp->dev, reason); + pp->mpp = NULL; + pp->dmstate = PSTATE_UNDEF; + pp->uid_attribute = NULL; + pp->getuid = NULL; + prio_put(&pp->prio); + checker_put(&pp->checker); + if (pp->fd >= 0) + close(pp->fd); + pp->fd = -1; +} + +void orphan_paths(vector pathvec, struct 
multipath *mpp, const char *reason) +{ + int i; + struct path * pp; + + vector_foreach_slot (pathvec, pp, i) { + if (pp->mpp == mpp) { + orphan_path(pp, reason); + } + } +} + +void +remove_map(struct multipath * mpp, struct vectors * vecs, int purge_vec) +{ + int i; + + /* + * clear references to this map + */ + orphan_paths(vecs->pathvec, mpp, "map removed internally"); + + if (purge_vec && + (i = find_slot(vecs->mpvec, (void *)mpp)) != -1) + vector_del_slot(vecs->mpvec, i); + + /* + * final free + */ + free_multipath(mpp, KEEP_PATHS); +} + +void +remove_map_by_alias(const char *alias, struct vectors * vecs, int purge_vec) +{ + struct multipath * mpp = find_mp_by_alias(vecs->mpvec, alias); + if (mpp) { + condlog(2, "%s: removing map by alias", alias); + remove_map(mpp, vecs, purge_vec); + } +} + +void +remove_maps(struct vectors * vecs) +{ + int i; + struct multipath * mpp; + + if (!vecs) + return; + + vector_foreach_slot (vecs->mpvec, mpp, i) { + remove_map(mpp, vecs, 1); + i--; + } + + vector_free(vecs->mpvec); + vecs->mpvec = NULL; +} + +void +extract_hwe_from_path(struct multipath * mpp) +{ + struct path * pp = NULL; + int i; + + if (mpp->hwe || !mpp->paths) + return; + + condlog(3, "%s: searching paths for valid hwe", mpp->alias); + /* doing this in two passes seems like paranoia to me */ + vector_foreach_slot(mpp->paths, pp, i) { + if (pp->state != PATH_UP) + continue; + if (pp->hwe) { + mpp->hwe = pp->hwe; + return; + } + } + vector_foreach_slot(mpp->paths, pp, i) { + if (pp->state == PATH_UP) + continue; + if (pp->hwe) { + mpp->hwe = pp->hwe; + return; + } + } +} + +int +update_multipath_table (struct multipath *mpp, vector pathvec, int is_daemon) +{ + char params[PARAMS_SIZE] = {0}; + + if (!mpp) + return 1; + + if (dm_get_map(mpp->alias, &mpp->size, params)) { + condlog(3, "%s: cannot get map", mpp->alias); + return 1; + } + + if (disassemble_map(pathvec, params, mpp, is_daemon)) { + condlog(3, "%s: cannot disassemble map", mpp->alias); + return 1; + } + 
+ return 0; +} + +int +update_multipath_status (struct multipath *mpp) +{ + char status[PARAMS_SIZE] = {0}; + + if (!mpp) + return 1; + + if (dm_get_status(mpp->alias, status)) { + condlog(3, "%s: cannot get status", mpp->alias); + return 1; + } + + if (disassemble_status(status, mpp)) { + condlog(3, "%s: cannot disassemble status", mpp->alias); + return 1; + } + + return 0; +} + +void sync_paths(struct multipath *mpp, vector pathvec) +{ + struct path *pp; + struct pathgroup *pgp; + int found, i, j; + + vector_foreach_slot (mpp->paths, pp, i) { + found = 0; + vector_foreach_slot(mpp->pg, pgp, j) { + if (find_slot(pgp->paths, (void *)pp) != -1) { + found = 1; + break; + } + } + if (!found) { + condlog(3, "%s dropped path %s", mpp->alias, pp->dev); + vector_del_slot(mpp->paths, i--); + orphan_path(pp, "path removed externally"); + } + } + update_mpp_paths(mpp, pathvec); + vector_foreach_slot (mpp->paths, pp, i) + pp->mpp = mpp; +} + +int +update_multipath_strings(struct multipath *mpp, vector pathvec, int is_daemon) +{ + struct pathgroup *pgp; + int i; + + if (!mpp) + return 1; + + update_mpp_paths(mpp, pathvec); + condlog(4, "%s: %s", mpp->alias, __FUNCTION__); + + free_multipath_attributes(mpp); + free_pgvec(mpp->pg, KEEP_PATHS); + mpp->pg = NULL; + + if (update_multipath_table(mpp, pathvec, is_daemon)) + return 1; + sync_paths(mpp, pathvec); + + if (update_multipath_status(mpp)) + return 1; + + vector_foreach_slot(mpp->pg, pgp, i) + if (pgp->paths) + path_group_prio_update(pgp); + + return 0; +} + +static void enter_recovery_mode(struct multipath *mpp) +{ + unsigned int checkint; + struct config *conf; + + if (mpp->in_recovery || mpp->no_path_retry <= 0) + return; + + conf = get_multipath_config(); + checkint = conf->checkint; + put_multipath_config(conf); + + /* + * Enter retry mode. + * meaning of +1: retry_tick may be decremented in checkerloop before + * starting retry. 
+ */ + mpp->in_recovery = true; + mpp->stat_queueing_timeouts++; + mpp->retry_tick = mpp->no_path_retry * checkint + 1; + condlog(1, "%s: Entering recovery mode: max_retries=%d", + mpp->alias, mpp->no_path_retry); +} + +static void leave_recovery_mode(struct multipath *mpp) +{ + bool recovery = mpp->in_recovery; + + mpp->in_recovery = false; + mpp->retry_tick = 0; + + /* + * in_recovery is only ever set if mpp->no_path_retry > 0 + * (see enter_recovery_mode()). But no_path_retry may have been + * changed while the map was recovering, so test it here again. + */ + if (recovery && (mpp->no_path_retry == NO_PATH_RETRY_QUEUE || + mpp->no_path_retry > 0)) { + dm_queue_if_no_path(mpp->alias, 1); + condlog(2, "%s: queue_if_no_path enabled", mpp->alias); + condlog(1, "%s: Recovered to normal mode", mpp->alias); + } +} + +void __set_no_path_retry(struct multipath *mpp, bool check_features) +{ + bool is_queueing; + + check_features = check_features && mpp->features != NULL; + if (check_features) + is_queueing = strstr(mpp->features, "queue_if_no_path"); + + switch (mpp->no_path_retry) { + case NO_PATH_RETRY_UNDEF: + break; + case NO_PATH_RETRY_FAIL: + if (!check_features || is_queueing) + dm_queue_if_no_path(mpp->alias, 0); + break; + case NO_PATH_RETRY_QUEUE: + if (!check_features || !is_queueing) + dm_queue_if_no_path(mpp->alias, 1); + break; + default: + if (count_active_paths(mpp) > 0) { + /* + * If in_recovery is set, leave_recovery_mode() takes + * care of dm_queue_if_no_path. Otherwise, do it here. 
+ */ + if ((!check_features || !is_queueing) && + !mpp->in_recovery) + dm_queue_if_no_path(mpp->alias, 1); + leave_recovery_mode(mpp); + } else + enter_recovery_mode(mpp); + break; + } +} + +void +sync_map_state(struct multipath *mpp) +{ + struct pathgroup *pgp; + struct path *pp; + unsigned int i, j; + + if (!mpp->pg) + return; + + vector_foreach_slot (mpp->pg, pgp, i){ + vector_foreach_slot (pgp->paths, pp, j){ + if (pp->state == PATH_UNCHECKED || + pp->state == PATH_WILD || + pp->state == PATH_DELAYED) + continue; + if (mpp->ghost_delay_tick > 0) + continue; + if ((pp->dmstate == PSTATE_FAILED || + pp->dmstate == PSTATE_UNDEF) && + (pp->state == PATH_UP || pp->state == PATH_GHOST)) + dm_reinstate_path(mpp->alias, pp->dev_t); + else if ((pp->dmstate == PSTATE_ACTIVE || + pp->dmstate == PSTATE_UNDEF) && + (pp->state == PATH_DOWN || + pp->state == PATH_SHAKY)) { + condlog(2, "sync_map_state: failing %s state %d dmstate %d", + pp->dev, pp->state, pp->dmstate); + dm_fail_path(mpp->alias, pp->dev_t); + } + } + } +} + +static void +find_existing_alias (struct multipath * mpp, + struct vectors *vecs) +{ + struct multipath * mp; + int i; + + vector_foreach_slot (vecs->mpvec, mp, i) + if (strncmp(mp->wwid, mpp->wwid, WWID_SIZE - 1) == 0) { + strlcpy(mpp->alias_old, mp->alias, WWID_SIZE); + return; + } +} + +/* + * Allocate a new map for the WWID of pp, select its alias and adopt all + * matching paths from vecs->pathvec. If add_vec is set, the map is also + * stored in vecs->mpvec. Returns the new map, or NULL on failure. + */ +struct multipath *add_map_with_path(struct vectors *vecs, struct path *pp, + int add_vec) +{ + struct multipath * mpp; + struct config *conf = NULL; + int alias_failed; + + if (!strlen(pp->wwid)) + return NULL; + + if (!(mpp = alloc_multipath())) + return NULL; + + strcpy(mpp->wwid, pp->wwid); + find_existing_alias(mpp, vecs); + + /* + * Hold the config reference across select_alias(): conf must not be + * dereferenced once put_multipath_config() has released it. + */ + conf = get_multipath_config(); + mpp->mpe = find_mpe(conf->mptable, pp->wwid); + mpp->hwe = pp->hwe; + alias_failed = select_alias(conf, mpp); + put_multipath_config(conf); + if (alias_failed) + goto out; + mpp->size = pp->size; + + if (adopt_paths(vecs->pathvec, mpp)) + goto out; + + if (add_vec) { + if (!vector_alloc_slot(vecs->mpvec)) + goto out; + + vector_set_slot(vecs->mpvec, mpp); + }
+ + return mpp; + +out: + remove_map(mpp, vecs, PURGE_VEC); + return NULL; +} + +int verify_paths(struct multipath *mpp, struct vectors *vecs) +{ + struct path * pp; + int count = 0; + int i, j; + + if (!mpp) + return 0; + + vector_foreach_slot (mpp->paths, pp, i) { + /* + * see if path is in sysfs + */ + if (sysfs_attr_get_value(pp->udev, "dev", + pp->dev_t, BLK_DEV_SIZE) < 0) { + if (pp->state != PATH_DOWN) { + condlog(1, "%s: removing valid path %s in state %d", + mpp->alias, pp->dev, pp->state); + } else { + condlog(3, "%s: failed to access path %s", + mpp->alias, pp->dev); + } + count++; + vector_del_slot(mpp->paths, i); + i--; + + /* Make sure mpp->hwe doesn't point to freed memory. + * We call extract_hwe_from_path() below to restore + * mpp->hwe + */ + if (mpp->hwe == pp->hwe) + mpp->hwe = NULL; + if ((j = find_slot(vecs->pathvec, + (void *)pp)) != -1) + vector_del_slot(vecs->pathvec, j); + free_path(pp); + } else { + condlog(4, "%s: verified path %s dev_t %s", + mpp->alias, pp->dev, pp->dev_t); + } + } + extract_hwe_from_path(mpp); + return count; +} + +/* + * mpp->no_path_retry: + * -2 (QUEUE) : queue_if_no_path enabled, never turned off + * -1 (FAIL) : fail_if_no_path + * 0 (UNDEF) : nothing + * >0 : queue_if_no_path enabled, turned off after polling n times + */ +void update_queue_mode_del_path(struct multipath *mpp) +{ + int active = count_active_paths(mpp); + + if (active == 0) { + enter_recovery_mode(mpp); + if (mpp->no_path_retry != NO_PATH_RETRY_QUEUE) + mpp->stat_map_failures++; + } + condlog(2, "%s: remaining active paths: %d", mpp->alias, active); +} + +void update_queue_mode_add_path(struct multipath *mpp) +{ + int active = count_active_paths(mpp); + + if (active > 0) + leave_recovery_mode(mpp); + condlog(2, "%s: remaining active paths: %d", mpp->alias, active); +} + +vector get_used_hwes(const struct _vector *pathvec) +{ + int i, j; + struct path *pp; + struct hwentry *hwe; + vector v = vector_alloc(); + + if (v == NULL) + return NULL; + + 
vector_foreach_slot(pathvec, pp, i) { + vector_foreach_slot_backwards(pp->hwe, hwe, j) { + vector_find_or_add_slot(v, hwe); + } + } + + return v; +} diff --git a/libmultipath/structs_vec.h b/libmultipath/structs_vec.h new file mode 100644 index 0000000..2a5e3d6 --- /dev/null +++ b/libmultipath/structs_vec.h @@ -0,0 +1,45 @@ +#ifndef _STRUCTS_VEC_H +#define _STRUCTS_VEC_H + +#include "vector.h" +#include "config.h" +#include "lock.h" + +struct vectors { + struct mutex_lock lock; /* defined in lock.h */ + vector pathvec; + vector mpvec; +}; + +void __set_no_path_retry(struct multipath *mpp, bool check_features); +#define set_no_path_retry(mpp) __set_no_path_retry(mpp, true) + +int adopt_paths (vector pathvec, struct multipath * mpp); +void orphan_paths(vector pathvec, struct multipath *mpp, + const char *reason); +void orphan_path (struct path * pp, const char *reason); + +int verify_paths(struct multipath * mpp, struct vectors * vecs); +int update_mpp_paths(struct multipath * mpp, vector pathvec); +int update_multipath_strings (struct multipath *mpp, vector pathvec, + int is_daemon); +void extract_hwe_from_path(struct multipath * mpp); + +#define PURGE_VEC 1 + +void remove_map (struct multipath * mpp, struct vectors * vecs, int purge_vec); +void remove_map_by_alias(const char *alias, struct vectors * vecs, + int purge_vec); +void remove_maps (struct vectors * vecs); + +void sync_map_state (struct multipath *); +struct multipath * add_map_with_path (struct vectors * vecs, + struct path * pp, int add_vec); +void update_queue_mode_del_path(struct multipath *mpp); +void update_queue_mode_add_path(struct multipath *mpp); +int update_multipath_table (struct multipath *mpp, vector pathvec, + int is_daemon); +int update_multipath_status (struct multipath *mpp); +vector get_used_hwes(const struct _vector *pathvec); + +#endif /* _STRUCTS_VEC_H */ diff --git a/libmultipath/switchgroup.c b/libmultipath/switchgroup.c new file mode 100644 index 0000000..6fdfcfa --- /dev/null +++ 
b/libmultipath/switchgroup.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2005 Christophe Varoqui + * Copyright (c) 2005 Edward Goggin, EMC + */ +#include "checkers.h" +#include "vector.h" +#include "structs.h" +#include "switchgroup.h" + +void path_group_prio_update(struct pathgroup *pgp) +{ + int i; + int priority = 0; + int marginal = 0; + struct path * pp; + + pgp->enabled_paths = 0; + if (!pgp->paths) { + pgp->priority = 0; + return; + } + vector_foreach_slot (pgp->paths, pp, i) { + if (pp->marginal) + marginal++; + if (pp->state == PATH_UP || + pp->state == PATH_GHOST) { + priority += pp->priority; + pgp->enabled_paths++; + } + } + if (pgp->enabled_paths) + pgp->priority = priority / pgp->enabled_paths; + else + pgp->priority = 0; + if (marginal && marginal == i) + pgp->marginal = 1; +} + +int select_path_group(struct multipath *mpp) +{ + int i; + int normal_pgp = 0; + int max_priority = 0; + int bestpg = 1; + int max_enabled_paths = 1; + struct pathgroup * pgp; + + if (!mpp->pg) + return 1; + + vector_foreach_slot (mpp->pg, pgp, i) { + if (!pgp->paths) + continue; + + path_group_prio_update(pgp); + if (pgp->marginal && normal_pgp) + continue; + if (pgp->enabled_paths) { + if (!pgp->marginal && !normal_pgp) { + normal_pgp = 1; + max_priority = pgp->priority; + max_enabled_paths = pgp->enabled_paths; + bestpg = i + 1; + } else if (pgp->priority > max_priority) { + max_priority = pgp->priority; + max_enabled_paths = pgp->enabled_paths; + bestpg = i + 1; + } else if (pgp->priority == max_priority) { + if (pgp->enabled_paths > max_enabled_paths) { + max_enabled_paths = pgp->enabled_paths; + bestpg = i + 1; + } + } + } + } + return bestpg; +} diff --git a/libmultipath/switchgroup.h b/libmultipath/switchgroup.h new file mode 100644 index 0000000..9365e2e --- /dev/null +++ b/libmultipath/switchgroup.h @@ -0,0 +1,2 @@ +void path_group_prio_update (struct pathgroup * pgp); +int select_path_group (struct multipath * mpp); diff --git a/libmultipath/sysfs.c 
b/libmultipath/sysfs.c new file mode 100644 index 0000000..62ec2ed --- /dev/null +++ b/libmultipath/sysfs.c @@ -0,0 +1,353 @@ +/* + * Copyright (C) 2005-2006 Kay Sievers + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + * + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "checkers.h" +#include "vector.h" +#include "structs.h" +#include "sysfs.h" +#include "list.h" +#include "util.h" +#include "debug.h" +#include "devmapper.h" + +/* + * When we modify an attribute value we cannot rely on libudev for now, + * as libudev lacks the capability to update an attribute value. + * So for modified attributes we need to implement our own function. 
+ */ +ssize_t sysfs_attr_get_value(struct udev_device *dev, const char *attr_name, + char * value, size_t value_len) +{ + char devpath[PATH_SIZE]; + struct stat statbuf; + int fd; + ssize_t size = -1; + + /* a zero-length buffer has no room for the terminating NUL written below */ + if (!dev || !attr_name || !value || !value_len) + return 0; + + snprintf(devpath, PATH_SIZE, "%s/%s", udev_device_get_syspath(dev), + attr_name); + condlog(4, "open '%s'", devpath); + /* read attribute value */ + fd = open(devpath, O_RDONLY); + if (fd < 0) { + condlog(4, "attribute '%s' can not be opened: %s", + devpath, strerror(errno)); + return -errno; + } + if (fstat(fd, &statbuf) < 0) { + condlog(4, "stat '%s' failed: %s", devpath, strerror(errno)); + close(fd); + return -ENXIO; + } + /* skip directories */ + if (S_ISDIR(statbuf.st_mode)) { + condlog(4, "%s is a directory", devpath); + close(fd); + return -EISDIR; + } + /* skip non-readable files */ + if ((statbuf.st_mode & S_IRUSR) == 0) { + condlog(4, "%s is not readable", devpath); + close(fd); + return -EPERM; + } + + size = read(fd, value, value_len); + if (size < 0) { + condlog(4, "read from %s failed: %s", devpath, strerror(errno)); + size = -errno; + value[0] = '\0'; + } else if (size == (ssize_t)value_len) { + value[size - 1] = '\0'; + condlog(4, "overflow while reading from %s", devpath); + size = 0; + } else { + value[size] = '\0'; + size = strchop(value); + } + + close(fd); + return size; +} + +ssize_t sysfs_bin_attr_get_value(struct udev_device *dev, const char *attr_name, + unsigned char * value, size_t value_len) +{ + char devpath[PATH_SIZE]; + struct stat statbuf; + int fd; + ssize_t size = -1; + + if (!dev || !attr_name || !value) + return 0; + + snprintf(devpath, PATH_SIZE, "%s/%s", udev_device_get_syspath(dev), + attr_name); + condlog(4, "open '%s'", devpath); + /* read attribute value */ + fd = open(devpath, O_RDONLY); + if (fd < 0) { + condlog(4, "attribute '%s' can not be opened: %s", + devpath, strerror(errno)); + return -errno; + } + if (fstat(fd, &statbuf) != 0) { + condlog(4, "stat '%s' failed: 
%s", devpath, strerror(errno)); + close(fd); + return -ENXIO; + } + + /* skip directories */ + if (S_ISDIR(statbuf.st_mode)) { + condlog(4, "%s is a directory", devpath); + close(fd); + return -EISDIR; + } + + /* skip non-writeable files */ + if ((statbuf.st_mode & S_IRUSR) == 0) { + condlog(4, "%s is not readable", devpath); + close(fd); + return -EPERM; + } + + size = read(fd, value, value_len); + if (size < 0) { + condlog(4, "read from %s failed: %s", devpath, strerror(errno)); + size = -errno; + } else if (size == (ssize_t)value_len) { + condlog(4, "overflow while reading from %s", devpath); + size = 0; + } + + close(fd); + return size; +} + +ssize_t sysfs_attr_set_value(struct udev_device *dev, const char *attr_name, + const char * value, size_t value_len) +{ + char devpath[PATH_SIZE]; + struct stat statbuf; + int fd; + ssize_t size = -1; + + if (!dev || !attr_name || !value || !value_len) + return 0; + + snprintf(devpath, PATH_SIZE, "%s/%s", udev_device_get_syspath(dev), + attr_name); + condlog(4, "open '%s'", devpath); + /* write attribute value */ + fd = open(devpath, O_WRONLY); + if (fd < 0) { + condlog(4, "attribute '%s' can not be opened: %s", + devpath, strerror(errno)); + return -errno; + } + if (fstat(fd, &statbuf) != 0) { + condlog(4, "stat '%s' failed: %s", devpath, strerror(errno)); + close(fd); + return -errno; + } + + /* skip directories */ + if (S_ISDIR(statbuf.st_mode)) { + condlog(4, "%s is a directory", devpath); + close(fd); + return -EISDIR; + } + + /* skip non-writeable files */ + if ((statbuf.st_mode & S_IWUSR) == 0) { + condlog(4, "%s is not writeable", devpath); + close(fd); + return -EPERM; + } + + size = write(fd, value, value_len); + if (size < 0) { + condlog(4, "write to %s failed: %s", devpath, strerror(errno)); + size = -errno; + } else if (size < (ssize_t)value_len) { + condlog(4, "tried to write %ld to %s. 
Wrote %ld", + (long)value_len, devpath, (long)size); + size = 0; + } + + close(fd); + return size; +} + +int +sysfs_get_size (struct path *pp, unsigned long long * size) +{ + char attr[255]; + int r; + + if (!pp->udev || !size) + return 1; + + attr[0] = '\0'; + if (sysfs_attr_get_value(pp->udev, "size", attr, 255) <= 0) { + condlog(3, "%s: No size attribute in sysfs", pp->dev); + return 1; + } + + r = sscanf(attr, "%llu\n", size); + + if (r != 1) { + condlog(3, "%s: Cannot parse size attribute", pp->dev); + *size = 0; + return 1; + } + + return 0; +} + +/* + * Walk /sys/block/<check_devt's device>/holders and, for every dm-N holder + * whose minor differs from the minor in new_devt, reassign the table from + * check_devt to new_devt. Always returns 0. + */ +int sysfs_check_holders(char * check_devt, char * new_devt) +{ + unsigned int major, new_minor, table_minor; + char path[PATH_MAX], check_dev[PATH_SIZE]; + char * table_name; + DIR *dirfd; + struct dirent *holder; + + /* the targets are unsigned int, so the conversions must be %u */ + if (sscanf(new_devt,"%u:%u", &major, &new_minor) != 2) { + condlog(1, "invalid device number %s", new_devt); + return 0; + } + + if (devt2devname(check_dev, PATH_SIZE, check_devt)) { + condlog(1, "can't get devname for %s", check_devt); + return 0; + } + + condlog(3, "%s: checking holder", check_dev); + + snprintf(path, sizeof(path), "/sys/block/%s/holders", check_dev); + dirfd = opendir(path); + if (dirfd == NULL) { + condlog(3, "%s: failed to open directory %s (%d)", + check_dev, path, errno); + return 0; + } + while ((holder = readdir(dirfd)) != NULL) { + if ((strcmp(holder->d_name,".") == 0) || + (strcmp(holder->d_name,"..") == 0)) + continue; + + if (sscanf(holder->d_name, "dm-%u", &table_minor) != 1) { + condlog(3, "%s: %s is not a dm-device", + check_dev, holder->d_name); + continue; + } + if (table_minor == new_minor) { + condlog(3, "%s: holder already correct", check_dev); + continue; + } + table_name = dm_mapname(major, table_minor); + /* never hand a NULL name to the %s below or to dm_reassign_table() */ + if (!table_name) { + condlog(2, "%s: cannot get map name for %u:%u", + check_dev, major, table_minor); + continue; + } + + condlog(0, "%s: reassign table %s old %s new %s", check_dev, + table_name, check_devt, new_devt); + + dm_reassign_table(table_name, check_devt, new_devt); + FREE(table_name); + } + closedir(dirfd); + + return 0; +} + +static int select_dm_devs(const struct 
dirent *di) +{ + return fnmatch("dm-*", di->d_name, FNM_FILE_NAME) == 0; +} + +bool sysfs_is_multipathed(const struct path *pp) +{ + char pathbuf[PATH_MAX]; + struct scandir_result sr; + struct dirent **di; + int n, r, i; + bool found = false; + + n = snprintf(pathbuf, sizeof(pathbuf), "/sys/block/%s/holders", + pp->dev); + + if (n < 0 || (size_t)n >= sizeof(pathbuf)) { + condlog(1, "%s: pathname overflow", __func__); + return false; + } + + r = scandir(pathbuf, &di, select_dm_devs, alphasort); + if (r == 0) + return false; + else if (r < 0) { + condlog(1, "%s: error scanning %s", __func__, pathbuf); + return false; + } + + sr.di = di; + sr.n = r; + pthread_cleanup_push_cast(free_scandir_result, &sr); + for (i = 0; i < r && !found; i++) { + long fd; + int nr; + char uuid[6]; + + if (safe_snprintf(pathbuf + n, sizeof(pathbuf) - n, + "/%s/dm/uuid", di[i]->d_name)) + continue; + + fd = open(pathbuf, O_RDONLY); + if (fd == -1) { + condlog(1, "%s: error opening %s", __func__, pathbuf); + continue; + } + + pthread_cleanup_push(close_fd, (void *)fd); + nr = read(fd, uuid, sizeof(uuid)); + if (nr == sizeof(uuid) && !memcmp(uuid, "mpath-", sizeof(uuid))) + found = true; + else if (nr < 0) { + condlog(1, "%s: error reading from %s: %s", + __func__, pathbuf, strerror(errno)); + } + pthread_cleanup_pop(1); + } + pthread_cleanup_pop(1); + + return found; +} diff --git a/libmultipath/sysfs.h b/libmultipath/sysfs.h new file mode 100644 index 0000000..9ae30b3 --- /dev/null +++ b/libmultipath/sysfs.h @@ -0,0 +1,18 @@ +/* + * sysfs.h + */ + +#ifndef _LIBMULTIPATH_SYSFS_H +#define _LIBMULTIPATH_SYSFS_H +#include + +ssize_t sysfs_attr_set_value(struct udev_device *dev, const char *attr_name, + const char * value, size_t value_len); +ssize_t sysfs_attr_get_value(struct udev_device *dev, const char *attr_name, + char * value, size_t value_len); +ssize_t sysfs_bin_attr_get_value(struct udev_device *dev, const char *attr_name, + unsigned char * value, size_t value_len); +int 
sysfs_get_size (struct path *pp, unsigned long long * size); +int sysfs_check_holders(char * check_devt, char * new_devt); +bool sysfs_is_multipathed(const struct path *pp); +#endif diff --git a/libmultipath/time-util.c b/libmultipath/time-util.c new file mode 100644 index 0000000..55f366c --- /dev/null +++ b/libmultipath/time-util.c @@ -0,0 +1,51 @@ +#include +#include +#include +#include "time-util.h" + +void get_monotonic_time(struct timespec *res) +{ + struct timespec ts; + int rv = clock_gettime(CLOCK_MONOTONIC, &ts); + + assert(rv == 0); + *res = ts; +} + +/* Initialize @cond as a condition variable that uses the monotonic clock */ +void pthread_cond_init_mono(pthread_cond_t *cond) +{ + pthread_condattr_t attr; + int res; + + res = pthread_condattr_init(&attr); + assert(res == 0); + res = pthread_condattr_setclock(&attr, CLOCK_MONOTONIC); + assert(res == 0); + res = pthread_cond_init(cond, &attr); + assert(res == 0); + res = pthread_condattr_destroy(&attr); + assert(res == 0); +} + +/* Ensure that 0 <= ts->tv_nsec && ts->tv_nsec < 1000 * 1000 * 1000. 
*/ +void normalize_timespec(struct timespec *ts) +{ + while (ts->tv_nsec < 0) { + ts->tv_nsec += 1000L * 1000 * 1000; + ts->tv_sec--; + } + while (ts->tv_nsec >= 1000L * 1000 * 1000) { + ts->tv_nsec -= 1000L * 1000 * 1000; + ts->tv_sec++; + } +} + +/* Compute *res = *a - *b */ +void timespecsub(const struct timespec *a, const struct timespec *b, + struct timespec *res) +{ + res->tv_sec = a->tv_sec - b->tv_sec; + res->tv_nsec = a->tv_nsec - b->tv_nsec; + normalize_timespec(res); +} diff --git a/libmultipath/time-util.h b/libmultipath/time-util.h new file mode 100644 index 0000000..b23d328 --- /dev/null +++ b/libmultipath/time-util.h @@ -0,0 +1,14 @@ +#ifndef _TIME_UTIL_H_ +#define _TIME_UTIL_H_ + +#include + +struct timespec; + +void get_monotonic_time(struct timespec *res); +void pthread_cond_init_mono(pthread_cond_t *cond); +void normalize_timespec(struct timespec *ts); +void timespecsub(const struct timespec *a, const struct timespec *b, + struct timespec *res); + +#endif /* _TIME_UTIL_H_ */ diff --git a/libmultipath/uevent.c b/libmultipath/uevent.c new file mode 100644 index 0000000..d38e8a7 --- /dev/null +++ b/libmultipath/uevent.c @@ -0,0 +1,952 @@ +/* + * uevent.c - trigger upon netlink uevents from the kernel + * + * Only kernels from version 2.6.10* on provide the uevent netlink socket. + * Until the libc-kernel-headers are updated, you need to compile with: + * + * gcc -I /lib/modules/`uname -r`/build/include -o uevent_listen uevent_listen.c + * + * Copyright (C) 2004 Kay Sievers + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "memory.h" +#include "debug.h" +#include "list.h" +#include "uevent.h" +#include "vector.h" +#include "structs.h" +#include "util.h" +#include "config.h" +#include "blacklist.h" +#include "devmapper.h" + +#define MAX_ACCUMULATION_COUNT 2048 +#define MAX_ACCUMULATION_TIME 30*1000 +#define MIN_BURST_SPEED 10 + +typedef int (uev_trigger)(struct uevent *, void * trigger_data); + +LIST_HEAD(uevq); +pthread_mutex_t uevq_lock = PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t *uevq_lockp = &uevq_lock; +pthread_cond_t uev_cond = PTHREAD_COND_INITIALIZER; +pthread_cond_t *uev_condp = &uev_cond; +uev_trigger *my_uev_trigger; +void * my_trigger_data; +int servicing_uev; + +int is_uevent_busy(void) +{ + int empty; + + pthread_mutex_lock(uevq_lockp); + empty = list_empty(&uevq); + pthread_mutex_unlock(uevq_lockp); + return (!empty || servicing_uev); +} + +struct uevent * alloc_uevent (void) +{ + struct uevent *uev = MALLOC(sizeof(struct uevent)); + + if (uev) { + INIT_LIST_HEAD(&uev->node); + INIT_LIST_HEAD(&uev->merge_node); + } + + return uev; +} + +void +uevq_cleanup(struct list_head *tmpq) +{ + struct uevent *uev, *tmp; + + list_for_each_entry_safe(uev, tmp, tmpq, node) { + list_del_init(&uev->node); + + if (uev->udev) + udev_device_unref(uev->udev); + FREE(uev); + } +} + +static const char* uevent_get_env_var(const struct uevent *uev, + const char *attr) +{ + int i; + size_t len; + const char *p = NULL; + + if (attr == NULL) + goto invalid; + + len = strlen(attr); + if (len == 0) + goto invalid; + + for (i = 0; uev->envp[i] != NULL; i++) { + const char *var = uev->envp[i]; + + if (strlen(var) > len && + !memcmp(var, attr, len) && var[len] == 
'=') { + p = var + len + 1; + break; + } + } + + condlog(4, "%s: %s -> '%s'", __func__, attr, p); + return p; + +invalid: + condlog(2, "%s: empty variable name", __func__); + return NULL; +} + +static int uevent_get_env_positive_int(const struct uevent *uev, + const char *attr) +{ + const char *p = uevent_get_env_var(uev, attr); + char *q; + int ret; + + if (p == NULL || *p == '\0') + return -1; + + ret = strtoul(p, &q, 10); + if (*q != '\0' || ret < 0) { + condlog(2, "%s: invalid %s: '%s'", __func__, attr, p); + return -1; + } + return ret; +} + +void +uevent_get_wwid(struct uevent *uev) +{ + char *uid_attribute; + const char *val; + struct config * conf; + + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + uid_attribute = get_uid_attribute_by_attrs(conf, uev->kernel); + pthread_cleanup_pop(1); + + val = uevent_get_env_var(uev, uid_attribute); + if (val) + uev->wwid = val; +} + +bool +uevent_need_merge(void) +{ + struct config * conf; + bool need_merge = false; + + conf = get_multipath_config(); + if (VECTOR_SIZE(&conf->uid_attrs) > 0) + need_merge = true; + put_multipath_config(conf); + + return need_merge; +} + +bool +uevent_can_discard(struct uevent *uev) +{ + int invalid = 0; + struct config * conf; + + /* + * do not filter dm devices by devnode + */ + if (!strncmp(uev->kernel, "dm-", 3)) + return false; + /* + * filter paths devices by devnode + */ + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + if (filter_devnode(conf->blist_devnode, conf->elist_devnode, + uev->kernel) > 0) + invalid = 1; + pthread_cleanup_pop(1); + + if (invalid) + return true; + return false; +} + +bool +uevent_can_filter(struct uevent *earlier, struct uevent *later) +{ + + /* + * filter earlier uvents if path has removed later. 
Eg: + * "add path1 |chang path1 |add path2 |remove path1" + * can filter as: + * "add path2 |remove path1" + * uevents "add path1" and "chang path1" are filtered out + */ + if (!strcmp(earlier->kernel, later->kernel) && + !strcmp(later->action, "remove") && + strncmp(later->kernel, "dm-", 3)) { + return true; + } + + /* + * filter change uevents if add uevents exist. Eg: + * "change path1| add path1 |add path2" + * can filter as: + * "add path1 |add path2" + * uevent "change path1" is filtered out + */ + if (!strcmp(earlier->kernel, later->kernel) && + !strcmp(earlier->action, "change") && + !strcmp(later->action, "add") && + strncmp(later->kernel, "dm-", 3)) { + return true; + } + + return false; +} + +bool +merge_need_stop(struct uevent *earlier, struct uevent *later) +{ + /* + * a dm uevent is never merged with the remaining uevents: + * stop as soon as the later uevent is for a "dm-" device + */ + if (!strncmp(later->kernel, "dm-", 3)) + return true; + + /* + * we cannot make a judgement unless both uevents have a wwid, + * so it is sensible to stop merging + */ + if (!earlier->wwid || !later->wwid) + return true; + /* + * uevents merging stopped + * when we meet an opposite action uevent from the same LUN to AVOID + * "add path1 |remove path1 |add path2 |remove path2 |add path3" + * to merge as "remove path1, path2" and "add path1, path2, path3" + * OR + * "remove path1 |add path1 |remove path2 |add path2 |remove path3" + * to merge as "add path1, path2" and "remove path1, path2, path3" + * SO + * when we meet a non-change uevent from the same LUN + * with the same wwid and different action + * it would be better to stop merging.
+ */ + if (!strcmp(earlier->wwid, later->wwid) && + strcmp(earlier->action, later->action) && + strcmp(earlier->action, "change") && + strcmp(later->action, "change")) + return true; + + return false; +} + +bool +uevent_can_merge(struct uevent *earlier, struct uevent *later) +{ + /* merge path uevents + * whose wwids exist and are the same + * and whose actions are the same, + * and whose actions are addition or deletion + * (the action must not start with "change"); + * uevents whose kernel name starts with "dm-" are never merged + */ + if (earlier->wwid && later->wwid && + !strcmp(earlier->wwid, later->wwid) && + !strcmp(earlier->action, later->action) && + strncmp(earlier->action, "change", 6) && + strncmp(earlier->kernel, "dm-", 3)) { + return true; + } + + return false; +} + +void +uevent_prepare(struct list_head *tmpq) +{ + struct uevent *uev, *tmp; + + list_for_each_entry_reverse_safe(uev, tmp, tmpq, node) { + if (uevent_can_discard(uev)) { + list_del_init(&uev->node); + if (uev->udev) + udev_device_unref(uev->udev); + FREE(uev); + continue; + } + + if (strncmp(uev->kernel, "dm-", 3) && + uevent_need_merge()) + uevent_get_wwid(uev); + } +} + +void +uevent_filter(struct uevent *later, struct list_head *tmpq) +{ + struct uevent *earlier, *tmp; + + list_for_some_entry_reverse_safe(earlier, tmp, &later->node, tmpq, node) { + /* + * filter unnecessary earlier uevents + * by the later uevent: a filtered uevent is unlinked + * from the queue and freed + */ + if (uevent_can_filter(earlier, later)) { + condlog(3, "uevent: %s-%s has filtered by uevent: %s-%s", + earlier->kernel, earlier->action, + later->kernel, later->action); + + list_del_init(&earlier->node); + if (earlier->udev) + udev_device_unref(earlier->udev); + FREE(earlier); + } + } +} + +void +uevent_merge(struct uevent *later, struct list_head *tmpq) +{ + struct uevent *earlier, *tmp; + + list_for_some_entry_reverse_safe(earlier, tmp, &later->node, tmpq, node) { + if (merge_need_stop(earlier, later)) + break; + /* + * merge earlier uevents to the later uevent + */ + if (uevent_can_merge(earlier, later)) { + condlog(3, "merged uevent: %s-%s-%s with uevent: %s-%s-%s", + earlier->action, 
earlier->kernel, earlier->wwid, + later->action, later->kernel, later->wwid); + + list_move(&earlier->node, &later->merge_node); + } + } +} + +void +merge_uevq(struct list_head *tmpq) +{ + struct uevent *later; + + uevent_prepare(tmpq); + list_for_each_entry_reverse(later, tmpq, node) { + uevent_filter(later, tmpq); + if(uevent_need_merge()) + uevent_merge(later, tmpq); + } +} + +void +service_uevq(struct list_head *tmpq) +{ + struct uevent *uev, *tmp; + + list_for_each_entry_safe(uev, tmp, tmpq, node) { + list_del_init(&uev->node); + + if (my_uev_trigger && my_uev_trigger(uev, my_trigger_data)) + condlog(0, "uevent trigger error"); + + uevq_cleanup(&uev->merge_node); + + if (uev->udev) + udev_device_unref(uev->udev); + FREE(uev); + } +} + +static void uevent_cleanup(void *arg) +{ + struct udev *udev = arg; + + condlog(3, "Releasing uevent_listen() resources"); + udev_unref(udev); +} + +static void monitor_cleanup(void *arg) +{ + struct udev_monitor *monitor = arg; + + condlog(3, "Releasing uevent_monitor() resources"); + udev_monitor_unref(monitor); +} + +/* + * Service the uevent queue. + */ +int uevent_dispatch(int (*uev_trigger)(struct uevent *, void * trigger_data), + void * trigger_data) +{ + my_uev_trigger = uev_trigger; + my_trigger_data = trigger_data; + + mlockall(MCL_CURRENT | MCL_FUTURE); + + while (1) { + LIST_HEAD(uevq_tmp); + + pthread_mutex_lock(uevq_lockp); + servicing_uev = 0; + /* + * Condition signals are unreliable, + * so make sure we only wait if we have to. 
+ */ + if (list_empty(&uevq)) { + pthread_cond_wait(uev_condp, uevq_lockp); + } + servicing_uev = 1; + list_splice_init(&uevq, &uevq_tmp); + pthread_mutex_unlock(uevq_lockp); + if (!my_uev_trigger) + break; + merge_uevq(&uevq_tmp); + service_uevq(&uevq_tmp); + } + condlog(3, "Terminating uev service queue"); + uevq_cleanup(&uevq); + return 0; +} + +struct uevent *uevent_from_buffer(char *buf, ssize_t buflen) +{ + struct uevent *uev; + char *buffer; + size_t bufpos; + int i; + char *pos; + + uev = alloc_uevent(); + if (!uev) { + condlog(1, "lost uevent, oom"); + return NULL; + } + + if ((size_t)buflen > sizeof(buf)-1) + buflen = sizeof(buf)-1; + + /* + * Copy the shared receive buffer contents to buffer private + * to this uevent so we can immediately reuse the shared buffer. + */ + memcpy(uev->buffer, buf, HOTPLUG_BUFFER_SIZE + OBJECT_SIZE); + buffer = uev->buffer; + buffer[buflen] = '\0'; + + /* save start of payload */ + bufpos = strlen(buffer) + 1; + + /* action string */ + uev->action = buffer; + pos = strchr(buffer, '@'); + if (!pos) { + condlog(3, "bad action string '%s'", buffer); + FREE(uev); + return NULL; + } + pos[0] = '\0'; + + /* sysfs path */ + uev->devpath = &pos[1]; + + /* hotplug events have the environment attached - reconstruct envp[] */ + for (i = 0; (bufpos < (size_t)buflen) && (i < HOTPLUG_NUM_ENVP-1); i++) { + int keylen; + char *key; + + key = &buffer[bufpos]; + keylen = strlen(key); + uev->envp[i] = key; + /* Filter out sequence number */ + if (strncmp(key, "SEQNUM=", 7) == 0) { + char *eptr; + + uev->seqnum = strtoul(key + 7, &eptr, 10); + if (eptr == key + 7) + uev->seqnum = -1; + } + bufpos += keylen + 1; + } + uev->envp[i] = NULL; + + condlog(3, "uevent %ld '%s' from '%s'", uev->seqnum, + uev->action, uev->devpath); + uev->kernel = strrchr(uev->devpath, '/'); + if (uev->kernel) + uev->kernel++; + + /* print payload environment */ + for (i = 0; uev->envp[i] != NULL; i++) + condlog(5, "%s", uev->envp[i]); + + return uev; +} + +int 
failback_listen(void) +{ + int sock; + struct sockaddr_nl snl; + struct sockaddr_un sun; + socklen_t addrlen; + int retval; + int rcvbufsz = 128*1024; + int rcvsz = 0; + int rcvszsz = sizeof(rcvsz); + unsigned int *prcvszsz = (unsigned int *)&rcvszsz; + const int feature_on = 1; + /* + * First check whether we have a udev socket + */ + memset(&sun, 0x00, sizeof(struct sockaddr_un)); + sun.sun_family = AF_LOCAL; + strcpy(&sun.sun_path[1], "/org/kernel/dm/multipath_event"); + addrlen = offsetof(struct sockaddr_un, sun_path) + strlen(sun.sun_path+1) + 1; + + sock = socket(AF_LOCAL, SOCK_DGRAM, 0); + if (sock >= 0) { + + condlog(3, "reading events from udev socket."); + + /* the bind takes care of ensuring only one copy running */ + retval = bind(sock, (struct sockaddr *) &sun, addrlen); + if (retval < 0) { + condlog(0, "bind failed, exit"); + goto exit; + } + + /* enable receiving of the sender credentials */ + retval = setsockopt(sock, SOL_SOCKET, SO_PASSCRED, + &feature_on, sizeof(feature_on)); + if (retval < 0) { + condlog(0, "failed to enable credential passing, exit"); + goto exit; + } + + } else { + /* Fallback to read kernel netlink events */ + memset(&snl, 0x00, sizeof(struct sockaddr_nl)); + snl.nl_family = AF_NETLINK; + snl.nl_pid = getpid(); + snl.nl_groups = 0x01; + + sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT); + if (sock == -1) { + condlog(0, "error getting socket, exit"); + return 1; + } + + condlog(3, "reading events from kernel."); + + /* + * try to avoid dropping uevents, even so, this is not a guarantee, + * but it does help to change the netlink uevent socket's + * receive buffer threshold from the default value of 106,496 to + * the maximum value of 262,142. 
+ */ + retval = setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvbufsz, + sizeof(rcvbufsz)); + + if (retval < 0) { + condlog(0, "error setting receive buffer size for socket, exit"); + exit(1); + } + retval = getsockopt(sock, SOL_SOCKET, SO_RCVBUF, &rcvsz, prcvszsz); + if (retval < 0) { + condlog(0, "error setting receive buffer size for socket, exit"); + exit(1); + } + condlog(3, "receive buffer size for socket is %u.", rcvsz); + + /* enable receiving of the sender credentials */ + if (setsockopt(sock, SOL_SOCKET, SO_PASSCRED, + &feature_on, sizeof(feature_on)) < 0) { + condlog(0, "error on enabling credential passing for socket"); + exit(1); + } + + retval = bind(sock, (struct sockaddr *) &snl, + sizeof(struct sockaddr_nl)); + if (retval < 0) { + condlog(0, "bind failed, exit"); + goto exit; + } + } + + while (1) { + size_t bufpos; + ssize_t buflen; + struct uevent *uev; + struct msghdr smsg; + struct iovec iov; + char cred_msg[CMSG_SPACE(sizeof(struct ucred))]; + struct cmsghdr *cmsg; + struct ucred *cred; + static char buf[HOTPLUG_BUFFER_SIZE + OBJECT_SIZE]; + + memset(buf, 0x00, sizeof(buf)); + iov.iov_base = &buf; + iov.iov_len = sizeof(buf); + memset (&smsg, 0x00, sizeof(struct msghdr)); + smsg.msg_iov = &iov; + smsg.msg_iovlen = 1; + smsg.msg_control = cred_msg; + smsg.msg_controllen = sizeof(cred_msg); + + buflen = recvmsg(sock, &smsg, 0); + if (buflen < 0) { + if (errno != EINTR) + condlog(0, "error receiving message, errno %d", errno); + continue; + } + + cmsg = CMSG_FIRSTHDR(&smsg); + if (cmsg == NULL || cmsg->cmsg_type != SCM_CREDENTIALS) { + condlog(3, "no sender credentials received, message ignored"); + continue; + } + + cred = (struct ucred *)CMSG_DATA(cmsg); + if (cred->uid != 0) { + condlog(3, "sender uid=%d, message ignored", cred->uid); + continue; + } + + /* skip header */ + bufpos = strlen(buf) + 1; + if (bufpos < sizeof("a@/d") || bufpos >= sizeof(buf)) { + condlog(3, "invalid message length"); + continue; + } + + /* check message header */ + if 
(strstr(buf, "@/") == NULL) { + condlog(3, "unrecognized message header"); + continue; + } + if ((size_t)buflen > sizeof(buf)-1) { + condlog(2, "buffer overflow for received uevent"); + buflen = sizeof(buf)-1; + } + + uev = uevent_from_buffer(buf, buflen); + if (!uev) + continue; + /* + * Queue uevent and poke service pthread. + */ + pthread_mutex_lock(uevq_lockp); + list_add_tail(&uev->node, &uevq); + pthread_cond_signal(uev_condp); + pthread_mutex_unlock(uevq_lockp); + } + +exit: + close(sock); + return 1; +} + +struct uevent *uevent_from_udev_device(struct udev_device *dev) +{ + struct uevent *uev; + int i = 0; + char *pos, *end; + struct udev_list_entry *list_entry; + + uev = alloc_uevent(); + if (!uev) { + udev_device_unref(dev); + condlog(1, "lost uevent, oom"); + return NULL; + } + pos = uev->buffer; + end = pos + HOTPLUG_BUFFER_SIZE + OBJECT_SIZE - 1; + udev_list_entry_foreach(list_entry, udev_device_get_properties_list_entry(dev)) { + const char *name, *value; + int bytes; + + name = udev_list_entry_get_name(list_entry); + if (!name) + name = "(null)"; + value = udev_list_entry_get_value(list_entry); + if (!value) + value = "(null)"; + bytes = snprintf(pos, end - pos, "%s=%s", name, value); + if (pos + bytes >= end) { + condlog(2, "buffer overflow for uevent"); + break; + } + uev->envp[i] = pos; + pos += bytes; + *pos = '\0'; + pos++; + if (strcmp(name, "DEVPATH") == 0) + uev->devpath = uev->envp[i] + 8; + if (strcmp(name, "ACTION") == 0) + uev->action = uev->envp[i] + 7; + i++; + if (i == HOTPLUG_NUM_ENVP - 1) + break; + } + if (!uev->devpath || ! 
uev->action) { + udev_device_unref(dev); + condlog(1, "uevent missing necessary fields"); + FREE(uev); + return NULL; + } + uev->udev = dev; + uev->envp[i] = NULL; + + condlog(3, "uevent '%s' from '%s'", uev->action, uev->devpath); + uev->kernel = strrchr(uev->devpath, '/'); + if (uev->kernel) + uev->kernel++; + + /* print payload environment */ + for (i = 0; uev->envp[i] != NULL; i++) + condlog(5, "%s", uev->envp[i]); + return uev; +} + +/* + * Decide whether the listener is still inside a burst of uevents. + * @start_time: time at which the current accumulation started. + * @events: number of uevents accumulated so far. + * Returns false once more than MAX_ACCUMULATION_COUNT events were collected + * or more than MAX_ACCUMULATION_TIME ms have elapsed; otherwise returns true + * when the elapsed time is below 1 ms or the rate exceeds MIN_BURST_SPEED + * events per second, and false for slower rates. + */ +bool uevent_burst(struct timeval *start_time, int events) +{ + struct timeval diff_time, end_time; + unsigned long speed; + unsigned long eclipse_ms; + + if(events > MAX_ACCUMULATION_COUNT) { + /* events is a signed int, so print it with %d */ + condlog(2, "burst got %d uevents, too much uevents, stopped", events); + return false; + } + + gettimeofday(&end_time, NULL); + timersub(&end_time, start_time, &diff_time); + + /* elapsed time since start_time, in milliseconds */ + eclipse_ms = diff_time.tv_sec * 1000 + diff_time.tv_usec / 1000; + + if (eclipse_ms == 0) + return true; + + if (eclipse_ms > MAX_ACCUMULATION_TIME) { + condlog(2, "burst continued %lu ms, too long time, stopped", eclipse_ms); + return false; + } + + /* events per second */ + speed = (events * 1000) / eclipse_ms; + if (speed > MIN_BURST_SPEED) + return true; + + return false; +} + +int uevent_listen(struct udev *udev) +{ + int err = 2; + struct udev_monitor *monitor = NULL; + int fd, socket_flags, events; + struct timeval start_time; + int need_failback = 1; + int timeout = 30; + LIST_HEAD(uevlisten_tmp); + + /* + * Queue uevents for service by dedicated thread so that the uevent + * listening thread does not block on multipathd locks (vecs->lock) + * thereby not getting to empty the socket's receive buffer queue + * often enough.
+ */ + if (!udev) { + condlog(1, "no udev context"); + return 1; + } + udev_ref(udev); + pthread_cleanup_push(uevent_cleanup, udev); + + monitor = udev_monitor_new_from_netlink(udev, "udev"); + if (!monitor) { + condlog(2, "failed to create udev monitor"); + goto failback; + } + pthread_cleanup_push(monitor_cleanup, monitor); +#ifdef LIBUDEV_API_RECVBUF + if (udev_monitor_set_receive_buffer_size(monitor, 128 * 1024 * 1024)) + condlog(2, "failed to increase buffer size"); +#endif + fd = udev_monitor_get_fd(monitor); + if (fd < 0) { + condlog(2, "failed to get monitor fd"); + goto out; + } + socket_flags = fcntl(fd, F_GETFL); + if (socket_flags < 0) { + condlog(2, "failed to get monitor socket flags : %s", + strerror(errno)); + goto out; + } + if (fcntl(fd, F_SETFL, socket_flags & ~O_NONBLOCK) < 0) { + condlog(2, "failed to set monitor socket flags : %s", + strerror(errno)); + goto out; + } + err = udev_monitor_filter_add_match_subsystem_devtype(monitor, "block", + "disk"); + if (err) + condlog(2, "failed to create filter : %s", strerror(-err)); + err = udev_monitor_enable_receiving(monitor); + if (err) { + condlog(2, "failed to enable receiving : %s", strerror(-err)); + goto out; + } + + events = 0; + gettimeofday(&start_time, NULL); + while (1) { + struct uevent *uev; + struct udev_device *dev; + struct pollfd ev_poll; + int poll_timeout; + int fdcount; + + memset(&ev_poll, 0, sizeof(struct pollfd)); + ev_poll.fd = fd; + ev_poll.events = POLLIN; + poll_timeout = timeout * 1000; + errno = 0; + fdcount = poll(&ev_poll, 1, poll_timeout); + if (fdcount > 0 && ev_poll.revents & POLLIN) { + timeout = uevent_burst(&start_time, events + 1) ? 
1 : 0; + dev = udev_monitor_receive_device(monitor); + if (!dev) { + condlog(0, "failed getting udev device"); + continue; + } + uev = uevent_from_udev_device(dev); + if (!uev) + continue; + list_add_tail(&uev->node, &uevlisten_tmp); + events++; + continue; + } + if (fdcount < 0) { + if (errno == EINTR) + continue; + + condlog(0, "error receiving " + "uevent message: %m"); + err = -errno; + break; + } + if (!list_empty(&uevlisten_tmp)) { + /* + * Queue uevents and poke service pthread. + */ + condlog(3, "Forwarding %d uevents", events); + pthread_mutex_lock(uevq_lockp); + list_splice_tail_init(&uevlisten_tmp, &uevq); + pthread_cond_signal(uev_condp); + pthread_mutex_unlock(uevq_lockp); + events = 0; + } + gettimeofday(&start_time, NULL); + timeout = 30; + } + need_failback = 0; +out: + pthread_cleanup_pop(1); +failback: + if (need_failback) + err = failback_listen(); + pthread_cleanup_pop(1); + return err; +} + +int uevent_get_major(const struct uevent *uev) +{ + return uevent_get_env_positive_int(uev, "MAJOR"); +} + +int uevent_get_minor(const struct uevent *uev) +{ + return uevent_get_env_positive_int(uev, "MINOR"); +} + +int uevent_get_disk_ro(const struct uevent *uev) +{ + return uevent_get_env_positive_int(uev, "DISK_RO"); +} + +static char *uevent_get_dm_str(const struct uevent *uev, char *attr) +{ + const char *tmp = uevent_get_env_var(uev, attr); + + if (tmp == NULL) + return NULL; + return strdup(tmp); +} + +char *uevent_get_dm_name(const struct uevent *uev) +{ + return uevent_get_dm_str(uev, "DM_NAME"); +} + +char *uevent_get_dm_path(const struct uevent *uev) +{ + return uevent_get_dm_str(uev, "DM_PATH"); +} + +char *uevent_get_dm_action(const struct uevent *uev) +{ + return uevent_get_dm_str(uev, "DM_ACTION"); +} + +bool uevent_is_mpath(const struct uevent *uev) +{ + const char *uuid = uevent_get_env_var(uev, "DM_UUID"); + + if (uuid == NULL) + return false; + if (strncmp(uuid, UUID_PREFIX, UUID_PREFIX_LEN)) + return false; + return uuid[UUID_PREFIX_LEN] 
!= '\0'; +} diff --git a/libmultipath/uevent.h b/libmultipath/uevent.h new file mode 100644 index 0000000..0aa8675 --- /dev/null +++ b/libmultipath/uevent.h @@ -0,0 +1,44 @@ +#ifndef _UEVENT_H +#define _UEVENT_H + +/* + * buffer for environment variables, the kernel's size in + * lib/kobject_uevent.c should fit in +*/ +#define HOTPLUG_BUFFER_SIZE 2048 +#define HOTPLUG_NUM_ENVP 32 +#define OBJECT_SIZE 512 + +#ifndef NETLINK_KOBJECT_UEVENT +#define NETLINK_KOBJECT_UEVENT 15 +#endif + +struct udev; + +struct uevent { + struct list_head node; + struct list_head merge_node; + struct udev_device *udev; + char buffer[HOTPLUG_BUFFER_SIZE + OBJECT_SIZE]; + char *devpath; + char *action; + char *kernel; + const char *wwid; + unsigned long seqnum; + char *envp[HOTPLUG_NUM_ENVP]; +}; + +int is_uevent_busy(void); + +int uevent_listen(struct udev *udev); +int uevent_dispatch(int (*store_uev)(struct uevent *, void * trigger_data), + void * trigger_data); +int uevent_get_major(const struct uevent *uev); +int uevent_get_minor(const struct uevent *uev); +int uevent_get_disk_ro(const struct uevent *uev); +char *uevent_get_dm_name(const struct uevent *uev); +char *uevent_get_dm_path(const struct uevent *uev); +char *uevent_get_dm_action(const struct uevent *uev); +bool uevent_is_mpath(const struct uevent *uev); + +#endif /* _UEVENT_H */ diff --git a/libmultipath/unaligned.h b/libmultipath/unaligned.h new file mode 100644 index 0000000..b9eaa7c --- /dev/null +++ b/libmultipath/unaligned.h @@ -0,0 +1,54 @@ +#ifndef _UNALIGNED_H_ +#define _UNALIGNED_H_ + +#include + +static inline uint16_t get_unaligned_be16(const void *ptr) +{ + const uint8_t *p = ptr; + + return p[0] << 8 | p[1]; +} + +static inline uint32_t get_unaligned_be32(const void *ptr) +{ + const uint8_t *p = ptr; + + return p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3]; +} + +static inline uint64_t get_unaligned_be64(const void *ptr) +{ + uint32_t low = get_unaligned_be32(ptr + 4); + uint64_t high = get_unaligned_be32(ptr); + + 
return high << 32 | low; +} + +static inline void put_unaligned_be16(uint16_t val, void *ptr) +{ + uint8_t *p = ptr; + + p[0] = val >> 8; + p[1] = val; +} + +static inline void put_unaligned_be32(uint32_t val, void *ptr) +{ + uint8_t *p = ptr; + + p[0] = val >> 24; + p[1] = val >> 16; + p[2] = val >> 8; + p[3] = val; +} + +static inline void put_unaligned_be64(uint64_t val, void *ptr) +{ + uint8_t *p = ptr; + + put_unaligned_be32(val >> 32, p); + put_unaligned_be32(val, p + 4); +} + +#endif /* _UNALIGNED_H_ */ diff --git a/libmultipath/util.c b/libmultipath/util.c new file mode 100644 index 0000000..51c38c8 --- /dev/null +++ b/libmultipath/util.c @@ -0,0 +1,471 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "util.h" +#include "debug.h" +#include "memory.h" +#include "checkers.h" +#include "vector.h" +#include "structs.h" +#include "log.h" + +/* + * Strip trailing whitespace from @str in place. + * Returns the length of the chopped string. + */ +size_t +strchop(char *str) +{ + int i; + + /* isspace() is only defined for unsigned char values and EOF */ + for (i=strlen(str)-1; i >=0 && isspace((unsigned char)str[i]); --i) ; + str[++i] = '\0'; + return strlen(str); +} + +/* + * Copy the basename of @src, without trailing whitespace, into @dst. + * @size: size of @dst in bytes, including room for the terminating null. + * Returns the length of the copied name, or 0 if an argument is NULL, + * @src is empty, the name is empty, or it does not fit into @dst. + */ +int +basenamecpy (const char *src, char *dst, size_t size) +{ + const char *p, *e; + + if (!src || !dst || !strlen(src)) + return 0; + + /* size - 2 below would wrap around for size < 2 */ + if (size < 2) + return 0; + + p = basename(src); + + for (e = p + strlen(p) - 1; e >= p && isspace((unsigned char)*e); --e) ; + if (e < p || (size_t)(e - p) > size - 2) + return 0; + + strlcpy(dst, p, e - p + 2); + return strlen(dst); +} + +int +filepresent (char * run) { + struct stat buf; + + if(!stat(run, &buf)) + return 1; + return 0; +} + +char *get_next_string(char **temp, char *split_char) +{ + char *token = NULL; + token = strsep(temp, split_char); + while (token != NULL && !strcmp(token, "")) + token = strsep(temp, split_char); + return token; +} + +int +get_word (char * sentence, char ** word) +{ + char * p; + int len; + int skip = 0; + + if (word) + *word = NULL; + + while (*sentence == ' ') { + sentence++; + skip++; + } + if (*sentence == '\0') + return 0; + 
+ p = sentence; + + while (*p != ' ' && *p != '\0') + p++; + + len = (int) (p - sentence); + + if (!word) + return skip + len; + + *word = MALLOC(len + 1); + + if (!*word) { + condlog(0, "get_word : oom"); + return 0; + } + strncpy(*word, sentence, len); + strchop(*word); + condlog(5, "*word = %s, len = %i", *word, len); + + if (*p == '\0') + return 0; + + return skip + len; +} + +size_t strlcpy(char *dst, const char *src, size_t size) +{ + size_t bytes = 0; + char *q = dst; + const char *p = src; + char ch; + + while ((ch = *p++)) { + if (bytes+1 < size) + *q++ = ch; + bytes++; + } + + /* If size == 0 there is no space for a final null... */ + if (size) + *q = '\0'; + return bytes; +} + +size_t strlcat(char *dst, const char *src, size_t size) +{ + size_t bytes = 0; + char *q = dst; + const char *p = src; + char ch; + + while (bytes < size && *q) { + q++; + bytes++; + } + if (bytes == size) + return (bytes + strlen(src)); + + while ((ch = *p++)) { + if (bytes+1 < size) + *q++ = ch; + bytes++; + } + + *q = '\0'; + return bytes; +} + +int devt2devname(char *devname, int devname_len, char *devt) +{ + FILE *fd; + unsigned int tmpmaj, tmpmin, major, minor; + char dev[FILE_NAME_SIZE]; + char block_path[PATH_SIZE]; + struct stat statbuf; + + memset(block_path, 0, sizeof(block_path)); + memset(dev, 0, sizeof(dev)); + if (sscanf(devt, "%u:%u", &major, &minor) != 2) { + condlog(0, "Invalid device number %s", devt); + return 1; + } + + if (devname_len > FILE_NAME_SIZE) + devname_len = FILE_NAME_SIZE; + + if (stat("/sys/dev/block", &statbuf) == 0) { + /* Newer kernels have /sys/dev/block */ + sprintf(block_path,"/sys/dev/block/%u:%u", major, minor); + dev[FILE_NAME_SIZE - 1] = '\0'; + if (lstat(block_path, &statbuf) == 0) { + if (S_ISLNK(statbuf.st_mode) && + readlink(block_path, dev, FILE_NAME_SIZE-1) > 0) { + char *p = strrchr(dev, '/'); + + if (!p) { + condlog(0, "No sysfs entry for %s", + block_path); + return 1; + } + p++; + strlcpy(devname, p, devname_len); + return 0; + 
} + } + condlog(4, "%s is invalid", block_path); + return 1; + } + memset(block_path, 0, sizeof(block_path)); + + if (!(fd = fopen("/proc/partitions", "r"))) { + condlog(0, "Cannot open /proc/partitions"); + return 1; + } + + while (!feof(fd)) { + int r = fscanf(fd,"%u %u %*d %s",&tmpmaj, &tmpmin, dev); + if (!r) { + r = fscanf(fd,"%*s\n"); + continue; + } + if (r != 3) + continue; + + if ((major == tmpmaj) && (minor == tmpmin)) { + if (safe_sprintf(block_path, "/sys/block/%s", dev)) { + condlog(0, "device name %s is too long", dev); + fclose(fd); + return 1; + } + break; + } + } + fclose(fd); + + if (strncmp(block_path,"/sys/block", 10)) { + condlog(3, "No device found for %u:%u", major, minor); + return 1; + } + + if (stat(block_path, &statbuf) < 0) { + condlog(0, "No sysfs entry for %s", block_path); + return 1; + } + + if (S_ISDIR(statbuf.st_mode) == 0) { + condlog(0, "sysfs entry %s is not a directory", block_path); + return 1; + } + basenamecpy((const char *)block_path, devname, devname_len); + return 0; +} + +/* This function returns a pointer inside of the supplied pathname string. 
+ * If is_path_device is true, it may also modify the supplied string */ +char *convert_dev(char *name, int is_path_device) +{ + char *ptr; + + if (!name) + return NULL; + if (is_path_device) { + ptr = strstr(name, "cciss/"); + if (ptr) { + ptr += 5; + *ptr = '!'; + } + } + if (!strncmp(name, "/dev/", 5) && strlen(name) > 5) + ptr = name + 5; + else + ptr = name; + return ptr; +} + +dev_t parse_devt(const char *dev_t) +{ + int maj, min; + + if (sscanf(dev_t,"%d:%d", &maj, &min) != 2) + return 0; + + return makedev(maj, min); +} + +void +setup_thread_attr(pthread_attr_t *attr, size_t stacksize, int detached) +{ + int ret; + + ret = pthread_attr_init(attr); + assert(ret == 0); + if (stacksize < PTHREAD_STACK_MIN) + stacksize = PTHREAD_STACK_MIN; + ret = pthread_attr_setstacksize(attr, stacksize); + assert(ret == 0); + if (detached) { + ret = pthread_attr_setdetachstate(attr, + PTHREAD_CREATE_DETACHED); + assert(ret == 0); + } +} + +int systemd_service_enabled_in(const char *dev, const char *prefix) +{ + char path[PATH_SIZE], file[PATH_MAX], service[PATH_SIZE]; + DIR *dirfd; + struct dirent *d; + int found = 0; + + snprintf(service, PATH_SIZE, "multipathd.service"); + snprintf(path, PATH_SIZE, "%s/systemd/system", prefix); + condlog(3, "%s: checking for %s in %s", dev, service, path); + + dirfd = opendir(path); + if (dirfd == NULL) + return 0; + + while ((d = readdir(dirfd)) != NULL) { + char *p; + struct stat stbuf; + + if ((strcmp(d->d_name,".") == 0) || + (strcmp(d->d_name,"..") == 0)) + continue; + + if (strlen(d->d_name) < 6) + continue; + + p = d->d_name + strlen(d->d_name) - 6; + if (strcmp(p, ".wants")) + continue; + snprintf(file, sizeof(file), "%s/%s/%s", + path, d->d_name, service); + if (stat(file, &stbuf) == 0) { + condlog(3, "%s: found %s", dev, file); + found++; + break; + } + } + closedir(dirfd); + + return found; +} + +int systemd_service_enabled(const char *dev) +{ + int found = 0; + + found = systemd_service_enabled_in(dev, "/etc"); + if (!found) + 
found = systemd_service_enabled_in(dev, "/usr/lib"); + if (!found) + found = systemd_service_enabled_in(dev, "/lib"); + if (!found) + found = systemd_service_enabled_in(dev, "/run"); + return found; +} + +static int _linux_version_code; +static pthread_once_t _lvc_initialized = PTHREAD_ONCE_INIT; + +/* Returns current kernel version encoded as major*65536 + minor*256 + patch, + * so, for example, to check if the kernel is greater than 2.2.11: + * + * if (get_linux_version_code() > KERNEL_VERSION(2,2,11)) { } + * + * Copyright (C) 1999-2004 by Erik Andersen + * Code copied from busybox (GPLv2 or later) + */ +static void +_set_linux_version_code(void) +{ + struct utsname name; + char *t; + int i, r; + + uname(&name); /* never fails */ + t = name.release; + r = 0; + for (i = 0; i < 3; i++) { + t = strtok(t, "."); + r = r * 256 + (t ? atoi(t) : 0); + t = NULL; + } + _linux_version_code = r; +} + +int get_linux_version_code(void) +{ + pthread_once(&_lvc_initialized, _set_linux_version_code); + return _linux_version_code; +} + +int parse_prkey(char *ptr, uint64_t *prkey) +{ + if (!ptr) + return 1; + if (*ptr == '0') + ptr++; + if (*ptr == 'x' || *ptr == 'X') + ptr++; + if (*ptr == '\0' || strlen(ptr) > 16) + return 1; + if (strlen(ptr) != strspn(ptr, "0123456789aAbBcCdDeEfF")) + return 1; + if (sscanf(ptr, "%" SCNx64 "", prkey) != 1) + return 1; + return 0; +} + +int parse_prkey_flags(char *ptr, uint64_t *prkey, uint8_t *flags) +{ + char *flagstr; + + flagstr = strchr(ptr, ':'); + *flags = 0; + if (flagstr) { + *flagstr++ = '\0'; + if (strlen(flagstr) == 5 && strcmp(flagstr, "aptpl") == 0) + *flags = MPATH_F_APTPL_MASK; + } + return parse_prkey(ptr, prkey); +} + +int safe_write(int fd, const void *buf, size_t count) +{ + while (count > 0) { + ssize_t r = write(fd, buf, count); + if (r < 0) { + if (errno == EINTR) + continue; + return -errno; + } + count -= r; + buf = (const char *)buf + r; + } + return 0; +} + +void set_max_fds(rlim_t max_fds) +{ + struct rlimit 
fd_limit; + + if (!max_fds) + return; + + if (getrlimit(RLIMIT_NOFILE, &fd_limit) < 0) { + condlog(0, "can't get open fds limit: %s", + strerror(errno)); + fd_limit.rlim_cur = 0; + fd_limit.rlim_max = 0; + } + if (fd_limit.rlim_cur < max_fds) { + fd_limit.rlim_cur = max_fds; + if (fd_limit.rlim_max < max_fds) + fd_limit.rlim_max = max_fds; + if (setrlimit(RLIMIT_NOFILE, &fd_limit) < 0) { + condlog(0, "can't set open fds limit to " + "%lu/%lu : %s", + fd_limit.rlim_cur, fd_limit.rlim_max, + strerror(errno)); + } else { + condlog(3, "set open fds limit to %lu/%lu", + fd_limit.rlim_cur, fd_limit.rlim_max); + } + } +} + +void free_scandir_result(struct scandir_result *res) +{ + int i; + + for (i = 0; i < res->n; i++) + FREE(res->di[i]); + FREE(res->di); +} + +void close_fd(void *arg) +{ + close((long)arg); +} diff --git a/libmultipath/util.h b/libmultipath/util.h new file mode 100644 index 0000000..56bd78c --- /dev/null +++ b/libmultipath/util.h @@ -0,0 +1,69 @@ +#ifndef _UTIL_H +#define _UTIL_H + +#include +/* for rlim_t */ +#include +#include +#include + +size_t strchop(char *); +int basenamecpy (const char *src, char *dst, size_t size); +int filepresent (char * run); +char *get_next_string(char **temp, char *split_char); +int get_word (char * sentence, char ** word); +size_t strlcpy(char *dst, const char *src, size_t size); +size_t strlcat(char *dst, const char *src, size_t size); +int devt2devname (char *, int, char *); +dev_t parse_devt(const char *dev_t); +char *convert_dev(char *dev, int is_path_device); +void setup_thread_attr(pthread_attr_t *attr, size_t stacksize, int detached); +int systemd_service_enabled(const char *dev); +int get_linux_version_code(void); +int parse_prkey(char *ptr, uint64_t *prkey); +int parse_prkey_flags(char *ptr, uint64_t *prkey, uint8_t *flags); +int safe_write(int fd, const void *buf, size_t count); +void set_max_fds(rlim_t max_fds); + +#define KERNEL_VERSION(maj, min, ptc) ((((maj) * 256) + (min)) * 256 + (ptc)) +#define 
ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) + +#define safe_sprintf(var, format, args...) \ + safe_snprintf(var, sizeof(var), format, ##args) + +#define safe_snprintf(var, size, format, args...) \ + ({ \ + size_t __size = size; \ + int __ret; \ + \ + __ret = snprintf(var, __size, format, ##args); \ + __ret < 0 || (size_t)__ret >= __size; \ + }) + +#define pthread_cleanup_push_cast(f, arg) \ + pthread_cleanup_push(((void (*)(void *))&f), (arg)) + +void close_fd(void *arg); + +struct scandir_result { + struct dirent **di; + int n; +}; +void free_scandir_result(struct scandir_result *); + +static inline bool is_bit_set_in_array(unsigned int bit, const uint64_t *arr) +{ + return arr[bit / 64] & (1ULL << (bit % 64)) ? 1 : 0; +} + +static inline void set_bit_in_array(unsigned int bit, uint64_t *arr) +{ + arr[bit / 64] |= (1ULL << (bit % 64)); +} + +static inline void clear_bit_in_array(unsigned int bit, uint64_t *arr) +{ + arr[bit / 64] &= ~(1ULL << (bit % 64)); +} + +#endif /* _UTIL_H */ diff --git a/libmultipath/uxsock.c b/libmultipath/uxsock.c new file mode 100644 index 0000000..6adeedf --- /dev/null +++ b/libmultipath/uxsock.c @@ -0,0 +1,136 @@ +/* + * Original author : tridge@samba.org, January 2002 + * + * Copyright (c) 2005 Christophe Varoqui + * Copyright (c) 2005 Alasdair Kergon, Redhat + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef USE_SYSTEMD +#include +#endif +#include "mpath_cmd.h" + +#include "memory.h" +#include "uxsock.h" +#include "debug.h" + +/* + * Code is similar with mpath_recv_reply() with data size limitation + * and debug-able malloc. + * When limit == 0, it means no limit on data size, used for socket client + * to receiving data from multipathd. 
+ */
+static int _recv_packet(int fd, char **buf, unsigned int timeout,
+			ssize_t limit);
+
+/*
+ * create a unix domain socket and start listening on it
+ * return a file descriptor open on the socket
+ *
+ * With USE_SYSTEMD, a single descriptor passed in by systemd is returned
+ * as is, and more than one passed descriptor is treated as an error (-1).
+ * Otherwise the socket is bound to an address whose sun_path starts with a
+ * NUL byte followed by name (truncated to fit sun_path).
+ * Returns -1 on any failure.
+ */
+int ux_socket_listen(const char *name)
+{
+	int fd;
+	size_t len;
+#ifdef USE_SYSTEMD
+	int num;
+#endif
+	struct sockaddr_un addr;
+
+#ifdef USE_SYSTEMD
+	num = sd_listen_fds(0);
+	if (num > 1) {
+		condlog(3, "sd_listen_fds returned %d fds", num);
+		return -1;
+	} else if (num == 1) {
+		fd = SD_LISTEN_FDS_START + 0;
+		condlog(3, "using fd %d from sd_listen_fds", fd);
+		return fd;
+	}
+#endif
+	fd = socket(AF_LOCAL, SOCK_STREAM, 0);
+	if (fd == -1) {
+		condlog(3, "Couldn't create ux_socket, error %d", errno);
+		return -1;
+	}
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_LOCAL;
+	addr.sun_path[0] = '\0';
+	/* name is stored after the leading NUL; clamp so it fits sun_path */
+	len = strlen(name) + 1;
+	if (len >= sizeof(addr.sun_path))
+		len = sizeof(addr.sun_path) - 1;
+	memcpy(&addr.sun_path[1], name, len);
+
+	len += sizeof(sa_family_t);
+	if (bind(fd, (struct sockaddr *)&addr, len) == -1) {
+		condlog(3, "Couldn't bind to ux_socket, error %d", errno);
+		close(fd);
+		return -1;
+	}
+
+	if (listen(fd, 10) == -1) {
+		condlog(3, "Couldn't listen to ux_socket, error %d", errno);
+		close(fd);
+		return -1;
+	}
+	return fd;
+}
+
+/*
+ * send a packet in length prefix format
+ * returns 0 on success, -errno if mpath_send_cmd() fails
+ */
+int send_packet(int fd, const char *buf)
+{
+	if (mpath_send_cmd(fd, buf) < 0)
+		return -errno;
+	return 0;
+}
+
+/*
+ * Receive one length-prefixed packet into a newly allocated buffer.
+ * On success returns 0 and stores the buffer in *buf; the caller frees it.
+ * A reported length of 0 also returns 0, with *buf left NULL.
+ * On failure *buf is NULL and the result is negative: -errno from the
+ * receive helpers, -EINVAL when limit > 0 and the reported length exceeds
+ * it, or -ENOMEM.
+ */
+static int _recv_packet(int fd, char **buf, unsigned int timeout, ssize_t limit)
+{
+	int err = 0;
+	ssize_t len = 0;
+
+	*buf = NULL;
+	len = mpath_recv_reply_len(fd, timeout);
+	if (len == 0)
+		return len;
+	if (len < 0)
+		return -errno;
+	if ((limit > 0) && (len > limit))
+		return -EINVAL;
+	(*buf) = MALLOC(len);
+	if (!*buf)
+		return -ENOMEM;
+	err = mpath_recv_reply_data(fd, *buf, len, timeout);
+	if (err != 0) {
+		/* latch errno first: releasing the buffer may overwrite it */
+		int saved_errno = errno;
+
+		FREE(*buf);
+		(*buf) = NULL;
+		return -saved_errno;
+	}
+	return err;
+}
+
+/*
+ * receive a packet in length prefix format
+ */ +int recv_packet(int fd, char **buf, unsigned int timeout) +{ + return _recv_packet(fd, buf, timeout, 0 /* no limit */); +} + +int recv_packet_from_client(int fd, char **buf, unsigned int timeout) +{ + return _recv_packet(fd, buf, timeout, _MAX_CMD_LEN); +} diff --git a/libmultipath/uxsock.h b/libmultipath/uxsock.h new file mode 100644 index 0000000..8e7401d --- /dev/null +++ b/libmultipath/uxsock.h @@ -0,0 +1,13 @@ +/* some prototypes */ +int ux_socket_listen(const char *name); +int send_packet(int fd, const char *buf); +int recv_packet(int fd, char **buf, unsigned int timeout); + +#define _MAX_CMD_LEN 512 + +/* + * Used for receiving socket command from untrusted socket client where data + * size is restricted to 512(_MAX_CMD_LEN) at most. + * Return -EINVAL if data length requested by client exceeded the _MAX_CMD_LEN. + */ +int recv_packet_from_client(int fd, char **buf, unsigned int timeout); diff --git a/libmultipath/vector.c b/libmultipath/vector.c new file mode 100644 index 0000000..501cf4c --- /dev/null +++ b/libmultipath/vector.c @@ -0,0 +1,210 @@ +/* + * Part: Vector structure manipulation. + * + * Version: $Id: vector.c,v 1.0.3 2003/05/11 02:28:03 acassen Exp $ + * + * Author: Alexandre Cassen, + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Copyright (c) 2002, 2003, 2004 Alexandre Cassen + * Copyright (c) 2005 Christophe Varoqui + */ + +#include "memory.h" +#include +#include "vector.h" + +/* + * Initialize vector struct. + * allocated 'size' slot elements then return vector. 
+ */
+vector
+vector_alloc(void)
+{
+	vector v = (vector) MALLOC(sizeof (struct _vector));
+	return v;
+}
+
+/*
+ * Grow the vector by VECTOR_DEFAULT_SIZE slots and clear the new slots.
+ * Returns the slot array on success. Returns NULL if v is NULL or the
+ * allocation fails; in that case the vector is left exactly as it was.
+ */
+void *
+vector_alloc_slot(vector v)
+{
+	void *new_slot = NULL;
+	int i;
+
+	if (!v)
+		return NULL;
+
+	v->allocated += VECTOR_DEFAULT_SIZE;
+	if (v->slot)
+		new_slot = REALLOC(v->slot, sizeof (void *) * v->allocated);
+	else
+		new_slot = (void *) MALLOC(sizeof (void *) * v->allocated);
+
+	if (!new_slot) {
+		/*
+		 * Report the failure. Returning the old, still valid array
+		 * here would look like success to the caller, who would then
+		 * overwrite the last existing element.
+		 */
+		v->allocated -= VECTOR_DEFAULT_SIZE;
+		return NULL;
+	}
+
+	v->slot = new_slot;
+	/* the grown tail is not guaranteed to be zeroed */
+	for (i = v->allocated - VECTOR_DEFAULT_SIZE; i < v->allocated; i++)
+		v->slot[i] = NULL;
+
+	return v->slot;
+}
+
+/*
+ * Move the element at index src to the lower index dest, shifting the
+ * elements in between up by one.
+ * Returns 0 on success (also when dest == src), -1 on invalid arguments.
+ */
+int
+vector_move_up(vector v, int src, int dest)
+{
+	void *value;
+	int i;
+	if (dest == src)
+		return 0;
+	if (!v || dest < 0 || dest > src || src >= v->allocated)
+		return -1;
+	value = v->slot[src];
+	for (i = src - 1; i >= dest; i--)
+		v->slot[i + 1] = v->slot[i];
+	v->slot[dest] = value;
+	return 0;
+}
+
+/*
+ * Insert value at index slot, shifting later elements up by one.
+ * slot may be anything from 0 to the current size (which appends).
+ * Returns the stored value, or NULL if v is NULL, slot is out of range or
+ * the vector cannot grow.
+ */
+void *
+vector_insert_slot(vector v, int slot, void *value)
+{
+	int i;
+
+	/* check the index before growing, so a bad call changes nothing */
+	if (!v || slot < 0 || slot > VECTOR_SIZE(v))
+		return NULL;
+
+	if (!vector_alloc_slot(v))
+		return NULL;
+
+	for (i = VECTOR_SIZE(v) - 2; i >= slot; i--)
+		v->slot[i + 1] = v->slot[i];
+
+	v->slot[slot] = value;
+
+	return v->slot[slot];
+}
+
+/* Return the index of the first slot holding addr, or -1 if there is none. */
+int
+find_slot(vector v, void * addr)
+{
+	int i;
+
+	if (!v)
+		return -1;
+
+	for (i = 0; i < VECTOR_SIZE(v); i++)
+		if (v->slot[i] == addr)
+			return i;
+
+	return -1;
+}
+
+/*
+ * Remove the element at index slot and shrink the vector by one slot.
+ * Does nothing if v is NULL or empty, or if slot is outside 0..size-1.
+ * The removed element itself is not freed.
+ */
+void
+vector_del_slot(vector v, int slot)
+{
+	int i;
+
+	/* slot == size is out of range too; it used to drop the last element */
+	if (!v || !v->allocated || slot < 0 || slot >= VECTOR_SIZE(v))
+		return;
+
+	for (i = slot + 1; i < VECTOR_SIZE(v); i++)
+		v->slot[i-1] = v->slot[i];
+
+	v->allocated -= VECTOR_DEFAULT_SIZE;
+
+	if (v->allocated <= 0) {
+		FREE(v->slot);
+		v->slot = NULL;
+		v->allocated = 0;
+	} else {
+		void *new_slot;
+
+		new_slot = REALLOC(v->slot, sizeof (void *) * v->allocated);
+		/* if shrinking fails, keep the old array and its old size */
+		if (!new_slot)
+			v->allocated += VECTOR_DEFAULT_SIZE;
+		else
+			v->slot = new_slot;
+	}
+}
+
+/* Delete every NULL slot except a NULL at index 0, which is kept. */
+void
+vector_repack(vector v)
+{
+	int i;
+
+	if (!v || !v->allocated)
+		return;
+
+	for (i = 0; i < VECTOR_SIZE(v); i++)
+		if (i > 0 && v->slot[i] == NULL)
+			vector_del_slot(v, i--);
+}
+
+vector
+vector_reset(vector v)
+{
+ if (!v) + return NULL; + + if (v->slot) + FREE(v->slot); + + v->allocated = 0; + v->slot = NULL; + return v; +} + +/* Free memory vector allocation */ +void +vector_free(vector v) +{ + if (!vector_reset(v)) + return; + FREE(v); +} + +void +free_strvec(vector strvec) +{ + int i; + char *str; + + if (!strvec) + return; + + vector_foreach_slot (strvec, str, i) + if (str) + FREE(str); + + vector_free(strvec); +} + +/* Set a vector slot value */ +void +vector_set_slot(vector v, void *value) +{ + unsigned int i; + + if (!v) + return; + + i = VECTOR_SIZE(v) - 1; + v->slot[i] = value; +} + +int vector_find_or_add_slot(vector v, void *value) +{ + int n = find_slot(v, value); + + if (n >= 0) + return n; + if (vector_alloc_slot(v) == NULL) + return -1; + vector_set_slot(v, value); + return VECTOR_SIZE(v) - 1; +} diff --git a/libmultipath/vector.h b/libmultipath/vector.h new file mode 100644 index 0000000..e16ec46 --- /dev/null +++ b/libmultipath/vector.h @@ -0,0 +1,90 @@ +/* + * Soft: Keepalived is a failover program for the LVS project + * . It monitor & manipulate + * a loadbalanced server pool using multi-layer checks. + * + * Part: vector.c include file. + * + * Version: $Id: vector.h,v 1.0.3 2003/05/11 02:28:03 acassen Exp $ + * + * Author: Alexandre Cassen, + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _VECTOR_H +#define _VECTOR_H + +/* vector definition */ +struct _vector { + int allocated; + void **slot; +}; +typedef struct _vector *vector; + +#define VECTOR_DEFAULT_SIZE 1 +#define VECTOR_SIZE(V) ((V) ? ((V)->allocated) / VECTOR_DEFAULT_SIZE : 0) +#define VECTOR_SLOT(V,E) (((V) && (E) < VECTOR_SIZE(V)) ? (V)->slot[(E)] : NULL) +#define VECTOR_LAST_SLOT(V) (((V) && VECTOR_SIZE(V) > 0) ? (V)->slot[(VECTOR_SIZE(V) - 1)] : NULL) + +#define vector_foreach_slot(v,p,i) \ + for (i = 0; (v) && (int)i < VECTOR_SIZE(v) && ((p) = (v)->slot[i]); i++) +#define vector_foreach_slot_after(v,p,i) \ + for (; (v) && (int)i < VECTOR_SIZE(v) && ((p) = (v)->slot[i]); i++) +#define vector_foreach_slot_backwards(v,p,i) \ + for (i = VECTOR_SIZE(v) - 1; (int)i >= 0 && ((p) = (v)->slot[i]); i--) + +#define identity(x) (x) +/* + * Given a vector vec with elements of given type, + * return a newly allocated vector with elements conv(e) for each element + * e in vec. "conv" may be a macro or a function. + * Use "identity" for a simple copy. 
+ */ +#define vector_convert(new, vec, type, conv) \ + ({ \ + const struct _vector *__v = (vec); \ + vector __t = (new); \ + type *__j; \ + int __i; \ + \ + if (__t == NULL) \ + __t = vector_alloc(); \ + if (__t != NULL) { \ + vector_foreach_slot(__v, __j, __i) { \ + if (vector_alloc_slot(__t) == NULL) { \ + vector_free(__t); \ + __t = NULL; \ + break; \ + } \ + vector_set_slot(__t, conv(__j)); \ + } \ + } \ + __t; \ + }) + +/* Prototypes */ +extern vector vector_alloc(void); +extern void *vector_alloc_slot(vector v); +vector vector_reset(vector v); +extern void vector_free(vector v); +#define vector_free_const(x) vector_free((vector)(long)(x)) +extern void free_strvec(vector strvec); +extern void vector_set_slot(vector v, void *value); +extern void vector_del_slot(vector v, int slot); +extern void *vector_insert_slot(vector v, int slot, void *value); +int find_slot(vector v, void * addr); +int vector_find_or_add_slot(vector v, void *value); +extern void vector_repack(vector v); +extern void vector_dump(vector v); +extern void dump_strvec(vector strvec); +extern int vector_move_up(vector v, int src, int dest); +#endif diff --git a/libmultipath/version.h b/libmultipath/version.h new file mode 100644 index 0000000..7ddb4e8 --- /dev/null +++ b/libmultipath/version.h @@ -0,0 +1,37 @@ +/* + * Soft: multipath device mapper target autoconfig + * + * Version: $Id: main.h,v 0.0.1 2003/09/18 15:13:38 cvaroqui Exp $ + * + * Author: Christophe Varoqui + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Copyright (c) 2006 Christophe Varoqui + */ +#ifndef _VERSION_H +#define _VERSION_H + +#define VERSION_CODE 0x000804 +#define DATE_CODE 0x050414 + +#define PROG "multipath-tools" + +#define MULTIPATH_VERSION(version) \ + (version >> 16) & 0xFF, \ + (version >> 8) & 0xFF, \ + version & 0xFF + +#define VERSION_STRING PROG" v%d.%d.%d (%.2d/%.2d, 20%.2d)\n", \ + MULTIPATH_VERSION(VERSION_CODE), \ + MULTIPATH_VERSION(DATE_CODE) + +#endif /* _VERSION_H */ diff --git a/libmultipath/wwids.c b/libmultipath/wwids.c new file mode 100644 index 0000000..28a2150 --- /dev/null +++ b/libmultipath/wwids.c @@ -0,0 +1,456 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "util.h" +#include "checkers.h" +#include "vector.h" +#include "structs.h" +#include "debug.h" +#include "uxsock.h" +#include "file.h" +#include "wwids.h" +#include "defaults.h" +#include "config.h" +#include "devmapper.h" + +/* + * Copyright (c) 2010 Benjamin Marzinski, Redhat + */ + +static int +lookup_wwid(FILE *f, char *wwid) { + int c; + char buf[LINE_MAX]; + int count; + + while ((c = fgetc(f)) != EOF){ + if (c != '/') { + if (fgets(buf, LINE_MAX, f) == NULL) + return 0; + else + continue; + } + count = 0; + while ((c = fgetc(f)) != '/') { + if (c == EOF) + return 0; + if (count >= WWID_SIZE - 1) + goto next; + if (wwid[count] == '\0') + goto next; + if (c != wwid[count++]) + goto next; + } + if (wwid[count] == '\0') + return 1; +next: + if (fgets(buf, LINE_MAX, f) == NULL) + return 0; + } + return 0; +} + +static int +write_out_wwid(int fd, char *wwid) { + int ret; + off_t offset; + char buf[WWID_SIZE + 3]; + + ret = snprintf(buf, WWID_SIZE + 3, "/%s/\n", wwid); + if (ret >= (WWID_SIZE + 3) || ret < 0){ + condlog(0, "can't format wwid for writing (%d) : %s", + ret, strerror(errno)); + return -1; + } + offset = lseek(fd, 0, SEEK_END); + if (offset < 0) { + condlog(0, "can't seek to the end of wwids file : %s", + strerror(errno)); + return -1; + } + if 
(write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf)) { + condlog(0, "cannot write wwid to wwids file : %s", + strerror(errno)); + if (ftruncate(fd, offset)) + condlog(0, "cannot truncate failed wwid write : %s", + strerror(errno)); + return -1; + } + return 1; +} + +int +replace_wwids(vector mp) +{ + int i, can_write; + long fd; + struct multipath * mpp; + size_t len; + int ret = -1; + struct config *conf; + + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + fd = open_file(conf->wwids_file, &can_write, WWIDS_FILE_HEADER); + pthread_cleanup_pop(1); + if (fd < 0) + goto out; + + pthread_cleanup_push(close_fd, (void*)fd); + if (!can_write) { + condlog(0, "cannot replace wwids. wwids file is read-only"); + goto out_file; + } + if (ftruncate(fd, 0) < 0) { + condlog(0, "cannot truncate wwids file : %s", strerror(errno)); + goto out_file; + } + if (lseek(fd, 0, SEEK_SET) < 0) { + condlog(0, "cannot seek to the start of the file : %s", + strerror(errno)); + goto out_file; + } + len = strlen(WWIDS_FILE_HEADER); + if (write(fd, WWIDS_FILE_HEADER, len) != (ssize_t)len) { + condlog(0, "Can't write wwid file header : %s", + strerror(errno)); + /* cleanup partially written header */ + if (ftruncate(fd, 0) < 0) + condlog(0, "Cannot truncate header : %s", + strerror(errno)); + goto out_file; + } + if (!mp || !mp->allocated) { + ret = 0; + goto out_file; + } + vector_foreach_slot(mp, mpp, i) { + if (write_out_wwid(fd, mpp->wwid) < 0) + goto out_file; + } + ret = 0; +out_file: + pthread_cleanup_pop(1); +out: + return ret; +} + +int +do_remove_wwid(int fd, char *str) { + char buf[4097]; + char *ptr; + off_t start = 0; + int bytes; + + while (1) { + if (lseek(fd, start, SEEK_SET) < 0) { + condlog(0, "wwid file read lseek failed : %s", + strerror(errno)); + return -1; + } + bytes = read(fd, buf, 4096); + if (bytes < 0) { + if (errno == EINTR || errno == EAGAIN) + continue; + condlog(0, "failed to read from wwids file : %s", + strerror(errno)); + 
return -1; + } + if (!bytes) /* didn't find wwid to remove */ + return 1; + buf[bytes] = '\0'; + ptr = strstr(buf, str); + if (ptr != NULL) { + condlog(3, "found '%s'", str); + if (lseek(fd, start + (ptr - buf), SEEK_SET) < 0) { + condlog(0, "write lseek failed : %s", + strerror(errno)); + return -1; + } + while (1) { + if (write(fd, "#", 1) < 0) { + if (errno == EINTR || errno == EAGAIN) + continue; + condlog(0, "failed to write to wwids file : %s", strerror(errno)); + return -1; + } + return 0; + } + } + ptr = strrchr(buf, '\n'); + if (ptr == NULL) { /* shouldn't happen, assume it is EOF */ + condlog(4, "couldn't find newline, assuming end of file"); + return 1; + } + start = start + (ptr - buf) + 1; + } +} + + +int +remove_wwid(char *wwid) { + long fd; + int len, can_write; + char *str; + int ret = -1; + struct config *conf; + + len = strlen(wwid) + 4; /* two slashes the newline and a zero byte */ + str = malloc(len); + if (str == NULL) { + condlog(0, "can't allocate memory to remove wwid : %s", + strerror(errno)); + return -1; + } + pthread_cleanup_push(free, str); + if (snprintf(str, len, "/%s/\n", wwid) >= len) { + condlog(0, "string overflow trying to remove wwid"); + ret = -1; + goto out; + } + condlog(3, "removing line '%s' from wwids file", str); + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + fd = open_file(conf->wwids_file, &can_write, WWIDS_FILE_HEADER); + pthread_cleanup_pop(1); + + if (fd < 0) { + ret = -1; + goto out; + } + + pthread_cleanup_push(close_fd, (void*)fd); + if (!can_write) { + ret = -1; + condlog(0, "cannot remove wwid. 
wwids file is read-only"); + } else + ret = do_remove_wwid(fd, str); + pthread_cleanup_pop(1); +out: + /* free(str) */ + pthread_cleanup_pop(1); + return ret; +} + +int +check_wwids_file(char *wwid, int write_wwid) +{ + int fd, can_write, found, ret; + FILE *f; + struct config *conf; + + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + fd = open_file(conf->wwids_file, &can_write, WWIDS_FILE_HEADER); + pthread_cleanup_pop(1); + if (fd < 0) + return -1; + + f = fdopen(fd, "r"); + if (!f) { + condlog(0,"can't fdopen wwids file : %s", strerror(errno)); + close(fd); + return -1; + } + found = lookup_wwid(f, wwid); + if (found) { + ret = 0; + goto out; + } + if (!write_wwid) { + ret = -1; + goto out; + } + if (!can_write) { + condlog(0, "wwids file is read-only. Can't write wwid"); + ret = -1; + goto out; + } + + if (fflush(f) != 0) { + condlog(0, "cannot fflush wwids file stream : %s", + strerror(errno)); + ret = -1; + goto out; + } + + ret = write_out_wwid(fd, wwid); +out: + fclose(f); + return ret; +} + +int +should_multipath(struct path *pp1, vector pathvec, vector mpvec) +{ + int i, ignore_new_devs, find_multipaths; + struct path *pp2; + struct config *conf; + + conf = get_multipath_config(); + ignore_new_devs = ignore_new_devs_on(conf); + find_multipaths = find_multipaths_on(conf); + put_multipath_config(conf); + if (!find_multipaths && !ignore_new_devs) + return 1; + + condlog(4, "checking if %s should be multipathed", pp1->dev); + if (!ignore_new_devs) { + char tmp_wwid[WWID_SIZE]; + struct multipath *mp = find_mp_by_wwid(mpvec, pp1->wwid); + + if (mp != NULL && + dm_get_uuid(mp->alias, tmp_wwid, WWID_SIZE) == 0 && + !strncmp(tmp_wwid, pp1->wwid, WWID_SIZE)) { + condlog(3, "wwid %s is already multipathed, keeping it", + pp1->wwid); + return 1; + } + vector_foreach_slot(pathvec, pp2, i) { + if (pp1->dev == pp2->dev) + continue; + if (strncmp(pp1->wwid, pp2->wwid, WWID_SIZE) == 0) { + condlog(3, "found multiple paths with wwid 
%s, " + "multipathing %s", pp1->wwid, pp1->dev); + return 1; + } + } + } + if (check_wwids_file(pp1->wwid, 0) < 0) { + condlog(3, "wwid %s not in wwids file, skipping %s", + pp1->wwid, pp1->dev); + return 0; + } + condlog(3, "found wwid %s in wwids file, multipathing %s", pp1->wwid, + pp1->dev); + return 1; +} + +int +remember_wwid(char *wwid) +{ + int ret = check_wwids_file(wwid, 1); + if (ret < 0){ + condlog(3, "failed writing wwid %s to wwids file", wwid); + return -1; + } + if (ret == 1) + condlog(3, "wrote wwid %s to wwids file", wwid); + else + condlog(4, "wwid %s already in wwids file", wwid); + return ret; +} + +static const char shm_dir[] = MULTIPATH_SHM_BASE "failed_wwids"; +static const char shm_lock[] = ".lock"; +static const char shm_header[] = "multipath shm lock file, don't edit"; +static char _shm_lock_path[sizeof(shm_dir)+sizeof(shm_lock)]; +static const char *shm_lock_path = &_shm_lock_path[0]; + +static void init_shm_paths(void) +{ + snprintf(_shm_lock_path, sizeof(_shm_lock_path), + "%s/%s", shm_dir, shm_lock); +} + +static pthread_once_t shm_path_once = PTHREAD_ONCE_INIT; + +static int multipath_shm_open(bool rw) +{ + int fd; + int can_write; + + pthread_once(&shm_path_once, init_shm_paths); + fd = open_file(shm_lock_path, &can_write, shm_header); + + if (fd >= 0 && rw && !can_write) { + close(fd); + condlog(1, "failed to open %s for writing", shm_dir); + return -1; + } + + return fd; +} + +static void multipath_shm_close(void *arg) +{ + long fd = (long)arg; + + close(fd); + unlink(shm_lock_path); +} + +static int _failed_wwid_op(const char *wwid, bool rw, + int (*func)(const char *), const char *msg) +{ + char path[PATH_MAX]; + long lockfd; + int r = -1; + + if (safe_sprintf(path, "%s/%s", shm_dir, wwid)) { + condlog(1, "%s: path name overflow", __func__); + return -1; + } + + lockfd = multipath_shm_open(rw); + if (lockfd == -1) + return -1; + + pthread_cleanup_push(multipath_shm_close, (void *)lockfd); + r = func(path); + 
pthread_cleanup_pop(1); + + if (r == WWID_FAILED_ERROR) + condlog(1, "%s: %s: %s", msg, wwid, strerror(errno)); + else if (r == WWID_FAILED_CHANGED) + condlog(3, "%s: %s", msg, wwid); + else if (!rw) + condlog(4, "%s: %s is %s", msg, wwid, + r == WWID_IS_FAILED ? "failed" : "good"); + + return r; +} + +static int _is_failed(const char *path) +{ + struct stat st; + + if (lstat(path, &st) == 0) + return WWID_IS_FAILED; + else if (errno == ENOENT) + return WWID_IS_NOT_FAILED; + else + return WWID_FAILED_ERROR; +} + +static int _mark_failed(const char *path) +{ + /* Called from _failed_wwid_op: we know that shm_lock_path exists */ + if (_is_failed(path) == WWID_IS_FAILED) + return WWID_FAILED_UNCHANGED; + return (link(shm_lock_path, path) == 0 ? WWID_FAILED_CHANGED : + WWID_FAILED_ERROR); +} + +static int _unmark_failed(const char *path) +{ + if (_is_failed(path) == WWID_IS_NOT_FAILED) + return WWID_FAILED_UNCHANGED; + return (unlink(path) == 0 ? WWID_FAILED_CHANGED : WWID_FAILED_ERROR); +} + +#define declare_failed_wwid_op(op, rw) \ +int op ## _wwid(const char *wwid) \ +{ \ + return _failed_wwid_op(wwid, (rw), _ ## op, #op); \ +} + +declare_failed_wwid_op(is_failed, false) +declare_failed_wwid_op(mark_failed, true) +declare_failed_wwid_op(unmark_failed, true) diff --git a/libmultipath/wwids.h b/libmultipath/wwids.h new file mode 100644 index 0000000..0c6ee54 --- /dev/null +++ b/libmultipath/wwids.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2010 Benjamin Marzinski, Redhat + */ + +#ifndef _WWIDS_H +#define _WWIDS_H + +#define WWIDS_FILE_HEADER \ +"# Multipath wwids, Version : 1.0\n" \ +"# NOTE: This file is automatically maintained by multipath and multipathd.\n" \ +"# You should not need to edit this file in normal circumstances.\n" \ +"#\n" \ +"# Valid WWIDs:\n" + +int should_multipath(struct path *pp, vector pathvec, vector mpvec); +int remember_wwid(char *wwid); +int check_wwids_file(char *wwid, int write_wwid); +int remove_wwid(char *wwid); +int replace_wwids(vector 
mp); + +enum { + WWID_IS_NOT_FAILED = 0, + WWID_IS_FAILED, + WWID_FAILED_UNCHANGED, + WWID_FAILED_CHANGED, + WWID_FAILED_ERROR = -1, +}; + +int is_failed_wwid(const char *wwid); +int mark_failed_wwid(const char *wwid); +int unmark_failed_wwid(const char *wwid); +#endif /* _WWIDS_H */ diff --git a/mpathpersist/Makefile b/mpathpersist/Makefile new file mode 100644 index 0000000..5126801 --- /dev/null +++ b/mpathpersist/Makefile @@ -0,0 +1,35 @@ +include ../Makefile.inc + +CFLAGS += $(BIN_CFLAGS) -I$(multipathdir) -I$(mpathpersistdir) +LDFLAGS += $(BIN_LDFLAGS) + +LIBDEPS += -L$(mpathpersistdir) -lmpathpersist -L$(multipathdir) -lmultipath \ + -L$(mpathcmddir) -lmpathcmd -lpthread -ldevmapper -ludev + +EXEC = mpathpersist + +OBJS = main.o + +all: $(EXEC) + +$(EXEC): $(OBJS) + $(CC) $(OBJS) -o $(EXEC) $(LDFLAGS) $(CFLAGS) $(LIBDEPS) + $(GZIP) $(EXEC).8 > $(EXEC).8.gz + +install: + $(INSTALL_PROGRAM) -d $(DESTDIR)$(bindir) + $(INSTALL_PROGRAM) -m 755 $(EXEC) $(DESTDIR)$(bindir)/ + $(INSTALL_PROGRAM) -d $(DESTDIR)$(man8dir) + $(INSTALL_PROGRAM) -m 644 $(EXEC).8.gz $(DESTDIR)$(man8dir) + +clean: dep_clean + $(RM) core *.o $(EXEC) *.gz + +include $(wildcard $(OBJS:.o=.d)) + +uninstall: + $(RM) $(DESTDIR)$(bindir)/$(EXEC) + $(RM) $(DESTDIR)$(man8dir)/$(EXEC).8.gz + +dep_clean: + $(RM) $(OBJS:.o=.d) diff --git a/mpathpersist/main.c b/mpathpersist/main.c new file mode 100644 index 0000000..28bfe41 --- /dev/null +++ b/mpathpersist/main.c @@ -0,0 +1,979 @@ +#include +#include +#include +#include +#include +#include "checkers.h" +#include "vector.h" +#include "config.h" +#include "structs.h" +#include +#include +#include "mpath_persist.h" +#include "main.h" +#include "debug.h" +#include +#include +#include +#include +#include "version.h" + +static const char * pr_type_strs[] = { + "obsolete [0]", + "Write Exclusive", + "obsolete [2]", + "Exclusive Access", + "obsolete [4]", + "Write Exclusive, registrants only", + "Exclusive Access, registrants only", + "Write Exclusive, all 
registrants",
+	"Exclusive Access, all registrants",
+	"obsolete [9]", "obsolete [0xa]", "obsolete [0xb]", "obsolete [0xc]",
+	"obsolete [0xd]", "obsolete [0xe]", "obsolete [0xf]",
+};
+
+int get_transportids_length(unsigned char * transportid_arr, int max_transportid, int num_transportids);
+void mpath_print_buf_readcap(struct prin_resp *pr_buff);
+void mpath_print_buf_readfullstat(struct prin_resp *pr_buff);
+void mpath_print_buf_readresv(struct prin_resp *pr_buff);
+void mpath_print_buf_readkeys(struct prin_resp *pr_buff);
+void dumpHex(const char* str, int len, int no_ascii);
+void * mpath_alloc_prin_response(int prin_sa);
+void mpath_print_transport_id(struct prin_fulldescr *fdesc);
+int construct_transportid(const char * inp, struct transportid transid[], int num_transportids);
+
+int logsink;
+struct config *multipath_conf;
+
+struct config *get_multipath_config(void)
+{
+	return multipath_conf;
+}
+
+void put_multipath_config(__attribute__((unused)) void * arg)
+{
+	/* Noop for now */
+}
+
+void rcu_register_thread_memb(void) {}
+
+void rcu_unregister_thread_memb(void) {}
+
+struct udev *udev;
+
+static int verbose, loglevel, noisy;
+
+static int handle_args(int argc, char * argv[], int line);
+
+/*
+ * Run every line of batch_fn as one mpathpersist command line.
+ *
+ * Each line is split on blanks, tabs and newlines. A token starting with
+ * '#' ends the line, and a leading "mpathpersist" token is dropped. Lines
+ * that yield no arguments are skipped. The remaining tokens are passed to
+ * handle_args() together with the 1-based line number.
+ *
+ * Returns MPATH_PR_SUCCESS if every executed line succeeded, otherwise the
+ * status of the last line that failed. Returns MPATH_PR_SYNTAX_ERROR if the
+ * file cannot be opened and MPATH_PR_OTHER if memory runs out.
+ */
+static int do_batch_file(const char *batch_fn)
+{
+	char command[] = "mpathpersist";
+	const int ARGV_CHUNK = 2;
+	const char delims[] = " \t\n";
+	size_t len = 0;
+	char *line = NULL;
+	ssize_t n;
+	int nline = 0;
+	int argl = ARGV_CHUNK;
+	int oom = 0;
+	FILE *fl;
+	char **argv = calloc(argl, sizeof(*argv));
+	int ret = MPATH_PR_SUCCESS;
+
+	if (argv == NULL)
+		return MPATH_PR_OTHER;
+
+	fl = fopen(batch_fn, "r");
+	if (fl == NULL) {
+		fprintf(stderr, "unable to open %s: %s\n",
+			batch_fn, strerror(errno));
+		free(argv);
+		return MPATH_PR_SYNTAX_ERROR;
+	} else {
+		if (verbose >= 2)
+			fprintf(stderr, "running batch file %s\n",
+				batch_fn);
+	}
+
+	while ((n = getline(&line, &len, fl)) != -1) {
+		char *_token, *token;
+		int argc = 0;
+		int rv;
+
+		nline++;
+		argv[argc++] = command;
+
+		if (line[n-1] == '\n')
+			line[n-1] = '\0';
+		if (verbose >= 3)
+			fprintf(stderr, "processing line %d: %s\n",
+				nline, line);
+
+		for (token = strtok_r(line, delims, &_token);
+		     token != NULL && *token != '#';
+		     token = strtok_r(NULL, delims, &_token)) {
+
+			if (argc >= argl) {
+				int argn = argl + ARGV_CHUNK;
+				char **tmp;
+
+				tmp = realloc(argv, argn * sizeof(*argv));
+				if (tmp == NULL) {
+					/* argv is still valid, but incomplete */
+					oom = 1;
+					break;
+				}
+				argv = tmp;
+				argl = argn;
+			}
+
+			if (argc == 1 && !strcmp(token, command))
+				continue;
+
+			argv[argc++] = token;
+		}
+
+		if (oom) {
+			/*
+			 * Never execute a command line whose trailing
+			 * arguments were dropped: stop and report.
+			 */
+			fprintf(stderr, "out of memory at %s line %d\n",
+				batch_fn, nline);
+			ret = MPATH_PR_OTHER;
+			break;
+		}
+
+		if (argc <= 1)
+			continue;
+
+		if (verbose >= 2) {
+			int i;
+
+			fprintf(stderr, "## file %s line %d:", batch_fn, nline);
+			for (i = 0; i < argc; i++)
+				fprintf(stderr, " %s", argv[i]);
+			fprintf(stderr, "\n");
+		}
+
+		/* restart option parsing for every command line */
+		optind = 0;
+		rv = handle_args(argc, argv, nline);
+		if (rv != MPATH_PR_SUCCESS)
+			ret = rv;
+	}
+
+	fclose(fl);
+	free(argv);
+	free(line);
+	return ret;
+}
+
+static int handle_args(int argc, char * argv[], int nline)
+{
+	int c;
+	int fd = -1;
+	const char *device_name = NULL;
+	int num_prin_sa = 0;
+	int num_prout_sa = 0;
+	int num_prout_param = 0;
+	int prin_flag = 0;
+	int prout_flag = 0;
+	int ret = 0;
+	int hex = 0;
+	uint64_t param_sark = 0;
+	unsigned int prout_type = 0;
+	int param_alltgpt = 0;
+	int param_aptpl = 0;
+	uint64_t param_rk = 0;
+	unsigned int param_rtp = 0;
+	int num_transportids = 0;
+	struct transportid transportids[MPATH_MX_TIDS];
+	int prout = 1;
+	int prin = 1;
+	int prin_sa = -1;
+	int prout_sa = -1;
+	int num_transport =0;
+	char *batch_fn = NULL;
+	void *resp = NULL;
+	struct transportid * tmp;
+
+	memset(transportids, 0, MPATH_MX_TIDS * sizeof(struct transportid));
+
+	while (1)
+	{
+		int option_index = 0;
+
+		c = getopt_long (argc, argv, "v:Cd:hHioYZK:S:PAT:skrGILcRX:l:f:",
+				 long_options, &option_index);
+		if (c == -1)
+			break;
+
+		switch (c)
+		{
+		case 'f':
+			if (nline != 0) {
+				fprintf(stderr,
+					"ERROR: -f option not allowed in batch file\n");
+				ret = MPATH_PR_SYNTAX_ERROR;
+				goto out;
+
} + if (batch_fn != NULL) { + fprintf(stderr, + "ERROR: -f option can be used at most once\n"); + ret = MPATH_PR_SYNTAX_ERROR; + goto out; + } + batch_fn = strdup(optarg); + break; + case 'v': + if (nline == 0 && 1 != sscanf (optarg, "%d", &loglevel)) + { + fprintf (stderr, "bad argument to '--verbose'\n"); + ret = MPATH_PR_SYNTAX_ERROR; + goto out; + } + break; + + case 'C': + prout_sa = MPATH_PROUT_CLEAR_SA; + ++num_prout_sa; + break; + + case 'd': + device_name = optarg; + break; + + case 'h': + usage (); + free(batch_fn); + return 0; + + case 'H': + hex=1; + break; + + case 'i': + prin_flag = 1; + break; + + case 'o': + prout_flag = 1; + break; + + case 'Y': + param_alltgpt = 1; + ++num_prout_param; + break; + case 'Z': + param_aptpl = 1; + ++num_prout_param; + break; + case 'K': + if (1 != sscanf (optarg, "%" SCNx64 "", ¶m_rk)) + { + fprintf (stderr, "bad argument to '--param-rk'\n"); + ret = MPATH_PR_SYNTAX_ERROR; + goto out; + } + ++num_prout_param; + break; + + case 'S': + if (1 != sscanf (optarg, "%" SCNx64 "", ¶m_sark)) + { + fprintf (stderr, "bad argument to '--param-sark'\n"); + ret = MPATH_PR_SYNTAX_ERROR; + goto out; + } + ++num_prout_param; + break; + + case 'P': + prout_sa = MPATH_PROUT_PREE_SA; + ++num_prout_sa; + break; + + case 'A': + prout_sa = MPATH_PROUT_PREE_AB_SA; + ++num_prout_sa; + break; + + case 'T': + if (1 != sscanf (optarg, "%x", &prout_type)) + { + fprintf (stderr, "bad argument to '--prout-type'\n"); + ret = MPATH_PR_SYNTAX_ERROR; + goto out; + } + ++num_prout_param; + break; + + case 's': + prin_sa = MPATH_PRIN_RFSTAT_SA; + ++num_prin_sa; + break; + + case 'k': + prin_sa = MPATH_PRIN_RKEY_SA; + ++num_prin_sa; + break; + + case 'r': + prin_sa = MPATH_PRIN_RRES_SA; + ++num_prin_sa; + break; + + case 'G': + prout_sa = MPATH_PROUT_REG_SA; + ++num_prout_sa; + break; + + case 'I': + prout_sa = MPATH_PROUT_REG_IGN_SA; + ++num_prout_sa; + break; + + case 'L': + prout_sa = MPATH_PROUT_REL_SA; + ++num_prout_sa; + break; + + case 'c': + 
prin_sa = MPATH_PRIN_RCAP_SA; + ++num_prin_sa; + break; + + case 'R': + prout_sa = MPATH_PROUT_RES_SA; + ++num_prout_sa; + break; + + case 'X': + if (num_transport >= MPATH_MX_TIDS || 0 != construct_transportid(optarg, transportids, num_transport)) { /* transportids[] has only MPATH_MX_TIDS slots; refuse further -X options instead of writing past it */ + fprintf(stderr, "bad argument to '--transport-id'\n"); + ret = MPATH_PR_SYNTAX_ERROR; + goto out; + } + + ++num_transport; + break; + + case 'l': + if (1 != sscanf(optarg, "%u", &mpath_mx_alloc_len)) { + fprintf(stderr, "bad argument to '--alloc-length'\n"); + ret = MPATH_PR_SYNTAX_ERROR; + goto out; + } else if (MPATH_MAX_PARAM_LEN < mpath_mx_alloc_len) { + fprintf(stderr, "'--alloc-length' argument exceeds maximum" + " limit(%d)\n", MPATH_MAX_PARAM_LEN); + ret = MPATH_PR_SYNTAX_ERROR; + goto out; + } + break; + + default: + fprintf(stderr, "unrecognised switch " "code 0x%x ??\n", c); + ret = MPATH_PR_SYNTAX_ERROR; + goto out; + } + } + + if (optind < argc) + { + + if (NULL == device_name) + { + device_name = argv[optind]; + ++optind; + } + if (optind < argc) + { + for (; optind < argc; ++optind) + fprintf (stderr, "Unexpected extra argument: %s\n", argv[optind]); + ret = MPATH_PR_SYNTAX_ERROR; + goto out; + } + } + + if (nline == 0) { + /* set verbosity */ + noisy = (loglevel >= 3) ? 1 : hex; + verbose = (loglevel >= 3)? 
3: loglevel; + ret = mpath_persistent_reserve_init_vecs(verbose); + if (ret != MPATH_PR_SUCCESS) + goto out; + } + + if ((prout_flag + prin_flag) == 0 && batch_fn == NULL) + { + fprintf (stderr, "choose either '--in' or '--out' \n"); + ret = MPATH_PR_SYNTAX_ERROR; + goto out; + } + if ((prout_flag + prin_flag) > 1) + { + fprintf (stderr, "choose either '--in' or '--out' \n"); + ret = MPATH_PR_SYNTAX_ERROR; + goto out; + } + else if (prout_flag) + { /* syntax check on PROUT arguments */ + prin = 0; + if ((1 != num_prout_sa) || (0 != num_prin_sa)) + { + fprintf (stderr, " For Persistent Reserve Out only one " + "appropriate\n service action must be " + "chosen \n"); + ret = MPATH_PR_SYNTAX_ERROR; + goto out; + } + } + else if (prin_flag) + { /* syntax check on PRIN arguments */ + prout = 0; + if (num_prout_sa > 0) + { + fprintf (stderr, " When a service action for Persistent " + "Reserve Out is chosen the\n" + " '--out' option must be given \n"); + ret = MPATH_PR_SYNTAX_ERROR; + goto out; + } + if (0 == num_prin_sa) + { + fprintf (stderr, + " No service action given for Persistent Reserve IN\n"); + ret = MPATH_PR_SYNTAX_ERROR; + goto out; + } + else if (num_prin_sa > 1) + { + fprintf (stderr, " Too many service actions given; choose " + "one only\n"); + ret = MPATH_PR_SYNTAX_ERROR; + goto out; + } + } + else + { + if (batch_fn == NULL) + ret = MPATH_PR_SYNTAX_ERROR; + goto out; + } + + if ((param_rtp) && (MPATH_PROUT_REG_MOV_SA != prout_sa)) + { + fprintf (stderr, " --relative-target-port" + " only useful with --register-move\n"); + ret = MPATH_PR_SYNTAX_ERROR; + goto out; + } + + if (((MPATH_PROUT_RES_SA == prout_sa) || + (MPATH_PROUT_REL_SA == prout_sa) || + (MPATH_PROUT_PREE_SA == prout_sa) || + (MPATH_PROUT_PREE_AB_SA == prout_sa)) && + (0 == prout_type)) { + fprintf(stderr, "Warning: --prout-type probably needs to be " + "given\n"); + } + if ((verbose > 2) && num_transportids) + { + fprintf (stderr, "number of tranport-ids decoded from " + "command line : %d\n", 
num_transportids); + } + + if (device_name == NULL) + { + fprintf (stderr, "No device name given \n"); + ret = MPATH_PR_SYNTAX_ERROR; + goto out; + } + + /* open device */ + if ((fd = open (device_name, O_RDONLY)) < 0) + { + fprintf (stderr, "%s: error opening file (rw) fd=%d\n", + device_name, fd); + ret = MPATH_PR_FILE_ERROR; + goto out; + } + + + if (prin) + { + resp = mpath_alloc_prin_response(prin_sa); + if (!resp) + { + fprintf (stderr, "failed to allocate PRIN response buffer\n"); + ret = MPATH_PR_OTHER; + goto out_fd; + } + + ret = __mpath_persistent_reserve_in (fd, prin_sa, resp, noisy); + if (ret != MPATH_PR_SUCCESS ) + { + fprintf (stderr, "Persistent Reserve IN command failed\n"); + free(resp); + goto out_fd; + } + + switch(prin_sa) + { + case MPATH_PRIN_RKEY_SA: + mpath_print_buf_readkeys(resp); + break; + case MPATH_PRIN_RRES_SA: + mpath_print_buf_readresv(resp); + break; + case MPATH_PRIN_RCAP_SA: + mpath_print_buf_readcap(resp); + break; + case MPATH_PRIN_RFSTAT_SA: + mpath_print_buf_readfullstat(resp); + break; + } + free(resp); + } + else if (prout) + { + int j; + struct prout_param_descriptor *paramp; + + paramp= malloc(sizeof(struct prout_param_descriptor) + (sizeof(struct transportid *)*(MPATH_MX_TIDS ))); + + memset(paramp, 0, sizeof(struct prout_param_descriptor) + (sizeof(struct transportid *)*(MPATH_MX_TIDS))); + + for (j = 7; j >= 0; --j) { + paramp->key[j] = (param_rk & 0xff); + param_rk >>= 8; + } + + for (j = 7; j >= 0; --j) { + paramp->sa_key[j] = (param_sark & 0xff); + param_sark >>= 8; + } + + if (param_alltgpt) + paramp->sa_flags |= MPATH_F_ALL_TG_PT_MASK; + if (param_aptpl) + paramp->sa_flags |= MPATH_F_APTPL_MASK; + + if (num_transport) + { + paramp->sa_flags |= MPATH_F_SPEC_I_PT_MASK; + paramp->num_transportid = num_transport; + for (j = 0 ; j < num_transport; j++) + { + paramp->trnptid_list[j] = (struct transportid *)malloc(sizeof(struct transportid)); + memcpy(paramp->trnptid_list[j], &transportids[j],sizeof(struct 
transportid)); + } + } + + /* PROUT commands other than 'register and move' */ + ret = __mpath_persistent_reserve_out (fd, prout_sa, 0, prout_type, + paramp, noisy); + for (j = 0 ; j < num_transport; j++) + { + tmp = paramp->trnptid_list[j]; + free(tmp); + } + free(paramp); + } + + if (ret != MPATH_PR_SUCCESS) + { + switch(ret) + { + case MPATH_PR_SENSE_UNIT_ATTENTION: + printf("persistent reserve out: scsi status: Unit Attention\n"); + break; + case MPATH_PR_RESERV_CONFLICT: + printf("persistent reserve out: scsi status: Reservation Conflict\n"); + break; + } + printf("PR out: command failed\n"); + } + +out_fd: + close (fd); +out : + if (ret == MPATH_PR_SYNTAX_ERROR) { + free(batch_fn); + if (nline == 0) + usage(); + else + fprintf(stderr, "syntax error on line %d in batch file\n", + nline); + } else if (batch_fn != NULL) { + int rv = do_batch_file(batch_fn); + + free(batch_fn); + ret = ret == 0 ? rv : ret; + } + if (nline == 0) + mpath_persistent_reserve_free_vecs(); + return (ret >= 0) ? ret : MPATH_PR_OTHER; +} + +int main(int argc, char *argv[]) +{ + int ret; + + if (optind == argc) + { + + fprintf (stderr, "No parameter used\n"); + usage (); + exit (1); + } + + if (getuid () != 0) + { + fprintf (stderr, "need to be root\n"); + exit (1); + } + + udev = udev_new(); + multipath_conf = mpath_lib_init(); + if(!multipath_conf) { + udev_unref(udev); + exit(1); + } + + ret = handle_args(argc, argv, 0); + + mpath_lib_exit(multipath_conf); + udev_unref(udev); + + return (ret >= 0) ? 
ret : MPATH_PR_OTHER; +} + +int +get_transportids_length(unsigned char * transportid_arr, int max_transportid, int num_transportids) +{ + int compact_len = 0; + unsigned char * ucp = transportid_arr; + int k, off, protocol_id, len; + for (k = 0, off = 0; ((k < num_transportids) && (k < max_transportid)); + ++k, off += MPATH_MX_TID_LEN) { + protocol_id = ucp[off] & 0xf; + if (5 == protocol_id) { + len = (ucp[off + 2] << 8) + ucp[off + 3] + 4; + if (len < 24) + len = 24; + if (off > compact_len) + memmove(ucp + compact_len, ucp + off, len); + compact_len += len; + + } else { + if (off > compact_len) + memmove(ucp + compact_len, ucp + off, 24); + compact_len += 24; + } + } + + return compact_len; +} + +void mpath_print_buf_readkeys( struct prin_resp *pr_buff) +{ + int i,j,k, num; + unsigned char *keyp; + uint64_t prkey; + printf(" PR generation=0x%x, ", pr_buff->prin_descriptor.prin_readkeys.prgeneration); + + num = pr_buff->prin_descriptor.prin_readkeys.additional_length / 8; + if (0 == num) { + printf(" 0 registered reservation key.\n"); + return; + } + else if (1 == num) + printf(" 1 registered reservation key follows:\n"); + else + printf(" %d registered reservation keys follow:\n", num); + + + keyp = (unsigned char *)&pr_buff->prin_descriptor.prin_readkeys.key_list[0]; + for (i = 0; i < num ; i++) + { + prkey = 0; + for (j = 0; j < 8; ++j) { + + if (j > 0) + prkey <<= 8; + prkey |= keyp[j]; + } + printf(" 0x%" PRIx64 "\n", prkey); + k=8*i+j; + keyp = (unsigned char *)&pr_buff->prin_descriptor.prin_readkeys.key_list[k]; + } +} + +void mpath_print_buf_readresv( struct prin_resp *pr_buff) +{ + int j, num, scope=0, type=0; + unsigned char *keyp; + uint64_t prkey; + + num = pr_buff->prin_descriptor.prin_readresv.additional_length / 8; + if (0 == num) + { + printf(" PR generation=0x%x, there is NO reservation held \n", pr_buff->prin_descriptor.prin_readresv.prgeneration); + return ; + } + else + printf(" PR generation=0x%x, Reservation follows:\n", 
pr_buff->prin_descriptor.prin_readresv.prgeneration); + keyp = (unsigned char *)&pr_buff->prin_descriptor.prin_readkeys.key_list[0]; + prkey = 0; + for (j = 0; j < 8; ++j) { + if (j > 0) + prkey <<= 8; + prkey |= keyp[j]; + } + + printf(" Key = 0x%" PRIx64 "\n", prkey); + + scope = (pr_buff->prin_descriptor.prin_readresv.scope_type >> 4) & 0x0f; + type = pr_buff->prin_descriptor.prin_readresv.scope_type & 0x0f; + + if (scope == 0) + printf(" scope = LU_SCOPE, type = %s", pr_type_strs[type]); + else + printf(" scope = %d, type = %s", scope, pr_type_strs[type]); + + printf("\n"); + +} +/* Print the fields of a PRIN Report Capabilities response (pr_buff) to stdout; complains on stderr and returns when the reported length is <= 2. */ +void mpath_print_buf_readcap( struct prin_resp *pr_buff) +{ + if ( pr_buff->prin_descriptor.prin_readcap.length <= 2 ) { + fprintf(stderr, "Unexpected response for PRIN Report " + "Capabilities\n"); + return; //MALFORMED; + } + + printf("Report capabilities response:\n"); + + printf(" Compatible Reservation Handling(CRH): %d\n", !!(pr_buff->prin_descriptor.prin_readcap.flags[0] & 0x10)); + printf(" Specify Initiator Ports Capable(SIP_C): %d\n",!!(pr_buff->prin_descriptor.prin_readcap.flags[0] & 0x8)); + printf(" All Target Ports Capable(ATP_C): %d\n",!!(pr_buff->prin_descriptor.prin_readcap.flags[0] & 0x4 )); + printf(" Persist Through Power Loss Capable(PTPL_C): %d\n",!!(pr_buff->prin_descriptor.prin_readcap.flags[0] & 0x1)); /* mask bit 0 so CRH/SIP_C/ATP_C do not leak into PTPL_C */ + printf(" Type Mask Valid(TMV): %d\n", !!(pr_buff->prin_descriptor.prin_readcap.flags[1] & 0x80)); + printf(" Allow Commands: %d\n", !!(( pr_buff->prin_descriptor.prin_readcap.flags[1] >> 4) & 0x7)); + printf(" Persist Through Power Loss Active(PTPL_A): %d\n", + !!(pr_buff->prin_descriptor.prin_readcap.flags[1] & 0x1)); + + if(pr_buff->prin_descriptor.prin_readcap.flags[1] & 0x80) + { + printf(" Support indicated in Type mask:\n"); + + printf(" %s: %d\n", pr_type_strs[7], pr_buff->prin_descriptor.prin_readcap.pr_type_mask & 0x80); + printf(" %s: %d\n", pr_type_strs[6], pr_buff->prin_descriptor.prin_readcap.pr_type_mask & 0x40); + printf(" %s: %d\n", pr_type_strs[5], 
pr_buff->prin_descriptor.prin_readcap.pr_type_mask & 0x20); + printf(" %s: %d\n", pr_type_strs[3], pr_buff->prin_descriptor.prin_readcap.pr_type_mask & 0x8); + printf(" %s: %d\n", pr_type_strs[1], pr_buff->prin_descriptor.prin_readcap.pr_type_mask & 0x2); + printf(" %s: %d\n", pr_type_strs[8], pr_buff->prin_descriptor.prin_readcap.pr_type_mask & 0x100); + } +} + +void mpath_print_buf_readfullstat( struct prin_resp *pr_buff) +{ + + int i,j, num; + uint64_t prkey; + uint16_t rel_pt_addr; + unsigned char * keyp; + + num = pr_buff->prin_descriptor.prin_readfd.number_of_descriptor; + if (0 == num) + { + printf(" PR generation=0x%x \n", pr_buff->prin_descriptor.prin_readfd.prgeneration); + return ; + } + else + printf(" PR generation=0x%x \n", pr_buff->prin_descriptor.prin_readfd.prgeneration); + + for (i = 0 ; i < num; i++) + { + keyp = (unsigned char *)&pr_buff->prin_descriptor.prin_readfd.descriptors[i]->key; + + prkey = 0; + for (j = 0; j < 8; ++j) { + if (j > 0) + prkey <<= 8; + prkey |= *keyp; + ++keyp; + } + printf(" Key = 0x%" PRIx64 "\n", prkey); + + if (pr_buff->prin_descriptor.prin_readfd.descriptors[i]->flag & 0x02) + printf(" All target ports bit set\n"); + else { + printf(" All target ports bit clear\n"); + + rel_pt_addr = pr_buff->prin_descriptor.prin_readfd.descriptors[i]->rtpi; + printf(" Relative port address: 0x%x\n", + rel_pt_addr); + } + + if (pr_buff->prin_descriptor.prin_readfd.descriptors[i]->flag & 0x1) { + printf(" << Reservation holder >>\n"); + j = ((pr_buff->prin_descriptor.prin_readfd.descriptors[i]->scope_type >> 4) & 0xf); + if (0 == j) + printf(" scope: LU_SCOPE, "); + else + printf(" scope: %d ", j); + j = (pr_buff->prin_descriptor.prin_readfd.descriptors[i]->scope_type & 0xf); + printf(" type: %s\n", pr_type_strs[j]); + } else + printf(" not reservation holder\n"); + mpath_print_transport_id(pr_buff->prin_descriptor.prin_readfd.descriptors[i]); + } +} + +static void usage(void) +{ + fprintf(stderr, VERSION_STRING); + fprintf(stderr, + 
"Usage: mpathpersist [OPTIONS] [DEVICE]\n" + " Options:\n" + " --verbose|-v level verbosity level\n" + " 0 Critical messages\n" + " 1 Error messages\n" + " 2 Warning messages\n" + " 3 Informational messages\n" + " 4 Informational messages with trace enabled\n" + " --clear|-C PR Out: Clear\n" + " --device=DEVICE|-d DEVICE query or change DEVICE\n" + " --batch-file|-f FILE run commands from FILE\n" + " --help|-h output this usage message\n" + " --hex|-H output response in hex\n" + " --in|-i request PR In command \n" + " --out|-o request PR Out command\n" + " --param-alltgpt|-Y PR Out parameter 'ALL_TG_PT\n" + " --param-aptpl|-Z PR Out parameter 'APTPL'\n" + " --read-keys|-k PR In: Read Keys\n" + " --param-rk=RK|-K RK PR Out parameter reservation key\n" + " --param-sark=SARK|-S SARK PR Out parameter service action\n" + " reservation key (SARK is in hex)\n" + " --preempt|-P PR Out: Preempt\n" + " --preempt-abort|-A PR Out: Preempt and Abort\n" + " --prout-type=TYPE|-T TYPE PR Out command type\n" + " --read-full-status|-s PR In: Read Full Status\n" + " --read-keys|-k PR In: Read Keys\n" + " --read-reservation|-r PR In: Read Reservation\n" + " --register|-G PR Out: Register\n" + " --register-ignore|-I PR Out: Register and Ignore\n" + " --release|-L PR Out: Release\n" + " --report-capabilities|-c PR In: Report Capabilities\n" + " --reserve|-R PR Out: Reserve\n" + " --transport-id=TIDS|-X TIDS TransportIDs can be mentioned\n" + " in several forms\n" + " --alloc-length=LEN|-l LEN PR In: maximum allocation length\n"); +} + +void +mpath_print_transport_id(struct prin_fulldescr *fdesc) +{ + switch (fdesc->trnptid.protocol_id) { + case MPATH_PROTOCOL_ID_FC: + printf(" FCP-2 "); + if (0 != fdesc->trnptid.format_code) + printf(" [Unexpected format code: %d]\n", + fdesc->trnptid.format_code); + dumpHex((const char *)fdesc->trnptid.n_port_name, 8, 0); + break; + case MPATH_PROTOCOL_ID_ISCSI: + printf(" iSCSI "); + if (0 == fdesc->trnptid.format_code) { + printf("name: %.*s\n", 
(int)sizeof(fdesc->trnptid.iscsi_name), + fdesc->trnptid.iscsi_name); + }else if (1 == fdesc->trnptid.format_code){ + printf("world wide unique port id: %.*s\n", + (int)sizeof(fdesc->trnptid.iscsi_name), + fdesc->trnptid.iscsi_name); + }else { + printf(" [Unexpected format code: %d]\n", fdesc->trnptid.format_code); + dumpHex((const char *)fdesc->trnptid.iscsi_name, + (int)sizeof(fdesc->trnptid.iscsi_name), 0); + } + break; + case MPATH_PROTOCOL_ID_SAS: + printf(" SAS "); + if (0 != fdesc->trnptid.format_code) + printf(" [Unexpected format code: %d]\n", + fdesc->trnptid.format_code); + dumpHex((const char *)fdesc->trnptid.sas_address, 8, 0); + break; + default: + return; + } +} +/* Parse a symbolic TransportID ("fcp,<hex>", "sas,<hex>" or "iqn.<name>") from lcp into transid[num_transportids]; returns 1 when the ID is rejected, 0 otherwise. NOTE(review): a string with none of these prefixes still returns 0 and leaves the entry untouched - confirm whether that is intended. */ +int +construct_transportid(const char * lcp, struct transportid transid[], int num_transportids) +{ + int k = 0; + int j, n, b, c, len, alen; + const char * ecp; + const char * isip; + + if ((0 == strncmp("fcp,", lcp, 4)) || /* strncmp stops at the NUL of a short argument, memcmp would read past it */ + (0 == strncmp("FCP,", lcp, 4))) { + lcp += 4; + k = strspn(lcp, "0123456789aAbBcCdDeEfF"); + + len = strlen(lcp); + if (len != k || k < 16) { /* the decode loop below reads lcp[0..15], so 16 hex digits are required */ + fprintf(stderr, "badly formed symbolic FCP TransportID: %s\n", + lcp); + return 1; + } + transid[num_transportids].format_code = MPATH_PROTOCOL_ID_FC; + transid[num_transportids].protocol_id = MPATH_WWUI_DEVICE_NAME; + for (k = 0, j = 0, b = 0; k < 16; ++k) { + c = lcp[k]; + if (isdigit(c)) + n = c - 0x30; + else if (isupper(c)) + n = c - 0x37; + else + n = c - 0x57; + if (k & 1) { + transid[num_transportids].n_port_name[j] = b | n; + ++j; + } else + b = n << 4; + } + goto my_cont_b; + } + if ((0 == strncmp("sas,", lcp, 4)) || (0 == strncmp("SAS,", lcp, 4))) { + lcp += 4; + k = strspn(lcp, "0123456789aAbBcCdDeEfF"); + len =strlen(lcp); + if (len != k || k < 8) { /* the memcpy below reads 8 bytes from lcp */ + fprintf(stderr, "badly formed symbolic SAS TransportID: %s\n", + lcp); + return 1; + } + transid[num_transportids].format_code = MPATH_PROTOCOL_ID_SAS; + transid[num_transportids].protocol_id = MPATH_WWUI_DEVICE_NAME; + memcpy(&transid[num_transportids].sas_address, lcp, 8); /* NOTE(review): copies the first 8 ASCII characters rather than decoding hex as the FCP branch does - confirm */ + + goto 
my_cont_b; + } + if (0 == strncmp("iqn.", lcp, 4)) { + ecp = strpbrk(lcp, " \t"); + isip = strstr(lcp, ",i,0x"); + if (ecp && (isip > ecp)) + isip = NULL; + len = ecp ? (ecp - lcp) : (int)strlen(lcp); + transid[num_transportids].format_code = (isip ? MPATH_WWUI_PORT_IDENTIFIER:MPATH_WWUI_DEVICE_NAME); + transid[num_transportids].protocol_id = MPATH_PROTOCOL_ID_ISCSI; + alen = len + 1; /* at least one trailing null */ + if (alen < 20) + alen = 20; + else if (0 != (alen % 4)) + alen = ((alen / 4) + 1) * 4; + if (alen > 241) { /* sam5r02.pdf A.2 (Annex) */ + fprintf(stderr, "iSCSI name too long, alen=%d\n", alen); + return 1; /* report failure so the caller does not count a half-built ID */ + } + transid[num_transportids].iscsi_name[1] = alen & 0xff; + memcpy(&transid[num_transportids].iscsi_name[2], lcp, len); + goto my_cont_b; + } +my_cont_b: + if (k >= MPATH_MAX_PARAM_LEN) { + fprintf(stderr, "build_transportid: array length exceeded\n"); + return 1; + } + return 0; +} diff --git a/mpathpersist/main.h b/mpathpersist/main.h new file mode 100644 index 0000000..bfbb82e --- /dev/null +++ b/mpathpersist/main.h @@ -0,0 +1,30 @@ +static struct option long_options[] = { + {"verbose", 1, NULL, 'v'}, + {"clear", 0, NULL, 'C'}, + {"device", 1, NULL, 'd'}, + {"batch-file", 1, NULL, 'f' }, + {"help", 0, NULL, 'h'}, + {"hex", 0, NULL, 'H'}, + {"in", 0, NULL, 'i'}, + {"out", 0, NULL, 'o'}, + {"param-alltgpt", 0, NULL, 'Y'}, + {"param-aptpl", 0, NULL, 'Z'}, + {"param-rk", 1, NULL, 'K'}, + {"param-sark", 1, NULL, 'S'}, + {"preempt", 0, NULL, 'P'}, + {"preempt-abort", 0, NULL, 'A'}, + {"prout-type", 1, NULL, 'T'}, + {"read-full-status", 0, NULL, 's'}, + {"read-keys", 0, NULL, 'k'}, + {"read-reservation", 0, NULL, 'r'}, + {"register", 0, NULL, 'G'}, + {"register-ignore", 0, NULL, 'I'}, + {"release", 0, NULL, 'L'}, + {"report-capabilities", 0, NULL, 'c'}, + {"reserve", 0, NULL, 'R'}, + {"transport-id", 1, NULL, 'X'}, + {"alloc-length", 1, NULL, 'l'}, + {NULL, 0, NULL, 0} +}; + +static void usage(void); diff --git a/mpathpersist/mpathpersist.8 
b/mpathpersist/mpathpersist.8 new file mode 100644 index 0000000..882043a --- /dev/null +++ b/mpathpersist/mpathpersist.8 @@ -0,0 +1,300 @@ +.\" ---------------------------------------------------------------------------- +.\" Update the date below if you make any significant change. +.\" Make sure there are no errors with: +.\" groff -z -wall -b -e -t mpathpersist/mpathpersist.8 +.\" +.\" ---------------------------------------------------------------------------- +. +.TH MPATHPERSIST 8 2019-05-27 "Linux" +. +. +.\" ---------------------------------------------------------------------------- +.SH NAME +.\" ---------------------------------------------------------------------------- +. +mpathpersist \- Manages SCSI persistent reservations on dm multipath devices. +. +. +.\" ---------------------------------------------------------------------------- +.SH SYNOPSIS +.\" ---------------------------------------------------------------------------- +. +.B mpathpersist +.RB [\| OPTIONS \|] +.I device +. +. +.\" ---------------------------------------------------------------------------- +.SH DESCRIPTION +.\" ---------------------------------------------------------------------------- +. +This utility is used to manage SCSI persistent reservations on Device Mapper +Multipath devices. To be able to use this functionality, the \fIreservation_key\fR +attribute must be defined in the \fI/etc/multipath.conf\fR file. Otherwise the +\fBmultipathd\fR daemon will not check for persistent reservation for newly +discovered paths or reinstated paths. +. +.LP +\fBmpathpersist\fR supports the same command-line options as the +\fBsg_persist\fR utility. +. +Consult the \fBsg_persist (8)\fR manual page for an in-depth discussion of the +various options. +. +.\" ---------------------------------------------------------------------------- +.SH OPTIONS +.\" ---------------------------------------------------------------------------- +. 
+.TP +.BI \--verbose|\-v " level" +Verbosity: +.RS +.TP 5 +.I 0 +Critical messages. +.TP +.I 1 +Error messages. +.TP +.I 2 +Warning messages. +.TP +.I 3 +Informational messages. +.TP +.I 4 +Informational messages with trace enabled. +.RE +. +.TP +.BI \--device=\fIDEVICE\fB|\-d " DEVICE" +Query or change DEVICE. +. +.TP +.BI \--batch-file=\fIFILE\fB|\-f " FILE" +Read commands from \fIFILE\fR. See section \(dqBATCH FILES\(dq below. This +option can be given at most once. +. +.TP +.B \--help|\-h +Output this usage message. +. +.TP +.B \--hex|\-H +Output response in hex. +. +.TP +.B \--in|\-i +Request PR In command. +. +.TP +.B \--out|\-o +Request PR Out command. +. +.TP +.B \--param-alltgpt|\-Y +PR Out parameter 'ALL_TG_PT'. +. +.TP +.B \--param-aptpl|\-Z +PR Out parameter 'APTPL'. +. +.TP +.B \--read-keys|\-k +PR In: Read Keys. +. +.TP +.BI \--param-rk=\fIRK\fB|\-K " RK" +PR Out parameter reservation key (RK is in hex, up to 8 bytes). +. +.TP +.BI \--param-sark=\fISARK\fB|\-S " SARK" +PR Out parameter service action reservation key (SARK is in hex). +. +.TP +.B \--preempt|\-P +PR Out: Preempt. +. +.TP +.B \--clear|\-C +PR Out: Clear registrations. +. +.TP +.B \--preempt-abort|\-A +PR Out: Preempt and Abort. +. +.TP +.BI \--prout-type=\fITYPE\fB|\-T " TYPE" +PR Out command type. +. +.TP +.B \--read-full-status|\-s +PR In: Read Full Status. +. +.TP +.B \--read-keys|\-k +PR In: Read Keys. +. +.TP +.B \--read-reservation|\-r +PR In: Read Reservation. +. +.TP +.B \--register|\-G +PR Out: Register. +. +.TP +.B \--register-ignore|\-I +PR Out: Register and Ignore. +. +.TP +.B \--release|\-L +PR Out: Release. +. +.TP +.B \--report-capabilities|\-c +PR In: Report Capabilities. +. +.TP +.B \--reserve|\-R +PR Out: Reserve. +. +.TP +.BI \--transport-id=\fITIDS\fB|\-X " TIDS" +TransportIDs can be mentioned in several forms. +. +.TP +.BI \--alloc-length=\fILEN\fB|\-l " LEN" +PR In: maximum allocation length. LEN is a decimal number between 0 and 8192. +. +. 
+.\" ---------------------------------------------------------------------------- +.SH EXAMPLE +.\" ---------------------------------------------------------------------------- +. +.PP +Register the key \(dq123abc\(dq for the /dev/mapper/mpath9 device: +.RS +\fBmpathpersist --out --register --param-sark=\fI123abc /dev/mapper/mpath9\fR +.RE +.PP +Read registered reservation keys for the /dev/mapper/mpath9 device: +.RS +\fBmpathpersist -i -k \fI/dev/mapper/mpath9\fR +.RE +.PP +Create a reservation for the /dev/mapper/mpath9 device with the given +reservation key: +.RS +\fBmpathpersist --out --reserve --param-rk=\fI123abc \fB--prout-type=\fI8 \fB-d \fI/dev/mapper/mpath9\fR +.RE +.PP +Read the reservation status of the /dev/mapper/mpath9 device: +.RS +\fBmpathpersist -i -s -d \fI/dev/mapper/mpath9\fR +.RE +.PP +Release the previously created reservation (note that the prout-type needs to +be the same as above): +.RS +\fBmpathpersist --out --release --param-rk=\fI123abc \fB--prout-type=\fI8 \fB-d \fI/dev/mapper/mpath9\fR +.RE +.PP +Remove the current key registered for this host (i.e. reset it to 0): +.RS +\fBmpathpersist --out --register-ignore -K \fI123abc\fB -S \fI0\fB \fI/dev/mapper/mpath9\fR +.RE +.PP +Remove current reservation, and unregister all registered keys from all I_T nexuses: +.RS +\fBmpathpersist -oCK \fI123abc \fI/dev/mapper/mpath9\fR +.RE +. +. +.\" ---------------------------------------------------------------------------- +.SH BATCH FILES +.\" ---------------------------------------------------------------------------- +. +.PP +The option \fI--batch-file\fR (\fI-f\fR) sets an input file to be processed +by \fBmpathpersist\fR. Grouping commands in batch files can provide a speed +improvement in particular on large installments, because \fBmpathpersist\fR +needs to scan existing paths and maps only once during startup. +. +.PP +The input file is a text file that is parsed +line by line. Every line of the file is interpreted as a command line +(i.e. 
list of options and parameters) for \fBmpathpersist\fR. Options +and parameters are separated by one or more whitespace characters (space or TAB). +Lines can, but do not have to, begin with the word \(dqmpathpersist\(dq. +The \(dq#\(dq character, either at the beginning of the line or following +some whitespace, denotes the start of a comment that lasts until the end of the +line. Empty lines are allowed. Continuation of mpathpersist commands over +multiple lines is not supported. +. +.PP +All options listed in this man page, except \fI-f\fR and +\fI-v\fR, are allowed in batch files. Both short and long option formats may be used. +Using the \fI-f\fR option inside the batch file is an error. The \fI-v\fR +option is ignored in batch files. +. +.PP +The multipath map on which to act must be specified on every input line, e.g. using the \fI-d\fR option. +Commands acting on different multipath maps may be combined in a +batch file, and multiple commands may act on the same multipath +map. Commands are executed one by one, so +that commands further down in the file see status changes caused by previous +commands. +If \fBmpathpersist\fR encounters an error while processing a line in the +batch file, batch file processing is \fBnot\fR aborted; subsequent commands +are executed nonetheless. The exit status of \fBmpathpersist\fR is the status +of the first failed command, or 0 if all commands succeeded. +. +.PP +If other options and parameters are used along with +\fI-f\fR on the \fBmpathpersist\fR command line, the command line will be executed first, followed +by the commands from the batch file. +. +.PP +Below is an example of a valid batch input file. +. +.PP +.RS +.EX +# This is an mpathpersist input file. 
+# Short and long forms of the same command +-i -k /dev/dm-1 # short form, this comment is ignored +mpathpersist --in --read-keys --device=/dev/dm-1 + +# Mixing of long and short options, variable white space + --out --register -S abcde /dev/dm-1 + +# Mixing of commands for different maps +-ir /dev/dm-0 +-ir /dev/dm-1 + +mpathpersist --out --param-rk abcde --reserve --prout-type 5 /dev/dm-1 +# This should now show a reservation +-ir /dev/dm-1 +-oCK abcde /dev/dm-1 +--in --read-reservation /dev/dm-1 +.EE +.RE +. +. +.\" ---------------------------------------------------------------------------- +.SH "SEE ALSO" +.\" ---------------------------------------------------------------------------- +. +.BR multipath (8), +.BR multipathd (8), +.BR sg_persist (8). +. +. +.\" ---------------------------------------------------------------------------- +.SH AUTHORS +.\" ---------------------------------------------------------------------------- +. +\fImultipath-tools\fR was developed by Christophe Varoqui +and others. +.\" EOF diff --git a/multipath/11-dm-mpath.rules b/multipath/11-dm-mpath.rules new file mode 100644 index 0000000..07320a1 --- /dev/null +++ b/multipath/11-dm-mpath.rules @@ -0,0 +1,111 @@ +ACTION!="add|change", GOTO="mpath_end" +ENV{DM_UDEV_RULES_VSN}!="?*", GOTO="mpath_end" +ENV{DM_UUID}!="mpath-?*", GOTO="mpath_end" + +IMPORT{db}="DM_DISABLE_OTHER_RULES_FLAG_OLD" +IMPORT{db}="MPATH_DEVICE_READY" + +# If this uevent didn't come from dm, don't try to update the +# device state +ENV{DM_COOKIE}!="?*", ENV{DM_ACTION}!="PATH_*", IMPORT{db}="DM_UDEV_DISABLE_OTHER_RULES_FLAG", IMPORT{db}="DM_NOSCAN", GOTO="scan_import" + +ENV{.MPATH_DEVICE_READY_OLD}="$env{MPATH_DEVICE_READY}" + +# multipath sets DM_SUBSYSTEM_UDEV_FLAG2 when it reloads a +# table with no active devices. 
If this happens, mark the +# device not ready +ENV{DM_SUBSYSTEM_UDEV_FLAG2}=="1", ENV{MPATH_DEVICE_READY}="0",\ + GOTO="mpath_action" + +# If the last path has failed mark the device not ready +# Note that DM_NR_VALID_PATHS is only set for PATH_FAILED|PATH_REINSTATED +# events. +# This may not be reliable, as events aren't necessarily received in order. +ENV{DM_NR_VALID_PATHS}=="0", ENV{MPATH_DEVICE_READY}="0", GOTO="mpath_action" + +ENV{MPATH_SBIN_PATH}="/sbin" +TEST!="$env{MPATH_SBIN_PATH}/multipath", ENV{MPATH_SBIN_PATH}="/usr/sbin" + +# Don't run multipath -U during "coldplug" after switching root, +# because paths are just being added to the udev db. +ACTION=="add", ENV{.MPATH_DEVICE_READY_OLD}=="1", GOTO="paths_ok" + +# Check the map state directly with multipath -U. +# This doesn't attempt I/O on the device. +PROGRAM=="$env{MPATH_SBIN_PATH}/multipath -U %k", GOTO="paths_ok" +ENV{MPATH_DEVICE_READY}="0", GOTO="mpath_action" +LABEL="paths_ok" + +# Don't mark a device ready on a PATH_FAILED event. even if +# DM_NR_VALID_PATHS is greater than 0. Just keep the existing +# value +ENV{DM_ACTION}=="PATH_FAILED", GOTO="mpath_action" + +# This event is either a PATH_REINSTATED or a table reload where +# there are active paths. Mark the device ready +ENV{MPATH_DEVICE_READY}="1" + +LABEL="mpath_action" +# DM_SUBSYSTEM_UDEV_FLAG0 is the "RELOAD" flag for multipath subsystem. +# Drop the DM_ACTIVATION flag here as mpath reloads tables if any of its +# paths are lost/recovered. For any stack above the mpath device, this is not +# something that should be reacted upon since it would be useless extra work. +# It's exactly mpath's job to provide *seamless* device access to any of the +# paths that are available underneath. +ENV{DM_SUBSYSTEM_UDEV_FLAG0}=="1", \ + ENV{DM_ACTIVATION}="0", ENV{MPATH_UNCHANGED}="1" + +# For path failed or reinstated events, unset DM_ACTIVATION. +# This is similar to the DM_SUBSYSTEM_UDEV_FLAG0 case above. 
+ENV{DM_ACTION}=="PATH_FAILED|PATH_REINSTATED", \ + ENV{DM_ACTIVATION}="0", ENV{MPATH_UNCHANGED}="1" + +# Do not initiate scanning if no path is available, +# otherwise there would be a hang or IO error on access. +# We'd like to avoid this, especially within udev processing. +ENV{MPATH_DEVICE_READY}=="0", ENV{DM_NOSCAN}="1" + +# Also skip all foreign rules if no path is available. +# Remember the original value of DM_UDEV_DISABLE_OTHER_RULES_FLAG +# and restore it once we have at least one path available. +ENV{MPATH_DEVICE_READY}=="0", ENV{.MPATH_DEVICE_READY_OLD}=="1",\ + ENV{DM_DISABLE_OTHER_RULES_FLAG_OLD}=="",\ + ENV{DM_DISABLE_OTHER_RULES_FLAG_OLD}="$env{DM_UDEV_DISABLE_OTHER_RULES_FLAG}" +ENV{MPATH_DEVICE_READY}=="0", ENV{DM_UDEV_DISABLE_OTHER_RULES_FLAG}="1" +ENV{MPATH_DEVICE_READY}!="0", ENV{.MPATH_DEVICE_READY_OLD}=="0",\ + ENV{DM_UDEV_DISABLE_OTHER_RULES_FLAG}="$env{DM_DISABLE_OTHER_RULES_FLAG_OLD}",\ + ENV{DM_DISABLE_OTHER_RULES_FLAG_OLD}="",\ + ENV{DM_ACTIVATION}="1" + +# The code to check multipath state ends here. We need to set +# properties and symlinks regardless of whether the map is usable or +# not. If symlinks get lost, systemd may auto-unmount file systems. + +LABEL="scan_import" +ENV{DM_NOSCAN}!="1", GOTO="import_end" +IMPORT{db}="ID_FS_TYPE" +IMPORT{db}="ID_FS_USAGE" +IMPORT{db}="ID_FS_UUID" +IMPORT{db}="ID_FS_UUID_ENC" +IMPORT{db}="ID_FS_LABEL" +IMPORT{db}="ID_FS_LABEL_ENC" +IMPORT{db}="ID_FS_VERSION" + +LABEL="import_end" + +# Multipath maps should take precedence over their members. +ENV{DM_UDEV_LOW_PRIORITY_FLAG}!="1", OPTIONS+="link_priority=50" + +# Set some additional symlinks that typically exist for mpath +# path members, too, and should be overridden. + +# kpartx_id is very robust, it works for suspended maps and maps +# with 0 dependencies. 
It sets DM_TYPE, DM_PART, DM_WWN +TEST=="/usr/lib/udev/kpartx_id", \ + IMPORT{program}=="kpartx_id %M %m $env{DM_UUID}" + +ENV{DM_TYPE}=="?*", ENV{DM_SERIAL}=="?*", \ + SYMLINK+="disk/by-id/$env{DM_TYPE}-$env{DM_SERIAL}" +ENV{DM_WWN}=="?*", SYMLINK+="disk/by-id/wwn-$env{DM_WWN}" + +LABEL="mpath_end" diff --git a/multipath/Makefile b/multipath/Makefile new file mode 100644 index 0000000..0828a8f --- /dev/null +++ b/multipath/Makefile @@ -0,0 +1,46 @@ +# +# Copyright (C) 2003 Christophe Varoqui, +# +include ../Makefile.inc + +CFLAGS += $(BIN_CFLAGS) -I$(multipathdir) -I$(mpathcmddir) +LDFLAGS += $(BIN_LDFLAGS) +LIBDEPS += -L$(multipathdir) -lmultipath -L$(mpathcmddir) -lmpathcmd \ + -lpthread -ldevmapper -ldl -ludev + +EXEC = multipath + +OBJS = main.o + +all: $(EXEC) + +$(EXEC): $(OBJS) $(multipathdir)/libmultipath.so $(mpathcmddir)/libmpathcmd.so + $(CC) $(CFLAGS) $(OBJS) -o $(EXEC) $(LDFLAGS) $(LIBDEPS) + $(GZIP) $(EXEC).8 > $(EXEC).8.gz + $(GZIP) $(EXEC).conf.5 > $(EXEC).conf.5.gz + +install: + $(INSTALL_PROGRAM) -d $(DESTDIR)$(bindir) + $(INSTALL_PROGRAM) -m 755 $(EXEC) $(DESTDIR)$(bindir)/ + $(INSTALL_PROGRAM) -d $(DESTDIR)$(udevrulesdir) + $(INSTALL_PROGRAM) -m 644 11-dm-mpath.rules $(DESTDIR)$(udevrulesdir) + $(INSTALL_PROGRAM) -m 644 $(EXEC).rules $(DESTDIR)$(libudevdir)/rules.d/56-multipath.rules + $(INSTALL_PROGRAM) -d $(DESTDIR)$(man8dir) + $(INSTALL_PROGRAM) -m 644 $(EXEC).8.gz $(DESTDIR)$(man8dir) + $(INSTALL_PROGRAM) -d $(DESTDIR)$(man5dir) + $(INSTALL_PROGRAM) -m 644 $(EXEC).conf.5.gz $(DESTDIR)$(man5dir) + +uninstall: + $(RM) $(DESTDIR)$(bindir)/$(EXEC) + $(RM) $(DESTDIR)$(udevrulesdir)/11-dm-mpath.rules + $(RM) $(DESTDIR)$(libudevdir)/rules.d/56-multipath.rules + $(RM) $(DESTDIR)$(man8dir)/$(EXEC).8.gz + $(RM) $(DESTDIR)$(man5dir)/$(EXEC).conf.5.gz + +clean: dep_clean + $(RM) core *.o $(EXEC) *.gz + +include $(wildcard $(OBJS:.o=.d)) + +dep_clean: + $(RM) $(OBJS:.o=.d) diff --git a/multipath/main.c b/multipath/main.c new file mode 100644 index 
0000000..cf9d2a2 --- /dev/null +++ b/multipath/main.c @@ -0,0 +1,1163 @@ +/* + * Soft: multipath device mapper target autoconfig + * + * Version: $Id: main.h,v 0.0.1 2003/09/18 15:13:38 cvaroqui Exp $ + * + * Author: Christophe Varoqui + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Copyright (c) 2003, 2004, 2005 Christophe Varoqui + * Copyright (c) 2005 Benjamin Marzinski, Redhat + * Copyright (c) 2005 Kiyoshi Ueda, NEC + * Copyright (c) 2005 Patrick Caulfield, Redhat + * Copyright (c) 2005 Edward Goggin, EMC + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "checkers.h" +#include "prio.h" +#include "vector.h" +#include "memory.h" +#include +#include "devmapper.h" +#include "util.h" +#include "defaults.h" +#include "config.h" +#include "structs.h" +#include "structs_vec.h" +#include "dmparser.h" +#include "sysfs.h" +#include "blacklist.h" +#include "discovery.h" +#include "debug.h" +#include "switchgroup.h" +#include "dm-generic.h" +#include "print.h" +#include "alias.h" +#include "configure.h" +#include "pgpolicies.h" +#include "version.h" +#include +#include "wwids.h" +#include "uxsock.h" +#include "mpath_cmd.h" +#include "foreign.h" +#include "propsel.h" +#include "time-util.h" +#include "file.h" + +int logsink; +struct udev *udev; +struct config *multipath_conf; + +/* + * Return values of configure(), print_cmd_valid(), and main(). + * RTVL_{YES,NO} are synonyms for RTVL_{OK,FAIL} for the CMD_VALID_PATH case. 
+ */ +enum { + RTVL_OK = 0, + RTVL_YES = RTVL_OK, + RTVL_FAIL = 1, + RTVL_NO = RTVL_FAIL, + RTVL_MAYBE, /* only used internally, never returned */ + RTVL_RETRY, /* returned by configure(), not by main() */ +}; + +struct config *get_multipath_config(void) +{ + return multipath_conf; +} + +void put_multipath_config(__attribute__((unused)) void *arg) +{ + /* Noop for now */ +} + +static int +dump_config (struct config *conf, vector hwes, vector mpvec) +{ + char * reply = snprint_config(conf, NULL, hwes, mpvec); + + if (reply != NULL) { + printf("%s", reply); + FREE(reply); + return 0; + } else + return 1; +} + +void rcu_register_thread_memb(void) {} + +void rcu_unregister_thread_memb(void) {} + +static int +filter_pathvec (vector pathvec, char * refwwid) +{ + int i; + struct path * pp; + + if (!refwwid || !strlen(refwwid)) + return 0; + + vector_foreach_slot (pathvec, pp, i) { + if (strncmp(pp->wwid, refwwid, WWID_SIZE) != 0) { + condlog(3, "skip path %s : out of scope", pp->dev); + free_path(pp); + vector_del_slot(pathvec, i); + i--; + } + } + return 0; +} + +static void +usage (char * progname) +{ + fprintf (stderr, VERSION_STRING); + fprintf (stderr, "Usage:\n"); + fprintf (stderr, " %s [-v level] [-B|-d|-i|-q|-r] [-b file] [-p policy] [device]\n", progname); + fprintf (stderr, " %s [-v level] [-R retries] -f device\n", progname); + fprintf (stderr, " %s [-v level] [-R retries] -F\n", progname); + fprintf (stderr, " %s [-v level] [-l|-ll] [device]\n", progname); + fprintf (stderr, " %s [-v level] [-a|-w] device\n", progname); + fprintf (stderr, " %s [-v level] -W\n", progname); + fprintf (stderr, " %s [-v level] [-i] [-c|-C] device\n", progname); + fprintf (stderr, " %s [-v level] [-i] [-u|-U]\n", progname); + fprintf (stderr, " %s [-h|-t|-T]\n", progname); + fprintf (stderr, + "\n" + "Where:\n" + " -h print this usage text\n" + " -l show multipath topology (sysfs and DM info)\n" + " -ll show multipath topology (maximum info)\n" + " -f flush a multipath device 
map\n" + " -F flush all multipath device maps\n" + " -a add a device wwid to the wwids file\n" + " -c check if a device should be a path in a multipath device\n" + " -C check if a multipath device has usable paths\n" + " -q allow queue_if_no_path when multipathd is not running\n" + " -d dry run, do not create or update devmaps\n" + " -t display the currently used multipathd configuration\n" + " -T display the multipathd configuration without builtin defaults\n" + " -r force devmap reload\n" + " -i ignore wwids file\n" + " -B treat the bindings file as read only\n" + " -b fil bindings file location\n" + " -w remove a device from the wwids file\n" + " -W reset the wwids file include only the current devices\n" + " -R num number of times to retry removes of in-use devices\n" + " -u check if the device specified in the program environment should be a\n" + " path in a multipath device\n" + " -U check if the device specified in the program environment is a\n" + " multipath device with usable paths, see -C flag\n" + " -p pol force all maps to specified path grouping policy:\n" + " . failover one path per priority group\n" + " . multibus all paths in one priority group\n" + " . group_by_serial one priority group per serial\n" + " . group_by_prio one priority group per priority lvl\n" + " . group_by_node_name one priority group per target node\n" + " -v lvl verbosity level:\n" + " . 0 no output\n" + " . 1 print created devmap names only\n" + " . 2 default verbosity\n" + " . 3 print debug information\n" + " device action limited to:\n" + " . multipath named 'device' (ex: mpath0)\n" + " . multipath whose wwid is 'device' (ex: 60051...)\n" + " . multipath including the path named 'device' (ex: /dev/sda or\n" + " /dev/dm-0)\n" + " . 
multipath including the path with maj:min 'device' (ex: 8:0)\n" + ); + +} + +static int +update_paths (struct multipath * mpp, int quick) +{ + int i, j; + struct pathgroup * pgp; + struct path * pp; + struct config *conf; + + if (!mpp->pg) + return 0; + + vector_foreach_slot (mpp->pg, pgp, i) { + if (!pgp->paths) + continue; + + vector_foreach_slot (pgp->paths, pp, j) { + if (!strlen(pp->dev)) { + if (devt2devname(pp->dev, FILE_NAME_SIZE, + pp->dev_t)) { + /* + * path is not in sysfs anymore + */ + pp->chkrstate = pp->state = PATH_DOWN; + pp->offline = 1; + continue; + } + pp->mpp = mpp; + if (quick) + continue; + conf = get_multipath_config(); + if (pathinfo(pp, conf, DI_ALL)) + pp->state = PATH_UNCHECKED; + put_multipath_config(conf); + continue; + } + pp->mpp = mpp; + if (quick) + continue; + if (pp->state == PATH_UNCHECKED || + pp->state == PATH_WILD) { + conf = get_multipath_config(); + if (pathinfo(pp, conf, DI_CHECKER)) + pp->state = PATH_UNCHECKED; + put_multipath_config(conf); + } + + if (pp->priority == PRIO_UNDEF) { + conf = get_multipath_config(); + if (pathinfo(pp, conf, DI_PRIO)) + pp->priority = PRIO_UNDEF; + put_multipath_config(conf); + } + } + } + return 0; +} + +static int +get_dm_mpvec (enum mpath_cmds cmd, vector curmp, vector pathvec, char * refwwid) +{ + int i; + struct multipath * mpp; + char params[PARAMS_SIZE], status[PARAMS_SIZE]; + + if (dm_get_maps(curmp)) + return 1; + + vector_foreach_slot (curmp, mpp, i) { + /* + * discard out of scope maps + */ + if (refwwid && strlen(refwwid) && + strncmp(mpp->wwid, refwwid, WWID_SIZE)) { + condlog(3, "skip map %s: out of scope", mpp->alias); + free_multipath(mpp, KEEP_PATHS); + vector_del_slot(curmp, i); + i--; + continue; + } + + if (cmd == CMD_VALID_PATH) + continue; + + dm_get_map(mpp->alias, &mpp->size, params); + condlog(3, "params = %s", params); + dm_get_status(mpp->alias, status); + condlog(3, "status = %s", status); + + disassemble_map(pathvec, params, mpp, 0); + + /* + * 
disassemble_map() can add new paths to pathvec. + * If not in "fast list mode", we need to fetch information + * about them + */ + update_paths(mpp, (cmd == CMD_LIST_SHORT)); + + if (cmd == CMD_LIST_LONG) + mpp->bestpg = select_path_group(mpp); + + disassemble_status(status, mpp); + + if (cmd == CMD_LIST_SHORT || + cmd == CMD_LIST_LONG) { + struct config *conf = get_multipath_config(); + print_multipath_topology(mpp, conf->verbosity); + put_multipath_config(conf); + } + + if (cmd == CMD_CREATE) + reinstate_paths(mpp); + } + + if (cmd == CMD_LIST_SHORT || cmd == CMD_LIST_LONG) { + struct config *conf = get_multipath_config(); + + print_foreign_topology(conf->verbosity); + put_multipath_config(conf); + } + + return 0; +} + +static int check_usable_paths(struct config *conf, + const char *devpath, enum devtypes dev_type) +{ + struct udev_device *ud = NULL; + struct multipath *mpp = NULL; + struct pathgroup *pg; + struct path *pp; + char *mapname; + vector pathvec = NULL; + char params[PARAMS_SIZE], status[PARAMS_SIZE]; + dev_t devt; + int r = 1, i, j; + + ud = get_udev_device(devpath, dev_type); + if (ud == NULL) + return r; + + devt = udev_device_get_devnum(ud); + if (!dm_is_dm_major(major(devt))) { + condlog(1, "%s is not a dm device", devpath); + goto out; + } + + mapname = dm_mapname(major(devt), minor(devt)); + if (mapname == NULL) { + condlog(1, "dm device not found: %s", devpath); + goto out; + } + + if (dm_is_mpath(mapname) != 1) { + condlog(1, "%s is not a multipath map", devpath); + goto free; + } + + /* pathvec is needed for disassemble_map */ + pathvec = vector_alloc(); + if (pathvec == NULL) + goto free; + + mpp = dm_get_multipath(mapname); + if (mpp == NULL) + goto free; + + dm_get_map(mpp->alias, &mpp->size, params); + dm_get_status(mpp->alias, status); + disassemble_map(pathvec, params, mpp, 0); + disassemble_status(status, mpp); + + vector_foreach_slot (mpp->pg, pg, i) { + vector_foreach_slot (pg->paths, pp, j) { + pp->udev = 
get_udev_device(pp->dev_t, DEV_DEVT); + if (pp->udev == NULL) + continue; + if (pathinfo(pp, conf, DI_SYSFS|DI_NOIO|DI_CHECKER) != PATHINFO_OK) + continue; + + if (pp->state == PATH_UP && + pp->dmstate == PSTATE_ACTIVE) { + condlog(3, "%s: path %s is usable", + devpath, pp->dev); + r = 0; + goto found; + } + } + } +found: + condlog(r == 0 ? 3 : 2, "%s:%s usable paths found", + devpath, r == 0 ? "" : " no"); +free: + FREE(mapname); + free_multipath(mpp, FREE_PATHS); + vector_free(pathvec); +out: + udev_device_unref(ud); + return r; +} + +enum { + FIND_MULTIPATHS_WAIT_DONE = 0, + FIND_MULTIPATHS_WAITING = 1, + FIND_MULTIPATHS_ERROR = -1, + FIND_MULTIPATHS_NEVER = -2, +}; + +static const char shm_find_mp_dir[] = MULTIPATH_SHM_BASE "find_multipaths"; + +/** + * find_multipaths_check_timeout(pp, tmo, until) + * Helper for "find_multipaths smart" + * + * @param[in] pp: path to check / record + * @param[in] tmo: configured timeout for this path, or value <= 0 for checking + * @param[out] until: remaining time to wait (recorded expiry time minus the + * current CLOCK_REALTIME time), set only if the return + * value is FIND_MULTIPATHS_WAITING + * @returns: FIND_MULTIPATHS_WAIT_DONE, if waiting has finished + * @returns: FIND_MULTIPATHS_ERROR, if internal error occurred + * @returns: FIND_MULTIPATHS_NEVER, if tmo is <= 0 and we didn't wait for this + * device + * @returns: FIND_MULTIPATHS_WAITING, if timeout hasn't expired + */ +static int find_multipaths_check_timeout(const struct path *pp, long tmo, + struct timespec *until) +{ + char path[PATH_MAX]; + struct timespec now, ftimes[2], tdiff; + struct stat st; + long fd; + int r, retries = 0; + + clock_gettime(CLOCK_REALTIME, &now); + + if (safe_sprintf(path, "%s/%s", shm_find_mp_dir, pp->dev_t)) { + condlog(1, "%s: path name overflow", __func__); + return FIND_MULTIPATHS_ERROR; + } + + if (ensure_directories_exist(path, 0700)) { + condlog(1, "%s: error creating directories", __func__); + return FIND_MULTIPATHS_ERROR; + } + +retry: + fd = open(path, O_RDONLY); + if (fd != -1) { + 
pthread_cleanup_push(close_fd, (void *)fd); + r = fstat(fd, &st); + pthread_cleanup_pop(1); + + } else if (tmo > 0) { + if (errno == ENOENT) + fd = open(path, O_RDWR|O_EXCL|O_CREAT, 0644); + if (fd == -1) { + if (errno == EEXIST && !retries++) + /* We could have raced with another process */ + goto retry; + condlog(1, "%s: error opening %s: %s", + __func__, path, strerror(errno)); + return FIND_MULTIPATHS_ERROR; + }; + + pthread_cleanup_push(close_fd, (void *)fd); + /* + * We just created the file. Set st_mtim to our desired + * expiry time. + */ + ftimes[0].tv_sec = 0; + ftimes[0].tv_nsec = UTIME_OMIT; + ftimes[1].tv_sec = now.tv_sec + tmo; + ftimes[1].tv_nsec = now.tv_nsec; + if (futimens(fd, ftimes) != 0) { + condlog(1, "%s: error in futimens(%s): %s", __func__, + path, strerror(errno)); + } + r = fstat(fd, &st); + pthread_cleanup_pop(1); + } else + return FIND_MULTIPATHS_NEVER; + + if (r != 0) { + condlog(1, "%s: error in fstat for %s: %m", __func__, path); + return FIND_MULTIPATHS_ERROR; + } + + timespecsub(&st.st_mtim, &now, &tdiff); + + if (tdiff.tv_sec <= 0) + return FIND_MULTIPATHS_WAIT_DONE; + + *until = tdiff; + return FIND_MULTIPATHS_WAITING; +} + +static int print_cmd_valid(int k, const vector pathvec, + struct config *conf) +{ + int wait = FIND_MULTIPATHS_NEVER; + struct timespec until; + struct path *pp; + + if (k != RTVL_YES && k != RTVL_NO && k != RTVL_MAYBE) + return RTVL_NO; + + if (k == RTVL_MAYBE) { + /* + * Caller ensures that pathvec[0] is the path to + * examine. 
+ */ + pp = VECTOR_SLOT(pathvec, 0); + select_find_multipaths_timeout(conf, pp); + wait = find_multipaths_check_timeout( + pp, pp->find_multipaths_timeout, &until); + if (wait != FIND_MULTIPATHS_WAITING) + k = RTVL_NO; + } else if (pathvec != NULL && (pp = VECTOR_SLOT(pathvec, 0))) + wait = find_multipaths_check_timeout(pp, 0, &until); + if (wait == FIND_MULTIPATHS_WAITING) + printf("FIND_MULTIPATHS_WAIT_UNTIL=\"%ld.%06ld\"\n", + until.tv_sec, until.tv_nsec/1000); + else if (wait == FIND_MULTIPATHS_WAIT_DONE) + printf("FIND_MULTIPATHS_WAIT_UNTIL=\"0\"\n"); + printf("DM_MULTIPATH_DEVICE_PATH=\"%d\"\n", + k == RTVL_MAYBE ? 2 : k == RTVL_YES ? 1 : 0); + /* Never return RTVL_MAYBE */ + return k == RTVL_NO ? RTVL_NO : RTVL_YES; +} + +/* + * Returns true if this device has been handled before, + * and released to systemd. + * + * This must be called before get_refwwid(), + * otherwise udev_device_new_from_environment() will have + * destroyed environ(7). + */ +static bool released_to_systemd(void) +{ + static const char dmdp[] = "DM_MULTIPATH_DEVICE_PATH"; + const char *dm_mp_dev_path = getenv(dmdp); + bool ret; + + ret = dm_mp_dev_path != NULL && !strcmp(dm_mp_dev_path, "0"); + condlog(4, "%s: %s=%s -> %d", __func__, dmdp, dm_mp_dev_path, ret); + return ret; +} + +static int +configure (struct config *conf, enum mpath_cmds cmd, + enum devtypes dev_type, char *devpath) +{ + vector curmp = NULL; + vector pathvec = NULL; + struct vectors vecs; + int r = RTVL_FAIL, rc; + int di_flag = 0; + char * refwwid = NULL; + char * dev = NULL; + bool released = released_to_systemd(); + + /* + * allocate core vectors to store paths and multipaths + */ + curmp = vector_alloc(); + pathvec = vector_alloc(); + + if (!curmp || !pathvec) { + condlog(0, "can not allocate memory"); + goto out; + } + vecs.pathvec = pathvec; + vecs.mpvec = curmp; + + dev = convert_dev(devpath, (dev_type == DEV_DEVNODE)); + + /* + * if we have a blacklisted device parameter, exit early + */ + if (dev && (dev_type 
== DEV_DEVNODE || + dev_type == DEV_UEVENT) && + cmd != CMD_REMOVE_WWID && + (filter_devnode(conf->blist_devnode, + conf->elist_devnode, dev) > 0)) { + goto print_valid; + } + + /* + * scope limiting must be translated into a wwid + * failing the translation is fatal (by policy) + */ + if (devpath) { + int failed = get_refwwid(cmd, devpath, dev_type, + pathvec, &refwwid); + if (!refwwid) { + condlog(4, "%s: failed to get wwid", devpath); + if (failed == 2 && cmd == CMD_VALID_PATH) + goto print_valid; + else + condlog(3, "scope is null"); + goto out; + } + if (cmd == CMD_REMOVE_WWID) { + rc = remove_wwid(refwwid); + if (rc == 0) { + printf("wwid '%s' removed\n", refwwid); + r = RTVL_OK; + } else if (rc == 1) { + printf("wwid '%s' not in wwids file\n", + refwwid); + r = RTVL_OK; + } + goto out; + } + if (cmd == CMD_ADD_WWID) { + rc = remember_wwid(refwwid); + if (rc >= 0) { + printf("wwid '%s' added\n", refwwid); + r = RTVL_OK; + } else + printf("failed adding '%s' to wwids file\n", + refwwid); + goto out; + } + condlog(3, "scope limited to %s", refwwid); + /* If you are ignoring the wwids file and find_multipaths is + * set, you need to actually check if there are two available + * paths to determine if this path should be multipathed. To + * do this, we put off the check until after discovering all + * the paths. + * Paths listed in the wwids file are always considered valid. + */ + if (cmd == CMD_VALID_PATH) { + if (is_failed_wwid(refwwid) == WWID_IS_FAILED) { + r = RTVL_NO; + goto print_valid; + } + if ((!find_multipaths_on(conf) && + ignore_wwids_on(conf)) || + check_wwids_file(refwwid, 0) == 0) + r = RTVL_YES; + if (!ignore_wwids_on(conf)) + goto print_valid; + /* At this point, either r==0 or find_multipaths_on. */ + + /* + * Shortcut for find_multipaths smart: + * Quick check if path is already multipathed. 
+ */ + if (sysfs_is_multipathed(VECTOR_SLOT(pathvec, 0))) { + r = RTVL_YES; + goto print_valid; + } + + /* + * DM_MULTIPATH_DEVICE_PATH=="0" means that we have + * been called for this device already, and have + * released it to systemd. Unless the device is now + * already multipathed (see above), we can't try to + * grab it, because setting SYSTEMD_READY=0 would + * cause file systems to be unmounted. + * Leave DM_MULTIPATH_DEVICE_PATH="0". + */ + if (released) { + r = RTVL_NO; + goto print_valid; + } + if (r == RTVL_YES) + goto print_valid; + /* find_multipaths_on: Fall through to path detection */ + } + } + + /* + * get a path list + */ + if (devpath) + di_flag = DI_WWID; + + if (cmd == CMD_LIST_LONG) + /* extended path info '-ll' */ + di_flag |= DI_SYSFS | DI_CHECKER | DI_SERIAL; + else if (cmd == CMD_LIST_SHORT) + /* minimum path info '-l' */ + di_flag |= DI_SYSFS; + else + /* maximum info */ + di_flag = DI_ALL; + + if (path_discovery(pathvec, di_flag) < 0) + goto out; + + if (conf->verbosity > 2) + print_all_paths(pathvec, 1); + + get_path_layout(pathvec, 0); + foreign_path_layout(); + + if (get_dm_mpvec(cmd, curmp, pathvec, refwwid)) + goto out; + + filter_pathvec(pathvec, refwwid); + + if (cmd == CMD_DUMP_CONFIG) { + vector hwes = get_used_hwes(pathvec); + + dump_config(conf, hwes, curmp); + vector_free(hwes); + goto out; + } + + if (cmd == CMD_VALID_PATH) { + struct path *pp; + int fd; + + /* This only happens if find_multipaths and + * ignore_wwids is set, and the path is not in WWIDs + * file, not currently multipathed, and has + * never been released to systemd. + * If there is currently a multipath device matching + * the refwwid, or there is more than one path matching + * the refwwid, then the path is valid */ + if (VECTOR_SIZE(curmp) != 0) { + r = RTVL_YES; + goto print_valid; + } else if (VECTOR_SIZE(pathvec) > 1) + r = RTVL_YES; + else + r = RTVL_MAYBE; + + /* + * If opening the path with O_EXCL fails, the path + * is in use (e.g. 
mounted during initramfs processing). + * We know that it's not used by dm-multipath. + * We may not set SYSTEMD_READY=0 on such devices, it + * might cause systemd to umount the device. + * Use O_RDONLY, because udevd would trigger another + * uevent for close-after-write. + * + * The O_EXCL check is potentially dangerous, because it may + * race with other tasks trying to access the device. Therefore + * this code is only executed if the path hasn't been released + * to systemd earlier (see above). + * + * get_refwwid() above stores the path we examine in slot 0. + */ + pp = VECTOR_SLOT(pathvec, 0); + fd = open(udev_device_get_devnode(pp->udev), + O_RDONLY|O_EXCL); + if (fd >= 0) + close(fd); + else { + condlog(3, "%s: path %s is in use: %s", + __func__, pp->dev, + strerror(errno)); + /* + * Check if we raced with multipathd + */ + r = sysfs_is_multipathed(VECTOR_SLOT(pathvec, 0)) ? + RTVL_YES : RTVL_NO; + } + goto print_valid; + } + + if (cmd != CMD_CREATE && cmd != CMD_DRY_RUN) { + r = RTVL_OK; + goto out; + } + + /* + * core logic entry point + */ + rc = coalesce_paths(&vecs, NULL, refwwid, + conf->force_reload, cmd); + r = rc == CP_RETRY ? RTVL_RETRY : rc == CP_OK ? RTVL_OK : RTVL_FAIL; + +print_valid: + if (cmd == CMD_VALID_PATH) + r = print_cmd_valid(r, pathvec, conf); + +out: + if (refwwid) + FREE(refwwid); + + free_multipathvec(curmp, KEEP_PATHS); + free_pathvec(pathvec, FREE_PATHS); + + return r; +} + +static int +get_dev_type(char *dev) { + struct stat buf; + int i; + + if (stat(dev, &buf) == 0 && S_ISBLK(buf.st_mode)) { + if (dm_is_dm_major(major(buf.st_rdev))) + return DEV_DEVMAP; + return DEV_DEVNODE; + } + else if (sscanf(dev, "%d:%d", &i, &i) == 2) + return DEV_DEVT; + else if (valid_alias(dev)) + return DEV_DEVMAP; + return DEV_NONE; +} + +/* + * Some multipath commands are dangerous to run while multipathd is running. 
+ * For example, "multipath -r" may apply a modified configuration to the kernel, + * while multipathd is still using the old configuration, leading to + * inconsistent state. + * + * It is safer to use equivalent multipathd client commands instead. + */ +enum { + DELEGATE_OK = 0, + DELEGATE_ERROR = -1, + NOT_DELEGATED = 1, +}; + +int delegate_to_multipathd(enum mpath_cmds cmd, + __attribute__((unused)) const char *dev, + __attribute__((unused)) enum devtypes dev_type, + const struct config *conf) +{ + int fd; + char command[1024], *p, *reply = NULL; + int n, r = DELEGATE_ERROR; + + p = command; + *p = '\0'; + n = sizeof(command); + + if (cmd == CMD_CREATE && conf->force_reload == FORCE_RELOAD_YES) { + p += snprintf(p, n, "reconfigure"); + } + /* Add other translations here */ + + if (strlen(command) == 0) + /* No command found, no need to delegate */ + return NOT_DELEGATED; + + fd = mpath_connect(); + if (fd == -1) + return NOT_DELEGATED; + + if (p >= command + sizeof(command)) { + condlog(0, "internal error - command buffer overflow"); + goto out; + } + + condlog(3, "delegating command to multipathd"); + + if (mpath_process_cmd(fd, command, &reply, conf->uxsock_timeout) + == -1) { + condlog(1, "error in multipath command %s: %s", + command, strerror(errno)); + goto out; + } + + if (reply != NULL && *reply != '\0' && strcmp(reply, "ok\n")) + printf("%s", reply); + r = DELEGATE_OK; + +out: + FREE(reply); + close(fd); + return r; +} + +static int test_multipathd_socket(void) +{ + int fd; + /* + * "multipath -u" may be run before the daemon is started. In this + * case, systemd might own the socket but might delay multipathd + * startup until some other unit (udev settle!) has finished + * starting. With many LUNs, the listen backlog may be exceeded, which + * would cause connect() to block. This causes udev workers calling + * "multipath -u" to hang, and thus creates a deadlock, until "udev + * settle" times out. 
To avoid this, call connect() in non-blocking + * mode here, and take EAGAIN as indication for a filled-up systemd + * backlog. + */ + + fd = __mpath_connect(1); + if (fd == -1) { + if (errno == EAGAIN) + condlog(3, "daemon backlog exceeded"); + else + return 0; + } else + close(fd); + return 1; +} + +int +main (int argc, char *argv[]) +{ + int arg; + extern char *optarg; + extern int optind; + int r = RTVL_FAIL; + enum mpath_cmds cmd = CMD_CREATE; + enum devtypes dev_type = DEV_NONE; + char *dev = NULL; + struct config *conf; + int retries = -1; + + udev = udev_new(); + logsink = 0; + conf = load_config(DEFAULT_CONFIGFILE); + if (!conf) + exit(RTVL_FAIL); + multipath_conf = conf; + conf->retrigger_tries = 0; + conf->force_sync = 1; + while ((arg = getopt(argc, argv, ":adcChl::FfM:v:p:b:BrR:itTquUwW")) != EOF ) { + switch(arg) { + case 1: printf("optarg : %s\n",optarg); + break; + case 'v': + if (sizeof(optarg) > sizeof(char *) || + !isdigit(optarg[0])) { + usage (argv[0]); + exit(RTVL_FAIL); + } + + conf->verbosity = atoi(optarg); + break; + case 'b': + conf->bindings_file = strdup(optarg); + break; + case 'B': + conf->bindings_read_only = 1; + break; + case 'q': + conf->allow_queueing = 1; + break; + case 'c': + cmd = CMD_VALID_PATH; + break; + case 'C': + cmd = CMD_USABLE_PATHS; + break; + case 'd': + if (cmd == CMD_CREATE) + cmd = CMD_DRY_RUN; + break; + case 'f': + conf->remove = FLUSH_ONE; + break; + case 'F': + conf->remove = FLUSH_ALL; + break; + case 'l': + if (optarg && !strncmp(optarg, "l", 1)) + cmd = CMD_LIST_LONG; + else + cmd = CMD_LIST_SHORT; + + break; + case 'M': +#if _DEBUG_ + debug = atoi(optarg); +#endif + break; + case 'p': + conf->pgpolicy_flag = get_pgpolicy_id(optarg); + if (conf->pgpolicy_flag == IOPOLICY_UNDEF) { + printf("'%s' is not a valid policy\n", optarg); + usage(argv[0]); + exit(RTVL_FAIL); + } + break; + case 'r': + conf->force_reload = FORCE_RELOAD_YES; + break; + case 'i': + conf->find_multipaths |= _FIND_MULTIPATHS_I; + break; 
+ case 't': + r = dump_config(conf, NULL, NULL) ? RTVL_FAIL : RTVL_OK; + goto out_free_config; + case 'T': + cmd = CMD_DUMP_CONFIG; + break; + case 'h': + usage(argv[0]); + exit(RTVL_OK); + case 'u': + cmd = CMD_VALID_PATH; + dev_type = DEV_UEVENT; + break; + case 'U': + cmd = CMD_USABLE_PATHS; + dev_type = DEV_UEVENT; + break; + case 'w': + cmd = CMD_REMOVE_WWID; + break; + case 'W': + cmd = CMD_RESET_WWIDS; + break; + case 'a': + cmd = CMD_ADD_WWID; + break; + case 'R': + retries = atoi(optarg); + break; + case ':': + fprintf(stderr, "Missing option argument\n"); + usage(argv[0]); + exit(RTVL_FAIL); + case '?': + fprintf(stderr, "Unknown switch: %s\n", optarg); + usage(argv[0]); + exit(RTVL_FAIL); + default: + usage(argv[0]); + exit(RTVL_FAIL); + } + } + + if (getuid() != 0) { + fprintf(stderr, "need to be root\n"); + exit(RTVL_FAIL); + } + + if (optind < argc) { + dev = MALLOC(FILE_NAME_SIZE); + + if (!dev) + goto out; + + strlcpy(dev, argv[optind], FILE_NAME_SIZE); + if (dev_type != DEV_UEVENT) + dev_type = get_dev_type(dev); + if (dev_type == DEV_NONE) { + condlog(0, "'%s' is not a valid argument\n", dev); + goto out; + } + } + if (dev_type == DEV_UEVENT) { + openlog("multipath", 0, LOG_DAEMON); + setlogmask(LOG_UPTO(conf->verbosity + 3)); + logsink = 1; + } + + set_max_fds(conf->max_fds); + + libmp_udev_set_sync_support(1); + + if (init_checkers(conf->multipath_dir)) { + condlog(0, "failed to initialize checkers"); + goto out; + } + if (init_prio(conf->multipath_dir)) { + condlog(0, "failed to initialize prioritizers"); + goto out; + } + /* Failing here is non-fatal */ + init_foreign(conf->multipath_dir, conf->enable_foreign); + if (cmd == CMD_USABLE_PATHS) { + r = check_usable_paths(conf, dev, dev_type) ? 
+ RTVL_FAIL : RTVL_OK; + goto out; + } + if (cmd == CMD_VALID_PATH && + (!dev || dev_type == DEV_DEVMAP)) { + condlog(0, "the -c option requires a path to check"); + goto out; + } + if (cmd == CMD_VALID_PATH && + dev_type == DEV_UEVENT) { + if (!test_multipathd_socket()) { + condlog(3, "%s: daemon is not running", dev); + if (!systemd_service_enabled(dev)) { + r = print_cmd_valid(RTVL_NO, NULL, conf); + goto out; + } + } + } + + if (cmd == CMD_REMOVE_WWID && !dev) { + condlog(0, "the -w option requires a device"); + goto out; + } + + switch(delegate_to_multipathd(cmd, dev, dev_type, conf)) { + case DELEGATE_OK: + exit(RTVL_OK); + case DELEGATE_ERROR: + exit(RTVL_FAIL); + case NOT_DELEGATED: + break; + } + + if (cmd == CMD_RESET_WWIDS) { + struct multipath * mpp; + int i; + vector curmp; + + curmp = vector_alloc(); + if (!curmp) { + condlog(0, "can't allocate memory for mp list"); + goto out; + } + if (dm_get_maps(curmp) == 0) + r = replace_wwids(curmp) ? RTVL_FAIL : RTVL_OK; + if (r == RTVL_OK) + printf("successfully reset wwids\n"); + vector_foreach_slot_backwards(curmp, mpp, i) { + vector_del_slot(curmp, i); + free_multipath(mpp, KEEP_PATHS); + } + vector_free(curmp); + goto out; + } + if (retries < 0) + retries = conf->remove_retries; + if (conf->remove == FLUSH_ONE) { + if (dev_type == DEV_DEVMAP) { + r = dm_suspend_and_flush_map(dev, retries) ? + RTVL_FAIL : RTVL_OK; + } else + condlog(0, "must provide a map name to remove"); + + goto out; + } + else if (conf->remove == FLUSH_ALL) { + r = dm_flush_maps(retries) ? RTVL_FAIL : RTVL_OK; + goto out; + } + while ((r = configure(conf, cmd, dev_type, dev)) == RTVL_RETRY) + condlog(3, "restart multipath configuration process"); + +out: + dm_lib_release(); + dm_lib_exit(); + + cleanup_foreign(); + cleanup_prio(); + cleanup_checkers(); + + /* + * multipath -u must exit with status 0, otherwise udev won't + * import its output. 
+ */ + if (cmd == CMD_VALID_PATH && dev_type == DEV_UEVENT && r == RTVL_NO) + r = RTVL_OK; + + if (dev_type == DEV_UEVENT) + closelog(); + +out_free_config: + /* + * Freeing config must be done after dm_lib_exit(), because + * the logging function (dm_write_log()), which is called there, + * references the config. + */ + free_config(conf); + conf = NULL; + udev_unref(udev); + if (dev) + FREE(dev); +#ifdef _DEBUG_ + dbg_free_final(NULL); +#endif + return r; +} diff --git a/multipath/multipath.8 b/multipath/multipath.8 new file mode 100644 index 0000000..9cdd05a --- /dev/null +++ b/multipath/multipath.8 @@ -0,0 +1,288 @@ +.\" ---------------------------------------------------------------------------- +.\" Update the date below if you make any significant change. +.\" Make sure there are no errors with: +.\" groff -z -wall -b -e -t multipath/multipath.8 +.\" +.\" ---------------------------------------------------------------------------- +. +.TH MULTIPATH 8 2018-10-10 "Linux" +. +. +.\" ---------------------------------------------------------------------------- +.SH NAME +.\" ---------------------------------------------------------------------------- +. +multipath \- Device mapper target autoconfig. +. +. +.\" ---------------------------------------------------------------------------- +.SH SYNOPSIS +.\" ---------------------------------------------------------------------------- +. +.B multipath +.RB [\| \-v\ \c +.IR level \|] +.RB [\| \-B | \-d | \-i | \-q | \-r \|] +.RB [\| \-b\ \c +.IR file \|] +.RB [\| \-p\ \c +.IR policy \|] +.RB [\| device \|] +. +.LP +.B multipath +.RB [\| \-v\ \c +.IR level \|] +.RB [\| \-R\ \c +.IR retries \|] +.B \-f device +. +.LP +.B multipath +.RB [\| \-v\ \c +.IR level \|] +.RB [\| \-R\ \c +.IR retries \|] +.B \-F +. +.LP +.B multipath +.RB [\| \-v\ \c +.IR level \|] +.RB [\| \-l | \-ll \|] +.RB [\| device \|] +. +.LP +.B multipath +.RB [\| \-v\ \c +.IR level \|] +.RB [\| \-a | \-w \|] +.B device +. 
+.LP +.B multipath +.RB [\| \-v\ \c +.IR level \|] +.B -W +. +.LP +.B multipath +.RB [\| \-v\ \c +.IR level \|] +.RB [\| \-i \|] +.RB [\| \-c | \-C \|] +.B device +. +.LP +.B multipath +.RB [\| \-v\ \c +.IR level \|] +.RB [\| \-i \|] +.RB [\| \-u | \-U \|] +. +.LP +.B multipath +.RB [\| \-h | \-t | \-T \|] +. +.\" ---------------------------------------------------------------------------- +.SH DESCRIPTION +.\" ---------------------------------------------------------------------------- +. +.B multipath +is used to detect and coalesce multiple paths to devices, for fail-over or performance reasons. +. +.\" ---------------------------------------------------------------------------- +.SH ARGUMENTS +.\" ---------------------------------------------------------------------------- +. +The \fBdevice\fR argument restricts \fBmultipath\fR's operation to devices matching the given +expression. The argument may refer either to a multipath map or to +its components ("paths"). The expression may be in one of the following formats: +. +.TP 1.4i +.B device node +file name of a device node, e.g. \fI/dev/dm-10\fR or \fI/dev/sda\fR. If the node refers +to an existing device mapper device representing a multipath map, this selects +the map or its paths, depending on the operation mode. Otherwise, it selects a path device. +. +.TP +.B device ID +kernel device number specified by major:minor numbers, e.g. \fI65:16\fR. This +format can only be used for path devices. +. +.TP +.B WWID +a World Wide Identifier matching a multipath map or its paths. To list WWIDs of devices +present in the system, use e.g. the command "\fImultipath -d -v3 2>/dev/null\fR". +. +.\" ---------------------------------------------------------------------------- +.SH OPERATION MODES +.\" ---------------------------------------------------------------------------- +. +The default operation mode is to detect and set up multipath maps from the devices found in +the system. +. 
+Other operation modes are chosen by using one of the following command line switches: +.TP +.B \-f +Flush (remove) a multipath device map specified as parameter, if unused. +. +.TP +.B \-F +Flush (remove) all unused multipath device maps. +. +.TP +.B \-l +Show ("list") the current multipath topology from information fetched in sysfs and the device mapper. +. +.TP +.B \-ll +Show ("list") the current multipath topology from all available information (sysfs, the +device mapper, path checkers ...). +. +.TP +.B \-a +Add the WWID for the specified device to the WWIDs file. +. +.TP +.B \-w +Remove the WWID for the specified device from the WWIDs file. +. +.TP +.B \-W +Reset the WWIDs file to only include the current multipath devices. +. +.TP +.B \-c +Check if a block device should be a path in a multipath device. +. +.TP +.B \-C +Check if a multipath device has usable paths. This can be used to +test whether or not I/O on this device is likely to succeed. The command +itself doesn't attempt to do I/O on the device. +. +.TP +.B \-u +Check if the device specified in the program environment should be +a path in a multipath device. +. +.TP +.B \-U +Check if the device specified in the program environment is a multipath device +with usable paths. See \fB\-C\fR. +. +.TP +.B \-h +Print usage text. +. +.TP +.B \-t +Display the currently used multipathd configuration. +. +.TP +.B \-T +Display the currently used multipathd configuration, limiting the output to +those devices actually present in the system. This can be used as a template for +creating \fImultipath.conf\fR. +. +.\" ---------------------------------------------------------------------------- +.SH OPTIONS +.\" ---------------------------------------------------------------------------- +. +.TP +.BI \-v " level" +Verbosity of information printed to stdout in default and "list" operation +modes. The default level is \fI-v 2\fR. +.RS 1.2i +.TP 1.2i +.I 0 +Nothing is printed.
+.TP +.I 1 +In default mode, Names/WWIDs of created or modified multipath maps are +printed. In list mode, WWIDs of all multipath maps are printed. +.TP +.I 2 +In default mode, +Topology of created or modified multipath maps is printed. +In list mode, topology of all multipath maps is printed. +.TP +.I 3 +All detected paths and the topology of all multipath maps are printed. +. +.LP +. +The verbosity level also controls the level of log and debug messages printed to +\fIstderr\fR. The default level corresponds to \fILOG_NOTICE\fR +(important messages that shouldn't be missed in normal operation). +. +.RE +.TP +.B \-d +Dry run, do not create or update devmaps. +. +.TP +.B \-i +Ignore WWIDs file when processing devices. If +\fIfind_multipaths strict\fR or \fIfind_multipaths no\fR is set in +\fImultipath.conf\fR, multipath only considers devices that are +listed in the WWIDs file. This option overrides that behavior. For other values +of \fIfind_multipaths\fR, this option has no effect. See the description of +\fIfind_multipaths\fR in +.BR multipath.conf (5). +This option should only be used in rare circumstances. +. +.TP +.B \-B +Treat the bindings file as read only. +. +.TP +.BI \-b " file" +Set \fIuser_friendly_names\fR bindings file location. The default is +\fI/etc/multipath/bindings\fR. +. +.TP +.B \-q +Don't unset the device mapper feature \fIqueue_if_no_path\fR for multipath +maps. Normally, \fBmultipath\fR would do so if \fBmultipathd\fR is not +running, because only a running multipath daemon guarantees that unusable +paths are reinstated when they become usable again. +. +.TP +.BI \-p " policy" +Force new maps to use the specified policy, overriding the configuration in +\fBmultipath.conf(5)\fR. The possible values for +\fIpolicy\fR are the same as the values for \fIpath_grouping_policy\fR in +\fBmultipath.conf(5)\fR. Existing maps are not modified. +. +.TP +.B \-r +Force a reload of all existing multipath maps. 
This command is delegated to +the multipathd daemon if it's running. In this case, other command line +switches of the \fImultipath\fR command have no effect. +. +.TP +.BI \-R " retries" +Number of times to retry flushing multipath devices that are in use. The default +is \fI0\fR. +. +.\" ---------------------------------------------------------------------------- +.SH "SEE ALSO" +.\" ---------------------------------------------------------------------------- +. +.BR multipathd (8), +.BR multipath.conf (5), +.BR kpartx (8), +.BR udev (8), +.BR dmsetup (8), +.BR hotplug (8). +. +. +.\" ---------------------------------------------------------------------------- +.SH AUTHORS +.\" ---------------------------------------------------------------------------- +. +\fImultipath-tools\fR was developed by Christophe Varoqui +and others. +.\" EOF diff --git a/multipath/multipath.conf.5 b/multipath/multipath.conf.5 new file mode 100644 index 0000000..05a5e8f --- /dev/null +++ b/multipath/multipath.conf.5 @@ -0,0 +1,1845 @@ +.\" ---------------------------------------------------------------------------- +.\" Update the date below if you make any significant change. +.\" Make sure there are no errors with: +.\" groff -z -wall -b -e -t multipath/multipath.conf.5 +.\" man --warnings -E UTF-8 -l -Tutf8 -Z multipath/multipath.conf.5 >/dev/null +.\" +.\" ---------------------------------------------------------------------------- +. +.TH MULTIPATH.CONF 5 2018-05-21 Linux +. +. +.\" ---------------------------------------------------------------------------- +.SH NAME +.\" ---------------------------------------------------------------------------- +. +multipath.conf \- multipath daemon configuration file. +. +. +.\" ---------------------------------------------------------------------------- +.SH DESCRIPTION +.\" ---------------------------------------------------------------------------- +. +.B "/etc/multipath.conf" +is the configuration file for the multipath daemon. 
It is used to +overwrite the built-in configuration table of \fBmultipathd\fP. +Any line whose first non-white-space character is a '#' is considered +a comment line. Empty lines are ignored. +.PP +Currently used multipathd configuration can be displayed with the \fBmultipath -t\fR +or \fBmultipathd show config\fR command. +. +. +.\" ---------------------------------------------------------------------------- +.SH SYNTAX +.\" ---------------------------------------------------------------------------- +. +The configuration file contains entries of the form: +.RS +.nf +.ft B +.sp +<section> { +.RS +.ft B +<attribute> <value> +.I "..." +.ft B +<subsection> { +.RS +.ft B +<attribute> <value> +.I "..." +.RE +.ft B +} +.RE +.ft B +} +.ft R +.fi +.RE +.LP +Each \fIsection\fP contains one or more attributes or subsections. The +recognized keywords for attributes or subsections depend on the +section in which they occur. +.LP +. +\fB<attribute>\fR and \fB<value>\fR must be on a single line. +\fB<attribute>\fR is one of the keywords listed in this man page. +\fB<value>\fR is either a simple word (containing no whitespace and none of the +characters '\(dq', '#', and '!') or \fIone\fR string enclosed in double +quotes ("..."). Outside a quoted string, text starting with '#' or '!' is +regarded as a comment and ignored until the end of the line. Inside a quoted +string, '#' and '!' are normal characters, and whitespace is preserved. +To represent a double quote character inside a double quoted string, use two +consecutive double quotes ('""'). Thus '2.5\(dq SSD' can be written as "2.5"" SSD". +.LP +. +Opening braces ('{') must follow the (sub)section name on the same line. Closing +braces ('}') that mark the end of a (sub)section must be the only non-whitespace +character on the line. Whitespace is ignored except inside double quotes, thus +the indentation shown in the above example is helpful for human readers but +not mandatory. +.LP +. +.LP +.B Note on regular expressions: +The \fImultipath.conf\fR syntax allows many attribute values to be specified as POSIX +Extended Regular Expressions (see \fBregex\fR(7)). These regular expressions +are \fBcase sensitive\fR and \fBnot anchored\fR, thus the expression "bar" matches "barbie", +"rhabarber", and "wunderbar", but not "Barbie". To avoid unwanted substring +matches, standard regular expression syntax using the special characters "^" and "$" can be used. +. +.LP +. +The following \fIsection\fP keywords are recognized: +.TP 17 +.B defaults +This section defines default values for attributes which are used +whenever no values are given in the appropriate device or multipath +sections.
+.TP +.B blacklist +This section defines which devices should be excluded from the +multipath topology discovery. +.TP +.B blacklist_exceptions +This section defines which devices should be included in the +multipath topology discovery, despite being listed in the +\fIblacklist\fR section. +.TP +.B multipaths +This section defines the multipath topologies. They are indexed by a +\fIWorld Wide Identifier\fR(WWID). For details on the WWID generation +see section \fIWWID generation\fR below. Attributes set in this section take +precedence over all others. +.TP +.B devices +This section defines the device-specific settings. Devices are identified by +vendor, product, and revision. +.TP +.B overrides +This section defines values for attributes that should override the +device-specific settings for all devices. +.RE +.LP +. +. +.\" ---------------------------------------------------------------------------- +.SH "defaults section" +.\" ---------------------------------------------------------------------------- +. +The \fIdefaults\fR section recognizes the following keywords: +. +. +.TP 17 +.B verbosity +Default verbosity. Higher values increase the verbosity level. Valid +levels are between 0 and 6. +.RS +.TP +The default is: \fB2\fR +.RE +. +. +.TP +.B polling_interval +Interval between two path checks in seconds. For properly functioning paths, +the interval between checks will gradually increase to \fImax_polling_interval\fR. +This value will be overridden by the \fIWatchdogSec\fR +setting in the multipathd.service definition if systemd is used. +.RS +.TP +The default is: \fB5\fR +.RE +. +. +.TP +.B max_polling_interval +Maximal interval between two path checks in seconds. +.RS +.TP +The default is: \fB4 * polling_interval\fR +.RE +. +. +.TP +.B reassign_maps +Enable reassigning of device-mapper maps. With this option multipathd +will remap existing device-mapper maps to always point to multipath +device, not the underlying block devices. 
Possible values are +\fIyes\fR and \fIno\fR. +.RS +.TP +The default is: \fBno\fR +.RE +. +. +.TP +.B multipath_dir +Directory where the dynamic shared objects are stored. Defined at compile time, +commonly \fI/lib64/multipath/\fR or \fI/lib/multipath/\fR. +.RS +.TP +The default is: \fB<system dependent>\fR +.RE +. +. +.TP +.B path_selector +The default path selector algorithm to use; they are offered by the +kernel multipath target. There are three selector algorithms: +.RS +.TP 12 +.I "round-robin 0" +Loop through every path in the path group, sending the same amount of I/O to +each. Some aspects of behavior can be controlled with the attributes: +\fIrr_min_io\fR, \fIrr_min_io_rq\fR and \fIrr_weight\fR. +.TP +.I "queue-length 0" +(Since 2.6.31 kernel) Choose the path for the next bunch of I/O based on the amount +of outstanding I/O to the path. +.TP +.I "service-time 0" +(Since 2.6.31 kernel) Choose the path for the next bunch of I/O based on the amount +of outstanding I/O to the path and its relative throughput. +.TP +The default is: \fBservice-time 0\fR +.RE +. +. +.TP +.B path_grouping_policy +The default path grouping policy to apply to unspecified +multipaths. Possible values are: +.RS +.TP 12 +.I failover +One path per priority group. +.TP +.I multibus +All paths in one priority group. +.TP +.I group_by_serial +One priority group per serial number. +.TP +.I group_by_prio +One priority group per priority value. Priorities are determined by +callout programs specified as a global, per-controller or +per-multipath option in the configuration file. +.TP +.I group_by_node_name +One priority group per target node name. Target node names are fetched +in \fI/sys/class/fc_transport/target*/node_name\fR. +.TP +The default is: \fBfailover\fR +.RE +. +. +.TP +.B uid_attrs +. +Setting this option activates \fBmerging uevents\fR by WWID, which may improve +uevent processing efficiency.
Moreover, it's an alternative method to configure +the udev properties to use for determining unique path identifiers (WWIDs). +.RS +.PP +The value of this option is a space separated list of records like +\(dq\fItype:ATTR\fR\(dq, where \fItype\fR is matched against the beginning +of the device node name (e.g. \fIsd:ATTR\fR matches \fIsda\fR), and +\fIATTR\fR is the name of the udev property to use for matching devices. +.PP +If this option is configured and matches the device +node name of a device, it overrides any other configured methods for +determining the WWID for this device. +.PP +The default is: \fB<empty>\fR. To enable uevent merging, set it e.g. to +\(dqsd:ID_SERIAL dasd:ID_UID nvme:ID_WWN\(dq. +.RE +. +. +.TP +.B uid_attribute +The udev attribute providing a unique path identifier (WWID). If +\fIuid_attribute\fR is set to the empty string, WWID determination is done +using the \fIsysfs\fR method rather than using udev (not recommended in +production; see \fBWWID generation\fR below). +.RS +.TP +The default is: \fBID_SERIAL\fR, for SCSI devices +.TP +The default is: \fBID_UID\fR, for DASD devices +.TP +The default is: \fBID_WWN\fR, for NVMe devices +.RE +. +. +.TP +.B getuid_callout +(Superseded by \fIuid_attribute\fR) The default program and args to callout +to obtain a unique path identifier. Should be specified with an absolute path. +.RS +.TP +The default is: \fB<unset>\fR +.RE +. +. +.TP +.B prio +The name of the path priority routine. The specified routine +should return a numeric value specifying the relative priority +of this path. Higher numbers have a higher priority. +\fI"none"\fR is a valid value. Currently the following path priority routines +are implemented: +.RS +.TP 12 +.I const +Return a constant priority of \fI1\fR. +.TP +.I sysfs +Use the sysfs attributes \fIaccess_state\fR and \fIpreferred_path\fR to +generate the path priority. This prioritizer accepts the optional prio_arg +\fIexclusive_pref_bit\fR.
+.TP +.I emc +(Hardware-dependent) +Generate the path priority for DGC class arrays as CLARiiON CX/AX and +EMC VNX and Unity families. +.TP +.I alua +(Hardware-dependent) +Generate the path priority based on the SCSI-3 ALUA settings. This prioritizer +accepts the optional prio_arg \fIexclusive_pref_bit\fR. +.TP +.I ontap +(Hardware-dependent) +Generate the path priority for NetApp ONTAP class and OEM arrays as IBM NSeries. +.TP +.I rdac +(Hardware-dependent) +Generate the path priority for LSI/Engenio/NetApp RDAC class as NetApp SANtricity +E/EF Series, and OEM arrays from IBM DELL SGI STK and SUN. +.TP +.I hp_sw +(Hardware-dependent) +Generate the path priority for HP/COMPAQ/DEC HSG80 and MSA/HSV arrays with +Active/Standby mode exclusively. +.TP +.I hds +(Hardware-dependent) +Generate the path priority for Hitachi AMS families of arrays other than AMS 2000. +.TP +.I random +Generate a random priority between 1 and 10. +.TP +.I weightedpath +Generate the path priority based on the regular expression and the +priority provided as argument. Requires prio_args keyword. +.TP +.I path_latency +Generate the path priority based on a latency algorithm. +Requires prio_args keyword. +.TP +.I ana +(Hardware-dependent) +Generate the path priority based on the NVMe ANA settings. +.TP +.I datacore +(Hardware-dependent) +Generate the path priority for some DataCore storage arrays. Requires prio_args +keyword. +.TP +.I iet +(iSCSI only) +Generate path priority for iSCSI targets based on IP address. Requires +prio_args keyword. +.PP +The default depends on the \fBdetect_prio\fR setting: If \fBdetect_prio\fR is +\fByes\fR (default), the default priority algorithm is \fBsysfs\fR (except for +NetApp E-Series, where it is \fBalua\fR). If \fBdetect_prio\fR is +\fBno\fR, the default priority algorithm is \fBconst\fR. +.RE +. +. +.TP +.B prio_args +Arguments to pass to the prio function.
This only applies to certain +prioritizers: +.RS +.TP 12 +.I weighted +Needs a value of the form +\fI"<hbtl|devname|serial|wwn> <regex1> <prio1> <regex2> <prio2> ..."\fR +.RS +.TP 8 +.I hbtl +Regex can be of SCSI H:B:T:L format. For example: 1:0:.:. , *:0:0:. +.TP +.I devname +Regex can be of device name format. For example: sda , sd.e +.TP +.I serial +Regex can be of serial number format. For example: .*J1FR.*324 . The serial can +be looked up through sysfs or by running multipathd show paths format "%z". For +example: 0395J1FR904324 +.TP +.I wwn +Regex can be of the form \fI"host_wwnn:host_wwpn:target_wwnn:target_wwpn"\fR. +These values can be looked up through sysfs or by running \fImultipathd show paths format +"%N:%R:%n:%r"\fR. For example: 0x200100e08ba0aea0:0x210100e08ba0aea0:.*:.* , .*:.*:iqn.2009-10.com.redhat.msp.lab.ask-06:.* +.RE +.TP 12 +.I path_latency +Needs a value of the form "io_num=\fI<20>\fR base_num=\fI<10>\fR" +.RS +.TP 8 +.I io_num +The number of read IOs sent to the current path continuously, used to calculate the average path latency. +Valid Values: Integer, [2, 200]. +.TP +.I base_num +The base number value of logarithmic scale, used to partition different priority ranks. Valid Values: Integer, +[2, 10]. The maximum average latency value is 100s, the minimum average latency value is 1us. +For example: If base_num=10, the paths will be grouped in priority groups with path latency <=1us, (1us, 10us], +(10us, 100us], (100us, 1ms], (1ms, 10ms], (10ms, 100ms], (100ms, 1s], (1s, 10s], (10s, 100s], >100s. +.RE +.TP 12 +.I alua +If \fIexclusive_pref_bit\fR is set, paths with the \fIpreferred path\fR bit +set will always be in their own path group. +.TP +.I sysfs +If \fIexclusive_pref_bit\fR is set, paths with the \fIpreferred path\fR bit +set will always be in their own path group. +.TP +.I datacore +.RS +.TP 8 +.I preferredsds +(Mandatory) The preferred "SDS name". +.TP +.I timeout +(Optional) The timeout for the INQUIRY, in ms. +.RE +.TP 12 +.I iet +.RS +.TP 8 +.I preferredip=...
+(Mandatory) The preferred IP address, in dotted decimal notation, for iSCSI targets. +.RE +.TP +The default is: \fB<unset>\fR +.RE +. +. +.TP +.B features +Specify any device-mapper features to be used. Syntax is \fInum list\fR +where \fInum\fR is the number, between 0 and 8, of features in \fIlist\fR. +Possible values for the feature list are: +.RS +.TP 12 +.I queue_if_no_path +(Deprecated, superseded by \fIno_path_retry\fR) Queue I/O if no path is active. +Identical to the \fIno_path_retry\fR with \fIqueue\fR value. If both this +feature and \fIno_path_retry\fR are set, the latter value takes +precedence. See KNOWN ISSUES. +.TP +.I pg_init_retries +(Since kernel 2.6.24) Number of times to retry pg_init, it must be between 1 and 50. +.TP +.I pg_init_delay_msecs +(Since kernel 2.6.38) Number of msecs before pg_init retry, it must be between 0 and 60000. +.TP +.I queue_mode <mode> +(Since kernel 4.8) Select the queueing mode per multipath device. +<mode> can be \fIbio\fR, \fIrq\fR or \fImq\fR, which corresponds to +bio-based, request-based, and block-multiqueue (blk-mq) request-based, +respectively. +The default depends on the kernel parameter \fBdm_mod.use_blk_mq\fR. It is +\fImq\fR if the latter is set, and \fIrq\fR otherwise. +.TP +The default is: \fB<unset>\fR +.RE +. +. +.TP +.B path_checker +The default method used to determine the paths state. Possible values +are: +.RS +.TP 12 +.I readsector0 +(Deprecated) Read the first sector of the device. This checker is being +deprecated, please use \fItur\fR instead. +.TP +.I tur +Issue a \fITEST UNIT READY\fR command to the device. +.TP +.I emc_clariion +(Hardware-dependent) +Query the DGC/EMC specific EVPD page 0xC0 to determine the path state +for CLARiiON CX/AX and EMC VNX and Unity arrays families. +.TP +.I hp_sw +(Hardware-dependent) +Check the path state for HP/COMPAQ/DEC HSG80 and MSA/HSV arrays with +Active/Standby mode exclusively.
+.TP +.I rdac +(Hardware-dependent) +Check the path state for LSI/Engenio/NetApp RDAC class as NetApp SANtricity E/EF +Series, and OEM arrays from IBM DELL SGI STK and SUN. +.TP +.I directio +(Deprecated) Read the first sector with direct I/O. If you have a large number +of paths, or many AIO users on a system, you may need to use sysctl to +increase fs.aio-max-nr. This checker is being deprecated, it could cause +spurious path failures under high load. Please use \fItur\fR instead. +.TP +.I cciss_tur +(Hardware-dependent) +Check the path state for HP/COMPAQ Smart Array(CCISS) controllers. +.TP +.I none +Do not check the device, fallback to use the values retrieved from sysfs +.TP +The default is: \fBtur\fR +.RE +. +. +.TP +.B alias_prefix +The \fIuser_friendly_names\fR prefix. +.RS +.TP +The default is: \fBmpath\fR +.RE +. +. +.TP +.B failback +Tell multipathd how to manage path group failback. +To select \fIimmediate\fR or a \fIvalue\fR, it's mandatory that the device +has support for a working prioritizer. +.RS +.TP 12 +.I immediate +Immediately failback to the highest priority pathgroup that contains +active paths. +.TP +.I manual +Do not perform automatic failback. +.TP +.I followover +Used to deal with multiple computers accessing the same Active/Passive storage +devices. Only perform automatic failback when the first path of a pathgroup +becomes active. This keeps a cluster node from automatically failing back when +another node requested the failover. +.TP +.I values > 0 +Deferred failback (time to defer in seconds). +.TP +The default is: \fBmanual\fR +.RE +. +. +.TP +.B rr_min_io +Number of I/O requests to route to a path before switching to the next in the +same path group. This is only for \fIBlock I/O\fR(BIO) based multipath and +only apply to \fIround-robin\fR path_selector. +.RS +.TP +The default is: \fB1000\fR +.RE +. +. +.TP +.B rr_min_io_rq +Number of I/O requests to route to a path before switching to the next in the +same path group. 
This is only for \fIRequest\fR based multipath and +only applies to \fIround-robin\fR path_selector. +.RS +.TP +The default is: \fB1\fR +.RE +. +. +.TP +.B max_fds +Specify the maximum number of file descriptors that can be opened by multipath +and multipathd. This is equivalent to ulimit \-n. A value of \fImax\fR will set +this to the system limit from \fI/proc/sys/fs/nr_open\fR. If this is not set, the +maximum number of open fds is taken from the calling process. It is usually +1024. To be safe, this should be set to the maximum number of paths plus 32, +if that number is greater than 1024. +.RS +.TP +The default is: \fBmax\fR +.RE +. +. +.TP +.B rr_weight +If set to \fIpriorities\fR the multipath configurator will assign path weights +as "path prio * rr_min_io". Possible values are +.I priorities +or +.I uniform . +Only applies to \fIround-robin\fR path_selector. +.RS +.TP +The default is: \fBuniform\fR +.RE +. +. +.TP +.B no_path_retry +Specify what to do when all paths are down. Possible values are: +.RS +.TP 12 +.I value > 0 +Number of retries until I/O queueing is disabled. +.TP +.I fail +For immediate failure (no I/O queueing). +.TP +.I queue +Never stop I/O queueing, similar to \fIqueue_if_no_path\fR. See KNOWN ISSUES. +.TP +The default is: \fBfail\fR +.RE +. +. +.TP +.B queue_without_daemon +If set to +.I no +, when multipathd stops, queueing will be turned off for all devices. +This is useful for devices that set no_path_retry. If a machine is +shut down while all paths to a device are down, it is possible to hang waiting +for I/O to return from the device after multipathd has been stopped. Without +multipathd running, access to the paths cannot be restored, and the kernel +cannot be told to stop queueing I/O. Setting queue_without_daemon to +.I no +, avoids this problem. +.RS +.TP +The default is: \fBno\fR +.RE +. +.
+.TP +.B checker_timeout +Specify the timeout to use for path checkers and prioritizers that issue SCSI +commands with an explicit timeout, in seconds. +.RS +.TP +The default is: in \fB/sys/block/sd<x>/device/timeout\fR +.RE +. +. +.TP +.B flush_on_last_del +If set to +.I yes +, multipathd will disable queueing when the last path to a device has been +deleted. +.RS +.TP +The default is: \fBno\fR +.RE +. +. +.TP +.B user_friendly_names +If set to +.I yes +, using the bindings file \fI/etc/multipath/bindings\fR to assign a persistent +and unique alias to the multipath, in the form of mpath<n>. If set to +.I no +use the WWID as the alias. In either case this will +be overridden by any specific aliases in the \fImultipaths\fR section. +.RS +.TP +The default is: \fBno\fR +.RE +. +. +.TP +.B fast_io_fail_tmo +Specify the number of seconds the SCSI layer will wait after a problem has been +detected on a FC remote port before failing I/O to devices on that remote port. +This should be smaller than dev_loss_tmo. Setting this to +.I off +will disable the timeout. +.RS +.TP +The default is: \fB5\fR +.RE +. +. +.TP +.B dev_loss_tmo +Specify the number of seconds the SCSI layer will wait after a problem has +been detected on a FC remote port before removing it from the system. This +can be set to "infinity" which sets it to the max value of 2147483647 +seconds, or 68 years. It will be automatically adjusted to the overall +retry interval \fIno_path_retry\fR * \fIpolling_interval\fR +if a number of retries is given with \fIno_path_retry\fR and the +overall retry interval is longer than the specified \fIdev_loss_tmo\fR value. +The Linux kernel will cap this value to \fI600\fR if \fIfast_io_fail_tmo\fR +is not set. See KNOWN ISSUES. +.RS +.TP +The default is: \fB600\fR +.RE +. +. +.TP +.B bindings_file +The full pathname of the binding file to be used when the user_friendly_names +option is set. +.RS +.TP +The default is: \fB/etc/multipath/bindings\fR +.RE +. +.
+.TP +.B wwids_file +The full pathname of the WWIDs file, which is used by multipath to keep track +of the WWIDs for LUNs it has created multipath devices on in the past. +.RS +.TP +The default is: \fB/etc/multipath/wwids\fR +.RE +. +. +.TP +.B prkeys_file +The full pathname of the prkeys file, which is used by multipathd to keep +track of the persistent reservation key used for a specific WWID, when +\fIreservation_key\fR is set to \fBfile\fR. +.RS +.TP +The default is: \fB/etc/multipath/prkeys\fR +.RE +. +. +.TP +.B log_checker_err +If set to +.I once +, multipathd logs the first path checker error at logging level 2. Any later +errors are logged at level 3 until the device is restored. If set to +.I always +, multipathd always logs the path checker error at logging level 2. +.RS +.TP +The default is: \fBalways\fR +.RE +. +. +.TP +.B reservation_key +This is the service action reservation key used by mpathpersist. It must be +set for all multipath devices using persistent reservations, and it must be +the same as the RESERVATION KEY field of the PERSISTENT RESERVE OUT parameter +list which contains an 8-byte value provided by the application client to the +device server to identify the I_T nexus. If the \fI--param-aptpl\fR option is +used when registering the key with mpathpersist, \fB:aptpl\fR must be appended +to the end of the reservation key. +.RS +.PP +Alternatively, this can be set to \fBfile\fR, which will store the RESERVATION +KEY registered by mpathpersist in the \fIprkeys_file\fR. multipathd will then +use this key to register additional paths as they appear. When the +registration is removed, the RESERVATION KEY is removed from the +\fIprkeys_file\fR. The prkeys file will automatically keep track of whether +the key was registered with \fI--param-aptpl\fR. +.TP +The default is: \fB\fR +.RE +. +. +.TP +.B all_tg_pt +Set the 'all targets ports' flag when registering keys with mpathpersist. 
Some +arrays automatically set and clear registration keys on all target ports from a +host, instead of per target port per host. The ALL_TG_PT flag must be set to +successfully use mpathpersist on these arrays. Setting this option is identical +to calling mpathpersist with \fI--param-alltgpt\fR. +.RS +.TP +The default is: \fBno\fR +.RE +. +. +.TP +.B retain_attached_hw_handler +(Obsolete for kernels >= 4.3) If set to +.I yes +and the SCSI layer has already attached a hardware_handler to the device, +multipath will not force the device to use the hardware_handler specified by +multipath.conf. If the SCSI layer has not attached a hardware handler, +multipath will continue to use its configured hardware handler. +.RS +.PP +The default is: \fByes\fR +.PP +\fBImportant Note:\fR Linux kernel 4.3 or newer always behaves as if +\fB"retain_attached_hw_handler yes"\fR was set. +.RE +. +. +.TP +.B detect_prio +If set to +.I yes +, multipath will try to detect if the device supports SCSI-3 ALUA. If so, the +device will automatically use the \fIsysfs\fR prioritizer if the required sysfs +attributes \fIaccess_state\fR and \fIpreferred_path\fR are supported, or the +\fIalua\fR prioritizer if not. If set to +.I no +, the prioritizer will be selected as usual. +.RS +.TP +The default is: \fByes\fR +.RE +. +. +.TP +.B detect_checker +If set to +.I yes +, multipath will try to detect if the device supports SCSI-3 ALUA. If so, the +device will automatically use the \fItur\fR checker. If set to +.I no +, the checker will be selected as usual. +.RS +.TP +The default is: \fByes\fR +.RE +. +. +.TP +.B force_sync +If set to +.I yes +, multipathd will call the path checkers in sync mode only. This means that +only one checker will run at a time. This is useful in the case where many +multipathd checkers running in parallel cause significant CPU pressure. +.RS +.TP +The default is: \fBno\fR +.RE +. +.
+.TP +.B strict_timing +If set to +.I yes +, multipathd will start a new path checker loop after exactly one second, +so that each path check will occur at exactly \fIpolling_interval\fR +seconds. On busy systems path checks might take longer than one second; +here the missing ticks will be accounted for on the next round. +A warning will be printed if path checks take longer than \fIpolling_interval\fR +seconds. +.RS +.TP +The default is: \fBno\fR +.RE +. +. +.TP +.B deferred_remove +If set to +.I yes +, multipathd will do a deferred remove instead of a regular remove when the +last path device has been deleted. This means that if the multipath device is +still in use, it will be freed when the last user closes it. If a path is added +to the multipath device before the last user closes it, the deferred remove +will be canceled. +.RS +.TP +The default is: \fBno\fR +.RE +. +. +.TP +.B partition_delimiter +This parameter controls how multipath chooses the names of partition devices +of multipath maps if a multipath map is renamed (e.g. if a map alias is added +or changed). If this parameter is set to a string other than "/UNSET/" (even +the empty string), multipath inserts that string between device name and +partition number to construct the partition device name. +Otherwise (i.e. if this parameter is unset or has the value "/UNSET/"), +the behavior depends on the map name: if it ends in a digit, a \fI"p"\fR is +inserted between name and partition number; otherwise, the partition number is +simply appended. +Distributions may use a non-null default value for this option; in this case, +the user must set it to "/UNSET/" to obtain the original \fB<unset>\fR +behavior. Use \fImultipath -T\fR to check the current settings. +.RS +.TP +The default is: \fB<unset>\fR +.RE +. +.
+.TP +.B config_dir +If set to anything other than "", multipath will search this directory +alphabetically for files ending in ".conf" and it will read configuration +information from them, just as if it was in \fI/etc/multipath.conf\fR. +config_dir must either be "" or a fully qualified directory name. +.RS +.TP +The default is: \fB/etc/multipath/conf.d/\fR +.RE +. +. +.TP +.B san_path_err_threshold +If set to a value greater than 0, multipathd will watch paths and check how many +times a path has failed due to errors. If the number of failures on a particular +path is greater than the san_path_err_threshold, then the path will not be reinstated +until san_path_err_recovery_time has elapsed. These path failures must occur within +san_path_err_forget_rate checks; if not, the path is considered good enough +to reinstate. See "Shaky paths detection" below. +.RS +.TP +The default is: \fBno\fR +.RE +. +. +.TP +.B san_path_err_forget_rate +If set to a value greater than 0, multipathd will check whether the path failures +have exceeded the san_path_err_threshold within this many checks (i.e. +san_path_err_forget_rate). If so, the path will not be reinstated until +san_path_err_recovery_time has elapsed. See "Shaky paths detection" below. +.RS +.TP +The default is: \fBno\fR +.RE +. +. +.TP +.B san_path_err_recovery_time +If set to a value greater than 0, multipathd will make sure that when path failures +have exceeded the san_path_err_threshold within san_path_err_forget_rate checks, the path +will be kept in failed state for the san_path_err_recovery_time duration. Once san_path_err_recovery_time +has expired, the failed path will be reinstated. +The san_path_err_recovery_time value is given in seconds. +See "Shaky paths detection" below. +.RS +.TP +The default is: \fBno\fR +.RE +. +. +.TP +.B marginal_path_double_failed_time +One of the four parameters of supporting path check based on accounting IO +error such as intermittent error.
When a path failed event occurs twice in +\fImarginal_path_double_failed_time\fR seconds due to an IO error and all the +other three parameters are set, multipathd will fail the path and enqueue +this path into a queue of which members are sent a couple of continuous +direct reading asynchronous IOs at a fixed sample rate of 10 Hz to start the IO +error accounting process. See "Shaky paths detection" below. +.RS +.TP +The default is: \fBno\fR +.RE +. +. +.TP +.B marginal_path_err_sample_time +One of the four parameters of supporting path check based on accounting IO +error such as intermittent error. If it is set to a value no less than 120, +when a path fail event occurs twice in \fImarginal_path_double_failed_time\fR +seconds due to an IO error, multipathd will fail the path and enqueue this +path into a queue of which members are sent a couple of continuous direct +reading asynchronous IOs at a fixed sample rate of 10 Hz to start the IO +accounting process for the path, which will last for +\fImarginal_path_err_sample_time\fR seconds. +If the rate of IO errors on a particular path is greater than the +\fImarginal_path_err_rate_threshold\fR, then the path will not reinstate for +\fImarginal_path_err_recheck_gap_time\fR seconds unless there is only one +active path. After \fImarginal_path_err_recheck_gap_time\fR expires, the path +will be requeued for rechecking. If the checking result is good enough, the +path will be reinstated. See "Shaky paths detection" below. +.RS +.TP +The default is: \fBno\fR +.RE +. +. +.TP +.B marginal_path_err_rate_threshold +The error rate threshold as a permillage (1/1000). One of the four parameters +of supporting path check based on accounting IO error such as intermittent +error. Refer to \fImarginal_path_err_sample_time\fR. If the rate of IO errors +on a particular path is greater than this parameter, then the path will not +reinstate for \fImarginal_path_err_recheck_gap_time\fR seconds unless there is +only one active path.
See "Shaky paths detection" below. +.RS +.TP +The default is: \fBno\fR +.RE +. +. +.TP +.B marginal_path_err_recheck_gap_time +One of the four parameters of supporting path check based on accounting IO +error such as intermittent error. Refer to +\fImarginal_path_err_sample_time\fR. If this parameter is set to a positive +value, the failed path of which the IO error rate is larger than +\fImarginal_path_err_rate_threshold\fR will be kept in failed state for +\fImarginal_path_err_recheck_gap_time\fR seconds. When +\fImarginal_path_err_recheck_gap_time\fR seconds expire, the path will be +requeued for checking. If the checking result is good enough, the path will be +reinstated, or else it will remain failed. See "Shaky paths detection" below. +.RS +.TP +The default is: \fBno\fR +.RE +. +. +.TP +.B delay_watch_checks +This option is \fBdeprecated\fR, and mapped to \fIsan_path_err_forget_rate\fR. +If this is set to a value greater than 0 and no \fIsan_path_err\fR options +are set, \fIsan_path_err_forget_rate\fR will be set to the value of +\fIdelay_watch_checks\fR and \fIsan_path_err_threshold\fR will be set to 1. +See the \fIsan_path_err_forget_rate\fR and \fIsan_path_err_threshold\fR +options, and "Shaky paths detection" below for more information. +.RS +.TP +The default is: \fBno\fR +.RE +. +. +.TP +.B delay_wait_checks +This option is \fBdeprecated\fR, and mapped to \fIsan_path_err_recovery_time\fR. +If this is set to a value greater than 0 and no \fIsan_path_err\fR options +are set, \fIsan_path_err_recovery_time\fR will be set to the value of +\fIdelay_wait_checks\fR times \fImax_polling_interval\fR. This will give +approximately the same wait time as delay_wait_checks previously did. +Also, \fIsan_path_err_threshold\fR will be set to 1. See the +\fIsan_path_err_recovery_time\fR and \fIsan_path_err_threshold\fR +options, and "Shaky paths detection" below for more information. +.RS +.TP +The default is: \fBno\fR +.RE +. +.
+.TP +.B marginal_pathgroups +If set to \fIno\fR, the \fIdelay_*_checks\fR, \fImarginal_path_*\fR, and +\fIsan_path_err_*\fR options will keep marginal, or \(dqshaky\(dq, paths from +being reinstated until they have been monitored for some time. This can cause +situations where all non-marginal paths are down, and no paths are usable +until multipathd detects this and reinstates a marginal path. If the multipath +device is not configured to queue IO in this case, it can cause IO errors to +occur, even though there are marginal paths available. However, if this +option is set to \fIyes\fR, when one of the marginal path detecting methods +determines that a path is marginal, it will be reinstated and placed in a +separate pathgroup that will only be used after all the non-marginal pathgroups +have been tried first. This prevents the possibility of IO errors occurring +while marginal paths are still usable. After the path has been monitored +for the configured time, and is declared healthy, it will be returned to its +normal pathgroup. See "Shaky paths detection" below for more information. +.RS +.TP +The default is: \fBno\fR +.RE +. +. +.TP +.B find_multipaths +This option controls whether multipath and multipathd try to create multipath +maps over non-blacklisted devices they encounter. This matters a) when a device is +encountered by \fBmultipath -u\fR during udev rule processing (a device is +blocked from further processing by higher layers - such as LVM - if and only +if it\'s considered a valid multipath device path), and b) when multipathd +detects a new device. The following values are possible: +.RS +.TP 10 +.I strict +Both multipath and multipathd treat only such devices as multipath devices +which have been part of a multipath map previously, and which are therefore +listed in the \fBwwids_file\fR. Users can manually set up multipath maps using the +\fBmultipathd add map\fR command.
Once set up manually, the map is +remembered in the wwids file and will be set up automatically in the future. +.TP +.I no +Multipath behaves like \fBstrict\fR. Multipathd behaves like \fBgreedy\fR. +.TP +.I yes +Both multipathd and multipath treat a device as multipath device if the +conditions for \fBstrict\fR are met, or if at least two non-blacklisted paths +with the same WWID have been detected. +.TP +.I greedy +Both multipathd and multipath treat every non-blacklisted device as multipath +device path. +.TP +.I smart +This differs from \fIfind_multipaths yes\fR only in +the way it treats new devices for which only one path has been +detected so far. When such a device is first encountered in udev rules, it is +treated as a multipath device. multipathd waits to see whether additional paths with +the same WWID appear. If that happens, it sets up a multipath map. If it +doesn\'t happen until a +timeout expires, or if setting up the map fails, a new uevent is triggered for +the device; at the second encounter in the udev rules, the device will be treated +as non-multipath and passed on to upper layers. +\fBNote:\fR this may cause delays during device detection if +there are single-path devices which aren\'t blacklisted. +.TP +The default is: \fBstrict\fR +.RE +. +. +.TP +.B find_multipaths_timeout +Timeout, in seconds, to wait for additional paths after detecting the first +one, if \fIfind_multipaths +"smart"\fR (see above) is set. If the value is \fBpositive\fR, this timeout is used for all +unknown, non-blacklisted devices encountered. If the value is \fBnegative\fR +(recommended), it's only +applied to "known" devices that have an entry in multipath's hardware table, +either in the built-in table or in a \fIdevice\fR section; other ("unknown") devices will +use a timeout of only 1 second to avoid booting delays. The value 0 means +"use the built-in default". If \fIfind_multipaths\fR has a value +other than \fIsmart\fR, this option has no effect.
+.RS +.TP +The default is: \fB-10\fR (10s for known and 1s for unknown hardware) +.RE +. +. +.TP +.B uxsock_timeout +CLI receive timeout in milliseconds. For larger systems CLI commands +might time out before the multipathd lock is released and the CLI command +can be processed. This will result in errors like +"timeout receiving packet" to be returned from CLI commands. +In these cases it is recommended to increase the CLI timeout to avoid +those issues. +.RS +.TP +The default is: \fB1000\fR +.RE +. +. +.TP +.B retrigger_tries +Sets the number of times multipathd will try to retrigger a uevent to get the +WWID. +.RS +.TP +The default is: \fB3\fR +.RE +. +. +.TP +.B retrigger_delay +Sets the amount of time, in seconds, to wait between retriggers. +.RS +.TP +The default is: \fB10\fR +.RE +. +. +.TP +.B missing_uev_wait_timeout +Controls how many seconds multipathd will wait, after a new multipath device +is created, to receive a change event from udev for the device, before +automatically enabling device reloads. Usually multipathd will delay reloads +on a device until it receives a change uevent from the initial table load. +.RS +.TP +The default is: \fB30\fR +.RE +. +. +.TP +.B skip_kpartx +If set to +.I yes +, kpartx will not automatically create partitions on the device. +.RS +.TP +The default is: \fBno\fR +.RE +. +. +.TP +.B disable_changed_wwids +This option is deprecated and ignored. If the WWID of a path suddenly changes, +multipathd handles it as if it was removed and then added again. +.RE +. +. +.TP +.B remove_retries +This sets how many times multipath will retry removing a device that is in-use. +Between each attempt, multipath will sleep 1 second. +.RS +.TP +The default is: \fB0\fR +.RE +. +. +.TP +.B max_sectors_kb +Sets the max_sectors_kb device parameter on all path devices and the multipath +device to the specified value. +.RS +.TP +The default is: \fB\fR +.RE +. +.
+.TP +.B ghost_delay +Sets the number of seconds that multipath will wait after creating a device +with only ghost paths before marking it ready for use in systemd. This gives +the active paths time to appear before the multipath runs the hardware handler +to switch the ghost paths to active ones. Setting this to \fI0\fR or \fIno\fR +makes multipath immediately mark a device with only ghost paths as ready. +.RS +.TP +The default is: \fBno\fR +.RE +. +. +.TP +.B enable_foreign +Enables or disables foreign libraries (see section +.I FOREIGN MULTIPATH SUPPORT +below). The value is a regular expression; foreign libraries are loaded +if their name (e.g. \(dqnvme\(dq) matches the expression. By default, +all foreign libraries are enabled. +.RS +.TP +The default is: \fB\(dq\(dq\fR (the empty regular expression) +.RE +. +. + +. +.\" ---------------------------------------------------------------------------- +.SH "blacklist and blacklist_exceptions sections" +.\" ---------------------------------------------------------------------------- +. +The \fIblacklist\fR section is used to exclude specific devices from +the multipath topology. It is most commonly used to exclude local disks or +non-disk devices (such as LUNs for the storage array controller) from +being handled by multipath-tools. +.LP +. +. +The \fIblacklist_exceptions\fR section is used to revert the actions of the +\fIblacklist\fR section. This allows one to selectively include ("whitelist") devices which +would normally be excluded via the \fIblacklist\fR section. A common usage is +to blacklist "everything" using a catch-all regular expression, and create +specific blacklist_exceptions entries for those devices that should be handled +by multipath-tools. +.LP +. +. +The following keywords are recognized in both sections. The defaults are empty +unless explicitly stated. +.TP 17 +.B devnode +Regular expression matching the device nodes to be excluded/included.
+.RS +.PP +The default \fIblacklist\fR consists of the regular expressions +"^(ram|zram|raw|loop|fd|md|dm-|sr|scd|st|dcssblk)[0-9]" and +"^(td|hd|vd)[a-z]". This causes virtual devices, non-disk devices, and some other +device types to be excluded from multipath handling by default. +.RE +.TP +.B wwid +Regular expression for the \fIWorld Wide Identifier\fR of a device to be excluded/included. +. +.TP +.B device +Subsection for the device description. This subsection recognizes the +.B vendor +and +.B product +keywords. Both are regular expressions. For a full description of these keywords please see the +\fIdevices\fR section description. +.TP +.B property +Regular expression for an udev property. All +devices that have matching udev properties will be excluded/included. +The handling of the \fIproperty\fR keyword is special, +because devices \fBmust\fR have at least one whitelisted udev property; +otherwise they're treated as blacklisted, and the message +"\fIblacklisted, udev property missing\fR" is displayed in the logs. +. +.RS +.PP +.B Note: +The behavior of this option has changed in \fBmultipath-tools\fR 0.8.2 +compared to previous versions. +Blacklisting by missing properties is only applied to devices which do have the +property specified by \fIuid_attribute\fR (e.g. \fIID_SERIAL\fR) +set. Previously, it was applied to every device, possibly causing devices to be +blacklisted because of temporary I/O error conditions. +.PP +The default \fIblacklist exception\fR is: \fB(SCSI_IDENT_|ID_WWN)\fR, causing +well-behaved SCSI devices and devices that provide a WWN (World Wide Number) +to be included, and all others to be excluded. +.RE +.TP +.B protocol +Regular expression for the protocol of a device to be excluded/included. 
+.RS +.PP +The protocol strings that multipath recognizes are \fIscsi:fcp\fR, +\fIscsi:spi\fR, \fIscsi:ssa\fR, \fIscsi:sbp\fR, \fIscsi:srp\fR, +\fIscsi:iscsi\fR, \fIscsi:sas\fR, \fIscsi:adt\fR, \fIscsi:ata\fR, +\fIscsi:unspec\fR, \fIccw\fR, \fIcciss\fR, \fInvme\fR, and \fIundef\fR. +The protocol that a path is using can be viewed by running +\fBmultipathd show paths format "%d %P"\fR +.RE +.LP +For every device, these 5 blacklist criteria are evaluated in the order +"property, dev\%node, device, protocol, wwid". If a device turns out to be +blacklisted by any criterion, it's excluded from handling by multipathd, and +the later criteria aren't evaluated any more. For each +criterion, the whitelist takes precedence over the blacklist if a device +matches both. +.LP +.B +Note: +Besides the blacklist and whitelist, other configuration options such as +\fIfind_multipaths\fR have an impact on +whether or not a given device is handled by multipath-tools. +. +. +.\" ---------------------------------------------------------------------------- +.SH "multipaths section" +.\" ---------------------------------------------------------------------------- +. +The \fImultipaths\fR section allows setting attributes of multipath maps. The +attributes that are set via the multipaths section (see list below) take +precedence over all other configuration settings, including those from the +\fIoverrides\fR section. +.LP +The only recognized attribute for the \fImultipaths\fR section is the +\fImultipath\fR subsection. If there are multiple \fImultipath\fR subsections +matching a given WWID, the contents of these sections are merged, and settings +from later entries take precedence. +.LP +. +. +The \fImultipath\fR subsection recognizes the following attributes: +.TP 17 +.B wwid +(Mandatory) World Wide Identifier. Detected multipath maps are matched against this attribute.
+Note that, unlike the \fIwwid\fR attribute in the \fIblacklist\fR section, +this is \fBnot\fR a regular expression or a substring; WWIDs must match +exactly inside the multipaths section. +.TP +.B alias +Symbolic name for the multipath map. This takes precedence over an entry for +the same WWID in the \fIbindings_file\fR. +.LP +. +. +The following attributes are optional; if not set the default values +are taken from the \fIoverrides\fR, \fIdevices\fR, or \fIdefaults\fR +section: +.sp 1 +.PD .1v +.RS +.TP 18 +.B path_grouping_policy +.TP +.B path_selector +.TP +.B prio +.TP +.B prio_args +.TP +.B failback +.TP +.B rr_weight +.TP +.B no_path_retry +.TP +.B rr_min_io +.TP +.B rr_min_io_rq +.TP +.B flush_on_last_del +.TP +.B features +.TP +.B reservation_key +.TP +.B user_friendly_names +.TP +.B deferred_remove +.TP +.B san_path_err_threshold +.TP +.B san_path_err_forget_rate +.TP +.B san_path_err_recovery_time +.TP +.B marginal_path_err_sample_time +.TP +.B marginal_path_err_rate_threshold +.TP +.B marginal_path_err_recheck_gap_time +.TP +.B marginal_path_double_failed_time +.TP +.B delay_watch_checks +.TP +.B delay_wait_checks +.TP +.B skip_kpartx +.TP +.B max_sectors_kb +.TP +.B ghost_delay +.RE +.PD +.LP +. +. +.\" ---------------------------------------------------------------------------- +.SH "devices section" +.\" ---------------------------------------------------------------------------- +. +\fImultipath-tools\fR have a built-in device table with reasonable defaults +for more than 100 known multipath-capable storage devices. The devices section +can be used to override these settings. If there are multiple matches for a +given device, the attributes of all matching entries are applied to it. +If an attribute is specified in several matching device subsections, +later entries take precedence.
Thus, entries in files under \fIconfig_dir\fR (in +reverse alphabetical order) have the highest precedence, followed by entries +in \fImultipath.conf\fR; the built-in hardware table has the lowest +precedence. Inside a configuration file, later entries have higher precedence +than earlier ones. +.LP +The only recognized attribute for the \fIdevices\fR section is the \fIdevice\fR +subsection. Devices detected in the system are matched against the device entries +using the \fBvendor\fR, \fBproduct\fR, and \fBrevision\fR fields, which are +all POSIX Extended regular expressions (see \fBregex\fR(7)). +.LP +The vendor, product, and revision fields that multipath or multipathd detect for +devices in a system depend on the device type. For SCSI devices, they correspond to the +respective fields of the SCSI INQUIRY page. In general, the command '\fImultipathd show +paths format "%d %s"\fR' can be used to see the detected properties +for all devices in the system. +.LP +. +The \fIdevice\fR subsection recognizes the following attributes: +.TP 17 +.B vendor +(Mandatory) Regular expression to match the vendor name. +.TP +.B product +(Mandatory) Regular expression to match the product name. +.TP +.B revision +Regular expression to match the product revision. If not specified, any +revision matches. +.TP +.B product_blacklist +Products with the given \fBvendor\fR matching this string are +blacklisted. This is equivalent to a \fBdevice\fR entry in the \fIblacklist\fR +section with the \fIvendor\fR attribute set to this entry's \fIvendor\fR, and +the \fIproduct\fR attribute set to the value of \fIproduct_blacklist\fR. +.TP +.B alias_prefix +The user_friendly_names prefix to use for this +device type, instead of the default "mpath". +.TP +.B vpd_vendor +The vendor specific vpd page information, using the vpd page abbreviation. +The vpd page abbreviation can be found by running \fIsg_vpd -e\fR.
multipathd +will use this information to gather device specific information that can be +displayed with the \fI%g\fR wildcard for the \fImultipathd show maps format\fR +and \fImultipathd show paths format\fR commands. Currently only the +\fBhp3par\fR vpd page is supported. +.TP +.B hardware_handler +The hardware handler to use for this device type. +The following hardware handlers are implemented: +.RS +.TP 12 +.I 1 emc +(Hardware-dependent) +Hardware handler for DGC class arrays as CLARiiON CX/AX and EMC VNX and Unity +families. +.TP +.I 1 rdac +(Hardware-dependent) +Hardware handler for LSI/Engenio/NetApp RDAC class as NetApp SANtricity E/EF +Series, and OEM arrays from IBM DELL SGI STK and SUN. +.TP +.I 1 hp_sw +(Hardware-dependent) +Hardware handler for HP/COMPAQ/DEC HSG80 and MSA/HSV arrays with +Active/Standby mode exclusively. +.TP +.I 1 alua +(Hardware-dependent) +Hardware handler for SCSI-3 ALUA compatible arrays. +.TP +.I 1 ana +(Hardware-dependent) +Hardware handler for NVMe ANA compatible arrays. +.PP +The default is: \fB\fR +.PP +\fBImportant Note:\fR Linux kernels 4.3 and newer automatically attach a device +handler to known devices (which includes all devices supporting SCSI-3 ALUA) +and disallow changing the handler +afterwards. Setting \fBhardware_handler\fR for such devices on these kernels +has no effect. +.RE +. +.
+.LP +The following attributes are optional; if not set the default values +are taken from the \fIdefaults\fR +section: +.sp 1 +.PD .1v +.RS +.TP 18 +.B path_grouping_policy +.TP +.B uid_attribute +.TP +.B getuid_callout +.TP +.B path_selector +.TP +.B path_checker +.TP +.B prio +.TP +.B prio_args +.TP +.B features +.TP +.B failback +.TP +.B rr_weight +.TP +.B no_path_retry +.TP +.B rr_min_io +.TP +.B rr_min_io_rq +.TP +.B fast_io_fail_tmo +.TP +.B dev_loss_tmo +.TP +.B flush_on_last_del +.TP +.B user_friendly_names +.TP +.B retain_attached_hw_handler +.TP +.B detect_prio +.TP +.B detect_checker +.TP +.B deferred_remove +.TP +.B san_path_err_threshold +.TP +.B san_path_err_forget_rate +.TP +.B san_path_err_recovery_time +.TP +.B marginal_path_err_sample_time +.TP +.B marginal_path_err_rate_threshold +.TP +.B marginal_path_err_recheck_gap_time +.TP +.B marginal_path_double_failed_time +.TP +.B delay_watch_checks +.TP +.B delay_wait_checks +.TP +.B skip_kpartx +.TP +.B max_sectors_kb +.TP +.B ghost_delay +.TP +.B all_tg_pt +.RE +.PD +.LP +. +. +.\" ---------------------------------------------------------------------------- +.SH "overrides section" +.\" ---------------------------------------------------------------------------- +. 
+The overrides section recognizes the following optional attributes; if not set +the values are taken from the \fIdevices\fR or \fIdefaults\fR sections: +.sp 1 +.PD .1v +.RS +.TP 18 +.B path_grouping_policy +.TP +.B uid_attribute +.TP +.B getuid_callout +.TP +.B path_selector +.TP +.B path_checker +.TP +.B alias_prefix +.TP +.B features +.TP +.B prio +.TP +.B prio_args +.TP +.B failback +.TP +.B rr_weight +.TP +.B no_path_retry +.TP +.B rr_min_io +.TP +.B rr_min_io_rq +.TP +.B flush_on_last_del +.TP +.B fast_io_fail_tmo +.TP +.B dev_loss_tmo +.TP +.B user_friendly_names +.TP +.B retain_attached_hw_handler +.TP +.B detect_prio +.TP +.B detect_checker +.TP +.B deferred_remove +.TP +.B san_path_err_threshold +.TP +.B san_path_err_forget_rate +.TP +.B san_path_err_recovery_time +.TP +.B marginal_path_err_sample_time +.TP +.B marginal_path_err_rate_threshold +.TP +.B marginal_path_err_recheck_gap_time +.TP +.B marginal_path_double_failed_time +.TP +.B delay_watch_checks +.TP +.B delay_wait_checks +.TP +.B skip_kpartx +.TP +.B max_sectors_kb +.TP +.B ghost_delay +.TP +.B all_tg_pt +.RE +.PD +.LP +. +. +.\" ---------------------------------------------------------------------------- +.SH "WWID generation" +.\" ---------------------------------------------------------------------------- +. +Multipath uses a \fIWorld Wide Identification\fR (WWID) to determine +which paths belong to the same device. Each path presenting the same +WWID is assumed to point to the same device. +.LP +The WWID is generated by four methods (in the order of preference): +.TP 17 +.B uid_attrs +The WWID is derived from udev attributes by matching the device node name; cf +\fIuid_attrs\fR above. +.TP +.B getuid_callout +Use the specified external program; cf \fIgetuid_callout\fR above. +Care should be taken when using this method; the external program +needs to be loaded from disk for execution, which might lead to +deadlock situations in an all-paths-down scenario. 
+.TP +.B uid_attribute +Use the value of the specified udev attribute; cf \fIuid_attribute\fR +above. This method is preferred to \fIgetuid_callout\fR as multipath +does not need to call any external programs here. However, under +certain circumstances udev might not be able to generate the requested +variable. +.TP +.B sysfs +Try to determine the WWID from sysfs attributes. +For SCSI devices, this means reading the Vital Product Data (VPD) page +\(dqDevice Identification\(dq (0x83). +.PP +The default settings (using udev and \fBuid_attribute\fR configured from +the built-in hardware table) should work fine +in most scenarios. Users who want to enable uevent merging must set +\fBuid_attrs\fR. +. +. +.\" ---------------------------------------------------------------------------- +.SH "Shaky paths detection" +.\" ---------------------------------------------------------------------------- +. +A common problem in SAN setups is the occurrence of intermittent errors: a +path is unreachable, then reachable again for a short time, disappears again, +and so forth. This happens typically on unstable interconnects. It is +undesirable to switch pathgroups unnecessarily on such frequent, unreliable +events. \fImultipathd\fR supports three different methods for detecting this +situation and dealing with it. All methods share the same basic mode of +operation: If a path is found to be \(dqshaky\(dq or \(dqflipping\(dq, +and appears to be in healthy status, it is not reinstated (put back to use) +immediately. Instead, it is placed in the \(dqdelayed\(dq state and watched +for some time, and only reinstated if the healthy state appears to be stable. +If the \fImarginal_pathgroups\fR option is set, the path will be reinstated +immediately, but placed in a special pathgroup for marginal paths. Marginal +pathgroups will not be used until all other pathgroups have been tried. At the +time when the path would normally be reinstated, it will be returned to its +normal pathgroup.
The logic of determining the \(dqshaky\(dq condition, as well as +the logic when to reinstate, differs between the three methods. +.TP 8 +.B \(dqdelay_checks\(dq failure tracking +This method is \fBdeprecated\fR and mapped to the \(dqsan_path_err\(dq method. +See the \fIdelay_watch_checks\fR and \fIdelay_wait_checks\fR options above +for more information. + +.TP +.B \(dqmarginal_path\(dq failure tracking +If a second failure event (good->bad transition) occurs within +\fImarginal_path_double_failed_time\fR seconds after a failure, high-frequency +monitoring is started for the affected path: I/O is sent at a rate of 10 per +second. This is done for \fImarginal_path_err_sample_time\fR seconds. During +this period, the path is not reinstated. If the +rate of errors remains below \fImarginal_path_err_rate_threshold\fR during the +monitoring period, the path is reinstated. Otherwise, it +is kept in failed state for \fImarginal_path_err_recheck_gap_time\fR, and +after that, it is monitored again. For this method, time intervals are measured +in seconds. +.TP +.B \(dqsan_path_err\(dq failure tracking +multipathd counts path failures for each path. Once the number of failures +exceeds the value given by \fIsan_path_err_threshold\fR, the path is not +reinstated for \fIsan_path_err_recovery_time\fR seconds. While counting +failures, multipathd \(dqforgets\(dq one past failure every +\(dqsan_path_err_forget_rate\(dq ticks; thus if errors don't occur more +often than once in the forget rate interval, the failure count doesn't +increase and the threshold is never reached. Ticks are the time between +path checks by multipathd, which is variable and controlled by the +\fIpolling_interval\fR and \fImax_polling_interval\fR parameters. +. +.RS 8 +.LP +This method is \fBdeprecated\fR in favor of the \(dqmarginal_path\(dq failure +tracking method, and only offered for backward compatibility. +. +.RE +.LP +See the documentation +of the individual options above for details.
+It is \fBstrongly discouraged\fR to use more than one of these methods for any +given multipath map, because the two concurrent methods may interact in +unpredictable ways. If the \(dqmarginal_path\(dq method is active, the +\(dqsan_path_err\(dq parameters are implicitly set to 0. +. +. +.\" ---------------------------------------------------------------------------- +.SH "FOREIGN MULTIPATH SUPPORT" +.\" ---------------------------------------------------------------------------- +. +multipath and multipathd can load \(dqforeign\(dq libraries to add +support for other multipathing technologies besides the Linux device mapper. +Currently this support is limited to printing detected information about +multipath setup. In topology output, the names of foreign maps are prefixed by +the foreign library name in square brackets, as in this example: +. +.P +.EX +# multipath -ll +uuid.fedcba98-3579-4567-8765-123456789abc [nvme]:nvme4n9 NVMe,Some NVMe controller,FFFFFFFF +size=167772160 features='n/a' hwhandler='ANA' wp=rw +|-+- policy='n/a' prio=50 status=optimized +| `- 4:38:1 nvme4c38n1 0:0 n/a optimized live +`-+- policy='n/a' prio=50 status=optimized + `- 4:39:1 nvme4c39n1 0:0 n/a optimized live +.EE +. +.P +The \(dqnvme\(dq foreign library provides support for NVMe native multipathing +in the kernel. It is part of the standard multipath package. +. +.\" ---------------------------------------------------------------------------- +.SH "KNOWN ISSUES" +.\" ---------------------------------------------------------------------------- +. +The usage of \fIqueue_if_no_path\fR option can lead to \fID state\fR +processes being hung and not killable in situations where all the paths to the +LUN go offline. It is advisable to use the \fIno_path_retry\fR option instead. +.P +The use of \fIqueue_if_no_path\fR or \fIno_path_retry\fR might lead to a +deadlock if the \fIdev_loss_tmo\fR setting results in a device being removed +while I/O is still queued. 
The multipath daemon will update the \fIdev_loss_tmo\fR +setting accordingly to avoid this deadlock. Hence if both values are +specified the order of precedence is \fIno_path_retry, queue_if_no_path, dev_loss_tmo\fR. +. +. +.\" ---------------------------------------------------------------------------- +.SH "SEE ALSO" +.\" ---------------------------------------------------------------------------- +. +.BR udev (8), +.BR dmsetup (8), +.BR multipath (8), +.BR multipathd (8). +. +. +.\" ---------------------------------------------------------------------------- +.SH AUTHORS +.\" ---------------------------------------------------------------------------- +. +\fImultipath-tools\fR was developed by Christophe Varoqui, +and others. +.\" EOF diff --git a/multipath/multipath.rules b/multipath/multipath.rules new file mode 100644 index 0000000..9df11a9 --- /dev/null +++ b/multipath/multipath.rules @@ -0,0 +1,91 @@ +# Set DM_MULTIPATH_DEVICE_PATH if the device should be handled by multipath +SUBSYSTEM!="block", GOTO="end_mpath" +KERNEL!="sd*|dasd*|nvme*", GOTO="end_mpath" +ACTION=="remove", TEST=="/dev/shm/multipath/find_multipaths/$major:$minor", \ + RUN+="/usr/bin/rm -f /dev/shm/multipath/find_multipaths/$major:$minor" +ACTION!="add|change", GOTO="end_mpath" + +IMPORT{cmdline}="nompath" +ENV{nompath}=="?*", GOTO="end_mpath" +IMPORT{cmdline}="multipath" +ENV{multipath}=="off", GOTO="end_mpath" + +ENV{DEVTYPE}!="partition", GOTO="test_dev" +IMPORT{parent}="DM_MULTIPATH_DEVICE_PATH" +ENV{DM_MULTIPATH_DEVICE_PATH}=="1", ENV{ID_FS_TYPE}="none", \ + ENV{SYSTEMD_READY}="0" +GOTO="end_mpath" + +LABEL="test_dev" + +ENV{MPATH_SBIN_PATH}="/sbin" +TEST!="$env{MPATH_SBIN_PATH}/multipath", ENV{MPATH_SBIN_PATH}="/usr/sbin" + +# FIND_MULTIPATHS_WAIT_UNTIL is the timeout (in seconds after the +# epoch). 
+IMPORT{db}="FIND_MULTIPATHS_WAIT_UNTIL" +ENV{.SAVED_FM_WAIT_UNTIL}="$env{FIND_MULTIPATHS_WAIT_UNTIL}" + +# multipath -u needs to know if this device has ever been exported +IMPORT{db}="DM_MULTIPATH_DEVICE_PATH" + +# multipath -u sets DM_MULTIPATH_DEVICE_PATH and, +# if "find_multipaths smart", also FIND_MULTIPATHS_WAIT_UNTIL. +IMPORT{program}="$env{MPATH_SBIN_PATH}/multipath -u %k" + +# case 1: this is definitely multipath +ENV{DM_MULTIPATH_DEVICE_PATH}=="1", \ + ENV{ID_FS_TYPE}="mpath_member", ENV{SYSTEMD_READY}="0", \ + GOTO="stop_wait" + +# case 2: this is definitely not multipath, or timeout has expired +ENV{DM_MULTIPATH_DEVICE_PATH}!="2", \ + GOTO="stop_wait" + +# Code below here is only run in "smart" mode. +# multipath -u has indicated this is "maybe" multipath. + +# Note that DM_MULTIPATH_DEVICE_PATH has the value 2 at this point. +# This value will never propagate to other rules files, because +# it will be reset to 1 in the "pretend_mpath" section below. + +# This shouldn't happen, just in case. +ENV{FIND_MULTIPATHS_WAIT_UNTIL}!="?*", GOTO="end_mpath" + +# Be careful not to start the timer twice. +ACTION!="add", GOTO="pretend_mpath" +ENV{.SAVED_FM_WAIT_UNTIL}=="?*", GOTO="pretend_mpath" + +# At this point, we are seeing this path for the first time, and it's "maybe" multipath. + +# The actual start command for the timer. +# +# The purpose of this command is only to make sure we will receive another +# uevent eventually. *Any* uevent may cause waiting to finish if it either ends +# in case 1 or 2 above, or if it arrives after FIND_MULTIPATHS_WAIT_UNTIL. +# +# Note that this will try to activate multipathd if it isn't running yet. +# If that fails, the unit starts and expires nonetheless. If multipathd +# startup needs to wait for other services, this wait time will add up with +# the --on-active timeout. +# +# We must trigger an "add" event because LVM2 will only act on those. 
+ +RUN+="/usr/bin/systemd-run --unit=cancel-multipath-wait-$kernel --description 'cancel waiting for multipath siblings of $kernel' --no-block --timer-property DefaultDependencies=no --timer-property Conflicts=shutdown.target --timer-property Before=shutdown.target --timer-property AccuracySec=500ms --property DefaultDependencies=no --property Conflicts=shutdown.target --property Before=shutdown.target --property Wants=multipathd.service --property After=multipathd.service --on-active=$env{FIND_MULTIPATHS_WAIT_UNTIL} /usr/bin/udevadm trigger --action=add $sys$devpath" + +LABEL="pretend_mpath" +ENV{DM_MULTIPATH_DEVICE_PATH}="1" +ENV{SYSTEMD_READY}="0" +GOTO="end_mpath" + +LABEL="stop_wait" +# If timeout hasn't expired but we're not in "maybe" state any more, stop timer +# Do this only once, and only if the timer has been started before +IMPORT{db}="FIND_MULTIPATHS_WAIT_CANCELLED" +ENV{FIND_MULTIPATHS_WAIT_CANCELLED}!="?*", \ + ENV{FIND_MULTIPATHS_WAIT_UNTIL}=="?*", \ + ENV{FIND_MULTIPATHS_WAIT_UNTIL}!="0", \ + ENV{FIND_MULTIPATHS_WAIT_CANCELLED}="1", \ + RUN+="/usr/bin/systemctl stop cancel-multipath-wait-$kernel.timer" + +LABEL="end_mpath" diff --git a/multipathd/Makefile b/multipathd/Makefile new file mode 100644 index 0000000..8d90117 --- /dev/null +++ b/multipathd/Makefile @@ -0,0 +1,65 @@ +include ../Makefile.inc + +# +# debugging stuff +# +#CFLAGS += -DLCKDBG +#CFLAGS += -D_DEBUG_ +#CFLAGS += -DLOGDBG +CFLAGS += $(BIN_CFLAGS) -I$(multipathdir) -I$(mpathpersistdir) \ + -I$(mpathcmddir) -I$(thirdpartydir) +LDFLAGS += $(BIN_LDFLAGS) +LIBDEPS += -L$(multipathdir) -lmultipath -L$(mpathpersistdir) -lmpathpersist \ + -L$(mpathcmddir) -lmpathcmd -ludev -ldl -lurcu -lpthread \ + -ldevmapper -lreadline + +ifdef SYSTEMD + CFLAGS += -DUSE_SYSTEMD=$(SYSTEMD) + ifeq ($(shell test $(SYSTEMD) -gt 209 && echo 1), 1) + LIBDEPS += -lsystemd + else + LIBDEPS += -lsystemd-daemon + endif +endif +ifeq ($(ENABLE_DMEVENTS_POLL),0) + CFLAGS += -DNO_DMEVENTS_POLL +endif + +OBJS = main.o 
pidfile.o uxlsnr.o uxclnt.o cli.o cli_handlers.o waiter.o \ + dmevents.o + +EXEC = multipathd + +all : $(EXEC) + +$(EXEC): $(OBJS) $(multipathdir)/libmultipath.so $(mpathcmddir)/libmpathcmd.so + $(CC) $(CFLAGS) $(OBJS) $(LDFLAGS) -o $(EXEC) $(LIBDEPS) + $(GZIP) $(EXEC).8 > $(EXEC).8.gz + +cli_handlers.o: cli_handlers.c + $(CC) $(CFLAGS) -Wno-unused-parameter -c -o $@ $< + +install: + $(INSTALL_PROGRAM) -d $(DESTDIR)$(bindir) + $(INSTALL_PROGRAM) -m 755 $(EXEC) $(DESTDIR)$(bindir) +ifdef SYSTEMD + $(INSTALL_PROGRAM) -d $(DESTDIR)$(unitdir) + $(INSTALL_PROGRAM) -m 644 $(EXEC).service $(DESTDIR)$(unitdir) + $(INSTALL_PROGRAM) -m 644 $(EXEC).socket $(DESTDIR)$(unitdir) +endif + $(INSTALL_PROGRAM) -d $(DESTDIR)$(man8dir) + $(INSTALL_PROGRAM) -m 644 $(EXEC).8.gz $(DESTDIR)$(man8dir) + +uninstall: + $(RM) $(DESTDIR)$(bindir)/$(EXEC) + $(RM) $(DESTDIR)$(man8dir)/$(EXEC).8.gz + $(RM) $(DESTDIR)$(unitdir)/$(EXEC).service + $(RM) $(DESTDIR)$(unitdir)/$(EXEC).socket + +clean: dep_clean + $(RM) core *.o $(EXEC) *.gz + +include $(wildcard $(OBJS:.o=.d)) + +dep_clean: + $(RM) $(OBJS:.o=.d) diff --git a/multipathd/cli.c b/multipathd/cli.c new file mode 100644 index 0000000..800c0fb --- /dev/null +++ b/multipathd/cli.c @@ -0,0 +1,712 @@ +/* + * Copyright (c) 2005 Christophe Varoqui + */ +#include +#include +#include +#include "memory.h" +#include "vector.h" +#include "structs.h" +#include "structs_vec.h" +#include "parser.h" +#include "util.h" +#include "version.h" +#include + +#include "mpath_cmd.h" +#include "cli.h" +#include "debug.h" + +static vector keys; +static vector handlers; + +static struct key * +alloc_key (void) +{ + return (struct key *)MALLOC(sizeof(struct key)); +} + +static struct handler * +alloc_handler (void) +{ + return (struct handler *)MALLOC(sizeof(struct handler)); +} + +static int +add_key (vector vec, char * str, uint64_t code, int has_param) +{ + struct key * kw; + + kw = alloc_key(); + + if (!kw) + return 1; + + kw->code = code; + kw->has_param = 
has_param; + kw->str = STRDUP(str); + + if (!kw->str) + goto out; + + if (!vector_alloc_slot(vec)) + goto out1; + + vector_set_slot(vec, kw); + + return 0; + +out1: + FREE(kw->str); +out: + FREE(kw); + return 1; +} + +int +add_handler (uint64_t fp, int (*fn)(void *, char **, int *, void *)) +{ + struct handler * h; + + h = alloc_handler(); + + if (!h) + return 1; + + if (!vector_alloc_slot(handlers)) { + FREE(h); + return 1; + } + + vector_set_slot(handlers, h); + h->fingerprint = fp; + h->fn = fn; + + return 0; +} + +static struct handler * +find_handler (uint64_t fp) +{ + int i; + struct handler *h; + + vector_foreach_slot (handlers, h, i) + if (h->fingerprint == fp) + return h; + + return NULL; +} + +int +set_handler_callback (uint64_t fp, int (*fn)(void *, char **, int *, void *)) +{ + struct handler * h = find_handler(fp); + + if (!h) + return 1; + h->fn = fn; + h->locked = 1; + return 0; +} + +int +set_unlocked_handler_callback (uint64_t fp,int (*fn)(void *, char **, int *, void *)) +{ + struct handler * h = find_handler(fp); + + if (!h) + return 1; + h->fn = fn; + h->locked = 0; + return 0; +} + +static void +free_key (struct key * kw) +{ + if (kw->str) + FREE(kw->str); + + if (kw->param) + FREE(kw->param); + + FREE(kw); +} + +void +free_keys (vector vec) +{ + int i; + struct key * kw; + + vector_foreach_slot (vec, kw, i) + free_key(kw); + + vector_free(vec); +} + +void +free_handlers (void) +{ + int i; + struct handler * h; + + vector_foreach_slot (handlers, h, i) + FREE(h); + + vector_free(handlers); + handlers = NULL; +} + +int +load_keys (void) +{ + int r = 0; + keys = vector_alloc(); + + if (!keys) + return 1; + + r += add_key(keys, "list", LIST, 0); + r += add_key(keys, "show", LIST, 0); + r += add_key(keys, "add", ADD, 0); + r += add_key(keys, "remove", DEL, 0); + r += add_key(keys, "del", DEL, 0); + r += add_key(keys, "switch", SWITCH, 0); + r += add_key(keys, "switchgroup", SWITCH, 0); + r += add_key(keys, "suspend", SUSPEND, 0); + r += 
add_key(keys, "resume", RESUME, 0); + r += add_key(keys, "reinstate", REINSTATE, 0); + r += add_key(keys, "fail", FAIL, 0); + r += add_key(keys, "resize", RESIZE, 0); + r += add_key(keys, "reset", RESET, 0); + r += add_key(keys, "reload", RELOAD, 0); + r += add_key(keys, "forcequeueing", FORCEQ, 0); + r += add_key(keys, "disablequeueing", DISABLEQ, 0); + r += add_key(keys, "restorequeueing", RESTOREQ, 0); + r += add_key(keys, "paths", PATHS, 0); + r += add_key(keys, "maps", MAPS, 0); + r += add_key(keys, "multipaths", MAPS, 0); + r += add_key(keys, "path", PATH, 1); + r += add_key(keys, "map", MAP, 1); + r += add_key(keys, "multipath", MAP, 1); + r += add_key(keys, "group", GROUP, 1); + r += add_key(keys, "reconfigure", RECONFIGURE, 0); + r += add_key(keys, "daemon", DAEMON, 0); + r += add_key(keys, "status", STATUS, 0); + r += add_key(keys, "stats", STATS, 0); + r += add_key(keys, "topology", TOPOLOGY, 0); + r += add_key(keys, "config", CONFIG, 0); + r += add_key(keys, "blacklist", BLACKLIST, 0); + r += add_key(keys, "devices", DEVICES, 0); + r += add_key(keys, "raw", RAW, 0); + r += add_key(keys, "wildcards", WILDCARDS, 0); + r += add_key(keys, "quit", QUIT, 0); + r += add_key(keys, "exit", QUIT, 0); + r += add_key(keys, "shutdown", SHUTDOWN, 0); + r += add_key(keys, "getprstatus", GETPRSTATUS, 0); + r += add_key(keys, "setprstatus", SETPRSTATUS, 0); + r += add_key(keys, "unsetprstatus", UNSETPRSTATUS, 0); + r += add_key(keys, "format", FMT, 1); + r += add_key(keys, "json", JSON, 0); + r += add_key(keys, "getprkey", GETPRKEY, 0); + r += add_key(keys, "setprkey", SETPRKEY, 0); + r += add_key(keys, "unsetprkey", UNSETPRKEY, 0); + r += add_key(keys, "key", KEY, 1); + r += add_key(keys, "local", LOCAL, 0); + r += add_key(keys, "setmarginal", SETMARGINAL, 0); + r += add_key(keys, "unsetmarginal", UNSETMARGINAL, 0); + + + if (r) { + free_keys(keys); + keys = NULL; + return 1; + } + return 0; +} + +static struct key * +find_key (const char * str) +{ + int i; + int len, 
klen; + struct key * kw = NULL; + struct key * foundkw = NULL; + + len = strlen(str); + + vector_foreach_slot (keys, kw, i) { + if (strncmp(kw->str, str, len)) + continue; + klen = strlen(kw->str); + if (len == klen) + return kw; /* exact match */ + if (len < klen) { + if (!foundkw) + foundkw = kw; /* shortcut match */ + else + return NULL; /* ambiguous word */ + } + } + return foundkw; +} + +/* + * get_cmdvec + * + * returns: + * ENOMEM: not enough memory to allocate command + * EAGAIN: command not found + * EINVAL: argument missing for command + */ +static int +get_cmdvec (char * cmd, vector *v) +{ + int i; + int r = 0; + int get_param = 0; + char * buff; + struct key * kw = NULL; + struct key * cmdkw = NULL; + vector cmdvec, strvec; + + strvec = alloc_strvec(cmd); + if (!strvec) + return ENOMEM; + + cmdvec = vector_alloc(); + + if (!cmdvec) { + free_strvec(strvec); + return ENOMEM; + } + + vector_foreach_slot(strvec, buff, i) { + if (is_quote(buff)) + continue; + if (get_param) { + get_param = 0; + cmdkw->param = strdup(buff); + continue; + } + kw = find_key(buff); + if (!kw) { + r = EAGAIN; + goto out; + } + cmdkw = alloc_key(); + if (!cmdkw) { + r = ENOMEM; + goto out; + } + if (!vector_alloc_slot(cmdvec)) { + FREE(cmdkw); + r = ENOMEM; + goto out; + } + vector_set_slot(cmdvec, cmdkw); + cmdkw->code = kw->code; + cmdkw->has_param = kw->has_param; + if (kw->has_param) + get_param = 1; + } + if (get_param) { + r = EINVAL; + goto out; + } + *v = cmdvec; + free_strvec(strvec); + return 0; + +out: + free_strvec(strvec); + free_keys(cmdvec); + return r; +} + +static uint64_t +fingerprint(vector vec) +{ + int i; + uint64_t fp = 0; + struct key * kw; + + if (!vec) + return 0; + + vector_foreach_slot(vec, kw, i) + fp += kw->code; + + return fp; +} + +int +alloc_handlers (void) +{ + handlers = vector_alloc(); + + if (!handlers) + return 1; + + return 0; +} + +static int +genhelp_sprint_aliases (char * reply, int maxlen, vector keys, + struct key * refkw) +{ + int i, len 
= 0; + struct key * kw; + + vector_foreach_slot (keys, kw, i) { + if (kw->code == refkw->code && kw != refkw) { + len += snprintf(reply + len, maxlen - len, + "|%s", kw->str); + if (len >= maxlen) + return len; + } + } + + return len; +} + +static int +do_genhelp(char *reply, int maxlen, const char *cmd, int error) { + int len = 0; + int i, j; + uint64_t fp; + struct handler * h; + struct key * kw; + + switch(error) { + case ENOMEM: + len += snprintf(reply + len, maxlen - len, + "%s: Not enough memory\n", cmd); + break; + case EAGAIN: + len += snprintf(reply + len, maxlen - len, + "%s: not found\n", cmd); + break; + case EINVAL: + len += snprintf(reply + len, maxlen - len, + "%s: Missing argument\n", cmd); + break; + } + if (len >= maxlen) + goto out; + len += snprintf(reply + len, maxlen - len, VERSION_STRING); + if (len >= maxlen) + goto out; + len += snprintf(reply + len, maxlen - len, "CLI commands reference:\n"); + if (len >= maxlen) + goto out; + + vector_foreach_slot (handlers, h, i) { + fp = h->fingerprint; + vector_foreach_slot (keys, kw, j) { + if ((kw->code & fp)) { + fp -= kw->code; + len += snprintf(reply + len , maxlen - len, + " %s", kw->str); + if (len >= maxlen) + goto out; + len += genhelp_sprint_aliases(reply + len, + maxlen - len, + keys, kw); + if (len >= maxlen) + goto out; + + if (kw->has_param) { + len += snprintf(reply + len, + maxlen - len, + " $%s", kw->str); + if (len >= maxlen) + goto out; + } + } + } + len += snprintf(reply + len, maxlen - len, "\n"); + if (len >= maxlen) + goto out; + } +out: + return len; +} + + +static char * +genhelp_handler (const char *cmd, int error) +{ + char * reply; + char * p = NULL; + int maxlen = INITIAL_REPLY_LEN; + int again = 1; + + reply = MALLOC(maxlen); + + while (again) { + if (!reply) + return NULL; + p = reply; + p += do_genhelp(reply, maxlen, cmd, error); + again = ((p - reply) >= maxlen); + REALLOC_REPLY(reply, again, maxlen); + } + return reply; +} + +int +parse_cmd (char * cmd, char ** reply, 
int * len, void * data, int timeout ) +{ + int r; + struct handler * h; + vector cmdvec = NULL; + struct timespec tmo; + + r = get_cmdvec(cmd, &cmdvec); + + if (r) { + *reply = genhelp_handler(cmd, r); + if (*reply == NULL) + return EINVAL; + *len = strlen(*reply) + 1; + return 0; + } + + h = find_handler(fingerprint(cmdvec)); + + if (!h || !h->fn) { + free_keys(cmdvec); + *reply = genhelp_handler(cmd, EINVAL); + if (*reply == NULL) + return EINVAL; + *len = strlen(*reply) + 1; + return 0; + } + + /* + * execute handler + */ + if (clock_gettime(CLOCK_REALTIME, &tmo) == 0) { + tmo.tv_sec += timeout; + } else { + tmo.tv_sec = 0; + } + if (h->locked) { + int locked = 0; + struct vectors * vecs = (struct vectors *)data; + + pthread_cleanup_push(cleanup_lock, &vecs->lock); + if (tmo.tv_sec) { + r = timedlock(&vecs->lock, &tmo); + } else { + lock(&vecs->lock); + r = 0; + } + if (r == 0) { + locked = 1; + pthread_testcancel(); + r = h->fn(cmdvec, reply, len, data); + } + pthread_cleanup_pop(locked); + } else + r = h->fn(cmdvec, reply, len, data); + free_keys(cmdvec); + + return r; +} + +char * +get_keyparam (vector v, uint64_t code) +{ + struct key * kw; + int i; + + vector_foreach_slot(v, kw, i) + if (kw->code == code) + return kw->param; + + return NULL; +} + +int +cli_init (void) { + if (load_keys()) + return 1; + + if (alloc_handlers()) + return 1; + + add_handler(LIST+PATHS, NULL); + add_handler(LIST+PATHS+FMT, NULL); + add_handler(LIST+PATHS+RAW+FMT, NULL); + add_handler(LIST+PATH, NULL); + add_handler(LIST+STATUS, NULL); + add_handler(LIST+DAEMON, NULL); + add_handler(LIST+MAPS, NULL); + add_handler(LIST+MAPS+STATUS, NULL); + add_handler(LIST+MAPS+STATS, NULL); + add_handler(LIST+MAPS+FMT, NULL); + add_handler(LIST+MAPS+RAW+FMT, NULL); + add_handler(LIST+MAPS+TOPOLOGY, NULL); + add_handler(LIST+MAPS+JSON, NULL); + add_handler(LIST+TOPOLOGY, NULL); + add_handler(LIST+MAP+TOPOLOGY, NULL); + add_handler(LIST+MAP+JSON, NULL); + add_handler(LIST+MAP+FMT, NULL); + 
add_handler(LIST+MAP+RAW+FMT, NULL); + add_handler(LIST+CONFIG, NULL); + add_handler(LIST+CONFIG+LOCAL, NULL); + add_handler(LIST+BLACKLIST, NULL); + add_handler(LIST+DEVICES, NULL); + add_handler(LIST+WILDCARDS, NULL); + add_handler(RESET+MAPS+STATS, NULL); + add_handler(RESET+MAP+STATS, NULL); + add_handler(ADD+PATH, NULL); + add_handler(DEL+PATH, NULL); + add_handler(ADD+MAP, NULL); + add_handler(DEL+MAP, NULL); + add_handler(SWITCH+MAP+GROUP, NULL); + add_handler(RECONFIGURE, NULL); + add_handler(SUSPEND+MAP, NULL); + add_handler(RESUME+MAP, NULL); + add_handler(RESIZE+MAP, NULL); + add_handler(RESET+MAP, NULL); + add_handler(RELOAD+MAP, NULL); + add_handler(DISABLEQ+MAP, NULL); + add_handler(RESTOREQ+MAP, NULL); + add_handler(DISABLEQ+MAPS, NULL); + add_handler(RESTOREQ+MAPS, NULL); + add_handler(REINSTATE+PATH, NULL); + add_handler(FAIL+PATH, NULL); + add_handler(QUIT, NULL); + add_handler(SHUTDOWN, NULL); + add_handler(GETPRSTATUS+MAP, NULL); + add_handler(SETPRSTATUS+MAP, NULL); + add_handler(UNSETPRSTATUS+MAP, NULL); + add_handler(GETPRKEY+MAP, NULL); + add_handler(SETPRKEY+MAP+KEY, NULL); + add_handler(UNSETPRKEY+MAP, NULL); + add_handler(FORCEQ+DAEMON, NULL); + add_handler(RESTOREQ+DAEMON, NULL); + add_handler(SETMARGINAL+PATH, NULL); + add_handler(UNSETMARGINAL+PATH, NULL); + add_handler(UNSETMARGINAL+MAP, NULL); + + return 0; +} + +void cli_exit(void) +{ + free_handlers(); + free_keys(keys); + keys = NULL; +} + +static int +key_match_fingerprint (struct key * kw, uint64_t fp) +{ + if (!fp) + return 0; + + return ((fp & kw->code) == kw->code); +} + +/* + * This is the readline completion handler + */ +char * +key_generator (const char * str, int state) +{ + static int index, len, has_param; + static uint64_t rlfp; + struct key * kw; + int i; + struct handler *h; + vector v = NULL; + + if (!state) { + index = 0; + has_param = 0; + rlfp = 0; + len = strlen(str); + int r = get_cmdvec(rl_line_buffer, &v); + /* + * If a word completion is in progress, we 
don't want + * to take an exact keyword match in the fingerprint. + * For ex "show map[tab]" would validate "map" and discard + * "maps" as a valid candidate. + */ + if (v && len) + vector_del_slot(v, VECTOR_SIZE(v) - 1); + /* + * Clean up the mess if we dropped the last slot of a 1-slot + * vector + */ + if (v && !VECTOR_SIZE(v)) { + vector_free(v); + v = NULL; + } + /* + * If last keyword takes a param, don't even try to guess + */ + if (r == EINVAL) { + has_param = 1; + return (strdup("(value)")); + } + /* + * Compute a command fingerprint to find out possible completions. + * Once done, the vector is useless. Free it. + */ + if (v) { + rlfp = fingerprint(v); + free_keys(v); + } + } + /* + * No more completions for parameter placeholder. + * Brave souls might try to add parameter completion by walking paths and + * multipaths vectors. + */ + if (has_param) + return ((char *)NULL); + /* + * Loop through keywords for completion candidates + */ + vector_foreach_slot_after (keys, kw, index) { + if (!strncmp(kw->str, str, len)) { + /* + * Discard keywords already in the command line + */ + if (key_match_fingerprint(kw, rlfp)) { + struct key * curkw = find_key(str); + if (!curkw || (curkw != kw)) + continue; + } + /* + * Discard keywords making syntax errors. + * + * nfp is the candidate fingerprint we try to + * validate against all known command fingerprints. 
+ */ + uint64_t nfp = rlfp | kw->code; + vector_foreach_slot(handlers, h, i) { + if (!rlfp || ((h->fingerprint & nfp) == nfp)) { + /* + * At least one full command is + * possible with this keyword : + * Consider it validated + */ + index++; + return (strdup(kw->str)); + } + } + } + } + /* + * No more candidates + */ + return ((char *)NULL); +} diff --git a/multipathd/cli.h b/multipathd/cli.h new file mode 100644 index 0000000..fdfb9ae --- /dev/null +++ b/multipathd/cli.h @@ -0,0 +1,146 @@ +#ifndef _CLI_H_ +#define _CLI_H_ + +#include + +enum { + __LIST, + __ADD, + __DEL, + __SWITCH, + __SUSPEND, + __RESUME, + __REINSTATE, + __FAIL, + __RESIZE, + __RESET, + __RELOAD, + __FORCEQ, + __DISABLEQ, + __RESTOREQ, + __PATHS, + __MAPS, + __PATH, + __MAP, + __GROUP, + __RECONFIGURE, + __DAEMON, + __STATUS, + __STATS, + __TOPOLOGY, + __CONFIG, + __BLACKLIST, + __DEVICES, + __RAW, + __WILDCARDS, + __QUIT, + __SHUTDOWN, + __GETPRSTATUS, + __SETPRSTATUS, + __UNSETPRSTATUS, + __FMT, + __JSON, + __GETPRKEY, + __SETPRKEY, + __UNSETPRKEY, + __KEY, + __LOCAL, + __SETMARGINAL, + __UNSETMARGINAL, +}; + +#define LIST (1 << __LIST) +#define ADD (1 << __ADD) +#define DEL (1 << __DEL) +#define SWITCH (1 << __SWITCH) +#define SUSPEND (1 << __SUSPEND) +#define RESUME (1 << __RESUME) +#define REINSTATE (1 << __REINSTATE) +#define FAIL (1 << __FAIL) +#define RESIZE (1 << __RESIZE) +#define RESET (1 << __RESET) +#define RELOAD (1 << __RELOAD) +#define FORCEQ (1 << __FORCEQ) +#define DISABLEQ (1 << __DISABLEQ) +#define RESTOREQ (1 << __RESTOREQ) +#define PATHS (1 << __PATHS) +#define MAPS (1 << __MAPS) +#define PATH (1 << __PATH) +#define MAP (1 << __MAP) +#define GROUP (1 << __GROUP) +#define RECONFIGURE (1 << __RECONFIGURE) +#define DAEMON (1 << __DAEMON) +#define STATUS (1 << __STATUS) +#define STATS (1 << __STATS) +#define TOPOLOGY (1 << __TOPOLOGY) +#define CONFIG (1 << __CONFIG) +#define BLACKLIST (1 << __BLACKLIST) +#define DEVICES (1 << __DEVICES) +#define RAW (1 << __RAW) +#define COUNT 
(1 << __COUNT) +#define WILDCARDS (1 << __WILDCARDS) +#define QUIT (1 << __QUIT) +#define SHUTDOWN (1 << __SHUTDOWN) +#define GETPRSTATUS (1ULL << __GETPRSTATUS) +#define SETPRSTATUS (1ULL << __SETPRSTATUS) +#define UNSETPRSTATUS (1ULL << __UNSETPRSTATUS) +#define FMT (1ULL << __FMT) +#define JSON (1ULL << __JSON) +#define GETPRKEY (1ULL << __GETPRKEY) +#define SETPRKEY (1ULL << __SETPRKEY) +#define UNSETPRKEY (1ULL << __UNSETPRKEY) +#define KEY (1ULL << __KEY) +#define LOCAL (1ULL << __LOCAL) +#define SETMARGINAL (1ULL << __SETMARGINAL) +#define UNSETMARGINAL (1ULL << __UNSETMARGINAL) + +#define INITIAL_REPLY_LEN 1200 + +#define REALLOC_REPLY(r, a, m) \ + do { \ + if ((a)) { \ + char *tmp = (r); \ + \ + if (m >= MAX_REPLY_LEN) { \ + condlog(1, "Warning: max reply length exceeded"); \ + free(tmp); \ + (r) = NULL; \ + } else { \ + (r) = REALLOC((r), (m) * 2); \ + if ((r)) { \ + memset((r) + (m), 0, (m)); \ + (m) *= 2; \ + } \ + else \ + free(tmp); \ + } \ + } \ + } while (0) + +struct key { + char * str; + char * param; + uint64_t code; + int has_param; +}; + +struct handler { + uint64_t fingerprint; + int locked; + int (*fn)(void *, char **, int *, void *); +}; + +int alloc_handlers (void); +int add_handler (uint64_t fp, int (*fn)(void *, char **, int *, void *)); +int set_handler_callback (uint64_t fp, int (*fn)(void *, char **, int *, void *)); +int set_unlocked_handler_callback (uint64_t fp, int (*fn)(void *, char **, int *, void *)); +int parse_cmd (char * cmd, char ** reply, int * len, void *, int); +int load_keys (void); +char * get_keyparam (vector v, uint64_t code); +void free_keys (vector vec); +void free_handlers (void); +int cli_init (void); +void cli_exit(void); +char * key_generator (const char * str, int state); + +#endif /* _CLI_H_ */ diff --git a/multipathd/cli_handlers.c b/multipathd/cli_handlers.c new file mode 100644 index 0000000..7d878c8 --- /dev/null +++ b/multipathd/cli_handlers.c @@ -0,0 +1,1625 @@ +/* + * Copyright (c) 2005 Christophe 
Varoqui + */ + +#define _GNU_SOURCE + +#include "checkers.h" +#include "memory.h" +#include "vector.h" +#include "structs.h" +#include "structs_vec.h" +#include +#include "devmapper.h" +#include "discovery.h" +#include "config.h" +#include "configure.h" +#include "blacklist.h" +#include "debug.h" +#include "dm-generic.h" +#include "print.h" +#include "sysfs.h" +#include +#include +#include +#include "util.h" +#include "prkey.h" +#include "propsel.h" +#include "main.h" +#include "mpath_cmd.h" +#include "cli.h" +#include "uevent.h" +#include "foreign.h" +#include "cli_handlers.h" + +int +show_paths (char ** r, int * len, struct vectors * vecs, char * style, + int pretty) +{ + int i; + struct path * pp; + char * c; + char * reply, * header; + unsigned int maxlen = INITIAL_REPLY_LEN; + int again = 1; + + get_path_layout(vecs->pathvec, 1); + foreign_path_layout(); + + reply = MALLOC(maxlen); + + while (again) { + if (!reply) + return 1; + + c = reply; + + if (pretty) + c += snprint_path_header(c, reply + maxlen - c, + style); + header = c; + + vector_foreach_slot(vecs->pathvec, pp, i) + c += snprint_path(c, reply + maxlen - c, + style, pp, pretty); + + c += snprint_foreign_paths(c, reply + maxlen - c, + style, pretty); + + again = ((c - reply) == (maxlen - 1)); + + REALLOC_REPLY(reply, again, maxlen); + } + + if (pretty && c == header) { + /* No output - clear header */ + *reply = '\0'; + c = reply; + } + + *r = reply; + *len = (int)(c - reply + 1); + return 0; +} + +int +show_path (char ** r, int * len, struct vectors * vecs, struct path *pp, + char * style) +{ + char * c; + char * reply; + unsigned int maxlen = INITIAL_REPLY_LEN; + int again = 1; + + get_path_layout(vecs->pathvec, 1); + reply = MALLOC(maxlen); + + while (again) { + if (!reply) + return 1; + + c = reply; + + c += snprint_path(c, reply + maxlen - c, style, pp, 0); + + again = ((c - reply) == (maxlen - 1)); + + REALLOC_REPLY(reply, again, maxlen); + } + *r = reply; + *len = (int)(c - reply + 1); + return 
0; +} + +int +show_map_topology (char ** r, int * len, struct multipath * mpp, + struct vectors * vecs) +{ + char * c; + char * reply; + unsigned int maxlen = INITIAL_REPLY_LEN; + int again = 1; + + if (update_multipath(vecs, mpp->alias, 0)) + return 1; + reply = MALLOC(maxlen); + + while (again) { + if (!reply) + return 1; + + c = reply; + + c += snprint_multipath_topology(c, reply + maxlen - c, mpp, 2); + again = ((c - reply) == (maxlen - 1)); + + REALLOC_REPLY(reply, again, maxlen); + } + *r = reply; + *len = (int)(c - reply + 1); + return 0; +} + +int +show_maps_topology (char ** r, int * len, struct vectors * vecs) +{ + int i; + struct multipath * mpp; + char * c; + char * reply; + unsigned int maxlen = INITIAL_REPLY_LEN; + int again = 1; + + get_path_layout(vecs->pathvec, 0); + foreign_path_layout(); + + reply = MALLOC(maxlen); + + while (again) { + if (!reply) + return 1; + + c = reply; + + vector_foreach_slot(vecs->mpvec, mpp, i) { + if (update_multipath(vecs, mpp->alias, 0)) { + i--; + continue; + } + c += snprint_multipath_topology(c, reply + maxlen - c, + mpp, 2); + } + c += snprint_foreign_topology(c, reply + maxlen - c, 2); + + again = ((c - reply) == (maxlen - 1)); + + REALLOC_REPLY(reply, again, maxlen); + } + + *r = reply; + *len = (int)(c - reply + 1); + return 0; +} + +int +show_maps_json (char ** r, int * len, struct vectors * vecs) +{ + int i; + struct multipath * mpp; + char * c; + char * reply; + unsigned int maxlen = INITIAL_REPLY_LEN; + int again = 1; + + if (VECTOR_SIZE(vecs->mpvec) > 0) + maxlen *= PRINT_JSON_MULTIPLIER * VECTOR_SIZE(vecs->mpvec); + + vector_foreach_slot(vecs->mpvec, mpp, i) { + if (update_multipath(vecs, mpp->alias, 0)) { + return 1; + } + } + + reply = MALLOC(maxlen); + + while (again) { + if (!reply) + return 1; + + c = reply; + + c += snprint_multipath_topology_json(c, maxlen, vecs); + again = ((c - reply) == maxlen); + + REALLOC_REPLY(reply, again, maxlen); + } + *r = reply; + *len = (int)(c - reply); + return 0; +} + 
+int +show_map_json (char ** r, int * len, struct multipath * mpp, + struct vectors * vecs) +{ + char * c; + char * reply; + unsigned int maxlen = INITIAL_REPLY_LEN; + int again = 1; + + if (update_multipath(vecs, mpp->alias, 0)) + return 1; + reply = MALLOC(maxlen); + + while (again) { + if (!reply) + return 1; + + c = reply; + + c += snprint_multipath_map_json(c, maxlen, mpp); + again = ((c - reply) == maxlen); + + REALLOC_REPLY(reply, again, maxlen); + } + *r = reply; + *len = (int)(c - reply); + return 0; +} + +static int +show_config (char ** r, int * len, const struct _vector *hwtable, + const struct _vector *mpvec) +{ + struct config *conf; + char *reply; + + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + reply = snprint_config(conf, len, hwtable, mpvec); + pthread_cleanup_pop(1); + if (reply == NULL) + return 1; + *r = reply; + return 0; +} + +void +reset_stats(struct multipath * mpp) +{ + mpp->stat_switchgroup = 0; + mpp->stat_path_failures = 0; + mpp->stat_map_loads = 0; + mpp->stat_total_queueing_time = 0; + mpp->stat_queueing_timeouts = 0; + mpp->stat_map_failures = 0; +} + +int +cli_list_config (void * v, char ** reply, int * len, void * data) +{ + condlog(3, "list config (operator)"); + + return show_config(reply, len, NULL, NULL); +} + +static void v_free(void *x) +{ + vector_free(x); +} + +int +cli_list_config_local (void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + vector hwes; + int ret; + + condlog(3, "list config local (operator)"); + + hwes = get_used_hwes(vecs->pathvec); + pthread_cleanup_push(v_free, hwes); + ret = show_config(reply, len, hwes, vecs->mpvec); + pthread_cleanup_pop(1); + return ret; +} + +int +cli_list_paths (void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + + condlog(3, "list paths (operator)"); + + return show_paths(reply, len, vecs, PRINT_PATH_CHECKER, 1); +} + +int 
+cli_list_paths_fmt (void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + char * fmt = get_keyparam(v, FMT); + + condlog(3, "list paths (operator)"); + + return show_paths(reply, len, vecs, fmt, 1); +} + +int +cli_list_paths_raw (void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + char * fmt = get_keyparam(v, FMT); + + condlog(3, "list paths (operator)"); + + return show_paths(reply, len, vecs, fmt, 0); +} + +int +cli_list_path (void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + char * param = get_keyparam(v, PATH); + struct path *pp; + + param = convert_dev(param, 1); + condlog(3, "%s: list path (operator)", param); + + pp = find_path_by_dev(vecs->pathvec, param); + if (!pp) + return 1; + + return show_path(reply, len, vecs, pp, "%o"); +} + +int +cli_list_map_topology (void * v, char ** reply, int * len, void * data) +{ + struct multipath * mpp; + struct vectors * vecs = (struct vectors *)data; + char * param = get_keyparam(v, MAP); + + param = convert_dev(param, 0); + get_path_layout(vecs->pathvec, 0); + mpp = find_mp_by_str(vecs->mpvec, param); + + if (!mpp) + return 1; + + condlog(3, "list multipath %s (operator)", param); + + return show_map_topology(reply, len, mpp, vecs); +} + +int +cli_list_maps_topology (void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + + condlog(3, "list multipaths (operator)"); + + return show_maps_topology(reply, len, vecs); +} + +int +cli_list_map_json (void * v, char ** reply, int * len, void * data) +{ + struct multipath * mpp; + struct vectors * vecs = (struct vectors *)data; + char * param = get_keyparam(v, MAP); + + param = convert_dev(param, 0); + get_path_layout(vecs->pathvec, 0); + mpp = find_mp_by_str(vecs->mpvec, param); + + if (!mpp) + return 1; + + condlog(3, "list multipath json %s (operator)", param); + + return 
show_map_json(reply, len, mpp, vecs); +} + +int +cli_list_maps_json (void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + + condlog(3, "list multipaths json (operator)"); + + return show_maps_json(reply, len, vecs); +} + +int +cli_list_wildcards (void * v, char ** reply, int * len, void * data) +{ + char * c; + + *reply = MALLOC(INITIAL_REPLY_LEN); + + if (!*reply) + return 1; + + c = *reply; + c += snprint_wildcards(c, INITIAL_REPLY_LEN); + + *len = INITIAL_REPLY_LEN; + return 0; +} + +int +show_status (char ** r, int *len, struct vectors * vecs) +{ + char * c; + char * reply; + + unsigned int maxlen = INITIAL_REPLY_LEN; + reply = MALLOC(maxlen); + + if (!reply) + return 1; + + c = reply; + c += snprint_status(c, reply + maxlen - c, vecs); + + *r = reply; + *len = (int)(c - reply + 1); + return 0; +} + +int +show_daemon (char ** r, int *len) +{ + char * c; + char * reply; + + unsigned int maxlen = INITIAL_REPLY_LEN; + reply = MALLOC(maxlen); + + if (!reply) + return 1; + + c = reply; + c += snprintf(c, INITIAL_REPLY_LEN, "pid %d %s\n", + daemon_pid, daemon_status()); + + *r = reply; + *len = (int)(c - reply + 1); + return 0; +} + +int +show_map (char ** r, int *len, struct multipath * mpp, char * style, + int pretty) +{ + char * c; + char * reply; + unsigned int maxlen = INITIAL_REPLY_LEN; + int again = 1; + + reply = MALLOC(maxlen); + while (again) { + if (!reply) + return 1; + + c = reply; + c += snprint_multipath(c, reply + maxlen - c, style, + mpp, pretty); + + again = ((c - reply) == (maxlen - 1)); + + REALLOC_REPLY(reply, again, maxlen); + } + *r = reply; + *len = (int)(c - reply + 1); + return 0; +} + +int +show_maps (char ** r, int *len, struct vectors * vecs, char * style, + int pretty) +{ + int i; + struct multipath * mpp; + char * c, *header; + char * reply; + unsigned int maxlen = INITIAL_REPLY_LEN; + int again = 1; + + get_multipath_layout(vecs->mpvec, 1); + foreign_multipath_layout(); + + reply = 
MALLOC(maxlen); + + while (again) { + if (!reply) + return 1; + + c = reply; + if (pretty) + c += snprint_multipath_header(c, reply + maxlen - c, + style); + header = c; + + vector_foreach_slot(vecs->mpvec, mpp, i) { + if (update_multipath(vecs, mpp->alias, 0)) { + i--; + continue; + } + c += snprint_multipath(c, reply + maxlen - c, + style, mpp, pretty); + + } + c += snprint_foreign_multipaths(c, reply + maxlen - c, + style, pretty); + again = ((c - reply) == (maxlen - 1)); + + REALLOC_REPLY(reply, again, maxlen); + } + + if (pretty && c == header) { + /* No output - clear header */ + *reply = '\0'; + c = reply; + } + *r = reply; + *len = (int)(c - reply + 1); + return 0; +} + +int +cli_list_maps_fmt (void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + char * fmt = get_keyparam(v, FMT); + + condlog(3, "list maps (operator)"); + + return show_maps(reply, len, vecs, fmt, 1); +} + +int +cli_list_maps_raw (void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + char * fmt = get_keyparam(v, FMT); + + condlog(3, "list maps (operator)"); + + return show_maps(reply, len, vecs, fmt, 0); +} + +int +cli_list_map_fmt (void * v, char ** reply, int * len, void * data) +{ + struct multipath * mpp; + struct vectors * vecs = (struct vectors *)data; + char * param = get_keyparam(v, MAP); + char * fmt = get_keyparam(v, FMT); + + param = convert_dev(param, 0); + get_path_layout(vecs->pathvec, 0); + get_multipath_layout(vecs->mpvec, 1); + mpp = find_mp_by_str(vecs->mpvec, param); + if (!mpp) + return 1; + + condlog(3, "list map %s fmt %s (operator)", param, fmt); + + return show_map(reply, len, mpp, fmt, 1); +} + +int +cli_list_map_raw (void * v, char ** reply, int * len, void * data) +{ + struct multipath * mpp; + struct vectors * vecs = (struct vectors *)data; + char * param = get_keyparam(v, MAP); + char * fmt = get_keyparam(v, FMT); + + param = convert_dev(param, 0); + 
get_path_layout(vecs->pathvec, 0); + get_multipath_layout(vecs->mpvec, 1); + mpp = find_mp_by_str(vecs->mpvec, param); + if (!mpp) + return 1; + + condlog(3, "list map %s fmt %s (operator)", param, fmt); + + return show_map(reply, len, mpp, fmt, 0); +} + +int +cli_list_maps (void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + + condlog(3, "list maps (operator)"); + + return show_maps(reply, len, vecs, PRINT_MAP_NAMES, 1); +} + +int +cli_list_status (void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + + condlog(3, "list status (operator)"); + + return show_status(reply, len, vecs); +} + +int +cli_list_maps_status (void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + + condlog(3, "list maps status (operator)"); + + return show_maps(reply, len, vecs, PRINT_MAP_STATUS, 1); +} + +int +cli_list_maps_stats (void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + + condlog(3, "list maps stats (operator)"); + + return show_maps(reply, len, vecs, PRINT_MAP_STATS, 1); +} + +int +cli_list_daemon (void * v, char ** reply, int * len, void * data) +{ + condlog(3, "list daemon (operator)"); + + return show_daemon(reply, len); +} + +int +cli_reset_maps_stats (void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + int i; + struct multipath * mpp; + + condlog(3, "reset multipaths stats (operator)"); + + vector_foreach_slot(vecs->mpvec, mpp, i) { + reset_stats(mpp); + } + return 0; +} + +int +cli_reset_map_stats (void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + struct multipath * mpp; + char * param = get_keyparam(v, MAP); + + param = convert_dev(param, 0); + mpp = find_mp_by_str(vecs->mpvec, param); + + if (!mpp) + return 1; + + condlog(3, "reset multipath %s stats (operator)", 
param); + reset_stats(mpp); + return 0; +} + +int +cli_add_path (void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + char * param = get_keyparam(v, PATH); + struct path *pp; + int r; + struct config *conf; + int invalid = 0; + + param = convert_dev(param, 1); + condlog(2, "%s: add path (operator)", param); + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + if (filter_devnode(conf->blist_devnode, conf->elist_devnode, + param) > 0) + invalid = 1; + pthread_cleanup_pop(1); + if (invalid) + goto blacklisted; + + pp = find_path_by_dev(vecs->pathvec, param); + if (pp) { + condlog(2, "%s: path already in pathvec", param); + if (pp->mpp) + return 0; + } else { + struct udev_device *udevice; + + udevice = udev_device_new_from_subsystem_sysname(udev, + "block", + param); + if (!udevice) { + condlog(0, "%s: can't find path", param); + return 1; + } + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + r = store_pathinfo(vecs->pathvec, conf, + udevice, DI_ALL | DI_BLACKLIST, &pp); + pthread_cleanup_pop(1); + udev_device_unref(udevice); + if (!pp) { + if (r == 2) + goto blacklisted; + condlog(0, "%s: failed to store path info", param); + return 1; + } + } + return ev_add_path(pp, vecs, 1); +blacklisted: + *reply = strdup("blacklisted\n"); + *len = strlen(*reply) + 1; + condlog(2, "%s: path blacklisted", param); + return 0; +} + +int +cli_del_path (void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + char * param = get_keyparam(v, PATH); + struct path *pp; + + param = convert_dev(param, 1); + condlog(2, "%s: remove path (operator)", param); + pp = find_path_by_dev(vecs->pathvec, param); + if (!pp) { + condlog(0, "%s: path already removed", param); + return 1; + } + return ev_remove_path(pp, vecs, 1); +} + +int +cli_add_map (void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct 
vectors *)data; + char * param = get_keyparam(v, MAP); + int major, minor; + char dev_path[PATH_SIZE]; + char *refwwid, *alias = NULL; + int rc, count = 0; + struct config *conf; + int invalid = 0; + + param = convert_dev(param, 0); + condlog(2, "%s: add map (operator)", param); + + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + if (filter_wwid(conf->blist_wwid, conf->elist_wwid, param, NULL) > 0) + invalid = 1; + pthread_cleanup_pop(1); + if (invalid) { + *reply = strdup("blacklisted\n"); + *len = strlen(*reply) + 1; + condlog(2, "%s: map blacklisted", param); + return 1; + } + do { + if (dm_get_major_minor(param, &major, &minor) < 0) + condlog(2, "%s: not a device mapper table", param); + else { + sprintf(dev_path, "dm-%d", minor); + alias = dm_mapname(major, minor); + } + /*if there is no mapname found, we first create the device*/ + if (!alias && !count) { + condlog(2, "%s: mapname not found for %d:%d", + param, major, minor); + get_refwwid(CMD_NONE, param, DEV_DEVMAP, + vecs->pathvec, &refwwid); + if (refwwid) { + if (coalesce_paths(vecs, NULL, refwwid, + FORCE_RELOAD_NONE, CMD_NONE) + != CP_OK) + condlog(2, "%s: coalesce_paths failed", + param); + dm_lib_release(); + FREE(refwwid); + } + } /*we attempt to create device only once*/ + count++; + } while (!alias && (count < 2)); + + if (!alias) { + condlog(2, "%s: add map failed", param); + return 1; + } + rc = ev_add_map(dev_path, alias, vecs); + FREE(alias); + return rc; +} + +int +cli_del_map (void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + char * param = get_keyparam(v, MAP); + int major, minor; + char *alias; + int rc; + + param = convert_dev(param, 0); + condlog(2, "%s: remove map (operator)", param); + if (dm_get_major_minor(param, &major, &minor) < 0) { + condlog(2, "%s: not a device mapper table", param); + return 1; + } + alias = dm_mapname(major, minor); + if (!alias) { + condlog(2, "%s: mapname not found for 
%d:%d", + param, major, minor); + return 1; + } + rc = ev_remove_map(param, alias, minor, vecs); + FREE(alias); + return rc; +} + +int +cli_reload(void *v, char **reply, int *len, void *data) +{ + struct vectors * vecs = (struct vectors *)data; + char * mapname = get_keyparam(v, MAP); + struct multipath *mpp; + int minor; + + mapname = convert_dev(mapname, 0); + condlog(2, "%s: reload map (operator)", mapname); + if (sscanf(mapname, "dm-%d", &minor) == 1) + mpp = find_mp_by_minor(vecs->mpvec, minor); + else + mpp = find_mp_by_alias(vecs->mpvec, mapname); + + if (!mpp) { + condlog(0, "%s: invalid map name. cannot reload", mapname); + return 1; + } + if (mpp->wait_for_udev) { + condlog(2, "%s: device not fully created, failing reload", + mpp->alias); + return 1; + } + + return update_path_groups(mpp, vecs, 0); +} + +int resize_map(struct multipath *mpp, unsigned long long size, + struct vectors * vecs) +{ + char params[PARAMS_SIZE] = {0}; + unsigned long long orig_size = mpp->size; + + mpp->size = size; + update_mpp_paths(mpp, vecs->pathvec); + if (setup_map(mpp, params, PARAMS_SIZE, vecs) != 0) { + condlog(0, "%s: failed to setup map for resize : %s", + mpp->alias, strerror(errno)); + mpp->size = orig_size; + return 1; + } + mpp->action = ACT_RESIZE; + mpp->force_udev_reload = 1; + if (domap(mpp, params, 1) == DOMAP_FAIL) { + condlog(0, "%s: failed to resize map : %s", mpp->alias, + strerror(errno)); + mpp->size = orig_size; + return 1; + } + return 0; +} + +int +cli_resize(void *v, char **reply, int *len, void *data) +{ + struct vectors * vecs = (struct vectors *)data; + char * mapname = get_keyparam(v, MAP); + struct multipath *mpp; + int minor; + unsigned long long size; + struct pathgroup *pgp; + struct path *pp; + + mapname = convert_dev(mapname, 0); + condlog(2, "%s: resize map (operator)", mapname); + if (sscanf(mapname, "dm-%d", &minor) == 1) + mpp = find_mp_by_minor(vecs->mpvec, minor); + else + mpp = find_mp_by_alias(vecs->mpvec, mapname); + + if (!mpp) { 
+ condlog(0, "%s: invalid map name. cannot resize", mapname); + return 1; + } + + if (mpp->wait_for_udev) { + condlog(2, "%s: device not fully created, failing resize", + mpp->alias); + return 1; + } + + pgp = VECTOR_SLOT(mpp->pg, 0); + + if (!pgp){ + condlog(0, "%s: couldn't get path group. cannot resize", + mapname); + return 1; + } + pp = VECTOR_SLOT(pgp->paths, 0); + + if (!pp){ + condlog(0, "%s: couldn't get path. cannot resize", mapname); + return 1; + } + if (!pp->udev || sysfs_get_size(pp, &size)) { + condlog(0, "%s: couldn't get size for sysfs. cannot resize", + mapname); + return 1; + } + if (size == mpp->size) { + condlog(0, "%s: map is still the same size (%llu)", mapname, + mpp->size); + return 0; + } + condlog(3, "%s old size is %llu, new size is %llu", mapname, mpp->size, + size); + + if (resize_map(mpp, size, vecs) != 0) + return 1; + + dm_lib_release(); + if (setup_multipath(vecs, mpp) != 0) + return 1; + sync_map_state(mpp); + + return 0; +} + +int +cli_force_no_daemon_q(void * v, char ** reply, int * len, void * data) +{ + struct config *conf; + + condlog(2, "force queue_without_daemon (operator)"); + conf = get_multipath_config(); + if (conf->queue_without_daemon == QUE_NO_DAEMON_OFF) + conf->queue_without_daemon = QUE_NO_DAEMON_FORCE; + put_multipath_config(conf); + return 0; +} + +int +cli_restore_no_daemon_q(void * v, char ** reply, int * len, void * data) +{ + struct config *conf; + + condlog(2, "restore queue_without_daemon (operator)"); + conf = get_multipath_config(); + if (conf->queue_without_daemon == QUE_NO_DAEMON_FORCE) + conf->queue_without_daemon = QUE_NO_DAEMON_OFF; + put_multipath_config(conf); + return 0; +} + +int +cli_restore_queueing(void *v, char **reply, int *len, void *data) +{ + struct vectors * vecs = (struct vectors *)data; + char * mapname = get_keyparam(v, MAP); + struct multipath *mpp; + int minor; + struct config *conf; + + mapname = convert_dev(mapname, 0); + condlog(2, "%s: restore map queueing (operator)", 
mapname); + if (sscanf(mapname, "dm-%d", &minor) == 1) + mpp = find_mp_by_minor(vecs->mpvec, minor); + else + mpp = find_mp_by_alias(vecs->mpvec, mapname); + + if (!mpp) { + condlog(0, "%s: invalid map name, cannot restore queueing", mapname); + return 1; + } + + mpp->disable_queueing = 0; + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + select_no_path_retry(conf, mpp); + pthread_cleanup_pop(1); + + /* + * Don't call set_no_path_retry() for the NO_PATH_RETRY_FAIL case. + * That would disable queueing when "restorequeueing" is called, + * and the code never behaved that way. Users might not expect it. + * In almost all cases, queueing will be disabled anyway when we + * are here. + */ + if (mpp->no_path_retry != NO_PATH_RETRY_UNDEF && + mpp->no_path_retry != NO_PATH_RETRY_FAIL) + set_no_path_retry(mpp); + + return 0; +} + +int +cli_restore_all_queueing(void *v, char **reply, int *len, void *data) +{ + struct vectors * vecs = (struct vectors *)data; + struct multipath *mpp; + int i; + + condlog(2, "restore queueing (operator)"); + vector_foreach_slot(vecs->mpvec, mpp, i) { + mpp->disable_queueing = 0; + struct config *conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + select_no_path_retry(conf, mpp); + pthread_cleanup_pop(1); + /* See comment in cli_restore_queueing() */ + if (mpp->no_path_retry != NO_PATH_RETRY_UNDEF && + mpp->no_path_retry != NO_PATH_RETRY_FAIL) + set_no_path_retry(mpp); + } + return 0; +} + +int +cli_disable_queueing(void *v, char **reply, int *len, void *data) +{ + struct vectors * vecs = (struct vectors *)data; + char * mapname = get_keyparam(v, MAP); + struct multipath *mpp; + int minor; + + mapname = convert_dev(mapname, 0); + condlog(2, "%s: disable map queueing (operator)", mapname); + if (sscanf(mapname, "dm-%d", &minor) == 1) + mpp = find_mp_by_minor(vecs->mpvec, minor); + else + mpp = find_mp_by_alias(vecs->mpvec, mapname); + + if (!mpp) { + condlog(0, "%s: invalid 
map name, cannot disable queueing", mapname); + return 1; + } + + if (count_active_paths(mpp) == 0) + mpp->stat_map_failures++; + mpp->retry_tick = 0; + mpp->no_path_retry = NO_PATH_RETRY_FAIL; + mpp->disable_queueing = 1; + set_no_path_retry(mpp); + return 0; +} + +int +cli_disable_all_queueing(void *v, char **reply, int *len, void *data) +{ + struct vectors * vecs = (struct vectors *)data; + struct multipath *mpp; + int i; + + condlog(2, "disable queueing (operator)"); + vector_foreach_slot(vecs->mpvec, mpp, i) { + if (count_active_paths(mpp) == 0) + mpp->stat_map_failures++; + mpp->retry_tick = 0; + mpp->no_path_retry = NO_PATH_RETRY_FAIL; + mpp->disable_queueing = 1; + set_no_path_retry(mpp); + } + return 0; +} + +int +cli_switch_group(void * v, char ** reply, int * len, void * data) +{ + char * mapname = get_keyparam(v, MAP); + int groupnum = atoi(get_keyparam(v, GROUP)); + + mapname = convert_dev(mapname, 0); + condlog(2, "%s: switch to path group #%i (operator)", mapname, groupnum); + + return dm_switchgroup(mapname, groupnum); +} + +int +cli_reconfigure(void * v, char ** reply, int * len, void * data) +{ + int rc; + + condlog(2, "reconfigure (operator)"); + + rc = set_config_state(DAEMON_CONFIGURE); + if (rc == ETIMEDOUT) { + condlog(2, "timeout starting reconfiguration"); + return 1; + } else if (rc == EINVAL) + /* daemon shutting down */ + return 1; + return 0; +} + +int +cli_suspend(void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + char * param = get_keyparam(v, MAP); + int r; + struct multipath * mpp; + + param = convert_dev(param, 0); + mpp = find_mp_by_alias(vecs->mpvec, param); + if (!mpp) + return 1; + + if (mpp->wait_for_udev) { + condlog(2, "%s: device not fully created, failing suspend", + mpp->alias); + return 1; + } + + r = dm_simplecmd_noflush(DM_DEVICE_SUSPEND, param, 0); + + condlog(2, "%s: suspend (operator)", param); + + if (!r) /* error */ + return 1; + + dm_get_info(param, &mpp->dmi); 
+ return 0; +} + +int +cli_resume(void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + char * param = get_keyparam(v, MAP); + int r; + struct multipath * mpp; + uint16_t udev_flags; + + param = convert_dev(param, 0); + mpp = find_mp_by_alias(vecs->mpvec, param); + if (!mpp) + return 1; + + udev_flags = (mpp->skip_kpartx)? MPATH_UDEV_NO_KPARTX_FLAG : 0; + if (mpp->wait_for_udev) { + condlog(2, "%s: device not fully created, failing resume", + mpp->alias); + return 1; + } + + r = dm_simplecmd_noflush(DM_DEVICE_RESUME, param, udev_flags); + + condlog(2, "%s: resume (operator)", param); + + if (!r) /* error */ + return 1; + + dm_get_info(param, &mpp->dmi); + return 0; +} + +int +cli_reinstate(void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + char * param = get_keyparam(v, PATH); + struct path * pp; + + param = convert_dev(param, 1); + pp = find_path_by_dev(vecs->pathvec, param); + + if (!pp) + pp = find_path_by_devt(vecs->pathvec, param); + + if (!pp || !pp->mpp || !pp->mpp->alias) + return 1; + + condlog(2, "%s: reinstate path %s (operator)", + pp->mpp->alias, pp->dev_t); + + checker_enable(&pp->checker); + return dm_reinstate_path(pp->mpp->alias, pp->dev_t); +} + +int +cli_reassign (void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + char * param = get_keyparam(v, MAP); + struct multipath *mpp; + + param = convert_dev(param, 0); + mpp = find_mp_by_alias(vecs->mpvec, param); + if (!mpp) + return 1; + + if (mpp->wait_for_udev) { + condlog(2, "%s: device not fully created, failing reassign", + mpp->alias); + return 1; + } + + condlog(3, "%s: reset devices (operator)", param); + + dm_reassign(param); + return 0; +} + +int +cli_fail(void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + char * param = get_keyparam(v, PATH); + struct path * pp; + int r; + + param = 
convert_dev(param, 1); + pp = find_path_by_dev(vecs->pathvec, param); + + if (!pp) + pp = find_path_by_devt(vecs->pathvec, param); + + if (!pp || !pp->mpp || !pp->mpp->alias) + return 1; + + condlog(2, "%s: fail path %s (operator)", + pp->mpp->alias, pp->dev_t); + + r = dm_fail_path(pp->mpp->alias, pp->dev_t); + /* + * Suspend path checking to avoid auto-reinstating the path + */ + if (!r) + checker_disable(&pp->checker); + return r; +} + +int +show_blacklist (char ** r, int * len) +{ + char *c = NULL; + char *reply = NULL; + unsigned int maxlen = INITIAL_REPLY_LEN; + int again = 1; + struct config *conf; + int fail = 0; + + reply = MALLOC(maxlen); + + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + while (again) { + if (!reply) { + fail = 1; + break; + } + + c = reply; + c += snprint_blacklist_report(conf, c, maxlen); + again = ((c - reply) == maxlen); + REALLOC_REPLY(reply, again, maxlen); + } + pthread_cleanup_pop(1); + + if (fail) + return 1; + *r = reply; + *len = (int)(c - reply + 1); + return 0; +} + +int +cli_list_blacklist (void * v, char ** reply, int * len, void * data) +{ + condlog(3, "list blacklist (operator)"); + + return show_blacklist(reply, len); +} + +int +show_devices (char ** r, int * len, struct vectors *vecs) +{ + char *c = NULL; + char *reply = NULL; + unsigned int maxlen = INITIAL_REPLY_LEN; + int again = 1; + struct config *conf; + int fail = 0; + + reply = MALLOC(maxlen); + + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + while (again) { + if (!reply) { + fail = 1; + break; + } + + c = reply; + c += snprint_devices(conf, c, maxlen, vecs); + again = ((c - reply) == maxlen); + REALLOC_REPLY(reply, again, maxlen); + } + pthread_cleanup_pop(1); + + if (fail) + return 1; + *r = reply; + *len = (int)(c - reply + 1); + + return 0; +} + +int +cli_list_devices (void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + + 
condlog(3, "list devices (operator)"); + + return show_devices(reply, len, vecs); +} + +int +cli_quit (void * v, char ** reply, int * len, void * data) +{ + return 0; +} + +int +cli_shutdown (void * v, char ** reply, int * len, void * data) +{ + condlog(3, "shutdown (operator)"); + exit_daemon(); + return 0; +} + +int +cli_getprstatus (void * v, char ** reply, int * len, void * data) +{ + struct multipath * mpp; + struct vectors * vecs = (struct vectors *)data; + char * param = get_keyparam(v, MAP); + + param = convert_dev(param, 0); + get_path_layout(vecs->pathvec, 0); + mpp = find_mp_by_str(vecs->mpvec, param); + + if (!mpp) + return 1; + + condlog(3, "%s: prflag = %u", param, (unsigned int)mpp->prflag); + + *len = asprintf(reply, "%d", mpp->prflag); + if (*len < 0) + return 1; + + condlog(3, "%s: reply = %s", param, *reply); + + return 0; +} + +int +cli_setprstatus(void * v, char ** reply, int * len, void * data) +{ + struct multipath * mpp; + struct vectors * vecs = (struct vectors *)data; + char * param = get_keyparam(v, MAP); + + param = convert_dev(param, 0); + get_path_layout(vecs->pathvec, 0); + mpp = find_mp_by_str(vecs->mpvec, param); + + if (!mpp) + return 1; + + if (!mpp->prflag) { + mpp->prflag = 1; + condlog(2, "%s: prflag set", param); + } + + + return 0; +} + +int +cli_unsetprstatus(void * v, char ** reply, int * len, void * data) +{ + struct multipath * mpp; + struct vectors * vecs = (struct vectors *)data; + char * param = get_keyparam(v, MAP); + + param = convert_dev(param, 0); + get_path_layout(vecs->pathvec, 0); + mpp = find_mp_by_str(vecs->mpvec, param); + + if (!mpp) + return 1; + + if (mpp->prflag) { + mpp->prflag = 0; + condlog(2, "%s: prflag unset", param); + } + + return 0; +} + +int +cli_getprkey(void * v, char ** reply, int * len, void * data) +{ + struct multipath * mpp; + struct vectors * vecs = (struct vectors *)data; + char *mapname = get_keyparam(v, MAP); + char *flagstr = ""; + + mapname = convert_dev(mapname, 0); + condlog(3, 
"%s: get persistent reservation key (operator)", mapname); + mpp = find_mp_by_str(vecs->mpvec, mapname); + + if (!mpp) + return 1; + + *reply = malloc(26); + + if (!get_be64(mpp->reservation_key)) { + sprintf(*reply, "none\n"); + *len = strlen(*reply) + 1; + return 0; + } + if (mpp->sa_flags & MPATH_F_APTPL_MASK) + flagstr = ":aptpl"; + snprintf(*reply, 26, "0x%" PRIx64 "%s\n", + get_be64(mpp->reservation_key), flagstr); + (*reply)[19] = '\0'; + *len = strlen(*reply) + 1; + return 0; +} + +int +cli_unsetprkey(void * v, char ** reply, int * len, void * data) +{ + struct multipath * mpp; + struct vectors * vecs = (struct vectors *)data; + char *mapname = get_keyparam(v, MAP); + int ret; + struct config *conf; + + mapname = convert_dev(mapname, 0); + condlog(3, "%s: unset persistent reservation key (operator)", mapname); + mpp = find_mp_by_str(vecs->mpvec, mapname); + + if (!mpp) + return 1; + + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + ret = set_prkey(conf, mpp, 0, 0); + pthread_cleanup_pop(1); + + return ret; +} + +int +cli_setprkey(void * v, char ** reply, int * len, void * data) +{ + struct multipath * mpp; + struct vectors * vecs = (struct vectors *)data; + char *mapname = get_keyparam(v, MAP); + char *keyparam = get_keyparam(v, KEY); + uint64_t prkey; + uint8_t flags; + int ret; + struct config *conf; + + mapname = convert_dev(mapname, 0); + condlog(3, "%s: set persistent reservation key (operator)", mapname); + mpp = find_mp_by_str(vecs->mpvec, mapname); + + if (!mpp) + return 1; + + if (parse_prkey_flags(keyparam, &prkey, &flags) != 0) { + condlog(0, "%s: invalid prkey : '%s'", mapname, keyparam); + return 1; + } + + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + ret = set_prkey(conf, mpp, prkey, flags); + pthread_cleanup_pop(1); + + return ret; +} + +int cli_set_marginal(void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + char * 
param = get_keyparam(v, PATH); + struct path * pp; + + param = convert_dev(param, 1); + pp = find_path_by_dev(vecs->pathvec, param); + + if (!pp) + pp = find_path_by_devt(vecs->pathvec, param); + + if (!pp || !pp->mpp || !pp->mpp->alias) + return 1; + + condlog(2, "%s: set marginal path %s (operator)", + pp->mpp->alias, pp->dev_t); + if (pp->mpp->wait_for_udev) { + condlog(2, "%s: device not fully created, failing set marginal", + pp->mpp->alias); + return 1; + } + pp->marginal = 1; + + return update_path_groups(pp->mpp, vecs, 0); +} + +int cli_unset_marginal(void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + char * param = get_keyparam(v, PATH); + struct path * pp; + + param = convert_dev(param, 1); + pp = find_path_by_dev(vecs->pathvec, param); + + if (!pp) + pp = find_path_by_devt(vecs->pathvec, param); + + if (!pp || !pp->mpp || !pp->mpp->alias) + return 1; + + condlog(2, "%s: unset marginal path %s (operator)", + pp->mpp->alias, pp->dev_t); + if (pp->mpp->wait_for_udev) { + condlog(2, "%s: device not fully created, " + "failing unset marginal", pp->mpp->alias); + return 1; + } + pp->marginal = 0; + + return update_path_groups(pp->mpp, vecs, 0); +} + +int cli_unset_all_marginal(void * v, char ** reply, int * len, void * data) +{ + struct vectors * vecs = (struct vectors *)data; + char * mapname = get_keyparam(v, MAP); + struct multipath *mpp; + struct pathgroup * pgp; + struct path * pp; + unsigned int i, j; + int minor; + + mapname = convert_dev(mapname, 0); + condlog(2, "%s: unset all marginal paths (operator)", + mapname); + + if (sscanf(mapname, "dm-%d", &minor) == 1) + mpp = find_mp_by_minor(vecs->mpvec, minor); + else + mpp = find_mp_by_alias(vecs->mpvec, mapname); + + if (!mpp) { + condlog(0, "%s: invalid map name. 
" + "cannot unset marginal paths", mapname); + return 1; + } + if (mpp->wait_for_udev) { + condlog(2, "%s: device not fully created, " + "failing unset all marginal", mpp->alias); + return 1; + } + + vector_foreach_slot (mpp->pg, pgp, i) + vector_foreach_slot (pgp->paths, pp, j) + pp->marginal = 0; + + return update_path_groups(mpp, vecs, 0); +} diff --git a/multipathd/cli_handlers.h b/multipathd/cli_handlers.h new file mode 100644 index 0000000..0f45106 --- /dev/null +++ b/multipathd/cli_handlers.h @@ -0,0 +1,54 @@ +int cli_list_paths (void * v, char ** reply, int * len, void * data); +int cli_list_paths_fmt (void * v, char ** reply, int * len, void * data); +int cli_list_paths_raw (void * v, char ** reply, int * len, void * data); +int cli_list_path (void * v, char ** reply, int * len, void * data); +int cli_list_status (void * v, char ** reply, int * len, void * data); +int cli_list_daemon (void * v, char ** reply, int * len, void * data); +int cli_list_maps (void * v, char ** reply, int * len, void * data); +int cli_list_maps_fmt (void * v, char ** reply, int * len, void * data); +int cli_list_maps_raw (void * v, char ** reply, int * len, void * data); +int cli_list_map_fmt (void * v, char ** reply, int * len, void * data); +int cli_list_map_raw (void * v, char ** reply, int * len, void * data); +int cli_list_maps_status (void * v, char ** reply, int * len, void * data); +int cli_list_maps_stats (void * v, char ** reply, int * len, void * data); +int cli_list_map_topology (void * v, char ** reply, int * len, void * data); +int cli_list_maps_topology (void * v, char ** reply, int * len, void * data); +int cli_list_map_json (void * v, char ** reply, int * len, void * data); +int cli_list_maps_json (void * v, char ** reply, int * len, void * data); +int cli_list_config (void * v, char ** reply, int * len, void * data); +int cli_list_config_local (void * v, char ** reply, int * len, void * data); +int cli_list_blacklist (void * v, char ** reply, int * len, void * 
data); +int cli_list_devices (void * v, char ** reply, int * len, void * data); +int cli_list_wildcards (void * v, char ** reply, int * len, void * data); +int cli_reset_maps_stats (void * v, char ** reply, int * len, void * data); +int cli_reset_map_stats (void * v, char ** reply, int * len, void * data); +int cli_add_path (void * v, char ** reply, int * len, void * data); +int cli_del_path (void * v, char ** reply, int * len, void * data); +int cli_add_map (void * v, char ** reply, int * len, void * data); +int cli_del_map (void * v, char ** reply, int * len, void * data); +int cli_switch_group(void * v, char ** reply, int * len, void * data); +int cli_reconfigure(void * v, char ** reply, int * len, void * data); +int cli_resize(void * v, char ** reply, int * len, void * data); +int cli_reload(void * v, char ** reply, int * len, void * data); +int cli_disable_queueing(void * v, char ** reply, int * len, void * data); +int cli_disable_all_queueing(void * v, char ** reply, int * len, void * data); +int cli_restore_queueing(void * v, char ** reply, int * len, void * data); +int cli_restore_all_queueing(void * v, char ** reply, int * len, void * data); +int cli_suspend(void * v, char ** reply, int * len, void * data); +int cli_resume(void * v, char ** reply, int * len, void * data); +int cli_reinstate(void * v, char ** reply, int * len, void * data); +int cli_fail(void * v, char ** reply, int * len, void * data); +int cli_force_no_daemon_q(void * v, char ** reply, int * len, void * data); +int cli_restore_no_daemon_q(void * v, char ** reply, int * len, void * data); +int cli_quit(void * v, char ** reply, int * len, void * data); +int cli_shutdown(void * v, char ** reply, int * len, void * data); +int cli_reassign (void * v, char ** reply, int * len, void * data); +int cli_getprstatus(void * v, char ** reply, int * len, void * data); +int cli_setprstatus(void * v, char ** reply, int * len, void * data); +int cli_unsetprstatus(void * v, char ** reply, int * len, void * 
data); +int cli_getprkey(void * v, char ** reply, int * len, void * data); +int cli_setprkey(void * v, char ** reply, int * len, void * data); +int cli_unsetprkey(void * v, char ** reply, int * len, void * data); +int cli_set_marginal(void * v, char ** reply, int * len, void * data); +int cli_unset_marginal(void * v, char ** reply, int * len, void * data); +int cli_unset_all_marginal(void * v, char ** reply, int * len, void * data); diff --git a/multipathd/dmevents.c b/multipathd/dmevents.c new file mode 100644 index 0000000..b22b47d --- /dev/null +++ b/multipathd/dmevents.c @@ -0,0 +1,403 @@ +/* + * Copyright (c) 2004, 2005 Christophe Varoqui + * Copyright (c) 2005 Kiyoshi Ueda, NEC + * Copyright (c) 2005 Edward Goggin, EMC + * Copyright (c) 2005, 2018 Benjamin Marzinski, Redhat + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vector.h" +#include "structs.h" +#include "structs_vec.h" +#include "devmapper.h" +#include "debug.h" +#include "main.h" +#include "dmevents.h" +#include "util.h" + +#ifndef DM_DEV_ARM_POLL +#define DM_DEV_ARM_POLL _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD + 1, struct dm_ioctl) +#endif + +enum event_actions { + EVENT_NOTHING, + EVENT_REMOVE, + EVENT_UPDATE, +}; + +struct dev_event { + char name[WWID_SIZE]; + uint32_t evt_nr; + enum event_actions action; +}; + +struct dmevent_waiter { + int fd; + struct vectors *vecs; + vector events; + pthread_mutex_t events_lock; +}; + +static struct dmevent_waiter *waiter; +/* + * DM_VERSION_MINOR hasn't been updated when DM_DEV_ARM_POLL + * was added in kernel 4.13. 4.37.0 (4.14) has it, safely. 
+ */ +static const unsigned int DM_VERSION_FOR_ARM_POLL[] = {4, 37, 0}; + +/* Return 1 when the dm driver version is at least DM_VERSION_FOR_ARM_POLL; return 0 otherwise, including when dm_drv_version() reports failure. */ int dmevent_poll_supported(void) +{ + unsigned int v[3]; + + if (dm_drv_version(v)) + return 0; + + if (VERSION_GE(v, DM_VERSION_FOR_ARM_POLL)) + return 1; + return 0; +} + + +/* Allocate the global waiter, its events vector and the /dev/mapper/control fd. Returns 0 on success; on any failure frees what was allocated, leaves waiter NULL and returns -1. */ int init_dmevent_waiter(struct vectors *vecs) +{ + if (!vecs) { + condlog(0, "can't create waiter structure. invalid vectors"); + goto fail; + } + waiter = (struct dmevent_waiter *)malloc(sizeof(struct dmevent_waiter)); + if (!waiter) { + condlog(0, "failed to allocate waiter structure"); + goto fail; + } + memset(waiter, 0, sizeof(struct dmevent_waiter)); + waiter->events = vector_alloc(); + if (!waiter->events) { + condlog(0, "failed to allocate waiter events vector"); + goto fail_waiter; + } + waiter->fd = open("/dev/mapper/control", O_RDWR); + if (waiter->fd < 0) { + condlog(0, "failed to open /dev/mapper/control for waiter"); + goto fail_events; + } + pthread_mutex_init(&waiter->events_lock, NULL); + waiter->vecs = vecs; + + return 0; +fail_events: + vector_free(waiter->events); +fail_waiter: + free(waiter); +fail: + waiter = NULL; + return -1; +} + +/* Tear down the global waiter (mutex, fd, every dev_event, the vector itself) and reset it to NULL; does nothing when the waiter is NULL. */ void cleanup_dmevent_waiter(void) +{ + struct dev_event *dev_evt; + int i; + + if (!waiter) + return; + pthread_mutex_destroy(&waiter->events_lock); + close(waiter->fd); + vector_foreach_slot(waiter->events, dev_evt, i) + free(dev_evt); + vector_free(waiter->events); + free(waiter); + waiter = NULL; +} + +/* Build a dm_ioctl request carrying DM_VERSION_FOR_ARM_POLL and issue DM_DEV_ARM_POLL on fd; returns the ioctl() result. */ static int arm_dm_event_poll(int fd) +{ + struct dm_ioctl dmi; + memset(&dmi, 0, sizeof(dmi)); + dmi.version[0] = DM_VERSION_FOR_ARM_POLL[0]; + dmi.version[1] = DM_VERSION_FOR_ARM_POLL[1]; + dmi.version[2] = DM_VERSION_FOR_ARM_POLL[2]; + /* This flag currently does nothing.
It simply exists to + * duplicate the behavior of libdevmapper */ + dmi.flags = 0x4; + dmi.data_start = offsetof(struct dm_ioctl, data); + dmi.data_size = sizeof(dmi); + return ioctl(fd, DM_DEV_ARM_POLL, &dmi); +} + +/* + * As of version 4.37.0 device-mapper stores the event number in the + * dm_names structure after the name, when DM_DEVICE_LIST is called + */ +static uint32_t dm_event_nr(struct dm_names *n) +{ + /* Skip past the name terminator and round up to the next multiple of 8 before reading the number. */ return *(uint32_t *)(((uintptr_t)(strchr(n->name, 0) + 1) + 7) & ~7); +} + +/* Refresh the pending action of every watched device from a DM_DEVICE_LIST query. Entries start as EVENT_REMOVE and become EVENT_UPDATE (event number changed) or EVENT_NOTHING (unchanged) when the device is listed and dm_is_mpath() does not return 0. Returns 0 on success, -1 if the task cannot be created or run or yields no names. */ static int dm_get_events(void) +{ + struct dm_task *dmt; + struct dm_names *names; + struct dev_event *dev_evt; + int i; + + if (!(dmt = libmp_dm_task_create(DM_DEVICE_LIST))) + return -1; + + dm_task_no_open_count(dmt); + + if (!dm_task_run(dmt)) + goto fail; + + if (!(names = dm_task_get_names(dmt))) + goto fail; + + pthread_mutex_lock(&waiter->events_lock); + vector_foreach_slot(waiter->events, dev_evt, i) + dev_evt->action = EVENT_REMOVE; + while (names->dev) { + uint32_t event_nr; + + /* Don't delete device if dm_is_mpath() fails without + * checking the device type */ + if (dm_is_mpath(names->name) == 0) + goto next; + + event_nr = dm_event_nr(names); + vector_foreach_slot(waiter->events, dev_evt, i) { + if (!strcmp(dev_evt->name, names->name)) { + if (event_nr != dev_evt->evt_nr) { + dev_evt->evt_nr = event_nr; + dev_evt->action = EVENT_UPDATE; + } else + dev_evt->action = EVENT_NOTHING; + break; + } + } +next: + if (!names->next) + break; + /* names->next is a byte offset from the current record. */ names = (void *)names + names->next; + } + pthread_mutex_unlock(&waiter->events_lock); + dm_task_destroy(dmt); + return 0; + +fail: + dm_task_destroy(dmt); + return -1; +} + +/* You must call __setup_multipath() after calling this function, to + * deal with any events that came in before the device was added */ +int watch_dmevents(char *name) +{ + int event_nr; + struct dev_event *dev_evt, *old_dev_evt; + int i; + + /* We know that this is a multipath device, so only fail if + * device-mapper tells us that we're wrong */ + if 
(dm_is_mpath(name) == 0) { + condlog(0, "%s: not a multipath device. can't watch events", + name); + return -1; + } + + if ((event_nr = dm_geteventnr(name)) < 0) + return -1; + + dev_evt = (struct dev_event *)malloc(sizeof(struct dev_event)); + if (!dev_evt) { + condlog(0, "%s: can't allocate event waiter structure", name); + return -1; + } + + strlcpy(dev_evt->name, name, WWID_SIZE); + dev_evt->evt_nr = event_nr; + dev_evt->action = EVENT_NOTHING; + + pthread_mutex_lock(&waiter->events_lock); + /* If the device is already watched, refresh the existing entry and discard the freshly allocated one. */ vector_foreach_slot(waiter->events, old_dev_evt, i){ + if (!strcmp(dev_evt->name, old_dev_evt->name)) { + /* caller will be updating this device */ + old_dev_evt->evt_nr = event_nr; + old_dev_evt->action = EVENT_NOTHING; + pthread_mutex_unlock(&waiter->events_lock); + condlog(2, "%s: already waiting for events on device", + name); + free(dev_evt); + return 0; + } + } + if (!vector_alloc_slot(waiter->events)) { + pthread_mutex_unlock(&waiter->events_lock); + free(dev_evt); + return -1; + } + vector_set_slot(waiter->events, dev_evt); + pthread_mutex_unlock(&waiter->events_lock); + return 0; +} + +/* Free every dev_event and reset the events vector, all under events_lock. */ void unwatch_all_dmevents(void) +{ + struct dev_event *dev_evt; + int i; + + pthread_mutex_lock(&waiter->events_lock); + vector_foreach_slot(waiter->events, dev_evt, i) + free(dev_evt); + vector_reset(waiter->events); + pthread_mutex_unlock(&waiter->events_lock); +} + +/* Remove and free the first dev_event whose name equals name, under events_lock; no effect if none matches. */ static void unwatch_dmevents(char *name) +{ + struct dev_event *dev_evt; + int i; + + pthread_mutex_lock(&waiter->events_lock); + vector_foreach_slot(waiter->events, dev_evt, i) { + if (!strcmp(dev_evt->name, name)) { + vector_del_slot(waiter->events, i); + free(dev_evt); + break; + } + } + pthread_mutex_unlock(&waiter->events_lock); +} + +/* + * returns the reschedule delay + * negative means *stop* + */ + +/* poll, arm, update, return */ +static int dmevent_loop (void) +{ + int r, i = 0; + struct pollfd pfd; + struct dev_event *dev_evt; + + pfd.fd = waiter->fd; + pfd.events = POLLIN; + /* Timeout of -1: block until the control fd becomes readable. */ r = poll(&pfd, 1, -1); + if (r 
<= 0) { + condlog(0, "failed polling for dm events: %s", strerror(errno)); + /* sleep 1s and hope things get better */ + return 1; + } + + if (arm_dm_event_poll(waiter->fd) != 0) { + condlog(0, "Cannot re-arm event polling: %s", strerror(errno)); + /* sleep 1s and hope things get better */ + return 1; + } + + if (dm_get_events() != 0) { + /* NOTE(review): dm_get_events() returns -1 without visibly setting errno, so the strerror() text here may be unrelated - confirm. */ condlog(0, "failed getting dm events: %s", strerror(errno)); + /* sleep 1s and hope things get better */ + return 1; + } + + /* + * upon event ... + */ + + /* Take one pending entry per pass, copying it into curr_dev so that events_lock is released before the map is processed. */ while (1) { + int done = 1; + struct dev_event curr_dev; + + pthread_mutex_lock(&waiter->events_lock); + vector_foreach_slot(waiter->events, dev_evt, i) { + if (dev_evt->action != EVENT_NOTHING) { + curr_dev = *dev_evt; + if (dev_evt->action == EVENT_REMOVE) { + vector_del_slot(waiter->events, i); + free(dev_evt); + } else + dev_evt->action = EVENT_NOTHING; + done = 0; + break; + } + } + pthread_mutex_unlock(&waiter->events_lock); + /* Nothing left to handle: ask the caller to reschedule after 1 second. */ if (done) + return 1; + + /* NOTE(review): evt_nr is uint32_t but is formatted with %i - consider %u. */ condlog(3, "%s: devmap event #%i", curr_dev.name, + curr_dev.evt_nr); + + /* + * event might be : + * + * 1) a table reload, which means our mpp structure is + * obsolete : refresh it through update_multipath() + * 2) a path failed by DM : mark as such through + * update_multipath() + * 3) map has gone away : stop the thread.
+ * 4) a path reinstate : nothing to do + * 5) a switch group : nothing to do + */ + pthread_cleanup_push(cleanup_lock, &waiter->vecs->lock); + lock(&waiter->vecs->lock); + pthread_testcancel(); + r = 0; + if (curr_dev.action == EVENT_REMOVE) + remove_map_by_alias(curr_dev.name, waiter->vecs, 1); + else + r = update_multipath(waiter->vecs, curr_dev.name, 1); + pthread_cleanup_pop(1); + + /* Non-zero from update_multipath() means the map is unknown or was removed, so stop watching it. */ if (r) { + condlog(2, "%s: stopped watching dmevents", + curr_dev.name); + unwatch_dmevents(curr_dev.name); + } + } + condlog(0, "dmevent waiter thread unexpectedly quit"); + return -1; /* never reach there */ +} + +/* pthread cleanup handler: unregisters the calling thread from RCU; param is unused. */ static void rcu_unregister(__attribute__((unused)) void *param) +{ + rcu_unregister_thread(); +} + +/* Thread entry point: calls dmevent_loop() repeatedly, sleeping for the number of seconds it returns, until it returns a negative value. Returns NULL immediately if the waiter was never initialized. */ void *wait_dmevents (__attribute__((unused)) void *unused) +{ + int r; + + + if (!waiter) { + condlog(0, "dmevents waiter not intialized"); + return NULL; + } + + pthread_cleanup_push(rcu_unregister, NULL); + rcu_register_thread(); + mlockall(MCL_CURRENT | MCL_FUTURE); + + while (1) { + r = dmevent_loop(); + + if (r < 0) + break; + + sleep(r); + } + + pthread_cleanup_pop(1); + return NULL; +} diff --git a/multipathd/dmevents.h b/multipathd/dmevents.h new file mode 100644 index 0000000..012fbad --- /dev/null +++ b/multipathd/dmevents.h @@ -0,0 +1,13 @@ +#ifndef _DMEVENTS_H +#define _DMEVENTS_H + +#include "structs_vec.h" + +int dmevent_poll_supported(void); +int init_dmevent_waiter(struct vectors *vecs); +void cleanup_dmevent_waiter(void); +int watch_dmevents(char *name); +void unwatch_all_dmevents(void); +void *wait_dmevents (void *unused); + +#endif /* _DMEVENTS_H */ diff --git a/multipathd/main.c b/multipathd/main.c new file mode 100644 index 0000000..8baf9ab --- /dev/null +++ b/multipathd/main.c @@ -0,0 +1,3307 @@ +/* + * Copyright (c) 2004, 2005 Christophe Varoqui + * Copyright (c) 2005 Kiyoshi Ueda, NEC + * Copyright (c) 2005 Benjamin Marzinski, Redhat + * Copyright (c) 2005 Edward Goggin, EMC + */ +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#ifdef USE_SYSTEMD +#include +#endif +#include +#include +#include + +/* + * libmultipath + */ +#include "time-util.h" + +/* + * libcheckers + */ +#include "checkers.h" + +/* + * libmultipath + */ +#include "parser.h" +#include "vector.h" +#include "memory.h" +#include "config.h" +#include "util.h" +#include "hwtable.h" +#include "defaults.h" +#include "structs.h" +#include "blacklist.h" +#include "structs_vec.h" +#include "dmparser.h" +#include "devmapper.h" +#include "sysfs.h" +#include "dict.h" +#include "discovery.h" +#include "debug.h" +#include "propsel.h" +#include "uevent.h" +#include "switchgroup.h" +#include "print.h" +#include "configure.h" +#include "prio.h" +#include "wwids.h" +#include "pgpolicies.h" +#include "uevent.h" +#include "log.h" +#include "uxsock.h" + +#include "mpath_cmd.h" +#include "mpath_persist.h" + +#include "prioritizers/alua_rtpg.h" + +#include "main.h" +#include "pidfile.h" +#include "uxlsnr.h" +#include "uxclnt.h" +#include "cli.h" +#include "cli_handlers.h" +#include "lock.h" +#include "waiter.h" +#include "dmevents.h" +#include "io_err_stat.h" +#include "wwids.h" +#include "foreign.h" +#include "../third-party/valgrind/drd.h" + +#define FILE_NAME_SIZE 256 +#define CMDSIZE 160 + +/* Log at level lvl, when lvl <= verb, for a path that belongs to a map and has a checker selected: either a path-offline notice or the non-empty checker message. The arguments are expanded unparenthesized and more than once. */ #define LOG_MSG(lvl, verb, pp) \ +do { \ + if (pp->mpp && checker_selected(&pp->checker) && \ + lvl <= verb) { \ + if (pp->offline) \ + condlog(lvl, "%s: %s - path offline", \ + pp->mpp->alias, pp->dev); \ + else { \ + const char *__m = \ + checker_message(&pp->checker); \ + \ + if (strlen(__m)) \ + condlog(lvl, "%s: %s - %s checker%s", \ + pp->mpp->alias, \ + pp->dev, \ + checker_name(&pp->checker), \ + __m); \ + } \ + } \ +} while(0) + +struct mpath_event_param +{ + char * devname; + struct multipath *mpp; +}; + +int logsink; +int uxsock_timeout; +int verbosity; +int bindings_read_only; +int ignore_new_devs; +/* poll_dmevents selects dmevent polling (non-zero) over per-map waiter threads; see wait_for_events(). */ #ifdef NO_DMEVENTS_POLL +int poll_dmevents = 0; +#else +int poll_dmevents = 1; +#endif +/* 
Don't access this variable without holding config_lock */ +enum daemon_status running_state = DAEMON_INIT; +pid_t daemon_pid; +pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t config_cond; + +/* Return a snapshot of running_state taken under config_lock. */ static inline enum daemon_status get_running_state(void) +{ + enum daemon_status st; + + pthread_mutex_lock(&config_lock); + st = running_state; + pthread_mutex_unlock(&config_lock); + return st; +} + +/* + * global copy of vecs for use in sig handlers + */ +struct vectors * gvecs; + +struct udev * udev; + +struct config *multipath_conf; + +/* Local variables */ +static volatile sig_atomic_t exit_sig; +static volatile sig_atomic_t reconfig_sig; +static volatile sig_atomic_t log_reset_sig; + +/* Return a short name for the current daemon state, or NULL if the state matches none of the listed cases. */ const char * +daemon_status(void) +{ + switch (get_running_state()) { + case DAEMON_INIT: + return "init"; + case DAEMON_START: + return "startup"; + case DAEMON_CONFIGURE: + return "configure"; + case DAEMON_IDLE: + return "idle"; + case DAEMON_RUNNING: + return "running"; + case DAEMON_SHUTDOWN: + return "shutdown"; + } + return NULL; +} + +/* + * I love you too, systemd ... + */ +/* Map a daemon state to the STATUS= string handed to sd_notify(); idle and running both report up. Returns NULL for a state outside the listed cases. */ static const char * +sd_notify_status(enum daemon_status state) +{ + switch (state) { + case DAEMON_INIT: + return "STATUS=init"; + case DAEMON_START: + return "STATUS=startup"; + case DAEMON_CONFIGURE: + return "STATUS=configure"; + case DAEMON_IDLE: + case DAEMON_RUNNING: + return "STATUS=up"; + case DAEMON_SHUTDOWN: + return "STATUS=shutdown"; + } + return NULL; +} + +#ifdef USE_SYSTEMD +static void do_sd_notify(enum daemon_status old_state, + enum daemon_status new_state) +{ + /* + * Checkerloop switches back and forth between idle and running state. + * No need to tell systemd each time. + * These notifications cause a lot of overhead on dbus.
+ */ + if ((new_state == DAEMON_IDLE || new_state == DAEMON_RUNNING) && + (old_state == DAEMON_IDLE || old_state == DAEMON_RUNNING)) + return; + sd_notify(0, sd_notify_status(new_state)); +} +#endif + +/* pthread cleanup handler that releases config_lock; arg is unused. */ static void config_cleanup(__attribute__((unused)) void *arg) +{ + pthread_mutex_unlock(&config_lock); +} + +/* + * If the current status is @oldstate, wait for at most @ms milliseconds + * for the state to change, and return the new state, which may still be + * @oldstate. + */ +enum daemon_status wait_for_state_change_if(enum daemon_status oldstate, + unsigned long ms) +{ + enum daemon_status st; + struct timespec tmo; + + if (oldstate == DAEMON_SHUTDOWN) + return DAEMON_SHUTDOWN; + + pthread_mutex_lock(&config_lock); + pthread_cleanup_push(config_cleanup, NULL); + st = running_state; + if (st == oldstate && clock_gettime(CLOCK_MONOTONIC, &tmo) == 0) { + /* NOTE(review): ms * 1000 * 1000 is computed in unsigned long and can wrap for large ms where long is 32 bits - confirm callers keep ms small. */ tmo.tv_nsec += ms * 1000 * 1000; + normalize_timespec(&tmo); + (void)pthread_cond_timedwait(&config_cond, &config_lock, &tmo); + st = running_state; + } + pthread_cleanup_pop(1); + return st; +} + +/* must be called with config_lock held */ +static void __post_config_state(enum daemon_status state) +{ + /* The state is never changed once DAEMON_SHUTDOWN has been reached. */ if (state != running_state && running_state != DAEMON_SHUTDOWN) { + enum daemon_status old_state = running_state; + + running_state = state; + pthread_cond_broadcast(&config_cond); +#ifdef USE_SYSTEMD + do_sd_notify(old_state, state); +#endif + } +} + +/* Take config_lock and call __post_config_state(state); the lock is released through the cleanup handler. */ void post_config_state(enum daemon_status state) +{ + pthread_mutex_lock(&config_lock); + pthread_cleanup_push(config_cleanup, NULL); + __post_config_state(state); + pthread_cleanup_pop(1); +} + +/* Switch running_state to state. If the daemon is neither idle nor already in that state, first wait up to one second on config_cond; the state is then set without re-checking for DAEMON_IDLE. Returns 0, EINVAL if already shutting down, or the pthread_cond_timedwait() error code. */ int set_config_state(enum daemon_status state) +{ + int rc = 0; + + pthread_cleanup_push(config_cleanup, NULL); + pthread_mutex_lock(&config_lock); + if (running_state != state) { + enum daemon_status old_state = running_state; + + if (running_state == DAEMON_SHUTDOWN) + rc = EINVAL; + else if (running_state != DAEMON_IDLE) { + struct timespec ts; + + get_monotonic_time(&ts); 
+ ts.tv_sec += 1; + rc = pthread_cond_timedwait(&config_cond, + &config_lock, &ts); + } + if (!rc && (running_state != DAEMON_SHUTDOWN)) { + running_state = state; + pthread_cond_broadcast(&config_cond); +#ifdef USE_SYSTEMD + do_sd_notify(old_state, state); +#endif + } + } + pthread_cleanup_pop(1); + return rc; +} + +/* Enter an RCU read-side section and return the current multipath_conf; pair every call with put_multipath_config(). */ struct config *get_multipath_config(void) +{ + rcu_read_lock(); + return rcu_dereference(multipath_conf); +} + +/* Leave the RCU read-side section entered by get_multipath_config(); arg is unused, which lets this double as a pthread cleanup handler. */ void put_multipath_config(__attribute__((unused)) void *arg) +{ + rcu_read_unlock(); +} + +/* Return 1 when select_path_group() picks a group other than mpp->nextpg and failback is not manual; return 0 otherwise, including for a NULL map or one without path groups or paths. When refresh is set, path priorities are re-read first. */ static int +need_switch_pathgroup (struct multipath * mpp, int refresh) +{ + struct pathgroup * pgp; + struct path * pp; + unsigned int i, j; + struct config *conf; + int bestpg; + + if (!mpp) + return 0; + + /* + * Refresh path priority values + */ + if (refresh) { + vector_foreach_slot (mpp->pg, pgp, i) { + vector_foreach_slot (pgp->paths, pp, j) { + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, + conf); + pathinfo(pp, conf, DI_PRIO); + pthread_cleanup_pop(1); + } + } + } + + if (!mpp->pg || VECTOR_SIZE(mpp->paths) == 0) + return 0; + + bestpg = select_path_group(mpp); + /* With manual failback, mpp->bestpg is left untouched. */ if (mpp->pgfailback == -FAILBACK_MANUAL) + return 0; + + mpp->bestpg = bestpg; + if (mpp->bestpg != mpp->nextpg) + return 1; + + return 0; +} + +/* Ask device-mapper to switch mpp to mpp->bestpg and count the switch in stat_switchgroup. */ static void +switch_pathgroup (struct multipath * mpp) +{ + mpp->stat_switchgroup++; + dm_switchgroup(mpp->alias, mpp->bestpg); + condlog(2, "%s: switch to path group #%i", + mpp->alias, mpp->bestpg); +} + +/* Start event monitoring for mpp: dmevent polling when poll_dmevents is set, otherwise a per-map waiter thread. Returns the result of the chosen call. */ static int +wait_for_events(struct multipath *mpp, struct vectors *vecs) +{ + if (poll_dmevents) + return watch_dmevents(mpp->alias); + else + return start_waiter_thread(mpp, vecs); +} + +/* Remove mpp from the internal tables, first stopping its waiter thread when dmevent polling is not in use. */ static void +remove_map_and_stop_waiter(struct multipath *mpp, struct vectors *vecs) +{ + /* devices are automatically removed by the dmevent polling code, + * so they don't need to be manually removed here */ + condlog(3, "%s: removing map from internal tables", mpp->alias); + if (!poll_dmevents) + stop_waiter_thread(mpp); + 
remove_map(mpp, vecs, PURGE_VEC); +} + +/* Stop all event monitoring (every waiter thread, or all dmevent watches when polling) and then remove every map; does nothing for NULL vecs. */ static void +remove_maps_and_stop_waiters(struct vectors *vecs) +{ + int i; + struct multipath * mpp; + + if (!vecs) + return; + + if (!poll_dmevents) { + vector_foreach_slot(vecs->mpvec, mpp, i) + stop_waiter_thread(mpp); + } + else + unwatch_all_dmevents(); + + remove_maps(vecs); +} + +/* Fill mpp->wwid from the dm uuid of mpp->alias, but only while the wwid is still empty. */ static void +set_multipath_wwid (struct multipath * mpp) +{ + if (strlen(mpp->wwid)) + return; + + dm_get_uuid(mpp->alias, mpp->wwid, WWID_SIZE); +} + +/* Reload dm info and multipath strings for mpp; with reset set, also reapply no_path_retry and cancel a deferred remove when the map has paths. Returns 0 on success; on failure removes the map (stopping its waiter) and returns 1. */ int __setup_multipath(struct vectors *vecs, struct multipath *mpp, + int reset) +{ + if (dm_get_info(mpp->alias, &mpp->dmi)) { + /* Error accessing table */ + condlog(3, "%s: cannot access table", mpp->alias); + goto out; + } + + if (update_multipath_strings(mpp, vecs->pathvec, 1)) { + condlog(0, "%s: failed to setup multipath", mpp->alias); + goto out; + } + + if (reset) { + set_no_path_retry(mpp); + if (VECTOR_SIZE(mpp->paths) != 0) + dm_cancel_deferred_remove(mpp); + } + + return 0; +out: + remove_map_and_stop_waiter(mpp, vecs); + return 1; +} + +/* Refresh the map named mapname and mark as PATH_DOWN every path that DM reports failed. Returns 0 on success, 1 if the map was removed during setup, 2 if no such map is known. */ int update_multipath (struct vectors *vecs, char *mapname, int reset) +{ + struct multipath *mpp; + struct pathgroup *pgp; + struct path *pp; + int i, j; + + mpp = find_mp_by_alias(vecs->mpvec, mapname); + + if (!mpp) { + condlog(3, "%s: multipath map not found", mapname); + return 2; + } + + if (__setup_multipath(vecs, mpp, reset)) + return 1; /* mpp freed in setup_multipath */ + + /* + * compare checkers states with DM states + */ + vector_foreach_slot (mpp->pg, pgp, i) { + vector_foreach_slot (pgp->paths, pp, j) { + if (pp->dmstate != PSTATE_FAILED) + continue; + + if (pp->state != PATH_DOWN) { + struct config *conf; + int oldstate = pp->state; + unsigned int checkint; + + conf = get_multipath_config(); + checkint = conf->checkint; + put_multipath_config(conf); + condlog(2, "%s: mark as failed", pp->dev); + mpp->stat_path_failures++; + pp->state = PATH_DOWN; + if (oldstate == PATH_UP || + oldstate == PATH_GHOST) + 
update_queue_mode_del_path(mpp); + + /* + * if opportune, + * schedule the next check earlier + */ + if (pp->tick > checkint) + pp->tick = checkint; + } + } + } + return 0; +} + +/* Re-adopt paths and reload the map, retrying domap() up to three times (one second apart) after DOMAP_FAIL. For a new map, event monitoring is started as well. Returns 0, or 1 when the map was removed. */ static int +update_map (struct multipath *mpp, struct vectors *vecs, int new_map) +{ + int retries = 3; + char params[PARAMS_SIZE] = {0}; + +retry: + condlog(4, "%s: updating new map", mpp->alias); + if (adopt_paths(vecs->pathvec, mpp)) { + condlog(0, "%s: failed to adopt paths for new map update", + mpp->alias); + retries = -1; + goto fail; + } + verify_paths(mpp, vecs); + mpp->action = ACT_RELOAD; + + if (setup_map(mpp, params, PARAMS_SIZE, vecs)) { + condlog(0, "%s: failed to setup new map in update", mpp->alias); + retries = -1; + goto fail; + } + /* retries is post-decremented even when the test fails, so it ends at -1 once every retry is used up. */ if (domap(mpp, params, 1) == DOMAP_FAIL && retries-- > 0) { + condlog(0, "%s: map_udate sleep", mpp->alias); + sleep(1); + goto retry; + } + dm_lib_release(); + +fail: + /* A negative retries value marks a failed update. */ if (new_map && (retries < 0 || wait_for_events(mpp, vecs))) { + condlog(0, "%s: failed to create new map", mpp->alias); + remove_map(mpp, vecs, 1); + return 1; + } + + if (setup_multipath(vecs, mpp)) + return 1; + + sync_map_state(mpp); + + if (retries < 0) + condlog(0, "%s: failed reload in new map update", mpp->alias); + return 0; +} + +/* Create the daemon-side structure for the existing dm map named alias, register it in vecs->mpvec and run update_map() on it. Returns the map, or NULL on failure. NOTE(review): the STRDUP() result is used without a NULL check. */ static struct multipath * +add_map_without_path (struct vectors *vecs, const char *alias) +{ + struct multipath * mpp = alloc_multipath(); + struct config *conf; + + if (!mpp) + return NULL; + if (!alias) { + FREE(mpp); + return NULL; + } + + mpp->alias = STRDUP(alias); + + if (dm_get_info(mpp->alias, &mpp->dmi)) { + condlog(3, "%s: cannot access table", mpp->alias); + goto out; + } + set_multipath_wwid(mpp); + conf = get_multipath_config(); + mpp->mpe = find_mpe(conf->mptable, mpp->wwid); + put_multipath_config(conf); + + if (update_multipath_table(mpp, vecs->pathvec, 1)) + goto out; + if (update_multipath_status(mpp)) + goto out; + + if (!vector_alloc_slot(vecs->mpvec)) + goto out; + + vector_set_slot(vecs->mpvec, mpp); + + if 
(update_map(mpp, vecs, 1) != 0) /* map removed */ + return NULL; + + return mpp; +out: + remove_map(mpp, vecs, PURGE_VEC); + return NULL; +} + +/* For each current map whose wwid is absent from nmpv, try to flush it from device-mapper; a map that cannot be flushed is refreshed and moved into nmpv. Maps present in nmpv are reassigned when reassign_maps is configured. Returns 1 only if nmpv cannot grow, else 0. */ static int +coalesce_maps(struct vectors *vecs, vector nmpv) +{ + struct multipath * ompp; + vector ompv = vecs->mpvec; + unsigned int i, reassign_maps; + struct config *conf; + + conf = get_multipath_config(); + reassign_maps = conf->reassign_maps; + put_multipath_config(conf); + vector_foreach_slot (ompv, ompp, i) { + condlog(3, "%s: coalesce map", ompp->alias); + if (!find_mp_by_wwid(nmpv, ompp->wwid)) { + /* + * remove all current maps not allowed by the + * current configuration + */ + if (dm_flush_map(ompp->alias)) { + condlog(0, "%s: unable to flush devmap", + ompp->alias); + /* + * may be just because the device is open + */ + if (setup_multipath(vecs, ompp) != 0) { + /* NOTE(review): i is unsigned, so this decrement relies on wraparound when i is 0 - presumably the loop increment brings it back to 0. */ i--; + continue; + } + if (!vector_alloc_slot(nmpv)) + return 1; + + vector_set_slot(nmpv, ompp); + + vector_del_slot(ompv, i); + i--; + } + else { + dm_lib_release(); + condlog(2, "%s devmap removed", ompp->alias); + } + } else if (reassign_maps) { + condlog(3, "%s: Reassign existing device-mapper" + " devices", ompp->alias); + dm_reassign(ompp->alias); + } + } + return 0; +} + +/* Call sync_map_state() on every map in mpvec. */ static void +sync_maps_state(vector mpvec) +{ + unsigned int i; + struct multipath *mpp; + + vector_foreach_slot (mpvec, mpp, i) + sync_map_state(mpp); +} + +/* Flush mpp from device-mapper; the nopaths variant is passed mpp->deferred_remove. On success orphans its paths, removes the map and returns 0; otherwise returns the non-zero dm result and keeps the map. NOTE(review): the first comment in the body looks stale - the flush has already been issued by the time it appears. */ static int +flush_map(struct multipath * mpp, struct vectors * vecs, int nopaths) +{ + int r; + + if (nopaths) + r = dm_flush_map_nopaths(mpp->alias, mpp->deferred_remove); + else + r = dm_flush_map(mpp->alias); + /* + * clear references to this map before flushing so we can ignore + * the spurious uevent we may generate with the dm_flush_map call below + */ + if (r) { + /* + * May not really be an error -- if the map was already flushed + * from the device mapper by dmsetup(8) for instance.
+ */ + if (r == 1) + condlog(0, "%s: can't flush", mpp->alias); + else { + condlog(2, "%s: devmap deferred remove", mpp->alias); + mpp->deferred_remove = DEFERRED_REMOVE_IN_PROGRESS; + } + return r; + } + else { + dm_lib_release(); + condlog(2, "%s: map flushed", mpp->alias); + } + + orphan_paths(vecs->pathvec, mpp, "map flushed"); + remove_map_and_stop_waiter(mpp, vecs); + + return 0; +} + +/* Handle an add-map uevent: take the map name from DM_NAME, or look it up by major:minor when that is missing, then call ev_add_map() under vecs->lock. Returns 1 if no name can be found, else the ev_add_map() result. */ static int +uev_add_map (struct uevent * uev, struct vectors * vecs) +{ + char *alias; + int major = -1, minor = -1, rc; + + condlog(3, "%s: add map (uevent)", uev->kernel); + alias = uevent_get_dm_name(uev); + if (!alias) { + condlog(3, "%s: No DM_NAME in uevent", uev->kernel); + major = uevent_get_major(uev); + minor = uevent_get_minor(uev); + alias = dm_mapname(major, minor); + if (!alias) { + condlog(2, "%s: mapname not found for %d:%d", + uev->kernel, major, minor); + return 1; + } + } + pthread_cleanup_push(cleanup_lock, &vecs->lock); + lock(&vecs->lock); + pthread_testcancel(); + rc = ev_add_map(uev->kernel, alias, vecs); + lock_cleanup_pop(vecs->lock); + /* alias is released here on both lookup routes. */ FREE(alias); + return rc; +} + +/* + * ev_add_map expects that the multipath device already exists in kernel + * before it is called. It just adds a device to multipathd or updates an + * existing device.
+ */ +int +ev_add_map (char * dev, const char * alias, struct vectors * vecs) +{ + struct multipath * mpp; + int delayed_reconfig, reassign_maps; + struct config *conf; + + /* Anything dm_is_mpath() does not positively identify as multipath is ignored, and reported as success. */ if (dm_is_mpath(alias) != 1) { + condlog(4, "%s: not a multipath map", alias); + return 0; + } + + mpp = find_mp_by_alias(vecs->mpvec, alias); + + if (mpp) { + /* A value above 1 records that work was postponed while waiting for udev; perform it now. */ if (mpp->wait_for_udev > 1) { + condlog(2, "%s: performing delayed actions", + mpp->alias); + if (update_map(mpp, vecs, 0)) + /* setup multipathd removed the map */ + return 1; + } + conf = get_multipath_config(); + delayed_reconfig = conf->delayed_reconfig; + reassign_maps = conf->reassign_maps; + put_multipath_config(conf); + if (mpp->wait_for_udev) { + mpp->wait_for_udev = 0; + if (delayed_reconfig && + !need_to_delay_reconfig(vecs)) { + condlog(2, "reconfigure (delayed)"); + set_config_state(DAEMON_CONFIGURE); + return 0; + } + } + /* + * Not really an error -- we generate our own uevent + * if we create a multipath mapped device as a result + * of uev_add_path + */ + if (reassign_maps) { + condlog(3, "%s: Reassign existing device-mapper devices", + alias); + dm_reassign(alias); + } + return 0; + } + condlog(2, "%s: adding map", alias); + + /* + * now we can register the map + */ + if ((mpp = add_map_without_path(vecs, alias))) { + sync_map_state(mpp); + condlog(2, "%s: devmap %s registered", alias, dev); + return 0; + } else { + condlog(2, "%s: ev_add_map failed", dev); + return 1; + } +} + +/* Handle a remove-map uevent: under vecs->lock, drop the map found by minor number, provided its alias matches DM_NAME. Always returns 0. */ static int +uev_remove_map (struct uevent * uev, struct vectors * vecs) +{ + char *alias; + int minor; + struct multipath *mpp; + + condlog(3, "%s: remove map (uevent)", uev->kernel); + alias = uevent_get_dm_name(uev); + if (!alias) { + condlog(3, "%s: No DM_NAME in uevent, ignoring", uev->kernel); + return 0; + } + minor = uevent_get_minor(uev); + + pthread_cleanup_push(cleanup_lock, &vecs->lock); + lock(&vecs->lock); + pthread_testcancel(); + mpp = find_mp_by_minor(vecs->mpvec, minor); + + if (!mpp) { + condlog(2, "%s: devmap not registered, 
can't remove", + uev->kernel); + goto out; + } + if (strcmp(mpp->alias, alias)) { + condlog(2, "%s: map alias mismatch: have \"%s\", got \"%s\")", + uev->kernel, mpp->alias, alias); + goto out; + } + + remove_map_and_stop_waiter(mpp, vecs); +out: + lock_cleanup_pop(vecs->lock); + FREE(alias); + return 0; +} + +/* Called from CLI handler */ +int +ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs) +{ + struct multipath * mpp; + + mpp = find_mp_by_minor(vecs->mpvec, minor); + + if (!mpp) { + condlog(2, "%s: devmap not registered, can't remove", + devname); + return 1; + } + /* NOTE(review): this branch is taken on an alias mismatch, yet the message reports a minor number mismatch and prints two minors that matched in the lookup above - confirm the wording. */ if (strcmp(mpp->alias, alias)) { + condlog(2, "%s: minor number mismatch (map %d, event %d)", + mpp->alias, mpp->dmi->minor, minor); + return 1; + } + return flush_map(mpp, vecs, 0); +} + +/* Handle an add-path uevent under vecs->lock: reinitialize a known path that has neither a map nor a wwid, or allocate, store and add a new path. Returns 0 on success or when the path is skipped, 1 on error. */ static int +uev_add_path (struct uevent *uev, struct vectors * vecs, int need_do_map) +{ + struct path *pp; + int ret = 0, i; + struct config *conf; + + condlog(3, "%s: add path (uevent)", uev->kernel); + if (strstr(uev->kernel, "..") != NULL) { + /* + * Don't allow relative device names in the pathvec + */ + condlog(0, "%s: path name is invalid", uev->kernel); + return 1; + } + + pthread_cleanup_push(cleanup_lock, &vecs->lock); + lock(&vecs->lock); + pthread_testcancel(); + pp = find_path_by_dev(vecs->pathvec, uev->kernel); + if (pp) { + int r; + + condlog(3, "%s: spurious uevent, path already in pathvec", + uev->kernel); + if (!pp->mpp && !strlen(pp->wwid)) { + condlog(3, "%s: reinitialize path", uev->kernel); + udev_device_unref(pp->udev); + pp->udev = udev_device_ref(uev->udev); + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + r = pathinfo(pp, conf, + DI_ALL | DI_BLACKLIST); + pthread_cleanup_pop(1); + if (r == PATHINFO_OK) + ret = ev_add_path(pp, vecs, need_do_map); + else if (r == PATHINFO_SKIPPED) { + condlog(3, "%s: remove blacklisted path", + uev->kernel); + i = find_slot(vecs->pathvec, (void *)pp); + if (i != -1) + 
vector_del_slot(vecs->pathvec, i); + free_path(pp); + } else { + condlog(0, "%s: failed to reinitialize path", + uev->kernel); + ret = 1; + } + } + } + /* NOTE(review): pp may already have been freed above (blacklisted path); it is only tested here, not dereferenced. */ if (pp) + goto out; + + /* + * get path vital state + */ + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + ret = alloc_path_with_pathinfo(conf, uev->udev, + uev->wwid, DI_ALL, &pp); + pthread_cleanup_pop(1); + if (!pp) { + /* A skipped path is not an error. */ if (ret == PATHINFO_SKIPPED) + ret = 0; + else { + condlog(3, "%s: failed to get path info", uev->kernel); + ret = 1; + } + goto out; + } + ret = store_path(vecs->pathvec, pp); + if (!ret) { + conf = get_multipath_config(); + pp->checkint = conf->checkint; + put_multipath_config(conf); + ret = ev_add_path(pp, vecs, need_do_map); + } else { + condlog(0, "%s: failed to store path info, " + "dropping event", + uev->kernel); + free_path(pp); + ret = 1; + } +out: + lock_cleanup_pop(vecs->lock); + return ret; +} + +/* + * returns: + * 0: added + * 1: error + */ +int +ev_add_path (struct path * pp, struct vectors * vecs, int need_do_map) +{ + struct multipath * mpp; + char params[PARAMS_SIZE] = {0}; + int retries = 3; + int start_waiter = 0; + int ret; + + /* + * need path UID to go any further + */ + if (strlen(pp->wwid) == 0) { + condlog(0, "%s: failed to get path uid", pp->dev); + goto fail; /* leave path added to pathvec */ + } + mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid); + /* Refuse a path whose size differs from the map it would join: drop it from pathvec and free it. */ if (mpp && pp->size && mpp->size != pp->size) { + condlog(0, "%s: failed to add new path %s, device size mismatch", mpp->alias, pp->dev); + int i = find_slot(vecs->pathvec, (void *)pp); + if (i != -1) + vector_del_slot(vecs->pathvec, i); + free_path(pp); + return 1; + } + if (mpp && mpp->wait_for_udev && + (pathcount(mpp, PATH_UP) > 0 || + (pathcount(mpp, PATH_GHOST) > 0 && + path_get_tpgs(pp) != TPGS_IMPLICIT && + mpp->ghost_delay_tick <= 0))) { + /* if wait_for_udev is set and valid paths exist */ + condlog(3, "%s: delaying path addition until %s is fully initialized", + pp->dev, 
mpp->alias); + mpp->wait_for_udev = 2; + orphan_path(pp, "waiting for create to complete"); + return 0; + } + + pp->mpp = mpp; +rescan: + if (mpp) { + condlog(4,"%s: adopting all paths for path %s", + mpp->alias, pp->dev); + if (adopt_paths(vecs->pathvec, mpp)) + goto fail; /* leave path added to pathvec */ + + verify_paths(mpp, vecs); + mpp->action = ACT_RELOAD; + } else { + if (!should_multipath(pp, vecs->pathvec, vecs->mpvec)) { + orphan_path(pp, "only one path"); + return 0; + } + condlog(4,"%s: creating new map", pp->dev); + if ((mpp = add_map_with_path(vecs, pp, 1))) { + mpp->action = ACT_CREATE; + /* + * We don't depend on ACT_CREATE, as domap will + * set it to ACT_NOTHING when complete. + */ + start_waiter = 1; + } + if (!start_waiter) + goto fail; /* leave path added to pathvec */ + } + + /* persistent reservation check*/ + mpath_pr_event_handle(pp); + + /* Skip the device-mapper update entirely when need_do_map is 0. */ if (!need_do_map) + return 0; + + if (!dm_map_present(mpp->alias)) { + mpp->action = ACT_CREATE; + start_waiter = 1; + } + /* + * push the map to the device-mapper + */ + if (setup_map(mpp, params, PARAMS_SIZE, vecs)) { + condlog(0, "%s: failed to setup map for addition of new " + "path %s", mpp->alias, pp->dev); + goto fail_map; + } + /* + * reload the map for the multipath mapped device + */ + ret = domap(mpp, params, 1); + while (ret == DOMAP_RETRY && retries-- > 0) { + condlog(0, "%s: retry domap for addition of new " + "path %s", mpp->alias, pp->dev); + sleep(1); + ret = domap(mpp, params, 1); + } + if (ret == DOMAP_FAIL || ret == DOMAP_RETRY) { + condlog(0, "%s: failed in domap for addition of new " + "path %s", mpp->alias, pp->dev); + /* + * deal with asynchronous uevents :(( + */ + /* NOTE(review): retries is shared with the DOMAP_RETRY loop above, so this rescan only gets whatever attempts that loop left over. */ if (mpp->action == ACT_RELOAD && retries-- > 0) { + condlog(0, "%s: ev_add_path sleep", mpp->alias); + sleep(1); + update_mpp_paths(mpp, vecs->pathvec); + goto rescan; + } + else if (mpp->action == ACT_RELOAD) + condlog(0, "%s: giving up reload", mpp->alias); + else + goto fail_map; + } + dm_lib_release(); + + if 
((mpp->action == ACT_CREATE || + (mpp->action == ACT_NOTHING && start_waiter && !mpp->waiter)) && + wait_for_events(mpp, vecs)) + goto fail_map; + + /* + * update our state from kernel regardless of create or reload + */ + if (setup_multipath(vecs, mpp)) + goto fail; /* if setup_multipath fails, it removes the map */ + + sync_map_state(mpp); + + /* A negative retries value means the retry budget above was used up: report failure even though the map state was refreshed. */ if (retries >= 0) { + condlog(2, "%s [%s]: path added to devmap %s", + pp->dev, pp->dev_t, mpp->alias); + return 0; + } else + goto fail; + +fail_map: + remove_map(mpp, vecs, 1); +fail: + orphan_path(pp, "failed to add path"); + return 1; +} + +/* Handle a remove-path uevent: forward to ev_remove_path() under vecs->lock when the path is known. Returns its result, or 0 when the path is not in pathvec. */ static int +uev_remove_path (struct uevent *uev, struct vectors * vecs, int need_do_map) +{ + struct path *pp; + int ret; + + condlog(3, "%s: remove path (uevent)", uev->kernel); + delete_foreign(uev->udev); + + pthread_cleanup_push(cleanup_lock, &vecs->lock); + lock(&vecs->lock); + pthread_testcancel(); + pp = find_path_by_dev(vecs->pathvec, uev->kernel); + /* ret is assigned only when pp was found; the not-found case returns 0 below without reading it. */ if (pp) + ret = ev_remove_path(pp, vecs, need_do_map); + lock_cleanup_pop(vecs->lock); + if (!pp) { + /* Not an error; path might have been purged earlier */ + condlog(0, "%s: path already removed", uev->kernel); + return 0; + } + return ret; +} + +/* Remove pp from its map, if it has one: flush the map when pp was its last path, otherwise rebuild the map and reload it when need_do_map is set. On the normal exit pp is dropped from pathvec and freed. Returns 0 on success, 1 on error. */ int +ev_remove_path (struct path *pp, struct vectors * vecs, int need_do_map) +{ + struct multipath * mpp; + int i, retval = 0; + char params[PARAMS_SIZE] = {0}; + + /* + * avoid referring to the map of an orphaned path + */ + if ((mpp = pp->mpp)) { + /* + * transform the mp->pg vector of vectors of paths + * into a mp->params string to feed the device-mapper + */ + if (update_mpp_paths(mpp, vecs->pathvec)) { + condlog(0, "%s: failed to update paths", + mpp->alias); + goto fail; + } + + /* + * Make sure mpp->hwe doesn't point to freed memory + * We call extract_hwe_from_path() below to restore mpp->hwe + */ + if (mpp->hwe == pp->hwe) + mpp->hwe = NULL; + + if ((i = find_slot(mpp->paths, (void *)pp)) != -1) + vector_del_slot(mpp->paths, i); + + /* + * remove the map IF removing the last 
path + */ + if (VECTOR_SIZE(mpp->paths) == 0) { + char alias[WWID_SIZE]; + + /* + * flush_map will fail if the device is open + */ + strlcpy(alias, mpp->alias, WWID_SIZE); + if (mpp->flush_on_last_del == FLUSH_ENABLED) { + condlog(2, "%s Last path deleted, disabling queueing", mpp->alias); + mpp->retry_tick = 0; + mpp->no_path_retry = NO_PATH_RETRY_FAIL; + mpp->disable_queueing = 1; + mpp->stat_map_failures++; + dm_queue_if_no_path(mpp->alias, 0); + } + if (!flush_map(mpp, vecs, 1)) { + condlog(2, "%s: removed map after" + " removing all paths", + alias); + retval = 0; + goto out; + } + /* + * Not an error, continue + */ + } + + if (mpp->hwe == NULL) + extract_hwe_from_path(mpp); + + if (setup_map(mpp, params, PARAMS_SIZE, vecs)) { + condlog(0, "%s: failed to setup map for" + " removal of path %s", mpp->alias, pp->dev); + goto fail; + } + + if (mpp->wait_for_udev) { + mpp->wait_for_udev = 2; + goto out; + } + + if (!need_do_map) + goto out; + /* + * reload the map + */ + mpp->action = ACT_RELOAD; + if (domap(mpp, params, 1) == DOMAP_FAIL) { + condlog(0, "%s: failed in domap for " + "removal of path %s", + mpp->alias, pp->dev); + retval = 1; + } else { + /* + * update our state from kernel + */ + if (setup_multipath(vecs, mpp)) + return 1; + sync_map_state(mpp); + + condlog(2, "%s [%s]: path removed from map %s", + pp->dev, pp->dev_t, mpp->alias); + } + } + +out: + if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1) + vector_del_slot(vecs->pathvec, i); + + free_path(pp); + + return retval; + +fail: + remove_map_and_stop_waiter(mpp, vecs); + return 1; +} + +static int +uev_update_path (struct uevent *uev, struct vectors * vecs) +{ + int ro, retval = 0, rc; + struct path * pp; + struct config *conf; + int needs_reinit = 0; + + switch ((rc = change_foreign(uev->udev))) { + case FOREIGN_OK: + /* known foreign path, ignore event */ + return 0; + case FOREIGN_IGNORED: + break; + case FOREIGN_ERR: + condlog(3, "%s: error in change_foreign", __func__); + break; + default: 
+ condlog(1, "%s: return code %d of change_forein is unsupported", + __func__, rc); + break; + } + + pthread_cleanup_push(cleanup_lock, &vecs->lock); + lock(&vecs->lock); + pthread_testcancel(); + + pp = find_path_by_dev(vecs->pathvec, uev->kernel); + if (pp) { + struct multipath *mpp = pp->mpp; + char wwid[WWID_SIZE]; + + if (pp->initialized == INIT_REQUESTED_UDEV) { + needs_reinit = 1; + goto out; + } + /* Don't deal with other types of failed initialization + * now. check_path will handle it */ + if (!strlen(pp->wwid)) + goto out; + + strcpy(wwid, pp->wwid); + rc = get_uid(pp, pp->state, uev->udev, 0); + + if (rc != 0) + strcpy(pp->wwid, wwid); + else if (strncmp(wwid, pp->wwid, WWID_SIZE) != 0) { + condlog(0, "%s: path wwid changed from '%s' to '%s'", + uev->kernel, wwid, pp->wwid); + ev_remove_path(pp, vecs, 1); + needs_reinit = 1; + goto out; + } else { + udev_device_unref(pp->udev); + pp->udev = udev_device_ref(uev->udev); + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + if (pathinfo(pp, conf, DI_SYSFS|DI_NOIO) != PATHINFO_OK) + condlog(1, "%s: pathinfo failed after change uevent", + uev->kernel); + pthread_cleanup_pop(1); + } + + ro = uevent_get_disk_ro(uev); + if (mpp && ro >= 0) { + condlog(2, "%s: update path write_protect to '%d' (uevent)", uev->kernel, ro); + + if (mpp->wait_for_udev) + mpp->wait_for_udev = 2; + else { + if (ro == 1) + pp->mpp->force_readonly = 1; + retval = update_path_groups(mpp, vecs, 0); + if (retval == 2) + condlog(2, "%s: map removed during reload", pp->dev); + else { + pp->mpp->force_readonly = 0; + condlog(2, "%s: map %s reloaded (retval %d)", uev->kernel, mpp->alias, retval); + } + } + } + } +out: + lock_cleanup_pop(vecs->lock); + if (!pp) { + /* If the path is blacklisted, print a debug/non-default verbosity message. 
*/ + if (uev->udev) { + int flag = DI_SYSFS | DI_WWID; + + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + retval = alloc_path_with_pathinfo(conf, uev->udev, uev->wwid, flag, NULL); + pthread_cleanup_pop(1); + + if (retval == PATHINFO_SKIPPED) { + condlog(3, "%s: spurious uevent, path is blacklisted", uev->kernel); + return 0; + } + } + + condlog(0, "%s: spurious uevent, path not found", uev->kernel); + } + if (needs_reinit) + retval = uev_add_path(uev, vecs, 1); + return retval; +} + +static int +uev_pathfail_check(struct uevent *uev, struct vectors *vecs) +{ + char *action = NULL, *devt = NULL; + struct path *pp; + int r = 1; + + action = uevent_get_dm_action(uev); + if (!action) + return 1; + if (strncmp(action, "PATH_FAILED", 11)) + goto out; + devt = uevent_get_dm_path(uev); + if (!devt) { + condlog(3, "%s: No DM_PATH in uevent", uev->kernel); + goto out; + } + + pthread_cleanup_push(cleanup_lock, &vecs->lock); + lock(&vecs->lock); + pthread_testcancel(); + pp = find_path_by_devt(vecs->pathvec, devt); + if (!pp) + goto out_lock; + r = io_err_stat_handle_pathfail(pp); + if (r) + condlog(3, "io_err_stat: %s: cannot handle pathfail uevent", + pp->dev); +out_lock: + lock_cleanup_pop(vecs->lock); + FREE(devt); + FREE(action); + return r; +out: + FREE(action); + return 1; +} + +static int +map_discovery (struct vectors * vecs) +{ + struct multipath * mpp; + unsigned int i; + + if (dm_get_maps(vecs->mpvec)) + return 1; + + vector_foreach_slot (vecs->mpvec, mpp, i) + if (update_multipath_table(mpp, vecs->pathvec, 1) || + update_multipath_status(mpp)) { + remove_map(mpp, vecs, 1); + i--; + } + + return 0; +} + +int +uxsock_trigger (char * str, char ** reply, int * len, bool is_root, + void * trigger_data) +{ + struct vectors * vecs; + int r; + + *reply = NULL; + *len = 0; + vecs = (struct vectors *)trigger_data; + + if ((str != NULL) && (is_root == false) && + (strncmp(str, "list", strlen("list")) != 0) && + (strncmp(str, "show", 
strlen("show")) != 0)) { + *reply = STRDUP("permission deny: need to be root"); + if (*reply) + *len = strlen(*reply) + 1; + return 1; + } + + r = parse_cmd(str, reply, len, vecs, uxsock_timeout / 1000); + + if (r > 0) { + if (r == ETIMEDOUT) + *reply = STRDUP("timeout\n"); + else + *reply = STRDUP("fail\n"); + if (*reply) + *len = strlen(*reply) + 1; + r = 1; + } + else if (!r && *len == 0) { + *reply = STRDUP("ok\n"); + if (*reply) + *len = strlen(*reply) + 1; + r = 0; + } + /* else if (r < 0) leave *reply alone */ + + return r; +} + +int +uev_trigger (struct uevent * uev, void * trigger_data) +{ + int r = 0; + struct vectors * vecs; + struct uevent *merge_uev, *tmp; + enum daemon_status state; + + vecs = (struct vectors *)trigger_data; + + pthread_cleanup_push(config_cleanup, NULL); + pthread_mutex_lock(&config_lock); + while (running_state != DAEMON_IDLE && + running_state != DAEMON_RUNNING && + running_state != DAEMON_SHUTDOWN) + pthread_cond_wait(&config_cond, &config_lock); + state = running_state; + pthread_cleanup_pop(1); + + if (state == DAEMON_SHUTDOWN) + return 0; + + /* + * device map event + * Add events are ignored here as the tables + * are not fully initialised then. + */ + if (!strncmp(uev->kernel, "dm-", 3)) { + if (!uevent_is_mpath(uev)) { + if (!strncmp(uev->action, "change", 6)) + (void)add_foreign(uev->udev); + else if (!strncmp(uev->action, "remove", 6)) + (void)delete_foreign(uev->udev); + goto out; + } + if (!strncmp(uev->action, "change", 6)) { + r = uev_add_map(uev, vecs); + + /* + * the kernel-side dm-mpath issues a PATH_FAILED event + * when it encounters a path IO error. It is reason- + * able be the entry of path IO error accounting pro- + * cess. 
+ */ + uev_pathfail_check(uev, vecs); + } else if (!strncmp(uev->action, "remove", 6)) { + r = uev_remove_map(uev, vecs); + } + goto out; + } + + /* + * path add/remove/change event, add/remove maybe merged + */ + list_for_each_entry_safe(merge_uev, tmp, &uev->merge_node, node) { + if (!strncmp(merge_uev->action, "add", 3)) + r += uev_add_path(merge_uev, vecs, 0); + if (!strncmp(merge_uev->action, "remove", 6)) + r += uev_remove_path(merge_uev, vecs, 0); + } + + if (!strncmp(uev->action, "add", 3)) + r += uev_add_path(uev, vecs, 1); + if (!strncmp(uev->action, "remove", 6)) + r += uev_remove_path(uev, vecs, 1); + if (!strncmp(uev->action, "change", 6)) + r += uev_update_path(uev, vecs); + +out: + return r; +} + +static void rcu_unregister(__attribute__((unused)) void *param) +{ + rcu_unregister_thread(); +} + +static void * +ueventloop (void * ap) +{ + struct udev *udev = ap; + + pthread_cleanup_push(rcu_unregister, NULL); + rcu_register_thread(); + if (uevent_listen(udev)) + condlog(0, "error starting uevent listener"); + pthread_cleanup_pop(1); + return NULL; +} + +static void * +uevqloop (void * ap) +{ + pthread_cleanup_push(rcu_unregister, NULL); + rcu_register_thread(); + if (uevent_dispatch(&uev_trigger, ap)) + condlog(0, "error starting uevent dispatcher"); + pthread_cleanup_pop(1); + return NULL; +} +static void * +uxlsnrloop (void * ap) +{ + long ux_sock; + + pthread_cleanup_push(rcu_unregister, NULL); + rcu_register_thread(); + + ux_sock = ux_socket_listen(DEFAULT_SOCKET); + if (ux_sock == -1) { + condlog(1, "could not create uxsock: %d", errno); + exit_daemon(); + goto out; + } + pthread_cleanup_push(uxsock_cleanup, (void *)ux_sock); + + if (cli_init()) { + condlog(1, "Failed to init uxsock listener"); + exit_daemon(); + goto out_sock; + } + + /* Tell main thread that thread has started */ + post_config_state(DAEMON_CONFIGURE); + + set_handler_callback(LIST+PATHS, cli_list_paths); + set_handler_callback(LIST+PATHS+FMT, cli_list_paths_fmt); + 
set_handler_callback(LIST+PATHS+RAW+FMT, cli_list_paths_raw); + set_handler_callback(LIST+PATH, cli_list_path); + set_handler_callback(LIST+MAPS, cli_list_maps); + set_handler_callback(LIST+STATUS, cli_list_status); + set_unlocked_handler_callback(LIST+DAEMON, cli_list_daemon); + set_handler_callback(LIST+MAPS+STATUS, cli_list_maps_status); + set_handler_callback(LIST+MAPS+STATS, cli_list_maps_stats); + set_handler_callback(LIST+MAPS+FMT, cli_list_maps_fmt); + set_handler_callback(LIST+MAPS+RAW+FMT, cli_list_maps_raw); + set_handler_callback(LIST+MAPS+TOPOLOGY, cli_list_maps_topology); + set_handler_callback(LIST+TOPOLOGY, cli_list_maps_topology); + set_handler_callback(LIST+MAPS+JSON, cli_list_maps_json); + set_handler_callback(LIST+MAP+TOPOLOGY, cli_list_map_topology); + set_handler_callback(LIST+MAP+FMT, cli_list_map_fmt); + set_handler_callback(LIST+MAP+RAW+FMT, cli_list_map_fmt); + set_handler_callback(LIST+MAP+JSON, cli_list_map_json); + set_handler_callback(LIST+CONFIG+LOCAL, cli_list_config_local); + set_handler_callback(LIST+CONFIG, cli_list_config); + set_handler_callback(LIST+BLACKLIST, cli_list_blacklist); + set_handler_callback(LIST+DEVICES, cli_list_devices); + set_handler_callback(LIST+WILDCARDS, cli_list_wildcards); + set_handler_callback(RESET+MAPS+STATS, cli_reset_maps_stats); + set_handler_callback(RESET+MAP+STATS, cli_reset_map_stats); + set_handler_callback(ADD+PATH, cli_add_path); + set_handler_callback(DEL+PATH, cli_del_path); + set_handler_callback(ADD+MAP, cli_add_map); + set_handler_callback(DEL+MAP, cli_del_map); + set_handler_callback(SWITCH+MAP+GROUP, cli_switch_group); + set_unlocked_handler_callback(RECONFIGURE, cli_reconfigure); + set_handler_callback(SUSPEND+MAP, cli_suspend); + set_handler_callback(RESUME+MAP, cli_resume); + set_handler_callback(RESIZE+MAP, cli_resize); + set_handler_callback(RELOAD+MAP, cli_reload); + set_handler_callback(RESET+MAP, cli_reassign); + set_handler_callback(REINSTATE+PATH, cli_reinstate); + 
set_handler_callback(FAIL+PATH, cli_fail); + set_handler_callback(DISABLEQ+MAP, cli_disable_queueing); + set_handler_callback(RESTOREQ+MAP, cli_restore_queueing); + set_handler_callback(DISABLEQ+MAPS, cli_disable_all_queueing); + set_handler_callback(RESTOREQ+MAPS, cli_restore_all_queueing); + set_unlocked_handler_callback(QUIT, cli_quit); + set_unlocked_handler_callback(SHUTDOWN, cli_shutdown); + set_handler_callback(GETPRSTATUS+MAP, cli_getprstatus); + set_handler_callback(SETPRSTATUS+MAP, cli_setprstatus); + set_handler_callback(UNSETPRSTATUS+MAP, cli_unsetprstatus); + set_handler_callback(FORCEQ+DAEMON, cli_force_no_daemon_q); + set_handler_callback(RESTOREQ+DAEMON, cli_restore_no_daemon_q); + set_handler_callback(GETPRKEY+MAP, cli_getprkey); + set_handler_callback(SETPRKEY+MAP+KEY, cli_setprkey); + set_handler_callback(UNSETPRKEY+MAP, cli_unsetprkey); + set_handler_callback(SETMARGINAL+PATH, cli_set_marginal); + set_handler_callback(UNSETMARGINAL+PATH, cli_unset_marginal); + set_handler_callback(UNSETMARGINAL+MAP, cli_unset_all_marginal); + + umask(077); + uxsock_listen(&uxsock_trigger, ux_sock, ap); + +out_sock: + pthread_cleanup_pop(1); /* uxsock_cleanup */ +out: + pthread_cleanup_pop(1); /* rcu_unregister */ + return NULL; +} + +void +exit_daemon (void) +{ + post_config_state(DAEMON_SHUTDOWN); +} + +static void +fail_path (struct path * pp, int del_active) +{ + if (!pp->mpp) + return; + + condlog(2, "checker failed path %s in map %s", + pp->dev_t, pp->mpp->alias); + + dm_fail_path(pp->mpp->alias, pp->dev_t); + if (del_active) + update_queue_mode_del_path(pp->mpp); +} + +/* + * caller must have locked the path list before calling that function + */ +static int +reinstate_path (struct path * pp) +{ + int ret = 0; + + if (!pp->mpp) + return 0; + + if (dm_reinstate_path(pp->mpp->alias, pp->dev_t)) { + condlog(0, "%s: reinstate failed", pp->dev_t); + ret = 1; + } else { + condlog(2, "%s: reinstated", pp->dev_t); + update_queue_mode_add_path(pp->mpp); + } + 
return ret; +} + +static void +enable_group(struct path * pp) +{ + struct pathgroup * pgp; + + /* + * if path is added through uev_add_path, pgindex can be unset. + * next update_strings() will set it, upon map reload event. + * + * we can safely return here, because upon map reload, all + * PG will be enabled. + */ + if (!pp->mpp->pg || !pp->pgindex) + return; + + pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1); + + if (pgp->status == PGSTATE_DISABLED) { + condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex); + dm_enablegroup(pp->mpp->alias, pp->pgindex); + } +} + +static void +mpvec_garbage_collector (struct vectors * vecs) +{ + struct multipath * mpp; + unsigned int i; + + if (!vecs->mpvec) + return; + + vector_foreach_slot (vecs->mpvec, mpp, i) { + if (mpp && mpp->alias && !dm_map_present(mpp->alias)) { + condlog(2, "%s: remove dead map", mpp->alias); + remove_map_and_stop_waiter(mpp, vecs); + i--; + } + } +} + +/* This is called after a path has started working again. It the multipath + * device for this path uses the followover failback type, and this is the + * best pathgroup, and this is the first path in the pathgroup to come back + * up, then switch to this pathgroup */ +static int +followover_should_failback(struct path * pp) +{ + struct pathgroup * pgp; + struct path *pp1; + int i; + + if (pp->mpp->pgfailback != -FAILBACK_FOLLOWOVER || + !pp->mpp->pg || !pp->pgindex || + pp->pgindex != pp->mpp->bestpg) + return 0; + + pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1); + vector_foreach_slot(pgp->paths, pp1, i) { + if (pp1 == pp) + continue; + if (pp1->chkrstate != PATH_DOWN && pp1->chkrstate != PATH_SHAKY) + return 0; + } + return 1; +} + +static void +missing_uev_wait_tick(struct vectors *vecs) +{ + struct multipath * mpp; + unsigned int i; + int timed_out = 0, delayed_reconfig; + struct config *conf; + + vector_foreach_slot (vecs->mpvec, mpp, i) { + if (mpp->wait_for_udev && --mpp->uev_wait_tick <= 0) { + timed_out = 1; + condlog(0, "%s: 
timeout waiting on creation uevent. enabling reloads", mpp->alias); + if (mpp->wait_for_udev > 1 && + update_map(mpp, vecs, 0)) { + /* update_map removed map */ + i--; + continue; + } + mpp->wait_for_udev = 0; + } + } + + conf = get_multipath_config(); + delayed_reconfig = conf->delayed_reconfig; + put_multipath_config(conf); + if (timed_out && delayed_reconfig && + !need_to_delay_reconfig(vecs)) { + condlog(2, "reconfigure (delayed)"); + set_config_state(DAEMON_CONFIGURE); + } +} + +static void +ghost_delay_tick(struct vectors *vecs) +{ + struct multipath * mpp; + unsigned int i; + + vector_foreach_slot (vecs->mpvec, mpp, i) { + if (mpp->ghost_delay_tick <= 0) + continue; + if (--mpp->ghost_delay_tick <= 0) { + condlog(0, "%s: timed out waiting for active path", + mpp->alias); + mpp->force_udev_reload = 1; + if (update_map(mpp, vecs, 0) != 0) { + /* update_map removed map */ + i--; + continue; + } + } + } +} + +static void +defered_failback_tick (vector mpvec) +{ + struct multipath * mpp; + unsigned int i; + + vector_foreach_slot (mpvec, mpp, i) { + /* + * deferred failback getting sooner + */ + if (mpp->pgfailback > 0 && mpp->failback_tick > 0) { + mpp->failback_tick--; + + if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1)) + switch_pathgroup(mpp); + } + } +} + +static void +retry_count_tick(vector mpvec) +{ + struct multipath *mpp; + unsigned int i; + + vector_foreach_slot (mpvec, mpp, i) { + if (mpp->retry_tick > 0) { + mpp->stat_total_queueing_time++; + condlog(4, "%s: Retrying.. 
No active path", mpp->alias); + if(--mpp->retry_tick == 0) { + mpp->stat_map_failures++; + dm_queue_if_no_path(mpp->alias, 0); + condlog(2, "%s: Disable queueing", mpp->alias); + } + } + } +} + +int update_prio(struct path *pp, int refresh_all) +{ + int oldpriority; + struct path *pp1; + struct pathgroup * pgp; + int i, j, changed = 0; + struct config *conf; + + if (refresh_all) { + vector_foreach_slot (pp->mpp->pg, pgp, i) { + vector_foreach_slot (pgp->paths, pp1, j) { + oldpriority = pp1->priority; + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, + conf); + pathinfo(pp1, conf, DI_PRIO); + pthread_cleanup_pop(1); + if (pp1->priority != oldpriority) + changed = 1; + } + } + return changed; + } + oldpriority = pp->priority; + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + if (pp->state != PATH_DOWN) + pathinfo(pp, conf, DI_PRIO); + pthread_cleanup_pop(1); + + if (pp->priority == oldpriority) + return 0; + return 1; +} + +int update_path_groups(struct multipath *mpp, struct vectors *vecs, int refresh) +{ + if (reload_map(vecs, mpp, refresh, 1)) + return 1; + + dm_lib_release(); + if (setup_multipath(vecs, mpp) != 0) + return 2; + sync_map_state(mpp); + + return 0; +} + +static int check_path_reinstate_state(struct path * pp) { + struct timespec curr_time; + + /* + * This function is only called when the path state changes + * from "bad" to "good". pp->state reflects the *previous* state. + * If this was "bad", we know that a failure must have occured + * beforehand, and count that. + * Note that we count path state _changes_ this way. If a path + * remains in "bad" state, failure count is not increased. 
+ */ + + if (!((pp->mpp->san_path_err_threshold > 0) && + (pp->mpp->san_path_err_forget_rate > 0) && + (pp->mpp->san_path_err_recovery_time >0))) { + return 0; + } + + if (pp->disable_reinstate) { + /* If there are no other usable paths, reinstate the path */ + if (count_active_paths(pp->mpp) == 0) { + condlog(2, "%s : reinstating path early", pp->dev); + goto reinstate_path; + } + get_monotonic_time(&curr_time); + + /* If path became failed again or continue failed, should reset + * path san_path_err_forget_rate and path dis_reinstate_time to + * start a new stable check. + */ + if ((pp->state != PATH_UP) && (pp->state != PATH_GHOST) && + (pp->state != PATH_DELAYED)) { + pp->san_path_err_forget_rate = + pp->mpp->san_path_err_forget_rate; + pp->dis_reinstate_time = curr_time.tv_sec; + } + + if ((curr_time.tv_sec - pp->dis_reinstate_time ) > pp->mpp->san_path_err_recovery_time) { + condlog(2,"%s : reinstate the path after err recovery time", pp->dev); + goto reinstate_path; + } + return 1; + } + /* forget errors on a working path */ + if ((pp->state == PATH_UP || pp->state == PATH_GHOST) && + pp->path_failures > 0) { + if (pp->san_path_err_forget_rate > 0){ + pp->san_path_err_forget_rate--; + } else { + /* for every san_path_err_forget_rate number of + * successful path checks decrement path_failures by 1 + */ + pp->path_failures--; + pp->san_path_err_forget_rate = pp->mpp->san_path_err_forget_rate; + } + return 0; + } + + /* If the path isn't recovering from a failed state, do nothing */ + if (pp->state != PATH_DOWN && pp->state != PATH_SHAKY && + pp->state != PATH_TIMEOUT) + return 0; + + if (pp->path_failures == 0) + pp->san_path_err_forget_rate = pp->mpp->san_path_err_forget_rate; + + pp->path_failures++; + + /* if we don't know the currently time, we don't know how long to + * delay the path, so there's no point in checking if we should + */ + + get_monotonic_time(&curr_time); + /* when path failures has exceeded the san_path_err_threshold + * place the path in 
delayed state till san_path_err_recovery_time + * so that the cutomer can rectify the issue within this time. After + * the completion of san_path_err_recovery_time it should + * automatically reinstate the path + * (note: we know that san_path_err_threshold > 0 here). + */ + if (pp->path_failures > (unsigned int)pp->mpp->san_path_err_threshold) { + condlog(2, "%s : hit error threshold. Delaying path reinstatement", pp->dev); + pp->dis_reinstate_time = curr_time.tv_sec; + pp->disable_reinstate = 1; + + return 1; + } else { + return 0; + } + +reinstate_path: + pp->path_failures = 0; + pp->disable_reinstate = 0; + pp->san_path_err_forget_rate = 0; + return 0; +} + +static int +should_skip_path(struct path *pp){ + if (marginal_path_check_enabled(pp->mpp)) { + if (pp->io_err_disable_reinstate && need_io_err_check(pp)) + return 1; + } else if (san_path_check_enabled(pp->mpp)) { + if (check_path_reinstate_state(pp)) + return 1; + } + return 0; +} + +/* + * Returns '1' if the path has been checked, '-1' if it was blacklisted + * and '0' otherwise + */ +int +check_path (struct vectors * vecs, struct path * pp, unsigned int ticks) +{ + int newstate; + int new_path_up = 0; + int chkr_new_path_up = 0; + int disable_reinstate = 0; + int oldchkrstate = pp->chkrstate; + int retrigger_tries, verbosity; + unsigned int checkint, max_checkint; + struct config *conf; + int marginal_pathgroups, marginal_changed = 0; + int ret; + + if ((pp->initialized == INIT_OK || + pp->initialized == INIT_REQUESTED_UDEV) && !pp->mpp) + return 0; + + if (pp->tick) + pp->tick -= (pp->tick > ticks) ? 
ticks : pp->tick; + if (pp->tick) + return 0; /* don't check this path yet */ + + conf = get_multipath_config(); + retrigger_tries = conf->retrigger_tries; + checkint = conf->checkint; + max_checkint = conf->max_checkint; + verbosity = conf->verbosity; + marginal_pathgroups = conf->marginal_pathgroups; + put_multipath_config(conf); + + if (pp->checkint == CHECKINT_UNDEF) { + condlog(0, "%s: BUG: checkint is not set", pp->dev); + pp->checkint = checkint; + }; + + if (!pp->mpp && pp->initialized == INIT_MISSING_UDEV) { + if (pp->retriggers < retrigger_tries) { + condlog(2, "%s: triggering change event to reinitialize", + pp->dev); + pp->initialized = INIT_REQUESTED_UDEV; + pp->retriggers++; + sysfs_attr_set_value(pp->udev, "uevent", "change", + strlen("change")); + return 0; + } else { + condlog(1, "%s: not initialized after %d udev retriggers", + pp->dev, retrigger_tries); + /* + * Make sure that the "add missing path" code path + * below may reinstate the path later, if it ever + * comes up again. + * The WWID needs not be cleared; if it was set, the + * state hadn't been INIT_MISSING_UDEV in the first + * place. + */ + pp->initialized = INIT_FAILED; + return 0; + } + } + + /* + * provision a next check soonest, + * in case we exit abnormaly from here + */ + pp->tick = checkint; + + newstate = path_offline(pp); + if (newstate == PATH_UP) { + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + newstate = get_state(pp, conf, 1, newstate); + pthread_cleanup_pop(1); + } else { + checker_clear_message(&pp->checker); + condlog(3, "%s: state %s, checker not called", + pp->dev, checker_state_name(newstate)); + } + /* + * Wait for uevent for removed paths; + * some LLDDs like zfcp keep paths unavailable + * without sending uevents. 
+ */ + if (newstate == PATH_REMOVED) + newstate = PATH_DOWN; + + if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) { + condlog(2, "%s: unusable path (%s) - checker failed", + pp->dev, checker_state_name(newstate)); + LOG_MSG(2, verbosity, pp); + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + pathinfo(pp, conf, 0); + pthread_cleanup_pop(1); + return 1; + } else if ((newstate != PATH_UP && newstate != PATH_GHOST) && + (pp->state == PATH_DELAYED)) { + /* If path state become failed again cancel path delay state */ + pp->state = newstate; + return 1; + } + if (!pp->mpp) { + if (!strlen(pp->wwid) && + (pp->initialized == INIT_FAILED || + pp->initialized == INIT_NEW) && + (newstate == PATH_UP || newstate == PATH_GHOST)) { + condlog(2, "%s: add missing path", pp->dev); + conf = get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + ret = pathinfo(pp, conf, DI_ALL | DI_BLACKLIST); + pthread_cleanup_pop(1); + /* INIT_OK implies ret == PATHINFO_OK */ + if (pp->initialized == INIT_OK) { + ev_add_path(pp, vecs, 1); + pp->tick = 1; + } else { + /* + * We failed multiple times to initialize this + * path properly. Don't re-check too often. + */ + pp->checkint = max_checkint; + if (ret == PATHINFO_SKIPPED) + return -1; + } + } + return 0; + } + /* + * Async IO in flight. 
Keep the previous path state + * and reschedule as soon as possible + */ + if (newstate == PATH_PENDING) { + pp->tick = 1; + return 0; + } + /* + * Synchronize with kernel state + */ + if (update_multipath_strings(pp->mpp, vecs->pathvec, 1)) { + condlog(1, "%s: Could not synchronize with kernel state", + pp->dev); + pp->dmstate = PSTATE_UNDEF; + } + /* if update_multipath_strings orphaned the path, quit early */ + if (!pp->mpp) + return 0; + set_no_path_retry(pp->mpp); + + if ((newstate == PATH_UP || newstate == PATH_GHOST) && + (san_path_check_enabled(pp->mpp) || + marginal_path_check_enabled(pp->mpp))) { + int was_marginal = pp->marginal; + if (should_skip_path(pp)) { + if (!marginal_pathgroups) { + if (marginal_path_check_enabled(pp->mpp)) + /* to reschedule as soon as possible, + * so that this path can be recovered + * in time */ + pp->tick = 1; + pp->state = PATH_DELAYED; + return 1; + } + if (!was_marginal) { + pp->marginal = 1; + marginal_changed = 1; + } + } else if (marginal_pathgroups && was_marginal) { + pp->marginal = 0; + marginal_changed = 1; + } + } + + /* + * don't reinstate failed path, if its in stand-by + * and if target supports only implicit tpgs mode. + * this will prevent unnecessary i/o by dm on stand-by + * paths if there are no other active paths in map. + */ + disable_reinstate = (newstate == PATH_GHOST && + count_active_paths(pp->mpp) == 0 && + path_get_tpgs(pp) == TPGS_IMPLICIT) ? 
1 : 0; + + pp->chkrstate = newstate; + if (newstate != pp->state) { + int oldstate = pp->state; + pp->state = newstate; + + LOG_MSG(1, verbosity, pp); + + /* + * upon state change, reset the checkint + * to the shortest delay + */ + conf = get_multipath_config(); + pp->checkint = conf->checkint; + put_multipath_config(conf); + + if (newstate != PATH_UP && newstate != PATH_GHOST) { + /* + * proactively fail path in the DM + */ + if (oldstate == PATH_UP || + oldstate == PATH_GHOST) + fail_path(pp, 1); + else + fail_path(pp, 0); + + /* + * cancel scheduled failback + */ + pp->mpp->failback_tick = 0; + + pp->mpp->stat_path_failures++; + return 1; + } + + if (newstate == PATH_UP || newstate == PATH_GHOST) { + if (pp->mpp->prflag) { + /* + * Check Persistent Reservation. + */ + condlog(2, "%s: checking persistent " + "reservation registration", pp->dev); + mpath_pr_event_handle(pp); + } + } + + /* + * reinstate this path + */ + if (!disable_reinstate && reinstate_path(pp)) { + condlog(3, "%s: reload map", pp->dev); + ev_add_path(pp, vecs, 1); + pp->tick = 1; + return 0; + } + new_path_up = 1; + + if (oldchkrstate != PATH_UP && oldchkrstate != PATH_GHOST) + chkr_new_path_up = 1; + + /* + * if at least one path is up in a group, and + * the group is disabled, re-enable it + */ + if (newstate == PATH_UP) + enable_group(pp); + } + else if (newstate == PATH_UP || newstate == PATH_GHOST) { + if ((pp->dmstate == PSTATE_FAILED || + pp->dmstate == PSTATE_UNDEF) && + !disable_reinstate) { + /* Clear IO errors */ + if (reinstate_path(pp)) { + condlog(3, "%s: reload map", pp->dev); + ev_add_path(pp, vecs, 1); + pp->tick = 1; + return 0; + } + } else { + LOG_MSG(4, verbosity, pp); + if (pp->checkint != max_checkint) { + /* + * double the next check delay. 
/*
 * Path checker thread body.
 *
 * ap is the daemon's struct vectors. Each loop iteration:
 *  1. measures the whole seconds elapsed since the previous iteration
 *     ("ticks"),
 *  2. switches the daemon to DAEMON_RUNNING,
 *  3. runs check_path() on every path and drops the paths for which it
 *     returns a negative value,
 *  4. runs the per-map tick handlers,
 *  5. garbage-collects dead maps whenever the "count" countdown is at 0,
 *  6. calls check_foreign() every max_checkint iterations,
 *  7. posts DAEMON_IDLE and sleeps until the next tick.
 *
 * Always returns NULL.
 */
static void *
checkerloop (void *ap)
{
	struct vectors *vecs;
	struct path *pp;
	int count = 0;			/* iterations left until the next map GC */
	unsigned int i;
	struct timespec last_time;
	struct config *conf;
	int foreign_tick = 0;		/* iterations left until check_foreign() */
	bool use_watchdog;

	pthread_cleanup_push(rcu_unregister, NULL);
	rcu_register_thread();
	mlockall(MCL_CURRENT | MCL_FUTURE);
	vecs = (struct vectors *)ap;
	condlog(2, "path checkers start up");

	/* Tweak start time for initial path check */
	get_monotonic_time(&last_time);
	last_time.tv_sec -= 1;

	/* use_watchdog is set from process environment and never changes */
	conf = get_multipath_config();
	use_watchdog = conf->use_watchdog;
	put_multipath_config(conf);

	while (1) {
		struct timespec diff_time, start_time, end_time;
		int num_paths = 0, strict_timing, rc = 0;
		unsigned int ticks = 0;

		get_monotonic_time(&start_time);
		if (start_time.tv_sec && last_time.tv_sec) {
			/* ticks = whole seconds since the previous iteration */
			timespecsub(&start_time, &last_time, &diff_time);
			condlog(4, "tick (%lu.%06lu secs)",
				diff_time.tv_sec, diff_time.tv_nsec / 1000);
			last_time = start_time;
			ticks = diff_time.tv_sec;
		} else {
			/* one of the timestamps is zero: assume one tick */
			ticks = 1;
			condlog(4, "tick (%d ticks)", ticks);
		}
#ifdef USE_SYSTEMD
		if (use_watchdog)
			sd_notify(0, "WATCHDOG=1");
#endif
		rc = set_config_state(DAEMON_RUNNING);
		if (rc == ETIMEDOUT) {
			condlog(4, "timeout waiting for DAEMON_IDLE");
			/* retry right away; the sleep below is skipped */
			continue;
		} else if (rc == EINVAL)
			/* daemon shutdown */
			break;

		/*
		 * Check every path. check_path() is documented to return 1
		 * for a checked path, 0 otherwise and -1 for a blacklisted
		 * one; the latter is deleted from pathvec and freed.
		 */
		pthread_cleanup_push(cleanup_lock, &vecs->lock);
		lock(&vecs->lock);
		pthread_testcancel();
		vector_foreach_slot (vecs->pathvec, pp, i) {
			rc = check_path(vecs, pp, ticks);
			if (rc < 0) {
				vector_del_slot(vecs->pathvec, i);
				free_path(pp);
				/* revisit the slot that moved into index i */
				i--;
			} else
				num_paths += rc;
		}
		lock_cleanup_pop(vecs->lock);

		/* per-map tick handlers, run under the vecs lock */
		pthread_cleanup_push(cleanup_lock, &vecs->lock);
		lock(&vecs->lock);
		pthread_testcancel();
		defered_failback_tick(vecs->mpvec);
		retry_count_tick(vecs->mpvec);
		missing_uev_wait_tick(vecs);
		ghost_delay_tick(vecs);
		lock_cleanup_pop(vecs->lock);

		/* map garbage collection when the countdown has run out */
		if (count)
			count--;
		else {
			pthread_cleanup_push(cleanup_lock, &vecs->lock);
			lock(&vecs->lock);
			pthread_testcancel();
			condlog(4, "map garbage collection");
			mpvec_garbage_collector(vecs);
			count = MAPGCINT;
			lock_cleanup_pop(vecs->lock);
		}

		/*
		 * From here on diff_time holds the duration of this
		 * iteration. tv_nsec is zeroed first because the strict
		 * timing code below reads it even if no time was measured.
		 */
		diff_time.tv_nsec = 0;
		if (start_time.tv_sec) {
			get_monotonic_time(&end_time);
			timespecsub(&end_time, &start_time, &diff_time);
			if (num_paths) {
				unsigned int max_checkint;

				condlog(4, "checked %d path%s in %lu.%06lu secs",
					num_paths, num_paths > 1 ? "s" : "",
					diff_time.tv_sec,
					diff_time.tv_nsec / 1000);
				conf = get_multipath_config();
				max_checkint = conf->max_checkint;
				put_multipath_config(conf);
				if (diff_time.tv_sec > max_checkint)
					condlog(1, "path checkers took longer "
						"than %lu seconds, consider "
						"increasing max_polling_interval",
						diff_time.tv_sec);
			}
		}

		/* check_foreign() runs once every max_checkint iterations */
		if (foreign_tick == 0) {
			conf = get_multipath_config();
			foreign_tick = conf->max_checkint;
			put_multipath_config(conf);
		}
		if (--foreign_tick == 0)
			check_foreign();

		post_config_state(DAEMON_IDLE);
		conf = get_multipath_config();
		strict_timing = conf->strict_timing;
		put_multipath_config(conf);
		if (!strict_timing)
			sleep(1);
		else {
			/*
			 * Strict timing: sleep only for what is left of the
			 * current second, judged by the sub-second part of
			 * this iteration's duration, or for a full second
			 * if that part is zero.
			 */
			if (diff_time.tv_nsec) {
				diff_time.tv_sec = 0;
				diff_time.tv_nsec =
				     1000UL * 1000 * 1000 - diff_time.tv_nsec;
			} else
				diff_time.tv_sec = 1;

			condlog(3, "waiting for %lu.%06lu secs",
				diff_time.tv_sec,
				diff_time.tv_nsec / 1000);
			if (nanosleep(&diff_time, NULL) != 0) {
				condlog(3, "nanosleep failed with error %d",
					errno);
				conf = get_multipath_config();
				conf->strict_timing = 0;
				put_multipath_config(conf);
				/*
				 * NOTE(review): this break leaves the while
				 * loop, so the thread function returns even
				 * though strict_timing was just switched
				 * off -- confirm that ending the checker
				 * thread here is intended.
				 */
				break;
			}
		}
	}
	pthread_cleanup_pop(1);
	return NULL;
}
get_multipath_config(); + pthread_cleanup_push(put_multipath_config, conf); + vector_foreach_slot (vecs->pathvec, pp, i){ + if (filter_path(conf, pp) > 0){ + vector_del_slot(vecs->pathvec, i); + free_path(pp); + i--; + } + } + pthread_cleanup_pop(1); + + if (map_discovery(vecs)) { + condlog(0, "configure failed at map discovery"); + goto fail; + } + + /* + * create new set of maps & push changed ones into dm + * In the first call, use FORCE_RELOAD_WEAK to avoid making + * superfluous ACT_RELOAD ioctls. Later calls are done + * with FORCE_RELOAD_YES. + */ + ret = coalesce_paths(vecs, mpvec, NULL, force_reload, CMD_NONE); + if (force_reload == FORCE_RELOAD_WEAK) + force_reload = FORCE_RELOAD_YES; + if (ret != CP_OK) { + condlog(0, "configure failed while coalescing paths"); + goto fail; + } + + /* + * may need to remove some maps which are no longer relevant + * e.g., due to blacklist changes in conf file + */ + if (coalesce_maps(vecs, mpvec)) { + condlog(0, "configure failed while coalescing maps"); + goto fail; + } + + dm_lib_release(); + + sync_maps_state(mpvec); + vector_foreach_slot(mpvec, mpp, i){ + if (remember_wwid(mpp->wwid) == 1) + trigger_paths_udev_change(mpp, true); + update_map_pr(mpp); + } + + /* + * purge dm of old maps + */ + remove_maps(vecs); + + /* + * save new set of maps formed by considering current path state + */ + vector_free(vecs->mpvec); + vecs->mpvec = mpvec; + + /* + * start dm event waiter threads for these new maps + */ + vector_foreach_slot(vecs->mpvec, mpp, i) { + if (wait_for_events(mpp, vecs)) { + remove_map(mpp, vecs, 1); + i--; + continue; + } + if (setup_multipath(vecs, mpp)) + i--; + } + return 0; + +fail: + vector_free(mpvec); + return 1; +} + +int +need_to_delay_reconfig(struct vectors * vecs) +{ + struct multipath *mpp; + int i; + + if (!VECTOR_SIZE(vecs->mpvec)) + return 0; + + vector_foreach_slot(vecs->mpvec, mpp, i) { + if (mpp->wait_for_udev) + return 1; + } + return 0; +} + +void rcu_free_config(struct rcu_head *head) 
+{ + struct config *conf = container_of(head, struct config, rcu); + + free_config(conf); +} + +int +reconfigure (struct vectors * vecs) +{ + struct config * old, *conf; + + conf = load_config(DEFAULT_CONFIGFILE); + if (!conf) + return 1; + + /* + * free old map and path vectors ... they use old conf state + */ + if (VECTOR_SIZE(vecs->mpvec)) + remove_maps_and_stop_waiters(vecs); + + free_pathvec(vecs->pathvec, FREE_PATHS); + vecs->pathvec = NULL; + delete_all_foreign(); + + reset_checker_classes(); + /* Re-read any timezone changes */ + tzset(); + + dm_tgt_version(conf->version, TGT_MPATH); + if (verbosity) + conf->verbosity = verbosity; + if (bindings_read_only) + conf->bindings_read_only = bindings_read_only; + uxsock_timeout = conf->uxsock_timeout; + + old = rcu_dereference(multipath_conf); + conf->sequence_nr = old->sequence_nr + 1; + rcu_assign_pointer(multipath_conf, conf); + call_rcu(&old->rcu, rcu_free_config); + + configure(vecs); + + + return 0; +} + +static struct vectors * +init_vecs (void) +{ + struct vectors * vecs; + + vecs = (struct vectors *)MALLOC(sizeof(struct vectors)); + + if (!vecs) + return NULL; + + pthread_mutex_init(&vecs->lock.mutex, NULL); + + return vecs; +} + +static void * +signal_set(int signo, void (*func) (int)) +{ + int r; + struct sigaction sig; + struct sigaction osig; + + sig.sa_handler = func; + sigemptyset(&sig.sa_mask); + sig.sa_flags = 0; + + r = sigaction(signo, &sig, &osig); + + if (r < 0) + return (SIG_ERR); + else + return (osig.sa_handler); +} + +void +handle_signals(bool nonfatal) +{ + if (exit_sig) { + condlog(2, "exit (signal)"); + exit_sig = 0; + exit_daemon(); + } + if (!nonfatal) + return; + if (reconfig_sig) { + condlog(2, "reconfigure (signal)"); + set_config_state(DAEMON_CONFIGURE); + } + if (log_reset_sig) { + condlog(2, "reset log (signal)"); + if (logsink == 1) + log_thread_reset(); + } + reconfig_sig = 0; + log_reset_sig = 0; +} + +static void +sighup(__attribute__((unused)) int sig) +{ + reconfig_sig = 
1; +} + +static void +sigend(__attribute__((unused)) int sig) +{ + exit_sig = 1; +} + +static void +sigusr1(__attribute__((unused)) int sig) +{ + log_reset_sig = 1; +} + +static void +sigusr2(__attribute__((unused)) int sig) +{ + condlog(3, "SIGUSR2 received"); +} + +static void +signal_init(void) +{ + sigset_t set; + + /* block all signals */ + sigfillset(&set); + /* SIGPIPE occurs if logging fails */ + sigdelset(&set, SIGPIPE); + pthread_sigmask(SIG_SETMASK, &set, NULL); + + /* Other signals will be unblocked in the uxlsnr thread */ + signal_set(SIGHUP, sighup); + signal_set(SIGUSR1, sigusr1); + signal_set(SIGUSR2, sigusr2); + signal_set(SIGINT, sigend); + signal_set(SIGTERM, sigend); + signal_set(SIGPIPE, sigend); +} + +static void +setscheduler (void) +{ + int res; + static struct sched_param sched_param = { + .sched_priority = 99 + }; + + res = sched_setscheduler (0, SCHED_RR, &sched_param); + + if (res == -1) + condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99"); + return; +} + +static void +set_oom_adj (void) +{ +#ifdef OOM_SCORE_ADJ_MIN + int retry = 1; + char *file = "/proc/self/oom_score_adj"; + int score = OOM_SCORE_ADJ_MIN; +#else + int retry = 0; + char *file = "/proc/self/oom_adj"; + int score = OOM_ADJUST_MIN; +#endif + FILE *fp; + struct stat st; + char *envp; + + envp = getenv("OOMScoreAdjust"); + if (envp) { + condlog(3, "Using systemd provided OOMScoreAdjust"); + return; + } + do { + if (stat(file, &st) == 0){ + fp = fopen(file, "w"); + if (!fp) { + condlog(0, "couldn't fopen %s : %s", file, + strerror(errno)); + return; + } + fprintf(fp, "%i", score); + fclose(fp); + return; + } + if (errno != ENOENT) { + condlog(0, "couldn't stat %s : %s", file, + strerror(errno)); + return; + } +#ifdef OOM_ADJUST_MIN + file = "/proc/self/oom_adj"; + score = OOM_ADJUST_MIN; +#else + retry = 0; +#endif + } while (retry--); + condlog(0, "couldn't adjust oom score"); +} + +static int +child (__attribute__((unused)) void *param) +{ + pthread_t check_thr, 
uevent_thr, uxlsnr_thr, uevq_thr, dmevent_thr; + pthread_attr_t log_attr, misc_attr, uevent_attr; + struct vectors * vecs; + struct multipath * mpp; + int i; +#ifdef USE_SYSTEMD + int startup_done = 0; +#endif + int rc; + int pid_fd = -1; + struct config *conf; + char *envp; + int queue_without_daemon; + enum daemon_status state; + + mlockall(MCL_CURRENT | MCL_FUTURE); + signal_init(); + rcu_init(); + + setup_thread_attr(&misc_attr, 64 * 1024, 0); + setup_thread_attr(&uevent_attr, DEFAULT_UEVENT_STACKSIZE * 1024, 0); + setup_thread_attr(&waiter_attr, 32 * 1024, 1); + setup_thread_attr(&io_err_stat_attr, 32 * 1024, 0); + + if (logsink == 1) { + setup_thread_attr(&log_attr, 64 * 1024, 0); + log_thread_start(&log_attr); + pthread_attr_destroy(&log_attr); + } + pid_fd = pidfile_create(DEFAULT_PIDFILE, daemon_pid); + if (pid_fd < 0) { + condlog(1, "failed to create pidfile"); + if (logsink == 1) + log_thread_stop(); + exit(1); + } + + post_config_state(DAEMON_START); + + condlog(2, "--------start up--------"); + condlog(2, "read " DEFAULT_CONFIGFILE); + + conf = load_config(DEFAULT_CONFIGFILE); + if (!conf) + goto failed; + + if (verbosity) + conf->verbosity = verbosity; + if (bindings_read_only) + conf->bindings_read_only = bindings_read_only; + uxsock_timeout = conf->uxsock_timeout; + rcu_assign_pointer(multipath_conf, conf); + if (init_checkers(conf->multipath_dir)) { + condlog(0, "failed to initialize checkers"); + goto failed; + } + if (init_prio(conf->multipath_dir)) { + condlog(0, "failed to initialize prioritizers"); + goto failed; + } + /* Failing this is non-fatal */ + + init_foreign(conf->multipath_dir, conf->enable_foreign); + + if (poll_dmevents) + poll_dmevents = dmevent_poll_supported(); + setlogmask(LOG_UPTO(conf->verbosity + 3)); + + envp = getenv("LimitNOFILE"); + + if (envp) + condlog(2,"Using systemd provided open fds limit of %s", envp); + else + set_max_fds(conf->max_fds); + + vecs = gvecs = init_vecs(); + if (!vecs) + goto failed; + + 
setscheduler(); + set_oom_adj(); + + /* + * Startup done, invalidate configuration + */ + conf = NULL; + + pthread_cleanup_push(config_cleanup, NULL); + pthread_mutex_lock(&config_lock); + + __post_config_state(DAEMON_IDLE); + rc = pthread_create(&uxlsnr_thr, &misc_attr, uxlsnrloop, vecs); + if (!rc) { + /* Wait for uxlsnr startup */ + while (running_state == DAEMON_IDLE) + pthread_cond_wait(&config_cond, &config_lock); + state = running_state; + } + pthread_cleanup_pop(1); + + if (rc) { + condlog(0, "failed to create cli listener: %d", rc); + goto failed; + } + else if (state != DAEMON_CONFIGURE) { + condlog(0, "cli listener failed to start"); + goto failed; + } + + if (poll_dmevents) { + if (init_dmevent_waiter(vecs)) { + condlog(0, "failed to allocate dmevents waiter info"); + goto failed; + } + if ((rc = pthread_create(&dmevent_thr, &misc_attr, + wait_dmevents, NULL))) { + condlog(0, "failed to create dmevent waiter thread: %d", + rc); + goto failed; + } + } + + /* + * Start uevent listener early to catch events + */ + if ((rc = pthread_create(&uevent_thr, &uevent_attr, ueventloop, udev))) { + condlog(0, "failed to create uevent thread: %d", rc); + goto failed; + } + pthread_attr_destroy(&uevent_attr); + + /* + * start threads + */ + if ((rc = pthread_create(&check_thr, &misc_attr, checkerloop, vecs))) { + condlog(0,"failed to create checker loop thread: %d", rc); + goto failed; + } + if ((rc = pthread_create(&uevq_thr, &misc_attr, uevqloop, vecs))) { + condlog(0, "failed to create uevent dispatcher: %d", rc); + goto failed; + } + pthread_attr_destroy(&misc_attr); + + while (1) { + pthread_cleanup_push(config_cleanup, NULL); + pthread_mutex_lock(&config_lock); + while (running_state != DAEMON_CONFIGURE && + running_state != DAEMON_SHUTDOWN) + pthread_cond_wait(&config_cond, &config_lock); + state = running_state; + pthread_cleanup_pop(1); + if (state == DAEMON_SHUTDOWN) + break; + if (state == DAEMON_CONFIGURE) { + pthread_cleanup_push(cleanup_lock, 
&vecs->lock); + lock(&vecs->lock); + pthread_testcancel(); + if (!need_to_delay_reconfig(vecs)) { + reconfigure(vecs); + } else { + conf = get_multipath_config(); + conf->delayed_reconfig = 1; + put_multipath_config(conf); + } + lock_cleanup_pop(vecs->lock); + post_config_state(DAEMON_IDLE); +#ifdef USE_SYSTEMD + if (!startup_done) { + sd_notify(0, "READY=1"); + startup_done = 1; + } +#endif + } + } + + lock(&vecs->lock); + conf = get_multipath_config(); + queue_without_daemon = conf->queue_without_daemon; + put_multipath_config(conf); + if (queue_without_daemon == QUE_NO_DAEMON_OFF) + vector_foreach_slot(vecs->mpvec, mpp, i) + dm_queue_if_no_path(mpp->alias, 0); + remove_maps_and_stop_waiters(vecs); + unlock(&vecs->lock); + + pthread_cancel(check_thr); + pthread_cancel(uevent_thr); + pthread_cancel(uxlsnr_thr); + pthread_cancel(uevq_thr); + if (poll_dmevents) + pthread_cancel(dmevent_thr); + + pthread_join(check_thr, NULL); + pthread_join(uevent_thr, NULL); + pthread_join(uxlsnr_thr, NULL); + pthread_join(uevq_thr, NULL); + if (poll_dmevents) + pthread_join(dmevent_thr, NULL); + + stop_io_err_stat_thread(); + + lock(&vecs->lock); + free_pathvec(vecs->pathvec, FREE_PATHS); + vecs->pathvec = NULL; + unlock(&vecs->lock); + + pthread_mutex_destroy(&vecs->lock.mutex); + FREE(vecs); + vecs = NULL; + + cleanup_foreign(); + cleanup_checkers(); + cleanup_prio(); + if (poll_dmevents) + cleanup_dmevent_waiter(); + + dm_lib_release(); + dm_lib_exit(); + + /* We're done here */ + condlog(3, "unlink pidfile"); + unlink(DEFAULT_PIDFILE); + + condlog(2, "--------shut down-------"); + + if (logsink == 1) + log_thread_stop(); + + /* + * Freeing config must be done after condlog() and dm_lib_exit(), + * because logging functions like dlog() and dm_write_log() + * reference the config. 
+ */ + conf = rcu_dereference(multipath_conf); + rcu_assign_pointer(multipath_conf, NULL); + call_rcu(&conf->rcu, rcu_free_config); + udev_unref(udev); + udev = NULL; + pthread_attr_destroy(&waiter_attr); + pthread_attr_destroy(&io_err_stat_attr); +#ifdef _DEBUG_ + dbg_free_final(NULL); +#endif + +#ifdef USE_SYSTEMD + sd_notify(0, "ERRNO=0"); +#endif + exit(0); + +failed: +#ifdef USE_SYSTEMD + sd_notify(0, "ERRNO=1"); +#endif + if (pid_fd >= 0) + close(pid_fd); + exit(1); +} + +static int +daemonize(void) +{ + int pid; + int dev_null_fd; + + if( (pid = fork()) < 0){ + fprintf(stderr, "Failed first fork : %s\n", strerror(errno)); + return -1; + } + else if (pid != 0) + return pid; + + setsid(); + + if ( (pid = fork()) < 0) + fprintf(stderr, "Failed second fork : %s\n", strerror(errno)); + else if (pid != 0) + _exit(0); + + if (chdir("/") < 0) + fprintf(stderr, "cannot chdir to '/', continuing\n"); + + dev_null_fd = open("/dev/null", O_RDWR); + if (dev_null_fd < 0){ + fprintf(stderr, "cannot open /dev/null for input & output : %s\n", + strerror(errno)); + _exit(0); + } + + close(STDIN_FILENO); + if (dup(dev_null_fd) < 0) { + fprintf(stderr, "cannot dup /dev/null to stdin : %s\n", + strerror(errno)); + _exit(0); + } + close(STDOUT_FILENO); + if (dup(dev_null_fd) < 0) { + fprintf(stderr, "cannot dup /dev/null to stdout : %s\n", + strerror(errno)); + _exit(0); + } + close(STDERR_FILENO); + if (dup(dev_null_fd) < 0) { + fprintf(stderr, "cannot dup /dev/null to stderr : %s\n", + strerror(errno)); + _exit(0); + } + close(dev_null_fd); + daemon_pid = getpid(); + return 0; +} + +int +main (int argc, char *argv[]) +{ + extern char *optarg; + extern int optind; + int arg; + int err; + int foreground = 0; + struct config *conf; + + ANNOTATE_BENIGN_RACE_SIZED(&multipath_conf, sizeof(multipath_conf), + "Manipulated through RCU"); + ANNOTATE_BENIGN_RACE_SIZED(&uxsock_timeout, sizeof(uxsock_timeout), + "Suppress complaints about this scalar variable"); + + logsink = 1; + + if 
(getuid() != 0) { + fprintf(stderr, "need to be root\n"); + exit(1); + } + + /* make sure we don't lock any path */ + if (chdir("/") < 0) + fprintf(stderr, "can't chdir to root directory : %s\n", + strerror(errno)); + umask(umask(077) | 022); + + pthread_cond_init_mono(&config_cond); + + udev = udev_new(); + libmp_udev_set_sync_support(0); + + while ((arg = getopt(argc, argv, ":dsv:k::Bniw")) != EOF ) { + switch(arg) { + case 'd': + foreground = 1; + if (logsink > 0) + logsink = 0; + //debug=1; /* ### comment me out ### */ + break; + case 'v': + if (sizeof(optarg) > sizeof(char *) || + !isdigit(optarg[0])) + exit(1); + + verbosity = atoi(optarg); + break; + case 's': + logsink = -1; + break; + case 'k': + logsink = 0; + conf = load_config(DEFAULT_CONFIGFILE); + if (!conf) + exit(1); + if (verbosity) + conf->verbosity = verbosity; + uxsock_timeout = conf->uxsock_timeout; + err = uxclnt(optarg, uxsock_timeout + 100); + free_config(conf); + return err; + case 'B': + bindings_read_only = 1; + break; + case 'n': + condlog(0, "WARNING: ignoring deprecated option -n, use 'ignore_wwids = no' instead"); + break; + case 'w': + poll_dmevents = 0; + break; + default: + fprintf(stderr, "Invalid argument '-%c'\n", + optopt); + exit(1); + } + } + if (optind < argc) { + char cmd[CMDSIZE]; + char * s = cmd; + char * c = s; + + logsink = 0; + conf = load_config(DEFAULT_CONFIGFILE); + if (!conf) + exit(1); + if (verbosity) + conf->verbosity = verbosity; + uxsock_timeout = conf->uxsock_timeout; + memset(cmd, 0x0, CMDSIZE); + while (optind < argc) { + if (strchr(argv[optind], ' ')) + c += snprintf(c, s + CMDSIZE - c, "\"%s\" ", argv[optind]); + else + c += snprintf(c, s + CMDSIZE - c, "%s ", argv[optind]); + optind++; + } + c += snprintf(c, s + CMDSIZE - c, "\n"); + err = uxclnt(s, uxsock_timeout + 100); + free_config(conf); + return err; + } + + if (foreground) { + if (!isatty(fileno(stdout))) + setbuf(stdout, NULL); + err = 0; + daemon_pid = getpid(); + } else + err = daemonize(); + 
+ if (err < 0) + /* error */ + exit(1); + else if (err > 0) + /* parent dies */ + exit(0); + else + /* child lives */ + return (child(NULL)); +} + +void * mpath_pr_event_handler_fn (void * pathp ) +{ + struct multipath * mpp; + unsigned int i; + int ret, isFound; + struct path * pp = (struct path *)pathp; + struct prout_param_descriptor *param; + struct prin_resp *resp; + + rcu_register_thread(); + mpp = pp->mpp; + + resp = mpath_alloc_prin_response(MPATH_PRIN_RKEY_SA); + if (!resp){ + condlog(0,"%s Alloc failed for prin response", pp->dev); + goto out; + } + + ret = prin_do_scsi_ioctl(pp->dev, MPATH_PRIN_RKEY_SA, resp, 0); + if (ret != MPATH_PR_SUCCESS ) + { + condlog(0,"%s : pr in read keys service action failed. Error=%d", pp->dev, ret); + goto out; + } + + condlog(3, " event pr=%d addlen=%d",resp->prin_descriptor.prin_readkeys.prgeneration, + resp->prin_descriptor.prin_readkeys.additional_length ); + + if (resp->prin_descriptor.prin_readkeys.additional_length == 0 ) + { + condlog(1, "%s: No key found. Device may not be registered.", pp->dev); + ret = MPATH_PR_SUCCESS; + goto out; + } + condlog(2, "Multipath reservation_key: 0x%" PRIx64 " ", + get_be64(mpp->reservation_key)); + + isFound =0; + for (i = 0; i < resp->prin_descriptor.prin_readkeys.additional_length/8; i++ ) + { + condlog(2, "PR IN READKEYS[%d] reservation key:",i); + dumpHex((char *)&resp->prin_descriptor.prin_readkeys.key_list[i*8], 8 , -1); + if (!memcmp(&mpp->reservation_key, &resp->prin_descriptor.prin_readkeys.key_list[i*8], 8)) + { + condlog(2, "%s: pr key found in prin readkeys response", mpp->alias); + isFound =1; + break; + } + } + if (!isFound) + { + condlog(0, "%s: Either device not registered or ", pp->dev); + condlog(0, "host is not authorised for registration. 
Skip path"); + ret = MPATH_PR_OTHER; + goto out; + } + + param= malloc(sizeof(struct prout_param_descriptor)); + memset(param, 0 , sizeof(struct prout_param_descriptor)); + param->sa_flags = mpp->sa_flags; + memcpy(param->sa_key, &mpp->reservation_key, 8); + param->num_transportid = 0; + + condlog(3, "device %s:%s", pp->dev, pp->mpp->wwid); + + ret = prout_do_scsi_ioctl(pp->dev, MPATH_PROUT_REG_IGN_SA, 0, 0, param, 0); + if (ret != MPATH_PR_SUCCESS ) + { + condlog(0,"%s: Reservation registration failed. Error: %d", pp->dev, ret); + } + mpp->prflag = 1; + + free(param); +out: + if (resp) + free(resp); + rcu_unregister_thread(); + return NULL; +} + +int mpath_pr_event_handle(struct path *pp) +{ + pthread_t thread; + int rc; + pthread_attr_t attr; + struct multipath * mpp; + + if (pp->bus != SYSFS_BUS_SCSI) + return 0; + + mpp = pp->mpp; + + if (!get_be64(mpp->reservation_key)) + return -1; + + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); + + rc = pthread_create(&thread, NULL , mpath_pr_event_handler_fn, pp); + if (rc) { + condlog(0, "%s: ERROR; return code from pthread_create() is %d", pp->dev, rc); + return -1; + } + pthread_attr_destroy(&attr); + rc = pthread_join(thread, NULL); + return 0; +} diff --git a/multipathd/main.h b/multipathd/main.h new file mode 100644 index 0000000..7bb8463 --- /dev/null +++ b/multipathd/main.h @@ -0,0 +1,51 @@ +#ifndef MAIN_H +#define MAIN_H + +#define MAPGCINT 5 + +enum daemon_status { + DAEMON_INIT, + DAEMON_START, + DAEMON_CONFIGURE, + DAEMON_IDLE, + DAEMON_RUNNING, + DAEMON_SHUTDOWN, +}; + +struct prout_param_descriptor; +struct prin_resp; + +extern pid_t daemon_pid; +extern int uxsock_timeout; + +void exit_daemon(void); +const char * daemon_status(void); +enum daemon_status wait_for_state_change_if(enum daemon_status oldstate, + unsigned long ms); +int need_to_delay_reconfig (struct vectors *); +int reconfigure (struct vectors *); +int ev_add_path (struct path *, struct vectors *, int); 
+int ev_remove_path (struct path *, struct vectors *, int); +int ev_add_map (char *, const char *, struct vectors *); +int ev_remove_map (char *, char *, int, struct vectors *); +int set_config_state(enum daemon_status); +void * mpath_alloc_prin_response(int prin_sa); +int prin_do_scsi_ioctl(char *, int rq_servact, struct prin_resp * resp, + int noisy); +void dumpHex(const char * , int len, int no_ascii); +int prout_do_scsi_ioctl(char * , int rq_servact, int rq_scope, + unsigned int rq_type, + struct prout_param_descriptor *param, int noisy); +int mpath_pr_event_handle(struct path *pp); +void * mpath_pr_event_handler_fn (void * ); +int update_map_pr(struct multipath *mpp); +void * mpath_pr_event_handler_fn (void * pathp ); +void handle_signals(bool); +int __setup_multipath (struct vectors * vecs, struct multipath * mpp, + int reset); +#define setup_multipath(vecs, mpp) __setup_multipath(vecs, mpp, 1) +int update_multipath (struct vectors *vecs, char *mapname, int reset); +int update_path_groups(struct multipath *mpp, struct vectors *vecs, + int refresh); + +#endif /* MAIN_H */ diff --git a/multipathd/multipathd.8 b/multipathd/multipathd.8 new file mode 100644 index 0000000..048a838 --- /dev/null +++ b/multipathd/multipathd.8 @@ -0,0 +1,355 @@ +.\" ---------------------------------------------------------------------------- +.\" Update the date below if you make any significant change. +.\" Make sure there are no errors with: +.\" groff -z -wall -b -e -t multipathd/multipathd.8 +.\" +.\" ---------------------------------------------------------------------------- +. +.TH MULTIPATHD 8 2016-10-27 Linux +. +. +.\" ---------------------------------------------------------------------------- +.SH NAME +.\" ---------------------------------------------------------------------------- +. +multipathd \- Multipath daemon. +. +. 
+.\" ---------------------------------------------------------------------------- +.SH SYNOPSIS +.\" ---------------------------------------------------------------------------- +. +.B multipathd +.RB [\| \-d | \-k \|] +.RB [\| \-s \|] +.RB [\| \-v\ \c +.IR verbosity \|] +.RB [\| \-B \|] +.RB [\| \-w \|] +. +. +.\" ---------------------------------------------------------------------------- +.SH DESCRIPTION +.\" ---------------------------------------------------------------------------- +. +The \fBmultipathd\fR daemon is in charge of checking for failed paths. When this +happens, it will reconfigure the multipath map the path belongs to, so that this +map regains its maximum performance and redundancy. + +This daemon executes the external \fBmultipath\fR tool when events occur. +In turn, the multipath tool signals the multipathd daemon when it is done with +devmap reconfiguration, so that it can refresh its failed path list. +. +. +.\" ---------------------------------------------------------------------------- +.SH OPTIONS +.\" ---------------------------------------------------------------------------- +. +.TP +.B \-d +Foreground Mode. Don't daemonize, and print all messages to stdout and stderr. +. +.TP +.B \-s +Suppress timestamps. Do not prefix logging messages with a timestamp. +. +.TP +.BI \-v " level" +Verbosity level. Print additional information while running multipathd. A level +of 0 means only print errors. A level of 3 or greater prints debugging information +as well. +. +.TP +.B \-B +Read-only bindings file. multipathd will not write to the \fIuser_friendly_names\fR +bindings file. If a \fIuser_friendly_name\fR doesn't already exist for a device, it +will use its WWID as its alias. +. +.TP +.B \-k +multipathd will enter interactive mode. From this mode, the available commands can +be viewed by entering '\fIhelp\fR'. When you are finished entering commands, press +\fBCTRL-D\fR to quit. +. +.TP +.B \-n +\fBIGNORED\fR. 
Use the option +\fIfind_multipaths\fR to control the treatment of newly detected devices by +multipathd. See +.BR multipath.conf (5). +. +.TP +.B \-w +Since kernel 4.14 a new device-mapper event polling interface is used for updating +multipath devices on dmevents. Use this flag to force it to use the old event +waiting method, based on creating a separate thread for each device. +. +. +. +.\" ---------------------------------------------------------------------------- +.SH COMMANDS +.\" ---------------------------------------------------------------------------- +. +.TP +The following commands can be used in interactive mode: +. +.TP +.B list|show paths +Show the paths that multipathd is monitoring, and their state. +. +.TP +.B list|show paths format $format +Show the paths that multipathd is monitoring, using a format string with path +format wildcards. +. +.TP +.B list|show maps|multipaths +Show the multipath devices that the multipathd is monitoring. +. +.TP +.B list|show maps|multipaths format $format +Show the status of all multipath devices that the multipathd is monitoring, +using a format string with multipath format wildcards. +. +.TP +.B list|show maps|multipaths status +Show the status of all multipath devices that the multipathd is monitoring. +. +.TP +.B list|show maps|multipaths stats +Show some statistics of all multipath devices that the multipathd is monitoring. +. +.TP +.B list|show maps|multipaths topology +Show the current multipath topology. Same as '\fImultipath \-ll\fR'. +. +.TP +.B list|show topology +Show the current multipath topology. Same as '\fImultipath \-ll\fR'. +. +.TP +.B list|show map|multipath $map topology +Show topology of a single multipath device specified by $map, for example +36005076303ffc56200000000000010aa. This map could be obtained from '\fIlist maps\fR'. +. +.TP +.B list|show wildcards +Show the format wildcards used in interactive commands taking $format. +. 
+.TP +.B list|show config +Show the currently used configuration, derived from default values and values +specified within the configuration file \fI/etc/multipath.conf\fR. +. +.TP +.B list|show config local +Show the currently used configuration like \fIshow config\fR, but limiting +the devices section to those devices that are actually present in the system. +. +.TP +.B list|show blacklist +Show the currently used blacklist rules, derived from default values and values +specified within the configuration file \fI/etc/multipath.conf\fR. +. +.TP +.B list|show devices +Show all available block devices by name including the information if they are +blacklisted or not. +. +.TP +.B list|show status +Show the number of path checkers in each possible state, the number of monitored +paths, and whether multipathd is currently handling a uevent. +. +.TP +.B list|show daemon +Show the current state of the multipathd daemon. +. +.TP +.B add path $path +Add a path to the list of monitored paths. $path is as listed in /sys/block (e.g. sda). +. +.TP +.B remove|del path $path +Stop monitoring a path. $path is as listed in /sys/block (e.g. sda). +. +.TP +.B add map|multipath $map +Add a multipath device to the list of monitored devices. $map can either be a +device-mapper device as listed in /sys/block (e.g. dm-0) or it can be the alias +for the multipath device (e.g. mpath1) or the uid of the multipath device +(e.g. 36005076303ffc56200000000000010aa). +. +.TP +.B remove|del map|multipath $map +Stop monitoring a multipath device. +. +.TP +.B resize map|multipath $map +Resizes map $map to the given size. +. +.TP +.B switch|switchgroup map|multipath $map group $group +Force a multipath device to switch to a specific path group. $group is the path +group index, starting with 1. +. +.TP +.B reconfigure +Reconfigures the multipaths. This should be triggered automatically after any +hotplug event. +. +.TP +.B suspend map|multipath $map +Sets map $map into suspend state. +. 
+.TP +.B resume map|multipath $map +Resumes map $map from suspend state. +. +.TP +.B reset map|multipath $map +Reassign existing device-mapper table(s) to use the multipath device, instead +of its path devices. +. +.TP +.B reload map|multipath $map +Reload a multipath device. +. +.TP +.B fail path $path +Sets path $path into failed state. +. +.TP +.B reinstate path $path +Resumes path $path from failed state. +. +.TP +.B disablequeueing maps|multipaths +Disable queueing on all multipath devices. +. +.TP +.B restorequeueing maps|multipaths +Restore queueing on all multipath devices. +. +.TP +.B disablequeueing map|multipath $map +Disable queuing on multipathed map $map. +. +.TP +.B restorequeueing map|multipath $map +Restore queuing on multipathed map $map. +. +.TP +.B forcequeueing daemon +Forces multipathd into queue_without_daemon mode, so that no_path_retry queueing +will not be disabled when the daemon stops. +. +.TP +.B restorequeueing daemon +Restores configured queue_without_daemon mode. +. +.TP +.B map|multipath $map setprstatus +Enable persistent reservation management on $map. +. +.TP +.B map|multipath $map unsetprstatus +Disable persistent reservation management on $map. +. +.TP +.B map|multipath $map getprstatus +Get the current persistent reservation management status of $map. +. +.TP +.B map|multipath $map getprkey +Get the current persistent reservation key associated with $map. +. +.TP +.B map|multipath $map setprkey key $key +Set the persistent reservation key associated with $map to $key in the +\fIprkeys_file\fR. This key will only be used by multipathd if +\fIreservation_key\fR is set to \fBfile\fR in \fI/etc/multipath.conf\fR. +. +.TP +.B map|multipath $map unsetprkey +Remove the persistent reservation key associated with $map from the +\fIprkeys_file\fR. This will only unset the key used by multipathd if +\fIreservation_key\fR is set to \fBfile\fR in \fI/etc/multipath.conf\fR. +. 
+.TP +.B path $path setmarginal +Move $path to a marginal pathgroup. The path will remain in the marginal +path group until \fIunsetmarginal\fR is called. This command will only +work if \fImarginal_pathgroups\fR is enabled and there is no Shaky paths +detection method configured (see the multipath.conf man page for details). +. +.TP +.B path $path unsetmarginal +Return marginal path $path to its normal pathgroup. This command will only +work if \fImarginal_pathgroups\fR is enabled and there is no Shaky paths +detection method configured (see the multipath.conf man page for details). +. +.TP +.B map $map unsetmarginal +Return all marginal paths in $map to their normal pathgroups. This command +will only work if \fImarginal_pathgroups\fR is enabled and there is no Shaky +paths detection method configured (see the multipath.conf man page for details). +. +.TP +.B quit|exit +End interactive session. +. +.TP +.B shutdown +Stop multipathd. +. +. +.\" ---------------------------------------------------------------------------- +.SH "SYSTEMD INTEGRATION" +.\" ---------------------------------------------------------------------------- +. +When compiled with systemd support two systemd service files are installed, +\fImultipathd.service\fR and \fImultipathd.socket\fR. The \fImultipathd.socket\fR +service instructs systemd to intercept the CLI command socket, so that any call +to the CLI interface will start up the daemon if required. +The \fImultipathd.service\fR file carries the definitions for controlling the +multipath daemon. The daemon itself uses the \fBsd_notify\fR(3) interface to +communicate with systemd. The following unit keywords are recognized: +. +.TP +.B WatchdogSec= +Enables the internal watchdog from systemd. multipathd will send a +notification via \fBsd_notify\fR(3) to systemd to reset the watchdog. If +specified the \fIpolling_interval\fR and \fImax_polling_interval\fR settings +will be overridden by the watchdog settings. 
+Please note that systemd prior to version 207 has issues which prevent +the systemd-provided watchdog from working correctly. So the watchdog +is not enabled per default, but has to be enabled manually by updating +the \fImultipathd.service\fR file. +. +.TP +.B OOMScoreAdjust= +Overrides the internal OOM adjust mechanism. +. +.TP +.B LimitNOFILE= +Overrides the \fImax_fds\fR configuration setting. +. +. +.\" ---------------------------------------------------------------------------- +.SH "SEE ALSO" +.\" ---------------------------------------------------------------------------- +. +.BR multipath (8), +.BR kpartx (8), +.BR sd_notify (3), +.BR systemd.service (5). +. +. +.\" ---------------------------------------------------------------------------- +.SH AUTHORS +.\" ---------------------------------------------------------------------------- +. +\fImultipath-tools\fR was developed by Christophe Varoqui +and others. +.\" EOF diff --git a/multipathd/multipathd.service b/multipathd/multipathd.service new file mode 100644 index 0000000..ba24983 --- /dev/null +++ b/multipathd/multipathd.service @@ -0,0 +1,23 @@ +[Unit] +Description=Device-Mapper Multipath Device Controller +Wants=systemd-udev-trigger.service systemd-udev-settle.service +Before=iscsi.service iscsid.service lvm2-activation-early.service +Before=local-fs-pre.target blk-availability.service +After=multipathd.socket systemd-udev-trigger.service systemd-udev-settle.service +DefaultDependencies=no +Conflicts=shutdown.target +ConditionKernelCommandLine=!nompath +ConditionKernelCommandLine=!multipath=off + +[Service] +Type=notify +NotifyAccess=main +LimitCORE=infinity +ExecStartPre=-/sbin/modprobe -a scsi_dh_alua scsi_dh_emc scsi_dh_rdac dm-multipath +ExecStart=/sbin/multipathd -d -s +ExecReload=/sbin/multipathd reconfigure +TasksMax=infinity + +[Install] +WantedBy=sysinit.target +Also=multipathd.socket diff --git a/multipathd/multipathd.socket b/multipathd/multipathd.socket new file mode 100644 index 
0000000..0ed4a1f --- /dev/null +++ b/multipathd/multipathd.socket @@ -0,0 +1,10 @@ +[Unit] +Description=multipathd control socket +DefaultDependencies=no +Before=sockets.target + +[Socket] +ListenStream=@/org/kernel/linux/storage/multipathd + +[Install] +WantedBy=sockets.target diff --git a/multipathd/pidfile.c b/multipathd/pidfile.c new file mode 100644 index 0000000..cc0fbec --- /dev/null +++ b/multipathd/pidfile.c @@ -0,0 +1,65 @@ +#include /* for pid_t */ +#include /* for open */ +#include /* for EACCESS and EAGAIN */ +#include /* for snprintf() */ +#include /* for memset() */ +#include /* for ftruncate() */ +#include /* for fcntl() */ + +#include "debug.h" + +#include "pidfile.h" + +int pidfile_create(const char *pidFile, pid_t pid) +{ + char buf[20]; + struct flock lock; + int fd, value; + + if((fd = open(pidFile, O_WRONLY | O_CREAT, + (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH))) < 0) { + condlog(0, "Cannot open pidfile [%s], error was [%s]", + pidFile, strerror(errno)); + return -errno; + } + lock.l_type = F_WRLCK; + lock.l_start = 0; + lock.l_whence = SEEK_SET; + lock.l_len = 0; + + if (fcntl(fd, F_SETLK, &lock) < 0) { + if (errno != EACCES && errno != EAGAIN) + condlog(0, "Cannot lock pidfile [%s], error was [%s]", + pidFile, strerror(errno)); + else + condlog(0, "process is already running"); + goto fail; + } + if (ftruncate(fd, 0) < 0) { + condlog(0, "Cannot truncate pidfile [%s], error was [%s]", + pidFile, strerror(errno)); + goto fail; + } + memset(buf, 0, sizeof(buf)); + snprintf(buf, sizeof(buf)-1, "%u", pid); + if (write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf)) { + condlog(0, "Cannot write pid to pidfile [%s], error was [%s]", + pidFile, strerror(errno)); + goto fail; + } + if ((value = fcntl(fd, F_GETFD, 0)) < 0) { + condlog(0, "Cannot get close-on-exec flag from pidfile [%s], " + "error was [%s]", pidFile, strerror(errno)); + goto fail; + } + value |= FD_CLOEXEC; + if (fcntl(fd, F_SETFD, value) < 0) { + condlog(0, "Cannot set close-on-exec 
flag from pidfile [%s], " + "error was [%s]", pidFile, strerror(errno)); + goto fail; + } + return fd; +fail: + close(fd); + return -errno; +} diff --git a/multipathd/pidfile.h b/multipathd/pidfile.h new file mode 100644 index 0000000..d308892 --- /dev/null +++ b/multipathd/pidfile.h @@ -0,0 +1 @@ +int pidfile_create(const char *pidFile, pid_t pid); diff --git a/multipathd/uxclnt.c b/multipathd/uxclnt.c new file mode 100644 index 0000000..a76f8e2 --- /dev/null +++ b/multipathd/uxclnt.c @@ -0,0 +1,150 @@ +/* + * Original author : tridge@samba.org, January 2002 + * + * Copyright (c) 2005 Christophe Varoqui + * Copyright (c) 2005 Benjamin Marzinski, Redhat + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mpath_cmd.h" +#include "uxsock.h" +#include "memory.h" +#include "defaults.h" + +#include "vector.h" +#include "cli.h" +#include "uxclnt.h" + +/* Print a daemon reply to stdout; ANSI color markers are removed when stdout is not a tty. */ +static void print_reply(char *s) +{ + if (!s) + return; + + if (isatty(1)) { + printf("%s", s); + return; + } + /* strip ANSI color markers; stop at the NUL so an unterminated or + * trailing sequence cannot advance s past the end of the string */ + while (*s != '\0') { + if ((*s == 0x1b) && (*(s+1) == '[')) + while ((*s != '\0') && (*s++ != 'm')) {}; + else + putchar(*s++); + } +} + +static int need_quit(char *str, size_t len) +{ + char *ptr, *start; + size_t trimed_len = len; + + for (ptr = str; trimed_len && isspace(*ptr); + trimed_len--, ptr++) + ; + + start = ptr; + + for (ptr = str + len - 1; trimed_len && isspace(*ptr); + trimed_len--, ptr--) + ; + + if ((trimed_len == 4 && !strncmp(start, "exit", 4)) || + (trimed_len == 4 && !strncmp(start, "quit", 4))) + return 1; + + return 0; +} + +/* + * process the client + */ +static void process(int fd, unsigned int timeout) +{ + char *line; + char *reply; + int ret; + + cli_init(); + 
rl_readline_name = "multipathd"; + rl_completion_entry_function = key_generator; + while ((line = readline("multipathd> "))) { + size_t llen = strlen(line); + + if (!llen) { + free(line); + continue; + } + + if 
(need_quit(line, llen)) + break; + + if (send_packet(fd, line) != 0) break; + ret = recv_packet(fd, &reply, timeout); + if (ret != 0) break; + + print_reply(reply); + + if (line && *line) + add_history(line); + + free(line); + FREE(reply); + } +} + +static int process_req(int fd, char * inbuf, unsigned int timeout) +{ + char *reply; + int ret; + + if (send_packet(fd, inbuf) != 0) { + printf("cannot send packet\n"); + return 1; + } + ret = recv_packet(fd, &reply, timeout); + if (ret < 0) { + if (ret == -ETIMEDOUT) + printf("timeout receiving packet\n"); + else + printf("error %d receiving packet\n", ret); + return 1; + } else { + printf("%s", reply); + ret = (strcmp(reply, "fail\n") == 0); + FREE(reply); + return ret; + } +} + +/* + * entry point + */ +int uxclnt(char * inbuf, unsigned int timeout) +{ + int fd, ret = 0; + + fd = mpath_connect(); + if (fd == -1) + exit(1); + + if (inbuf) + ret = process_req(fd, inbuf, timeout); + else + process(fd, timeout); + mpath_disconnect(fd); + return ret; +} diff --git a/multipathd/uxclnt.h b/multipathd/uxclnt.h new file mode 100644 index 0000000..8e2cdce --- /dev/null +++ b/multipathd/uxclnt.h @@ -0,0 +1 @@ +int uxclnt(char * inbuf, unsigned int timeout); diff --git a/multipathd/uxlsnr.c b/multipathd/uxlsnr.c new file mode 100644 index 0000000..1c5ce9d --- /dev/null +++ b/multipathd/uxlsnr.c @@ -0,0 +1,453 @@ +/* + * Original author : tridge@samba.org, January 2002 + * + * Copyright (c) 2005 Christophe Varoqui + * Copyright (c) 2005 Benjamin Marzinski, Redhat + */ + +/* + * A simple domain socket listener + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "checkers.h" +#include "memory.h" +#include "debug.h" +#include "vector.h" +#include "structs.h" +#include "structs_vec.h" +#include "uxsock.h" +#include "defaults.h" +#include "config.h" +#include "mpath_cmd.h" +#include "time-util.h" + 
+#include "main.h" +#include "cli.h" +#include "uxlsnr.h" + +static struct timespec sleep_time = {5, 0}; + +struct client { + struct list_head node; + int fd; +}; + +#define MIN_POLLS 1023 + +static LIST_HEAD(clients); +static pthread_mutex_t client_lock = PTHREAD_MUTEX_INITIALIZER; +static struct pollfd *polls; +static int notify_fd = -1; +static char *watch_config_dir; + +static bool _socket_client_is_root(int fd); + +static bool _socket_client_is_root(int fd) +{ + socklen_t len = 0; + struct ucred uc; + + len = sizeof(struct ucred); + if ((fd >= 0) && + (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &uc, &len) == 0) && + (uc.uid == 0)) + return true; + + /* Treat error as not root client */ + return false; +} + +/* + * handle a new client joining + */ +static void new_client(int ux_sock) +{ + struct client *c; + struct sockaddr addr; + socklen_t len = sizeof(addr); + int fd; + + fd = accept(ux_sock, &addr, &len); + + if (fd == -1) + return; + + c = (struct client *)MALLOC(sizeof(*c)); + if (!c) { + close(fd); + return; + } + memset(c, 0, sizeof(*c)); + INIT_LIST_HEAD(&c->node); + c->fd = fd; + + /* put it in our linked list */ + pthread_mutex_lock(&client_lock); + list_add_tail(&c->node, &clients); + pthread_mutex_unlock(&client_lock); +} + +/* + * kill off a dead client + */ +static void _dead_client(struct client *c) +{ + int fd = c->fd; + list_del_init(&c->node); + c->fd = -1; + FREE(c); + close(fd); +} + +static void dead_client(struct client *c) +{ + pthread_cleanup_push(cleanup_lock, &client_lock); + pthread_mutex_lock(&client_lock); + _dead_client(c); + pthread_cleanup_pop(1); +} + +static void free_polls (void) +{ + if (polls) + FREE(polls); +} + +static void check_timeout(struct timespec start_time, char *inbuf, + unsigned int timeout) +{ + struct timespec diff_time, end_time; + + if (start_time.tv_sec) { + unsigned long msecs; + + get_monotonic_time(&end_time); + timespecsub(&end_time, &start_time, &diff_time); + msecs = diff_time.tv_sec * 1000 + + 
diff_time.tv_nsec / (1000 * 1000); + if (msecs > timeout) + condlog(2, "cli cmd '%s' timeout reached " + "after %lu.%06lu secs", inbuf, + diff_time.tv_sec, diff_time.tv_nsec / 1000); + } +} + +void uxsock_cleanup(void *arg) +{ + struct client *client_loop; + struct client *client_tmp; + long ux_sock = (long)arg; + + close(ux_sock); + close(notify_fd); + free(watch_config_dir); + + pthread_mutex_lock(&client_lock); + list_for_each_entry_safe(client_loop, client_tmp, &clients, node) { + _dead_client(client_loop); + } + pthread_mutex_unlock(&client_lock); + + cli_exit(); + free_polls(); +} + +struct watch_descriptors { + int conf_wd; + int dir_wd; +}; + +/* failing to set the watch descriptor is o.k. we just miss a warning + * message */ +static void reset_watch(int notify_fd, struct watch_descriptors *wds, + unsigned int *sequence_nr) +{ + struct config *conf; + int dir_reset = 0; + int conf_reset = 0; + + if (notify_fd == -1) + return; + + conf = get_multipath_config(); + /* instead of repeatedly try to reset the inotify watch if + * the config directory or multipath.conf isn't there, just + * do it once per reconfigure */ + if (*sequence_nr != conf->sequence_nr) { + *sequence_nr = conf->sequence_nr; + if (wds->conf_wd == -1) + conf_reset = 1; + if (!watch_config_dir || !conf->config_dir || + strcmp(watch_config_dir, conf->config_dir)) { + dir_reset = 1; + if (watch_config_dir) + free(watch_config_dir); + if (conf->config_dir) + watch_config_dir = strdup(conf->config_dir); + else + watch_config_dir = NULL; + } else if (wds->dir_wd == -1) + dir_reset = 1; + } + put_multipath_config(conf); + + if (dir_reset) { + if (wds->dir_wd != -1) { + inotify_rm_watch(notify_fd, wds->dir_wd); + wds->dir_wd = -1; + } + if (watch_config_dir) { + wds->dir_wd = inotify_add_watch(notify_fd, + watch_config_dir, + IN_CLOSE_WRITE | + IN_DELETE | IN_ONLYDIR); + if (wds->dir_wd == -1) + condlog(3, "didn't set up notifications on %s: %m", watch_config_dir); + } + } + if (conf_reset) { + 
wds->conf_wd = inotify_add_watch(notify_fd, DEFAULT_CONFIGFILE, + IN_CLOSE_WRITE); + if (wds->conf_wd == -1) + condlog(3, "didn't set up notifications on /etc/multipath.conf: %m"); + } + return; +} + +static void handle_inotify(int fd, struct watch_descriptors *wds) +{ + char buff[1024] + __attribute__ ((aligned(__alignof__(struct inotify_event)))); + const struct inotify_event *event; + ssize_t len; + char *ptr; + int got_notify = 0; + + for (;;) { + len = read(fd, buff, sizeof(buff)); + if (len <= 0) { + if (len < 0 && errno != EAGAIN) { + condlog(3, "error reading from inotify_fd"); + if (wds->conf_wd != -1) + inotify_rm_watch(fd, wds->conf_wd); + if (wds->dir_wd != -1) + inotify_rm_watch(fd, wds->dir_wd); + wds->conf_wd = wds->dir_wd = -1; + } + break; + } + + got_notify = 1; + for (ptr = buff; ptr < buff + len; + ptr += sizeof(struct inotify_event) + event->len) { + event = (const struct inotify_event *) ptr; + + if (event->mask & IN_IGNORED) { + /* multipathd.conf may have been overwritten. 
+ * Try once to reset the notification */ + if (wds->conf_wd == event->wd) + wds->conf_wd = inotify_add_watch(notify_fd, DEFAULT_CONFIGFILE, IN_CLOSE_WRITE); + else if (wds->dir_wd == event->wd) + wds->dir_wd = -1; + } + } + } + if (got_notify) + condlog(1, "Multipath configuration updated.\nReload multipathd for changes to take effect"); +} + +/* + * entry point + */ +void * uxsock_listen(uxsock_trigger_fn uxsock_trigger, long ux_sock, + void * trigger_data) +{ + int rlen; + char *inbuf; + char *reply; + sigset_t mask; + int old_clients = MIN_POLLS; + /* conf->sequence_nr will be 1 when uxsock_listen is first called */ + unsigned int sequence_nr = 0; + struct watch_descriptors wds = { .conf_wd = -1, .dir_wd = -1 }; + + condlog(3, "uxsock: startup listener"); + polls = (struct pollfd *)MALLOC((MIN_POLLS + 2) * sizeof(struct pollfd)); + if (!polls) { + condlog(0, "uxsock: failed to allocate poll fds"); + exit_daemon(); + } + notify_fd = inotify_init1(IN_NONBLOCK); + if (notify_fd == -1) /* it's fine if notifications fail */ + condlog(3, "failed to start up configuration notifications"); + sigfillset(&mask); + sigdelset(&mask, SIGINT); + sigdelset(&mask, SIGTERM); + sigdelset(&mask, SIGHUP); + sigdelset(&mask, SIGUSR1); + while (1) { + struct client *c, *tmp; + int i, poll_count, num_clients; + + /* setup for a poll */ + pthread_mutex_lock(&client_lock); + num_clients = 0; + list_for_each_entry(c, &clients, node) { + num_clients++; + } + if (num_clients != old_clients) { + struct pollfd *new; + if (num_clients <= MIN_POLLS && old_clients > MIN_POLLS) { + new = REALLOC(polls, (2 + MIN_POLLS) * + sizeof(struct pollfd)); + } else if (num_clients <= MIN_POLLS && old_clients <= MIN_POLLS) { + new = polls; + } else { + new = REALLOC(polls, (2 + num_clients) * + sizeof(struct pollfd)); + } + if (!new) { + pthread_mutex_unlock(&client_lock); + condlog(0, "%s: failed to realloc %d poll fds", + "uxsock", 2 + num_clients); + sched_yield(); + continue; + } + old_clients = 
num_clients; + polls = new; + } + polls[0].fd = ux_sock; + polls[0].events = POLLIN; + + reset_watch(notify_fd, &wds, &sequence_nr); + if (notify_fd == -1 || (wds.conf_wd == -1 && wds.dir_wd == -1)) + polls[1].fd = -1; + else + polls[1].fd = notify_fd; + polls[1].events = POLLIN; + + /* setup the clients */ + i = 2; + list_for_each_entry(c, &clients, node) { + polls[i].fd = c->fd; + polls[i].events = POLLIN; + i++; + } + pthread_mutex_unlock(&client_lock); + + /* most of our life is spent in this call */ + poll_count = ppoll(polls, i, &sleep_time, &mask); + + handle_signals(false); + if (poll_count == -1) { + if (errno == EINTR) { + handle_signals(true); + continue; + } + + /* something went badly wrong! */ + condlog(0, "uxsock: poll failed with %d", errno); + exit_daemon(); + break; + } + + if (poll_count == 0) { + handle_signals(true); + continue; + } + + /* + * Client connection. We shouldn't answer while we're + * configuring - nothing may be configured yet. + * But we can't wait forever either, because this thread + * must handle signals. So wait a short while only. 
+ */ + if (wait_for_state_change_if(DAEMON_CONFIGURE, 10) + == DAEMON_CONFIGURE) { + handle_signals(false); + continue; + } + + /* see if a client wants to speak to us */ + for (i = 2; i < num_clients + 2; i++) { + if (polls[i].revents & POLLIN) { + struct timespec start_time; + + c = NULL; + pthread_mutex_lock(&client_lock); + list_for_each_entry(tmp, &clients, node) { + if (tmp->fd == polls[i].fd) { + c = tmp; + break; + } + } + pthread_mutex_unlock(&client_lock); + if (!c) { + condlog(4, "cli%d: new fd %d", + i, polls[i].fd); + continue; + } + get_monotonic_time(&start_time); + if (recv_packet_from_client(c->fd, &inbuf, + uxsock_timeout) + != 0) { + dead_client(c); + continue; + } + if (!inbuf) { + condlog(4, "recv_packet_from_client " + "get null request"); + continue; + } + condlog(4, "cli[%d]: Got request [%s]", + i, inbuf); + uxsock_trigger(inbuf, &reply, &rlen, + _socket_client_is_root(c->fd), + trigger_data); + if (reply) { + if (send_packet(c->fd, + reply) != 0) { + dead_client(c); + } else { + condlog(4, "cli[%d]: " + "Reply [%d bytes]", + i, rlen); + } + FREE(reply); + reply = NULL; + } + check_timeout(start_time, inbuf, + uxsock_timeout); + FREE(inbuf); + } + } + /* see if we got a non-fatal signal */ + handle_signals(true); + + /* see if we got a new client */ + if (polls[0].revents & POLLIN) { + new_client(ux_sock); + } + + /* handle inotify events on config files */ + if (polls[1].revents & POLLIN) + handle_inotify(notify_fd, &wds); + } + + return NULL; +} diff --git a/multipathd/uxlsnr.h b/multipathd/uxlsnr.h new file mode 100644 index 0000000..18f008d --- /dev/null +++ b/multipathd/uxlsnr.h @@ -0,0 +1,12 @@ +#ifndef _UXLSNR_H +#define _UXLSNR_H + +#include + +typedef int (uxsock_trigger_fn)(char *, char **, int *, bool, void *); + +void uxsock_cleanup(void *arg); +void *uxsock_listen(uxsock_trigger_fn uxsock_trigger, long ux_sock, + void * trigger_data); + +#endif diff --git a/multipathd/waiter.c b/multipathd/waiter.c new file mode 100644 index 
0000000..e645766 --- /dev/null +++ b/multipathd/waiter.c @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2004, 2005 Christophe Varoqui + * Copyright (c) 2005 Kiyoshi Ueda, NEC + * Copyright (c) 2005 Benjamin Marzinski, Redhat + * Copyright (c) 2005 Edward Goggin, EMC + */ +#include +#include +#include +#include +#include +#include + +#include "util.h" +#include "vector.h" +#include "memory.h" +#include "checkers.h" +#include "config.h" +#include "structs.h" +#include "structs_vec.h" +#include "devmapper.h" +#include "debug.h" +#include "lock.h" +#include "waiter.h" +#include "main.h" + +pthread_attr_t waiter_attr; +struct mutex_lock waiter_lock = { .mutex = PTHREAD_MUTEX_INITIALIZER }; + +/* Allocate a zeroed event_thread; returns NULL if the allocation fails. */ +static struct event_thread *alloc_waiter (void) +{ + struct event_thread *wp; + + wp = (struct event_thread *)MALLOC(sizeof(struct event_thread)); + if (wp) + memset(wp, 0, sizeof(struct event_thread)); + + return wp; +} + +static void free_waiter (void *data) +{ + struct event_thread *wp = (struct event_thread *)data; + + if (wp->dmt) + dm_task_destroy(wp->dmt); + + rcu_unregister_thread(); + FREE(wp); +} + +void stop_waiter_thread (struct multipath *mpp) +{ + pthread_t thread; + + if (mpp->waiter == (pthread_t)0) { + condlog(3, "%s: event checker thread already stopped", + mpp->alias); + return; + } + /* Don't cancel yourself. 
__setup_multipath is called by + by the waiter thread, and may remove a multipath device */ + if (pthread_equal(mpp->waiter, pthread_self())) + return; + + condlog(3, "%s: stop event checker thread (%lu)", mpp->alias, + mpp->waiter); + thread = mpp->waiter; + mpp->waiter = (pthread_t)0; + pthread_cleanup_push(cleanup_lock, &waiter_lock); + lock(&waiter_lock); + pthread_kill(thread, SIGUSR2); + pthread_cancel(thread); + lock_cleanup_pop(&waiter_lock); +} + +/* + * returns the reschedule delay + * negative means *stop* + */ +static int waiteventloop (struct event_thread *waiter) +{ + sigset_t set, oldset; + int event_nr; + int r; + + if (!waiter->event_nr) + waiter->event_nr = dm_geteventnr(waiter->mapname); + + if (!(waiter->dmt = libmp_dm_task_create(DM_DEVICE_WAITEVENT))) { + condlog(0, "%s: devmap event #%i dm_task_create error", + waiter->mapname, waiter->event_nr); + return 1; + } + + if (!dm_task_set_name(waiter->dmt, waiter->mapname)) { + condlog(0, "%s: devmap event #%i dm_task_set_name error", + waiter->mapname, waiter->event_nr); + dm_task_destroy(waiter->dmt); + waiter->dmt = NULL; + return 1; + } + + if (waiter->event_nr && !dm_task_set_event_nr(waiter->dmt, + waiter->event_nr)) { + condlog(0, "%s: devmap event #%i dm_task_set_event_nr error", + waiter->mapname, waiter->event_nr); + dm_task_destroy(waiter->dmt); + waiter->dmt = NULL; + return 1; + } + + dm_task_no_open_count(waiter->dmt); + + /* wait */ + sigemptyset(&set); + sigaddset(&set, SIGUSR2); + pthread_sigmask(SIG_UNBLOCK, &set, &oldset); + + pthread_testcancel(); + r = dm_task_run(waiter->dmt); + pthread_testcancel(); + + pthread_sigmask(SIG_SETMASK, &oldset, NULL); + dm_task_destroy(waiter->dmt); + waiter->dmt = NULL; + + if (!r) { /* wait interrupted by signal. 
check for cancellation */ + pthread_cleanup_push(cleanup_lock, &waiter_lock); + lock(&waiter_lock); + pthread_testcancel(); + lock_cleanup_pop(&waiter_lock); + return 1; /* If we weren't cancelled, just reschedule */ + } + + waiter->event_nr++; + + /* + * upon event ... + */ + while (1) { + condlog(3, "%s: devmap event #%i", + waiter->mapname, waiter->event_nr); + + /* + * event might be : + * + * 1) a table reload, which means our mpp structure is + * obsolete : refresh it through update_multipath() + * 2) a path failed by DM : mark as such through + * update_multipath() + * 3) map has gone away : stop the thread. + * 4) a path reinstate : nothing to do + * 5) a switch group : nothing to do + */ + pthread_cleanup_push(cleanup_lock, &waiter->vecs->lock); + lock(&waiter->vecs->lock); + pthread_testcancel(); + r = update_multipath(waiter->vecs, waiter->mapname, 1); + lock_cleanup_pop(waiter->vecs->lock); + + if (r) { + condlog(2, "%s: event checker exit", + waiter->mapname); + return -1; /* stop the thread */ + } + + event_nr = dm_geteventnr(waiter->mapname); + + if (waiter->event_nr == event_nr) + return 1; /* upon problem reschedule 1s later */ + + waiter->event_nr = event_nr; + } + return -1; /* never reach there */ +} + +static void *waitevent (void *et) +{ + int r; + struct event_thread *waiter; + + mlockall(MCL_CURRENT | MCL_FUTURE); + + waiter = (struct event_thread *)et; + pthread_cleanup_push(free_waiter, et); + + rcu_register_thread(); + while (1) { + r = waiteventloop(waiter); + + if (r < 0) + break; + + sleep(r); + } + + pthread_cleanup_pop(1); + return NULL; +} + +int start_waiter_thread (struct multipath *mpp, struct vectors *vecs) +{ + struct event_thread *wp; + + if (!mpp) + return 0; + + wp = alloc_waiter(); + + if (!wp) + goto out; + + strlcpy(wp->mapname, mpp->alias, WWID_SIZE); + wp->vecs = vecs; + + if (pthread_create(&wp->thread, &waiter_attr, waitevent, wp)) { + condlog(0, "%s: cannot create event checker", wp->mapname); + goto out1; + } + 
mpp->waiter = wp->thread; + condlog(3, "%s: event checker started", wp->mapname); + + return 0; +out1: + free_waiter(wp); + mpp->waiter = (pthread_t)0; +out: + condlog(0, "failed to start waiter thread"); + return 1; +} diff --git a/multipathd/waiter.h b/multipathd/waiter.h new file mode 100644 index 0000000..28e0f6d --- /dev/null +++ b/multipathd/waiter.h @@ -0,0 +1,17 @@ +#ifndef _WAITER_H +#define _WAITER_H + +extern pthread_attr_t waiter_attr; + +struct event_thread { + struct dm_task *dmt; + pthread_t thread; + int event_nr; + char mapname[WWID_SIZE]; + struct vectors *vecs; +}; + +void stop_waiter_thread (struct multipath *mpp); +int start_waiter_thread (struct multipath *mpp, struct vectors *vecs); + +#endif /* _WAITER_H */ diff --git a/tests/Makefile b/tests/Makefile new file mode 100644 index 0000000..77ff324 --- /dev/null +++ b/tests/Makefile @@ -0,0 +1,96 @@ +include ../Makefile.inc + +# Test special behavior of gcc 4.8 with nested initializers +# gcc 4.8 compiles blacklist.c only with -Wno-missing-field-initializers +TEST_MISSING_INITIALIZERS = $(shell \ + echo 'struct A {int a, b;}; struct B {struct A a; int b;} b = {.a.a=1};' | \ + $(CC) -c -Werror -Wmissing-field-initializers -o /dev/null -xc - >/dev/null 2>&1 \ + || echo -Wno-missing-field-initializers) +W_MISSING_INITIALIZERS := $(call TEST_MISSING_INITIALIZERS) + +CFLAGS += $(BIN_CFLAGS) -I$(multipathdir) -I$(mpathcmddir) \ + -Wno-unused-parameter $(W_MISSING_INITIALIZERS) +LIBDEPS += -L$(multipathdir) -lmultipath -lcmocka + +TESTS := uevent parser util dmevents hwtable blacklist unaligned vpd pgpolicy \ + alias directio + +.SILENT: $(TESTS:%=%.o) +.PRECIOUS: $(TESTS:%=%-test) + +all: $(TESTS:%=%.out) + +# test-specific compiler flags +# XYZ-test_FLAGS: Additional compiler flags for this test + +ifneq ($(wildcard directio_test_dev),) +DIO_TEST_DEV = $(shell sed -n -e 's/^[[:space:]]*DIO_TEST_DEV[[:space:]]*=[[:space:]]*\([^[:space:]\#]\+\).*/\1/p' < directio_test_dev) +endif +ifneq 
($(DIO_TEST_DEV),) +directio-test_FLAGS := -DDIO_TEST_DEV=\"$(DIO_TEST_DEV)\" +endif + +# test-specific linker flags +# XYZ-test_TESTDEPS: test libraries containing __wrap_xyz functions +# XYZ-test_OBJDEPS: object files from libraries to link in explicitly +# That may be necessary if functions called from the object file are wrapped +# (wrapping works only for symbols which are undefined after processing a +# linker input file). +# XYZ-test_LIBDEPS: Additional libs to link for this test + +dmevents-test_LIBDEPS = -lpthread -ldevmapper -lurcu +hwtable-test_TESTDEPS := test-lib.o +hwtable-test_OBJDEPS := ../libmultipath/discovery.o ../libmultipath/blacklist.o \ + ../libmultipath/prio.o ../libmultipath/callout.o ../libmultipath/structs.o +hwtable-test_LIBDEPS := -ludev -lpthread -ldl +blacklist-test_TESTDEPS := test-log.o +blacklist-test_OBJDEPS := ../libmultipath/blacklist.o +blacklist-test_LIBDEPS := -ludev +vpd-test_OBJDEPS := ../libmultipath/discovery.o +vpd-test_LIBDEPS := -ludev -lpthread -ldl +alias-test_TESTDEPS := test-log.o +alias-test_LIBDEPS := -lpthread -ldl +ifneq ($(DIO_TEST_DEV),) +directio-test_LIBDEPS := -laio +endif + +%.o: %.c + $(CC) $(CFLAGS) $($*-test_FLAGS) -c -o $@ $< + +lib/libchecktur.so: + mkdir lib + ln -t lib ../libmultipath/{checkers,prioritizers,foreign}/*.so + +%.out: %-test lib/libchecktur.so + @echo == running $< == + @LD_LIBRARY_PATH=$(multipathdir):$(mpathcmddir) ./$< >$@ + +OBJS = $(TESTS:%=%.o) test-lib.o + +test_clean: + $(RM) $(TESTS:%=%.out) + +clean: test_clean dep_clean + $(RM) $(TESTS:%=%-test) $(OBJS) *.o.wrap + $(RM) -rf lib + +.SECONDARY: $(OBJS) + +include $(wildcard $(OBJS:.o=.d)) + +dep_clean: + $(RM) $(OBJS:.o=.d) + +%.o.wrap: %.c + @sed -n 's/^.*__wrap_\([a-zA-Z0-9_]*\).*$$/-Wl,--wrap=\1/p' $< | \ + sort -u | tr '\n' ' ' >$@ + + +# COLON will get expanded during second expansion below +COLON:=: +.SECONDEXPANSION: +%-test: %.o %.o.wrap $$($$@_OBJDEPS) $$($$@_TESTDEPS) $$($$@_TESTDEPS$$(COLON).o=.o.wrap) \ + 
 $(multipathdir)/libmultipath.so Makefile + $(CC) $(CFLAGS) -o $@ $(LDFLAGS) $< $($@_TESTDEPS) $($@_OBJDEPS) \ + $(LIBDEPS) $($@_LIBDEPS) \ + $(shell cat $<.wrap) $(foreach dep,$($@_TESTDEPS),$(shell cat $(dep).wrap)) diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..6438a82 --- /dev/null +++ b/tests/README.md @@ -0,0 +1,72 @@ +# multipath-tools unit tests + +Unit tests are built and run by running `make test` in the top directory, +or simply `make` in the `tests` subdirectory. The test output is saved as +`<testname>.out`. The test programs are called `<testname>-test`, and can +be run standalone e.g. for debugging purposes. + +## Notes on individual tests + +### Tests that require root permissions + +The following tests must be run as root, otherwise some test items will be +skipped because of missing permissions, or the test will fail outright: + + * `dmevents` + * `directio` (if `DIO_TEST_DEV` is set, see below) + +To run these tests, after building the tests as non-root user, change to the +`tests` directory and run `make test_clean`; then run `make` again as root. + +### directio test + +This test includes test items that require access to a block device. The +device will be opened in read-only mode; you don't need to worry about data +loss. However, the user needs to specify a device to be used. Set the +environment variable `DIO_TEST_DEV` to the path of the device. +Alternatively, create a file `directio_test_dev` under +the `tests` directory containing a single line that sets this environment +variable in Bourne Shell syntax, like this: + + DIO_TEST_DEV=/dev/sdc3 + +After that, run `make directio.out` as root in the `tests` directory to +perform the test. + +## Adding tests + +The unit tests are based on the [cmocka test framework](https://cmocka.org/), +and make use of cmocka's "mock objects" feature to simulate how the code behaves +for different input values. 
cmocka achieves this by modifying the symbol +lookup at link time, substituting "wrapper functions" for the originally +called function. The Makefile contains code to make sure that `__wrap_xyz()` +wrapper functions are automatically passed to the linker with matching +`-Wl,--wrap` command line arguments, so that tests are correctly rebuilt if +wrapper functions are added or removed. + +### Making sure symbol wrapping works: OBJDEPS + +Special care must be taken to wrap function calls inside a library. Suppose you want +to wrap a function which is both defined in libmultipath and called from other +functions in libmultipath, such as `checker_check()`. When `libmultipath.so` is +created, the linker resolves calls to `checker_check()` inside the `.so` +file. When later the test executable is built by linking the test object file with +`libmultipath.so`, these calls can't be wrapped any more, because they've +already been resolved, and wrapping works only for *unresolved* symbols. +Therefore, object files from libraries that contain calls to functions +which need to be wrapped must be explicitly listed on the linker command line +in order to make the wrapping work. To enforce this, add these object files to +the `xyz-test_OBJDEPS` variable in the Makefile. + +### Using wrapper function libraries: TESTDEPS + +Some wrapper functions are useful in multiple tests. These are maintained in +separate input files, such as `test-lib.c` or `test-log.c`. List these files +in the `xyz-test_TESTDEPS` variable for your test program if you need these +wrappers. + +### Specifying library dependencies: LIBDEPS + +In order to keep the tests lean, not all libraries that libmultipath +normally pulls in are used for every test. Add libraries you need (such as +`-lpthread`) to the `xyz-test_LIBDEPS` variable. 
diff --git a/tests/alias.c b/tests/alias.c new file mode 100644 index 0000000..30414db --- /dev/null +++ b/tests/alias.c @@ -0,0 +1,744 @@ +#include +#include +#include +#include +#include "util.h" +#include "alias.h" +#include "test-log.h" +#include + +#include "globals.c" +#include "../libmultipath/alias.c" + +#if INT_MAX == 0x7fffffff +/* user_friendly_name for map #INT_MAX */ +#define MPATH_ID_INT_MAX "fxshrxw" +/* ... and one less */ +#define MPATH_ID_INT_MAX_m1 "fxshrxv" +/* ... and one more */ +#define MPATH_ID_INT_MAX_p1 "fxshrxx" +#endif + +void __wrap_rewind(FILE *stream) +{} + +char *__wrap_fgets(char *buf, int n, FILE *stream) +{ + char *val = mock_ptr_type(char *); + if (!val) + return NULL; + strlcpy(buf, val, n); + return buf; +} + +static int __set_errno(int err) +{ + if (err >= 0) { + errno = 0; + return err; + } else { + errno = -err; + return -1; + } +} + +off_t __wrap_lseek(int fd, off_t offset, int whence) +{ + return __set_errno(mock_type(int)); + +} + +ssize_t __wrap_write(int fd, const void *buf, size_t count) +{ + check_expected(count); + check_expected(buf); + return __set_errno(mock_type(int)); +} + +int __wrap_ftruncate(int fd, off_t length) +{ + check_expected(length); + return __set_errno(mock_type(int)); +} + +static void fd_mpatha(void **state) +{ + char buf[32]; + int rc; + + rc = format_devname(buf, 1, sizeof(buf), "FOO"); + assert_int_equal(rc, 4); + assert_string_equal(buf, "FOOa"); +} + +static void fd_mpathz(void **state) +{ + /* This also tests a "short" buffer, see fd_mpath_short1 */ + char buf[5]; + int rc; + + rc = format_devname(buf, 26, sizeof(buf), "FOO"); + assert_int_equal(rc, 4); + assert_string_equal(buf, "FOOz"); +} + +static void fd_mpathaa(void **state) +{ + char buf[32]; + int rc; + + rc = format_devname(buf, 26 + 1, sizeof(buf), "FOO"); + assert_int_equal(rc, 5); + assert_string_equal(buf, "FOOaa"); +} + +static void fd_mpathzz(void **state) +{ + char buf[32]; + int rc; + + rc = format_devname(buf, 26*26 + 26, 
sizeof(buf), "FOO"); + assert_int_equal(rc, 5); + assert_string_equal(buf, "FOOzz"); +} + +static void fd_mpathaaa(void **state) +{ + char buf[32]; + int rc; + + rc = format_devname(buf, 26*26 + 27, sizeof(buf), "FOO"); + assert_int_equal(rc, 6); + assert_string_equal(buf, "FOOaaa"); +} + +static void fd_mpathzzz(void **state) +{ + char buf[32]; + int rc; + + rc = format_devname(buf, 26*26*26 + 26*26 + 26, sizeof(buf), "FOO"); + assert_int_equal(rc, 6); + assert_string_equal(buf, "FOOzzz"); +} + +static void fd_mpathaaaa(void **state) +{ + char buf[32]; + int rc; + + rc = format_devname(buf, 26*26*26 + 26*26 + 27, sizeof(buf), "FOO"); + assert_int_equal(rc, 7); + assert_string_equal(buf, "FOOaaaa"); +} + +static void fd_mpathzzzz(void **state) +{ + char buf[32]; + int rc; + + rc = format_devname(buf, 26*26*26*26 + 26*26*26 + 26*26 + 26, + sizeof(buf), "FOO"); + assert_int_equal(rc, 7); + assert_string_equal(buf, "FOOzzzz"); +} + +#ifdef MPATH_ID_INT_MAX +static void fd_mpath_max(void **state) +{ + char buf[32]; + int rc; + + rc = format_devname(buf, INT_MAX, sizeof(buf), ""); + assert_int_equal(rc, strlen(MPATH_ID_INT_MAX)); + assert_string_equal(buf, MPATH_ID_INT_MAX); +} +#endif + +static void fd_mpath_max1(void **state) +{ + char buf[32]; + int rc; + + rc = format_devname(buf, INT_MIN, sizeof(buf), ""); + assert_int_equal(rc, -1); +} + +static void fd_mpath_short(void **state) +{ + char buf[4]; + int rc; + + rc = format_devname(buf, 1, sizeof(buf), "FOO"); + assert_int_equal(rc, -1); +} + +static void fd_mpath_short1(void **state) +{ + char buf[5]; + int rc; + + rc = format_devname(buf, 27, sizeof(buf), "FOO"); + assert_int_equal(rc, -1); +} + +static int test_format_devname(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(fd_mpatha), + cmocka_unit_test(fd_mpathz), + cmocka_unit_test(fd_mpathaa), + cmocka_unit_test(fd_mpathzz), + cmocka_unit_test(fd_mpathaaa), + cmocka_unit_test(fd_mpathzzz), + cmocka_unit_test(fd_mpathaaaa), + 
cmocka_unit_test(fd_mpathzzzz), +#ifdef MPATH_ID_INT_MAX + cmocka_unit_test(fd_mpath_max), +#endif + cmocka_unit_test(fd_mpath_max1), + cmocka_unit_test(fd_mpath_short), + cmocka_unit_test(fd_mpath_short1), + }; + + return cmocka_run_group_tests(tests, NULL, NULL); +} + +static void sd_mpatha(void **state) +{ + int rc = scan_devname("MPATHa", "MPATH"); + + assert_int_equal(rc, 1); +} + +/* + * Text after whitespace is ignored. But an overlong input + * errors out, even if it's just whitespace. + * It's kind of strange that scan_devname() treats whitespace + * like this. But I'm not sure if some corner case depends + * on this behavior. + */ +static void sd_mpatha_spc(void **state) +{ + int rc = scan_devname("MPATHa 00", "MPATH"); + + assert_int_equal(rc, 1); +} + +static void sd_mpatha_tab(void **state) +{ + int rc = scan_devname("MPATHa\t00", "MPATH"); + + assert_int_equal(rc, 1); +} + +/* + * Overlong input consisting of a valid id followed only by blanks: + * "a" plus seven spaces, i.e. the same 8-character suffix length that + * sd_overlong1 uses. A single trailing blank would scan as id 1 + * (see sd_mpatha_spc), so the padding is essential here. + */ +static void sd_overlong(void **state) +{ + int rc = scan_devname("MPATHa       ", "MPATH"); + + assert_int_equal(rc, -1); +} + +/* Overlong input made of letters only (8-character suffix) */ +static void sd_overlong1(void **state) +{ + int rc = scan_devname("MPATHabcdefgh", "MPATH"); + + assert_int_equal(rc, -1); +} + +static void sd_noprefix(void **state) +{ + int rc = scan_devname("MPATHa", NULL); + + assert_int_equal(rc, -1); +} + +static void sd_nomatchprefix(void **state) +{ + int rc = scan_devname("MPATHa", "mpath"); + + assert_int_equal(rc, -1); +} + +static void sd_eq_prefix(void **state) +{ + int rc = scan_devname("MPATH", "MPATH"); + + assert_int_equal(rc, -1); +} + +static void sd_bad_1(void **state) +{ + int rc = scan_devname("MPATH0", "MPATH"); + + assert_int_equal(rc, -1); +} + +static void sd_bad_2(void **state) +{ + int rc = scan_devname("MPATHa0c", "MPATH"); + + assert_int_equal(rc, -1); +} + +#ifdef MPATH_ID_INT_MAX +static void sd_max(void **state) +{ + int rc = scan_devname("MPATH" MPATH_ID_INT_MAX, "MPATH"); + + assert_int_equal(rc, INT_MAX); +} + +static void sd_max_p1(void **state) +{ + int rc = scan_devname("MPATH" 
MPATH_ID_INT_MAX_p1, "MPATH"); + + assert_int_equal(rc, -1); +} +#endif + +static void sd_fd_many(void **state) +{ + char buf[32]; + int rc, i; + + for (i = 1; i < 5000; i++) { + rc = format_devname(buf, i, sizeof(buf), "MPATH"); + assert_in_range(rc, 6, 8); + rc = scan_devname(buf, "MPATH"); + assert_int_equal(rc, i); + } +} + +static void sd_fd_random(void **state) +{ + char buf[32]; + int rc, i, n; + + srandom(1); + for (i = 1; i < 1000; i++) { + n = random() & 0xffff; + rc = format_devname(buf, n, sizeof(buf), "MPATH"); + assert_in_range(rc, 6, 9); + rc = scan_devname(buf, "MPATH"); + assert_int_equal(rc, n); + } +} + +static int test_scan_devname(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(sd_mpatha), + cmocka_unit_test(sd_mpatha_spc), + cmocka_unit_test(sd_mpatha_tab), + cmocka_unit_test(sd_overlong), + cmocka_unit_test(sd_overlong1), + cmocka_unit_test(sd_noprefix), + cmocka_unit_test(sd_nomatchprefix), + cmocka_unit_test(sd_eq_prefix), + cmocka_unit_test(sd_bad_1), + cmocka_unit_test(sd_bad_2), +#ifdef MPATH_ID_INT_MAX + cmocka_unit_test(sd_max), + cmocka_unit_test(sd_max_p1), +#endif + cmocka_unit_test(sd_fd_many), + cmocka_unit_test(sd_fd_random), + }; + + return cmocka_run_group_tests(tests, NULL, NULL); +} + +static void lb_empty(void **state) +{ + int rc; + char *alias; + + will_return(__wrap_fgets, NULL); + expect_condlog(3, "No matching wwid [WWID0] in bindings file.\n"); + rc = lookup_binding(NULL, "WWID0", &alias, NULL); + assert_int_equal(rc, 1); + assert_ptr_equal(alias, NULL); +} + +static void lb_match_a(void **state) +{ + int rc; + char *alias; + + will_return(__wrap_fgets, "MPATHa WWID0\n"); + expect_condlog(3, "Found matching wwid [WWID0] in bindings file." 
+ " Setting alias to MPATHa\n"); + rc = lookup_binding(NULL, "WWID0", &alias, "MPATH"); + assert_int_equal(rc, 0); + assert_ptr_not_equal(alias, NULL); + assert_string_equal(alias, "MPATHa"); + free(alias); +} + +static void lb_nomatch_a(void **state) +{ + int rc; + char *alias; + + will_return(__wrap_fgets, "MPATHa WWID0\n"); + will_return(__wrap_fgets, NULL); + expect_condlog(3, "No matching wwid [WWID1] in bindings file.\n"); + rc = lookup_binding(NULL, "WWID1", &alias, "MPATH"); + assert_int_equal(rc, 2); + assert_ptr_equal(alias, NULL); +} + +static void lb_match_c(void **state) +{ + int rc; + char *alias; + + will_return(__wrap_fgets, "MPATHa WWID0\n"); + will_return(__wrap_fgets, "MPATHc WWID1\n"); + expect_condlog(3, "Found matching wwid [WWID1] in bindings file." + " Setting alias to MPATHc\n"); + rc = lookup_binding(NULL, "WWID1", &alias, "MPATH"); + assert_int_equal(rc, 0); + assert_ptr_not_equal(alias, NULL); + assert_string_equal(alias, "MPATHc"); + free(alias); +} + +static void lb_nomatch_a_c(void **state) +{ + int rc; + char *alias; + + will_return(__wrap_fgets, "MPATHa WWID0\n"); + will_return(__wrap_fgets, "MPATHc WWID1\n"); + will_return(__wrap_fgets, NULL); + expect_condlog(3, "No matching wwid [WWID2] in bindings file.\n"); + rc = lookup_binding(NULL, "WWID2", &alias, "MPATH"); + assert_int_equal(rc, 2); + assert_ptr_equal(alias, NULL); +} + +static void lb_nomatch_c_a(void **state) +{ + int rc; + char *alias; + + will_return(__wrap_fgets, "MPATHc WWID1\n"); + will_return(__wrap_fgets, "MPATHa WWID0\n"); + will_return(__wrap_fgets, NULL); + expect_condlog(3, "No matching wwid [WWID2] in bindings file.\n"); + rc = lookup_binding(NULL, "WWID2", &alias, "MPATH"); + assert_int_equal(rc, 2); + assert_ptr_equal(alias, NULL); +} + +static void lb_nomatch_a_b(void **state) +{ + int rc; + char *alias; + + will_return(__wrap_fgets, "MPATHa WWID0\n"); + will_return(__wrap_fgets, "MPATHz WWID26\n"); + will_return(__wrap_fgets, "MPATHb WWID1\n"); + 
will_return(__wrap_fgets, NULL); + expect_condlog(3, "No matching wwid [WWID2] in bindings file.\n"); + rc = lookup_binding(NULL, "WWID2", &alias, "MPATH"); + assert_int_equal(rc, 3); + assert_ptr_equal(alias, NULL); +} + +static void lb_nomatch_a_b_bad(void **state) +{ + int rc; + char *alias; + + will_return(__wrap_fgets, "MPATHa WWID0\n"); + will_return(__wrap_fgets, "MPATHz WWID26\n"); + will_return(__wrap_fgets, "MPATHb\n"); + will_return(__wrap_fgets, NULL); + expect_condlog(3, "Ignoring malformed line 3 in bindings file\n"); + expect_condlog(3, "No matching wwid [WWID2] in bindings file.\n"); + rc = lookup_binding(NULL, "WWID2", &alias, "MPATH"); + assert_int_equal(rc, 3); + assert_ptr_equal(alias, NULL); +} + +static void lb_nomatch_b_a(void **state) +{ + int rc; + char *alias; + + will_return(__wrap_fgets, "MPATHb WWID1\n"); + will_return(__wrap_fgets, "MPATHz WWID26\n"); + will_return(__wrap_fgets, "MPATHa WWID0\n"); + will_return(__wrap_fgets, NULL); + expect_condlog(3, "No matching wwid [WWID2] in bindings file.\n"); + rc = lookup_binding(NULL, "WWID2", &alias, "MPATH"); + assert_int_equal(rc, 27); + assert_ptr_equal(alias, NULL); +} + +#ifdef MPATH_ID_INT_MAX +static void lb_nomatch_int_max(void **state) +{ + int rc; + char *alias; + + will_return(__wrap_fgets, "MPATHb WWID1\n"); + will_return(__wrap_fgets, "MPATH" MPATH_ID_INT_MAX " WWIDMAX\n"); + will_return(__wrap_fgets, "MPATHa WWID0\n"); + will_return(__wrap_fgets, NULL); + expect_condlog(0, "no more available user_friendly_names\n"); + rc = lookup_binding(NULL, "WWID2", &alias, "MPATH"); + assert_int_equal(rc, -1); + assert_ptr_equal(alias, NULL); +} + +static void lb_nomatch_int_max_m1(void **state) +{ + int rc; + char *alias; + + will_return(__wrap_fgets, "MPATHb WWID1\n"); + will_return(__wrap_fgets, "MPATH" MPATH_ID_INT_MAX_m1 " WWIDMAX\n"); + will_return(__wrap_fgets, "MPATHa WWID0\n"); + will_return(__wrap_fgets, NULL); + expect_condlog(3, "No matching wwid [WWID2] in bindings file.\n"); + 
rc = lookup_binding(NULL, "WWID2", &alias, "MPATH"); + assert_int_equal(rc, INT_MAX); + assert_ptr_equal(alias, NULL); +} +#endif + +static int test_lookup_binding(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(lb_empty), + cmocka_unit_test(lb_match_a), + cmocka_unit_test(lb_nomatch_a), + cmocka_unit_test(lb_match_c), + cmocka_unit_test(lb_nomatch_a_c), + cmocka_unit_test(lb_nomatch_c_a), + cmocka_unit_test(lb_nomatch_a_b), + cmocka_unit_test(lb_nomatch_a_b_bad), + cmocka_unit_test(lb_nomatch_b_a), +#ifdef MPATH_ID_INT_MAX + cmocka_unit_test(lb_nomatch_int_max), + cmocka_unit_test(lb_nomatch_int_max_m1), +#endif + }; + + return cmocka_run_group_tests(tests, NULL, NULL); +} + +static void rl_empty(void **state) +{ + int rc; + char buf[WWID_SIZE]; + + buf[0] = '\0'; + will_return(__wrap_fgets, NULL); + expect_condlog(3, "No matching alias [MPATHa] in bindings file.\n"); + rc = rlookup_binding(NULL, buf, "MPATHa"); + assert_int_equal(rc, -1); + assert_string_equal(buf, ""); +} + +static void rl_match_a(void **state) +{ + int rc; + char buf[WWID_SIZE]; + + buf[0] = '\0'; + will_return(__wrap_fgets, "MPATHa WWID0\n"); + expect_condlog(3, "Found matching alias [MPATHa] in bindings file.\n" + "Setting wwid to WWID0\n"); + rc = rlookup_binding(NULL, buf, "MPATHa"); + assert_int_equal(rc, 0); + assert_string_equal(buf, "WWID0"); +} + +static void rl_nomatch_a(void **state) +{ + int rc; + char buf[WWID_SIZE]; + + buf[0] = '\0'; + will_return(__wrap_fgets, "MPATHa WWID0\n"); + will_return(__wrap_fgets, NULL); + expect_condlog(3, "No matching alias [MPATHb] in bindings file.\n"); + rc = rlookup_binding(NULL, buf, "MPATHb"); + assert_int_equal(rc, -1); + assert_string_equal(buf, ""); +} + +static void rl_malformed_a(void **state) +{ + int rc; + char buf[WWID_SIZE]; + + buf[0] = '\0'; + will_return(__wrap_fgets, "MPATHa \n"); + will_return(__wrap_fgets, NULL); + expect_condlog(3, "Ignoring malformed line 1 in bindings file\n"); + expect_condlog(3, "No matching 
alias [MPATHa] in bindings file.\n"); + rc = rlookup_binding(NULL, buf, "MPATHa"); + assert_int_equal(rc, -1); + assert_string_equal(buf, ""); +} + +static void rl_overlong_a(void **state) +{ + int rc; + char buf[WWID_SIZE]; + char line[WWID_SIZE + 10]; + + snprintf(line, sizeof(line), "MPATHa "); + memset(line + strlen(line), 'W', sizeof(line) - 2 - strlen(line)); + snprintf(line + sizeof(line) - 2, 2, "\n"); + + buf[0] = '\0'; + will_return(__wrap_fgets, line); + will_return(__wrap_fgets, NULL); + expect_condlog(3, "Ignoring too large wwid at 1 in bindings file\n"); + expect_condlog(3, "No matching alias [MPATHa] in bindings file.\n"); + rc = rlookup_binding(NULL, buf, "MPATHa"); + assert_int_equal(rc, -1); + assert_string_equal(buf, ""); +} + +static void rl_match_b(void **state) +{ + int rc; + char buf[WWID_SIZE]; + + buf[0] = '\0'; + will_return(__wrap_fgets, "MPATHa WWID0\n"); + will_return(__wrap_fgets, "MPATHz WWID26\n"); + will_return(__wrap_fgets, "MPATHb WWID2\n"); + expect_condlog(3, "Found matching alias [MPATHb] in bindings file.\n" + "Setting wwid to WWID2\n"); + rc = rlookup_binding(NULL, buf, "MPATHb"); + assert_int_equal(rc, 0); + assert_string_equal(buf, "WWID2"); +} + +static int test_rlookup_binding(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(rl_empty), + cmocka_unit_test(rl_match_a), + cmocka_unit_test(rl_nomatch_a), + cmocka_unit_test(rl_malformed_a), + cmocka_unit_test(rl_overlong_a), + cmocka_unit_test(rl_match_b), + }; + + return cmocka_run_group_tests(tests, NULL, NULL); +} + +static void al_a(void **state) +{ + static const char ln[] = "MPATHa WWIDa\n"; + char *alias; + + will_return(__wrap_lseek, 0); + expect_value(__wrap_write, count, strlen(ln)); + expect_string(__wrap_write, buf, ln); + will_return(__wrap_write, strlen(ln)); + expect_condlog(3, "Created new binding [MPATHa] for WWID [WWIDa]\n"); + + alias = allocate_binding(0, "WWIDa", 1, "MPATH"); + assert_ptr_not_equal(alias, NULL); + 
assert_string_equal(alias, "MPATHa"); +} + +static void al_zz(void **state) +{ + static const char ln[] = "MPATHzz WWIDzz\n"; + char *alias; + + will_return(__wrap_lseek, 0); + expect_value(__wrap_write, count, strlen(ln)); + expect_string(__wrap_write, buf, ln); + will_return(__wrap_write, strlen(ln)); + expect_condlog(3, "Created new binding [MPATHzz] for WWID [WWIDzz]\n"); + + alias = allocate_binding(0, "WWIDzz", 26*26 + 26, "MPATH"); + assert_ptr_not_equal(alias, NULL); + assert_string_equal(alias, "MPATHzz"); +} + +static void al_0(void **state) +{ + char *alias; + + expect_condlog(0, "allocate_binding: cannot allocate new binding for id 0\n"); + alias = allocate_binding(0, "WWIDa", 0, "MPATH"); + assert_ptr_equal(alias, NULL); +} + +static void al_m2(void **state) +{ + char *alias; + + expect_condlog(0, "allocate_binding: cannot allocate new binding for id -2\n"); + alias = allocate_binding(0, "WWIDa", -2, "MPATH"); + assert_ptr_equal(alias, NULL); +} + +static void al_lseek_err(void **state) +{ + char *alias; + + will_return(__wrap_lseek, -ENODEV); + expect_condlog(0, "Cannot seek to end of bindings file : No such device\n"); + alias = allocate_binding(0, "WWIDa", 1, "MPATH"); + assert_ptr_equal(alias, NULL); +} + +static void al_write_err(void **state) +{ + static const char ln[] = "MPATHa WWIDa\n"; + const int offset = 20; + char *alias; + + will_return(__wrap_lseek, offset); + expect_value(__wrap_write, count, strlen(ln)); + expect_string(__wrap_write, buf, ln); + will_return(__wrap_write, strlen(ln) - 1); + expect_value(__wrap_ftruncate, length, offset); + will_return(__wrap_ftruncate, 0); + expect_condlog(0, "Cannot write binding to bindings file : Success\n"); + + alias = allocate_binding(0, "WWIDa", 1, "MPATH"); + assert_ptr_equal(alias, NULL); +} + +static int test_allocate_binding(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(al_a), + cmocka_unit_test(al_zz), + cmocka_unit_test(al_0), + cmocka_unit_test(al_m2), + 
cmocka_unit_test(al_lseek_err), + cmocka_unit_test(al_write_err), + }; + + return cmocka_run_group_tests(tests, NULL, NULL); +} + +int main(void) +{ + int ret = 0; + + ret += test_format_devname(); + ret += test_scan_devname(); + ret += test_lookup_binding(); + ret += test_rlookup_binding(); + ret += test_allocate_binding(); + + return ret; +} diff --git a/tests/blacklist.c b/tests/blacklist.c new file mode 100644 index 0000000..6e7c186 --- /dev/null +++ b/tests/blacklist.c @@ -0,0 +1,522 @@ +/* + * Copyright (c) 2018 Benjamin Marzinski, Redhat + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ * + */ +#include +#include +#include +#include +#include "globals.c" +#include "blacklist.h" +#include "test-log.h" + +struct udev_device { + const char *sysname; + char *property_list[]; +}; + +const char * +__wrap_udev_device_get_sysname(struct udev_device *udev_device) +{ + assert_non_null(udev_device); + assert_non_null(udev_device->sysname); + return udev_device->sysname; +} + +struct udev_list_entry * +__wrap_udev_device_get_properties_list_entry(struct udev_device *udev_device) +{ + assert_non_null(udev_device); + if (!*udev_device->property_list) + return NULL; + return (struct udev_list_entry *)udev_device->property_list; +} + +struct udev_list_entry * +__wrap_udev_list_entry_get_next(struct udev_list_entry *list_entry) +{ + assert_non_null(list_entry); + if (!*((char **)list_entry + 1)) + return NULL; + return (struct udev_list_entry *)(((char **)list_entry) + 1); +} + +const char * +__wrap_udev_list_entry_get_name(struct udev_list_entry *list_entry) +{ + return *(const char **)list_entry; +} + +vector blist_devnode_sdb; +vector blist_all; +vector blist_device_foo_bar; +vector blist_device_all; +vector blist_wwid_xyzzy; +vector blist_protocol_fcp; +vector blist_property_wwn; + +static int setup(void **state) +{ + blist_devnode_sdb = vector_alloc(); + if (!blist_devnode_sdb || + store_ble(blist_devnode_sdb, strdup("sdb"), ORIGIN_CONFIG)) + return -1; + + blist_all = vector_alloc(); + if (!blist_all || store_ble(blist_all, strdup(".*"), ORIGIN_CONFIG)) + return -1; + + blist_device_foo_bar = vector_alloc(); + if (!blist_device_foo_bar || alloc_ble_device(blist_device_foo_bar) || + set_ble_device(blist_device_foo_bar, strdup("foo"), strdup("bar"), + ORIGIN_CONFIG)) + return -1; + + blist_device_all = vector_alloc(); + if (!blist_device_all || alloc_ble_device(blist_device_all) || + set_ble_device(blist_device_all, strdup(".*"), strdup(".*"), + ORIGIN_CONFIG)) + return -1; + + blist_wwid_xyzzy = vector_alloc(); + if (!blist_wwid_xyzzy || + 
store_ble(blist_wwid_xyzzy, strdup("xyzzy"), ORIGIN_CONFIG)) + return -1; + + blist_protocol_fcp = vector_alloc(); + if (!blist_protocol_fcp || + store_ble(blist_protocol_fcp, strdup("scsi:fcp"), ORIGIN_CONFIG)) + return -1; + + blist_property_wwn = vector_alloc(); + if (!blist_property_wwn || + store_ble(blist_property_wwn, strdup("ID_WWN"), ORIGIN_CONFIG)) + return -1; + + return 0; +} + +static int teardown(void **state) +{ + free_blacklist(blist_devnode_sdb); + free_blacklist(blist_all); + free_blacklist_device(blist_device_foo_bar); + free_blacklist_device(blist_device_all); + free_blacklist(blist_wwid_xyzzy); + free_blacklist(blist_protocol_fcp); + free_blacklist(blist_property_wwn); + return 0; +} + +static int reset_blists(void **state) +{ + conf.blist_devnode = NULL; + conf.blist_wwid = NULL; + conf.blist_property = NULL; + conf.blist_protocol = NULL; + conf.blist_device = NULL; + conf.elist_devnode = NULL; + conf.elist_wwid = NULL; + conf.elist_property = NULL; + conf.elist_protocol = NULL; + conf.elist_device = NULL; + return 0; +} + +static void test_devnode_blacklist(void **state) +{ + expect_condlog(3, "sdb: device node name blacklisted\n"); + assert_int_equal(filter_devnode(blist_devnode_sdb, NULL, "sdb"), + MATCH_DEVNODE_BLIST); +} + +static void test_devnode_whitelist(void **state) +{ + expect_condlog(3, "sdb: device node name whitelisted\n"); + assert_int_equal(filter_devnode(blist_all, blist_devnode_sdb, "sdb"), + MATCH_DEVNODE_BLIST_EXCEPT); + expect_condlog(3, "sdc: device node name blacklisted\n"); + assert_int_equal(filter_devnode(blist_all, blist_devnode_sdb, "sdc"), + MATCH_DEVNODE_BLIST); +} + +static void test_devnode_missing(void **state) +{ + assert_int_equal(filter_devnode(blist_devnode_sdb, NULL, "sdc"), + MATCH_NOTHING); +} + +static void test_device_blacklist(void **state) +{ + expect_condlog(3, "sdb: (foo:bar) vendor/product blacklisted\n"); + assert_int_equal(filter_device(blist_device_foo_bar, NULL, "foo", + "bar", "sdb"), + 
MATCH_DEVICE_BLIST); +} + +static void test_device_whitelist(void **state) +{ + expect_condlog(3, "sdb: (foo:bar) vendor/product whitelisted\n"); + assert_int_equal(filter_device(blist_device_all, blist_device_foo_bar, + "foo", "bar", "sdb"), + MATCH_DEVICE_BLIST_EXCEPT); + expect_condlog(3, "sdb: (foo:baz) vendor/product blacklisted\n"); + assert_int_equal(filter_device(blist_device_all, blist_device_foo_bar, + "foo", "baz", "sdb"), + MATCH_DEVICE_BLIST); +} + +static void test_device_missing(void **state) +{ + assert_int_equal(filter_device(blist_device_foo_bar, NULL, "foo", + "baz", "sdb"), + MATCH_NOTHING); +} + +static void test_wwid_blacklist(void **state) +{ + expect_condlog(3, "sdb: wwid xyzzy blacklisted\n"); + assert_int_equal(filter_wwid(blist_wwid_xyzzy, NULL, "xyzzy", "sdb"), + MATCH_WWID_BLIST); +} + +static void test_wwid_whitelist(void **state) +{ + expect_condlog(3, "sdb: wwid xyzzy whitelisted\n"); + assert_int_equal(filter_wwid(blist_all, blist_wwid_xyzzy, + "xyzzy", "sdb"), + MATCH_WWID_BLIST_EXCEPT); + expect_condlog(3, "sdb: wwid plugh blacklisted\n"); + assert_int_equal(filter_wwid(blist_all, blist_wwid_xyzzy, + "plugh", "sdb"), + MATCH_WWID_BLIST); +} + +static void test_wwid_missing(void **state) +{ + assert_int_equal(filter_wwid(blist_wwid_xyzzy, NULL, "plugh", "sdb"), + MATCH_NOTHING); +} + +static void test_protocol_blacklist(void **state) +{ + struct path pp = { .dev = "sdb", .bus = SYSFS_BUS_SCSI, + .sg_id.proto_id = SCSI_PROTOCOL_FCP }; + expect_condlog(3, "sdb: protocol scsi:fcp blacklisted\n"); + assert_int_equal(filter_protocol(blist_protocol_fcp, NULL, &pp), + MATCH_PROTOCOL_BLIST); +} + +static void test_protocol_whitelist(void **state) +{ + struct path pp1 = { .dev = "sdb", .bus = SYSFS_BUS_SCSI, + .sg_id.proto_id = SCSI_PROTOCOL_FCP }; + struct path pp2 = { .dev = "sdb", .bus = SYSFS_BUS_SCSI, + .sg_id.proto_id = SCSI_PROTOCOL_ISCSI }; + expect_condlog(3, "sdb: protocol scsi:fcp whitelisted\n"); + 
assert_int_equal(filter_protocol(blist_all, blist_protocol_fcp, &pp1), + MATCH_PROTOCOL_BLIST_EXCEPT); + expect_condlog(3, "sdb: protocol scsi:iscsi blacklisted\n"); + assert_int_equal(filter_protocol(blist_all, blist_protocol_fcp, &pp2), + MATCH_PROTOCOL_BLIST); +} + +static void test_protocol_missing(void **state) +{ + struct path pp = { .dev = "sdb", .bus = SYSFS_BUS_SCSI, + .sg_id.proto_id = SCSI_PROTOCOL_ISCSI }; + assert_int_equal(filter_protocol(blist_protocol_fcp, NULL, &pp), + MATCH_NOTHING); +} + +static void test_property_blacklist(void **state) +{ + static struct udev_device udev = { "sdb", { "ID_FOO", "ID_WWN", "ID_BAR", NULL } }; + conf.blist_property = blist_property_wwn; + expect_condlog(3, "sdb: udev property ID_WWN blacklisted\n"); + assert_int_equal(filter_property(&conf, &udev, 3, "ID_SERIAL"), + MATCH_PROPERTY_BLIST); +} + +/* The property check works differently in that you check all the property + * names, so setting a blacklist value will blacklist the device if any + * of the properties on the blacklist are found before the property names + * in the whitelist. This might be worth changing,
although it would + * force multipath to go through the properties twice */ +static void test_property_whitelist(void **state) +{ + static struct udev_device udev = { "sdb", { "ID_FOO", "ID_WWN", "ID_BAR", NULL } }; + conf.elist_property = blist_property_wwn; + expect_condlog(3, "sdb: udev property ID_WWN whitelisted\n"); + assert_int_equal(filter_property(&conf, &udev, 3, "ID_SERIAL"), + MATCH_PROPERTY_BLIST_EXCEPT); +} + +static void test_property_missing(void **state) +{ + static struct udev_device udev = { "sdb", { "ID_FOO", "ID_BAZ", "ID_BAR", "ID_SERIAL", NULL } }; + conf.blist_property = blist_property_wwn; + expect_condlog(3, "sdb: blacklisted, udev property missing\n"); + assert_int_equal(filter_property(&conf, &udev, 3, "ID_SERIAL"), + MATCH_PROPERTY_BLIST_MISSING); + assert_int_equal(filter_property(&conf, &udev, 3, "ID_BLAH"), + MATCH_NOTHING); + assert_int_equal(filter_property(&conf, &udev, 3, ""), + MATCH_NOTHING); + assert_int_equal(filter_property(&conf, &udev, 3, NULL), + MATCH_NOTHING); +} + +struct udev_device test_udev = { "sdb", { "ID_FOO", "ID_WWN", "ID_BAR", NULL } }; + +struct path test_pp = { .dev = "sdb", .bus = SYSFS_BUS_SCSI, .udev = &test_udev, + .sg_id.proto_id = SCSI_PROTOCOL_FCP, .vendor_id = "foo", + .product_id = "bar", .wwid = "xyzzy" }; + +static void test_filter_path_property(void **state) +{ + conf.blist_property = blist_property_wwn; + expect_condlog(3, "sdb: udev property ID_WWN blacklisted\n"); + assert_int_equal(filter_path(&conf, &test_pp), MATCH_PROPERTY_BLIST); +} + +static void test_filter_path_devnode(void **state) +{ + /* always must include property elist, to avoid "missing property" + * blacklisting */ + conf.elist_property = blist_property_wwn; + conf.blist_devnode = blist_devnode_sdb; + expect_condlog(3, "sdb: udev property ID_WWN whitelisted\n"); + expect_condlog(3, "sdb: device node name blacklisted\n"); + assert_int_equal(filter_path(&conf, &test_pp), MATCH_DEVNODE_BLIST); +} + +static void 
test_filter_path_device(void **state) +{ + /* always must include property elist, to avoid "missing property" + * blacklisting */ + conf.elist_property = blist_property_wwn; + conf.blist_device = blist_device_foo_bar; + expect_condlog(3, "sdb: udev property ID_WWN whitelisted\n"); + expect_condlog(3, "sdb: (foo:bar) vendor/product blacklisted\n"); + assert_int_equal(filter_path(&conf, &test_pp), MATCH_DEVICE_BLIST); +} + +static void test_filter_path_protocol(void **state) +{ + conf.elist_property = blist_property_wwn; + conf.blist_protocol = blist_protocol_fcp; + expect_condlog(3, "sdb: udev property ID_WWN whitelisted\n"); + expect_condlog(3, "sdb: protocol scsi:fcp blacklisted\n"); + assert_int_equal(filter_path(&conf, &test_pp), MATCH_PROTOCOL_BLIST); +} + +static void test_filter_path_wwid(void **state) +{ + conf.elist_property = blist_property_wwn; + conf.blist_wwid = blist_wwid_xyzzy; + expect_condlog(3, "sdb: udev property ID_WWN whitelisted\n"); + expect_condlog(3, "sdb: wwid xyzzy blacklisted\n"); + assert_int_equal(filter_path(&conf, &test_pp), MATCH_WWID_BLIST); +} + +struct udev_device miss_udev = { "sdb", { "ID_FOO", "ID_BAZ", "ID_BAR", "ID_SERIAL", NULL } }; + +struct path miss1_pp = { .dev = "sdc", .bus = SYSFS_BUS_SCSI, + .udev = &miss_udev, + .uid_attribute = "ID_SERIAL", + .sg_id.proto_id = SCSI_PROTOCOL_ISCSI, + .vendor_id = "foo", .product_id = "baz", + .wwid = "plugh" }; + +struct path miss2_pp = { .dev = "sdc", .bus = SYSFS_BUS_SCSI, + .udev = &test_udev, + .uid_attribute = "ID_SERIAL", + .sg_id.proto_id = SCSI_PROTOCOL_ISCSI, + .vendor_id = "foo", .product_id = "baz", + .wwid = "plugh" }; + +struct path miss3_pp = { .dev = "sdc", .bus = SYSFS_BUS_SCSI, + .udev = &miss_udev, + .uid_attribute = "ID_EGGS", + .sg_id.proto_id = SCSI_PROTOCOL_ISCSI, + .vendor_id = "foo", .product_id = "baz", + .wwid = "plugh" }; + +static void test_filter_path_missing1(void **state) +{ + conf.blist_property = blist_property_wwn; + conf.blist_devnode = 
blist_devnode_sdb; + conf.blist_device = blist_device_foo_bar; + conf.blist_protocol = blist_protocol_fcp; + conf.blist_wwid = blist_wwid_xyzzy; + expect_condlog(3, "sdb: blacklisted, udev property missing\n"); + assert_int_equal(filter_path(&conf, &miss1_pp), + MATCH_PROPERTY_BLIST_MISSING); +} + +/* This one matches the property whitelist, to test the other missing + * functions */ +static void test_filter_path_missing2(void **state) +{ + conf.elist_property = blist_property_wwn; + conf.blist_devnode = blist_devnode_sdb; + conf.blist_device = blist_device_foo_bar; + conf.blist_protocol = blist_protocol_fcp; + conf.blist_wwid = blist_wwid_xyzzy; + expect_condlog(3, "sdb: udev property ID_WWN whitelisted\n"); + assert_int_equal(filter_path(&conf, &miss2_pp), + MATCH_NOTHING); +} + +/* Here we use a different uid_attribute which is also missing, thus + the path is not blacklisted */ +static void test_filter_path_missing3(void **state) +{ + conf.blist_property = blist_property_wwn; + conf.blist_devnode = blist_devnode_sdb; + conf.blist_device = blist_device_foo_bar; + conf.blist_protocol = blist_protocol_fcp; + conf.blist_wwid = blist_wwid_xyzzy; + assert_int_equal(filter_path(&conf, &miss3_pp), + MATCH_NOTHING); +} + +static void test_filter_path_whitelist(void **state) +{ + conf.elist_property = blist_property_wwn; + conf.elist_devnode = blist_devnode_sdb; + conf.elist_device = blist_device_foo_bar; + conf.elist_protocol = blist_protocol_fcp; + conf.elist_wwid = blist_wwid_xyzzy; + expect_condlog(3, "sdb: udev property ID_WWN whitelisted\n"); + expect_condlog(3, "sdb: device node name whitelisted\n"); + expect_condlog(3, "sdb: (foo:bar) vendor/product whitelisted\n"); + expect_condlog(3, "sdb: protocol scsi:fcp whitelisted\n"); + expect_condlog(3, "sdb: wwid xyzzy whitelisted\n"); + assert_int_equal(filter_path(&conf, &test_pp), + MATCH_WWID_BLIST_EXCEPT); +} + +static void test_filter_path_whitelist_property(void **state) +{ + conf.blist_property = 
blist_property_wwn; + conf.elist_devnode = blist_devnode_sdb; + conf.elist_device = blist_device_foo_bar; + conf.elist_protocol = blist_protocol_fcp; + conf.elist_wwid = blist_wwid_xyzzy; + expect_condlog(3, "sdb: udev property ID_WWN blacklisted\n"); + assert_int_equal(filter_path(&conf, &test_pp), MATCH_PROPERTY_BLIST); +} + +static void test_filter_path_whitelist_devnode(void **state) +{ + conf.elist_property = blist_property_wwn; + conf.blist_devnode = blist_devnode_sdb; + conf.elist_device = blist_device_foo_bar; + conf.elist_protocol = blist_protocol_fcp; + conf.elist_wwid = blist_wwid_xyzzy; + expect_condlog(3, "sdb: udev property ID_WWN whitelisted\n"); + expect_condlog(3, "sdb: device node name blacklisted\n"); + assert_int_equal(filter_path(&conf, &test_pp), MATCH_DEVNODE_BLIST); +} + +static void test_filter_path_whitelist_device(void **state) +{ + conf.elist_property = blist_property_wwn; + conf.elist_devnode = blist_devnode_sdb; + conf.blist_device = blist_device_foo_bar; + conf.elist_protocol = blist_protocol_fcp; + conf.elist_wwid = blist_wwid_xyzzy; + expect_condlog(3, "sdb: udev property ID_WWN whitelisted\n"); + expect_condlog(3, "sdb: device node name whitelisted\n"); + expect_condlog(3, "sdb: (foo:bar) vendor/product blacklisted\n"); + assert_int_equal(filter_path(&conf, &test_pp), MATCH_DEVICE_BLIST); +} + +static void test_filter_path_whitelist_protocol(void **state) +{ + conf.elist_property = blist_property_wwn; + conf.elist_devnode = blist_devnode_sdb; + conf.elist_device = blist_device_foo_bar; + conf.blist_protocol = blist_protocol_fcp; + conf.elist_wwid = blist_wwid_xyzzy; + expect_condlog(3, "sdb: udev property ID_WWN whitelisted\n"); + expect_condlog(3, "sdb: device node name whitelisted\n"); + expect_condlog(3, "sdb: (foo:bar) vendor/product whitelisted\n"); + expect_condlog(3, "sdb: protocol scsi:fcp blacklisted\n"); + assert_int_equal(filter_path(&conf, &test_pp), MATCH_PROTOCOL_BLIST); +} + +static void 
test_filter_path_whitelist_wwid(void **state) +{ + conf.elist_property = blist_property_wwn; + conf.elist_devnode = blist_devnode_sdb; + conf.elist_device = blist_device_foo_bar; + conf.elist_protocol = blist_protocol_fcp; + conf.blist_wwid = blist_wwid_xyzzy; + expect_condlog(3, "sdb: udev property ID_WWN whitelisted\n"); + expect_condlog(3, "sdb: device node name whitelisted\n"); + expect_condlog(3, "sdb: (foo:bar) vendor/product whitelisted\n"); + expect_condlog(3, "sdb: protocol scsi:fcp whitelisted\n"); + expect_condlog(3, "sdb: wwid xyzzy blacklisted\n"); + assert_int_equal(filter_path(&conf, &test_pp), MATCH_WWID_BLIST); +} + +#define test_and_reset(x) cmocka_unit_test_teardown((x), reset_blists) + +int test_blacklist(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_devnode_blacklist), + cmocka_unit_test(test_devnode_whitelist), + cmocka_unit_test(test_devnode_missing), + cmocka_unit_test(test_device_blacklist), + cmocka_unit_test(test_device_whitelist), + cmocka_unit_test(test_device_missing), + cmocka_unit_test(test_wwid_blacklist), + cmocka_unit_test(test_wwid_whitelist), + cmocka_unit_test(test_wwid_missing), + cmocka_unit_test(test_protocol_blacklist), + cmocka_unit_test(test_protocol_whitelist), + cmocka_unit_test(test_protocol_missing), + test_and_reset(test_property_blacklist), + test_and_reset(test_property_whitelist), + test_and_reset(test_property_missing), + test_and_reset(test_filter_path_property), + test_and_reset(test_filter_path_devnode), + test_and_reset(test_filter_path_device), + test_and_reset(test_filter_path_protocol), + test_and_reset(test_filter_path_wwid), + test_and_reset(test_filter_path_missing1), + test_and_reset(test_filter_path_missing2), + test_and_reset(test_filter_path_missing3), + test_and_reset(test_filter_path_whitelist), + test_and_reset(test_filter_path_whitelist_property), + test_and_reset(test_filter_path_whitelist_devnode), + test_and_reset(test_filter_path_whitelist_device), + 
test_and_reset(test_filter_path_whitelist_protocol), + test_and_reset(test_filter_path_whitelist_wwid), + }; + return cmocka_run_group_tests(tests, setup, teardown); +} + +int main(void) +{ + int ret = 0; + ret += test_blacklist(); + return ret; +} diff --git a/tests/directio.c b/tests/directio.c new file mode 100644 index 0000000..3cd7a52 --- /dev/null +++ b/tests/directio.c @@ -0,0 +1,776 @@ +/* + * Copyright (c) 2018 Benjamin Marzinski, Redhat + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include "globals.c" +#include "../libmultipath/checkers/directio.c" + +int test_fd = 111; +int ioctx_count = 0; +struct io_event mock_events[AIO_GROUP_SIZE]; /* same as the checker max */ +int ev_off = 0; +struct timespec zero_timeout = {0}; +struct timespec full_timeout = { .tv_sec = -1 }; + +int __real_ioctl(int fd, unsigned long request, void *argp); + +int __wrap_ioctl(int fd, unsigned long request, void *argp) +{ +#ifdef DIO_TEST_DEV + mock_type(int); + return __real_ioctl(fd, request, argp); +#else + int *blocksize = (int *)argp; + + assert_int_equal(fd, test_fd); + assert_int_equal(request, BLKBSZGET); + assert_non_null(blocksize); + *blocksize = mock_type(int); + return 0; +#endif +} + +int __real_fcntl(int fd, int cmd, long arg); + +int __wrap_fcntl(int fd, int cmd, long arg) +{ +#ifdef DIO_TEST_DEV + return __real_fcntl(fd, cmd, arg); +#else + assert_int_equal(fd, test_fd); + assert_int_equal(cmd, F_GETFL); + return O_DIRECT; +#endif +} + +int __real___fxstat(int ver, int fd, struct stat *statbuf); + +int __wrap___fxstat(int ver, int fd, struct stat *statbuf) +{ +#ifdef DIO_TEST_DEV + return __real___fxstat(ver, fd, statbuf); +#else + assert_int_equal(fd, test_fd); + assert_non_null(statbuf); + memset(statbuf, 0, sizeof(struct stat)); + return 0; +#endif +} + +int __real_io_setup(int maxevents, io_context_t *ctxp); + +int __wrap_io_setup(int maxevents, io_context_t *ctxp) +{ + ioctx_count++; +#ifdef DIO_TEST_DEV + int ret = mock_type(int); + assert_int_equal(ret, __real_io_setup(maxevents, ctxp)); + return ret; +#else + return mock_type(int); +#endif +} + +int __real_io_destroy(io_context_t ctx); + +int __wrap_io_destroy(io_context_t ctx) +{ + ioctx_count--; +#ifdef DIO_TEST_DEV + int ret = mock_type(int); + assert_int_equal(ret, __real_io_destroy(ctx)); + return ret; +#else + return mock_type(int); +#endif +} + +int __real_io_submit(io_context_t ctx, 
long nr, struct iocb *ios[]); + +int __wrap_io_submit(io_context_t ctx, long nr, struct iocb *ios[]) +{ +#ifdef DIO_TEST_DEV + struct timespec dev_delay = { .tv_nsec = 100000 }; + int ret = mock_type(int); + assert_int_equal(ret, __real_io_submit(ctx, nr, ios)); + nanosleep(&dev_delay, NULL); + return ret; +#else + return mock_type(int); +#endif +} + +int __real_io_cancel(io_context_t ctx, struct iocb *iocb, struct io_event *evt); + +int __wrap_io_cancel(io_context_t ctx, struct iocb *iocb, struct io_event *evt) +{ +#ifdef DIO_TEST_DEV + mock_type(int); + return __real_io_cancel(ctx, iocb, evt); +#else + return mock_type(int); +#endif +} + +int __real_io_getevents(io_context_t ctx, long min_nr, long nr, + struct io_event *events, struct timespec *timeout); + +int __wrap_io_getevents(io_context_t ctx, long min_nr, long nr, + struct io_event *events, struct timespec *timeout) +{ + int nr_evs; +#ifndef DIO_TEST_DEV + struct timespec *sleep_tmo; + int i; + struct io_event *evs; +#endif + + assert_non_null(timeout); + nr_evs = mock_type(int); + assert_true(nr_evs <= nr); + if (!nr_evs) + return 0; +#ifdef DIO_TEST_DEV + mock_ptr_type(struct timespec *); + mock_ptr_type(struct io_event *); + assert_int_equal(nr_evs, __real_io_getevents(ctx, min_nr, nr_evs, + events, timeout)); +#else + sleep_tmo = mock_ptr_type(struct timespec *); + if (sleep_tmo) { + if (sleep_tmo->tv_sec < 0) + nanosleep(timeout, NULL); + else + nanosleep(sleep_tmo, NULL); + } + if (nr_evs < 0) { + errno = -nr_evs; + return -1; + } + evs = mock_ptr_type(struct io_event *); + for (i = 0; i < nr_evs; i++) + events[i] = evs[i]; +#endif + ev_off -= nr_evs; + return nr_evs; +} + +static void return_io_getevents_none(void) +{ + will_return(__wrap_io_getevents, 0); +} + +static void return_io_getevents_nr(struct timespec *ts, int nr, + struct async_req **reqs, int *res) +{ + int i, off = 0; + + for(i = 0; i < nr; i++) { + mock_events[i + ev_off].obj = &reqs[i]->io; + if (res[i] == 0) + mock_events[i + 
ev_off].res = reqs[i]->blksize; + } + while (nr > 0) { + will_return(__wrap_io_getevents, (nr > 128)? 128 : nr); + will_return(__wrap_io_getevents, ts); + will_return(__wrap_io_getevents, &mock_events[off + ev_off]); + ts = NULL; + off += 128; + nr -= 128; + } + if (nr == 0) + will_return(__wrap_io_getevents, 0); + ev_off += i; +} + +void do_check_state(struct checker *c, int sync, int timeout, int chk_state) +{ + struct directio_context * ct = (struct directio_context *)c->context; + + if (!ct->running) + will_return(__wrap_io_submit, 1); + assert_int_equal(check_state(test_fd, ct, sync, timeout), chk_state); + assert_int_equal(ev_off, 0); + memset(mock_events, 0, sizeof(mock_events)); +} + +void do_libcheck_reset(int nr_aio_grps) +{ + int count = 0; + struct aio_group *aio_grp; + + list_for_each_entry(aio_grp, &aio_grp_list, node) + count++; + assert_int_equal(count, nr_aio_grps); + for (count = 0; count < nr_aio_grps; count++) + will_return(__wrap_io_destroy, 0); + libcheck_reset(); + assert_true(list_empty(&aio_grp_list)); + assert_int_equal(ioctx_count, 0); +} + +static void do_libcheck_init(struct checker *c, int blocksize, + struct async_req **req) +{ + struct directio_context * ct; + + c->fd = test_fd; + will_return(__wrap_ioctl, blocksize); + assert_int_equal(libcheck_init(c), 0); + ct = (struct directio_context *)c->context; + assert_non_null(ct); + assert_non_null(ct->aio_grp); + assert_non_null(ct->req); + if (req) + *req = ct->req; +#ifndef DIO_TEST_DEV + /* don't check fake blocksize on real devices */ + assert_int_equal(ct->req->blksize, blocksize); +#endif +} + +static int is_checker_running(struct checker *c) +{ + struct directio_context * ct = (struct directio_context *)c->context; + return ct->running; +} + +static struct aio_group *get_aio_grp(struct checker *c) +{ + struct directio_context * ct = (struct directio_context *)c->context; + + assert_non_null(ct); + return ct->aio_grp; +} + +static void check_aio_grp(struct aio_group *aio_grp, int 
holders, + int orphans) +{ + int count = 0; + struct list_head *item; + + list_for_each(item, &aio_grp->orphans) + count++; + assert_int_equal(holders, aio_grp->holders); + assert_int_equal(orphans, count); +} + +/* simple resetting test */ +static void test_reset(void **state) +{ + assert_true(list_empty(&aio_grp_list)); + do_libcheck_reset(0); +} + +/* tests initializing, then resetting, and then initializing again */ +static void test_init_reset_init(void **state) +{ + struct checker c = {0}; + struct aio_group *aio_grp, *tmp_grp; + + assert_true(list_empty(&aio_grp_list)); + will_return(__wrap_io_setup, 0); + do_libcheck_init(&c, 4096, NULL); + aio_grp = get_aio_grp(&c); + check_aio_grp(aio_grp, 1, 0); + list_for_each_entry(tmp_grp, &aio_grp_list, node) + assert_ptr_equal(aio_grp, tmp_grp); + libcheck_free(&c); + check_aio_grp(aio_grp, 0, 0); + do_libcheck_reset(1); + will_return(__wrap_io_setup, 0); + do_libcheck_init(&c, 4096, NULL); + aio_grp = get_aio_grp(&c); + check_aio_grp(aio_grp, 1, 0); + list_for_each_entry(tmp_grp, &aio_grp_list, node) + assert_ptr_equal(aio_grp, tmp_grp); + libcheck_free(&c); + check_aio_grp(aio_grp, 0, 0); + do_libcheck_reset(1); +} + +/* test initializing and then freeing 4096 checkers */ +static void test_init_free(void **state) +{ + int i, count = 0; + struct checker c[4096] = {0}; + struct aio_group *aio_grp; + + assert_true(list_empty(&aio_grp_list)); + will_return(__wrap_io_setup, 0); + will_return(__wrap_io_setup, 0); + will_return(__wrap_io_setup, 0); + will_return(__wrap_io_setup, 0); + for (i = 0; i < 4096; i++) { + struct directio_context * ct; + + if (i % 3 == 0) + do_libcheck_init(&c[i], 512, NULL); + else if (i % 3 == 1) + do_libcheck_init(&c[i], 1024, NULL); + else + do_libcheck_init(&c[i], 4096, NULL); + ct = (struct directio_context *)c[i].context; + assert_non_null(ct->aio_grp); + if ((i & 1023) == 0) + aio_grp = ct->aio_grp; + else { + assert_ptr_equal(ct->aio_grp, aio_grp); + assert_int_equal(aio_grp->holders, 
(i & 1023) + 1); + } + } + count = 0; + list_for_each_entry(aio_grp, &aio_grp_list, node) + count++; + assert_int_equal(count, 4); + for (i = 0; i < 4096; i++) { + struct directio_context * ct = (struct directio_context *)c[i].context; + + aio_grp = ct->aio_grp; + libcheck_free(&c[i]); + assert_int_equal(aio_grp->holders, 1023 - (i & 1023)); + } + list_for_each_entry(aio_grp, &aio_grp_list, node) + assert_int_equal(aio_grp->holders, 0); + do_libcheck_reset(4); +} + +/* check mixed initializing and freeing 4096 checkers */ +static void test_multi_init_free(void **state) +{ + int i, count; + struct checker c[4096] = {0}; + struct aio_group *aio_grp; + + assert_true(list_empty(&aio_grp_list)); + will_return(__wrap_io_setup, 0); + will_return(__wrap_io_setup, 0); + will_return(__wrap_io_setup, 0); + will_return(__wrap_io_setup, 0); + for (count = 0, i = 0; i < 4096; count++) { + /* usually init, but occasionally free checkers */ + if (count == 0 || (count % 5 != 0 && count % 7 != 0)) { + do_libcheck_init(&c[i], 4096, NULL); + i++; + } else { + i--; + libcheck_free(&c[i]); + } + } + count = 0; + list_for_each_entry(aio_grp, &aio_grp_list, node) { + assert_int_equal(aio_grp->holders, 1024); + count++; + } + assert_int_equal(count, 4); + for (count = 0, i = 4096; i > 0; count++) { + /* usually free, but occasionally init checkers */ + if (count == 0 || (count % 5 != 0 && count % 7 != 0)) { + i--; + libcheck_free(&c[i]); + } else { + do_libcheck_init(&c[i], 4096, NULL); + i++; + } + } + do_libcheck_reset(4); +} + +/* simple single checker sync test */ +static void test_check_state_simple(void **state) +{ + struct checker c = {0}; + struct async_req *req; + int res = 0; + + assert_true(list_empty(&aio_grp_list)); + will_return(__wrap_io_setup, 0); + do_libcheck_init(&c, 4096, &req); + return_io_getevents_nr(NULL, 1, &req, &res); + do_check_state(&c, 1, 30, PATH_UP); + libcheck_free(&c); + do_libcheck_reset(1); +} + +/* test sync timeout */ +static void 
test_check_state_timeout(void **state) +{ + struct checker c = {0}; + struct aio_group *aio_grp; + + assert_true(list_empty(&aio_grp_list)); + will_return(__wrap_io_setup, 0); + do_libcheck_init(&c, 4096, NULL); + aio_grp = get_aio_grp(&c); + return_io_getevents_none(); + will_return(__wrap_io_cancel, 0); + do_check_state(&c, 1, 30, PATH_DOWN); + check_aio_grp(aio_grp, 1, 0); +#ifdef DIO_TEST_DEV + /* io_cancel will return negative value on timeout, so it happens again + * when freeing the checker */ + will_return(__wrap_io_cancel, 0); +#endif + libcheck_free(&c); + do_libcheck_reset(1); +} + +/* test async timeout */ +static void test_check_state_async_timeout(void **state) +{ + struct checker c = {0}; + struct aio_group *aio_grp; + + assert_true(list_empty(&aio_grp_list)); + will_return(__wrap_io_setup, 0); + do_libcheck_init(&c, 4096, NULL); + aio_grp = get_aio_grp(&c); + return_io_getevents_none(); + do_check_state(&c, 0, 3, PATH_PENDING); + return_io_getevents_none(); + do_check_state(&c, 0, 3, PATH_PENDING); + return_io_getevents_none(); + do_check_state(&c, 0, 3, PATH_PENDING); + return_io_getevents_none(); + will_return(__wrap_io_cancel, 0); + do_check_state(&c, 0, 3, PATH_DOWN); + check_aio_grp(aio_grp, 1, 0); +#ifdef DIO_TEST_DEV + will_return(__wrap_io_cancel, 0); +#endif + libcheck_free(&c); + do_libcheck_reset(1); +} + +/* test freeing checkers with outstanding requests */ +static void test_free_with_pending(void **state) +{ + struct checker c[2] = {0}; + struct aio_group *aio_grp; + struct async_req *req; + int res = 0; + + assert_true(list_empty(&aio_grp_list)); + will_return(__wrap_io_setup, 0); + do_libcheck_init(&c[0], 4096, &req); + do_libcheck_init(&c[1], 4096, NULL); + aio_grp = get_aio_grp(c); + return_io_getevents_none(); + do_check_state(&c[0], 0, 30, PATH_PENDING); + return_io_getevents_nr(NULL, 1, &req, &res); + return_io_getevents_none(); + do_check_state(&c[1], 0, 30, PATH_PENDING); + assert_true(is_checker_running(&c[0])); + 
assert_true(is_checker_running(&c[1])); + check_aio_grp(aio_grp, 2, 0); + libcheck_free(&c[0]); + check_aio_grp(aio_grp, 1, 0); + will_return(__wrap_io_cancel, 0); + libcheck_free(&c[1]); +#ifdef DIO_TEST_DEV + check_aio_grp(aio_grp, 1, 1); /* real cancel doesn't remove request */ +#else + check_aio_grp(aio_grp, 0, 0); +#endif + do_libcheck_reset(1); +} + +/* test removing orphaned aio_group on free */ +static void test_orphaned_aio_group(void **state) +{ + struct checker c[AIO_GROUP_SIZE] = {0}; + struct aio_group *aio_grp, *tmp_grp; + int i; + + assert_true(list_empty(&aio_grp_list)); + will_return(__wrap_io_setup, 0); + for (i = 0; i < AIO_GROUP_SIZE; i++) { + do_libcheck_init(&c[i], 4096, NULL); + return_io_getevents_none(); + do_check_state(&c[i], 0, 30, PATH_PENDING); + } + aio_grp = get_aio_grp(c); + check_aio_grp(aio_grp, AIO_GROUP_SIZE, 0); + i = 0; + list_for_each_entry(tmp_grp, &aio_grp_list, node) + i++; + assert_int_equal(i, 1); + for (i = 0; i < AIO_GROUP_SIZE; i++) { + assert_true(is_checker_running(&c[i])); + will_return(__wrap_io_cancel, -1); + if (i == AIO_GROUP_SIZE - 1) { + /* remove the orphaned group and create a new one */ + will_return(__wrap_io_destroy, 0); + } + libcheck_free(&c[i]); + } + do_libcheck_reset(0); +} + +/* test sync timeout with failed cancel and cleanup by another + * checker */ +static void test_timeout_cancel_failed(void **state) +{ + struct checker c[2] = {0}; + struct aio_group *aio_grp; + struct async_req *reqs[2]; + int res[] = {0,0}; + int i; + + assert_true(list_empty(&aio_grp_list)); + will_return(__wrap_io_setup, 0); + for (i = 0; i < 2; i++) + do_libcheck_init(&c[i], 4096, &reqs[i]); + aio_grp = get_aio_grp(c); + return_io_getevents_none(); + will_return(__wrap_io_cancel, -1); + do_check_state(&c[0], 1, 30, PATH_DOWN); + assert_true(is_checker_running(&c[0])); + check_aio_grp(aio_grp, 2, 0); + return_io_getevents_none(); + will_return(__wrap_io_cancel, -1); + do_check_state(&c[0], 1, 30, PATH_DOWN); + 
assert_true(is_checker_running(&c[0])); + return_io_getevents_nr(NULL, 1, &reqs[0], &res[0]); + return_io_getevents_nr(NULL, 1, &reqs[1], &res[1]); + do_check_state(&c[1], 1, 30, PATH_UP); + do_check_state(&c[0], 1, 30, PATH_UP); + for (i = 0; i < 2; i++) { + assert_false(is_checker_running(&c[i])); + libcheck_free(&c[i]); + } + do_libcheck_reset(1); +} + +/* test async timeout with failed cancel and cleanup by another + * checker */ +static void test_async_timeout_cancel_failed(void **state) +{ + struct checker c[2] = {0}; + struct async_req *reqs[2]; + int res[] = {0,0}; + int i; + + assert_true(list_empty(&aio_grp_list)); + will_return(__wrap_io_setup, 0); + for (i = 0; i < 2; i++) + do_libcheck_init(&c[i], 4096, &reqs[i]); + return_io_getevents_none(); + do_check_state(&c[0], 0, 2, PATH_PENDING); + return_io_getevents_none(); + do_check_state(&c[1], 0, 2, PATH_PENDING); + return_io_getevents_none(); + do_check_state(&c[0], 0, 2, PATH_PENDING); + return_io_getevents_none(); + do_check_state(&c[1], 0, 2, PATH_PENDING); + return_io_getevents_none(); + will_return(__wrap_io_cancel, -1); + do_check_state(&c[0], 0, 2, PATH_DOWN); +#ifndef DIO_TEST_DEV + /* can't pick which event gets returned on real devices */ + return_io_getevents_nr(NULL, 1, &reqs[1], &res[1]); + do_check_state(&c[1], 0, 2, PATH_UP); +#endif + return_io_getevents_none(); + will_return(__wrap_io_cancel, -1); + do_check_state(&c[0], 0, 2, PATH_DOWN); + assert_true(is_checker_running(&c[0])); + return_io_getevents_nr(NULL, 2, reqs, res); + do_check_state(&c[1], 0, 2, PATH_UP); + do_check_state(&c[0], 0, 2, PATH_UP); + for (i = 0; i < 2; i++) { + assert_false(is_checker_running(&c[i])); + libcheck_free(&c[i]); + } + do_libcheck_reset(1); +} + +/* test orphaning a request, and having another checker clean it up */ +static void test_orphan_checker_cleanup(void **state) +{ + struct checker c[2] = {0}; + struct async_req *reqs[2]; + int res[] = {0,0}; + struct aio_group *aio_grp; + int i; + + 
assert_true(list_empty(&aio_grp_list)); + will_return(__wrap_io_setup, 0); + for (i = 0; i < 2; i++) + do_libcheck_init(&c[i], 4096, &reqs[i]); + aio_grp = get_aio_grp(c); + return_io_getevents_none(); + do_check_state(&c[0], 0, 30, PATH_PENDING); + will_return(__wrap_io_cancel, -1); + check_aio_grp(aio_grp, 2, 0); + libcheck_free(&c[0]); + check_aio_grp(aio_grp, 2, 1); + return_io_getevents_nr(NULL, 2, reqs, res); + do_check_state(&c[1], 0, 2, PATH_UP); + check_aio_grp(aio_grp, 1, 0); + libcheck_free(&c[1]); + check_aio_grp(aio_grp, 0, 0); + do_libcheck_reset(1); +} + +/* test orphaning a request, and having reset clean it up */ +static void test_orphan_reset_cleanup(void **state) +{ + struct checker c = {0}; /* zeroed like the checker in every other test, so no indeterminate fields reach the checker code */ + struct aio_group *orphan_aio_grp, *tmp_aio_grp; + int found, count; + + assert_true(list_empty(&aio_grp_list)); + will_return(__wrap_io_setup, 0); + do_libcheck_init(&c, 4096, NULL); + orphan_aio_grp = get_aio_grp(&c); + return_io_getevents_none(); + do_check_state(&c, 0, 30, PATH_PENDING); + will_return(__wrap_io_cancel, -1); + check_aio_grp(orphan_aio_grp, 1, 0); + libcheck_free(&c); + check_aio_grp(orphan_aio_grp, 1, 1); + found = count = 0; + list_for_each_entry(tmp_aio_grp, &aio_grp_list, node) { + count++; + if (tmp_aio_grp == orphan_aio_grp) + found = 1; + } + assert_int_equal(count, 1); + assert_int_equal(found, 1); + do_libcheck_reset(1); +} + +/* test checkers with different blocksizes */ +static void test_check_state_blksize(void **state) +{ + int i; + struct checker c[3] = {0}; + int blksize[] = {4096, 1024, 512}; + struct async_req *reqs[3]; + int res[] = {0,1,0}; +#ifdef DIO_TEST_DEV + /* can't pick event return state on real devices */ + int chk_state[] = {PATH_UP, PATH_UP, PATH_UP}; +#else + int chk_state[] = {PATH_UP, PATH_DOWN, PATH_UP}; +#endif + + assert_true(list_empty(&aio_grp_list)); + will_return(__wrap_io_setup, 0); + for (i = 0; i < 3; i++) + do_libcheck_init(&c[i], blksize[i], &reqs[i]); + for (i = 0; i < 3; i++) { + 
return_io_getevents_nr(NULL, 1, &reqs[i], &res[i]); + do_check_state(&c[i], 1, 30, chk_state[i]); + } + for (i = 0; i < 3; i++) { + assert_false(is_checker_running(&c[i])); + libcheck_free(&c[i]); + } + do_libcheck_reset(1); +} + +/* test async checkers pending and getting resolved by another checker + * as well as the loops for getting multiple events */ +static void test_check_state_async(void **state) +{ + int i; + struct checker c[257] = {0}; + struct async_req *reqs[257]; + int res[257] = {0}; + + assert_true(list_empty(&aio_grp_list)); + will_return(__wrap_io_setup, 0); + for (i = 0; i < 257; i++) + do_libcheck_init(&c[i], 4096, &reqs[i]); + for (i = 0; i < 256; i++) { + return_io_getevents_none(); + do_check_state(&c[i], 0, 30, PATH_PENDING); + assert_true(is_checker_running(&c[i])); + } + return_io_getevents_nr(&full_timeout, 256, reqs, res); + return_io_getevents_nr(NULL, 1, &reqs[256], &res[256]); + do_check_state(&c[256], 0, 30, PATH_UP); + assert_false(is_checker_running(&c[256])); + libcheck_free(&c[256]); + for (i = 0; i < 256; i++) { + do_check_state(&c[i], 0, 30, PATH_UP); + assert_false(is_checker_running(&c[i])); + libcheck_free(&c[i]); + } + do_libcheck_reset(1); +} + +static int setup(void **state) +{ +#ifdef DIO_TEST_DEV + test_fd = open(DIO_TEST_DEV, O_RDONLY); + if (test_fd < 0) + fail_msg("cannot open %s: %m", DIO_TEST_DEV); +#endif + return 0; +} + +static int teardown(void **state) +{ +#ifdef DIO_TEST_DEV + assert_true(test_fd > 0); + assert_int_equal(close(test_fd), 0); +#endif + return 0; +} + +int test_directio(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_reset), + cmocka_unit_test(test_init_reset_init), + cmocka_unit_test(test_init_free), + cmocka_unit_test(test_multi_init_free), + cmocka_unit_test(test_check_state_simple), + cmocka_unit_test(test_check_state_timeout), + cmocka_unit_test(test_check_state_async_timeout), + cmocka_unit_test(test_free_with_pending), + cmocka_unit_test(test_timeout_cancel_failed), 
+ cmocka_unit_test(test_async_timeout_cancel_failed), + cmocka_unit_test(test_orphan_checker_cleanup), + cmocka_unit_test(test_orphan_reset_cleanup), + cmocka_unit_test(test_check_state_blksize), + cmocka_unit_test(test_check_state_async), + cmocka_unit_test(test_orphaned_aio_group), + }; + + return cmocka_run_group_tests(tests, setup, teardown); +} + +int main(void) +{ + int ret = 0; + + conf.verbosity = 2; + ret += test_directio(); + return ret; +} diff --git a/tests/dmevents.c b/tests/dmevents.c new file mode 100644 index 0000000..bee117a --- /dev/null +++ b/tests/dmevents.c @@ -0,0 +1,917 @@ +/* + * Copyright (c) 2018 Benjamin Marzinski, Redhat + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "structs.h" +#include "structs_vec.h" + +#include "globals.c" +/* I have to do this to get at the static variables */ +#include "../multipathd/dmevents.c" + +/* pretend dm device */ +struct dm_device { + char name[WWID_SIZE]; + /* is this a mpath device, or other dm device */ + int is_mpath; + uint32_t evt_nr; + /* tracks the event number when the multipath device was updated */ + uint32_t update_nr; +}; + +struct test_data { + struct vectors vecs; + vector dm_devices; + struct dm_names *names; +}; + +struct test_data data; + +/* Add a pretend dm device, or update its event number. 
This is used to build + * up the dm devices that the dmevents code queries with dm_task_get_names, + * dm_geteventnr, and dm_is_mpath */ +int add_dm_device_event(char *name, int is_mpath, uint32_t evt_nr) +{ + struct dm_device *dev; + int i; + + vector_foreach_slot(data.dm_devices, dev, i) { + if (strcmp(name, dev->name) == 0) { + dev->evt_nr = evt_nr; + return 0; + } + } + dev = (struct dm_device *)malloc(sizeof(struct dm_device)); + if (!dev){ + condlog(0, "Testing error mallocing dm_device"); + return -1; + } + strncpy(dev->name, name, WWID_SIZE); + dev->name[WWID_SIZE - 1] = 0; + dev->is_mpath = is_mpath; + dev->evt_nr = evt_nr; + if (!vector_alloc_slot(data.dm_devices)) { + condlog(0, "Testing error setting dm_devices slot"); + free(dev); + return -1; + } + vector_set_slot(data.dm_devices, dev); + return 0; +} + +/* helper function for pretend dm devices */ +struct dm_device *find_dm_device(const char *name) +{ + struct dm_device *dev; + int i; + + vector_foreach_slot(data.dm_devices, dev, i) + if (strcmp(name, dev->name) == 0) + return dev; + return NULL; +} + +/* helper function for pretend dm devices */ +int remove_dm_device_event(const char *name) +{ + struct dm_device *dev; + int i; + + vector_foreach_slot(data.dm_devices, dev, i) { + if (strcmp(name, dev->name) == 0) { + vector_del_slot(data.dm_devices, i); + free(dev); + return 0; + } + } + return -1; +} + +/* helper function for pretend dm devices */ +void remove_all_dm_device_events(void) +{ + struct dm_device *dev; + int i; + + vector_foreach_slot(data.dm_devices, dev, i) + free(dev); + vector_reset(data.dm_devices); +} + +static inline size_t align_val(size_t val) +{ + return (val + 7) & ~7; +} +static inline void *align_ptr(void *ptr) +{ + return (void *)align_val((size_t)ptr); +} + +/* copied off of list_devices in dm-ioctl.c except that it uses + * the pretend dm devices, and saves the output to the test_data + * structure */ +struct dm_names *build_dm_names(void) +{ + struct dm_names *names, 
*np, *old_np = NULL; + uint32_t *event_nr; + struct dm_device *dev; + int i, size = 0; + + if (VECTOR_SIZE(data.dm_devices) == 0) { + names = (struct dm_names *)malloc(sizeof(struct dm_names)); + if (!names) { + condlog(0, "Testing error allocating empty dm_names"); + return NULL; + } + names->dev = 0; + names->next = 0; + return names; + } + vector_foreach_slot(data.dm_devices, dev, i) { + size += align_val(offsetof(struct dm_names, name) + + strlen(dev->name) + 1); + size += align_val(sizeof(uint32_t)); + } + names = (struct dm_names *)malloc(size); + if (!names) { + condlog(0, "Testing error allocating dm_names"); + return NULL; + } + np = names; + vector_foreach_slot(data.dm_devices, dev, i) { + if (old_np) + old_np->next = (uint32_t) ((uintptr_t) np - + (uintptr_t) old_np); + np->dev = 1; + np->next = 0; + strcpy(np->name, dev->name); + + old_np = np; + event_nr = align_ptr(np->name + strlen(dev->name) + 1); + *event_nr = dev->evt_nr; + np = align_ptr(event_nr + 1); + } + assert_int_equal((char *)np - (char *)names, size); + return names; +} + +static int setup(void **state) +{ + if (dmevent_poll_supported()) { + data.dm_devices = vector_alloc(); + *state = &data; + } else + *state = NULL; + return 0; +} + +static int teardown(void **state) +{ + struct dm_device *dev; + int i; + struct test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + return 0; + vector_foreach_slot(datap->dm_devices, dev, i) + free(dev); + vector_free(datap->dm_devices); + datap = NULL; + return 0; +} + +int __wrap_open(const char *pathname, int flags) +{ + assert_string_equal(pathname, "/dev/mapper/control"); /* compare contents: identical literals need not share an address */ + assert_int_equal(flags, O_RDWR); + return mock_type(int); +} + +/* We never check the result of the close(), so there's no need + * to mock a return value */ +int __wrap_close(int fd) +{ + assert_int_equal(fd, waiter->fd); + return 0; +} + +/* the pretend dm device code checks the input and supplies the + * return value, so there's no need to do that here */ +int 
__wrap_dm_is_mpath(const char *name) +{ + struct dm_device *dev; + int i; + + vector_foreach_slot(data.dm_devices, dev, i) + if (strcmp(name, dev->name) == 0) + return dev->is_mpath; + return 0; +} + +/* either get return info from the pretend dm device, or + * override it to test -1 return */ +int __wrap_dm_geteventnr(const char *name) +{ + struct dm_device *dev; + int fail = mock_type(int); + + if (fail) + return -1; + dev = find_dm_device(name); + if (dev) { + /* simulate updating device state after adding it */ + dev->update_nr = dev->evt_nr; + return dev->evt_nr; + } + return -1; +} + +int __wrap_ioctl(int fd, unsigned long request, void *argp) +{ + assert_int_equal(fd, waiter->fd); + assert_int_equal(request, DM_DEV_ARM_POLL); + return mock_type(int); +} + +struct dm_task *__wrap_libmp_dm_task_create(int task) +{ + assert_int_equal(task, DM_DEVICE_LIST); + return mock_type(struct dm_task *); +} + +int __wrap_dm_task_no_open_count(struct dm_task *dmt) +{ + assert_ptr_equal((struct test_data *)dmt, &data); + return mock_type(int); +} + +int __wrap_dm_task_run(struct dm_task *dmt) +{ + assert_ptr_equal((struct test_data *)dmt, &data); + return mock_type(int); +} + +/* either get return info from the pretend dm device, or + * override it to test NULL return */ +struct dm_names * __wrap_dm_task_get_names(struct dm_task *dmt) +{ + int good = mock_type(int); + assert_ptr_equal((struct test_data *)dmt, &data); + + if (data.names) { + condlog(0, "Testing error. 
data.names already allocated"); + return NULL; + } + if (!good) + return NULL; + data.names = build_dm_names(); + return data.names; +} + +void __wrap_dm_task_destroy(struct dm_task *dmt) +{ + assert_ptr_equal((struct test_data *)dmt, &data); + + if (data.names) { + free(data.names); + data.names = NULL; + } +} + +int __wrap_poll(struct pollfd *fds, nfds_t nfds, int timeout) +{ + assert_int_equal(nfds, 1); + assert_int_equal(timeout, -1); + assert_int_equal(fds->fd, waiter->fd); + assert_int_equal(fds->events, POLLIN); + return mock_type(int); +} + +void __wrap_remove_map_by_alias(const char *alias, struct vectors * vecs, + int purge_vec) +{ + check_expected(alias); + assert_ptr_equal(vecs, waiter->vecs); + assert_int_equal(purge_vec, 1); +} + +/* pretend update the pretend dm devices. If fail is set, it + * simulates having the dm device removed. Otherwise it just sets + * update_nr to record when the update happened */ +int __wrap_update_multipath(struct vectors *vecs, char *mapname, int reset) +{ + int fail; + + check_expected(mapname); + assert_ptr_equal(vecs, waiter->vecs); + assert_int_equal(reset, 1); + fail = mock_type(int); + if (fail) { + assert_int_equal(remove_dm_device_event(mapname), 0); + return fail; + } else { + struct dm_device *dev; + int i; + + vector_foreach_slot(data.dm_devices, dev, i) { + if (strcmp(mapname, dev->name) == 0) { + dev->update_nr = dev->evt_nr; + return 0; + } + } + fail(); + } + return fail; +} + +/* helper function used to check if the dmevents list of devices + * includes a specific device. 
To make sure that dmevents is + * in the correct state after running a function */ +struct dev_event *find_dmevents(const char *name) +{ + struct dev_event *dev_evt; + int i; + + vector_foreach_slot(waiter->events, dev_evt, i) + if (!strcmp(dev_evt->name, name)) + return dev_evt; + return NULL; +} + +/* null vecs pointer when initialized dmevents */ +static void test_init_waiter_bad0(void **state) +{ + /* this boilerplate code just skips the test if + * dmevents polling is not supported */ + struct test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + skip(); + + assert_int_equal(init_dmevent_waiter(NULL), -1); +} + +/* fail to open /dev/mapper/control */ +static void test_init_waiter_bad1(void **state) +{ + struct test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + skip(); + will_return(__wrap_open, -1); + assert_int_equal(init_dmevent_waiter(&datap->vecs), -1); + assert_ptr_equal(waiter, NULL); +} + +/* waiter remains initialized after this test */ +static void test_init_waiter_good0(void **state) +{ + struct test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + skip(); + will_return(__wrap_open, 2); + assert_int_equal(init_dmevent_waiter(&datap->vecs), 0); + assert_ptr_not_equal(waiter, NULL); +} + +/* No dm device named foo */ +static void test_watch_dmevents_bad0(void **state) +{ + struct test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + skip(); + assert_int_equal(watch_dmevents("foo"), -1); + assert_ptr_equal(find_dmevents("foo"), NULL); +} + +/* foo is not a multipath device */ +static void test_watch_dmevents_bad1(void **state) +{ + struct test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + skip(); + + assert_int_equal(add_dm_device_event("foo", 0, 5), 0); + assert_int_equal(watch_dmevents("foo"), -1); + assert_ptr_equal(find_dmevents("foo"), NULL); +} + +/* failed getting the dmevent number */ +static void test_watch_dmevents_bad2(void **state) +{ + struct 
test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + skip(); + + remove_all_dm_device_events(); + assert_int_equal(add_dm_device_event("foo", 1, 5), 0); + will_return(__wrap_dm_geteventnr, -1); + assert_int_equal(watch_dmevents("foo"), -1); + assert_ptr_equal(find_dmevents("foo"), NULL); +} + +/* verify that you can watch and unwatch dm multipath device "foo" */ +static void test_watch_dmevents_good0(void **state) +{ + struct dev_event *dev_evt; + struct test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + skip(); + + remove_all_dm_device_events(); + assert_int_equal(add_dm_device_event("foo", 1, 5), 0); + will_return(__wrap_dm_geteventnr, 0); + assert_int_equal(watch_dmevents("foo"), 0); + /* verify foo is being watched */ + dev_evt = find_dmevents("foo"); + assert_ptr_not_equal(dev_evt, NULL); + assert_int_equal(dev_evt->evt_nr, 5); + assert_int_equal(dev_evt->action, EVENT_NOTHING); + assert_int_equal(VECTOR_SIZE(waiter->events), 1); + unwatch_dmevents("foo"); + /* verify foo is no longer being watched */ + assert_int_equal(VECTOR_SIZE(waiter->events), 0); + assert_ptr_equal(find_dmevents("foo"), NULL); +} + +/* verify that if you try to watch foo multiple times, it only + * is placed on the waiter list once */ +static void test_watch_dmevents_good1(void **state) +{ + struct dev_event *dev_evt; + struct test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + skip(); + + remove_all_dm_device_events(); + assert_int_equal(add_dm_device_event("foo", 1, 5), 0); + will_return(__wrap_dm_geteventnr, 0); + assert_int_equal(watch_dmevents("foo"), 0); + dev_evt = find_dmevents("foo"); + assert_ptr_not_equal(dev_evt, NULL); + assert_int_equal(dev_evt->evt_nr, 5); + assert_int_equal(dev_evt->action, EVENT_NOTHING); + assert_int_equal(add_dm_device_event("foo", 1, 6), 0); + will_return(__wrap_dm_geteventnr, 0); + assert_int_equal(watch_dmevents("foo"), 0); + dev_evt = find_dmevents("foo"); + assert_ptr_not_equal(dev_evt, 
NULL); + assert_int_equal(dev_evt->evt_nr, 6); + assert_int_equal(dev_evt->action, EVENT_NOTHING); + assert_int_equal(VECTOR_SIZE(waiter->events), 1); + unwatch_dmevents("foo"); + assert_int_equal(VECTOR_SIZE(waiter->events), 0); + assert_ptr_equal(find_dmevents("foo"), NULL); +} + +/* watch and then unwatch multiple devices */ +static void test_watch_dmevents_good2(void **state) +{ + struct dev_event *dev_evt; + struct test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + skip(); + + unwatch_all_dmevents(); + remove_all_dm_device_events(); + assert_int_equal(add_dm_device_event("foo", 1, 5), 0); + assert_int_equal(add_dm_device_event("bar", 1, 7), 0); + will_return(__wrap_dm_geteventnr, 0); + assert_int_equal(watch_dmevents("foo"), 0); + dev_evt = find_dmevents("foo"); + assert_ptr_not_equal(dev_evt, NULL); + assert_int_equal(dev_evt->evt_nr, 5); + assert_int_equal(dev_evt->action, EVENT_NOTHING); + assert_ptr_equal(find_dmevents("bar"), NULL); + will_return(__wrap_dm_geteventnr, 0); + assert_int_equal(watch_dmevents("bar"), 0); + dev_evt = find_dmevents("foo"); + assert_ptr_not_equal(dev_evt, NULL); + assert_int_equal(dev_evt->evt_nr, 5); + assert_int_equal(dev_evt->action, EVENT_NOTHING); + dev_evt = find_dmevents("bar"); + assert_ptr_not_equal(dev_evt, NULL); + assert_int_equal(dev_evt->evt_nr, 7); + assert_int_equal(dev_evt->action, EVENT_NOTHING); + assert_int_equal(VECTOR_SIZE(waiter->events), 2); + unwatch_all_dmevents(); + assert_int_equal(VECTOR_SIZE(waiter->events), 0); + assert_ptr_equal(find_dmevents("foo"), NULL); + assert_ptr_equal(find_dmevents("bar"), NULL); +} + +/* dm_task_create fails */ +static void test_get_events_bad0(void **state) +{ + struct test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + skip(); + + unwatch_all_dmevents(); + remove_all_dm_device_events(); + + will_return(__wrap_libmp_dm_task_create, NULL); + assert_int_equal(dm_get_events(), -1); +} + +/* dm_task_run fails */ +static void 
test_get_events_bad1(void **state) +{ + struct test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + skip(); + + will_return(__wrap_libmp_dm_task_create, &data); + will_return(__wrap_dm_task_no_open_count, 1); + will_return(__wrap_dm_task_run, 0); + assert_int_equal(dm_get_events(), -1); +} + +/* dm_task_get_names fails */ +static void test_get_events_bad2(void **state) +{ + struct test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + skip(); + + will_return(__wrap_libmp_dm_task_create, &data); + will_return(__wrap_dm_task_no_open_count, 1); + will_return(__wrap_dm_task_run, 1); + will_return(__wrap_dm_task_get_names, 0); + assert_int_equal(dm_get_events(), -1); +} + +/* If the device isn't being watched, dm_get_events returns NULL */ +static void test_get_events_good0(void **state) +{ + struct test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + skip(); + + assert_int_equal(add_dm_device_event("foo", 1, 5), 0); + will_return(__wrap_libmp_dm_task_create, &data); + will_return(__wrap_dm_task_no_open_count, 1); + will_return(__wrap_dm_task_run, 1); + will_return(__wrap_dm_task_get_names, 1); + assert_int_equal(dm_get_events(), 0); + assert_ptr_equal(find_dmevents("foo"), NULL); + assert_int_equal(VECTOR_SIZE(waiter->events), 0); +} + +/* There are 5 dm devices. 4 of them are multipath devices. + * Only 3 of them are being watched. "foo" has a new event + * "xyzzy" gets removed. Nothing happens to bar. 
Verify + * that all the events are properly set, and that nothing + * happens with the two devices that aren't being watched */ +static void test_get_events_good1(void **state) +{ + struct dev_event *dev_evt; + struct test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + skip(); + + remove_all_dm_device_events(); + assert_int_equal(add_dm_device_event("foo", 1, 5), 0); + assert_int_equal(add_dm_device_event("bar", 1, 7), 0); + assert_int_equal(add_dm_device_event("baz", 1, 12), 0); + assert_int_equal(add_dm_device_event("qux", 0, 4), 0); + assert_int_equal(add_dm_device_event("xyzzy", 1, 8), 0); + will_return(__wrap_dm_geteventnr, 0); + assert_int_equal(watch_dmevents("foo"), 0); + will_return(__wrap_dm_geteventnr, 0); + assert_int_equal(watch_dmevents("bar"), 0); + will_return(__wrap_dm_geteventnr, 0); + assert_int_equal(watch_dmevents("xyzzy"), 0); + assert_int_equal(add_dm_device_event("foo", 1, 6), 0); + assert_int_equal(remove_dm_device_event("xyzzy"), 0); + will_return(__wrap_libmp_dm_task_create, &data); + will_return(__wrap_dm_task_no_open_count, 1); + will_return(__wrap_dm_task_run, 1); + will_return(__wrap_dm_task_get_names, 1); + assert_int_equal(dm_get_events(), 0); + dev_evt = find_dmevents("foo"); + assert_ptr_not_equal(dev_evt, NULL); + assert_int_equal(dev_evt->evt_nr, 6); + assert_int_equal(dev_evt->action, EVENT_UPDATE); + dev_evt = find_dmevents("bar"); + assert_ptr_not_equal(dev_evt, NULL); + assert_int_equal(dev_evt->evt_nr, 7); + assert_int_equal(dev_evt->action, EVENT_NOTHING); + dev_evt = find_dmevents("xyzzy"); + assert_ptr_not_equal(dev_evt, NULL); + assert_int_equal(dev_evt->evt_nr, 8); + assert_int_equal(dev_evt->action, EVENT_REMOVE); + assert_ptr_equal(find_dmevents("baz"), NULL); + assert_ptr_equal(find_dmevents("qux"), NULL); + assert_int_equal(VECTOR_SIZE(waiter->events), 3); +} + +/* poll does not return an event. nothing happens. 
The + * devices remain after this test */ +static void test_dmevent_loop_bad0(void **state) +{ + struct dm_device *dev; + struct dev_event *dev_evt; + struct test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + skip(); + + remove_all_dm_device_events(); + unwatch_all_dmevents(); + assert_int_equal(add_dm_device_event("foo", 1, 5), 0); + will_return(__wrap_dm_geteventnr, 0); + assert_int_equal(watch_dmevents("foo"), 0); + assert_int_equal(add_dm_device_event("foo", 1, 6), 0); + will_return(__wrap_poll, 0); + assert_int_equal(dmevent_loop(), 1); + dev_evt = find_dmevents("foo"); + assert_ptr_not_equal(dev_evt, NULL); + assert_int_equal(dev_evt->evt_nr, 5); + assert_int_equal(dev_evt->action, EVENT_NOTHING); + dev = find_dm_device("foo"); + assert_ptr_not_equal(dev, NULL); + assert_int_equal(dev->evt_nr, 6); + assert_int_equal(dev->update_nr, 5); +} + +/* arm_dm_event_poll's ioctl fails. Nothing happens */ +static void test_dmevent_loop_bad1(void **state) +{ + struct dm_device *dev; + struct dev_event *dev_evt; + struct test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + skip(); + + will_return(__wrap_poll, 1); + will_return(__wrap_ioctl, -1); + assert_int_equal(dmevent_loop(), 1); + dev_evt = find_dmevents("foo"); + assert_ptr_not_equal(dev_evt, NULL); + assert_int_equal(dev_evt->evt_nr, 5); + assert_int_equal(dev_evt->action, EVENT_NOTHING); + dev = find_dm_device("foo"); + assert_ptr_not_equal(dev, NULL); + assert_int_equal(dev->evt_nr, 6); + assert_int_equal(dev->update_nr, 5); +} + +/* dm_get_events fails. 
Nothing happens */ +static void test_dmevent_loop_bad2(void **state) +{ + struct dm_device *dev; + struct dev_event *dev_evt; + struct test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + skip(); + + will_return(__wrap_poll, 1); + will_return(__wrap_ioctl, 0); + will_return(__wrap_libmp_dm_task_create, NULL); + assert_int_equal(dmevent_loop(), 1); + dev_evt = find_dmevents("foo"); + assert_ptr_not_equal(dev_evt, NULL); + assert_int_equal(dev_evt->evt_nr, 5); + assert_int_equal(dev_evt->action, EVENT_NOTHING); + dev = find_dm_device("foo"); + assert_ptr_not_equal(dev, NULL); + assert_int_equal(dev->evt_nr, 6); + assert_int_equal(dev->update_nr, 5); +} + +/* verify dmevent_loop runs successfully when no devices are being + * watched */ +static void test_dmevent_loop_good0(void **state) +{ + struct test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + skip(); + + remove_all_dm_device_events(); + unwatch_all_dmevents(); + will_return(__wrap_poll, 1); + will_return(__wrap_ioctl, 0); + will_return(__wrap_libmp_dm_task_create, &data); + will_return(__wrap_dm_task_no_open_count, 1); + will_return(__wrap_dm_task_run, 1); + will_return(__wrap_dm_task_get_names, 1); + assert_int_equal(dmevent_loop(), 1); +} + +/* Watch 3 devices, where one device has an event (foo), one device is + * removed (xyzzy), and one device does nothing (bar). Verify that + * the device with the event gets updated, the device that is removed + * gets unwatched, and the device with no events stays the same. 
+ * The devices remain after this test */ +static void test_dmevent_loop_good1(void **state) +{ + struct dm_device *dev; + struct dev_event *dev_evt; + struct test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + skip(); + + remove_all_dm_device_events(); + unwatch_all_dmevents(); + assert_int_equal(add_dm_device_event("foo", 1, 5), 0); + assert_int_equal(add_dm_device_event("bar", 1, 7), 0); + assert_int_equal(add_dm_device_event("baz", 1, 12), 0); + assert_int_equal(add_dm_device_event("xyzzy", 1, 8), 0); + will_return(__wrap_dm_geteventnr, 0); + assert_int_equal(watch_dmevents("foo"), 0); + will_return(__wrap_dm_geteventnr, 0); + assert_int_equal(watch_dmevents("bar"), 0); + will_return(__wrap_dm_geteventnr, 0); + assert_int_equal(watch_dmevents("xyzzy"), 0); + assert_int_equal(add_dm_device_event("foo", 1, 6), 0); + assert_int_equal(remove_dm_device_event("xyzzy"), 0); + will_return(__wrap_poll, 1); + will_return(__wrap_ioctl, 0); + will_return(__wrap_libmp_dm_task_create, &data); + will_return(__wrap_dm_task_no_open_count, 1); + will_return(__wrap_dm_task_run, 1); + will_return(__wrap_dm_task_get_names, 1); + expect_string(__wrap_update_multipath, mapname, "foo"); + will_return(__wrap_update_multipath, 0); + expect_string(__wrap_remove_map_by_alias, alias, "xyzzy"); + assert_int_equal(dmevent_loop(), 1); + assert_int_equal(VECTOR_SIZE(waiter->events), 2); + assert_int_equal(VECTOR_SIZE(data.dm_devices), 3); + dev_evt = find_dmevents("foo"); + assert_ptr_not_equal(dev_evt, NULL); + assert_int_equal(dev_evt->evt_nr, 6); + assert_int_equal(dev_evt->action, EVENT_NOTHING); + dev = find_dm_device("foo"); + assert_ptr_not_equal(dev, NULL); + assert_int_equal(dev->evt_nr, 6); + assert_int_equal(dev->update_nr, 6); + dev_evt = find_dmevents("bar"); + assert_ptr_not_equal(dev_evt, NULL); + assert_int_equal(dev_evt->evt_nr, 7); + assert_int_equal(dev_evt->action, EVENT_NOTHING); + dev = find_dm_device("bar"); + assert_ptr_not_equal(dev, NULL); + 
assert_int_equal(dev->evt_nr, 7); + assert_int_equal(dev->update_nr, 7); + assert_ptr_equal(find_dmevents("xyzzy"), NULL); + assert_ptr_equal(find_dm_device("xyzzy"), NULL); +} + +/* watch another dm device and add events to two of them, so bar and + * baz have new events, and foo doesn't. Set update_multipath to + * fail for baz. Verify that baz is unwatched, bar is updated, and + * foo stays the same. */ +static void test_dmevent_loop_good2(void **state) +{ + struct dm_device *dev; + struct dev_event *dev_evt; + struct test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + skip(); + + assert_int_equal(add_dm_device_event("bar", 1, 9), 0); + will_return(__wrap_dm_geteventnr, 0); + assert_int_equal(watch_dmevents("baz"), 0); + assert_int_equal(add_dm_device_event("baz", 1, 14), 0); + will_return(__wrap_poll, 1); + will_return(__wrap_ioctl, 0); + will_return(__wrap_libmp_dm_task_create, &data); + will_return(__wrap_dm_task_no_open_count, 1); + will_return(__wrap_dm_task_run, 1); + will_return(__wrap_dm_task_get_names, 1); + expect_string(__wrap_update_multipath, mapname, "bar"); + will_return(__wrap_update_multipath, 0); + expect_string(__wrap_update_multipath, mapname, "baz"); + will_return(__wrap_update_multipath, 1); + assert_int_equal(dmevent_loop(), 1); + assert_int_equal(VECTOR_SIZE(waiter->events), 2); + assert_int_equal(VECTOR_SIZE(data.dm_devices), 2); + dev_evt = find_dmevents("foo"); + assert_ptr_not_equal(dev_evt, NULL); + assert_int_equal(dev_evt->evt_nr, 6); + assert_int_equal(dev_evt->action, EVENT_NOTHING); + dev = find_dm_device("foo"); + assert_ptr_not_equal(dev, NULL); + assert_int_equal(dev->evt_nr, 6); + assert_int_equal(dev->update_nr, 6); + dev_evt = find_dmevents("bar"); + assert_ptr_not_equal(dev_evt, NULL); + assert_int_equal(dev_evt->evt_nr, 9); + assert_int_equal(dev_evt->action, EVENT_NOTHING); + dev = find_dm_device("bar"); + assert_ptr_not_equal(dev, NULL); + assert_int_equal(dev->evt_nr, 9); + 
assert_int_equal(dev->update_nr, 9); + assert_ptr_equal(find_dmevents("baz"), NULL); + assert_ptr_equal(find_dm_device("baz"), NULL); +} + +/* remove dm device foo, and unwatch events on bar. Verify that + * foo is cleaned up and unwatched, and bar is no longer updated */ +static void test_dmevent_loop_good3(void **state) +{ + struct dm_device *dev; + struct test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + skip(); + + assert_int_equal(remove_dm_device_event("foo"), 0); + unwatch_dmevents("bar"); + will_return(__wrap_poll, 1); + will_return(__wrap_ioctl, 0); + will_return(__wrap_libmp_dm_task_create, &data); + will_return(__wrap_dm_task_no_open_count, 1); + will_return(__wrap_dm_task_run, 1); + will_return(__wrap_dm_task_get_names, 1); + expect_string(__wrap_remove_map_by_alias, alias, "foo"); + assert_int_equal(dmevent_loop(), 1); + assert_int_equal(VECTOR_SIZE(waiter->events), 0); + assert_int_equal(VECTOR_SIZE(data.dm_devices), 1); + dev = find_dm_device("bar"); + assert_ptr_not_equal(dev, NULL); + assert_int_equal(dev->evt_nr, 9); + assert_int_equal(dev->update_nr, 9); + assert_ptr_equal(find_dmevents("foo"), NULL); + assert_ptr_equal(find_dmevents("bar"), NULL); + assert_ptr_equal(find_dm_device("foo"), NULL); +} + + +/* verify that rearming the dmevents polling works */ +static void test_arm_poll(void **state) +{ + struct test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + skip(); + will_return(__wrap_ioctl, 0); + assert_int_equal(arm_dm_event_poll(waiter->fd), 0); +} + +/* verify that the waiter is cleaned up */ +static void test_cleanup_waiter(void **state) +{ + struct test_data *datap = (struct test_data *)(*state); + if (datap == NULL) + skip(); + cleanup_dmevent_waiter(); + assert_ptr_equal(waiter, NULL); +} + +int test_dmevents(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_init_waiter_bad0), + cmocka_unit_test(test_init_waiter_bad1), + cmocka_unit_test(test_init_waiter_good0), + 
cmocka_unit_test(test_watch_dmevents_bad0), + cmocka_unit_test(test_watch_dmevents_bad1), + cmocka_unit_test(test_watch_dmevents_bad2), + cmocka_unit_test(test_watch_dmevents_good0), + cmocka_unit_test(test_watch_dmevents_good1), + cmocka_unit_test(test_watch_dmevents_good2), + cmocka_unit_test(test_get_events_bad0), + cmocka_unit_test(test_get_events_bad1), + cmocka_unit_test(test_get_events_bad2), + cmocka_unit_test(test_get_events_good0), + cmocka_unit_test(test_get_events_good1), + cmocka_unit_test(test_arm_poll), + cmocka_unit_test(test_dmevent_loop_bad0), + cmocka_unit_test(test_dmevent_loop_bad1), + cmocka_unit_test(test_dmevent_loop_bad2), + cmocka_unit_test(test_dmevent_loop_good0), + cmocka_unit_test(test_dmevent_loop_good1), + cmocka_unit_test(test_dmevent_loop_good2), + cmocka_unit_test(test_dmevent_loop_good3), + cmocka_unit_test(test_cleanup_waiter), + }; + return cmocka_run_group_tests(tests, setup, teardown); +} + +int main(void) +{ + int ret = 0; + + ret += test_dmevents(); + return ret; +} diff --git a/tests/globals.c b/tests/globals.c new file mode 100644 index 0000000..8add5eb --- /dev/null +++ b/tests/globals.c @@ -0,0 +1,17 @@ +#include "structs.h" +#include "config.h" + +/* Required globals */ +struct udev *udev; +int logsink = -1; +struct config conf = { + .verbosity = 4, +}; + +struct config *get_multipath_config(void) +{ + return &conf; +} + +void put_multipath_config(void *arg) +{} diff --git a/tests/hwtable.c b/tests/hwtable.c new file mode 100644 index 0000000..473028b --- /dev/null +++ b/tests/hwtable.c @@ -0,0 +1,1781 @@ +/* Set BROKEN to 1 to treat broken behavior as success */ +#define BROKEN 1 +#define VERBOSITY 2 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "structs.h" +#include "structs_vec.h" +#include "config.h" +#include "debug.h" +#include "defaults.h" +#include "pgpolicies.h" +#include "test-lib.h" +#include "print.h" 
+#include "util.h" + +#define N_CONF_FILES 2 + +static const char tmplate[] = "/tmp/hwtable-XXXXXX"; +/* pretend new dm, use minio_rq */ +static const unsigned int dm_tgt_version[3] = { 1, 1, 1 }; + +struct key_value { + const char *key; + const char *value; +}; + +struct hwt_state { + char *tmpname; + char *dirname; + FILE *config_file; + FILE *conf_dir_file[N_CONF_FILES]; + struct vectors *vecs; + void (*test)(const struct hwt_state *); + const char *test_name; +}; + +#define SET_TEST_FUNC(hwt, func) do { \ + hwt->test = func; \ + hwt->test_name = #func; \ + } while (0) + +static struct config *_conf; +struct udev *udev; +int logsink = -1; + +struct config *get_multipath_config(void) +{ + return _conf; +} + +void put_multipath_config(void *arg) +{} + +void make_config_file_path(char *buf, int buflen, + const struct hwt_state *hwt, int i) +{ + static const char fn_template[] = "%s/test-%02d.conf"; + + if (i == -1) + /* main config file */ + snprintf(buf, buflen, fn_template, hwt->tmpname, 0); + else + snprintf(buf, buflen, fn_template, hwt->dirname, i); +} + +static void reset_vecs(struct vectors *vecs) +{ + remove_maps(vecs); + free_pathvec(vecs->pathvec, FREE_PATHS); + + vecs->pathvec = vector_alloc(); + assert_ptr_not_equal(vecs->pathvec, NULL); + vecs->mpvec = vector_alloc(); + assert_ptr_not_equal(vecs->mpvec, NULL); +} + +static void free_hwt(struct hwt_state *hwt) +{ + char buf[PATH_MAX]; + int i; + + if (hwt->config_file != NULL) + fclose(hwt->config_file); + for (i = 0; i < N_CONF_FILES; i++) { + if (hwt->conf_dir_file[i] != NULL) + fclose(hwt->conf_dir_file[i]); + } + + if (hwt->tmpname != NULL) { + make_config_file_path(buf, sizeof(buf), hwt, -1); + unlink(buf); + rmdir(hwt->tmpname); + free(hwt->tmpname); + } + + if (hwt->dirname != NULL) { + for (i = 0; i < N_CONF_FILES; i++) { + make_config_file_path(buf, sizeof(buf), hwt, i); + unlink(buf); + } + rmdir(hwt->dirname); + free(hwt->dirname); + } + + if (hwt->vecs != NULL) { + if (hwt->vecs->mpvec != 
NULL) + remove_maps(hwt->vecs); + if (hwt->vecs->pathvec != NULL) + free_pathvec(hwt->vecs->pathvec, FREE_PATHS); + pthread_mutex_destroy(&hwt->vecs->lock.mutex); + free(hwt->vecs); + } + free(hwt); +} + +static int setup(void **state) +{ + struct hwt_state *hwt; + char buf[PATH_MAX]; + int i; + + *state = NULL; + hwt = calloc(1, sizeof(*hwt)); + if (hwt == NULL) + return -1; + + snprintf(buf, sizeof(buf), "%s", tmplate); + if (mkdtemp(buf) == NULL) { + condlog(0, "mkdtemp: %s", strerror(errno)); + goto err; + } + hwt->tmpname = strdup(buf); + + snprintf(buf, sizeof(buf), "%s", tmplate); + if (mkdtemp(buf) == NULL) { + condlog(0, "mkdtemp (2): %s", strerror(errno)); + goto err; + } + hwt->dirname = strdup(buf); + + make_config_file_path(buf, sizeof(buf), hwt, -1); + hwt->config_file = fopen(buf, "w+"); + if (hwt->config_file == NULL) + goto err; + + for (i = 0; i < N_CONF_FILES; i++) { + make_config_file_path(buf, sizeof(buf), hwt, i); + hwt->conf_dir_file[i] = fopen(buf, "w+"); + if (hwt->conf_dir_file[i] == NULL) + goto err; + } + + hwt->vecs = calloc(1, sizeof(*hwt->vecs)); + if (hwt->vecs == NULL) + goto err; + pthread_mutex_init(&hwt->vecs->lock.mutex, NULL); + hwt->vecs->pathvec = vector_alloc(); + hwt->vecs->mpvec = vector_alloc(); + if (hwt->vecs->pathvec == NULL || hwt->vecs->mpvec == NULL) + goto err; + + *state = hwt; + return 0; + +err: + free_hwt(hwt); + return -1; +} + +static int teardown(void **state) +{ + if (state == NULL || *state == NULL) + return -1; + + free_hwt(*state); + *state = NULL; + + return 0; +} + +/* + * Helpers for creating the config file(s) + */ + +static void reset_config(FILE *ff) +{ + if (ff == NULL) + return; + rewind(ff); + if (ftruncate(fileno(ff), 0) == -1) + condlog(1, "ftruncate: %s", strerror(errno)); +} + +static void reset_configs(const struct hwt_state *hwt) +{ + int i; + + reset_config(hwt->config_file); + for (i = 0; i < N_CONF_FILES; i++) + reset_config(hwt->conf_dir_file[i]); +} + +static void 
write_key_values(FILE *ff, int nkv, const struct key_value *kv) +{ + int i; + + for (i = 0; i < nkv; i++) { + if (strchr(kv[i].value, ' ') == NULL && + strchr(kv[i].value, '\"') == NULL) + fprintf(ff, "\t%s %s\n", kv[i].key, kv[i].value); + else + fprintf(ff, "\t%s \"%s\"\n", kv[i].key, kv[i].value); + } +} + +static void begin_section(FILE *ff, const char *section) +{ + fprintf(ff, "%s {\n", section); +} + +static void end_section(FILE *ff) +{ + fprintf(ff, "}\n"); +} + +static void write_section(FILE *ff, const char *section, + int nkv, const struct key_value *kv) +{ + begin_section(ff, section); + write_key_values(ff, nkv, kv); + end_section(ff); +} + +static void write_defaults(const struct hwt_state *hwt) +{ + static const char bindings_name[] = "bindings"; + static struct key_value defaults[] = { + { "config_dir", NULL }, + { "bindings_file", NULL }, + { "multipath_dir", NULL }, + { "detect_prio", "no" }, + { "detect_checker", "no" }, + }; + char buf[sizeof(tmplate) + sizeof(bindings_name)]; + char dirbuf[PATH_MAX]; + + snprintf(buf, sizeof(buf), "%s/%s", hwt->tmpname, bindings_name); + defaults[0].value = hwt->dirname; + defaults[1].value = buf; + assert_ptr_not_equal(getcwd(dirbuf, sizeof(dirbuf)), NULL); + strncat(dirbuf, "/lib", sizeof(dirbuf) - 5); + defaults[2].value = dirbuf; + write_section(hwt->config_file, "defaults", + ARRAY_SIZE(defaults), defaults); +} + +static void begin_config(const struct hwt_state *hwt) +{ + reset_configs(hwt); + write_defaults(hwt); +} + +static void begin_section_all(const struct hwt_state *hwt, const char *section) +{ + int i; + + begin_section(hwt->config_file, section); + for (i = 0; i < N_CONF_FILES; i++) + begin_section(hwt->conf_dir_file[i], section); +} + +static void end_section_all(const struct hwt_state *hwt) +{ + int i; + + end_section(hwt->config_file); + for (i = 0; i < N_CONF_FILES; i++) + end_section(hwt->conf_dir_file[i]); +} + +static void finish_config(const struct hwt_state *hwt) +{ + int i; + + 
fflush(hwt->config_file); + for (i = 0; i < N_CONF_FILES; i++) { + fflush(hwt->conf_dir_file[i]); + } +} + +static void write_device(FILE *ff, int nkv, const struct key_value *kv) +{ + write_section(ff, "device", nkv, kv); +} + +/* + * Some macros to avoid boilerplate code + */ + +#define CHECK_STATE(state) ({ \ + assert_ptr_not_equal(state, NULL); \ + assert_ptr_not_equal(*(state), NULL); \ + *state; }) + +#define WRITE_EMPTY_CONF(hwt) do { \ + begin_config(hwt); \ + finish_config(hwt); \ + } while (0) + +#define WRITE_ONE_DEVICE(hwt, kv) do { \ + begin_config(hwt); \ + begin_section_all(hwt, "devices"); \ + write_device(hwt->config_file, ARRAY_SIZE(kv), kv); \ + end_section_all(hwt); \ + finish_config(hwt); \ + } while (0) + +#define WRITE_TWO_DEVICES(hwt, kv1, kv2) do { \ + begin_config(hwt); \ + begin_section_all(hwt, "devices"); \ + write_device(hwt->config_file, ARRAY_SIZE(kv1), kv1); \ + write_device(hwt->config_file, ARRAY_SIZE(kv2), kv2); \ + end_section_all(hwt); \ + finish_config(hwt); \ + } while (0) + +#define WRITE_TWO_DEVICES_W_DIR(hwt, kv1, kv2) do { \ + begin_config(hwt); \ + begin_section_all(hwt, "devices"); \ + write_device(hwt->config_file, ARRAY_SIZE(kv1), kv1); \ + write_device(hwt->conf_dir_file[0], \ + ARRAY_SIZE(kv2), kv2); \ + end_section_all(hwt); \ + finish_config(hwt); \ + } while (0) + +#define LOAD_CONFIG(hwt) ({ \ + char buf[PATH_MAX]; \ + struct config *__cf; \ + \ + make_config_file_path(buf, sizeof(buf), hwt, -1); \ + __cf = load_config(buf); \ + assert_ptr_not_equal(__cf, NULL); \ + assert_ptr_not_equal(__cf->hwtable, NULL); \ + __cf->verbosity = VERBOSITY; \ + memcpy(&__cf->version, dm_tgt_version, sizeof(__cf->version)); \ + __cf; }) + +#define FREE_CONFIG(conf) do { \ + free_config(conf); \ + conf = NULL; \ + } while (0) + +static void replace_config(const struct hwt_state *hwt, + const char *conf_str) +{ + FREE_CONFIG(_conf); + reset_configs(hwt); + fprintf(hwt->config_file, "%s", conf_str); + fflush(hwt->config_file); + 
_conf = LOAD_CONFIG(hwt); +} + +#define TEST_PROP(prop, val) do { \ + if (val == NULL) \ + assert_ptr_equal(prop, NULL); \ + else { \ + assert_ptr_not_equal(prop, NULL); \ + assert_string_equal(prop, val); \ + } \ + } while (0) + +#if BROKEN +#define TEST_PROP_BROKEN(name, prop, bad, good) do { \ + condlog(1, "%s: WARNING: Broken test for %s == \"%s\" on line %d, should be \"%s\"", \ + __func__, name, bad ? bad : "NULL", \ + __LINE__, good ? good : "NULL"); \ + TEST_PROP(prop, bad); \ + } while (0) +#else +#define TEST_PROP_BROKEN(name, prop, bad, good) TEST_PROP(prop, good) +#endif + +/* + * Some predefined key/value pairs + */ + +static const char _wwid[] = "wwid"; +static const char _vendor[] = "vendor"; +static const char _product[] = "product"; +static const char _prio[] = "prio"; +static const char _checker[] = "path_checker"; +static const char _getuid[] = "getuid_callout"; +static const char _uid_attr[] = "uid_attribute"; +static const char _bl_product[] = "product_blacklist"; +static const char _minio[] = "rr_min_io_rq"; +static const char _no_path_retry[] = "no_path_retry"; + +/* Device identifiers */ +static const struct key_value vnd_foo = { _vendor, "foo" }; +static const struct key_value prd_bar = { _product, "bar" }; +static const struct key_value prd_bam = { _product, "bam" }; +static const struct key_value prd_baq = { _product, "\"bar\"" }; +static const struct key_value prd_baqq = { _product, "\"\"bar\"\"" }; +static const struct key_value prd_barz = { _product, "barz" }; +static const struct key_value vnd_boo = { _vendor, "boo" }; +static const struct key_value prd_baz = { _product, "baz" }; +static const struct key_value wwid_test = { _wwid, default_wwid }; + +/* Regular expressions */ +static const struct key_value vnd__oo = { _vendor, ".oo" }; +static const struct key_value vnd_t_oo = { _vendor, "^.oo" }; +static const struct key_value prd_ba_ = { _product, "ba." 
}; +static const struct key_value prd_ba_s = { _product, "(bar|baz|ba\\.)$" }; +/* Pathological cases, see below */ +static const struct key_value prd_barx = { _product, "ba[[rxy]" }; +static const struct key_value prd_bazy = { _product, "ba[zy]" }; +static const struct key_value prd_bazy1 = { _product, "ba(z|y)" }; + +/* Properties */ +static const struct key_value prio_emc = { _prio, "emc" }; +static const struct key_value prio_hds = { _prio, "hds" }; +static const struct key_value prio_rdac = { _prio, "rdac" }; +static const struct key_value chk_hp = { _checker, "hp_sw" }; +static const struct key_value gui_foo = { _getuid, "/tmp/foo" }; +static const struct key_value uid_baz = { _uid_attr, "BAZ_ATTR" }; +static const struct key_value bl_bar = { _bl_product, "bar" }; +static const struct key_value bl_baz = { _bl_product, "baz" }; +static const struct key_value bl_barx = { _bl_product, "ba[[rxy]" }; +static const struct key_value bl_bazy = { _bl_product, "ba[zy]" }; +static const struct key_value minio_99 = { _minio, "99" }; +static const struct key_value npr_37 = { _no_path_retry, "37" }; +static const struct key_value npr_queue = { _no_path_retry, "queue" }; + +/***** BEGIN TESTS SECTION *****/ + +/* + * Dump the configuration, substitute the dumped configuration + * for the current one, and verify that the result is identical. + */ +static void replicate_config(const struct hwt_state *hwt, bool local) +{ + char *cfg1, *cfg2; + vector hwtable; + struct config *conf; + + condlog(3, "--- %s: replicating %s configuration", __func__, + local ? 
"local" : "full"); + + conf = get_multipath_config(); + if (!local) + /* "full" configuration */ + cfg1 = snprint_config(conf, NULL, NULL, NULL); + else { + /* "local" configuration */ + hwtable = get_used_hwes(hwt->vecs->pathvec); + cfg1 = snprint_config(conf, NULL, hwtable, hwt->vecs->mpvec); + } + + assert_non_null(cfg1); + put_multipath_config(conf); + + replace_config(hwt, cfg1); + + /* + * The local configuration adds multipath entries, and may move device + * entries for local devices to the end of the list. Identical config + * strings therefore can't be expected in the "local" case. + * That doesn't matter. The important thing is that, with the reloaded + * configuration, the test case still passes. + */ + if (local) { + free(cfg1); + return; + } + + conf = get_multipath_config(); + cfg2 = snprint_config(conf, NULL, NULL, NULL); + assert_non_null(cfg2); + put_multipath_config(conf); + +// #define DBG_CONFIG 1 +#ifdef DBG_CONFIG +#define DUMP_CFG_STR(x) do { \ + FILE *tmp = fopen("/tmp/hwtable-" #x ".txt", "w"); \ + fprintf(tmp, "%s", x); \ + fclose(tmp); \ + } while (0) + + DUMP_CFG_STR(cfg1); + DUMP_CFG_STR(cfg2); +#endif + + assert_int_equal(strlen(cfg2), strlen(cfg1)); + assert_string_equal(cfg2, cfg1); + free(cfg1); + free(cfg2); +} + +/* + * Run hwt->test three times; once with the constructed configuration, + * once after re-reading the full dumped configuration, and once with the + * dumped local configuration. + * + * Expected: test passes every time. + */ +static void test_driver(void **state) +{ + const struct hwt_state *hwt; + + hwt = CHECK_STATE(state); + _conf = LOAD_CONFIG(hwt); + hwt->test(hwt); + + replicate_config(hwt, false); + reset_vecs(hwt->vecs); + hwt->test(hwt); + + replicate_config(hwt, true); + reset_vecs(hwt->vecs); + hwt->test(hwt); + + reset_vecs(hwt->vecs); + FREE_CONFIG(_conf); +} + +/* + * Sanity check for the test itself, because defaults may be changed + * in libmultipath. 
+ * + * Our checking for match or non-match relies on the defaults being + * different from what our device sections contain. + */ +static void test_sanity_globals(void **state) +{ + assert_string_not_equal(prio_emc.value, DEFAULT_PRIO); + assert_string_not_equal(prio_hds.value, DEFAULT_PRIO); + assert_string_not_equal(chk_hp.value, DEFAULT_CHECKER); + assert_int_not_equal(MULTIBUS, DEFAULT_PGPOLICY); + assert_int_not_equal(NO_PATH_RETRY_QUEUE, DEFAULT_NO_PATH_RETRY); + assert_int_not_equal(atoi(minio_99.value), DEFAULT_MINIO_RQ); + assert_int_not_equal(atoi(npr_37.value), DEFAULT_NO_PATH_RETRY); +} + +/* + * Regression test for internal hwtable. NVME is an example of two entries + * in the built-in hwtable, one of which matches a subset of the other. + */ +static void test_internal_nvme(const struct hwt_state *hwt) +{ + struct path *pp; + struct multipath *mp; + + /* + * Generic NVMe: expect defaults for pgpolicy and no_path_retry + */ + pp = mock_path("NVME", "NoName"); + mp = mock_multipath(pp); + assert_ptr_not_equal(mp, NULL); + TEST_PROP(checker_name(&pp->checker), NONE); + TEST_PROP(pp->uid_attribute, DEFAULT_NVME_UID_ATTRIBUTE); + assert_int_equal(mp->pgpolicy, DEFAULT_PGPOLICY); + assert_int_equal(mp->no_path_retry, DEFAULT_NO_PATH_RETRY); + assert_int_equal(mp->retain_hwhandler, RETAIN_HWHANDLER_OFF); + + /* + * NetApp NVMe: expect special values for pgpolicy and no_path_retry + */ + pp = mock_path_wwid("NVME", "NetApp ONTAP Controller", + default_wwid_1); + mp = mock_multipath(pp); + assert_ptr_not_equal(mp, NULL); + TEST_PROP(checker_name(&pp->checker), NONE); + TEST_PROP(pp->uid_attribute, "ID_WWN"); + assert_int_equal(mp->pgpolicy, MULTIBUS); + assert_int_equal(mp->no_path_retry, NO_PATH_RETRY_QUEUE); + assert_int_equal(mp->retain_hwhandler, RETAIN_HWHANDLER_OFF); +} + +static int setup_internal_nvme(void **state) +{ + struct hwt_state *hwt = CHECK_STATE(state); + + WRITE_EMPTY_CONF(hwt); + SET_TEST_FUNC(hwt, test_internal_nvme); + + return 0; +} + 
+/* + * Device section with a simple entry with double quotes ('foo:"bar"') + */ +static void test_quoted_hwe(const struct hwt_state *hwt) +{ + struct path *pp; + + /* foo:"bar" matches */ + pp = mock_path(vnd_foo.value, prd_baq.value); + TEST_PROP(prio_name(&pp->prio), prio_emc.value); + + /* foo:bar doesn't match */ + pp = mock_path(vnd_foo.value, prd_bar.value); + TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO); +} + +static int setup_quoted_hwe(void **state) +{ + struct hwt_state *hwt = CHECK_STATE(state); + const struct key_value kv[] = { vnd_foo, prd_baqq, prio_emc }; + + WRITE_ONE_DEVICE(hwt, kv); + SET_TEST_FUNC(hwt, test_quoted_hwe); + return 0; +} + +/* + * Device section with a single simple entry ("foo:bar") + */ +static void test_string_hwe(const struct hwt_state *hwt) +{ + struct path *pp; + + /* foo:bar matches */ + pp = mock_path(vnd_foo.value, prd_bar.value); + TEST_PROP(prio_name(&pp->prio), prio_emc.value); + + /* foo:baz doesn't match */ + pp = mock_path(vnd_foo.value, prd_baz.value); + TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO); + + /* boo:bar doesn't match */ + pp = mock_path(vnd_boo.value, prd_bar.value); + TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO); +} + +static int setup_string_hwe(void **state) +{ + struct hwt_state *hwt = CHECK_STATE(state); + const struct key_value kv[] = { vnd_foo, prd_bar, prio_emc }; + + WRITE_ONE_DEVICE(hwt, kv); + SET_TEST_FUNC(hwt, test_string_hwe); + return 0; +} + +/* + * Device section with a broken entry (no product) + * It should be ignored.
+ */ +static void test_broken_hwe(const struct hwt_state *hwt) +{ + struct path *pp; + + /* foo:bar doesn't match, as hwentry is ignored */ + pp = mock_path(vnd_foo.value, prd_bar.value); + TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO); + + /* boo:bar doesn't match */ + pp = mock_path(vnd_boo.value, prd_bar.value); + TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO); +} + +static int setup_broken_hwe(void **state) +{ + struct hwt_state *hwt = CHECK_STATE(state); + const struct key_value kv[] = { vnd_foo, prio_emc }; + + WRITE_ONE_DEVICE(hwt, kv); + SET_TEST_FUNC(hwt, test_broken_hwe); + return 0; +} + +/* + * Like test_broken_hwe, but in config_dir file. + * Reuses test_broken_hwe as the test function, under its own test name. + */ +static int setup_broken_hwe_dir(void **state) +{ + struct hwt_state *hwt = CHECK_STATE(state); + const struct key_value kv[] = { vnd_foo, prio_emc }; + + begin_config(hwt); + begin_section_all(hwt, "devices"); + write_device(hwt->conf_dir_file[0], ARRAY_SIZE(kv), kv); + end_section_all(hwt); + finish_config(hwt); + hwt->test = test_broken_hwe; + hwt->test_name = "test_broken_hwe_dir"; + return 0; +} + +/* + * Device section with a single regex entry ("^.foo:(bar|baz|ba\.)$") + */ +static void test_regex_hwe(const struct hwt_state *hwt) +{ + struct path *pp; + + /* foo:bar matches */ + pp = mock_path(vnd_foo.value, prd_bar.value); + TEST_PROP(prio_name(&pp->prio), prio_emc.value); + + /* foo:baz matches */ + pp = mock_path(vnd_foo.value, prd_baz.value); + TEST_PROP(prio_name(&pp->prio), prio_emc.value); + + /* boo:bar matches */ + pp = mock_path(vnd_boo.value, prd_bar.value); + TEST_PROP(prio_name(&pp->prio), prio_emc.value); + + /* foo:BAR doesn't match */ + pp = mock_path(vnd_foo.value, "BAR"); + TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO); + + /* bboo:bar doesn't match */ + pp = mock_path("bboo", prd_bar.value); + TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO); +} + +static int setup_regex_hwe(void **state) +{ + struct hwt_state *hwt = CHECK_STATE(state); + const struct key_value kv[] = { vnd_t_oo, prd_ba_s, 
prio_emc }; + + WRITE_ONE_DEVICE(hwt, kv); + SET_TEST_FUNC(hwt, test_regex_hwe); + return 0; +} + +/* + * Two device entries, kv1 is a regex match ("^.foo:(bar|baz|ba\.)$"), + * kv2 a string match (foo:bar) which matches a subset of the regex. + * Both are added to the main config file. + * + * Expected: Devices matching both get properties from both, kv2 taking + * precedence. Devices matching kv1 only just get props from kv1. + */ +static void test_regex_string_hwe(const struct hwt_state *hwt) +{ + struct path *pp; + + /* foo:baz matches kv1 */ + pp = mock_path(vnd_foo.value, prd_baz.value); + TEST_PROP(prio_name(&pp->prio), prio_emc.value); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); + + /* boo:baz matches kv1 */ + pp = mock_path(vnd_boo.value, prd_baz.value); + TEST_PROP(prio_name(&pp->prio), prio_emc.value); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); + + /* .oo:ba. matches kv1 */ + pp = mock_path(vnd__oo.value, prd_ba_.value); + TEST_PROP(prio_name(&pp->prio), prio_emc.value); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); + + /* .oo:(bar|baz|ba\.)$ 
doesn't match */ + pp = mock_path(vnd__oo.value, prd_ba_s.value); + TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER); + + /* foo:bar matches kv2 and kv1 */ + pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID); + TEST_PROP(prio_name(&pp->prio), prio_hds.value); + TEST_PROP(pp->getuid, gui_foo.value); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); +} + +static int setup_regex_string_hwe(void **state) +{ + struct hwt_state *hwt = CHECK_STATE(state); + const struct key_value kv1[] = { vnd_t_oo, prd_ba_s, prio_emc, chk_hp }; + const struct key_value kv2[] = { vnd_foo, prd_bar, prio_hds, gui_foo }; + + WRITE_TWO_DEVICES(hwt, kv1, kv2); + SET_TEST_FUNC(hwt, test_regex_string_hwe); + return 0; +} + +/* + * Two device entries, kv1 is a regex match ("^.foo:(bar|baz|ba\.)$"), + * kv2 a string match (foo:bar) which matches a subset of the regex. + * kv1 is added to the main config file, kv2 to a config_dir file. + * This case is more important than you may think, because it's equivalent + * to kv1 being in the built-in hwtable and kv2 in multipath.conf. + * + * Expected: Devices matching kv2 (and thus, both) get properties + * from both, kv2 taking precedence. + * Devices matching kv1 only just get props from kv1. + */ +static void test_regex_string_hwe_dir(const struct hwt_state *hwt) +{ + struct path *pp; + + /* foo:baz matches kv1 */ + pp = mock_path(vnd_foo.value, prd_baz.value); + TEST_PROP(prio_name(&pp->prio), prio_emc.value); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); + + /* boo:baz matches kv1 */ + pp = mock_path(vnd_boo.value, prd_baz.value); + TEST_PROP(prio_name(&pp->prio), prio_emc.value); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); + + /* .oo:ba. 
matches kv1 */ + pp = mock_path(vnd__oo.value, prd_ba_.value); + TEST_PROP(prio_name(&pp->prio), prio_emc.value); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); + + /* .oo:(bar|baz|ba\.)$ doesn't match */ + pp = mock_path(vnd__oo.value, prd_ba_s.value); + TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER); + + /* foo:bar matches kv2 and kv1 */ + pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID); + /* Later match takes precedence for prio */ + TEST_PROP(prio_name(&pp->prio), prio_hds.value); + TEST_PROP(pp->getuid, gui_foo.value); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); +} + +static int setup_regex_string_hwe_dir(void **state) +{ + const struct key_value kv1[] = { vnd_t_oo, prd_ba_s, prio_emc, chk_hp }; + const struct key_value kv2[] = { vnd_foo, prd_bar, prio_hds, gui_foo }; + struct hwt_state *hwt = CHECK_STATE(state); + + WRITE_TWO_DEVICES_W_DIR(hwt, kv1, kv2); + SET_TEST_FUNC(hwt, test_regex_string_hwe_dir); + return 0; +} + +/* + * Three device entries, kv1 is a regex match and kv2 and kv3 string + * matches, where kv3 is a substring of kv2. All in different config + * files. + * + * Expected: Devices matching kv3 get props from all, devices matching + * kv2 from kv2 and kv1, and devices matching kv1 only just from kv1.
+ */ +static void test_regex_2_strings_hwe_dir(const struct hwt_state *hwt) +{ + struct path *pp; + + /* foo:baz matches kv1 */ + pp = mock_path(vnd_foo.value, prd_baz.value); + TEST_PROP(prio_name(&pp->prio), prio_emc.value); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(pp->uid_attribute, DEFAULT_UID_ATTRIBUTE); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); + + /* boo:baz doesn't match (all three entries use vendor foo) */ + pp = mock_path(vnd_boo.value, prd_baz.value); + TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(pp->uid_attribute, DEFAULT_UID_ATTRIBUTE); + TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER); + + /* foo:bar matches kv2 and kv1 */ + pp = mock_path(vnd_foo.value, prd_bar.value); + TEST_PROP(prio_name(&pp->prio), prio_hds.value); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(pp->uid_attribute, uid_baz.value); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); + + /* foo:barz matches kv3 and kv2 and kv1; getuid set, uid_attribute NULL */ + pp = mock_path_flags(vnd_foo.value, prd_barz.value, USE_GETUID); + TEST_PROP(prio_name(&pp->prio), prio_rdac.value); + TEST_PROP(pp->getuid, gui_foo.value); + TEST_PROP(pp->uid_attribute, NULL); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); +} + +static int setup_regex_2_strings_hwe_dir(void **state) +{ + const struct key_value kv1[] = { vnd_foo, prd_ba_, prio_emc, chk_hp }; + const struct key_value kv2[] = { vnd_foo, prd_bar, prio_hds, uid_baz }; + const struct key_value kv3[] = { vnd_foo, prd_barz, + prio_rdac, gui_foo }; + struct hwt_state *hwt = CHECK_STATE(state); + + begin_config(hwt); + begin_section_all(hwt, "devices"); + write_device(hwt->config_file, ARRAY_SIZE(kv1), kv1); /* kv1: main config file */ + write_device(hwt->conf_dir_file[0], ARRAY_SIZE(kv2), kv2); /* kv2: first config_dir file */ + write_device(hwt->conf_dir_file[1], ARRAY_SIZE(kv3), kv3); /* kv3: second config_dir file */ + end_section_all(hwt); + finish_config(hwt); + SET_TEST_FUNC(hwt, test_regex_2_strings_hwe_dir); + return 0; +} + +/* + * Like test_regex_string_hwe_dir, but the order of kv1 and kv2 is exchanged.
+ * + * Expected: Devices matching kv1 (and thus, both) get properties + * from both, kv1 taking precedence. + * Devices matching kv1 only just get props from kv1. + */ +static void test_string_regex_hwe_dir(const struct hwt_state *hwt) +{ + struct path *pp; + + /* foo:bar matches kv2 and kv1; prio from kv1, getuid from kv2 */ + pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID); + TEST_PROP(prio_name(&pp->prio), prio_emc.value); + TEST_PROP(pp->getuid, gui_foo.value); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); + + /* foo:baz matches kv1 */ + pp = mock_path(vnd_foo.value, prd_baz.value); + TEST_PROP(prio_name(&pp->prio), prio_emc.value); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); + + /* boo:baz matches kv1 */ + pp = mock_path(vnd_boo.value, prd_baz.value); + TEST_PROP(prio_name(&pp->prio), prio_emc.value); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); + + /* .oo:ba. matches kv1 */ + pp = mock_path(vnd__oo.value, prd_ba_.value); + TEST_PROP(prio_name(&pp->prio), prio_emc.value); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); + + /* .oo:(bar|baz|ba\.)$ doesn't match */ + pp = mock_path(vnd__oo.value, prd_ba_s.value); + TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER); +} + +static int setup_string_regex_hwe_dir(void **state) +{ + const struct key_value kv1[] = { vnd_t_oo, prd_ba_s, prio_emc, chk_hp }; + const struct key_value kv2[] = { vnd_foo, prd_bar, prio_hds, gui_foo }; + struct hwt_state *hwt = CHECK_STATE(state); + + WRITE_TWO_DEVICES_W_DIR(hwt, kv2, kv1); /* swapped order: kv2 is passed first, kv1 second */ + SET_TEST_FUNC(hwt, test_string_regex_hwe_dir); + return 0; +} + +/* + * Two identical device entries kv1 and kv2, trivial regex ("string"). + * Both are added to the main config file. + * These entries are NOT merged. + * This could happen in a large multipath.conf file.
+ * + * Expected: matching devices get props from both, kv2 taking precedence. + */ +static void test_2_ident_strings_hwe(const struct hwt_state *hwt) +{ + struct path *pp; + + /* foo:baz doesn't match */ + pp = mock_path(vnd_foo.value, prd_baz.value); + TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER); + + /* foo:bar matches both: prio and getuid from kv2, checker from kv1 */ + pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID); + TEST_PROP(prio_name(&pp->prio), prio_hds.value); + TEST_PROP(pp->getuid, gui_foo.value); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); +} + +static int setup_2_ident_strings_hwe(void **state) +{ + const struct key_value kv1[] = { vnd_foo, prd_bar, prio_emc, chk_hp }; + const struct key_value kv2[] = { vnd_foo, prd_bar, prio_hds, gui_foo }; + struct hwt_state *hwt = CHECK_STATE(state); + + WRITE_TWO_DEVICES(hwt, kv1, kv2); + SET_TEST_FUNC(hwt, test_2_ident_strings_hwe); + return 0; +} + +/* + * Two identical device entries kv1 and kv2, trivial regex ("string"). + * Both are added to an extra config file. + * This could happen in a large multipath.conf file. + * + * Expected: matching devices get props from both, kv2 taking precedence.
+ */ +static void test_2_ident_strings_both_dir(const struct hwt_state *hwt) +{ + struct path *pp; + + /* foo:baz doesn't match */ + pp = mock_path(vnd_foo.value, prd_baz.value); + TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER); + + /* foo:bar matches both: prio and getuid from kv2, checker from kv1 */ + pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID); + TEST_PROP(prio_name(&pp->prio), prio_hds.value); + TEST_PROP(pp->getuid, gui_foo.value); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); +} + +static int setup_2_ident_strings_both_dir(void **state) +{ + const struct key_value kv1[] = { vnd_foo, prd_bar, prio_emc, chk_hp }; + const struct key_value kv2[] = { vnd_foo, prd_bar, prio_hds, gui_foo }; + struct hwt_state *hwt = CHECK_STATE(state); + + begin_config(hwt); + begin_section_all(hwt, "devices"); + write_device(hwt->conf_dir_file[1], ARRAY_SIZE(kv1), kv1); /* both entries go to the same config_dir file */ + write_device(hwt->conf_dir_file[1], ARRAY_SIZE(kv2), kv2); + end_section_all(hwt); + finish_config(hwt); + SET_TEST_FUNC(hwt, test_2_ident_strings_both_dir); + return 0; +} + +/* + * Two identical device entries kv1 and kv2, trivial regex ("string"). + * Both are added to an extra config file. + * An empty entry kv0 with the same string exists in the main config file. + * + * Expected: matching devices get props from both, kv2 taking precedence.
+ */ +static void test_2_ident_strings_both_dir_w_prev(const struct hwt_state *hwt) +{ + struct path *pp; + + /* foo:baz doesn't match */ + pp = mock_path(vnd_foo.value, prd_baz.value); + TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER); + + /* foo:bar matches both: prio and getuid from kv2, checker from kv1 */ + pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID); + TEST_PROP(prio_name(&pp->prio), prio_hds.value); + TEST_PROP(pp->getuid, gui_foo.value); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); +} + +static int setup_2_ident_strings_both_dir_w_prev(void **state) +{ + struct hwt_state *hwt = CHECK_STATE(state); + + const struct key_value kv0[] = { vnd_foo, prd_bar }; + const struct key_value kv1[] = { vnd_foo, prd_bar, prio_emc, chk_hp }; + const struct key_value kv2[] = { vnd_foo, prd_bar, prio_hds, gui_foo }; + + begin_config(hwt); + begin_section_all(hwt, "devices"); + write_device(hwt->config_file, ARRAY_SIZE(kv0), kv0); /* kv0: vendor/product only, main config file */ + write_device(hwt->conf_dir_file[1], ARRAY_SIZE(kv1), kv1); + write_device(hwt->conf_dir_file[1], ARRAY_SIZE(kv2), kv2); + end_section_all(hwt); + finish_config(hwt); + SET_TEST_FUNC(hwt, test_2_ident_strings_both_dir_w_prev); + return 0; +} + +/* + * Two identical device entries kv1 and kv2, trivial regex ("string"). + * kv1 is added to the main config file, kv2 to a config_dir file. + * These entries are merged. + * This case is more important than you may think, because it's equivalent + * to kv1 being in the built-in hwtable and kv2 in multipath.conf. + * + * Expected: matching devices get props from both, kv2 taking precedence.
+ */ +static void test_2_ident_strings_hwe_dir(const struct hwt_state *hwt) +{ + struct path *pp; + + /* foo:baz doesn't match */ + pp = mock_path(vnd_foo.value, prd_baz.value); + TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER); + + /* foo:bar matches both: prio and getuid from kv2, checker from kv1 */ + pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID); + TEST_PROP(prio_name(&pp->prio), prio_hds.value); + TEST_PROP(pp->getuid, gui_foo.value); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); +} + +static int setup_2_ident_strings_hwe_dir(void **state) +{ + const struct key_value kv1[] = { vnd_foo, prd_bar, prio_emc, chk_hp }; + const struct key_value kv2[] = { vnd_foo, prd_bar, prio_hds, gui_foo }; + struct hwt_state *hwt = CHECK_STATE(state); + + WRITE_TWO_DEVICES_W_DIR(hwt, kv1, kv2); + SET_TEST_FUNC(hwt, test_2_ident_strings_hwe_dir); + return 0; +} + +/* + * Like test_2_ident_strings_hwe_dir, but this time the config_dir file + * contains an additional, empty entry (kv0). + * + * Expected: matching devices get props from kv1 and kv2, kv2 taking precedence.
+ */ +static void test_3_ident_strings_hwe_dir(const struct hwt_state *hwt) +{ + struct path *pp; + + /* foo:baz doesn't match */ + pp = mock_path(vnd_foo.value, prd_baz.value); + TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER); + + /* foo:bar matches all: prio and getuid from kv2, checker from kv1 */ + pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID); + TEST_PROP(prio_name(&pp->prio), prio_hds.value); + TEST_PROP(pp->getuid, gui_foo.value); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); +} + +static int setup_3_ident_strings_hwe_dir(void **state) +{ + const struct key_value kv0[] = { vnd_foo, prd_bar }; + const struct key_value kv1[] = { vnd_foo, prd_bar, prio_emc, chk_hp }; + const struct key_value kv2[] = { vnd_foo, prd_bar, prio_hds, gui_foo }; + struct hwt_state *hwt = CHECK_STATE(state); + + begin_config(hwt); + begin_section_all(hwt, "devices"); + write_device(hwt->config_file, ARRAY_SIZE(kv1), kv1); + write_device(hwt->conf_dir_file[1], ARRAY_SIZE(kv0), kv0); /* kv0: vendor/product only, written before kv2 */ + write_device(hwt->conf_dir_file[1], ARRAY_SIZE(kv2), kv2); + end_section_all(hwt); + finish_config(hwt); + SET_TEST_FUNC(hwt, test_3_ident_strings_hwe_dir); + return 0; +} + +/* + * Two identical device entries kv1 and kv2, non-trivial regex that matches + * itself (string ".oo" matches regex ".oo"). + * kv1 is added to the main config file, kv2 to a config_dir file. + * This case is more important than you may think, because it's equivalent + * to kv1 being in the built-in hwtable and kv2 in multipath.conf. + * + * Expected: matching devices get props from both, kv2 taking precedence.
+ */ +static void test_2_ident_self_matching_re_hwe_dir(const struct hwt_state *hwt) +{ + struct path *pp; + + /* foo:baz doesn't match */ + pp = mock_path(vnd_foo.value, prd_baz.value); + TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER); + + /* foo:bar matches both: prio and getuid from kv2, checker from kv1 */ + pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID); + TEST_PROP(prio_name(&pp->prio), prio_hds.value); + TEST_PROP(pp->getuid, gui_foo.value); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); +} + +static int setup_2_ident_self_matching_re_hwe_dir(void **state) +{ + const struct key_value kv1[] = { vnd__oo, prd_bar, prio_emc, chk_hp }; + const struct key_value kv2[] = { vnd__oo, prd_bar, prio_hds, gui_foo }; + struct hwt_state *hwt = CHECK_STATE(state); + + WRITE_TWO_DEVICES_W_DIR(hwt, kv1, kv2); + SET_TEST_FUNC(hwt, test_2_ident_self_matching_re_hwe_dir); + return 0; +} + +/* + * Two identical device entries kv1 and kv2, non-trivial regex that matches + * itself (string ".oo" matches regex ".oo"). + * kv1 and kv2 are added to the main config file. + * + * Expected: matching devices get props from both, kv2 taking precedence.
+ */ +static void test_2_ident_self_matching_re_hwe(const struct hwt_state *hwt) +{ + struct path *pp; + + /* foo:baz doesn't match */ + pp = mock_path(vnd_foo.value, prd_baz.value); + TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER); + + /* foo:bar matches both: prio and getuid from kv2, checker from kv1 */ + pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID); + TEST_PROP(prio_name(&pp->prio), prio_hds.value); + TEST_PROP(pp->getuid, gui_foo.value); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); +} + +static int setup_2_ident_self_matching_re_hwe(void **state) +{ + const struct key_value kv1[] = { vnd__oo, prd_bar, prio_emc, chk_hp }; + const struct key_value kv2[] = { vnd__oo, prd_bar, prio_hds, gui_foo }; + struct hwt_state *hwt = CHECK_STATE(state); + + WRITE_TWO_DEVICES(hwt, kv1, kv2); + SET_TEST_FUNC(hwt, test_2_ident_self_matching_re_hwe); + return 0; +} + +/* + * Two identical device entries kv1 and kv2, non-trivial regex that doesn't + * match itself (string "^.oo" doesn't match regex "^.oo"). + * kv1 is added to the main config file, kv2 to a config_dir file. + * This case is more important than you may think, see above. + * + * Expected: matching devices get props from both, kv2 taking precedence.
+ */ +static void +test_2_ident_not_self_matching_re_hwe_dir(const struct hwt_state *hwt) +{ + struct path *pp; + + /* foo:baz doesn't match */ + pp = mock_path(vnd_foo.value, prd_baz.value); + TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER); + + /* foo:bar matches both: prio and getuid from kv2, checker from kv1 */ + pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID); + TEST_PROP(prio_name(&pp->prio), prio_hds.value); + TEST_PROP(pp->getuid, gui_foo.value); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); +} + +static int setup_2_ident_not_self_matching_re_hwe_dir(void **state) +{ + const struct key_value kv1[] = { vnd_t_oo, prd_bar, prio_emc, chk_hp }; + const struct key_value kv2[] = { vnd_t_oo, prd_bar, prio_hds, gui_foo }; + struct hwt_state *hwt = CHECK_STATE(state); + + WRITE_TWO_DEVICES_W_DIR(hwt, kv1, kv2); + SET_TEST_FUNC(hwt, test_2_ident_not_self_matching_re_hwe_dir); + return 0; +} + +/* + * Two different non-trivial regexes kv1, kv2. The 1st one matches the 2nd, but + * it doesn't match all possible strings matching the second. + * ("ba[zy]" matches regex "ba[[rxy]", but "baz" does not). + * + * Expected: Devices matching both regexes get properties from both, kv2 + * taking precedence. Devices matching just one regex get properties from + * that one regex only. + */ +static void test_2_matching_res_hwe_dir(const struct hwt_state *hwt) +{ + struct path *pp; + + /* foo:bar matches k1 only */ + pp = mock_path(vnd_foo.value, prd_bar.value); + TEST_PROP(prio_name(&pp->prio), prio_emc.value); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); + + /* foo:bay matches k1 and k2 */ + pp = mock_path_flags(vnd_foo.value, "bay", USE_GETUID); + TEST_PROP(prio_name(&pp->prio), prio_hds.value); + TEST_PROP(pp->getuid, gui_foo.value); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); + + /* foo:baz matches k2 only. 
*/ + pp = mock_path_flags(vnd_foo.value, prd_baz.value, USE_GETUID); + TEST_PROP(prio_name(&pp->prio), prio_hds.value); + TEST_PROP(pp->getuid, gui_foo.value); + TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER); +} + +static int setup_2_matching_res_hwe_dir(void **state) +{ + const struct key_value kv1[] = { vnd_foo, prd_barx, prio_emc, chk_hp }; + const struct key_value kv2[] = { vnd_foo, prd_bazy, prio_hds, gui_foo }; + struct hwt_state *hwt = CHECK_STATE(state); + + WRITE_TWO_DEVICES_W_DIR(hwt, kv1, kv2); + SET_TEST_FUNC(hwt, test_2_matching_res_hwe_dir); + return 0; +} + +/* + * Two different non-trivial regexes which match the same set of strings. + * But they don't match each other. + * "baz" matches both regex "ba[zy]" and "ba(z|y)" + * + * Expected: matching devices get properties from both, kv2 taking precedence. + */ +static void test_2_nonmatching_res_hwe_dir(const struct hwt_state *hwt) +{ + struct path *pp; + + /* foo:bar doesn't match */ + pp = mock_path(vnd_foo.value, prd_bar.value); + TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO); + TEST_PROP(pp->getuid, NULL); + TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER); + + pp = mock_path_flags(vnd_foo.value, prd_baz.value, USE_GETUID); /* foo:baz matches both */ + TEST_PROP(prio_name(&pp->prio), prio_hds.value); + TEST_PROP(pp->getuid, gui_foo.value); + TEST_PROP(checker_name(&pp->checker), chk_hp.value); +} + +static int setup_2_nonmatching_res_hwe_dir(void **state) +{ + const struct key_value kv1[] = { vnd_foo, prd_bazy, prio_emc, chk_hp }; + const struct key_value kv2[] = { vnd_foo, prd_bazy1, + prio_hds, gui_foo }; + struct hwt_state *hwt = CHECK_STATE(state); + + WRITE_TWO_DEVICES_W_DIR(hwt, kv1, kv2); + SET_TEST_FUNC(hwt, test_2_nonmatching_res_hwe_dir); + return 0; +} + +/* + * Simple blacklist test. + * + * NOTE: test failures in blacklisting tests will manifest as cmocka errors + * "Could not get value to mock function XYZ", because pathinfo() takes + * different code paths for blacklisted devices.
+ */ +static void test_blacklist(const struct hwt_state *hwt) +{ + mock_path_flags(vnd_foo.value, prd_bar.value, BL_BY_DEVICE); /* foo:bar is blacklisted */ + mock_path(vnd_foo.value, prd_baz.value); /* foo:baz is not */ +} + +static int setup_blacklist(void **state) +{ + const struct key_value kv1[] = { vnd_foo, prd_bar }; + struct hwt_state *hwt = CHECK_STATE(state); + + begin_config(hwt); + begin_section_all(hwt, "blacklist"); + write_device(hwt->config_file, ARRAY_SIZE(kv1), kv1); + end_section_all(hwt); + finish_config(hwt); + SET_TEST_FUNC(hwt, test_blacklist); + return 0; +} + +/* + * Simple blacklist test with regex and exception + */ +static void test_blacklist_regex(const struct hwt_state *hwt) +{ + mock_path(vnd_foo.value, prd_bar.value); /* foo:bar is listed in blacklist_exceptions */ + mock_path_flags(vnd_foo.value, prd_baz.value, BL_BY_DEVICE); + mock_path(vnd_foo.value, prd_bam.value); +} + +static int setup_blacklist_regex(void **state) +{ + const struct key_value kv1[] = { vnd_foo, prd_ba_s }; + const struct key_value kv2[] = { vnd_foo, prd_bar }; + struct hwt_state *hwt = CHECK_STATE(state); + + begin_config(hwt); + begin_section_all(hwt, "blacklist"); + write_device(hwt->config_file, ARRAY_SIZE(kv1), kv1); + end_section_all(hwt); + begin_section_all(hwt, "blacklist_exceptions"); + write_device(hwt->conf_dir_file[0], ARRAY_SIZE(kv2), kv2); + end_section_all(hwt); + finish_config(hwt); + SET_TEST_FUNC(hwt, test_blacklist_regex); + return 0; +} + +/* + * Simple blacklist test with regex and exception + * config file order inverted wrt test_blacklist_regex + */ +static int setup_blacklist_regex_inv(void **state) +{ + const struct key_value kv1[] = { vnd_foo, prd_ba_s }; + const struct key_value kv2[] = { vnd_foo, prd_bar }; + struct hwt_state *hwt = CHECK_STATE(state); + + begin_config(hwt); + begin_section_all(hwt, "blacklist"); + write_device(hwt->conf_dir_file[0], ARRAY_SIZE(kv1), kv1); + end_section_all(hwt); + begin_section_all(hwt, "blacklist_exceptions"); + write_device(hwt->config_file, ARRAY_SIZE(kv2), kv2); + 
end_section_all(hwt); + finish_config(hwt); + SET_TEST_FUNC(hwt, test_blacklist_regex); + return 0; +} + +/* + * Blacklist test with two regex entries and no exception, + * one entry in the main config file and one in a config_dir file. + * + * Expected: devices matching either entry are blacklisted. + */ +static void test_blacklist_regex_matching(const struct hwt_state *hwt) +{ + mock_path_flags(vnd_foo.value, prd_bar.value, BL_BY_DEVICE); + mock_path_flags(vnd_foo.value, prd_baz.value, BL_BY_DEVICE); + mock_path(vnd_foo.value, prd_bam.value); +} + +static int setup_blacklist_regex_matching(void **state) +{ + const struct key_value kv1[] = { vnd_foo, prd_barx }; + const struct key_value kv2[] = { vnd_foo, prd_bazy }; + struct hwt_state *hwt = CHECK_STATE(state); + + begin_config(hwt); + begin_section_all(hwt, "blacklist"); + write_device(hwt->config_file, ARRAY_SIZE(kv1), kv1); + write_device(hwt->conf_dir_file[0], ARRAY_SIZE(kv2), kv2); + end_section_all(hwt); + finish_config(hwt); + SET_TEST_FUNC(hwt, test_blacklist_regex_matching); + return 0; +} + +/* + * Test for blacklisting by WWID + * + * Note that default_wwid is a substring of default_wwid_1. Because + * matching is done by regex, both paths are blacklisted. + */ +static void test_blacklist_wwid(const struct hwt_state *hwt) +{ + mock_path_flags(vnd_foo.value, prd_bar.value, BL_BY_WWID); + mock_path_wwid_flags(vnd_foo.value, prd_baz.value, default_wwid_1, + BL_BY_WWID); +} + +static int setup_blacklist_wwid(void **state) +{ + const struct key_value kv[] = { wwid_test }; + struct hwt_state *hwt = CHECK_STATE(state); + + begin_config(hwt); + write_section(hwt->config_file, "blacklist", ARRAY_SIZE(kv), kv); + finish_config(hwt); + SET_TEST_FUNC(hwt, test_blacklist_wwid); + return 0; +} + +/* + * Test for blacklisting by WWID + * + * Here the blacklist contains only default_wwid_1. Thus the path + * with default_wwid is NOT blacklisted.
+ */ +static void test_blacklist_wwid_1(const struct hwt_state *hwt) +{ + mock_path(vnd_foo.value, prd_bar.value); /* no BL flag: expected not to be blacklisted */ + mock_path_wwid_flags(vnd_foo.value, prd_baz.value, default_wwid_1, + BL_BY_WWID); +} + +static int setup_blacklist_wwid_1(void **state) +{ + const struct key_value kv[] = { { _wwid, default_wwid_1 }, }; + struct hwt_state *hwt = CHECK_STATE(state); + + begin_config(hwt); + write_section(hwt->config_file, "blacklist", ARRAY_SIZE(kv), kv); + finish_config(hwt); + SET_TEST_FUNC(hwt, test_blacklist_wwid_1); + return 0; +} + +/* + * Test for product_blacklist. Two entries blacklisting each other. + * + * Expected: Both are blacklisted. + */ +static void test_product_blacklist(const struct hwt_state *hwt) +{ + mock_path_flags(vnd_foo.value, prd_baz.value, BL_BY_DEVICE); + mock_path_flags(vnd_foo.value, prd_bar.value, BL_BY_DEVICE); + mock_path(vnd_foo.value, prd_bam.value); /* foo:bam is expected not to be blacklisted */ +} + +static int setup_product_blacklist(void **state) +{ + const struct key_value kv1[] = { vnd_foo, prd_bar, bl_baz }; + const struct key_value kv2[] = { vnd_foo, prd_baz, bl_bar }; + struct hwt_state *hwt = CHECK_STATE(state); + + WRITE_TWO_DEVICES(hwt, kv1, kv2); + SET_TEST_FUNC(hwt, test_product_blacklist); + return 0; +} + +/* + * Test for product_blacklist. The second regex "matches" the first. + * This is a pathological example. + * + * Expected: "foo:bar", "foo:baz" are blacklisted.
+ */ +static void test_product_blacklist_matching(const struct hwt_state *hwt) +{ + mock_path_flags(vnd_foo.value, prd_bar.value, BL_BY_DEVICE); + mock_path_flags(vnd_foo.value, prd_baz.value, BL_BY_DEVICE); + mock_path(vnd_foo.value, prd_bam.value); +} + +static int setup_product_blacklist_matching(void **state) +{ + const struct key_value kv1[] = { vnd_foo, prd_bar, bl_barx }; + const struct key_value kv2[] = { vnd_foo, prd_baz, bl_bazy }; + struct hwt_state *hwt = CHECK_STATE(state); + + WRITE_TWO_DEVICES(hwt, kv1, kv2); + SET_TEST_FUNC(hwt, test_product_blacklist_matching); + return 0; +} + +/* + * Basic test for multipath-based configuration. + * + * Expected: properties, including pp->prio, are taken from multipath + * section. + * + * mp is checked for NULL before it is dereferenced, so that a failed + * mock_multipath() shows up as a test failure rather than a crash. + */ +static void test_multipath_config(const struct hwt_state *hwt) +{ + struct path *pp; + struct multipath *mp; + + pp = mock_path(vnd_foo.value, prd_bar.value); + mp = mock_multipath(pp); + assert_ptr_not_equal(mp, NULL); + assert_ptr_not_equal(mp->mpe, NULL); + TEST_PROP(prio_name(&pp->prio), prio_rdac.value); + assert_int_equal(mp->minio, atoi(minio_99.value)); + TEST_PROP(pp->uid_attribute, uid_baz.value); + + /* test different wwid */ + pp = mock_path_wwid(vnd_foo.value, prd_bar.value, default_wwid_1); + mp = mock_multipath(pp); + assert_ptr_not_equal(mp, NULL); + // assert_ptr_equal(mp->mpe, NULL); + TEST_PROP(prio_name(&pp->prio), prio_emc.value); + assert_int_equal(mp->minio, DEFAULT_MINIO_RQ); + TEST_PROP(pp->uid_attribute, uid_baz.value); +} + +static int setup_multipath_config(void **state) +{ + struct hwt_state *hwt = CHECK_STATE(state); + const struct key_value kvm[] = { wwid_test, prio_rdac, minio_99 }; + const struct key_value kvp[] = { vnd_foo, prd_bar, prio_emc, uid_baz }; + + begin_config(hwt); + begin_section_all(hwt, "devices"); + write_section(hwt->conf_dir_file[0], "device", ARRAY_SIZE(kvp), kvp); + end_section_all(hwt); + begin_section_all(hwt, "multipaths"); + write_section(hwt->config_file, "multipath", ARRAY_SIZE(kvm), kvm); + end_section_all(hwt); + finish_config(hwt); + 
SET_TEST_FUNC(hwt, test_multipath_config); + return 0; +} + +/* + * Basic test for multipath-based configuration. Two sections for the same wwid. + * + * Expected: properties are taken from both multipath sections, later taking + * precedence + */ +static void test_multipath_config_2(const struct hwt_state *hwt) +{ + struct path *pp; + struct multipath *mp; + + pp = mock_path(vnd_foo.value, prd_bar.value); + mp = mock_multipath(pp); + assert_ptr_not_equal(mp, NULL); + assert_ptr_not_equal(mp->mpe, NULL); + TEST_PROP(prio_name(&pp->prio), prio_rdac.value); + assert_int_equal(mp->minio, atoi(minio_99.value)); + assert_int_equal(mp->no_path_retry, atoi(npr_37.value)); +} + +static int setup_multipath_config_2(void **state) +{ + const struct key_value kv1[] = { wwid_test, prio_rdac, npr_queue }; + const struct key_value kv2[] = { wwid_test, minio_99, npr_37 }; + struct hwt_state *hwt = CHECK_STATE(state); + + begin_config(hwt); + begin_section_all(hwt, "multipaths"); + write_section(hwt->config_file, "multipath", ARRAY_SIZE(kv1), kv1); + write_section(hwt->conf_dir_file[1], "multipath", ARRAY_SIZE(kv2), kv2); + end_section_all(hwt); + finish_config(hwt); + SET_TEST_FUNC(hwt, test_multipath_config_2); + return 0; +} + +/* + * Same as test_multipath_config_2, both entries in the same config file. + * + * Expected: properties are taken from both multipath sections.
+ */ +static void test_multipath_config_3(const struct hwt_state *hwt) +{ + struct path *pp; + struct multipath *mp; + + pp = mock_path(vnd_foo.value, prd_bar.value); + mp = mock_multipath(pp); + assert_ptr_not_equal(mp, NULL); + assert_ptr_not_equal(mp->mpe, NULL); + TEST_PROP(prio_name(&pp->prio), prio_rdac.value); + assert_int_equal(mp->minio, atoi(minio_99.value)); + assert_int_equal(mp->no_path_retry, atoi(npr_37.value)); +} + +static int setup_multipath_config_3(void **state) +{ + const struct key_value kv1[] = { wwid_test, prio_rdac, npr_queue }; + const struct key_value kv2[] = { wwid_test, minio_99, npr_37 }; + struct hwt_state *hwt = CHECK_STATE(state); + + begin_config(hwt); + begin_section_all(hwt, "multipaths"); + write_section(hwt->config_file, "multipath", ARRAY_SIZE(kv1), kv1); + write_section(hwt->config_file, "multipath", ARRAY_SIZE(kv2), kv2); + end_section_all(hwt); + finish_config(hwt); + SET_TEST_FUNC(hwt, test_multipath_config_3); + return 0; +} + +/* + * Test for device with "hidden" attribute + */ +static void test_hidden(const struct hwt_state *hwt) +{ + mock_path_flags("NVME", "NoName", DEV_HIDDEN|BL_MASK); +} + +static int setup_hidden(void **state) +{ + struct hwt_state *hwt = CHECK_STATE(state); + + WRITE_EMPTY_CONF(hwt); + SET_TEST_FUNC(hwt, test_hidden); + + return 0; +} + +/* + * Create wrapper functions around test_driver() to avoid that cmocka + * always uses the same test name. That makes it easier to read test results. 
+ */ + +#define define_test(x) \ + static void run_##x(void **state) \ + { \ + test_driver(state); \ + } + +define_test(string_hwe) +define_test(broken_hwe) +define_test(broken_hwe_dir) +define_test(quoted_hwe) +define_test(internal_nvme) +define_test(regex_hwe) +define_test(regex_string_hwe) +define_test(regex_string_hwe_dir) +define_test(regex_2_strings_hwe_dir) +define_test(string_regex_hwe_dir) +define_test(2_ident_strings_hwe) +define_test(2_ident_strings_both_dir) +define_test(2_ident_strings_both_dir_w_prev) +define_test(2_ident_strings_hwe_dir) +define_test(3_ident_strings_hwe_dir) +define_test(2_ident_self_matching_re_hwe_dir) +define_test(2_ident_self_matching_re_hwe) +define_test(2_ident_not_self_matching_re_hwe_dir) +define_test(2_matching_res_hwe_dir) +define_test(2_nonmatching_res_hwe_dir) +define_test(blacklist) +define_test(blacklist_wwid) +define_test(blacklist_wwid_1) +define_test(blacklist_regex) +define_test(blacklist_regex_inv) +define_test(blacklist_regex_matching) +define_test(product_blacklist) +define_test(product_blacklist_matching) +define_test(multipath_config) +define_test(multipath_config_2) +define_test(multipath_config_3) +define_test(hidden) + +#define test_entry(x) \ + cmocka_unit_test_setup(run_##x, setup_##x) + +static int test_hwtable(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_sanity_globals), + test_entry(internal_nvme), + test_entry(string_hwe), + test_entry(broken_hwe), + test_entry(broken_hwe_dir), + test_entry(quoted_hwe), + test_entry(regex_hwe), + test_entry(regex_string_hwe), + test_entry(regex_string_hwe_dir), + test_entry(regex_2_strings_hwe_dir), + test_entry(string_regex_hwe_dir), + test_entry(2_ident_strings_hwe), + test_entry(2_ident_strings_both_dir), + test_entry(2_ident_strings_both_dir_w_prev), + test_entry(2_ident_strings_hwe_dir), + test_entry(3_ident_strings_hwe_dir), + test_entry(2_ident_self_matching_re_hwe_dir), + test_entry(2_ident_self_matching_re_hwe), +
test_entry(2_ident_not_self_matching_re_hwe_dir), + test_entry(2_matching_res_hwe_dir), + test_entry(2_nonmatching_res_hwe_dir), + test_entry(blacklist), + test_entry(blacklist_wwid), + test_entry(blacklist_wwid_1), + test_entry(blacklist_regex), + test_entry(blacklist_regex_inv), + test_entry(blacklist_regex_matching), + test_entry(product_blacklist), + test_entry(product_blacklist_matching), + test_entry(multipath_config), + test_entry(multipath_config_2), + test_entry(multipath_config_3), + test_entry(hidden), + }; + + return cmocka_run_group_tests(tests, setup, teardown); +} + +int main(void) +{ + int ret = 0; + + ret += test_hwtable(); + return ret; +} diff --git a/tests/parser.c b/tests/parser.c new file mode 100644 index 0000000..29859da --- /dev/null +++ b/tests/parser.c @@ -0,0 +1,474 @@ +/* + * Copyright (c) 2018 SUSE Linux GmbH + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * + */ + +#include +#include +#include +#include +#include +#include +// #include "list.h" +#include "parser.h" +#include "vector.h" + +#include "globals.c" + +/* Set these to 1 to get success for current broken behavior */ +/* Strip leading whitespace between quotes */ +#define LSTRIP_QUOTED_WSP 0 +/* Stop parsing at 2nd quote */ +#define TWO_QUOTES_ONLY 0 + +static char *test_file = "test.conf"; + +/* Missing declaration */ +int validate_config_strvec(vector strvec, char *file); + +/* Stringify helpers */ +#define _str_(x) #x +#define str(x) _str_(x) + +static int setup(void **state) +{ + return 0; +} + +static int teardown(void **state) +{ + return 0; +} + +static void test01(void **state) +{ + vector v = alloc_strvec("keyword value"); + char *val; + + assert_int_equal(validate_config_strvec(v, test_file), 0); + assert_int_equal(VECTOR_SIZE(v), 2); + assert_string_equal(VECTOR_SLOT(v, 0), "keyword"); + assert_string_equal(VECTOR_SLOT(v, 1), "value"); + + val = set_value(v); + assert_string_equal(val, "value"); + + free(val); + free_strvec(v); +} + +static void test02(void **state) +{ + vector v = alloc_strvec("keyword \"value\""); + char *val; + + assert_int_equal(validate_config_strvec(v, test_file), 0); + assert_int_equal(VECTOR_SIZE(v), 4); + assert_string_equal(VECTOR_SLOT(v, 0), "keyword"); + assert_true(is_quote(VECTOR_SLOT(v, 1)));; + assert_string_equal(VECTOR_SLOT(v, 2), "value"); + assert_true(is_quote(VECTOR_SLOT(v, 3)));; + + val = set_value(v); + assert_string_equal(val, "value"); + + free(val); + free_strvec(v); +} + +static void test03(void **state) +{ + vector v = alloc_strvec("keyword value\n"); + char *val; + + assert_int_equal(validate_config_strvec(v, test_file), 0); + assert_int_equal(VECTOR_SIZE(v), 2); + assert_string_equal(VECTOR_SLOT(v, 0), "keyword"); + assert_string_equal(VECTOR_SLOT(v, 1), "value"); + + val = set_value(v); + assert_string_equal(val, "value"); + + free(val); + free_strvec(v); +} + +static void test04(void **state) +{ 
+ vector v = alloc_strvec("keyword \t value \t \n "); + char *val; + + assert_int_equal(validate_config_strvec(v, test_file), 0); + assert_int_equal(VECTOR_SIZE(v), 2); + assert_string_equal(VECTOR_SLOT(v, 0), "keyword"); + assert_string_equal(VECTOR_SLOT(v, 1), "value"); + + val = set_value(v); + assert_string_equal(val, "value"); + + free(val); + free_strvec(v); +} + +static void test05(void **state) +{ + vector v = alloc_strvec("keyword \t value \t ! comment "); + char *val; + + assert_int_equal(validate_config_strvec(v, test_file), 0); + assert_int_equal(VECTOR_SIZE(v), 2); + assert_string_equal(VECTOR_SLOT(v, 0), "keyword"); + assert_string_equal(VECTOR_SLOT(v, 1), "value"); + + val = set_value(v); + assert_string_equal(val, "value"); + + free(val); + free_strvec(v); +} + +static void test06(void **state) +{ + vector v = alloc_strvec("keyword \t value # \n comment "); + char *val; + + assert_int_equal(validate_config_strvec(v, test_file), 0); + assert_int_equal(VECTOR_SIZE(v), 2); + assert_string_equal(VECTOR_SLOT(v, 0), "keyword"); + assert_string_equal(VECTOR_SLOT(v, 1), "value"); + + val = set_value(v); + assert_string_equal(val, "value"); + + free(val); + free_strvec(v); +} + +static void test07(void **state) +{ + vector v = alloc_strvec("keyword \t value more "); + char *val; + + assert_int_equal(validate_config_strvec(v, test_file), 0); + assert_int_equal(VECTOR_SIZE(v), 3); + assert_string_equal(VECTOR_SLOT(v, 0), "keyword"); + assert_string_equal(VECTOR_SLOT(v, 1), "value"); + assert_string_equal(VECTOR_SLOT(v, 2), "more"); + + val = set_value(v); + assert_string_equal(val, "value"); + + free(val); + free_strvec(v); +} + +static void test08(void **state) +{ +#define QUOTED08 " value more " +#define QUOTED08B "value more " + vector v = alloc_strvec("keyword \t \"" QUOTED08 "\""); + char *val; + + assert_int_equal(validate_config_strvec(v, test_file), 0); + assert_int_equal(VECTOR_SIZE(v), 4); + assert_string_equal(VECTOR_SLOT(v, 0), "keyword"); + 
assert_true(is_quote(VECTOR_SLOT(v, 1)));; +#if LSTRIP_QUOTED_WSP + assert_string_equal(VECTOR_SLOT(v, 2), QUOTED08B); +#else + assert_string_equal(VECTOR_SLOT(v, 2), QUOTED08); +#endif + assert_true(is_quote(VECTOR_SLOT(v, 3)));; + + val = set_value(v); +#if LSTRIP_QUOTED_WSP + assert_string_equal(val, QUOTED08B); +#else + assert_string_equal(val, QUOTED08); +#endif + free(val); + free_strvec(v); +} + +static void test09(void **state) +{ +#define QUOTED09 "value # more" + vector v = alloc_strvec("keyword \"" QUOTED09 "\""); + char *val; + + assert_int_equal(validate_config_strvec(v, test_file), 0); + assert_int_equal(VECTOR_SIZE(v), 4); + assert_string_equal(VECTOR_SLOT(v, 0), "keyword"); + assert_true(is_quote(VECTOR_SLOT(v, 1)));; + assert_string_equal(VECTOR_SLOT(v, 2), QUOTED09); + assert_true(is_quote(VECTOR_SLOT(v, 3)));; + + val = set_value(v); + assert_string_equal(val, QUOTED09); + + free(val); + free_strvec(v); +} + +static void test10(void **state) +{ +#define QUOTED10 "value ! 
more" + vector v = alloc_strvec("keyword \"" QUOTED10 "\""); + char *val; + + assert_int_equal(validate_config_strvec(v, test_file), 0); + assert_int_equal(VECTOR_SIZE(v), 4); + assert_string_equal(VECTOR_SLOT(v, 0), "keyword"); + assert_true(is_quote(VECTOR_SLOT(v, 1)));; + assert_string_equal(VECTOR_SLOT(v, 2), QUOTED10); + assert_true(is_quote(VECTOR_SLOT(v, 3)));; + + val = set_value(v); + assert_string_equal(val, QUOTED10); + + free(val); + free_strvec(v); +} + +static void test11(void **state) +{ +#define QUOTED11 "value comment" + vector v = alloc_strvec("keyword\"" QUOTED11 "\""); + char *val; + + assert_int_equal(validate_config_strvec(v, test_file), 0); + assert_int_equal(VECTOR_SIZE(v), 4); + assert_string_equal(VECTOR_SLOT(v, 0), "keyword"); + assert_true(is_quote(VECTOR_SLOT(v, 1)));; + assert_string_equal(VECTOR_SLOT(v, 2), QUOTED11); + assert_true(is_quote(VECTOR_SLOT(v, 3)));; + + val = set_value(v); + assert_string_equal(val, QUOTED11); + + free(val); + free_strvec(v); +} + +static void test12(void **state) +{ + vector v = alloc_strvec("key\"word\""); + char *val; + + assert_int_equal(validate_config_strvec(v, test_file), 0); + assert_int_equal(VECTOR_SIZE(v), 4); + assert_string_equal(VECTOR_SLOT(v, 0), "key"); + assert_true(is_quote(VECTOR_SLOT(v, 1)));; + assert_string_equal(VECTOR_SLOT(v, 2), "word"); + assert_true(is_quote(VECTOR_SLOT(v, 3)));; + + val = set_value(v); + assert_string_equal(val, "word"); + + free(val); + free_strvec(v); +} + +static void test13(void **state) +{ + vector v = alloc_strvec("keyword value \"quoted\""); + char *val; + + assert_int_equal(validate_config_strvec(v, test_file), 0); + assert_int_equal(VECTOR_SIZE(v), 5); + assert_string_equal(VECTOR_SLOT(v, 0), "keyword"); + assert_string_equal(VECTOR_SLOT(v, 1), "value"); + assert_true(is_quote(VECTOR_SLOT(v, 2)));; + assert_string_equal(VECTOR_SLOT(v, 3), "quoted"); + assert_true(is_quote(VECTOR_SLOT(v, 4)));; + + val = set_value(v); + assert_string_equal(val, 
"value"); + + free(val); + free_strvec(v); +} + +static void test14(void **state) +{ + vector v = alloc_strvec("keyword \"value \" comment\"\""); + char *val; + + assert_int_equal(validate_config_strvec(v, test_file), 0); + assert_int_equal(VECTOR_SIZE(v), 7); + assert_string_equal(VECTOR_SLOT(v, 0), "keyword"); + assert_true(is_quote(VECTOR_SLOT(v, 1)));; + assert_string_equal(VECTOR_SLOT(v, 2), "value "); + assert_true(is_quote(VECTOR_SLOT(v, 3)));; + assert_string_equal(VECTOR_SLOT(v, 4), "comment"); + assert_true(is_quote(VECTOR_SLOT(v, 5)));; + assert_true(is_quote(VECTOR_SLOT(v, 6)));; + + val = set_value(v); + assert_string_equal(val, "value "); + + free(val); + free_strvec(v); +} + +static void test15(void **state) +{ +#define QUOTED15 "word value\n comment" + vector v = alloc_strvec("key\"" QUOTED15 "\""); + char *val; + + assert_int_equal(VECTOR_SIZE(v), 4); + assert_string_equal(VECTOR_SLOT(v, 0), "key"); + assert_true(is_quote(VECTOR_SLOT(v, 1)));; + assert_string_equal(VECTOR_SLOT(v, 2), QUOTED15); + assert_true(is_quote(VECTOR_SLOT(v, 3)));; + assert_int_equal(validate_config_strvec(v, test_file), 0); + + val = set_value(v); + assert_string_equal(val, QUOTED15); + + free(val); + free_strvec(v); +} + +static void test16(void **state) +{ + vector v = alloc_strvec("keyword \"2.5\"\" SSD\""); + char *val; + +#if TWO_QUOTES_ONLY + assert_int_equal(validate_config_strvec(v, test_file), 0); + assert_int_equal(VECTOR_SIZE(v), 6); + assert_string_equal(VECTOR_SLOT(v, 0), "keyword"); + assert_true(is_quote(VECTOR_SLOT(v, 1)));; + assert_string_equal(VECTOR_SLOT(v, 2), "2.5"); + assert_true(is_quote(VECTOR_SLOT(v, 3)));; + assert_string_equal(VECTOR_SLOT(v, 4), "SSD"); + assert_true(is_quote(VECTOR_SLOT(v, 5)));; + + val = set_value(v); + assert_string_equal(val, "2.5"); +#else + assert_int_equal(VECTOR_SIZE(v), 4); + assert_string_equal(VECTOR_SLOT(v, 0), "keyword"); + assert_true(is_quote(VECTOR_SLOT(v, 1)));; + assert_string_equal(VECTOR_SLOT(v, 2), "2.5\" 
SSD"); + assert_true(is_quote(VECTOR_SLOT(v, 3)));; + + val = set_value(v); + assert_string_equal(val, "2.5\" SSD"); +#endif + free(val); + free_strvec(v); +} + +static void test17(void **state) +{ + vector v = alloc_strvec("keyword \"\"\"\"\" is empty\""); + char *val; +#if TWO_QUOTES_ONLY + assert_int_equal(validate_config_strvec(v, test_file), 0); + assert_int_equal(VECTOR_SIZE(v), 6); + assert_string_equal(VECTOR_SLOT(v, 0), "keyword"); + assert_true(is_quote(VECTOR_SLOT(v, 1)));; + assert_true(is_quote(VECTOR_SLOT(v, 2)));; + assert_true(is_quote(VECTOR_SLOT(v, 3)));; +#if LSTRIP_QUOTED_WSP + assert_string_equal(VECTOR_SLOT(v, 4), "is empty"); +#else + assert_string_equal(VECTOR_SLOT(v, 4), " is empty"); +#endif + assert_true(is_quote(VECTOR_SLOT(v, 5)));; + + val = set_value(v); + assert_string_equal(val, ""); +#else + assert_int_equal(validate_config_strvec(v, test_file), 0); + assert_int_equal(VECTOR_SIZE(v), 4); + assert_string_equal(VECTOR_SLOT(v, 0), "keyword"); + assert_true(is_quote(VECTOR_SLOT(v, 1)));; + assert_string_equal(VECTOR_SLOT(v, 2), "\"\" is empty"); + assert_true(is_quote(VECTOR_SLOT(v, 3)));; + + val = set_value(v); + assert_string_equal(val, "\"\" is empty"); +#endif + free(val); + free_strvec(v); +} + +static void test18(void **state) +{ + vector v = alloc_strvec("keyword \"\"\"\""); + char *val; +#if TWO_QUOTES_ONLY + assert_int_equal(validate_config_strvec(v, test_file), 0); + assert_int_equal(VECTOR_SIZE(v), 5); + assert_string_equal(VECTOR_SLOT(v, 0), "keyword"); + assert_true(is_quote(VECTOR_SLOT(v, 1)));; + assert_true(is_quote(VECTOR_SLOT(v, 2)));; + assert_true(is_quote(VECTOR_SLOT(v, 3)));; + assert_true(is_quote(VECTOR_SLOT(v, 4)));; + + val = set_value(v); + assert_string_equal(val, ""); +#else + assert_int_equal(validate_config_strvec(v, test_file), 0); + assert_int_equal(VECTOR_SIZE(v), 4); + assert_string_equal(VECTOR_SLOT(v, 0), "keyword"); + assert_true(is_quote(VECTOR_SLOT(v, 1)));; + assert_string_equal(VECTOR_SLOT(v, 
2), "\""); + assert_true(is_quote(VECTOR_SLOT(v, 3)));; + + val = set_value(v); + assert_string_equal(val, "\""); +#endif + free(val); + free_strvec(v); +} + +int test_config_parser(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(test01), + cmocka_unit_test(test02), + cmocka_unit_test(test03), + cmocka_unit_test(test04), + cmocka_unit_test(test05), + cmocka_unit_test(test06), + cmocka_unit_test(test07), + cmocka_unit_test(test08), + cmocka_unit_test(test09), + cmocka_unit_test(test10), + cmocka_unit_test(test11), + cmocka_unit_test(test12), + cmocka_unit_test(test13), + cmocka_unit_test(test14), + cmocka_unit_test(test15), + cmocka_unit_test(test16), + cmocka_unit_test(test17), + cmocka_unit_test(test18), + }; + return cmocka_run_group_tests(tests, setup, teardown); +} + +int main(void) +{ + int ret = 0; + + ret += test_config_parser(); + return ret; +} diff --git a/tests/pgpolicy.c b/tests/pgpolicy.c new file mode 100644 index 0000000..3f61b12 --- /dev/null +++ b/tests/pgpolicy.c @@ -0,0 +1,1036 @@ +/* + * Copyright (c) 2018 Benjamin Marzinski, Redhat + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "globals.c" +#include "pgpolicies.h" + +struct multipath mp8, mp4, mp1, mp0, mp_null; +struct path p8[8], p4[4], p1[1]; + + +static void set_priority(struct path *pp, int *prio, int size) +{ + int i; + + for (i = 0; i < size; i++) { + pp[i].priority = prio[i]; + } +} + +static void set_marginal(struct path *pp, int *marginal, int size) +{ + int i; + + for (i = 0; i < size; i++) { + pp[i].marginal = marginal[i]; + } +} + +static void set_tgt_node_name(struct path *pp, char **tgt_node_name, int size) +{ + int i; + + for (i = 0; i < size; i++) { + strcpy(pp[i].tgt_node_name, tgt_node_name[i]); + } +} + +static void set_serial(struct path *pp, char **serial, int size) +{ + int i; + + for (i = 0; i < size; i++) { + strcpy(pp[i].serial, serial[i]); + } +} + +static int setup(void **state) +{ + int i; + + for (i = 0; i < 8; i++) { + sprintf(p8[i].dev, "p8_%d", i); + sprintf(p8[i].dev_t, "8:%d", i); + p8[i].state = PATH_UP; + } + for (i = 0; i < 4; i++) { + sprintf(p4[i].dev, "p4_%d", i); + sprintf(p4[i].dev_t, "4:%d", i); + p4[i].state = PATH_UP; + } + sprintf(p1[0].dev, "p1_0"); + sprintf(p1[0].dev_t, "4:0"); + p1[0].state = PATH_UP; + return 0; +} + +static int setupX(struct multipath *mp, struct path *pp, int size) +{ + int i; + int prio[8] = {10, 10, 10, 10, 10, 10, 10, 10}; + int marginal[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + + mp->paths = vector_alloc(); + if (!mp->paths) + return -1; + for (i = 0; i < size; i++) { + if (!vector_alloc_slot(mp->paths)) + return -1; + vector_set_slot(mp->paths, &pp[i]); + } + set_priority(pp, prio, size); + set_marginal(pp, marginal, size); + mp->pgpolicyfn = NULL; + return 0; +} + +static int setup8(void **state) +{ + return setupX(&mp8, p8, 8); +} + +static int setup4(void **state) +{ + return setupX(&mp4, p4, 4); +} + +static int setup1(void **state) +{ + return setupX(&mp1, p1, 1); +} + +static int setup0(void **state) +{ + return setupX(&mp0, 
NULL, 0); +} + +static int setup_null(void **state) +{ + return 0; +} + +static int teardownX(struct multipath *mp) +{ + free_pgvec(mp->pg, KEEP_PATHS); + mp->pg = NULL; + return 0; +} + +static int teardown8(void **state) +{ + return teardownX(&mp8); +} + +static int teardown4(void **state) +{ + return teardownX(&mp4); +} + +static int teardown1(void **state) +{ + return teardownX(&mp1); +} + +static int teardown0(void **state) +{ + return teardownX(&mp0); +} + +static int teardown_null(void **state) +{ + return teardownX(&mp_null); +} + +static void +verify_pathgroups(struct multipath *mp, struct path *pp, int **groups, + int *group_size, int *marginal, int size) +{ + int i, j; + struct pathgroup *pgp; + + assert_null(mp->paths); + assert_non_null(mp->pg); + assert_int_equal(VECTOR_SIZE(mp->pg), size); + for (i = 0; i < size; i++) { + pgp = VECTOR_SLOT(mp->pg, i); + assert_non_null(pgp); + assert_non_null(pgp->paths); + assert_int_equal(VECTOR_SIZE(pgp->paths), group_size[i]); + if (marginal) + assert_int_equal(pgp->marginal, marginal[i]); + else + assert_int_equal(pgp->marginal, 0); + for (j = 0; j < group_size[i]; j++) { + int path_nr = groups[i][j]; + struct path *pgp_path = VECTOR_SLOT(pgp->paths, j); + struct path *pp_path = &pp[path_nr]; + /* Test names instead of pointers to get a more + * useful error message */ + assert_string_equal(pgp_path->dev, pp_path->dev); + /* This test is just a backup in case + * something went wrong naming the paths */ + assert_ptr_equal(pgp_path, pp_path); + } + } +} + +static void test_one_group8(void **state) +{ + int paths[] = {0,1,2,3,4,5,6,7}; + int *groups[] = {paths}; + int group_size[] = {8}; + + mp8.pgpolicyfn = one_group; + assert_int_equal(group_paths(&mp8, 0), 0); + verify_pathgroups(&mp8, p8, groups, group_size, NULL, 1); +} + +static void test_one_group4(void **state) +{ + int paths[] = {0,1,2,3}; + int *groups[] = {paths}; + int group_size[] = {4}; + + mp4.pgpolicyfn = one_group; +
assert_int_equal(group_paths(&mp4, 0), 0); + verify_pathgroups(&mp4, p4, groups, group_size, NULL, 1); +} + +static void test_one_group1(void **state) +{ + int paths[] = {0}; + int *groups[] = {paths}; + int group_size[] = {1}; + + mp1.pgpolicyfn = one_group; + assert_int_equal(group_paths(&mp1, 0), 0); + verify_pathgroups(&mp1, p1, groups, group_size, NULL, 1); +} + +static void test_one_group0(void **state) +{ + mp0.pgpolicyfn = one_group; + assert_int_equal(group_paths(&mp0, 0), 0); + verify_pathgroups(&mp0, NULL, NULL, NULL, NULL, 0); +} + +static void test_one_group_null(void **state) +{ + mp_null.pgpolicyfn = one_group; + assert_int_equal(group_paths(&mp_null, 0), 0); + verify_pathgroups(&mp_null, NULL, NULL, NULL, NULL, 0); +} + +static void test_one_group_all_marginal8(void **state) +{ + int paths[] = {0,1,2,3,4,5,6,7}; + int marginal[] = {1,1,1,1,1,1,1,1}; + int *groups[] = {paths}; + int group_size[] = {8}; + int group_marginal[] = {1}; + + set_marginal(p8, marginal, 8); + mp8.pgpolicyfn = one_group; + assert_int_equal(group_paths(&mp8, 1), 0); + verify_pathgroups(&mp8, p8, groups, group_size, group_marginal, 1); +} + +static void test_one_group_half_marginal8(void **state) +{ + int marginal[] = {1,0,1,0,1,1,0,0}; + int group0[] = {1,3,6,7}; + int group1[] = {0,2,4,5}; + int *groups[] = {group0, group1}; + int group_size[] = {4,4}; + int group_marginal[] = {0,1}; + + set_marginal(p8, marginal, 8); + mp8.pgpolicyfn = one_group; + assert_int_equal(group_paths(&mp8, 1), 0); + verify_pathgroups(&mp8, p8, groups, group_size, group_marginal, 2); +} + +static void test_one_group_ignore_marginal8(void **state) +{ + int marginal[] = {1,0,1,0,1,1,0,0}; + int paths[] = {0,1,2,3,4,5,6,7}; + int *groups[] = {paths}; + int group_size[] = {8}; + + set_marginal(p8, marginal, 8); + mp8.pgpolicyfn = one_group; + assert_int_equal(group_paths(&mp8, 0), 0); + verify_pathgroups(&mp8, p8, groups, group_size, NULL, 1); +} + +static void test_one_group_one_marginal8(void **state) 
+{ + int marginal[] = {0,0,0,0,0,1,0,0}; + int group0[] = {0,1,2,3,4,6,7}; + int group1[] = {5}; + int *groups[] = {group0, group1}; + int group_size[] = {7,1}; + int group_marginal[] = {0,1}; + + set_marginal(p8, marginal, 8); + mp8.pgpolicyfn = one_group; + assert_int_equal(group_paths(&mp8, 1), 0); + verify_pathgroups(&mp8, p8, groups, group_size, group_marginal, 2); +} + +static void test_one_path_per_group_same8(void **state) +{ + int paths[] = {0,1,2,3,4,5,6,7}; + int *groups[] = {&paths[0], &paths[1], &paths[2], &paths[3], + &paths[4], &paths[5], &paths[6], &paths[7]}; + int group_size[] = {1,1,1,1,1,1,1,1}; + + mp8.pgpolicyfn = one_path_per_group; + assert_int_equal(group_paths(&mp8, 0), 0); + verify_pathgroups(&mp8, p8, groups, group_size, NULL, 8); +} + +static void test_one_path_per_group_increasing8(void **state) +{ + int prio[] = {1,2,3,4,5,6,7,8}; + int paths[] = {7,6,5,4,3,2,1,0}; + int *groups[] = {&paths[0], &paths[1], &paths[2], &paths[3], + &paths[4], &paths[5], &paths[6], &paths[7]}; + int group_size[] = {1,1,1,1,1,1,1,1}; + + set_priority(p8, prio, 8); + mp8.pgpolicyfn = one_path_per_group; + assert_int_equal(group_paths(&mp8, 0), 0); + verify_pathgroups(&mp8, p8, groups, group_size, NULL, 8); +} + +static void test_one_path_per_group_decreasing8(void **state) +{ + int prio[] = {8,7,6,5,4,3,2,1}; + int paths[] = {0,1,2,3,4,5,6,7}; + int *groups[] = {&paths[0], &paths[1], &paths[2], &paths[3], + &paths[4], &paths[5], &paths[6], &paths[7]}; + int group_size[] = {1,1,1,1,1,1,1,1}; + + set_priority(p8, prio, 8); + mp8.pgpolicyfn = one_path_per_group; + assert_int_equal(group_paths(&mp8, 0), 0); + verify_pathgroups(&mp8, p8, groups, group_size, NULL, 8); +} + +static void test_one_path_per_group_mixed8(void **state) +{ + int prio[] = {7,1,3,3,5,2,8,2}; + int paths[] = {6,0,4,2,3,5,7,1}; + int *groups[] = {&paths[0], &paths[1], &paths[2], &paths[3], + &paths[4], &paths[5], &paths[6], &paths[7]}; + int group_size[] = {1,1,1,1,1,1,1,1}; + + 
set_priority(p8, prio, 8); + mp8.pgpolicyfn = one_path_per_group; + assert_int_equal(group_paths(&mp8, 0), 0); + verify_pathgroups(&mp8, p8, groups, group_size, NULL, 8); +} + +static void test_one_path_per_group4(void **state) +{ + int paths[] = {0,1,2,3}; + int *groups[] = {&paths[0], &paths[1], &paths[2], &paths[3]}; + int group_size[] = {1,1,1,1}; + + mp4.pgpolicyfn = one_path_per_group; + assert_int_equal(group_paths(&mp4, 0), 0); + verify_pathgroups(&mp4, p4, groups, group_size, NULL, 4); +} + +static void test_one_path_per_group1(void **state) +{ + int paths[] = {0}; + int *groups[] = {paths}; + int group_size[] = {1}; + + mp1.pgpolicyfn = one_path_per_group; + assert_int_equal(group_paths(&mp1, 0), 0); + verify_pathgroups(&mp1, p1, groups, group_size, NULL, 1); +} + +static void test_one_path_per_group0(void **state) +{ + mp0.pgpolicyfn = one_path_per_group; + assert_int_equal(group_paths(&mp0, 0), 0); + verify_pathgroups(&mp0, NULL, NULL, NULL, NULL, 0); +} + +static void test_one_path_per_group_null(void **state) +{ + mp_null.pgpolicyfn = one_path_per_group; + assert_int_equal(group_paths(&mp_null, 0), 0); + verify_pathgroups(&mp_null, NULL, NULL, NULL, NULL, 0); +} + +static void test_one_path_per_group_mixed_all_marginal8(void **state) +{ + int prio[] = {7,1,3,3,5,2,8,2}; + int marginal[] = {1,1,1,1,1,1,1,1}; + int paths[] = {6,0,4,2,3,5,7,1}; + int *groups[] = {&paths[0], &paths[1], &paths[2], &paths[3], + &paths[4], &paths[5], &paths[6], &paths[7]}; + int group_size[] = {1,1,1,1,1,1,1,1}; + int group_marginal[] = {1,1,1,1,1,1,1,1}; + + set_priority(p8, prio, 8); + set_marginal(p8, marginal, 8); + mp8.pgpolicyfn = one_path_per_group; + assert_int_equal(group_paths(&mp8, 1), 0); + verify_pathgroups(&mp8, p8, groups, group_size, group_marginal, 8); +} + +static void test_one_path_per_group_mixed_half_marginal8(void **state) +{ + int prio[] = {7,1,3,3,5,2,8,2}; + int marginal[] = {0,1,1,0,0,0,1,1}; + int paths[] = {0,4,3,5,6,2,7,1}; + int *groups[] = 
{&paths[0], &paths[1], &paths[2], &paths[3], + &paths[4], &paths[5], &paths[6], &paths[7]}; + int group_size[] = {1,1,1,1,1,1,1,1}; + int group_marginal[] = {0,0,0,0,1,1,1,1}; + + set_priority(p8, prio, 8); + set_marginal(p8, marginal, 8); + mp8.pgpolicyfn = one_path_per_group; + assert_int_equal(group_paths(&mp8, 1), 0); + verify_pathgroups(&mp8, p8, groups, group_size, group_marginal, 8); +} + +static void test_group_by_prio_same8(void **state) +{ + int paths[] = {0,1,2,3,4,5,6,7}; + int *groups[] = {paths}; + int group_size[] = {8}; + + mp8.pgpolicyfn = group_by_prio; + assert_int_equal(group_paths(&mp8, 0), 0); + verify_pathgroups(&mp8, p8, groups, group_size, NULL, 1); +} + +static void test_group_by_prio_increasing8(void **state) +{ + int prio[] = {1,2,3,4,5,6,7,8}; + int paths[] = {7,6,5,4,3,2,1,0}; + int *groups[] = {&paths[0], &paths[1], &paths[2], &paths[3], + &paths[4], &paths[5], &paths[6], &paths[7]}; + int group_size[] = {1,1,1,1,1,1,1,1}; + + set_priority(p8, prio, 8); + mp8.pgpolicyfn = group_by_prio; + assert_int_equal(group_paths(&mp8, 0), 0); + verify_pathgroups(&mp8, p8, groups, group_size, NULL, 8); +} + +static void test_group_by_prio_decreasing8(void **state) +{ + int prio[] = {8,7,6,5,4,3,2,1}; + int paths[] = {0,1,2,3,4,5,6,7}; + int *groups[] = {&paths[0], &paths[1], &paths[2], &paths[3], + &paths[4], &paths[5], &paths[6], &paths[7]}; + int group_size[] = {1,1,1,1,1,1,1,1}; + + set_priority(p8, prio, 8); + mp8.pgpolicyfn = group_by_prio; + assert_int_equal(group_paths(&mp8, 0), 0); + verify_pathgroups(&mp8, p8, groups, group_size, NULL, 8); +} + +static void test_group_by_prio_mixed8(void **state) +{ + int prio[] = {7,1,3,3,5,2,8,2}; + int group0[] = {6}; + int group1[] = {0}; + int group2[] = {4}; + int group3[] = {2,3}; + int group4[] = {5,7}; + int group5[] = {1}; + int *groups[] = {group0, group1, group2, group3, + group4, group5}; + int group_size[] = {1,1,1,2,2,1}; + + set_priority(p8, prio, 8); + mp8.pgpolicyfn = group_by_prio; + 
assert_int_equal(group_paths(&mp8, 0), 0); + verify_pathgroups(&mp8, p8, groups, group_size, NULL, 6); +} + +static void test_group_by_prio_mixed_no_marginal8(void **state) +{ + int prio[] = {7,1,3,3,5,2,8,2}; + int group0[] = {6}; + int group1[] = {0}; + int group2[] = {4}; + int group3[] = {2,3}; + int group4[] = {5,7}; + int group5[] = {1}; + int *groups[] = {group0, group1, group2, group3, + group4, group5}; + int group_size[] = {1,1,1,2,2,1}; + + set_priority(p8, prio, 8); + mp8.pgpolicyfn = group_by_prio; + assert_int_equal(group_paths(&mp8, 1), 0); + verify_pathgroups(&mp8, p8, groups, group_size, NULL, 6); +} + +static void test_group_by_prio_2_groups8(void **state) +{ + int prio[] = {1,2,2,1,2,1,1,2}; + int group0[] = {1,2,4,7}; + int group1[] = {0,3,5,6}; + int *groups[] = {group0, group1}; + int group_size[] = {4,4}; + + set_priority(p8, prio, 8); + mp8.pgpolicyfn = group_by_prio; + assert_int_equal(group_paths(&mp8, 0), 0); + verify_pathgroups(&mp8, p8, groups, group_size, NULL, 2); +} + +static void test_group_by_prio_mixed4(void **state) +{ + int prio[] = {2,3,1,3}; + int group0[] = {1,3}; + int group1[] = {0}; + int group2[] = {2}; + int *groups[] = {group0, group1, group2}; + int group_size[] = {2,1,1}; + + set_priority(p4, prio, 4); + mp4.pgpolicyfn = group_by_prio; + assert_int_equal(group_paths(&mp4, 0), 0); + verify_pathgroups(&mp4, p4, groups, group_size, NULL, 3); +} + +static void test_group_by_prio_2_groups4(void **state) +{ + int prio[] = {2,1,1,2}; + int group0[] = {0,3}; + int group1[] = {1,2}; + int *groups[] = {group0, group1}; + int group_size[] = {2,2}; + + set_priority(p4, prio, 4); + mp4.pgpolicyfn = group_by_prio; + assert_int_equal(group_paths(&mp4, 0), 0); + verify_pathgroups(&mp4, p4, groups, group_size, NULL, 2); +} + +static void test_group_by_prio1(void **state) +{ + int paths[] = {0}; + int *groups[] = {paths}; + int group_size[] = {1}; + + mp1.pgpolicyfn = group_by_prio; + assert_int_equal(group_paths(&mp1, 0), 0); + 
verify_pathgroups(&mp1, p1, groups, group_size, NULL, 1); +} + +static void test_group_by_prio0(void **state) +{ + mp0.pgpolicyfn = group_by_prio; + assert_int_equal(group_paths(&mp0, 0), 0); + verify_pathgroups(&mp0, NULL, NULL, NULL, NULL, 0); +} + +static void test_group_by_prio_null(void **state) +{ + mp_null.pgpolicyfn = group_by_prio; + assert_int_equal(group_paths(&mp_null, 0), 0); + verify_pathgroups(&mp_null, NULL, NULL, NULL, NULL, 0); +} + +static void test_group_by_prio_mixed_all_marginal8(void **state) +{ + int prio[] = {7,1,3,3,5,2,8,2}; + int marginal[] = {1,1,1,1,1,1,1,1}; + int group0[] = {6}; + int group1[] = {0}; + int group2[] = {4}; + int group3[] = {2,3}; + int group4[] = {5,7}; + int group5[] = {1}; + int *groups[] = {group0, group1, group2, group3, + group4, group5}; + int group_size[] = {1,1,1,2,2,1}; + int group_marginal[] = {1,1,1,1,1,1}; + + set_priority(p8, prio, 8); + set_marginal(p8, marginal, 8); + mp8.pgpolicyfn = group_by_prio; + assert_int_equal(group_paths(&mp8, 1), 0); + verify_pathgroups(&mp8, p8, groups, group_size, group_marginal, 6); +} + +static void test_group_by_prio_mixed_half_marginal8(void **state) +{ + int prio[] = {7,1,3,3,5,2,8,2}; + int marginal[] = {0,0,0,1,0,1,1,1}; + int group0[] = {0}; + int group1[] = {4}; + int group2[] = {2}; + int group3[] = {1}; + int group4[] = {6}; + int group5[] = {3}; + int group6[] = {5,7}; + int *groups[] = {group0, group1, group2, group3, + group4, group5, group6}; + int group_size[] = {1,1,1,1,1,1,2}; + int group_marginal[] = {0,0,0,0,1,1,1}; + + set_priority(p8, prio, 8); + set_marginal(p8, marginal, 8); + mp8.pgpolicyfn = group_by_prio; + assert_int_equal(group_paths(&mp8, 1), 0); + verify_pathgroups(&mp8, p8, groups, group_size, group_marginal, 7); +} + +static void test_group_by_prio_mixed_one_marginal8(void **state) +{ + int prio[] = {7,1,3,3,5,2,8,2}; + int marginal[] = {0,0,0,0,0,1,0,0}; + int group0[] = {6}; + int group1[] = {0}; + int group2[] = {4}; + int group3[] = 
{2,3}; + int group4[] = {7}; + int group5[] = {1}; + int group6[] = {5}; + int *groups[] = {group0, group1, group2, group3, + group4, group5, group6}; + int group_size[] = {1,1,1,2,1,1,1}; + int group_marginal[] = {0,0,0,0,0,0,1}; + + set_priority(p8, prio, 8); + set_marginal(p8, marginal, 8); + mp8.pgpolicyfn = group_by_prio; + assert_int_equal(group_paths(&mp8, 1), 0); + verify_pathgroups(&mp8, p8, groups, group_size, group_marginal, 7); +} + +static void test_group_by_node_name_same8(void **state) +{ + char *node_name[] = {"a","a","a","a","a","a","a","a"}; + int paths[] = {0,1,2,3,4,5,6,7}; + int *groups[] = {paths}; + int group_size[] = {8}; + + set_tgt_node_name(p8, node_name, 8); + mp8.pgpolicyfn = group_by_node_name; + assert_int_equal(group_paths(&mp8, 0), 0); + verify_pathgroups(&mp8, p8, groups, group_size, NULL, 1); +} + +static void test_group_by_node_name_increasing8(void **state) +{ + char *node_name[] = {"a","b","c","d","e","f","g","h"}; + int prio[] = {1,2,3,4,5,6,7,8}; + int paths[] = {7,6,5,4,3,2,1,0}; + int *groups[] = {&paths[0], &paths[1], &paths[2], &paths[3], + &paths[4], &paths[5], &paths[6], &paths[7]}; + int group_size[] = {1,1,1,1,1,1,1,1}; + + set_priority(p8, prio, 8); + set_tgt_node_name(p8, node_name, 8); + mp8.pgpolicyfn = group_by_node_name; + assert_int_equal(group_paths(&mp8, 0), 0); + verify_pathgroups(&mp8, p8, groups, group_size, NULL, 8); +} + +static void test_group_by_node_name_3_groups8(void **state) +{ + char *node_name[] = {"a","b","a","c","b","c","c","a"}; + int prio[] = {4,1,4,1,1,1,1,4}; + int group0[] = {0,2,7}; + int group1[] = {3,5,6}; + int group2[] = {1,4}; + int *groups[] = {group0, group1, group2}; + int group_size[] = {3,3,2}; + + set_priority(p8, prio, 8); + set_tgt_node_name(p8, node_name, 8); + mp8.pgpolicyfn = group_by_node_name; + assert_int_equal(group_paths(&mp8, 0), 0); + verify_pathgroups(&mp8, p8, groups, group_size, NULL, 3); +} + +static void test_group_by_node_name_2_groups8(void **state) +{ + char 
*node_name[] = {"a", "a", "b", "a", "b", "b", "b", "a"}; + int prio[] = {4,1,2,1,2,2,2,1}; + int group0[] = {2,4,5,6}; + int group1[] = {0,1,3,7}; + int *groups[] = {group0, group1}; + int group_size[] = {4,4}; + + set_priority(p8, prio, 8); + set_tgt_node_name(p8, node_name, 8); + mp8.pgpolicyfn = group_by_node_name; + assert_int_equal(group_paths(&mp8, 0), 0); + verify_pathgroups(&mp8, p8, groups, group_size, NULL, 2); +} + +static void test_group_by_node_name_3_groups4(void **state) +{ + char *node_name[] = {"a","b","c","a"}; + int prio[] = {3,1,3,1}; + int group0[] = {2}; + int group1[] = {0,3}; + int group2[] = {1}; + int *groups[] = {group0, group1, group2}; + int group_size[] = {1,2,1}; + + set_priority(p4, prio, 4); + set_tgt_node_name(p4, node_name, 4); + mp4.pgpolicyfn = group_by_node_name; + assert_int_equal(group_paths(&mp4, 0), 0); + verify_pathgroups(&mp4, p4, groups, group_size, NULL, 3); +} + +static void test_group_by_node_name_2_groups4(void **state) +{ + char *node_name[] = {"a","b","b","a"}; + int prio[] = {2,1,1,2}; + int group0[] = {0,3}; + int group1[] = {1,2}; + int *groups[] = {group0, group1}; + int group_size[] = {2,2}; + + set_priority(p4, prio, 4); + set_tgt_node_name(p4, node_name, 4); + mp4.pgpolicyfn = group_by_node_name; + assert_int_equal(group_paths(&mp4, 0), 0); + verify_pathgroups(&mp4, p4, groups, group_size, NULL, 2); +} + +static void test_group_by_node_name1(void **state) +{ + char *node_name[] = {"a"}; + int paths[] = {0}; + int *groups[] = {paths}; + int group_size[] = {1}; + + set_tgt_node_name(p1, node_name, 1); + mp1.pgpolicyfn = group_by_node_name; + assert_int_equal(group_paths(&mp1,0), 0); + verify_pathgroups(&mp1, p1, groups, group_size, NULL, 1); +} + +static void test_group_by_node_name0(void **state) +{ + mp0.pgpolicyfn = group_by_node_name; + assert_int_equal(group_paths(&mp0, 0), 0); + verify_pathgroups(&mp0, NULL, NULL, NULL, NULL, 0); +} + +static void test_group_by_node_name_null(void **state) +{ + 
mp_null.pgpolicyfn = group_by_node_name; + assert_int_equal(group_paths(&mp_null, 0), 0); + verify_pathgroups(&mp_null, NULL, NULL, NULL, NULL, 0); +} + +static void test_group_by_node_name_2_groups_all_marginal8(void **state) +{ + char *node_name[] = {"a", "a", "b", "a", "b", "b", "b", "a"}; + int prio[] = {4,1,2,1,2,2,2,1}; + int marginal[] = {1,1,1,1,1,1,1,1}; + int group0[] = {2,4,5,6}; + int group1[] = {0,1,3,7}; + int *groups[] = {group0, group1}; + int group_size[] = {4,4}; + int group_marginal[] = {1,1}; + + set_priority(p8, prio, 8); + set_marginal(p8, marginal, 8); + set_tgt_node_name(p8, node_name, 8); + mp8.pgpolicyfn = group_by_node_name; + assert_int_equal(group_paths(&mp8, 1), 0); + verify_pathgroups(&mp8, p8, groups, group_size, group_marginal, 2); +} + +static void test_group_by_node_name_2_groups_half_marginal8(void **state) +{ + char *node_name[] = {"a", "a", "b", "a", "b", "b", "b", "a"}; + int prio[] = {4,1,2,1,2,2,2,1}; + int marginal[] = {1,0,1,1,0,1,0,0}; + int group0[] = {4,6}; + int group1[] = {1,7}; + int group2[] = {0,3}; + int group3[] = {2,5}; + int *groups[] = {group0, group1, group2, group3}; + int group_size[] = {2,2,2,2}; + int group_marginal[] = {0,0,1,1}; + + set_priority(p8, prio, 8); + set_marginal(p8, marginal, 8); + set_tgt_node_name(p8, node_name, 8); + mp8.pgpolicyfn = group_by_node_name; + assert_int_equal(group_paths(&mp8, 1), 0); + verify_pathgroups(&mp8, p8, groups, group_size, group_marginal, 4); +} + +static void test_group_by_serial_same8(void **state) +{ + char *serial[] = {"1","1","1","1","1","1","1","1"}; + int paths[] = {0,1,2,3,4,5,6,7}; + int *groups[] = {paths}; + int group_size[] = {8}; + + set_serial(p8, serial, 8); + mp8.pgpolicyfn = group_by_serial; + assert_int_equal(group_paths(&mp8, 0), 0); + verify_pathgroups(&mp8, p8, groups, group_size, NULL, 1); +} + +static void test_group_by_serial_increasing8(void **state) +{ + char *serial[] = {"1","2","3","4","5","6","7","8"}; + int prio[] = {1,2,3,4,5,6,7,8}; 
+ int paths[] = {7,6,5,4,3,2,1,0}; + int *groups[] = {&paths[0], &paths[1], &paths[2], &paths[3], + &paths[4], &paths[5], &paths[6], &paths[7]}; + int group_size[] = {1,1,1,1,1,1,1,1}; + + set_priority(p8, prio, 8); + set_serial(p8, serial, 8); + mp8.pgpolicyfn = group_by_serial; + assert_int_equal(group_paths(&mp8, 0), 0); + verify_pathgroups(&mp8, p8, groups, group_size, NULL, 8); +} + +static void test_group_by_serial_3_groups8(void **state) +{ + char *serial[] = {"1","2","1","3","2","3","2","1"}; + int prio[] = {4,1,4,3,1,3,1,4}; + int group0[] = {0,2,7}; + int group1[] = {3,5}; + int group2[] = {1,4,6}; + int *groups[] = {group0, group1, group2}; + int group_size[] = {3,2,3}; + + set_priority(p8, prio, 8); + set_serial(p8, serial, 8); + mp8.pgpolicyfn = group_by_serial; + assert_int_equal(group_paths(&mp8, 0), 0); + verify_pathgroups(&mp8, p8, groups, group_size, NULL, 3); +} + +static void test_group_by_serial_2_groups8(void **state) +{ + char *serial[] = {"1", "2", "1", "1", "2", "2", "1", "2"}; + int prio[] = {3,2,2,1,2,2,1,2}; + int group0[] = {1,4,5,7}; + int group1[] = {0,2,3,6}; + int *groups[] = {group0, group1}; + int group_size[] = {4,4}; + + set_priority(p8, prio, 8); + set_serial(p8, serial, 8); + mp8.pgpolicyfn = group_by_serial; + assert_int_equal(group_paths(&mp8, 0), 0); + verify_pathgroups(&mp8, p8, groups, group_size, NULL, 2); +} + +static void test_group_by_serial_3_groups4(void **state) +{ + char *serial[] = {"1","2","3","2"}; + int prio[] = {3,1,3,1}; + int group0[] = {0}; + int group1[] = {2}; + int group2[] = {1,3}; + int *groups[] = {group0, group1, group2}; + int group_size[] = {1,1,2}; + + set_priority(p4, prio, 4); + set_serial(p4, serial, 4); + mp4.pgpolicyfn = group_by_serial; + assert_int_equal(group_paths(&mp4, 0), 0); + verify_pathgroups(&mp4, p4, groups, group_size, NULL, 3); +} + +static void test_group_by_serial_2_groups4(void **state) +{ + char *serial[] = {"1","2","1","2"}; + int prio[] = {3,1,3,1}; + int group0[] = {0,2}; 
+ int group1[] = {1,3}; + int *groups[] = {group0, group1}; + int group_size[] = {2,2}; + + set_priority(p4, prio, 4); + set_serial(p4, serial, 4); + mp4.pgpolicyfn = group_by_serial; + assert_int_equal(group_paths(&mp4, 0), 0); + verify_pathgroups(&mp4, p4, groups, group_size, NULL, 2); +} + +static void test_group_by_serial1(void **state) +{ + char *serial[1] = {"1"}; + int paths[1] = {0}; + int *groups[1] = {paths}; + int group_size[1] = {1}; + + set_serial(p1, serial, 1); + mp1.pgpolicyfn = group_by_serial; + assert_int_equal(group_paths(&mp1, 0), 0); + verify_pathgroups(&mp1, p1, groups, group_size, NULL, 1); +} + +static void test_group_by_serial0(void **state) +{ + mp0.pgpolicyfn = group_by_serial; + assert_int_equal(group_paths(&mp0, 0), 0); + verify_pathgroups(&mp0, NULL, NULL, NULL, NULL, 0); +} + +static void test_group_by_serial_null(void **state) +{ + mp_null.pgpolicyfn = group_by_serial; + assert_int_equal(group_paths(&mp_null, 0), 0); + verify_pathgroups(&mp_null, NULL, NULL, NULL, NULL, 0); +} + +static void test_group_by_serial_2_groups8_all_marginal8(void **state) +{ + char *serial[] = {"1", "2", "1", "1", "2", "2", "1", "2"}; + int marginal[] = {1,1,1,1,1,1,1,1}; + int prio[] = {3,2,2,1,2,2,1,2}; + int group0[] = {1,4,5,7}; + int group1[] = {0,2,3,6}; + int *groups[] = {group0, group1}; + int group_size[] = {4,4}; + int group_marginal[] = {1,1}; + + set_priority(p8, prio, 8); + set_serial(p8, serial, 8); + set_marginal(p8, marginal, 8); + mp8.pgpolicyfn = group_by_serial; + assert_int_equal(group_paths(&mp8, 1), 0); + verify_pathgroups(&mp8, p8, groups, group_size, group_marginal, 2); +} + +static void test_group_by_serial_2_groups8_half_marginal8(void **state) +{ + char *serial[] = {"1", "2", "1", "1", "2", "2", "1", "2"}; + int marginal[] = {0,0,1,1,1,1,0,0}; + int prio[] = {3,2,2,1,2,2,1,2}; + int group0[] = {0,6}; + int group1[] = {1,7}; + int group2[] = {4,5}; + int group3[] = {2,3}; + int *groups[] = {group0, group1, group2, group3}; + int 
group_size[] = {2,2,2,2}; + int group_marginal[] = {0,0,1,1}; + + set_priority(p8, prio, 8); + set_serial(p8, serial, 8); + set_marginal(p8, marginal, 8); + mp8.pgpolicyfn = group_by_serial; + assert_int_equal(group_paths(&mp8, 1), 0); + verify_pathgroups(&mp8, p8, groups, group_size, group_marginal, 4); +} + +#define setup_test(name, nr) \ +cmocka_unit_test_setup_teardown(name ## nr, setup ## nr, teardown ## nr) + +int test_pgpolicies(void) +{ + const struct CMUnitTest tests[] = { + setup_test(test_one_group, 8), + setup_test(test_one_group, 4), + setup_test(test_one_group, 1), + setup_test(test_one_group, 0), + setup_test(test_one_group, _null), + setup_test(test_one_group_all_marginal, 8), + setup_test(test_one_group_half_marginal, 8), + setup_test(test_one_group_ignore_marginal, 8), + setup_test(test_one_group_one_marginal, 8), + setup_test(test_one_path_per_group_same, 8), + setup_test(test_one_path_per_group_increasing, 8), + setup_test(test_one_path_per_group_decreasing, 8), + setup_test(test_one_path_per_group_mixed, 8), + setup_test(test_one_path_per_group, 4), + setup_test(test_one_path_per_group, 1), + setup_test(test_one_path_per_group, 0), + setup_test(test_one_path_per_group, _null), + setup_test(test_one_path_per_group_mixed_all_marginal, 8), + setup_test(test_one_path_per_group_mixed_half_marginal, 8), + setup_test(test_group_by_prio_same, 8), + setup_test(test_group_by_prio_increasing, 8), + setup_test(test_group_by_prio_decreasing, 8), + setup_test(test_group_by_prio_mixed, 8), + setup_test(test_group_by_prio_mixed_no_marginal, 8), + setup_test(test_group_by_prio_2_groups, 8), + setup_test(test_group_by_prio_mixed, 4), + setup_test(test_group_by_prio_2_groups, 4), + setup_test(test_group_by_prio, 1), + setup_test(test_group_by_prio, 0), + setup_test(test_group_by_prio, _null), + setup_test(test_group_by_prio_mixed_all_marginal, 8), + setup_test(test_group_by_prio_mixed_half_marginal, 8), + setup_test(test_group_by_prio_mixed_one_marginal, 8), + 
setup_test(test_group_by_node_name_same, 8), + setup_test(test_group_by_node_name_increasing, 8), + setup_test(test_group_by_node_name_3_groups, 8), + setup_test(test_group_by_node_name_2_groups, 8), + setup_test(test_group_by_node_name_3_groups, 4), + setup_test(test_group_by_node_name_2_groups, 4), + setup_test(test_group_by_node_name, 1), + setup_test(test_group_by_node_name, 0), + setup_test(test_group_by_node_name, _null), + setup_test(test_group_by_node_name_2_groups_all_marginal, 8), + setup_test(test_group_by_node_name_2_groups_half_marginal, 8), + setup_test(test_group_by_serial_same, 8), + setup_test(test_group_by_serial_increasing, 8), + setup_test(test_group_by_serial_3_groups, 8), + setup_test(test_group_by_serial_2_groups, 8), + setup_test(test_group_by_serial_3_groups, 4), + setup_test(test_group_by_serial_2_groups, 4), + setup_test(test_group_by_serial, 1), + setup_test(test_group_by_serial, 0), + setup_test(test_group_by_serial, _null), + setup_test(test_group_by_serial_2_groups8_all_marginal, 8), + setup_test(test_group_by_serial_2_groups8_half_marginal, 8), + }; + return cmocka_run_group_tests(tests, setup, NULL); +} + +int main(void) +{ + int ret = 0; + + ret += test_pgpolicies(); + return ret; +} diff --git a/tests/test-lib.c b/tests/test-lib.c new file mode 100644 index 0000000..5927516 --- /dev/null +++ b/tests/test-lib.c @@ -0,0 +1,365 @@ +#include +#include +#include +#include +#include +#include +#include "debug.h" +#include "util.h" +#include "vector.h" +#include "structs.h" +#include "structs_vec.h" +#include "config.h" +#include "discovery.h" +#include "propsel.h" +#include "test-lib.h" + +const int default_mask = (DI_SYSFS|DI_BLACKLIST|DI_WWID|DI_CHECKER|DI_PRIO); +const char default_devnode[] = "sdTEST"; +const char default_wwid[] = "TEST-WWID"; +/* default_wwid should be a substring of default_wwid_1! */ +const char default_wwid_1[] = "TEST-WWID-1"; + +/* + * Helper wrappers for mock_path(). 
+ * + * We need to make pathinfo() think it has detected a device with + * certain vendor/product/rev. This requires faking lots of udev + * and sysfs function responses. + * + * This requires hwtable-test_OBJDEPS = ../libmultipath/discovery.o + * in the Makefile in order to wrap calls from discovery.o. + * + * Note that functions that are called and defined in discovery.o can't + * be wrapped this way (e.g. sysfs_get_vendor), because symbols are + * resolved by the assembler before the linking stage. + */ +/* Un-wrapped open(); __wrap_open() forwards every non-mocked path to it. */ +int __real_open(const char *path, int flags, int mode); +/* Sentinel path: __wrap_open() answers it with the fake descriptor 111 instead of opening anything. */ +static const char _mocked_filename[] = "mocked_path"; +int __wrap_open(const char *path, int flags, int mode) +{ + condlog(4, "%s: %s", __func__, path); + + if (!strcmp(path, _mocked_filename)) + return 111; + return __real_open(path, flags, mode); +} +/* Copies the next string queued with will_return() into value (bounded by len) and always returns 0; path is ignored. */ +int __wrap_execute_program(char *path, char *value, int len) +{ + char *val = mock_ptr_type(char *); + + condlog(5, "%s: %s", __func__, val); + strlcpy(value, val, len); + return 0; +} +/* Always returns false, so a mocked device is never reported as claimed. */ +bool __wrap_is_claimed_by_foreign(struct udev_device *ud) +{ + condlog(5, "%s: %p", __func__, ud); + return false; +} +/* Returns a fixed non-NULL dummy value; this function only logs it and never dereferences it. */ +struct udev_list_entry +*__wrap_udev_device_get_properties_list_entry(struct udev_device *ud) +{ + void *p = (void*)0x12345678; + condlog(5, "%s: %p", __func__, p); + + return p; +} +/* Always returns NULL, i.e. the mocked list never has a following entry. 
*/ +struct udev_list_entry +*__wrap_udev_list_entry_get_next(struct udev_list_entry *udle) +{ + void *p = NULL; + condlog(5, "%s: %p", __func__, p); + + return p; +} +/* Returns the next queued string as the entry name; udle is ignored. */ +const char *__wrap_udev_list_entry_get_name(struct udev_list_entry *udle) +{ + char *val = mock_ptr_type(char *); + + condlog(5, "%s: %s", __func__, val); + return val; +} +/* No reference counting for mocked devices: ref and unref both hand the pointer straight back. */ +struct udev_device *__wrap_udev_device_ref(struct udev_device *ud) +{ + return ud; +} + +struct udev_device *__wrap_udev_device_unref(struct udev_device *ud) +{ + return ud; +} +/* Returns the next queued string as the subsystem name; ud is ignored. */ +char *__wrap_udev_device_get_subsystem(struct udev_device *ud) +{ + char *val = mock_ptr_type(char *); + + condlog(5, "%s: %s", __func__, val); + return 
val; +} +/* Returns the next queued string as the device sysname; ud is ignored. */ +char *__wrap_udev_device_get_sysname(struct udev_device *ud) +{ + char *val = mock_ptr_type(char *); + + condlog(5, "%s: %s", __func__, val); + return val; +} +/* Returns the next queued string as the device node path; ud is ignored. */ +char *__wrap_udev_device_get_devnode(struct udev_device *ud) +{ + char *val = mock_ptr_type(char *); + + condlog(5, "%s: %s", __func__, val); + return val; +} +/* Every mocked device reports the same device number, makedev(17, 17). */ +dev_t __wrap_udev_device_get_devnum(struct udev_device *ud) +{ + condlog(5, "%s: %p", __func__, ud); + return makedev(17, 17); +} +/* Returns the next queued value whatever attr is asked for; attr is only logged. NOTE(review): mock_sysfs_pathinfo() queues NULL for this wrapper, and that NULL is then passed to the %s conversion in condlog() - confirm this is harmless. */ +char *__wrap_udev_device_get_sysattr_value(struct udev_device *ud, + const char *attr) +{ + char *val = mock_ptr_type(char *); + + condlog(5, "%s: %s->%s", __func__, attr, val); + return val; +} +/* Returns the next queued value whatever property is asked for; attr is only logged. */ +char *__wrap_udev_device_get_property_value(struct udev_device *ud, + const char *attr) +{ + char *val = mock_ptr_type(char *); + + condlog(5, "%s: %s->%s", __func__, attr, val); + return val; +} +/* Stores the fixed size 12345678 in *sz and returns 0; pp is ignored. */ +int __wrap_sysfs_get_size(struct path *pp, unsigned long long *sz) +{ + *sz = 12345678UL; + return 0; +} +/* Hands the caller's own type argument back as a stand-in parent pointer; ud and subsys are ignored. */ +void *__wrap_udev_device_get_parent_with_subsystem_devtype( + struct udev_device *ud, const char *subsys, char *type) +{ + /* return non-NULL for sysfs_get_tgt_nodename */ + return type; +} +/* Returns the next queued pointer as the parent device; ud is ignored. 
*/ +void *__wrap_udev_device_get_parent(struct udev_device *ud) +{ + char *val = mock_ptr_type(void *); + + condlog(5, "%s: %p", __func__, val); + return val; +} +/* Copies the next queued string into value (bounded by sz) and returns the length of what was stored; dev and attr_name are ignored. */ +ssize_t __wrap_sysfs_attr_get_value(struct udev_device *dev, + const char *attr_name, + char *value, size_t sz) +{ + char *val = mock_ptr_type(char *); + + condlog(5, "%s: %s", __func__, val); + strlcpy(value, val, sz); + return strlen(value); +} +/* Echoes st back unchanged; c is ignored. */ +int __wrap_checker_check(struct checker *c, int st) +{ + condlog(5, "%s: %d", __func__, st); + return st; +} +/* Every path gets the fixed priority 5; all arguments are ignored. */ +int __wrap_prio_getprio(struct prio *p, struct path *pp, unsigned int tmo) +{ + int pr = 5; + + condlog(5, "%s: %d", __func__, pr); + return pr; +} +/* Fills mp from the arguments, substituting a default for each NULL string, always adds NEED_SELECT_PRIO and NEED_FD to flags, and returns mp. Strings are stored by pointer, not copied. */ +struct mocked_path *fill_mocked_path(struct mocked_path *mp, + const char *vendor, const char *product, + const char *rev, const char *wwid, + const char 
*devnode, unsigned int flags) +{ + mp->vendor = (vendor ? vendor : "noname"); + mp->product = (product ? product : "noprod"); + mp->rev = (rev ? rev : "0"); + mp->wwid = (wwid ? wwid : default_wwid); + mp->devnode = (devnode ? devnode : default_devnode); + mp->flags = flags|NEED_SELECT_PRIO|NEED_FD; + return mp; +} +/* Describes an existing path as a mocked_path: strings are borrowed from pp (not copied) and flags are derived from pp's state - NEED_SELECT_PRIO unless a prio is already selected, NEED_FD when fd is negative, USE_GETUID when getuid is set. Returns mp. */ +struct mocked_path *mocked_path_from_path(struct mocked_path *mp, + const struct path *pp) +{ + mp->vendor = pp->vendor_id; + mp->product = pp->product_id; + mp->rev = pp->rev; + mp->wwid = pp->wwid; + mp->devnode = pp->dev; + mp->flags = (prio_selected(&pp->prio) ? 0 : NEED_SELECT_PRIO) | + (pp->fd < 0 ? NEED_FD : 0) | + (pp->getuid ? USE_GETUID : 0); + return mp; +} +/* Queues the answers used when DI_SYSFS is requested: subsystem scsi, sysname 4:0:3:1, then vendor, product and rev from mp, followed by the values for the target node name lookup. cmocka hands out queued values per wrapped function in the order they were queued, so the sequence below has to follow the call order of the code under test. 
*/ +static void mock_sysfs_pathinfo(const struct mocked_path *mp) +{ + static const char hbtl[] = "4:0:3:1"; + + will_return(__wrap_udev_device_get_subsystem, "scsi"); + will_return(__wrap_udev_device_get_sysname, hbtl); + will_return(__wrap_udev_device_get_sysname, hbtl); + will_return(__wrap_udev_device_get_sysattr_value, mp->vendor); + will_return(__wrap_udev_device_get_sysname, hbtl); + will_return(__wrap_udev_device_get_sysattr_value, mp->product); + will_return(__wrap_udev_device_get_sysname, hbtl); + will_return(__wrap_udev_device_get_sysattr_value, mp->rev); + + /* sysfs_get_tgt_nodename */ + will_return(__wrap_udev_device_get_sysattr_value, NULL); + will_return(__wrap_udev_device_get_parent, NULL); + will_return(__wrap_udev_device_get_parent, NULL); + will_return(__wrap_udev_device_get_sysname, "nofibre"); + will_return(__wrap_udev_device_get_sysname, "noiscsi"); + will_return(__wrap_udev_device_get_parent, NULL); + will_return(__wrap_udev_device_get_sysname, "ata25"); +} + +/* + * Pretend we detected a SCSI device with given vendor/prod/rev + */ +void mock_pathinfo(int mask, const struct mocked_path *mp) +{ + if (mp->flags & DEV_HIDDEN) { /* NOTE(review): presumably answers a sysfs hidden-attribute query - confirm against pathinfo(). For a hidden device nothing further is queued. */ + will_return(__wrap_udev_device_get_sysattr_value, "1"); + return; + } else + will_return(__wrap_udev_device_get_sysattr_value, "0"); + + /* 
filter_property */ + will_return(__wrap_udev_device_get_sysname, mp->devnode); + if (mp->flags & BL_BY_PROPERTY) { + will_return(__wrap_udev_list_entry_get_name, "BAZ"); + return; + } else + will_return(__wrap_udev_list_entry_get_name, + "SCSI_IDENT_LUN_NAA_EXT"); + if (mask & DI_SYSFS) + mock_sysfs_pathinfo(mp); + + if (mp->flags & BL_BY_DEVICE && + (mask & DI_BLACKLIST && mask & DI_SYSFS)) + return; + + /* path_offline */ + will_return(__wrap_udev_device_get_subsystem, "scsi"); + will_return(__wrap_sysfs_attr_get_value, "running"); + + if (mask & DI_NOIO) + return; + + /* fake open() in pathinfo() */ + if (mp->flags & NEED_FD) + will_return(__wrap_udev_device_get_devnode, _mocked_filename); + /* DI_SERIAL is unsupported */ + assert_false(mask & DI_SERIAL); + + if (mask & DI_WWID) { + if (mp->flags & USE_GETUID) + will_return(__wrap_execute_program, mp->wwid); + else + /* get_udev_uid() */ + will_return(__wrap_udev_device_get_property_value, + mp->wwid); + } + + if (mask & DI_CHECKER) { + /* get_state -> sysfs_get_timeout */ + will_return(__wrap_udev_device_get_subsystem, "scsi"); + will_return(__wrap_udev_device_get_sysattr_value, "180"); + } + + if (mask & DI_PRIO && mp->flags & NEED_SELECT_PRIO) { + + /* sysfs_get_timeout, again (!?) 
*/ + will_return(__wrap_udev_device_get_subsystem, "scsi"); + will_return(__wrap_udev_device_get_sysattr_value, "180"); + + } +} +/* Queues one extra sysname answer (mp->devnode) ahead of everything mock_pathinfo() queues; presumably store_pathinfo() reads the sysname itself before calling pathinfo() - confirm. */ +void mock_store_pathinfo(int mask, const struct mocked_path *mp) +{ + will_return(__wrap_udev_device_get_sysname, mp->devnode); + mock_pathinfo(mask, mp); +} +/* Creates a path via store_pathinfo() with the udev/sysfs answers mocked. The address of the local mocked_path is passed in the device argument position; the wrappers in this file never dereference their device argument. When any BL_* bit is set in flags, asserts PATHINFO_SKIPPED and returns NULL; otherwise asserts PATHINFO_OK and returns the non-NULL path. */ +struct path *__mock_path(vector pathvec, + const char *vnd, const char *prd, + const char *rev, const char *wwid, + const char *dev, + unsigned int flags, int mask) +{ + struct mocked_path mop; + struct path *pp; + struct config *conf; + int r; + + fill_mocked_path(&mop, vnd, prd, rev, wwid, dev, flags); + mock_store_pathinfo(mask, &mop); + + conf = get_multipath_config(); + r = store_pathinfo(pathvec, conf, (void *)&mop, mask, &pp); + put_multipath_config(conf); + + if (flags & BL_MASK) { + assert_int_equal(r, PATHINFO_SKIPPED); + return NULL; + } + assert_int_equal(r, PATHINFO_OK); + assert_non_null(pp); + return pp; +} + +/* Builds a multipath map around pp with add_map_with_path() after queuing the answers for a DI_CHECKER|DI_PRIO pathinfo pass, asserts the map is non-NULL, applies the pgpolicy, no_path_retry, retain_hwhandler and minio selections, and returns the map. 
*/ +struct multipath *__mock_multipath(struct vectors *vecs, struct path *pp) +{ + struct multipath *mp; + struct config *conf; + struct mocked_path mop; + + mocked_path_from_path(&mop, pp); + /* pathinfo() call in adopt_paths */ + mock_pathinfo(DI_CHECKER|DI_PRIO, &mop); + + mp = add_map_with_path(vecs, pp, 1); + assert_ptr_not_equal(mp, NULL); + + /* TBD: mock setup_map() ... 
*/ + conf = get_multipath_config(); + select_pgpolicy(conf, mp); + select_no_path_retry(conf, mp); + select_retain_hwhandler(conf, mp); + select_minio(conf, mp); + put_multipath_config(conf); + + return mp; +} diff --git a/tests/test-lib.h b/tests/test-lib.h new file mode 100644 index 0000000..7643ab6 --- /dev/null +++ b/tests/test-lib.h @@ -0,0 +1,68 @@ +#ifndef __LIB_H +#define __LIB_H + +extern const int default_mask; +extern const char default_devnode[]; +extern const char default_wwid[]; +extern const char default_wwid_1[]; + +enum { + BL_BY_DEVNODE = (1 << 0), + BL_BY_DEVICE = (1 << 1), + BL_BY_WWID = (1 << 2), + BL_BY_PROPERTY = (1 << 3), + BL_MASK = BL_BY_DEVNODE|BL_BY_DEVICE|BL_BY_WWID|BL_BY_PROPERTY, + NEED_SELECT_PRIO = (1 << 8), + NEED_FD = (1 << 9), + USE_GETUID = (1 << 10), + DEV_HIDDEN = (1 << 11), +}; + +struct mocked_path { + const char *vendor; + const char *product; + const char *rev; + const char *wwid; + const char *devnode; + unsigned int flags; +}; + +struct mocked_path *fill_mocked_path(struct mocked_path *mp, + const char *vendor, + const char *product, + const char *rev, + const char *wwid, + const char *devnode, + unsigned int flags); + +struct mocked_path *mocked_path_from_path(struct mocked_path *mp, + const struct path *pp); + +void mock_pathinfo(int mask, const struct mocked_path *mp); +void mock_store_pathinfo(int mask, const struct mocked_path *mp); +struct path *__mock_path(vector pathvec, + const char *vnd, const char *prd, + const char *rev, const char *wwid, + const char *dev, + unsigned int flags, int mask); + +#define mock_path(v, p) \ + __mock_path(hwt->vecs->pathvec, (v), (p), "0", NULL, NULL, \ + 0, default_mask) +#define mock_path_flags(v, p, f) \ + __mock_path(hwt->vecs->pathvec, (v), (p), "0", NULL, NULL, \ + (f), default_mask) +#define mock_path_blacklisted(v, p) \ + __mock_path(hwt->vecs->pathvec, (v), (p), "0", NULL, NULL, \ + BL_BY_DEVICE, default_mask) +#define mock_path_wwid(v, p, w) \ + 
__mock_path(hwt->vecs->pathvec, (v), (p), "0", (w), NULL, \ + 0, default_mask) +#define mock_path_wwid_flags(v, p, w, f) \ + __mock_path(hwt->vecs->pathvec, (v), (p), "0", (w), \ + NULL, (f), default_mask) +/* The mock_* macros here expand to expressions that use hwt, so a variable of that name must be in scope at every call site. */ +struct multipath *__mock_multipath(struct vectors *vecs, struct path *pp); +#define mock_multipath(pp) __mock_multipath(hwt->vecs, (pp)) + +#endif diff --git a/tests/test-log.c b/tests/test-log.c new file mode 100644 index 0000000..d685d58 --- /dev/null +++ b/tests/test-log.c @@ -0,0 +1,27 @@ +#include +#include +#include +#include +#include +#include "log.h" +#include "test-log.h" +/* Stand-in for dlog(): takes the expected priority and message queued by expect_condlog() and asserts that the actual priority and the formatted text (bounded to MAX_MSG_SIZE by vsnprintf) match them. sink is not checked. */ +__attribute__((format(printf, 3, 0))) +void __wrap_dlog (int sink, int prio, const char * fmt, ...) +{ + char buff[MAX_MSG_SIZE]; + va_list ap; + + assert_int_equal(prio, mock_type(int)); + va_start(ap, fmt); + vsnprintf(buff, MAX_MSG_SIZE, fmt, ap); + va_end(ap); + assert_string_equal(buff, mock_ptr_type(char *)); +} +/* Queues one expected log message for __wrap_dlog(): priority first, then text, which is the order in which __wrap_dlog() reads them back. */ +void expect_condlog(int prio, char *string) +{ + will_return(__wrap_dlog, prio); + will_return(__wrap_dlog, string); +} + diff --git a/tests/test-log.h b/tests/test-log.h new file mode 100644 index 0000000..2c878c6 --- /dev/null +++ b/tests/test-log.h @@ -0,0 +1,7 @@ +#ifndef _TEST_LOG_H +#define _TEST_LOG_H +/* __wrap_dlog() validates each logged message; expect_condlog() queues the (prio, text) pair it will compare against. 
*/ +void __wrap_dlog (int sink, int prio, const char * fmt, ...); +void expect_condlog(int prio, char *string); + +#endif diff --git a/tests/uevent.c b/tests/uevent.c new file mode 100644 index 0000000..f4afd9b --- /dev/null +++ b/tests/uevent.c @@ -0,0 +1,331 @@ +/* + * Copyright (c) 2018 SUSE Linux GmbH + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + +#include +#include +#include +#include +#include +#include +#include "list.h" +#include "uevent.h" + +#include "globals.c" + +/* Private prototypes missing in uevent.h */ +struct uevent * alloc_uevent(void); +void uevent_get_wwid(struct uevent *uev); + +/* Stringify helpers */ +#define _str_(x) #x +#define str(x) _str_(x) + +#define MAJOR 17 +#define MINOR 217 +#define DISK_RO 0 +#define DM_NAME "spam" +#define WWID "foo" +/* Group fixture: allocates one uevent for kernel device sdo, fills its environment and registers the uid_attrs mapping used by the wwid tests. Slots 0, 2, 3 and 4 keep fixed positions because later tests overwrite envp[0], envp[2] and envp[3] by index. Returns 0 on success, -1 if the uevent cannot be allocated. 
*/ +static int setup_uev(void **state) +{ + static char test_uid_attrs[] = + "dasd:ID_SPAM sd:ID_BOGUS nvme:ID_EGGS "; + + struct uevent *uev = alloc_uevent(); + struct config *conf; + + if (uev == NULL) + return -1; + + *state = uev; + uev->kernel = "sdo"; + uev->envp[0] = "MAJOR=" str(MAJOR); + uev->envp[1] = "ID_SPAM=nonsense"; /* decoy: ID_SPAM is the uid attribute for dasd, not for sd devices */ + uev->envp[2] = "MINOR=" str(MINOR); + uev->envp[3] = "DM_NAME=" DM_NAME; + uev->envp[4] = "DISK_RO=" str(DISK_RO); + uev->envp[5] = "ID_BOGUS=" WWID; /* real wwid source for sd devices (sd:ID_BOGUS above); gets its own slot so it no longer overwrites the decoy in envp[1] */ + uev->envp[6] = NULL; /* terminator; assumes envp has room for at least 7 entries - confirm against struct uevent */ + + conf = get_multipath_config(); + parse_uid_attrs(test_uid_attrs, conf); + put_multipath_config(conf); + return 0; +} +/* Releases the uevent stored in *state by setup_uev(). */ +static int teardown(void **state) +{ + free(*state); + return 0; +} +/* The *_good tests below read MAJOR, MINOR and DISK_RO back from the environment set up by setup_uev(). */ +static void test_major_good(void **state) +{ + struct uevent *uev = *state; + + assert_int_equal(uevent_get_major(uev), MAJOR); +} + +static void test_minor_good(void **state) +{ + struct uevent *uev = *state; + + assert_int_equal(uevent_get_minor(uev), MINOR); +} + +static void test_ro_good(void **state) +{ + struct uevent *uev = *state; + + assert_int_equal(uevent_get_disk_ro(uev), DISK_RO); +} + +static void test_uid_attrs(void **state) +{ + /* see test_uid_attrs above */ + struct config *conf = get_multipath_config(); + vector attrs = &conf->uid_attrs; + + assert_int_equal(VECTOR_SIZE(attrs), 3); + assert_null(get_uid_attribute_by_attrs(conf, "hda")); + 
assert_string_equal("ID_BOGUS", + get_uid_attribute_by_attrs(conf, "sdaw")); + assert_string_equal("ID_SPAM", + get_uid_attribute_by_attrs(conf, "dasdu")); + assert_string_equal("ID_EGGS", + get_uid_attribute_by_attrs(conf, "nvme2n4")); + put_multipath_config(conf); +} + +static void test_wwid(void **state) +{ + struct uevent *uev = *state; + uevent_get_wwid(uev); + + assert_string_equal(uev->wwid, WWID); +} + +static void test_major_bad_0(void **state) +{ + struct uevent *uev = *state; + + uev->envp[0] = "MAJOR" str(MAJOR); + assert_int_equal(uevent_get_major(uev), -1); +} + +static void test_major_bad_1(void **state) +{ + struct uevent *uev = *state; + + uev->envp[0] = "MAJOr=" str(MAJOR); + assert_int_equal(uevent_get_major(uev), -1); +} + +static void test_major_bad_2(void **state) +{ + struct uevent *uev = *state; + + uev->envp[0] = "MAJORIE=" str(MAJOR); + assert_int_equal(uevent_get_major(uev), -1); +} + +static void test_major_bad_3(void **state) +{ + struct uevent *uev = *state; + + uev->envp[0] = "MAJOR=max"; + assert_int_equal(uevent_get_major(uev), -1); +} + +static void test_major_bad_4(void **state) +{ + struct uevent *uev = *state; + + uev->envp[0] = "MAJOR=0x10"; + assert_int_equal(uevent_get_major(uev), -1); +} + +static void test_major_bad_5(void **state) +{ + struct uevent *uev = *state; + + uev->envp[0] = "MAJO=" str(MAJOR); + assert_int_equal(uevent_get_major(uev), -1); +} + +static void test_major_bad_6(void **state) +{ + struct uevent *uev = *state; + + uev->envp[0] = "MAJOR=" str(-MAJOR); + assert_int_equal(uevent_get_major(uev), -1); +} + +static void test_major_bad_7(void **state) +{ + struct uevent *uev = *state; + + uev->envp[0] = "MAJOR="; + assert_int_equal(uevent_get_major(uev), -1); +} + +static void test_major_bad_8(void **state) +{ + struct uevent *uev = *state; + + uev->envp[0] = "MAJOR"; + assert_int_equal(uevent_get_major(uev), -1); +} + +static void test_dm_name_good(void **state) +{ + struct uevent *uev = *state; + char *name 
= uevent_get_dm_name(uev); + + assert_string_equal(name, DM_NAME); + FREE(name); +} + +static void test_dm_name_bad_0(void **state) +{ + struct uevent *uev = *state; + char *name; + + uev->envp[3] = "DM_NAME" DM_NAME; + name = uevent_get_dm_name(uev); + assert_ptr_equal(name, NULL); + FREE(name); +} + +static void test_dm_name_bad_1(void **state) +{ + struct uevent *uev = *state; + char *name; + + uev->envp[3] = "DM_NAMES=" DM_NAME; + name = uevent_get_dm_name(uev); + assert_ptr_equal(name, NULL); + FREE(name); +} + +static void test_dm_name_good_1(void **state) +{ + struct uevent *uev = *state; + char *name; + + /* Note we change index 2 here */ + uev->envp[2] = "DM_NAME=" DM_NAME; + name = uevent_get_dm_name(uev); + assert_string_equal(name, DM_NAME); + FREE(name); +} + +static void test_dm_uuid_false_0(void **state) +{ + struct uevent *uev = *state; + + assert_false(uevent_is_mpath(uev)); +} + +static void test_dm_uuid_true_0(void **state) +{ + struct uevent *uev = *state; + + uev->envp[3] = "DM_UUID=mpath-foo"; + assert_true(uevent_is_mpath(uev)); +} + +static void test_dm_uuid_false_1(void **state) +{ + struct uevent *uev = *state; + + uev->envp[3] = "DM_UUID.mpath-foo"; + assert_false(uevent_is_mpath(uev)); +} + +static void test_dm_uuid_false_2(void **state) +{ + struct uevent *uev = *state; + + uev->envp[3] = "DM_UUID=mpath-"; + assert_false(uevent_is_mpath(uev)); +} + +static void test_dm_uuid_false_3(void **state) +{ + struct uevent *uev = *state; + + uev->envp[3] = "DM_UU=mpath-foo"; + assert_false(uevent_is_mpath(uev)); +} + +static void test_dm_uuid_false_4(void **state) +{ + struct uevent *uev = *state; + + uev->envp[3] = "DM_UUID=mpathfoo"; + assert_false(uevent_is_mpath(uev)); +} + +static void test_dm_uuid_false_5(void **state) +{ + struct uevent *uev = *state; + + uev->envp[3] = "DM_UUID="; + assert_false(uevent_is_mpath(uev)); +} + +int test_uevent_get_XXX(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_major_good), + 
cmocka_unit_test(test_minor_good), + cmocka_unit_test(test_ro_good), + cmocka_unit_test(test_dm_name_good), + cmocka_unit_test(test_uid_attrs), + cmocka_unit_test(test_wwid), + cmocka_unit_test(test_major_bad_0), + cmocka_unit_test(test_major_bad_1), + cmocka_unit_test(test_major_bad_2), + cmocka_unit_test(test_major_bad_3), + cmocka_unit_test(test_major_bad_4), + cmocka_unit_test(test_major_bad_5), + cmocka_unit_test(test_major_bad_6), + cmocka_unit_test(test_major_bad_7), + cmocka_unit_test(test_major_bad_8), + cmocka_unit_test(test_dm_name_bad_0), + cmocka_unit_test(test_dm_name_bad_1), + cmocka_unit_test(test_dm_name_good_1), + cmocka_unit_test(test_dm_uuid_false_0), + cmocka_unit_test(test_dm_uuid_true_0), + cmocka_unit_test(test_dm_uuid_false_1), + cmocka_unit_test(test_dm_uuid_false_2), + cmocka_unit_test(test_dm_uuid_false_3), + cmocka_unit_test(test_dm_uuid_false_4), + cmocka_unit_test(test_dm_uuid_false_5), + }; + return cmocka_run_group_tests(tests, setup_uev, teardown); +} + +int main(void) +{ + int ret = 0; + + ret += test_uevent_get_XXX(); + return ret; +} diff --git a/tests/unaligned.c b/tests/unaligned.c new file mode 100644 index 0000000..7ece1de --- /dev/null +++ b/tests/unaligned.c @@ -0,0 +1,96 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "unaligned.h" + +#define SIZE 16 +static const char memory[8] = { + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef +}; + +static const uint64_t intval64 = 0x0123456789abcdef; +static const uint32_t intval32 = 0x01234567; +static const uint16_t intval16 = 0x0123; + +#include "globals.c" + +static int setup(void **state) +{ + return posix_memalign(state, 16, 2 * SIZE); +} + +static int teardown(void **state) +{ + free(*state); + return 0; +} + + +#define make_test(bits, offset) \ + static void test_ ## bits ## _ ## offset(void **state) \ +{ \ + int len = bits/8; \ + uint8_t *c = *state; \ + uint8_t *p = *state + SIZE; \ + uint64_t u; \ + \ + 
assert_in_range(len, 1, SIZE); \ + assert_in_range(offset + len, 1, SIZE); \ + memset(c, 0, 2 * SIZE); \ + memcpy(c + offset, memory, len); \ + \ + u = get_unaligned_be##bits(c + offset); \ + assert_int_equal(u, intval##bits); \ + put_unaligned_be##bits(u, p + offset); \ + assert_memory_equal(c + offset, p + offset, len); \ +} + +make_test(16, 0); +make_test(16, 1); +make_test(32, 0); +make_test(32, 1); +make_test(32, 2); +make_test(32, 3); +make_test(64, 0); +make_test(64, 1); +make_test(64, 2); +make_test(64, 3); +make_test(64, 4); +make_test(64, 5); +make_test(64, 6); +make_test(64, 7); + +int test_unaligned(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_16_0), + cmocka_unit_test(test_16_1), + cmocka_unit_test(test_32_0), + cmocka_unit_test(test_32_1), + cmocka_unit_test(test_32_2), + cmocka_unit_test(test_32_3), + cmocka_unit_test(test_64_0), + cmocka_unit_test(test_64_1), + cmocka_unit_test(test_64_2), + cmocka_unit_test(test_64_3), + cmocka_unit_test(test_64_4), + cmocka_unit_test(test_64_5), + cmocka_unit_test(test_64_6), + cmocka_unit_test(test_64_7), + }; + return cmocka_run_group_tests(tests, setup, teardown); +} + +int main(void) +{ + int ret = 0; + + ret += test_unaligned(); + return ret; +} diff --git a/tests/util.c b/tests/util.c new file mode 100644 index 0000000..7c486fc --- /dev/null +++ b/tests/util.c @@ -0,0 +1,411 @@ +/* + * Copyright (c) 2018 Benjamin Marzinski, Redhat + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + +#include +#include +#include +#include +#include +#include +#include "util.h" + +#include "globals.c" + +#define BITARR_SZ 4 + +static void test_basenamecpy_good0(void **state) +{ + char dst[10]; + + assert_int_equal(basenamecpy("foobar", dst, sizeof(dst)), 6); + assert_string_equal(dst, "foobar"); +} + +static void test_basenamecpy_good1(void **state) +{ + char dst[10]; + + assert_int_equal(basenamecpy("foo/bar", dst, sizeof(dst)), 3); + assert_string_equal(dst, "bar"); +} + +static void test_basenamecpy_good2(void **state) +{ + char dst[10]; + + assert_int_equal(basenamecpy("/thud/blat", dst, sizeof(dst)), 4); + assert_string_equal(dst, "blat"); +} + +static void test_basenamecpy_good3(void **state) +{ + char dst[4]; + + assert_int_equal(basenamecpy("foo/bar", dst, sizeof(dst)), 3); + assert_string_equal(dst, "bar"); +} + +static void test_basenamecpy_good4(void **state) +{ + char dst[10]; + + assert_int_equal(basenamecpy("/xyzzy", dst, sizeof(dst)), 5); + assert_string_equal(dst, "xyzzy"); +} + +static void test_basenamecpy_good5(void **state) +{ + char dst[4]; + + assert_int_equal(basenamecpy("/foo/bar\n", dst, sizeof(dst)), 3); + assert_string_equal(dst, "bar"); +} + +/* multipath expects any trailing whitespace to be stripped off the basename, + * so that it will match pp->dev */ +static void test_basenamecpy_good6(void **state) +{ + char dst[6]; + + assert_int_equal(basenamecpy("/xyzzy/plugh ", dst, sizeof(dst)), 5); + assert_string_equal(dst, "plugh"); +} + +static void test_basenamecpy_good7(void **state) +{ + char src[] = "/foo/bar"; + char dst[10]; + + assert_int_equal(basenamecpy(src, dst, sizeof(dst)), 3); + + strcpy(src, "badbadno"); + assert_string_equal(dst, "bar"); +} + +/* buffer too small */ +static void test_basenamecpy_bad0(void **state) +{ + char dst[3]; + + assert_int_equal(basenamecpy("baz", dst, sizeof(dst)), 0); 
+} + +/* ends in slash */ +static void test_basenamecpy_bad1(void **state) +{ + char dst[10]; + + assert_int_equal(basenamecpy("foo/bar/", dst, sizeof(dst)), 0); +} + +static void test_basenamecpy_bad2(void **state) +{ + char dst[10]; + + assert_int_equal(basenamecpy(NULL, dst, sizeof(dst)), 0); +} + +static void test_basenamecpy_bad3(void **state) +{ + char dst[10]; + + assert_int_equal(basenamecpy("", dst, sizeof(dst)), 0); +} + +static void test_basenamecpy_bad4(void **state) +{ + char dst[10]; + + assert_int_equal(basenamecpy("/", dst, sizeof(dst)), 0); +} + +static void test_basenamecpy_bad5(void **state) +{ + char dst[10]; + + assert_int_equal(basenamecpy("baz/qux", NULL, sizeof(dst)), 0); +} + +static void test_bitmask_1(void **state) +{ + uint64_t arr[BITARR_SZ]; + int i, j, k, m, b; + + memset(arr, 0, sizeof(arr)); + + for (j = 0; j < BITARR_SZ; j++) { + for (i = 0; i < 64; i++) { + b = 64 * j + i; + assert(!is_bit_set_in_array(b, arr)); + set_bit_in_array(b, arr); + for (k = 0; k < BITARR_SZ; k++) { + printf("b = %d j = %d k = %d a = %"PRIx64"\n", + b, j, k, arr[k]); + if (k == j) + assert_int_equal(arr[j], 1ULL << i); + else + assert_int_equal(arr[k], 0ULL); + } + for (m = 0; m < 64; m++) + if (i == m) + assert(is_bit_set_in_array(64 * j + m, + arr)); + else + assert(!is_bit_set_in_array(64 * j + m, + arr)); + clear_bit_in_array(b, arr); + assert(!is_bit_set_in_array(b, arr)); + for (k = 0; k < BITARR_SZ; k++) + assert_int_equal(arr[k], 0ULL); + } + } +} + +static void test_bitmask_2(void **state) +{ + uint64_t arr[BITARR_SZ]; + int i, j, k, m, b; + + memset(arr, 0, sizeof(arr)); + + for (j = 0; j < BITARR_SZ; j++) { + for (i = 0; i < 64; i++) { + b = 64 * j + i; + assert(!is_bit_set_in_array(b, arr)); + set_bit_in_array(b, arr); + for (m = 0; m < 64; m++) + if (m <= i) + assert(is_bit_set_in_array(64 * j + m, + arr)); + else + assert(!is_bit_set_in_array(64 * j + m, + arr)); + assert(is_bit_set_in_array(b, arr)); + for (k = 0; k < BITARR_SZ; k++) { + if 
(k < j || (k == j && i == 63)) + assert_int_equal(arr[k], ~0ULL); + else if (k > j) + assert_int_equal(arr[k], 0ULL); + else + assert_int_equal( + arr[k], + (1ULL << (i + 1)) - 1); + } + } + } + for (j = 0; j < BITARR_SZ; j++) { + for (i = 0; i < 64; i++) { + b = 64 * j + i; + assert(is_bit_set_in_array(b, arr)); + clear_bit_in_array(b, arr); + for (m = 0; m < 64; m++) + if (m <= i) + assert(!is_bit_set_in_array(64 * j + m, + arr)); + else + assert(is_bit_set_in_array(64 * j + m, + arr)); + assert(!is_bit_set_in_array(b, arr)); + for (k = 0; k < BITARR_SZ; k++) { + if (k < j || (k == j && i == 63)) + assert_int_equal(arr[k], 0ULL); + else if (k > j) + assert_int_equal(arr[k], ~0ULL); + else + assert_int_equal( + arr[k], + ~((1ULL << (i + 1)) - 1)); + } + } + } +} + +int test_basenamecpy(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_basenamecpy_good0), + cmocka_unit_test(test_basenamecpy_good1), + cmocka_unit_test(test_basenamecpy_good2), + cmocka_unit_test(test_basenamecpy_good3), + cmocka_unit_test(test_basenamecpy_good4), + cmocka_unit_test(test_basenamecpy_good5), + cmocka_unit_test(test_basenamecpy_good6), + cmocka_unit_test(test_basenamecpy_good7), + cmocka_unit_test(test_basenamecpy_bad0), + cmocka_unit_test(test_basenamecpy_bad1), + cmocka_unit_test(test_basenamecpy_bad2), + cmocka_unit_test(test_basenamecpy_bad3), + cmocka_unit_test(test_basenamecpy_bad4), + cmocka_unit_test(test_basenamecpy_bad5), + cmocka_unit_test(test_bitmask_1), + cmocka_unit_test(test_bitmask_2), + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} + +static const char src_str[] = "Hello"; + +/* strlcpy with length 0 */ +static void test_strlcpy_0(void **state) +{ + char tst[] = "word"; + int rc; + + rc = strlcpy(tst, src_str, 0); + assert_int_equal(rc, strlen(src_str)); + assert_string_equal(tst, "word"); +} + +/* strlcpy with length 1 */ +static void test_strlcpy_1(void **state) +{ + char tst[] = "word"; + int rc; + + rc = strlcpy(tst, src_str, 1); + 
assert_int_equal(rc, strlen(src_str)); + assert_int_equal(tst[0], '\0'); + assert_string_equal(tst + 1, "ord"); +} + +/* strlcpy with length 2 */ +static void test_strlcpy_2(void **state) +{ + char tst[] = "word"; + int rc; + + rc = strlcpy(tst, src_str, 2); + assert_int_equal(rc, strlen(src_str)); + assert_int_equal(tst[0], src_str[0]); + assert_int_equal(tst[1], '\0'); + assert_string_equal(tst + 2, "rd"); +} + +/* strlcpy with dst length < src length */ +static void test_strlcpy_3(void **state) +{ + char tst[] = "word"; + int rc; + + rc = strlcpy(tst, src_str, sizeof(tst)); + assert_int_equal(rc, strlen(src_str)); + assert_int_equal(sizeof(tst) - 1, strlen(tst)); + assert_true(strncmp(tst, src_str, sizeof(tst) - 1) == 0); +} + +/* strlcpy with dst length > src length */ +static void test_strlcpy_4(void **state) +{ + static const char old[] = "0123456789"; + char *tst; + int rc; + + tst = strdup(old); + rc = strlcpy(tst, src_str, sizeof(old)); + assert_int_equal(rc, strlen(src_str)); + assert_string_equal(src_str, tst); + assert_string_equal(tst + sizeof(src_str), old + sizeof(src_str)); + free(tst); +} + +/* strlcpy with dst length = src length, dst not terminated */ +static void test_strlcpy_5(void **state) +{ + char *tst; + int rc; + const int sz = sizeof(src_str); + + tst = malloc(sz); + memset(tst, 'f', sizeof(src_str)); + + rc = strlcpy(tst, src_str, sz); + assert_int_equal(rc, strlen(src_str)); + assert_string_equal(src_str, tst); + + free(tst); +} + +/* strlcpy with dst length > src length, dst not terminated */ +static void test_strlcpy_6(void **state) +{ + char *tst; + int rc; + const int sz = sizeof(src_str); + + tst = malloc(sz + 2); + memset(tst, 'f', sz + 2); + + rc = strlcpy(tst, src_str, sz + 2); + assert_int_equal(rc, strlen(src_str)); + assert_string_equal(src_str, tst); + assert_int_equal(tst[sz], 'f'); + assert_int_equal(tst[sz + 1], 'f'); + + free(tst); +} + +/* strlcpy with empty src */ +static void test_strlcpy_7(void **state) +{ + char 
tst[] = "word"; + static const char empty[] = ""; + int rc; + + rc = strlcpy(tst, empty, sizeof(tst)); + assert_int_equal(rc, strlen(empty)); + assert_string_equal(empty, tst); + assert_string_equal(tst + 1, "ord"); +} + +/* strlcpy with empty src, length 0 */ +static void test_strlcpy_8(void **state) +{ + char tst[] = "word"; + static const char empty[] = ""; + int rc; + + rc = strlcpy(tst, empty, 0); + assert_int_equal(rc, strlen(empty)); + assert_string_equal("word", tst); +} + +static int test_strlcpy(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_strlcpy_0), + cmocka_unit_test(test_strlcpy_1), + cmocka_unit_test(test_strlcpy_2), + cmocka_unit_test(test_strlcpy_3), + cmocka_unit_test(test_strlcpy_4), + cmocka_unit_test(test_strlcpy_5), + cmocka_unit_test(test_strlcpy_6), + cmocka_unit_test(test_strlcpy_7), + cmocka_unit_test(test_strlcpy_8), + }; + + return cmocka_run_group_tests(tests, NULL, NULL); +} + +int main(void) +{ + int ret = 0; + + ret += test_basenamecpy(); + ret += test_strlcpy(); + return ret; +} diff --git a/tests/vpd.c b/tests/vpd.c new file mode 100644 index 0000000..3cbad81 --- /dev/null +++ b/tests/vpd.c @@ -0,0 +1,806 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright (c) 2019 Martin Wilck, SUSE Linux GmbH, Nuremberg */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "unaligned.h" +#include "debug.h" +#include "vector.h" +#include "structs.h" +#include "discovery.h" +#include "globals.c" + +#define VPD_BUFSIZ 4096 + +struct vpdtest { + unsigned char vpdbuf[VPD_BUFSIZ]; + char wwid[WWID_SIZE]; +}; + +static int setup(void **state) +{ + struct vpdtest *vt = malloc(sizeof(*vt)); + + if (vt == NULL) + return -1; + *state = vt; + return 0; +} + +static int teardown(void **state) +{ + struct vpdtest *vt = *state; + + free(vt); + *state = NULL; + return 0; +} + +/* vendor_id should have less than 8 chars to 
test space handling */ +static const char vendor_id[] = "Linux"; +static const char test_id[] = + "A123456789AbcDefB123456789AbcDefC123456789AbcDefD123456789AbcDef"; + +int __wrap_ioctl(int fd, unsigned long request, void *param) +{ + int len; + struct sg_io_hdr *io_hdr; + unsigned char *val; + + len = mock(); + io_hdr = (struct sg_io_hdr *)param; + assert_in_range(len, 0, io_hdr->dxfer_len); + val = mock_ptr_type(unsigned char *); + io_hdr->status = 0; + memcpy(io_hdr->dxferp, val, len); + return 0; +} + + +/** + * create_vpd80() - create a "unit serial number" VPD page. + * @buf: VPD buffer + * @bufsiz: length of VPD buffer + * @id: input ID + * @size: value for the "page length" field + * @len: actual number of characters to use from @id + * + * If len < size, the content will be right aligned, as mandated by the + * SPC spec. + * + * Return: VPD length. + */ +static int create_vpd80(unsigned char *buf, size_t bufsiz, const char *id, + int size, int len) +{ + assert_true(len <= size); + + memset(buf, 0, bufsiz); + buf[1] = 0x80; + put_unaligned_be16(size, buf + 2); + + memset(buf + 4, ' ', size - len); + memcpy(buf + 4 + size - len, id, len); + return size + 4; +} + +static int _hex2bin(const char hx) +{ + assert_true(isxdigit(hx)); + if (hx >= '0' && hx <= '9') + return hx - '0'; + if (hx >= 'a' && hx <= 'f') + return hx - 'a' + 10; + if (hx >= 'A' && hx <= 'F') + return hx - 'A' + 10; + return -1; +} + +static void hex2bin(unsigned char *dst, const char *src, + size_t dstlen, size_t srclen) +{ + const char *sc; + unsigned char *ds; + + assert(srclen % 2 == 0); + for (sc = src, ds = dst; + sc < src + srclen && ds < dst + dstlen; + sc += 2, ++ds) + *ds = 16 * _hex2bin(sc[0]) + _hex2bin(sc[1]); +} + +/** + * create_t10_vendor_id_desc() - Create T10 vendor ID + * @desc: descriptor buffer + * @id: input ID + * @n: number of characters to use for ID + * (includes 8 bytes for vendor ID!) + * + * Create a "T10 vendor specific ID" designation descriptor. 
+ * The vendor field (8 bytes) is filled with vendor_id (above). + * + * Return: descriptor length. + */ +static int create_t10_vendor_id_desc(unsigned char *desc, + const char *id, size_t n) +{ + int vnd_len = sizeof(vendor_id) - 1; + + /* code set: ascii */ + desc[0] = 2; + /* type: T10 vendor ID */ + desc[1] = 1; + desc[2] = 0; + desc[3] = n; + + memcpy(desc + 4, (const unsigned char *)vendor_id, vnd_len); + memset(desc + 4 + vnd_len, ' ', 8 - vnd_len); + memcpy(desc + 4 + 8, (const unsigned char *)id, n - 8); + + return n + 4; +} + +/** + * create_eui64_desc() - create EUI64 descriptor. + * @desc, @id: see above. + * @n: number of bytes (8, 12, or 16). + * + * Create an EUI64 designation descriptor. + * + * Return: descriptor length. + */ +static int create_eui64_desc(unsigned char *desc, + const char *id, size_t n) +{ + assert_true(n == 8 || n == 12 || n == 16); + + /* code set: binary */ + desc[0] = 1; + /* type: EUI64 */ + desc[1] = 2; + desc[2] = 0; + desc[3] = n; + + hex2bin(desc + 4, id, n, 2 * n); + return n + 4; +} + +/** + * create_naa_desc() - create an NAA designation descriptor + * @desc, @id: see above. + * @naa: Name Address Authority field (2, 3, 5, or 6). + * + * Return: descriptor length.
+ */ +static int create_naa_desc(unsigned char *desc, + const char *id, int naa) +{ + assert_true(naa == 2 || naa == 3 || naa == 5 || naa == 6); + + /* code set: binary */ + desc[0] = 1; + /* type: NAA */ + desc[1] = 3; + desc[2] = 0; + desc[4] = _hex2bin(id[0]) | (naa << 4); + switch (naa) { + case 2: + case 3: + case 5: + hex2bin(desc + 5, id + 1, 7, 14); + desc[3] = 8; + return 12; + case 6: + hex2bin(desc + 5, id + 1, 15, 30); + desc[3] = 16; + return 20; + default: + return 0; + } +} + +/* type and flags for SCSI name string designation descriptor */ +enum { + STR_EUI = 0, + STR_NAA, + STR_IQN, + STR_MASK = 0xf, + ZERO_LAST = 0x10, /* flag to zero out some bytes at string end */ +}; + +static const char * const str_prefix[] = { + [STR_EUI] = "eui.", + [STR_NAA] = "naa.", + [STR_IQN] = "iqn.", +}; + +static const char byte0[] = { + [STR_EUI] = '2', + [STR_NAA] = '3', + [STR_IQN] = '8', +}; + +/** + * create_scsi_string_desc() - create a SCSI name string descriptor. + * @desc, @id: see above. + * @typ: one of STR_EUI, STR_NAA, STR_IQN, possibly ORd with ZERO_LAST + * @maxlen: number of characters to use from input ID. + * + * If ZERO_LAST is set, zero out the last byte. + * + * Return: descriptor length. + */ +static int create_scsi_string_desc(unsigned char *desc, + const char *id, int typ, int maxlen) +{ + int len, plen; + int type = typ & STR_MASK; + + /* code set: UTF-8 */ + desc[0] = 3; + /* type: SCSI string */ + desc[1] = 8; + desc[2] = 0; + + assert_in_range(type, STR_EUI, STR_IQN); + assert_true(maxlen % 4 == 0); + len = snprintf((char *)(desc + 4), maxlen, "%s%s", + str_prefix[type], id); + if (len > maxlen) + len = maxlen; + /* zero-pad */ + if (typ & ZERO_LAST) + len -= 2; + plen = 4 * ((len - 1) / 4) + 4; + memset(desc + 4 + len, '\0', plen - len); + desc[3] = plen; + return plen + 4; +} + +/** + * create_vpd83() - create "device identification" VPD page + * @buf, @bufsiz, @id: see above. 
+ * @type: descriptor type to use (1, 2, 3, 8) + * @parm: opaque parameter (e.g. means "naa" for NAA type) + * @len: designator length (exact meaning depends on designator type) + * + * Create a "device identification" VPD page with a single + * designation descriptor. + * + * Return: VPD page length. + */ +static int create_vpd83(unsigned char *buf, size_t bufsiz, const char *id, + uint8_t type, int parm, int len) +{ + unsigned char *desc; + int n = 0; + + memset(buf, 0, bufsiz); + buf[1] = 0x83; + + desc = buf + 4; + switch (type) { + case 1: + n = create_t10_vendor_id_desc(desc, id, len); + break; + case 2: + n = create_eui64_desc(desc, id, len); + break; + case 3: + n = create_naa_desc(desc, id, parm); + break; + case 8: + n = create_scsi_string_desc(desc, id, parm, len); + break; + default: + break; + } + put_unaligned_be16(n, buf + 2); + return n + 4; +} + +/** + * assert_correct_wwid() - test that a retrieved WWID matches expectations + * @test: test name + * @expected: expected WWID length + * @returned: WWID length as returned by code under test + * @byte0, @byte1: leading chars that our code prepends to the ID + * (e.g. "36" for "NAA registered extended" type) + * @lowercase: set if lower case WWID is expected + * @orig: original ID string, may be longer than wwid + * @wwid: WWID as returned by code under test + */ +static void assert_correct_wwid(const char *test, + int expected, int returned, + int byte0, int byte1, bool lowercase, + const char *orig, + const char *wwid) +{ + int ofs = 0, i; + + condlog(2, "%s: exp/ret: %d/%d, wwid: %s", test, + expected, returned, wwid); + /* + * byte0 and byte1 are the leading chars that our code prepends + * to the ID to indicate the designation descriptor type.
+ */ + if (byte0 != 0) { + assert_int_equal(byte0, wwid[0]); + ++ofs; + } + if (byte1 != 0) { + assert_int_equal(byte1, wwid[1]); + ++ofs; + } + /* check matching length, and length of WWID string */ + assert_int_equal(expected, returned); + assert_int_equal(returned, strlen(wwid)); + /* check expected string value up to expected length */ + for (i = 0; i < returned - ofs; i++) + assert_int_equal(wwid[ofs + i], + lowercase ? tolower(orig[i]) : orig[i]); +} + +/* + * For T10 vendor ID - replace sequences of spaces with a single underscore. + * Use a different implementation than libmultipath, deliberately. + */ +static char *subst_spaces(const char *src) +{ + char *dst = calloc(1, strlen(src) + 1); + char *p; + static regex_t *re; + regmatch_t match; + int rc; + + assert_non_null(dst); + if (re == NULL) { + re = calloc(1, sizeof(*re)); + assert_non_null(re); + rc = regcomp(re, " +", REG_EXTENDED); + assert_int_equal(rc, 0); + } + + for (rc = regexec(re, src, 1, &match, 0), p = dst; + rc == 0; + src += match.rm_eo, rc = regexec(re, src, 1, &match, 0)) { + memcpy(p, src, match.rm_so); + p += match.rm_so; + *p = '_'; + ++p; + } + assert_int_equal(rc, REG_NOMATCH); + strcpy(p, src); + return dst; +} + +/** + * test_vpd_vnd_LEN_WLEN() - test code for VPD 83, T10 vendor ID + * @LEN: ID length in the VPD page (includes 8 byte vendor ID) + * @WLEN: WWID buffer size + * + * The input ID is modified by inserting some spaces, to be able to + * test the handling of spaces by the code. This is relevant only for + * a minimum input length of 24. + * The expected result must be adjusted accordingly.
+ */ +#define make_test_vpd_vnd(len, wlen) \ +static void test_vpd_vnd_ ## len ## _ ## wlen(void **state) \ +{ \ + struct vpdtest *vt = *state; \ + int n, ret, rc; \ + int exp_len; \ + char *exp_wwid, *exp_subst, *input; \ + \ + input = strdup(test_id); \ + /* 8 vendor bytes collapsed to actual vendor ID length + 1 */ \ + /* and one '1' prepended */ \ + exp_len = len - 8 + sizeof(vendor_id) + 1; \ + \ + /* insert some spaces to test space collapsing */ \ + input[15] = input[17] = input[18] = ' '; \ + /* adjust expectation for space treatment */ \ + /* drop char for 2nd space on offset 17/18 */ \ + if (len >= 18 + 9) \ + --exp_len; \ + /* drop trailing single '_' if input ends with space */ \ + if (len == 15 + 9 || len == 17 + 9 || len == 18 + 9) \ + --exp_len; \ + if (exp_len >= wlen) \ + exp_len = wlen - 1; \ + n = create_vpd83(vt->vpdbuf, sizeof(vt->vpdbuf), input, \ + 1, 0, len); \ + rc = asprintf(&exp_wwid, "%s_%s", vendor_id, input); \ + assert_int_not_equal(rc, -1); \ + free(input); \ + /* Replace spaces, like code under test */ \ + exp_subst = subst_spaces(exp_wwid); \ + free(exp_wwid); \ + will_return(__wrap_ioctl, n); \ + will_return(__wrap_ioctl, vt->vpdbuf); \ + ret = get_vpd_sgio(10, 0x83, 0, vt->wwid, wlen); \ + assert_correct_wwid("test_vpd_vnd_" #len "_" #wlen, \ + exp_len, ret, '1', 0, false, \ + exp_subst, vt->wwid); \ + free(exp_subst); \ +} + +/** + * test_vpd_str_TYP_LEN_WLEN() - test code for VPD 83, SCSI name string + * @TYP: numeric value of STR_EUI, STR_NAA, STR_IQN above + * @LEN: ID length in the VPD page + * @WLEN: WWID buffer size + */ +#define make_test_vpd_str(typ, len, wlen) \ +static void test_vpd_str_ ## typ ## _ ## len ## _ ## wlen(void **state) \ +{ \ + struct vpdtest *vt = *state; \ + int n, ret; \ + int exp_len; \ + int type = typ & STR_MASK; \ + \ + n = create_vpd83(vt->vpdbuf, sizeof(vt->vpdbuf), test_id, \ + 8, typ, len); \ + exp_len = len - strlen(str_prefix[type]); \ + if (typ & ZERO_LAST) \ + exp_len--; \ + if (exp_len >=
wlen) \ + exp_len = wlen - 1; \ + will_return(__wrap_ioctl, n); \ + will_return(__wrap_ioctl, vt->vpdbuf); \ + ret = get_vpd_sgio(10, 0x83, 0, vt->wwid, wlen); \ + assert_correct_wwid("test_vpd_str_" #typ "_" #len "_" #wlen, \ + exp_len, ret, byte0[type], 0, \ + type != STR_IQN, \ + test_id, vt->wwid); \ +} + +/** + * test_vpd_naa_NAA_WLEN() - test code for VPD 83 NAA designation + * @NAA: Name Address Authority (2, 3, 5, or 6) + * @WLEN: WWID buffer size + */ +#define make_test_vpd_naa(naa, wlen) \ +static void test_vpd_naa_ ## naa ## _ ## wlen(void **state) \ +{ \ + struct vpdtest *vt = *state; \ + int n, ret; \ + int len, exp_len; \ + \ + switch (naa) { \ + case 2: \ + case 3: \ + case 5: \ + len = 17; \ + break; \ + case 6: \ + len = 33; \ + break; \ + } \ + /* returned size is always uneven */ \ + exp_len = wlen > len ? len : \ + wlen % 2 == 0 ? wlen - 1 : wlen - 2; \ + \ + n = create_vpd83(vt->vpdbuf, sizeof(vt->vpdbuf), test_id, \ + 3, naa, 0); \ + will_return(__wrap_ioctl, n); \ + will_return(__wrap_ioctl, vt->vpdbuf); \ + ret = get_vpd_sgio(10, 0x83, 0, vt->wwid, wlen); \ + assert_correct_wwid("test_vpd_naa_" #naa "_" #wlen, \ + exp_len, ret, '3', '0' + naa, true, \ + test_id, vt->wwid); \ +} + +/** + * test_vpd_eui_LEN_WLEN() - test code for VPD 83, EUI64 + * @LEN: EUI64 length (8, 12, or 16) + * @WLEN: WWID buffer size + * @SML: Use small VPD page size + */ +#define make_test_vpd_eui(len, wlen, sml) \ +static void test_vpd_eui_ ## len ## _ ## wlen ## _ ## sml(void **state) \ +{ \ + struct vpdtest *vt = *state; \ + int n, ret; \ + /* returned size is always uneven */ \ + int exp_len = wlen > 2 * len + 1 ? 2 * len + 1 : \ + wlen % 2 == 0 ?
wlen - 1 : wlen - 2; \ + \ + n = create_vpd83(vt->vpdbuf, sizeof(vt->vpdbuf), test_id, \ + 2, 0, len); \ + if (sml) { \ + /* overwrite the page size to DEFAULT_SGIO_LEN + 1 */ \ + put_unaligned_be16(255, vt->vpdbuf + 2); \ + /* this causes get_vpd_sgio to do a second ioctl */ \ + will_return(__wrap_ioctl, n); \ + will_return(__wrap_ioctl, vt->vpdbuf); \ + } \ + will_return(__wrap_ioctl, n); \ + will_return(__wrap_ioctl, vt->vpdbuf); \ + ret = get_vpd_sgio(10, 0x83, 0, vt->wwid, wlen); \ + assert_correct_wwid("test_vpd_eui_" #len "_" #wlen "_" #sml, \ + exp_len, ret, '2', 0, true, \ + test_id, vt->wwid); \ +} + +/** + * test_vpd80_SIZE_LEN_WLEN() - test code for VPD 80 + * @SIZE, @LEN: see create_vpd80() + * @WLEN: WWID buffer size + */ +#define make_test_vpd80(size, len, wlen) \ +static void test_vpd80_ ## size ## _ ## len ## _ ## wlen(void **state) \ +{ \ + struct vpdtest *vt = *state; \ + int n, ret; \ + int exp_len = len > 20 ? 20 : len; \ + char *input = strdup(test_id); \ + \ + /* insert trailing whitespace after pos 20 */ \ + memset(input + 20, ' ', sizeof(test_id) - 20); \ + if (exp_len >= wlen) \ + exp_len = wlen - 1; \ + n = create_vpd80(vt->vpdbuf, sizeof(vt->vpdbuf), input, \ + size, len); \ + will_return(__wrap_ioctl, n); \ + will_return(__wrap_ioctl, vt->vpdbuf); \ + ret = get_vpd_sgio(10, 0x80, 0, vt->wwid, wlen); \ + assert_correct_wwid("test_vpd80_" #size "_" #len "_" #wlen, \ + exp_len, ret, 0, 0, false, \ + input, vt->wwid); \ + free(input); \ +} + +/* VPD 80 */ +/* Tests without trailing whitespace: 21 WWID bytes required */ +make_test_vpd80(20, 20, 30); +make_test_vpd80(20, 20, 21); +make_test_vpd80(20, 20, 20); +make_test_vpd80(20, 20, 10); + +/* Tests with 4 byte trailing whitespace: 21 WWID bytes required */ +make_test_vpd80(24, 24, 30); +make_test_vpd80(24, 24, 25); +make_test_vpd80(24, 24, 24); +make_test_vpd80(24, 24, 21); +make_test_vpd80(24, 24, 20); + +/* Tests with 4 byte leading whitespace: 17 WWID bytes required */ 
+make_test_vpd80(20, 16, 30); +make_test_vpd80(20, 16, 17); +make_test_vpd80(20, 16, 16); + +/* Tests with 4 byte leading whitespace: 21 WWID bytes required */ +make_test_vpd80(24, 20, 21); +make_test_vpd80(24, 20, 20); + +/* Tests with leading and trailing whitespace: 21 WWID bytes required */ +make_test_vpd80(30, 24, 30); +make_test_vpd80(30, 24, 21); +make_test_vpd80(30, 24, 20); + +/* VPD 83, T10 vendor ID */ +make_test_vpd_vnd(40, 40); +make_test_vpd_vnd(40, 30); +make_test_vpd_vnd(30, 20); +make_test_vpd_vnd(29, 30); +make_test_vpd_vnd(28, 30); +make_test_vpd_vnd(27, 30); /* space at end */ +make_test_vpd_vnd(26, 30); /* space at end */ +make_test_vpd_vnd(25, 30); +make_test_vpd_vnd(24, 30); /* space at end */ +make_test_vpd_vnd(23, 30); +make_test_vpd_vnd(24, 20); +make_test_vpd_vnd(23, 20); +make_test_vpd_vnd(22, 20); +make_test_vpd_vnd(21, 20); +make_test_vpd_vnd(20, 20); +make_test_vpd_vnd(19, 20); +make_test_vpd_vnd(20, 10); +make_test_vpd_vnd(10, 10); + +/* EUI64 tests */ +/* small vpd page test */ +make_test_vpd_eui(8, 32, 1); +make_test_vpd_eui(12, 32, 1); +make_test_vpd_eui(16, 40, 1); + +/* 64bit, WWID size: 18 */ +make_test_vpd_eui(8, 32, 0); +make_test_vpd_eui(8, 18, 0); +make_test_vpd_eui(8, 17, 0); +make_test_vpd_eui(8, 16, 0); +make_test_vpd_eui(8, 10, 0); + +/* 96 bit, WWID size: 26 */ +make_test_vpd_eui(12, 32, 0); +make_test_vpd_eui(12, 26, 0); +make_test_vpd_eui(12, 25, 0); +make_test_vpd_eui(12, 20, 0); +make_test_vpd_eui(12, 10, 0); + +/* 128 bit, WWID size: 34 */ +make_test_vpd_eui(16, 40, 0); +make_test_vpd_eui(16, 34, 0); +make_test_vpd_eui(16, 33, 0); +make_test_vpd_eui(16, 20, 0); + +/* NAA IEEE registered extended (36), WWID size: 34 */ +make_test_vpd_naa(6, 40); +make_test_vpd_naa(6, 34); +make_test_vpd_naa(6, 33); +make_test_vpd_naa(6, 32); +make_test_vpd_naa(6, 20); + +/* NAA IEEE registered (35), WWID size: 18 */ +make_test_vpd_naa(5, 20); +make_test_vpd_naa(5, 18); +make_test_vpd_naa(5, 17); +make_test_vpd_naa(5, 16); + +/* NAA 
local (33), WWID size: 18 */ +make_test_vpd_naa(3, 20); +make_test_vpd_naa(3, 18); +make_test_vpd_naa(3, 17); +make_test_vpd_naa(3, 16); + +/* NAA IEEE extended (32), WWID size: 18 */ +make_test_vpd_naa(2, 20); +make_test_vpd_naa(2, 18); +make_test_vpd_naa(2, 17); +make_test_vpd_naa(2, 16); + +/* SCSI Name string: EUI64, WWID size: 17 */ +make_test_vpd_str(0, 20, 18) +make_test_vpd_str(0, 20, 17) +make_test_vpd_str(0, 20, 16) +make_test_vpd_str(0, 20, 15) + +/* SCSI Name string: EUI64, zero padded, WWID size: 16 */ +make_test_vpd_str(16, 20, 18) +make_test_vpd_str(16, 20, 17) +make_test_vpd_str(16, 20, 16) +make_test_vpd_str(16, 20, 15) + +/* SCSI Name string: NAA, WWID size: 17 */ +make_test_vpd_str(1, 20, 18) +make_test_vpd_str(1, 20, 17) +make_test_vpd_str(1, 20, 16) +make_test_vpd_str(1, 20, 15) + +/* SCSI Name string: NAA, zero padded, WWID size: 16 */ +make_test_vpd_str(17, 20, 18) +make_test_vpd_str(17, 20, 17) +make_test_vpd_str(17, 20, 16) +make_test_vpd_str(17, 20, 15) + +/* SCSI Name string: IQN, WWID size: 17 */ +make_test_vpd_str(2, 20, 18) +make_test_vpd_str(2, 20, 17) +make_test_vpd_str(2, 20, 16) +make_test_vpd_str(2, 20, 15) + +/* SCSI Name string: IQN, zero padded, WWID size: 16 */ +make_test_vpd_str(18, 20, 18) +make_test_vpd_str(18, 20, 17) +make_test_vpd_str(18, 20, 16) +make_test_vpd_str(18, 20, 15) + +static int test_vpd(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_vpd80_20_20_30), + cmocka_unit_test(test_vpd80_20_20_21), + cmocka_unit_test(test_vpd80_20_20_20), + cmocka_unit_test(test_vpd80_20_20_10), + cmocka_unit_test(test_vpd80_24_24_30), + cmocka_unit_test(test_vpd80_24_24_25), + cmocka_unit_test(test_vpd80_24_24_24), + cmocka_unit_test(test_vpd80_24_24_21), + cmocka_unit_test(test_vpd80_24_24_20), + cmocka_unit_test(test_vpd80_20_16_30), + cmocka_unit_test(test_vpd80_20_16_17), + cmocka_unit_test(test_vpd80_20_16_16), + cmocka_unit_test(test_vpd80_24_20_21), + cmocka_unit_test(test_vpd80_24_20_20), + 
cmocka_unit_test(test_vpd80_30_24_30), + cmocka_unit_test(test_vpd80_30_24_21), + cmocka_unit_test(test_vpd80_30_24_20), + cmocka_unit_test(test_vpd_vnd_40_40), + cmocka_unit_test(test_vpd_vnd_40_30), + cmocka_unit_test(test_vpd_vnd_30_20), + cmocka_unit_test(test_vpd_vnd_29_30), + cmocka_unit_test(test_vpd_vnd_28_30), + cmocka_unit_test(test_vpd_vnd_27_30), + cmocka_unit_test(test_vpd_vnd_26_30), + cmocka_unit_test(test_vpd_vnd_25_30), + cmocka_unit_test(test_vpd_vnd_24_30), + cmocka_unit_test(test_vpd_vnd_23_30), + cmocka_unit_test(test_vpd_vnd_24_20), + cmocka_unit_test(test_vpd_vnd_23_20), + cmocka_unit_test(test_vpd_vnd_22_20), + cmocka_unit_test(test_vpd_vnd_21_20), + cmocka_unit_test(test_vpd_vnd_20_20), + cmocka_unit_test(test_vpd_vnd_19_20), + cmocka_unit_test(test_vpd_vnd_20_10), + cmocka_unit_test(test_vpd_vnd_10_10), + cmocka_unit_test(test_vpd_eui_8_32_1), + cmocka_unit_test(test_vpd_eui_12_32_1), + cmocka_unit_test(test_vpd_eui_16_40_1), + cmocka_unit_test(test_vpd_eui_8_32_0), + cmocka_unit_test(test_vpd_eui_8_18_0), + cmocka_unit_test(test_vpd_eui_8_17_0), + cmocka_unit_test(test_vpd_eui_8_16_0), + cmocka_unit_test(test_vpd_eui_8_10_0), + cmocka_unit_test(test_vpd_eui_12_32_0), + cmocka_unit_test(test_vpd_eui_12_26_0), + cmocka_unit_test(test_vpd_eui_12_25_0), + cmocka_unit_test(test_vpd_eui_12_20_0), + cmocka_unit_test(test_vpd_eui_12_10_0), + cmocka_unit_test(test_vpd_eui_16_40_0), + cmocka_unit_test(test_vpd_eui_16_34_0), + cmocka_unit_test(test_vpd_eui_16_33_0), + cmocka_unit_test(test_vpd_eui_16_20_0), + cmocka_unit_test(test_vpd_naa_6_40), + cmocka_unit_test(test_vpd_naa_6_34), + cmocka_unit_test(test_vpd_naa_6_33), + cmocka_unit_test(test_vpd_naa_6_32), + cmocka_unit_test(test_vpd_naa_6_20), + cmocka_unit_test(test_vpd_naa_5_20), + cmocka_unit_test(test_vpd_naa_5_18), + cmocka_unit_test(test_vpd_naa_5_17), + cmocka_unit_test(test_vpd_naa_5_16), + cmocka_unit_test(test_vpd_naa_3_20), + cmocka_unit_test(test_vpd_naa_3_18), + 
cmocka_unit_test(test_vpd_naa_3_17), + cmocka_unit_test(test_vpd_naa_3_16), + cmocka_unit_test(test_vpd_naa_2_20), + cmocka_unit_test(test_vpd_naa_2_18), + cmocka_unit_test(test_vpd_naa_2_17), + cmocka_unit_test(test_vpd_naa_2_16), + cmocka_unit_test(test_vpd_str_0_20_18), + cmocka_unit_test(test_vpd_str_0_20_17), + cmocka_unit_test(test_vpd_str_0_20_16), + cmocka_unit_test(test_vpd_str_0_20_15), + cmocka_unit_test(test_vpd_str_16_20_18), + cmocka_unit_test(test_vpd_str_16_20_17), + cmocka_unit_test(test_vpd_str_16_20_16), + cmocka_unit_test(test_vpd_str_16_20_15), + cmocka_unit_test(test_vpd_str_1_20_18), + cmocka_unit_test(test_vpd_str_1_20_17), + cmocka_unit_test(test_vpd_str_1_20_16), + cmocka_unit_test(test_vpd_str_1_20_15), + cmocka_unit_test(test_vpd_str_17_20_18), + cmocka_unit_test(test_vpd_str_17_20_17), + cmocka_unit_test(test_vpd_str_17_20_16), + cmocka_unit_test(test_vpd_str_17_20_15), + cmocka_unit_test(test_vpd_str_2_20_18), + cmocka_unit_test(test_vpd_str_2_20_17), + cmocka_unit_test(test_vpd_str_2_20_16), + cmocka_unit_test(test_vpd_str_2_20_15), + cmocka_unit_test(test_vpd_str_18_20_18), + cmocka_unit_test(test_vpd_str_18_20_17), + cmocka_unit_test(test_vpd_str_18_20_16), + cmocka_unit_test(test_vpd_str_18_20_15), + }; + return cmocka_run_group_tests(tests, setup, teardown); +} + +int main(void) +{ + int ret = 0; + + ret += test_vpd(); + return ret; +} diff --git a/third-party/valgrind/drd.h b/third-party/valgrind/drd.h new file mode 100644 index 0000000..d63b3dd --- /dev/null +++ b/third-party/valgrind/drd.h @@ -0,0 +1,571 @@ +/* + ---------------------------------------------------------------- + + Notice that the following BSD-style license applies to this one + file (drd.h) only. The rest of Valgrind is licensed under the + terms of the GNU General Public License, version 2, unless + otherwise indicated. See the COPYING file in the source + distribution for details. 
+ + ---------------------------------------------------------------- + + This file is part of DRD, a Valgrind tool for verification of + multithreaded programs. + + Copyright (C) 2006-2017 Bart Van Assche <bvanassche@acm.org>. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ---------------------------------------------------------------- + + Notice that the above BSD-style license applies to this one file + (drd.h) only.
The entire rest of Valgrind is licensed under + the terms of the GNU General Public License, version 2. See the + COPYING file in the source distribution for details. + + ---------------------------------------------------------------- +*/ + +#ifndef __VALGRIND_DRD_H +#define __VALGRIND_DRD_H + + +#include "valgrind.h" + + +/** Obtain the thread ID assigned by Valgrind's core. */ +#define DRD_GET_VALGRIND_THREADID \ + (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0, \ + VG_USERREQ__DRD_GET_VALGRIND_THREAD_ID, \ + 0, 0, 0, 0, 0) + +/** Obtain the thread ID assigned by DRD. */ +#define DRD_GET_DRD_THREADID \ + (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0, \ + VG_USERREQ__DRD_GET_DRD_THREAD_ID, \ + 0, 0, 0, 0, 0) + + +/** Tell DRD not to complain about data races for the specified variable. */ +#define DRD_IGNORE_VAR(x) ANNOTATE_BENIGN_RACE_SIZED(&(x), sizeof(x), "") + +/** Tell DRD to no longer ignore data races for the specified variable. */ +#define DRD_STOP_IGNORING_VAR(x) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DRD_FINISH_SUPPRESSION, \ + &(x), sizeof(x), 0, 0, 0) + +/** + * Tell DRD to trace all memory accesses for the specified variable + * until the memory that was allocated for the variable is freed. + */ +#define DRD_TRACE_VAR(x) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DRD_START_TRACE_ADDR, \ + &(x), sizeof(x), 0, 0, 0) + +/** + * Tell DRD to stop tracing memory accesses for the specified variable. + */ +#define DRD_STOP_TRACING_VAR(x) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DRD_STOP_TRACE_ADDR, \ + &(x), sizeof(x), 0, 0, 0) + +/** + * @defgroup RaceDetectionAnnotations Data race detection annotations. + * + * @see See also the source file producer-consumer. + */ +#define ANNOTATE_PCQ_CREATE(pcq) do { } while(0) + +/** Tell DRD that a FIFO queue has been destroyed. */ +#define ANNOTATE_PCQ_DESTROY(pcq) do { } while(0) + +/** + * Tell DRD that an element has been added to the FIFO queue at address pcq. 
+ */ +#define ANNOTATE_PCQ_PUT(pcq) do { } while(0) + +/** + * Tell DRD that an element has been removed from the FIFO queue at address pcq, + * and that DRD should insert a happens-before relationship between the memory + * accesses that occurred before the corresponding ANNOTATE_PCQ_PUT(pcq) + * annotation and the memory accesses after this annotation. Correspondence + * between PUT and GET annotations happens in FIFO order. Since locking + * of the queue is needed anyway to add elements to or to remove elements from + * the queue, for DRD all four FIFO annotations are defined as no-ops. + */ +#define ANNOTATE_PCQ_GET(pcq) do { } while(0) + +/** + * Tell DRD that data races at the specified address are expected and must not + * be reported. + */ +#define ANNOTATE_BENIGN_RACE(addr, descr) \ + ANNOTATE_BENIGN_RACE_SIZED(addr, sizeof(*addr), descr) + +/* Same as ANNOTATE_BENIGN_RACE(addr, descr), but applies to + the memory range [addr, addr + size). */ +#define ANNOTATE_BENIGN_RACE_SIZED(addr, size, descr) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DRD_START_SUPPRESSION, \ + addr, size, 0, 0, 0) + +/** Tell DRD to ignore all reads performed by the current thread. */ +#define ANNOTATE_IGNORE_READS_BEGIN() \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DRD_RECORD_LOADS, \ + 0, 0, 0, 0, 0); + + +/** Tell DRD to no longer ignore the reads performed by the current thread. */ +#define ANNOTATE_IGNORE_READS_END() \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DRD_RECORD_LOADS, \ + 1, 0, 0, 0, 0); + +/** Tell DRD to ignore all writes performed by the current thread. */ +#define ANNOTATE_IGNORE_WRITES_BEGIN() \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DRD_RECORD_STORES, \ + 0, 0, 0, 0, 0) + +/** Tell DRD to no longer ignore the writes performed by the current thread. 
*/ +#define ANNOTATE_IGNORE_WRITES_END() \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DRD_RECORD_STORES, \ + 1, 0, 0, 0, 0) + +/** Tell DRD to ignore all memory accesses performed by the current thread. */ +#define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \ + do { ANNOTATE_IGNORE_READS_BEGIN(); ANNOTATE_IGNORE_WRITES_BEGIN(); } while(0) + +/** + * Tell DRD to no longer ignore the memory accesses performed by the current + * thread. + */ +#define ANNOTATE_IGNORE_READS_AND_WRITES_END() \ + do { ANNOTATE_IGNORE_READS_END(); ANNOTATE_IGNORE_WRITES_END(); } while(0) + +/** + * Tell DRD that size bytes starting at addr has been allocated by a custom + * memory allocator. + */ +#define ANNOTATE_NEW_MEMORY(addr, size) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DRD_CLEAN_MEMORY, \ + addr, size, 0, 0, 0) + +/** Ask DRD to report every access to the specified address. */ +#define ANNOTATE_TRACE_MEMORY(addr) DRD_TRACE_VAR(*(char*)(addr)) + +/** + * Tell DRD to assign the specified name to the current thread. This name will + * be used in error messages printed by DRD. + */ +#define ANNOTATE_THREAD_NAME(name) \ + VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DRD_SET_THREAD_NAME, \ + name, 0, 0, 0, 0) + +/*@}*/ + + +/* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !! + This enum comprises an ABI exported by Valgrind to programs + which use client requests. DO NOT CHANGE THE ORDER OF THESE + ENTRIES, NOR DELETE ANY -- add new ones at the end. +*/ +enum { + /* Ask the DRD tool to discard all information about memory accesses */ + /* and client objects for the specified range. This client request is */ + /* binary compatible with the similarly named Helgrind client request. */ + VG_USERREQ__DRD_CLEAN_MEMORY = VG_USERREQ_TOOL_BASE('H','G'), + /* args: Addr, SizeT. */ + + /* Ask the DRD tool the thread ID assigned by Valgrind. */ + VG_USERREQ__DRD_GET_VALGRIND_THREAD_ID = VG_USERREQ_TOOL_BASE('D','R'), + /* args: none. 
*/ + /* Ask the DRD tool the thread ID assigned by DRD. */ + VG_USERREQ__DRD_GET_DRD_THREAD_ID, + /* args: none. */ + + /* To tell the DRD tool to suppress data race detection on the */ + /* specified address range. */ + VG_USERREQ__DRD_START_SUPPRESSION, + /* args: start address, size in bytes */ + /* To tell the DRD tool no longer to suppress data race detection on */ + /* the specified address range. */ + VG_USERREQ__DRD_FINISH_SUPPRESSION, + /* args: start address, size in bytes */ + + /* To ask the DRD tool to trace all accesses to the specified range. */ + VG_USERREQ__DRD_START_TRACE_ADDR, + /* args: Addr, SizeT. */ + /* To ask the DRD tool to stop tracing accesses to the specified range. */ + VG_USERREQ__DRD_STOP_TRACE_ADDR, + /* args: Addr, SizeT. */ + + /* Tell DRD whether or not to record memory loads in the calling thread. */ + VG_USERREQ__DRD_RECORD_LOADS, + /* args: Bool. */ + /* Tell DRD whether or not to record memory stores in the calling thread. */ + VG_USERREQ__DRD_RECORD_STORES, + /* args: Bool. */ + + /* Set the name of the thread that performs this client request. */ + VG_USERREQ__DRD_SET_THREAD_NAME, + /* args: null-terminated character string. */ + + /* Tell DRD that a DRD annotation has not yet been implemented. */ + VG_USERREQ__DRD_ANNOTATION_UNIMP, + /* args: char*. */ + + /* Tell DRD that a user-defined semaphore synchronization object + * is about to be created. */ + VG_USERREQ__DRD_ANNOTATE_SEM_INIT_PRE, + /* args: Addr, UInt value. */ + /* Tell DRD that a user-defined semaphore synchronization object + * has been destroyed. */ + VG_USERREQ__DRD_ANNOTATE_SEM_DESTROY_POST, + /* args: Addr. */ + /* Tell DRD that a user-defined semaphore synchronization + * object is going to be acquired (semaphore wait). */ + VG_USERREQ__DRD_ANNOTATE_SEM_WAIT_PRE, + /* args: Addr. */ + /* Tell DRD that a user-defined semaphore synchronization + * object has been acquired (semaphore wait). */ + VG_USERREQ__DRD_ANNOTATE_SEM_WAIT_POST, + /* args: Addr. 
*/ + /* Tell DRD that a user-defined semaphore synchronization + * object is about to be released (semaphore post). */ + VG_USERREQ__DRD_ANNOTATE_SEM_POST_PRE, + /* args: Addr. */ + + /* Tell DRD to ignore the inter-thread ordering introduced by a mutex. */ + VG_USERREQ__DRD_IGNORE_MUTEX_ORDERING, + /* args: Addr. */ + + /* Tell DRD that a user-defined reader-writer synchronization object + * has been created. */ + VG_USERREQ__DRD_ANNOTATE_RWLOCK_CREATE + = VG_USERREQ_TOOL_BASE('H','G') + 256 + 14, + /* args: Addr. */ + /* Tell DRD that a user-defined reader-writer synchronization object + * is about to be destroyed. */ + VG_USERREQ__DRD_ANNOTATE_RWLOCK_DESTROY + = VG_USERREQ_TOOL_BASE('H','G') + 256 + 15, + /* args: Addr. */ + /* Tell DRD that a lock on a user-defined reader-writer synchronization + * object has been acquired. */ + VG_USERREQ__DRD_ANNOTATE_RWLOCK_ACQUIRED + = VG_USERREQ_TOOL_BASE('H','G') + 256 + 17, + /* args: Addr, Int is_rw. */ + /* Tell DRD that a lock on a user-defined reader-writer synchronization + * object is about to be released. */ + VG_USERREQ__DRD_ANNOTATE_RWLOCK_RELEASED + = VG_USERREQ_TOOL_BASE('H','G') + 256 + 18, + /* args: Addr, Int is_rw. */ + + /* Tell DRD that a Helgrind annotation has not yet been implemented. */ + VG_USERREQ__HELGRIND_ANNOTATION_UNIMP + = VG_USERREQ_TOOL_BASE('H','G') + 256 + 32, + /* args: char*. */ + + /* Tell DRD to insert a happens-before annotation. */ + VG_USERREQ__DRD_ANNOTATE_HAPPENS_BEFORE + = VG_USERREQ_TOOL_BASE('H','G') + 256 + 33, + /* args: Addr. */ + /* Tell DRD to insert a happens-after annotation. */ + VG_USERREQ__DRD_ANNOTATE_HAPPENS_AFTER + = VG_USERREQ_TOOL_BASE('H','G') + 256 + 34, + /* args: Addr. */ + +}; + + +/** + * @addtogroup RaceDetectionAnnotations + */ +/*@{*/ + +#ifdef __cplusplus +/* ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racy reads. + + Instead of doing + ANNOTATE_IGNORE_READS_BEGIN(); + ... = x; + ANNOTATE_IGNORE_READS_END(); + one can use + ... 
= ANNOTATE_UNPROTECTED_READ(x); */ +template <typename T> +inline T ANNOTATE_UNPROTECTED_READ(const volatile T& x) { + ANNOTATE_IGNORE_READS_BEGIN(); + const T result = x; + ANNOTATE_IGNORE_READS_END(); + return result; +} +/* Apply ANNOTATE_BENIGN_RACE_SIZED to a static variable. */ +#define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) \ + namespace { \ + static class static_var##_annotator \ + { \ + public: \ + static_var##_annotator() \ + { \ + ANNOTATE_BENIGN_RACE_SIZED(&static_var, sizeof(static_var), \ + #static_var ": " description); \ + } \ + } the_##static_var##_annotator; \ + } +#endif + +/*@}*/ + +#endif /* __VALGRIND_DRD_H */ diff --git a/third-party/valgrind/valgrind.h b/third-party/valgrind/valgrind.h new file mode 100644 index 0000000..577c8f0 --- /dev/null +++ b/third-party/valgrind/valgrind.h @@ -0,0 +1,6647 @@ +/* -*- c -*- + ---------------------------------------------------------------- + + Notice that the following BSD-style license applies to this one + file (valgrind.h) only. The rest of Valgrind is licensed under the + terms of the GNU General Public License, version 2, unless + otherwise indicated. See the COPYING file in the source + distribution for details. + + ---------------------------------------------------------------- + + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2017 Julian Seward. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3.
Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ---------------------------------------------------------------- + + Notice that the above BSD-style license applies to this one file + (valgrind.h) only. The entire rest of Valgrind is licensed under + the terms of the GNU General Public License, version 2. See the + COPYING file in the source distribution for details. + + ---------------------------------------------------------------- +*/ + + +/* This file is for inclusion into client (your!) code. + + You can use these macros to manipulate and query Valgrind's + execution inside your own programs. + + The resulting executables will still run without Valgrind, just a + little bit more slowly than they otherwise would, but otherwise + unchanged. When not running on valgrind, each client request + consumes very few (eg. 7) instructions, so the resulting performance + loss is negligible unless you plan to execute client requests + millions of times per second. 
Nevertheless, if that is still a + problem, you can compile with the NVALGRIND symbol defined (gcc + -DNVALGRIND) so that client requests are not even compiled in. */ + +#ifndef __VALGRIND_H +#define __VALGRIND_H + + +/* ------------------------------------------------------------------ */ +/* VERSION NUMBER OF VALGRIND */ +/* ------------------------------------------------------------------ */ + +/* Specify Valgrind's version number, so that user code can + conditionally compile based on our version number. Note that these + were introduced at version 3.6 and so do not exist in version 3.5 + or earlier. The recommended way to use them to check for "version + X.Y or later" is (eg) + +#if defined(__VALGRIND_MAJOR__) && defined(__VALGRIND_MINOR__) \ + && (__VALGRIND_MAJOR__ > 3 \ + || (__VALGRIND_MAJOR__ == 3 && __VALGRIND_MINOR__ >= 6)) +*/ +#define __VALGRIND_MAJOR__ 3 +#define __VALGRIND_MINOR__ 14 + + +#include <stdarg.h> + +/* Nb: this file might be included in a file compiled with -ansi. So + we can't use C++ style "//" comments nor the "asm" keyword (instead + use "__asm__"). */ + +/* Derive some tags indicating what the target platform is. Note + that in this file we're using the compiler's CPP symbols for + identifying architectures, which are different to the ones we use + within the rest of Valgrind. Note, __powerpc__ is active for both + 32 and 64-bit PPC, whereas __powerpc64__ is only active for the + latter (on Linux, that is).
+ + Misc note: how to find out what's predefined in gcc by default: + gcc -Wp,-dM somefile.c +*/ +#undef PLAT_x86_darwin +#undef PLAT_amd64_darwin +#undef PLAT_x86_win32 +#undef PLAT_amd64_win64 +#undef PLAT_x86_linux +#undef PLAT_amd64_linux +#undef PLAT_ppc32_linux +#undef PLAT_ppc64be_linux +#undef PLAT_ppc64le_linux +#undef PLAT_arm_linux +#undef PLAT_arm64_linux +#undef PLAT_s390x_linux +#undef PLAT_mips32_linux +#undef PLAT_mips64_linux +#undef PLAT_x86_solaris +#undef PLAT_amd64_solaris + + +#if defined(__APPLE__) && defined(__i386__) +# define PLAT_x86_darwin 1 +#elif defined(__APPLE__) && defined(__x86_64__) +# define PLAT_amd64_darwin 1 +#elif (defined(__MINGW32__) && !defined(__MINGW64__)) \ + || defined(__CYGWIN32__) \ + || (defined(_WIN32) && defined(_M_IX86)) +# define PLAT_x86_win32 1 +#elif defined(__MINGW64__) \ + || (defined(_WIN64) && defined(_M_X64)) +# define PLAT_amd64_win64 1 +#elif defined(__linux__) && defined(__i386__) +# define PLAT_x86_linux 1 +#elif defined(__linux__) && defined(__x86_64__) && !defined(__ILP32__) +# define PLAT_amd64_linux 1 +#elif defined(__linux__) && defined(__powerpc__) && !defined(__powerpc64__) +# define PLAT_ppc32_linux 1 +#elif defined(__linux__) && defined(__powerpc__) && defined(__powerpc64__) && _CALL_ELF != 2 +/* Big Endian uses ELF version 1 */ +# define PLAT_ppc64be_linux 1 +#elif defined(__linux__) && defined(__powerpc__) && defined(__powerpc64__) && _CALL_ELF == 2 +/* Little Endian uses ELF version 2 */ +# define PLAT_ppc64le_linux 1 +#elif defined(__linux__) && defined(__arm__) && !defined(__aarch64__) +# define PLAT_arm_linux 1 +#elif defined(__linux__) && defined(__aarch64__) && !defined(__arm__) +# define PLAT_arm64_linux 1 +#elif defined(__linux__) && defined(__s390__) && defined(__s390x__) +# define PLAT_s390x_linux 1 +#elif defined(__linux__) && defined(__mips__) && (__mips==64) +# define PLAT_mips64_linux 1 +#elif defined(__linux__) && defined(__mips__) && (__mips!=64) +# define PLAT_mips32_linux 
1 +#elif defined(__sun) && defined(__i386__) +# define PLAT_x86_solaris 1 +#elif defined(__sun) && defined(__x86_64__) +# define PLAT_amd64_solaris 1 +#else +/* If we're not compiling for our target platform, don't generate + any inline asms. */ +# if !defined(NVALGRIND) +# define NVALGRIND 1 +# endif +#endif + + +/* ------------------------------------------------------------------ */ +/* ARCHITECTURE SPECIFICS for SPECIAL INSTRUCTIONS. There is nothing */ +/* in here of use to end-users -- skip to the next section. */ +/* ------------------------------------------------------------------ */ + +/* + * VALGRIND_DO_CLIENT_REQUEST(): a statement that invokes a Valgrind client + * request. Accepts both pointers and integers as arguments. + * + * VALGRIND_DO_CLIENT_REQUEST_STMT(): a statement that invokes a Valgrind + * client request that does not return a value. + + * VALGRIND_DO_CLIENT_REQUEST_EXPR(): a C expression that invokes a Valgrind + * client request and whose value equals the client request result. Accepts + * both pointers and integers as arguments. Note that such calls are not + * necessarily pure functions -- they may have side effects. 
+ */ + +#define VALGRIND_DO_CLIENT_REQUEST(_zzq_rlval, _zzq_default, \ + _zzq_request, _zzq_arg1, _zzq_arg2, \ + _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + do { (_zzq_rlval) = VALGRIND_DO_CLIENT_REQUEST_EXPR((_zzq_default), \ + (_zzq_request), (_zzq_arg1), (_zzq_arg2), \ + (_zzq_arg3), (_zzq_arg4), (_zzq_arg5)); } while (0) + +#define VALGRIND_DO_CLIENT_REQUEST_STMT(_zzq_request, _zzq_arg1, \ + _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + do { (void) VALGRIND_DO_CLIENT_REQUEST_EXPR(0, \ + (_zzq_request), (_zzq_arg1), (_zzq_arg2), \ + (_zzq_arg3), (_zzq_arg4), (_zzq_arg5)); } while (0) + +#if defined(NVALGRIND) + +/* Define NVALGRIND to completely remove the Valgrind magic sequence + from the compiled code (analogous to NDEBUG's effects on + assert()) */ +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + (_zzq_default) + +#else /* ! NVALGRIND */ + +/* The following defines the magic code sequences which the JITter + spots and handles magically. Don't look too closely at them as + they will rot your brain. + + The assembly code sequences for all architectures is in this one + file. This is because this file must be stand-alone, and we don't + want to have multiple files. + + For VALGRIND_DO_CLIENT_REQUEST, we must ensure that the default + value gets put in the return slot, so that everything works when + this is executed not under Valgrind. Args are passed in a memory + block, and so there's no intrinsic limit to the number that could + be passed, but it's currently five. + + The macro args are: + _zzq_rlval result lvalue + _zzq_default default value (result returned when running on real CPU) + _zzq_request request code + _zzq_arg1..5 request params + + The other two macros are used to support function wrapping, and are + a lot simpler. 
VALGRIND_GET_NR_CONTEXT returns the value of the + guest's NRADDR pseudo-register and whatever other information is + needed to safely run the call original from the wrapper: on + ppc64-linux, the R2 value at the divert point is also needed. This + information is abstracted into a user-visible type, OrigFn. + + VALGRIND_CALL_NOREDIR_* behaves the same as the following on the + guest, but guarantees that the branch instruction will not be + redirected: x86: call *%eax, amd64: call *%rax, ppc32/ppc64: + branch-and-link-to-r11. VALGRIND_CALL_NOREDIR is just text, not a + complete inline asm, since it needs to be combined with more magic + inline asm stuff to be useful. +*/ + +/* ----------------- x86-{linux,darwin,solaris} ---------------- */ + +#if defined(PLAT_x86_linux) || defined(PLAT_x86_darwin) \ + || (defined(PLAT_x86_win32) && defined(__GNUC__)) \ + || defined(PLAT_x86_solaris) + +typedef + struct { + unsigned int nraddr; /* where's the code? */ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "roll $3, %%edi ; roll $13, %%edi\n\t" \ + "roll $29, %%edi ; roll $19, %%edi\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + __extension__ \ + ({volatile unsigned int _zzq_args[6]; \ + volatile unsigned int _zzq_result; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %EDX = client_request ( %EAX ) */ \ + "xchgl %%ebx,%%ebx" \ + : "=d" (_zzq_result) \ + : "a" (&_zzq_args[0]), "0" (_zzq_default) \ + : "cc", "memory" \ + ); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned int __addr; \ + 
__asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %EAX = guest_NRADDR */ \ + "xchgl %%ecx,%%ecx" \ + : "=a" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_EAX \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* call-noredir *%EAX */ \ + "xchgl %%edx,%%edx\n\t" + +#define VALGRIND_VEX_INJECT_IR() \ + do { \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + "xchgl %%edi,%%edi\n\t" \ + : : : "cc", "memory" \ + ); \ + } while (0) + +#endif /* PLAT_x86_linux || PLAT_x86_darwin || (PLAT_x86_win32 && __GNUC__) + || PLAT_x86_solaris */ + +/* ------------------------- x86-Win32 ------------------------- */ + +#if defined(PLAT_x86_win32) && !defined(__GNUC__) + +typedef + struct { + unsigned int nraddr; /* where's the code? */ + } + OrigFn; + +#if defined(_MSC_VER) + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + __asm rol edi, 3 __asm rol edi, 13 \ + __asm rol edi, 29 __asm rol edi, 19 + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + valgrind_do_client_request_expr((uintptr_t)(_zzq_default), \ + (uintptr_t)(_zzq_request), (uintptr_t)(_zzq_arg1), \ + (uintptr_t)(_zzq_arg2), (uintptr_t)(_zzq_arg3), \ + (uintptr_t)(_zzq_arg4), (uintptr_t)(_zzq_arg5)) + +static __inline uintptr_t +valgrind_do_client_request_expr(uintptr_t _zzq_default, uintptr_t _zzq_request, + uintptr_t _zzq_arg1, uintptr_t _zzq_arg2, + uintptr_t _zzq_arg3, uintptr_t _zzq_arg4, + uintptr_t _zzq_arg5) +{ + volatile uintptr_t _zzq_args[6]; + volatile unsigned int _zzq_result; + _zzq_args[0] = (uintptr_t)(_zzq_request); + _zzq_args[1] = (uintptr_t)(_zzq_arg1); + _zzq_args[2] = (uintptr_t)(_zzq_arg2); + _zzq_args[3] = (uintptr_t)(_zzq_arg3); + _zzq_args[4] = (uintptr_t)(_zzq_arg4); + _zzq_args[5] = (uintptr_t)(_zzq_arg5); + __asm { __asm lea eax, _zzq_args __asm mov edx, _zzq_default + __SPECIAL_INSTRUCTION_PREAMBLE + /* %EDX = client_request ( %EAX ) */ + __asm 
xchg ebx,ebx + __asm mov _zzq_result, edx + } + return _zzq_result; +} + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned int __addr; \ + __asm { __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %EAX = guest_NRADDR */ \ + __asm xchg ecx,ecx \ + __asm mov __addr, eax \ + } \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_EAX ERROR + +#define VALGRIND_VEX_INJECT_IR() \ + do { \ + __asm { __SPECIAL_INSTRUCTION_PREAMBLE \ + __asm xchg edi,edi \ + } \ + } while (0) + +#else +#error Unsupported compiler. +#endif + +#endif /* PLAT_x86_win32 */ + +/* ----------------- amd64-{linux,darwin,solaris} --------------- */ + +#if defined(PLAT_amd64_linux) || defined(PLAT_amd64_darwin) \ + || defined(PLAT_amd64_solaris) \ + || (defined(PLAT_amd64_win64) && defined(__GNUC__)) + +typedef + struct { + unsigned long int nraddr; /* where's the code? */ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rolq $3, %%rdi ; rolq $13, %%rdi\n\t" \ + "rolq $61, %%rdi ; rolq $51, %%rdi\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + __extension__ \ + ({ volatile unsigned long int _zzq_args[6]; \ + volatile unsigned long int _zzq_result; \ + _zzq_args[0] = (unsigned long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long int)(_zzq_arg5); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %RDX = client_request ( %RAX ) */ \ + "xchgq %%rbx,%%rbx" \ + : "=d" (_zzq_result) \ + : "a" (&_zzq_args[0]), "0" (_zzq_default) \ + : "cc", "memory" \ + ); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned long int __addr; \ + 
__asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %RAX = guest_NRADDR */ \ + "xchgq %%rcx,%%rcx" \ + : "=a" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_RAX \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* call-noredir *%RAX */ \ + "xchgq %%rdx,%%rdx\n\t" + +#define VALGRIND_VEX_INJECT_IR() \ + do { \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + "xchgq %%rdi,%%rdi\n\t" \ + : : : "cc", "memory" \ + ); \ + } while (0) + +#endif /* PLAT_amd64_linux || PLAT_amd64_darwin || PLAT_amd64_solaris */ + +/* ------------------------- amd64-Win64 ------------------------- */ + +#if defined(PLAT_amd64_win64) && !defined(__GNUC__) + +#error Unsupported compiler. + +#endif /* PLAT_amd64_win64 */ + +/* ------------------------ ppc32-linux ------------------------ */ + +#if defined(PLAT_ppc32_linux) + +typedef + struct { + unsigned int nraddr; /* where's the code? */ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rlwinm 0,0,3,0,31 ; rlwinm 0,0,13,0,31\n\t" \ + "rlwinm 0,0,29,0,31 ; rlwinm 0,0,19,0,31\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + __extension__ \ + ({ unsigned int _zzq_args[6]; \ + unsigned int _zzq_result; \ + unsigned int* _zzq_ptr; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile("mr 3,%1\n\t" /*default*/ \ + "mr 4,%2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" /*result*/ \ + : "=b" (_zzq_result) \ + : "b" (_zzq_default), "b" (_zzq_ptr) \ + : "cc", "memory", "r3", "r4"); \ + _zzq_result; \ + }) + +#define 
VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "cc", "memory", "r3" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" + +#define VALGRIND_VEX_INJECT_IR() \ + do { \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + "or 5,5,5\n\t" \ + ); \ + } while (0) + +#endif /* PLAT_ppc32_linux */ + +/* ------------------------ ppc64-linux ------------------------ */ + +#if defined(PLAT_ppc64be_linux) + +typedef + struct { + unsigned long int nraddr; /* where's the code? */ + unsigned long int r2; /* what tocptr do we need? */ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \ + "rotldi 0,0,61 ; rotldi 0,0,51\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + __extension__ \ + ({ unsigned long int _zzq_args[6]; \ + unsigned long int _zzq_result; \ + unsigned long int* _zzq_ptr; \ + _zzq_args[0] = (unsigned long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long int)(_zzq_arg5); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile("mr 3,%1\n\t" /*default*/ \ + "mr 4,%2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" /*result*/ \ + : "=b" (_zzq_result) \ + : "b" (_zzq_default), "b" (_zzq_ptr) \ + : "cc", "memory", "r3", "r4"); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* 
_zzq_orig = &(_zzq_rlval); \ + unsigned long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "cc", "memory", "r3" \ + ); \ + _zzq_orig->nraddr = __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR_GPR2 */ \ + "or 4,4,4\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "cc", "memory", "r3" \ + ); \ + _zzq_orig->r2 = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" + +#define VALGRIND_VEX_INJECT_IR() \ + do { \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + "or 5,5,5\n\t" \ + ); \ + } while (0) + +#endif /* PLAT_ppc64be_linux */ + +#if defined(PLAT_ppc64le_linux) + +typedef + struct { + unsigned long int nraddr; /* where's the code? */ + unsigned long int r2; /* what tocptr do we need? */ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \ + "rotldi 0,0,61 ; rotldi 0,0,51\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + __extension__ \ + ({ unsigned long int _zzq_args[6]; \ + unsigned long int _zzq_result; \ + unsigned long int* _zzq_ptr; \ + _zzq_args[0] = (unsigned long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long int)(_zzq_arg5); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile("mr 3,%1\n\t" /*default*/ \ + "mr 4,%2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" /*result*/ \ + : "=b" (_zzq_result) \ + : "b" (_zzq_default), "b" (_zzq_ptr) \ + : "cc", "memory", "r3", "r4"); \ + 
_zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + unsigned long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "cc", "memory", "r3" \ + ); \ + _zzq_orig->nraddr = __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR_GPR2 */ \ + "or 4,4,4\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "cc", "memory", "r3" \ + ); \ + _zzq_orig->r2 = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R12 */ \ + "or 3,3,3\n\t" + +#define VALGRIND_VEX_INJECT_IR() \ + do { \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + "or 5,5,5\n\t" \ + ); \ + } while (0) + +#endif /* PLAT_ppc64le_linux */ + +/* ------------------------- arm-linux ------------------------- */ + +#if defined(PLAT_arm_linux) + +typedef + struct { + unsigned int nraddr; /* where's the code? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "mov r12, r12, ror #3 ; mov r12, r12, ror #13 \n\t" \ + "mov r12, r12, ror #29 ; mov r12, r12, ror #19 \n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + __extension__ \ + ({volatile unsigned int _zzq_args[6]; \ + volatile unsigned int _zzq_result; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + __asm__ volatile("mov r3, %1\n\t" /*default*/ \ + "mov r4, %2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* R3 = client_request ( R4 ) */ \ + "orr r10, r10, r10\n\t" \ + "mov %0, r3" /*result*/ \ + : "=r" (_zzq_result) \ + : "r" (_zzq_default), "r" (&_zzq_args[0]) \ + : "cc","memory", "r3", "r4"); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* R3 = guest_NRADDR */ \ + "orr r11, r11, r11\n\t" \ + "mov %0, r3" \ + : "=r" (__addr) \ + : \ + : "cc", "memory", "r3" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R4 */ \ + "orr r12, r12, r12\n\t" + +#define VALGRIND_VEX_INJECT_IR() \ + do { \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + "orr r9, r9, r9\n\t" \ + : : : "cc", "memory" \ + ); \ + } while (0) + +#endif /* PLAT_arm_linux */ + +/* ------------------------ arm64-linux ------------------------- */ + +#if defined(PLAT_arm64_linux) + +typedef + struct { + unsigned long int nraddr; /* where's the code? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "ror x12, x12, #3 ; ror x12, x12, #13 \n\t" \ + "ror x12, x12, #51 ; ror x12, x12, #61 \n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + __extension__ \ + ({volatile unsigned long int _zzq_args[6]; \ + volatile unsigned long int _zzq_result; \ + _zzq_args[0] = (unsigned long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long int)(_zzq_arg5); \ + __asm__ volatile("mov x3, %1\n\t" /*default*/ \ + "mov x4, %2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* X3 = client_request ( X4 ) */ \ + "orr x10, x10, x10\n\t" \ + "mov %0, x3" /*result*/ \ + : "=r" (_zzq_result) \ + : "r" ((unsigned long int)(_zzq_default)), \ + "r" (&_zzq_args[0]) \ + : "cc","memory", "x3", "x4"); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + unsigned long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* X3 = guest_NRADDR */ \ + "orr x11, x11, x11\n\t" \ + "mov %0, x3" \ + : "=r" (__addr) \ + : \ + : "cc", "memory", "x3" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir X8 */ \ + "orr x12, x12, x12\n\t" + +#define VALGRIND_VEX_INJECT_IR() \ + do { \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + "orr x9, x9, x9\n\t" \ + : : : "cc", "memory" \ + ); \ + } while (0) + +#endif /* PLAT_arm64_linux */ + +/* ------------------------ s390x-linux ------------------------ */ + +#if defined(PLAT_s390x_linux) + +typedef + struct { + unsigned long int nraddr; /* where's the code? 
*/ + } + OrigFn; + +/* __SPECIAL_INSTRUCTION_PREAMBLE will be used to identify Valgrind specific + * code. This detection is implemented in platform specific toIR.c + * (e.g. VEX/priv/guest_s390_decoder.c). + */ +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "lr 15,15\n\t" \ + "lr 1,1\n\t" \ + "lr 2,2\n\t" \ + "lr 3,3\n\t" + +#define __CLIENT_REQUEST_CODE "lr 2,2\n\t" +#define __GET_NR_CONTEXT_CODE "lr 3,3\n\t" +#define __CALL_NO_REDIR_CODE "lr 4,4\n\t" +#define __VEX_INJECT_IR_CODE "lr 5,5\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + __extension__ \ + ({volatile unsigned long int _zzq_args[6]; \ + volatile unsigned long int _zzq_result; \ + _zzq_args[0] = (unsigned long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long int)(_zzq_arg5); \ + __asm__ volatile(/* r2 = args */ \ + "lgr 2,%1\n\t" \ + /* r3 = default */ \ + "lgr 3,%2\n\t" \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + __CLIENT_REQUEST_CODE \ + /* results = r3 */ \ + "lgr %0, 3\n\t" \ + : "=d" (_zzq_result) \ + : "a" (&_zzq_args[0]), "0" (_zzq_default) \ + : "cc", "2", "3", "memory" \ + ); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + __GET_NR_CONTEXT_CODE \ + "lgr %0, 3\n\t" \ + : "=a" (__addr) \ + : \ + : "cc", "3", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_R1 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + __CALL_NO_REDIR_CODE + +#define VALGRIND_VEX_INJECT_IR() \ + do { \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + __VEX_INJECT_IR_CODE); \ + } while (0) + +#endif /* PLAT_s390x_linux */ + +/* 
------------------------- mips32-linux ---------------- */ + +#if defined(PLAT_mips32_linux) + +typedef + struct { + unsigned int nraddr; /* where's the code? */ + } + OrigFn; + +/* .word 0x342 + * .word 0x742 + * .word 0xC2 + * .word 0x4C2*/ +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "srl $0, $0, 13\n\t" \ + "srl $0, $0, 29\n\t" \ + "srl $0, $0, 3\n\t" \ + "srl $0, $0, 19\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + __extension__ \ + ({ volatile unsigned int _zzq_args[6]; \ + volatile unsigned int _zzq_result; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + __asm__ volatile("move $11, %1\n\t" /*default*/ \ + "move $12, %2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* T3 = client_request ( T4 ) */ \ + "or $13, $13, $13\n\t" \ + "move %0, $11\n\t" /*result*/ \ + : "=r" (_zzq_result) \ + : "r" (_zzq_default), "r" (&_zzq_args[0]) \ + : "$11", "$12", "memory"); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %t9 = guest_NRADDR */ \ + "or $14, $14, $14\n\t" \ + "move %0, $11" /*result*/ \ + : "=r" (__addr) \ + : \ + : "$11" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_T9 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* call-noredir *%t9 */ \ + "or $15, $15, $15\n\t" + +#define VALGRIND_VEX_INJECT_IR() \ + do { \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + "or $11, $11, $11\n\t" \ + ); \ + } while (0) + + +#endif /* PLAT_mips32_linux */ + +/* ------------------------- mips64-linux ---------------- */ + +#if defined(PLAT_mips64_linux) + 
+typedef + struct { + unsigned long nraddr; /* where's the code? */ + } + OrigFn; + +/* dsll $0,$0, 3 + * dsll $0,$0, 13 + * dsll $0,$0, 29 + * dsll $0,$0, 19*/ +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "dsll $0,$0, 3 ; dsll $0,$0,13\n\t" \ + "dsll $0,$0,29 ; dsll $0,$0,19\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST_EXPR( \ + _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + __extension__ \ + ({ volatile unsigned long int _zzq_args[6]; \ + volatile unsigned long int _zzq_result; \ + _zzq_args[0] = (unsigned long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long int)(_zzq_arg5); \ + __asm__ volatile("move $11, %1\n\t" /*default*/ \ + "move $12, %2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* $11 = client_request ( $12 ) */ \ + "or $13, $13, $13\n\t" \ + "move %0, $11\n\t" /*result*/ \ + : "=r" (_zzq_result) \ + : "r" (_zzq_default), "r" (&_zzq_args[0]) \ + : "$11", "$12", "memory"); \ + _zzq_result; \ + }) + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* $11 = guest_NRADDR */ \ + "or $14, $14, $14\n\t" \ + "move %0, $11" /*result*/ \ + : "=r" (__addr) \ + : \ + : "$11"); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_T9 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* call-noredir $25 */ \ + "or $15, $15, $15\n\t" + +#define VALGRIND_VEX_INJECT_IR() \ + do { \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + "or $11, $11, $11\n\t" \ + ); \ + } while (0) + +#endif /* PLAT_mips64_linux */ + +/* Insert assembly code for other platforms here... 
*/ + +#endif /* NVALGRIND */ + + +/* ------------------------------------------------------------------ */ +/* PLATFORM SPECIFICS for FUNCTION WRAPPING. This is all very */ +/* ugly. It's the least-worst tradeoff I can think of. */ +/* ------------------------------------------------------------------ */ + +/* This section defines magic (a.k.a appalling-hack) macros for doing + guaranteed-no-redirection macros, so as to get from function + wrappers to the functions they are wrapping. The whole point is to + construct standard call sequences, but to do the call itself with a + special no-redirect call pseudo-instruction that the JIT + understands and handles specially. This section is long and + repetitious, and I can't see a way to make it shorter. + + The naming scheme is as follows: + + CALL_FN_{W,v}_{v,W,WW,WWW,WWWW,5W,6W,7W,etc} + + 'W' stands for "word" and 'v' for "void". Hence there are + different macros for calling arity 0, 1, 2, 3, 4, etc, functions, + and for each, the possibility of returning a word-typed result, or + no result. +*/ + +/* Use these to write the name of your wrapper. NOTE: duplicates + VG_WRAP_FUNCTION_Z{U,Z} in pub_tool_redir.h. NOTE also: inserts + the default behaviour equivalence class tag "0000" into the name. + See pub_tool_redir.h for details -- normally you don't need to + think about this, though. */ + +/* Use an extra level of macroisation so as to ensure the soname/fnname + args are fully macro-expanded before pasting them together. */ +#define VG_CONCAT4(_aa,_bb,_cc,_dd) _aa##_bb##_cc##_dd + +#define I_WRAP_SONAME_FNNAME_ZU(soname,fnname) \ + VG_CONCAT4(_vgw00000ZU_,soname,_,fnname) + +#define I_WRAP_SONAME_FNNAME_ZZ(soname,fnname) \ + VG_CONCAT4(_vgw00000ZZ_,soname,_,fnname) + +/* Use this macro from within a wrapper function to collect the + context (address and possibly other info) of the original function. + Once you have that you can then use it in one of the CALL_FN_ + macros. The type of the argument _lval is OrigFn. 
*/ +#define VALGRIND_GET_ORIG_FN(_lval) VALGRIND_GET_NR_CONTEXT(_lval) + +/* Also provide end-user facilities for function replacement, rather + than wrapping. A replacement function differs from a wrapper in + that it has no way to get hold of the original function being + called, and hence no way to call onwards to it. In a replacement + function, VALGRIND_GET_ORIG_FN always returns zero. */ + +#define I_REPLACE_SONAME_FNNAME_ZU(soname,fnname) \ + VG_CONCAT4(_vgr00000ZU_,soname,_,fnname) + +#define I_REPLACE_SONAME_FNNAME_ZZ(soname,fnname) \ + VG_CONCAT4(_vgr00000ZZ_,soname,_,fnname) + +/* Derivatives of the main macros below, for calling functions + returning void. */ + +#define CALL_FN_v_v(fnptr) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_v(_junk,fnptr); } while (0) + +#define CALL_FN_v_W(fnptr, arg1) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_W(_junk,fnptr,arg1); } while (0) + +#define CALL_FN_v_WW(fnptr, arg1,arg2) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_WW(_junk,fnptr,arg1,arg2); } while (0) + +#define CALL_FN_v_WWW(fnptr, arg1,arg2,arg3) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_WWW(_junk,fnptr,arg1,arg2,arg3); } while (0) + +#define CALL_FN_v_WWWW(fnptr, arg1,arg2,arg3,arg4) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_WWWW(_junk,fnptr,arg1,arg2,arg3,arg4); } while (0) + +#define CALL_FN_v_5W(fnptr, arg1,arg2,arg3,arg4,arg5) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_5W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5); } while (0) + +#define CALL_FN_v_6W(fnptr, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_6W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5,arg6); } while (0) + +#define CALL_FN_v_7W(fnptr, arg1,arg2,arg3,arg4,arg5,arg6,arg7) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_7W(_junk,fnptr,arg1,arg2,arg3,arg4,arg5,arg6,arg7); } while (0) + +/* ----------------- x86-{linux,darwin,solaris} ---------------- */ + +#if defined(PLAT_x86_linux) || 
defined(PLAT_x86_darwin) \ + || defined(PLAT_x86_solaris) + +/* These regs are trashed by the hidden call. No need to mention eax + as gcc can already see that, plus causes gcc to bomb. */ +#define __CALLER_SAVED_REGS /*"eax"*/ "ecx", "edx" + +/* Macros to save and align the stack before making a function + call and restore it afterwards as gcc may not keep the stack + pointer aligned if it doesn't realise calls are being made + to other functions. */ + +#define VALGRIND_ALIGN_STACK \ + "movl %%esp,%%edi\n\t" \ + "andl $0xfffffff0,%%esp\n\t" +#define VALGRIND_RESTORE_STACK \ + "movl %%edi,%%esp\n\t" + +/* These CALL_FN_ macros assume that on x86-linux, sizeof(unsigned + long) == 4. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "subl $12, %%esp\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned 
long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "subl $8, %%esp\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "subl $4, %%esp\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) 
\ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "subl $12, %%esp\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "subl $8, %%esp\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while 
(0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "subl $4, %%esp\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" 
(_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "subl $12, %%esp\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned 
long)(arg10); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "subl $8, %%esp\n\t" \ + "pushl 40(%%eax)\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "subl $4, %%esp\n\t" \ + "pushl 44(%%eax)\n\t" \ + "pushl 40(%%eax)\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define 
CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "pushl 48(%%eax)\n\t" \ + "pushl 44(%%eax)\n\t" \ + "pushl 40(%%eax)\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "edi" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_x86_linux || PLAT_x86_darwin || PLAT_x86_solaris */ + +/* ---------------- amd64-{linux,darwin,solaris} --------------- */ + +#if defined(PLAT_amd64_linux) || defined(PLAT_amd64_darwin) \ + || defined(PLAT_amd64_solaris) + +/* ARGREGS: rdi rsi rdx rcx r8 r9 (the rest on stack in R-to-L order) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS /*"rax",*/ "rcx", "rdx", "rsi", \ + "rdi", "r8", "r9", "r10", "r11" + +/* This is all pretty complex. It's so as to make stack unwinding + work reliably. See bug 243270. 
The basic problem is the sub and + add of 128 of %rsp in all of the following macros. If gcc believes + the CFA is in %rsp, then unwinding may fail, because what's at the + CFA is not what gcc "expected" when it constructs the CFIs for the + places where the macros are instantiated. + + But we can't just add a CFI annotation to increase the CFA offset + by 128, to match the sub of 128 from %rsp, because we don't know + whether gcc has chosen %rsp as the CFA at that point, or whether it + has chosen some other register (eg, %rbp). In the latter case, + adding a CFI annotation to change the CFA offset is simply wrong. + + So the solution is to get hold of the CFA using + __builtin_dwarf_cfa(), put it in a known register, and add a + CFI annotation to say what the register is. We choose %rbp for + this (perhaps perversely), because: + + (1) %rbp is already subject to unwinding. If a new register was + chosen then the unwinder would have to unwind it in all stack + traces, which is expensive, and + + (2) %rbp is already subject to precise exception updates in the + JIT. If a new register was chosen, we'd have to have precise + exceptions for it too, which reduces performance of the + generated code. + + However .. one extra complication. We can't just whack the result + of __builtin_dwarf_cfa() into %rbp and then add %rbp to the + list of trashed registers at the end of the inline assembly + fragments; gcc won't allow %rbp to appear in that list. Hence + instead we need to stash %rbp in %r15 for the duration of the asm, + and say that %r15 is trashed instead. gcc seems happy to go with + that. + + Oh .. and this all needs to be conditionalised so that it is + unchanged from before this commit, when compiled with older gccs + that don't support __builtin_dwarf_cfa. Furthermore, since + this header file is freestanding, it has to be independent of + config.h, and so the following conditionalisation cannot depend on + configure time checks. 
+ + Although it's not clear from + 'defined(__GNUC__) && defined(__GCC_HAVE_DWARF2_CFI_ASM)', + this expression excludes Darwin. + .cfi directives in Darwin assembly appear to be completely + different and I haven't investigated how they work. + + For even more entertainment value, note we have to use the + completely undocumented __builtin_dwarf_cfa(), which appears to + really compute the CFA, whereas __builtin_frame_address(0) claims + to but actually doesn't. See + https://bugs.kde.org/show_bug.cgi?id=243270#c47 +*/ +#if defined(__GNUC__) && defined(__GCC_HAVE_DWARF2_CFI_ASM) +# define __FRAME_POINTER \ + ,"r"(__builtin_dwarf_cfa()) +# define VALGRIND_CFI_PROLOGUE \ + "movq %%rbp, %%r15\n\t" \ + "movq %2, %%rbp\n\t" \ + ".cfi_remember_state\n\t" \ + ".cfi_def_cfa rbp, 0\n\t" +# define VALGRIND_CFI_EPILOGUE \ + "movq %%r15, %%rbp\n\t" \ + ".cfi_restore_state\n\t" +#else +# define __FRAME_POINTER +# define VALGRIND_CFI_PROLOGUE +# define VALGRIND_CFI_EPILOGUE +#endif + +/* Macros to save and align the stack before making a function + call and restore it afterwards as gcc may not keep the stack + pointer aligned if it doesn't realise calls are being made + to other functions. */ + +#define VALGRIND_ALIGN_STACK \ + "movq %%rsp,%%r14\n\t" /* keep the incoming %rsp in %r14 */ \ + "andq $0xfffffffffffffff0,%%rsp\n\t" /* clear the low 4 bits of %rsp */ +#define VALGRIND_RESTORE_STACK \ + "movq %%r14,%%rsp\n\t" /* put back the %rsp kept in %r14 */ + +/* These CALL_FN_ macros assume that on amd64-linux, sizeof(unsigned + long) == 8. When the CFI variants above are in use, asm operand %2 in each macro is the __FRAME_POINTER input that VALGRIND_CFI_PROLOGUE copies into %rbp. */ + +/* NB 9 Sept 07. There is a nasty kludge here in all these CALL_FN_ + macros. In order not to trash the stack redzone, we need to drop + %rsp by 128 before the hidden call, and restore afterwards. The + nastiness is that it is only by luck that the stack still appears + to be unwindable during the hidden call - since then the behaviour + of any routine using this macro does not match what the CFI data + says. Sigh. + + Why is this important? Imagine that a wrapper has a stack + allocated local, and passes to the hidden call, a pointer to it.
+ Because gcc does not know about the hidden call, it may allocate + that local in the redzone. Unfortunately the hidden call may then + trash it before it comes to use it. So we must step clear of the + redzone, for the duration of the hidden call, to make it safe. + + Probably the same problem afflicts the other redzone-style ABIs too + (ppc64-linux); but for those, the stack is + self describing (none of this CFI nonsense) so at least messing + with the stack pointer doesn't give a danger of non-unwindable + stack. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $128,%%rsp\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $128,%%rsp\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + 
volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $128,%%rsp\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $128,%%rsp\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $128,%%rsp\n\t" \ + "movq 
32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $128,%%rsp\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + 
VALGRIND_ALIGN_STACK \ + "subq $128,%%rsp\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $136,%%rsp\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = 
(unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $128,%%rsp\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $136,%%rsp\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ 
\ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $128,%%rsp\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned 
long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $136,%%rsp\n\t" \ + "pushq 88(%%rax)\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + VALGRIND_ALIGN_STACK \ + "subq $128,%%rsp\n\t" \ + "pushq 
96(%%rax)\n\t" \ + "pushq 88(%%rax)\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + VALGRIND_RESTORE_STACK \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r14", "r15" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_amd64_linux || PLAT_amd64_darwin || PLAT_amd64_solaris */ + +/* ------------------------ ppc32-linux ------------------------ */ + +#if defined(PLAT_ppc32_linux) + +/* This is useful for finding out about the on-stack stuff: + + extern int f9 ( int,int,int,int,int,int,int,int,int ); + extern int f10 ( int,int,int,int,int,int,int,int,int,int ); + extern int f11 ( int,int,int,int,int,int,int,int,int,int,int ); + extern int f12 ( int,int,int,int,int,int,int,int,int,int,int,int ); + + int g9 ( void ) { + return f9(11,22,33,44,55,66,77,88,99); + } + int g10 ( void ) { + return f10(11,22,33,44,55,66,77,88,99,110); + } + int g11 ( void ) { + return f11(11,22,33,44,55,66,77,88,99,110,121); + } + int g12 ( void ) { + return f12(11,22,33,44,55,66,77,88,99,110,121,132); + } +*/ + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* Macros to save and align the stack before making a function + call and restore it afterwards as gcc may not keep the stack + pointer aligned if it doesn't realise calls are being made + to other functions. 
*/ + +#define VALGRIND_ALIGN_STACK \ + "mr 28,1\n\t" /* keep the incoming r1 in r28 */ \ + "rlwinm 1,1,0,0,27\n\t" /* clear the low 4 bits of r1 */ +#define VALGRIND_RESTORE_STACK \ + "mr 1,28\n\t" /* put back the r1 kept in r28 */ + +/* These CALL_FN_ macros assume that on ppc32-linux, + sizeof(unsigned long) == 4. Every argN is parenthesised before the cast so that an expression argument is converted as a whole. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + :
/*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned
long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ +
_argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ +
arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "addi 1,1,-16\n\t" /* drop r1 by 16 bytes for the stacked args */ \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "addi 1,1,-16\n\t" /* drop r1 by 16 bytes for the stacked args */ \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw
3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "addi 1,1,-32\n\t" /* drop r1 by 32 bytes for the stacked args */ \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,16(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r"
(&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "addi 1,1,-32\n\t" /* drop r1 by 32 bytes for the stacked args */ \ + /* arg12 */ \ + "lwz 3,48(11)\n\t" \ + "stw 3,20(1)\n\t" \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,16(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + VALGRIND_RESTORE_STACK \ + "mr %0,3" /* r3->_res */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc32_linux */ + +/* ------------------------ ppc64-linux ------------------------ */ + +#if defined(PLAT_ppc64be_linux) + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden
call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* Macros to save and align the stack before making a function + call and restore it afterwards as gcc may not keep the stack + pointer aligned if it doesn't realise calls are being made + to other functions. */ + +#define VALGRIND_ALIGN_STACK \ + "mr 28,1\n\t" \ + "rldicr 1,1,0,59\n\t" +#define VALGRIND_RESTORE_STACK \ + "mr 1,28\n\t" + +/* These CALL_FN_ macros assume that on ppc64-linux, sizeof(unsigned + long) == 8. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+0]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+1]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 
11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+2]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+3]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* 
arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+4]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+5]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = 
(unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+6]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ 
"r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+7]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+8]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned 
long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+9]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-128\n\t" /* expand stack frame */ \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 
3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+10]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-128\n\t" /* expand stack frame */ \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + 
"ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+11]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-144\n\t" /* expand stack frame */ \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + 
"ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+12]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + _argvec[2+12] = (unsigned long)arg12; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-144\n\t" /* expand stack frame */ \ + /* arg12 */ \ + "ld 3,96(11)\n\t" \ + "std 3,136(1)\n\t" \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 
11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc64be_linux */ + +/* ------------------------- ppc64le-linux ----------------------- */ +#if defined(PLAT_ppc64le_linux) + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* Macros to save and align the stack before making a function + call and restore it afterwards as gcc may not keep the stack + pointer aligned if it doesn't realise calls are being made + to other functions. */ + +#define VALGRIND_ALIGN_STACK \ + "mr 28,1\n\t" \ + "rldicr 1,1,0,59\n\t" +#define VALGRIND_RESTORE_STACK \ + "mr 1,28\n\t" + +/* These CALL_FN_ macros assume that on ppc64-linux, sizeof(unsigned + long) == 8. 
*/ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+0]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+1]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+2]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + 
_argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 4, 16(12)\n\t" /* arg2->r4 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+3]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 4, 16(12)\n\t" /* arg2->r4 */ \ + "ld 5, 24(12)\n\t" /* arg3->r5 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+4]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds 
current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 4, 16(12)\n\t" /* arg2->r4 */ \ + "ld 5, 24(12)\n\t" /* arg3->r5 */ \ + "ld 6, 32(12)\n\t" /* arg4->r6 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+5]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 4, 16(12)\n\t" /* arg2->r4 */ \ + "ld 5, 24(12)\n\t" /* arg3->r5 */ \ + "ld 6, 32(12)\n\t" /* arg4->r6 */ \ + "ld 7, 40(12)\n\t" /* arg5->r7 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + 
VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+6]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 4, 16(12)\n\t" /* arg2->r4 */ \ + "ld 5, 24(12)\n\t" /* arg3->r5 */ \ + "ld 6, 32(12)\n\t" /* arg4->r6 */ \ + "ld 7, 40(12)\n\t" /* arg5->r7 */ \ + "ld 8, 48(12)\n\t" /* arg6->r8 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+7]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = 
(unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 4, 16(12)\n\t" /* arg2->r4 */ \ + "ld 5, 24(12)\n\t" /* arg3->r5 */ \ + "ld 6, 32(12)\n\t" /* arg4->r6 */ \ + "ld 7, 40(12)\n\t" /* arg5->r7 */ \ + "ld 8, 48(12)\n\t" /* arg6->r8 */ \ + "ld 9, 56(12)\n\t" /* arg7->r9 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+8]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 4, 16(12)\n\t" /* arg2->r4 */ \ + "ld 5, 24(12)\n\t" /* arg3->r5 */ \ + "ld 6, 32(12)\n\t" /* arg4->r6 */ \ + "ld 7, 40(12)\n\t" /* arg5->r7 */ \ + "ld 8, 48(12)\n\t" /* arg6->r8 */ \ + "ld 9, 56(12)\n\t" 
/* arg7->r9 */ \ + "ld 10, 64(12)\n\t" /* arg8->r10 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+9]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-128\n\t" /* expand stack frame */ \ + /* arg9 */ \ + "ld 3,72(12)\n\t" \ + "std 3,96(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 4, 16(12)\n\t" /* arg2->r4 */ \ + "ld 5, 24(12)\n\t" /* arg3->r5 */ \ + "ld 6, 32(12)\n\t" /* arg4->r6 */ \ + "ld 7, 40(12)\n\t" /* arg5->r7 */ \ + "ld 8, 48(12)\n\t" /* arg6->r8 */ \ + "ld 9, 56(12)\n\t" /* arg7->r9 */ \ + "ld 10, 64(12)\n\t" /* arg8->r10 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", 
__CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+10]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-128\n\t" /* expand stack frame */ \ + /* arg10 */ \ + "ld 3,80(12)\n\t" \ + "std 3,104(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(12)\n\t" \ + "std 3,96(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 4, 16(12)\n\t" /* arg2->r4 */ \ + "ld 5, 24(12)\n\t" /* arg3->r5 */ \ + "ld 6, 32(12)\n\t" /* arg4->r6 */ \ + "ld 7, 40(12)\n\t" /* arg5->r7 */ \ + "ld 8, 48(12)\n\t" /* arg6->r8 */ \ + "ld 9, 56(12)\n\t" /* arg7->r9 */ \ + "ld 10, 64(12)\n\t" /* arg8->r10 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = 
(orig); \ + volatile unsigned long _argvec[3+11]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-144\n\t" /* expand stack frame */ \ + /* arg11 */ \ + "ld 3,88(12)\n\t" \ + "std 3,112(1)\n\t" \ + /* arg10 */ \ + "ld 3,80(12)\n\t" \ + "std 3,104(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(12)\n\t" \ + "std 3,96(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 4, 16(12)\n\t" /* arg2->r4 */ \ + "ld 5, 24(12)\n\t" /* arg3->r5 */ \ + "ld 6, 32(12)\n\t" /* arg4->r6 */ \ + "ld 7, 40(12)\n\t" /* arg5->r7 */ \ + "ld 8, 48(12)\n\t" /* arg6->r8 */ \ + "ld 9, 56(12)\n\t" /* arg7->r9 */ \ + "ld 10, 64(12)\n\t" /* arg8->r10 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+12]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds 
current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + _argvec[2+12] = (unsigned long)arg12; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "mr 12,%1\n\t" \ + "std 2,-16(12)\n\t" /* save tocptr */ \ + "ld 2,-8(12)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-144\n\t" /* expand stack frame */ \ + /* arg12 */ \ + "ld 3,96(12)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg11 */ \ + "ld 3,88(12)\n\t" \ + "std 3,112(1)\n\t" \ + /* arg10 */ \ + "ld 3,80(12)\n\t" \ + "std 3,104(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(12)\n\t" \ + "std 3,96(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(12)\n\t" /* arg1->r3 */ \ + "ld 4, 16(12)\n\t" /* arg2->r4 */ \ + "ld 5, 24(12)\n\t" /* arg3->r5 */ \ + "ld 6, 32(12)\n\t" /* arg4->r6 */ \ + "ld 7, 40(12)\n\t" /* arg5->r7 */ \ + "ld 8, 48(12)\n\t" /* arg6->r8 */ \ + "ld 9, 56(12)\n\t" /* arg7->r9 */ \ + "ld 10, 64(12)\n\t" /* arg8->r10 */ \ + "ld 12, 0(12)\n\t" /* target->r12 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R12 \ + "mr 12,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(12)\n\t" /* restore tocptr */ \ + VALGRIND_RESTORE_STACK \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r28" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc64le_linux */ + +/* ------------------------- arm-linux ------------------------- */ + +#if defined(PLAT_arm_linux) + +/* These regs are trashed by the hidden call. 
*/ +#define __CALLER_SAVED_REGS "r0", "r1", "r2", "r3","r4", "r12", "r14" + +/* Macros to save and align the stack before making a function + call and restore it afterwards as gcc may not keep the stack + pointer aligned if it doesn't realise calls are being made + to other functions. */ + +/* This is a bit tricky. We store the original stack pointer in r10 + as it is callee-saves. gcc doesn't allow the use of r11 for some + reason. Also, we can't directly "bic" the stack pointer in thumb + mode since r13 isn't an allowed register number in that context. + So use r4 as a temporary, since that is about to get trashed + anyway, just after each use of this macro. Side effect is we need + to be very careful about any future changes, since + VALGRIND_ALIGN_STACK simply assumes r4 is usable. */ +#define VALGRIND_ALIGN_STACK \ + "mov r10, sp\n\t" \ + "mov r4, sp\n\t" \ + "bic r4, r4, #7\n\t" \ + "mov sp, r4\n\t" +#define VALGRIND_RESTORE_STACK \ + "mov sp, r10\n\t" + +/* These CALL_FN_ macros assume that on arm-linux, sizeof(unsigned + long) == 4. 
*/ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long 
_argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "sub sp, sp, #4 \n\t" \ + 
"ldr r0, [%1, #20] \n\t" \ + "push {r0} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "push {r0, r1} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); 
\ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "sub sp, sp, #4 \n\t" \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "ldr r2, [%1, #28] \n\t" \ + "push {r0, r1, r2} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "ldr r2, [%1, #28] \n\t" \ + "ldr r3, [%1, #32] \n\t" \ + "push {r0, r1, r2, r3} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + 
_argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "sub sp, sp, #4 \n\t" \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "ldr r2, [%1, #28] \n\t" \ + "ldr r3, [%1, #32] \n\t" \ + "ldr r4, [%1, #36] \n\t" \ + "push {r0, r1, r2, r3, r4} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr r0, [%1, #40] \n\t" \ + "push {r0} \n\t" \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "ldr r2, [%1, #28] \n\t" \ + "ldr r3, [%1, #32] \n\t" \ + "ldr r4, [%1, #36] \n\t" \ + "push {r0, r1, r2, 
r3, r4} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "sub sp, sp, #4 \n\t" \ + "ldr r0, [%1, #40] \n\t" \ + "ldr r1, [%1, #44] \n\t" \ + "push {r0, r1} \n\t" \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "ldr r2, [%1, #28] \n\t" \ + "ldr r3, [%1, #32] \n\t" \ + "ldr r4, [%1, #36] \n\t" \ + "push {r0, r1, r2, r3, r4} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + 
arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr r0, [%1, #40] \n\t" \ + "ldr r1, [%1, #44] \n\t" \ + "ldr r2, [%1, #48] \n\t" \ + "push {r0, r1, r2} \n\t" \ + "ldr r0, [%1, #20] \n\t" \ + "ldr r1, [%1, #24] \n\t" \ + "ldr r2, [%1, #28] \n\t" \ + "ldr r3, [%1, #32] \n\t" \ + "ldr r4, [%1, #36] \n\t" \ + "push {r0, r1, r2, r3, r4} \n\t" \ + "ldr r0, [%1, #4] \n\t" \ + "ldr r1, [%1, #8] \n\t" \ + "ldr r2, [%1, #12] \n\t" \ + "ldr r3, [%1, #16] \n\t" \ + "ldr r4, [%1] \n\t" /* target->r4 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R4 \ + VALGRIND_RESTORE_STACK \ + "mov %0, r0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "r10" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_arm_linux */ + +/* ------------------------ arm64-linux ------------------------ */ + +#if defined(PLAT_arm64_linux) + +/* These regs are trashed by the hidden call. 
*/ +#define __CALLER_SAVED_REGS \ + "x0", "x1", "x2", "x3","x4", "x5", "x6", "x7", "x8", "x9", \ + "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", \ + "x18", "x19", "x20", "x30", \ + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \ + "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", \ + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", \ + "v26", "v27", "v28", "v29", "v30", "v31" + +/* x21 is callee-saved, so we can use it to save and restore SP around + the hidden call. */ +#define VALGRIND_ALIGN_STACK \ + "mov x21, sp\n\t" \ + "bic sp, x21, #15\n\t" +#define VALGRIND_RESTORE_STACK \ + "mov sp, x21\n\t" + +/* These CALL_FN_ macros assume that on arm64-linux, + sizeof(unsigned long) == 8. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); 
\ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x1, [%1, #16] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x1, [%1, #16] \n\t" \ + "ldr x2, [%1, #24] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x1, [%1, #16] \n\t" \ + "ldr x2, [%1, #24] \n\t" \ + "ldr x3, [%1, #32] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + 
VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x1, [%1, #16] \n\t" \ + "ldr x2, [%1, #24] \n\t" \ + "ldr x3, [%1, #32] \n\t" \ + "ldr x4, [%1, #40] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x1, [%1, #16] \n\t" \ + "ldr x2, [%1, #24] \n\t" \ + "ldr x3, [%1, #32] \n\t" \ + "ldr x4, [%1, #40] \n\t" \ + "ldr x5, [%1, #48] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0" \ + 
: /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x1, [%1, #16] \n\t" \ + "ldr x2, [%1, #24] \n\t" \ + "ldr x3, [%1, #32] \n\t" \ + "ldr x4, [%1, #40] \n\t" \ + "ldr x5, [%1, #48] \n\t" \ + "ldr x6, [%1, #56] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x1, [%1, #16] \n\t" \ + "ldr x2, [%1, #24] \n\t" \ + "ldr x3, [%1, #32] \n\t" \ + "ldr x4, [%1, #40] \n\t" 
\ + "ldr x5, [%1, #48] \n\t" \ + "ldr x6, [%1, #56] \n\t" \ + "ldr x7, [%1, #64] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "sub sp, sp, #0x20 \n\t" \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x1, [%1, #16] \n\t" \ + "ldr x2, [%1, #24] \n\t" \ + "ldr x3, [%1, #32] \n\t" \ + "ldr x4, [%1, #40] \n\t" \ + "ldr x5, [%1, #48] \n\t" \ + "ldr x6, [%1, #56] \n\t" \ + "ldr x7, [%1, #64] \n\t" \ + "ldr x8, [%1, #72] \n\t" \ + "str x8, [sp, #0] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned 
long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "sub sp, sp, #0x20 \n\t" \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x1, [%1, #16] \n\t" \ + "ldr x2, [%1, #24] \n\t" \ + "ldr x3, [%1, #32] \n\t" \ + "ldr x4, [%1, #40] \n\t" \ + "ldr x5, [%1, #48] \n\t" \ + "ldr x6, [%1, #56] \n\t" \ + "ldr x7, [%1, #64] \n\t" \ + "ldr x8, [%1, #72] \n\t" \ + "str x8, [sp, #0] \n\t" \ + "ldr x8, [%1, #80] \n\t" \ + "str x8, [sp, #8] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "sub sp, sp, #0x30 \n\t" \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x1, [%1, #16] \n\t" \ + "ldr x2, [%1, #24] \n\t" \ + "ldr x3, [%1, #32] \n\t" \ + "ldr x4, [%1, #40] \n\t" \ + "ldr x5, 
[%1, #48] \n\t" \ + "ldr x6, [%1, #56] \n\t" \ + "ldr x7, [%1, #64] \n\t" \ + "ldr x8, [%1, #72] \n\t" \ + "str x8, [sp, #0] \n\t" \ + "ldr x8, [%1, #80] \n\t" \ + "str x8, [sp, #8] \n\t" \ + "ldr x8, [%1, #88] \n\t" \ + "str x8, [sp, #16] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11, \ + arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + VALGRIND_ALIGN_STACK \ + "sub sp, sp, #0x30 \n\t" \ + "ldr x0, [%1, #8] \n\t" \ + "ldr x1, [%1, #16] \n\t" \ + "ldr x2, [%1, #24] \n\t" \ + "ldr x3, [%1, #32] \n\t" \ + "ldr x4, [%1, #40] \n\t" \ + "ldr x5, [%1, #48] \n\t" \ + "ldr x6, [%1, #56] \n\t" \ + "ldr x7, [%1, #64] \n\t" \ + "ldr x8, [%1, #72] \n\t" \ + "str x8, [sp, #0] \n\t" \ + "ldr x8, [%1, #80] \n\t" \ + "str x8, [sp, #8] \n\t" \ + "ldr x8, [%1, #88] \n\t" \ + "str x8, [sp, #16] \n\t" \ + "ldr x8, [%1, #96] \n\t" \ + "str x8, [sp, #24] \n\t" \ + "ldr x8, [%1] \n\t" /* target->x8 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_X8 \ + VALGRIND_RESTORE_STACK \ + "mov %0, x0" \ + : /*out*/ "=r" 
(_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS, "x21" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_arm64_linux */ + +/* ------------------------- s390x-linux ------------------------- */ + +#if defined(PLAT_s390x_linux) + +/* Similar workaround as amd64 (see above), but we use r11 as frame + pointer and save the old r11 in r7. r11 might be used for + argvec, therefore we copy argvec in r1 since r1 is clobbered + after the call anyway. */ +#if defined(__GNUC__) && defined(__GCC_HAVE_DWARF2_CFI_ASM) +# define __FRAME_POINTER \ + ,"d"(__builtin_dwarf_cfa()) +# define VALGRIND_CFI_PROLOGUE \ + ".cfi_remember_state\n\t" \ + "lgr 1,%1\n\t" /* copy the argvec pointer in r1 */ \ + "lgr 7,11\n\t" \ + "lgr 11,%2\n\t" \ + ".cfi_def_cfa r11, 0\n\t" +# define VALGRIND_CFI_EPILOGUE \ + "lgr 11, 7\n\t" \ + ".cfi_restore_state\n\t" +#else +# define __FRAME_POINTER +# define VALGRIND_CFI_PROLOGUE \ + "lgr 1,%1\n\t" +# define VALGRIND_CFI_EPILOGUE +#endif + +/* Nb: On s390 the stack pointer is properly aligned *at all times* + according to the s390 GCC maintainer. (The ABI specification is not + precise in this regard.) Therefore, VALGRIND_ALIGN_STACK and + VALGRIND_RESTORE_STACK are not defined here. */ + +/* These regs are trashed by the hidden call. Note that we overwrite + r14 in s390_irgen_noredir (VEX/priv/guest_s390_irgen.c) to give the + function a proper return address. All others are ABI defined call + clobbers. 
*/ +#define __CALLER_SAVED_REGS "0","1","2","3","4","5","14", \ + "f0","f1","f2","f3","f4","f5","f6","f7" + +/* Nb: Although r11 is modified in the asm snippets below (inside + VALGRIND_CFI_PROLOGUE) it is not listed in the clobber section, for + two reasons: + (1) r11 is restored in VALGRIND_CFI_EPILOGUE, so effectively it is not + modified + (2) GCC will complain that r11 cannot appear inside a clobber section, + when compiled with -O -fno-omit-frame-pointer + */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-160\n\t" \ + "lg 1, 0(1)\n\t" /* target->r1 */ \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,160\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "d" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +/* The call abi has the arguments in r2-r6 and stack. Each argN is parenthesised before the cast to unsigned long so that an expression argument is converted as a whole. */ +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-160\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,160\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1, arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned
long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-160\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,160\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1, arg2, arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-160\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,160\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1, arg2, arg3, arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-160\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,160\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory",
__CALLER_SAVED_REGS,"7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1, arg2, arg3, arg4, arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-160\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,160\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ + arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-168\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "mvc 160(8,15), 48(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,168\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define
CALL_FN_W_7W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ + arg6, arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-176\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "mvc 160(8,15), 48(1)\n\t" \ + "mvc 168(8,15), 56(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,176\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ + arg6, arg7 ,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-184\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "mvc 160(8,15), 48(1)\n\t" \ + "mvc 168(8,15), 56(1)\n\t" \ + "mvc 176(8,15), 64(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,184\n\t" \ + VALGRIND_CFI_EPILOGUE \ + :
/*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ + arg6, arg7 ,arg8, arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-192\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "mvc 160(8,15), 48(1)\n\t" \ + "mvc 168(8,15), 56(1)\n\t" \ + "mvc 176(8,15), 64(1)\n\t" \ + "mvc 184(8,15), 72(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,192\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ + arg6, arg7 ,arg8, arg9, arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned
long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-200\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "mvc 160(8,15), 48(1)\n\t" \ + "mvc 168(8,15), 56(1)\n\t" \ + "mvc 176(8,15), 64(1)\n\t" \ + "mvc 184(8,15), 72(1)\n\t" \ + "mvc 192(8,15), 80(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,200\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ + arg6, arg7 ,arg8, arg9, arg10, arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-208\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "mvc 160(8,15), 48(1)\n\t" \ + "mvc 168(8,15), 56(1)\n\t" \ + "mvc 176(8,15), 64(1)\n\t" \ + "mvc 184(8,15), 72(1)\n\t" \ + "mvc 192(8,15), 80(1)\n\t" \ + "mvc 200(8,15), 88(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,208\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7"
\ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1, arg2, arg3, arg4, arg5, \ + arg6, arg7 ,arg8, arg9, arg10, arg11, arg12)\ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + VALGRIND_CFI_PROLOGUE \ + "aghi 15,-216\n\t" \ + "lg 2, 8(1)\n\t" \ + "lg 3,16(1)\n\t" \ + "lg 4,24(1)\n\t" \ + "lg 5,32(1)\n\t" \ + "lg 6,40(1)\n\t" \ + "mvc 160(8,15), 48(1)\n\t" \ + "mvc 168(8,15), 56(1)\n\t" \ + "mvc 176(8,15), 64(1)\n\t" \ + "mvc 184(8,15), 72(1)\n\t" \ + "mvc 192(8,15), 80(1)\n\t" \ + "mvc 200(8,15), 88(1)\n\t" \ + "mvc 208(8,15), 96(1)\n\t" \ + "lg 1, 0(1)\n\t" \ + VALGRIND_CALL_NOREDIR_R1 \ + "lgr %0, 2\n\t" \ + "aghi 15,216\n\t" \ + VALGRIND_CFI_EPILOGUE \ + : /*out*/ "=d" (_res) \ + : /*in*/ "a" (&_argvec[0]) __FRAME_POINTER \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS,"6","7" \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + + +#endif /* PLAT_s390x_linux */ + +/* ------------------------- mips32-linux ----------------------- */ + +#if defined(PLAT_mips32_linux) + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS "$2", "$3", "$4", "$5", "$6", \ +"$7", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \ +"$25", "$31" + +/* These CALL_FN_ macros assume that on mips-linux, sizeof(unsigned + long) == 4.
*/ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "subu $29, $29, 16 \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 16\n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "subu $29, $29, 16 \n\t" \ + "lw $4, 4(%1) \n\t" /* arg1*/ \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 16 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "subu $29, $29, 16 \n\t" \ + "lw $4, 4(%1) \n\t" \ + "lw $5, 8(%1) \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + 
VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 16 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "subu $29, $29, 16 \n\t" \ + "lw $4, 4(%1) \n\t" \ + "lw $5, 8(%1) \n\t" \ + "lw $6, 12(%1) \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 16 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "subu $29, $29, 16 \n\t" \ + "lw $4, 4(%1) \n\t" \ + "lw $5, 8(%1) \n\t" \ + "lw $6, 12(%1) \n\t" \ + "lw $7, 16(%1) \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 16 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, 
$29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "lw $4, 20(%1) \n\t" \ + "subu $29, $29, 24\n\t" \ + "sw $4, 16($29) \n\t" \ + "lw $4, 4(%1) \n\t" \ + "lw $5, 8(%1) \n\t" \ + "lw $6, 12(%1) \n\t" \ + "lw $7, 16(%1) \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 24 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "lw $4, 20(%1) \n\t" \ + "subu $29, $29, 32\n\t" \ + "sw $4, 16($29) \n\t" \ + "lw $4, 24(%1) \n\t" \ + "nop\n\t" \ + "sw $4, 20($29) \n\t" \ + "lw $4, 4(%1) \n\t" 
\ + "lw $5, 8(%1) \n\t" \ + "lw $6, 12(%1) \n\t" \ + "lw $7, 16(%1) \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 32 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "lw $4, 20(%1) \n\t" \ + "subu $29, $29, 32\n\t" \ + "sw $4, 16($29) \n\t" \ + "lw $4, 24(%1) \n\t" \ + "sw $4, 20($29) \n\t" \ + "lw $4, 28(%1) \n\t" \ + "sw $4, 24($29) \n\t" \ + "lw $4, 4(%1) \n\t" \ + "lw $5, 8(%1) \n\t" \ + "lw $6, 12(%1) \n\t" \ + "lw $7, 16(%1) \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 32 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] 
= (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "lw $4, 20(%1) \n\t" \ + "subu $29, $29, 40\n\t" \ + "sw $4, 16($29) \n\t" \ + "lw $4, 24(%1) \n\t" \ + "sw $4, 20($29) \n\t" \ + "lw $4, 28(%1) \n\t" \ + "sw $4, 24($29) \n\t" \ + "lw $4, 32(%1) \n\t" \ + "sw $4, 28($29) \n\t" \ + "lw $4, 4(%1) \n\t" \ + "lw $5, 8(%1) \n\t" \ + "lw $6, 12(%1) \n\t" \ + "lw $7, 16(%1) \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 40 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "lw $4, 20(%1) \n\t" \ + "subu $29, $29, 40\n\t" \ + "sw $4, 16($29) \n\t" \ + "lw $4, 24(%1) \n\t" \ + "sw $4, 20($29) \n\t" \ + "lw $4, 28(%1) \n\t" \ + "sw $4, 24($29) \n\t" \ + "lw $4, 32(%1) \n\t" \ + "sw $4, 28($29) 
\n\t" \ + "lw $4, 36(%1) \n\t" \ + "sw $4, 32($29) \n\t" \ + "lw $4, 4(%1) \n\t" \ + "lw $5, 8(%1) \n\t" \ + "lw $6, 12(%1) \n\t" \ + "lw $7, 16(%1) \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 40 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "lw $4, 20(%1) \n\t" \ + "subu $29, $29, 48\n\t" \ + "sw $4, 16($29) \n\t" \ + "lw $4, 24(%1) \n\t" \ + "sw $4, 20($29) \n\t" \ + "lw $4, 28(%1) \n\t" \ + "sw $4, 24($29) \n\t" \ + "lw $4, 32(%1) \n\t" \ + "sw $4, 28($29) \n\t" \ + "lw $4, 36(%1) \n\t" \ + "sw $4, 32($29) \n\t" \ + "lw $4, 40(%1) \n\t" \ + "sw $4, 36($29) \n\t" \ + "lw $4, 4(%1) \n\t" \ + "lw $5, 8(%1) \n\t" \ + "lw $6, 12(%1) \n\t" \ + "lw $7, 16(%1) \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 48 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", 
__CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "lw $4, 20(%1) \n\t" \ + "subu $29, $29, 48\n\t" \ + "sw $4, 16($29) \n\t" \ + "lw $4, 24(%1) \n\t" \ + "sw $4, 20($29) \n\t" \ + "lw $4, 28(%1) \n\t" \ + "sw $4, 24($29) \n\t" \ + "lw $4, 32(%1) \n\t" \ + "sw $4, 28($29) \n\t" \ + "lw $4, 36(%1) \n\t" \ + "sw $4, 32($29) \n\t" \ + "lw $4, 40(%1) \n\t" \ + "sw $4, 36($29) \n\t" \ + "lw $4, 44(%1) \n\t" \ + "sw $4, 40($29) \n\t" \ + "lw $4, 4(%1) \n\t" \ + "lw $5, 8(%1) \n\t" \ + "lw $6, 12(%1) \n\t" \ + "lw $7, 16(%1) \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 48 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "0" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned 
long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + "subu $29, $29, 8 \n\t" \ + "sw $28, 0($29) \n\t" \ + "sw $31, 4($29) \n\t" \ + "lw $4, 20(%1) \n\t" \ + "subu $29, $29, 56\n\t" \ + "sw $4, 16($29) \n\t" \ + "lw $4, 24(%1) \n\t" \ + "sw $4, 20($29) \n\t" \ + "lw $4, 28(%1) \n\t" \ + "sw $4, 24($29) \n\t" \ + "lw $4, 32(%1) \n\t" \ + "sw $4, 28($29) \n\t" \ + "lw $4, 36(%1) \n\t" \ + "sw $4, 32($29) \n\t" \ + "lw $4, 40(%1) \n\t" \ + "sw $4, 36($29) \n\t" \ + "lw $4, 44(%1) \n\t" \ + "sw $4, 40($29) \n\t" \ + "lw $4, 48(%1) \n\t" \ + "sw $4, 44($29) \n\t" \ + "lw $4, 4(%1) \n\t" \ + "lw $5, 8(%1) \n\t" \ + "lw $6, 12(%1) \n\t" \ + "lw $7, 16(%1) \n\t" \ + "lw $25, 0(%1) \n\t" /* target->t9 */ \ + VALGRIND_CALL_NOREDIR_T9 \ + "addu $29, $29, 56 \n\t" \ + "lw $28, 0($29) \n\t" \ + "lw $31, 4($29) \n\t" \ + "addu $29, $29, 8 \n\t" \ + "move %0, $2\n" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_mips32_linux */ + +/* ------------------------- mips64-linux ------------------------- */ + +#if defined(PLAT_mips64_linux) + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS "$2", "$3", "$4", "$5", "$6", \ +"$7", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \ +"$25", "$31" + +/* These CALL_FN_ macros assume that on mips64-linux, + sizeof(long long) == 8. 
*/
+
+/* Convert a long-sized value to the signed 64-bit form stored in the
+   _argvec slots below (each slot is later fetched with "ld").  The
+   argument is parenthesised so that an expression such as `p + 1` is
+   converted as a whole rather than only its first operand. */
+#define MIPS64_LONG2REG_CAST(x) ((long long)(long)(x))
+
+/* CALL_FN_W_<args>(lval, orig, arg1..argN) for mips64-linux.
+
+   Every variant follows the same scheme:
+     - _argvec[0] receives the target address (orig.nraddr) and
+       _argvec[1..N] receive the arguments, all via MIPS64_LONG2REG_CAST;
+     - arguments 1..8 are loaded into $4..$11;
+     - arguments 9..12 are copied to stack space reserved by lowering
+       $29, which is restored after the call;
+     - the target is loaded into $25 (t9) and invoked through
+       VALGRIND_CALL_NOREDIR_T9;
+     - the value left in $2 is copied out and assigned to lval.
+
+   CALL_FN_W_v ties its input operand to operand 0 ("0" constraint);
+   the other variants use a plain "r" input.
+
+   NOTE(review): the 9- and 11-argument variants lower $29 by 8 and 24
+   bytes, i.e. not a multiple of 16 -- confirm against the ABI's stack
+   alignment requirements. */
+
+#define CALL_FN_W_v(lval, orig) \
+   do { \
+      volatile OrigFn _orig = (orig); \
+      volatile unsigned long long _argvec[1]; \
+      volatile unsigned long long _res; \
+      _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \
+      __asm__ volatile( \
+         "ld $25, 0(%1)\n\t" /* target->t9 */ \
+         VALGRIND_CALL_NOREDIR_T9 \
+         "move %0, $2\n" \
+         : /*out*/ "=r" (_res) \
+         : /*in*/ "0" (&_argvec[0]) \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS \
+      ); \
+      lval = (__typeof__(lval)) (long)_res; \
+   } while (0)
+
+#define CALL_FN_W_W(lval, orig, arg1) \
+   do { \
+      volatile OrigFn _orig = (orig); \
+      volatile unsigned long long _argvec[2]; \
+      volatile unsigned long long _res; \
+      _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \
+      _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \
+      __asm__ volatile( \
+         "ld $4, 8(%1)\n\t" /* arg1*/ \
+         "ld $25, 0(%1)\n\t" /* target->t9 */ \
+         VALGRIND_CALL_NOREDIR_T9 \
+         "move %0, $2\n" \
+         : /*out*/ "=r" (_res) \
+         : /*in*/ "r" (&_argvec[0]) \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS \
+      ); \
+      lval = (__typeof__(lval)) (long)_res; \
+   } while (0)
+
+/* The target address now goes through MIPS64_LONG2REG_CAST like in every
+   other variant, so it is sign-extended consistently. */
+#define CALL_FN_W_WW(lval, orig, arg1,arg2) \
+   do { \
+      volatile OrigFn _orig = (orig); \
+      volatile unsigned long long _argvec[3]; \
+      volatile unsigned long long _res; \
+      _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \
+      _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \
+      _argvec[2] = MIPS64_LONG2REG_CAST(arg2); \
+      __asm__ volatile( \
+         "ld $4, 8(%1)\n\t" \
+         "ld $5, 16(%1)\n\t" \
+         "ld $25, 0(%1)\n\t" /* target->t9 */ \
+         VALGRIND_CALL_NOREDIR_T9 \
+         "move %0, $2\n" \
+         : /*out*/ "=r" (_res) \
+         : /*in*/ "r" (&_argvec[0]) \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS \
+      ); \
+      lval = (__typeof__(lval)) (long)_res; \
+   } while (0)
+
+
+/* As for CALL_FN_W_WW, the target address is stored via the cast. */
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \
+   do { \
+      volatile OrigFn _orig = (orig); \
+      volatile unsigned long long _argvec[4]; \
+      volatile unsigned long long _res; \
+      _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \
+      _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \
+      _argvec[2] = MIPS64_LONG2REG_CAST(arg2); \
+      _argvec[3] = MIPS64_LONG2REG_CAST(arg3); \
+      __asm__ volatile( \
+         "ld $4, 8(%1)\n\t" \
+         "ld $5, 16(%1)\n\t" \
+         "ld $6, 24(%1)\n\t" \
+         "ld $25, 0(%1)\n\t" /* target->t9 */ \
+         VALGRIND_CALL_NOREDIR_T9 \
+         "move %0, $2\n" \
+         : /*out*/ "=r" (_res) \
+         : /*in*/ "r" (&_argvec[0]) \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS \
+      ); \
+      lval = (__typeof__(lval)) (long)_res; \
+   } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \
+   do { \
+      volatile OrigFn _orig = (orig); \
+      volatile unsigned long long _argvec[5]; \
+      volatile unsigned long long _res; \
+      _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \
+      _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \
+      _argvec[2] = MIPS64_LONG2REG_CAST(arg2); \
+      _argvec[3] = MIPS64_LONG2REG_CAST(arg3); \
+      _argvec[4] = MIPS64_LONG2REG_CAST(arg4); \
+      __asm__ volatile( \
+         "ld $4, 8(%1)\n\t" \
+         "ld $5, 16(%1)\n\t" \
+         "ld $6, 24(%1)\n\t" \
+         "ld $7, 32(%1)\n\t" \
+         "ld $25, 0(%1)\n\t" /* target->t9 */ \
+         VALGRIND_CALL_NOREDIR_T9 \
+         "move %0, $2\n" \
+         : /*out*/ "=r" (_res) \
+         : /*in*/ "r" (&_argvec[0]) \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS \
+      ); \
+      lval = (__typeof__(lval)) (long)_res; \
+   } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \
+   do { \
+      volatile OrigFn _orig = (orig); \
+      volatile unsigned long long _argvec[6]; \
+      volatile unsigned long long _res; \
+      _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \
+      _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \
+      _argvec[2] = MIPS64_LONG2REG_CAST(arg2); \
+      _argvec[3] = MIPS64_LONG2REG_CAST(arg3); \
+      _argvec[4] = MIPS64_LONG2REG_CAST(arg4); \
+      _argvec[5] = MIPS64_LONG2REG_CAST(arg5); \
+      __asm__ volatile( \
+         "ld $4, 8(%1)\n\t" \
+         "ld $5, 16(%1)\n\t" \
+         "ld $6, 24(%1)\n\t" \
+         "ld $7, 32(%1)\n\t" \
+         "ld $8, 40(%1)\n\t" \
+         "ld $25, 0(%1)\n\t" /* target->t9 */ \
+         VALGRIND_CALL_NOREDIR_T9 \
+         "move %0, $2\n" \
+         : /*out*/ "=r" (_res) \
+         : /*in*/ "r" (&_argvec[0]) \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS \
+      ); \
+      lval = (__typeof__(lval)) (long)_res; \
+   } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \
+   do { \
+      volatile OrigFn _orig = (orig); \
+      volatile unsigned long long _argvec[7]; \
+      volatile unsigned long long _res; \
+      _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \
+      _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \
+      _argvec[2] = MIPS64_LONG2REG_CAST(arg2); \
+      _argvec[3] = MIPS64_LONG2REG_CAST(arg3); \
+      _argvec[4] = MIPS64_LONG2REG_CAST(arg4); \
+      _argvec[5] = MIPS64_LONG2REG_CAST(arg5); \
+      _argvec[6] = MIPS64_LONG2REG_CAST(arg6); \
+      __asm__ volatile( \
+         "ld $4, 8(%1)\n\t" \
+         "ld $5, 16(%1)\n\t" \
+         "ld $6, 24(%1)\n\t" \
+         "ld $7, 32(%1)\n\t" \
+         "ld $8, 40(%1)\n\t" \
+         "ld $9, 48(%1)\n\t" \
+         "ld $25, 0(%1)\n\t" /* target->t9 */ \
+         VALGRIND_CALL_NOREDIR_T9 \
+         "move %0, $2\n" \
+         : /*out*/ "=r" (_res) \
+         : /*in*/ "r" (&_argvec[0]) \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS \
+      ); \
+      lval = (__typeof__(lval)) (long)_res; \
+   } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+                     arg7) \
+   do { \
+      volatile OrigFn _orig = (orig); \
+      volatile unsigned long long _argvec[8]; \
+      volatile unsigned long long _res; \
+      _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \
+      _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \
+      _argvec[2] = MIPS64_LONG2REG_CAST(arg2); \
+      _argvec[3] = MIPS64_LONG2REG_CAST(arg3); \
+      _argvec[4] = MIPS64_LONG2REG_CAST(arg4); \
+      _argvec[5] = MIPS64_LONG2REG_CAST(arg5); \
+      _argvec[6] = MIPS64_LONG2REG_CAST(arg6); \
+      _argvec[7] = MIPS64_LONG2REG_CAST(arg7); \
+      __asm__ volatile( \
+         "ld $4, 8(%1)\n\t" \
+         "ld $5, 16(%1)\n\t" \
+         "ld $6, 24(%1)\n\t" \
+         "ld $7, 32(%1)\n\t" \
+         "ld $8, 40(%1)\n\t" \
+         "ld $9, 48(%1)\n\t" \
+         "ld $10, 56(%1)\n\t" \
+         "ld $25, 0(%1) \n\t" /* target->t9 */ \
+         VALGRIND_CALL_NOREDIR_T9 \
+         "move %0, $2\n" \
+         : /*out*/ "=r" (_res) \
+         : /*in*/ "r" (&_argvec[0]) \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS \
+      ); \
+      lval = (__typeof__(lval)) (long)_res; \
+   } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+                     arg7,arg8) \
+   do { \
+      volatile OrigFn _orig = (orig); \
+      volatile unsigned long long _argvec[9]; \
+      volatile unsigned long long _res; \
+      _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \
+      _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \
+      _argvec[2] = MIPS64_LONG2REG_CAST(arg2); \
+      _argvec[3] = MIPS64_LONG2REG_CAST(arg3); \
+      _argvec[4] = MIPS64_LONG2REG_CAST(arg4); \
+      _argvec[5] = MIPS64_LONG2REG_CAST(arg5); \
+      _argvec[6] = MIPS64_LONG2REG_CAST(arg6); \
+      _argvec[7] = MIPS64_LONG2REG_CAST(arg7); \
+      _argvec[8] = MIPS64_LONG2REG_CAST(arg8); \
+      __asm__ volatile( \
+         "ld $4, 8(%1)\n\t" \
+         "ld $5, 16(%1)\n\t" \
+         "ld $6, 24(%1)\n\t" \
+         "ld $7, 32(%1)\n\t" \
+         "ld $8, 40(%1)\n\t" \
+         "ld $9, 48(%1)\n\t" \
+         "ld $10, 56(%1)\n\t" \
+         "ld $11, 64(%1)\n\t" \
+         "ld $25, 0(%1) \n\t" /* target->t9 */ \
+         VALGRIND_CALL_NOREDIR_T9 \
+         "move %0, $2\n" \
+         : /*out*/ "=r" (_res) \
+         : /*in*/ "r" (&_argvec[0]) \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS \
+      ); \
+      lval = (__typeof__(lval)) (long)_res; \
+   } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+                     arg7,arg8,arg9) \
+   do { \
+      volatile OrigFn _orig = (orig); \
+      volatile unsigned long long _argvec[10]; \
+      volatile unsigned long long _res; \
+      _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \
+      _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \
+      _argvec[2] = MIPS64_LONG2REG_CAST(arg2); \
+      _argvec[3] = MIPS64_LONG2REG_CAST(arg3); \
+      _argvec[4] = MIPS64_LONG2REG_CAST(arg4); \
+      _argvec[5] = MIPS64_LONG2REG_CAST(arg5); \
+      _argvec[6] = MIPS64_LONG2REG_CAST(arg6); \
+      _argvec[7] = MIPS64_LONG2REG_CAST(arg7); \
+      _argvec[8] = MIPS64_LONG2REG_CAST(arg8); \
+      _argvec[9] = MIPS64_LONG2REG_CAST(arg9); \
+      __asm__ volatile( \
+         "dsubu $29, $29, 8\n\t" \
+         "ld $4, 72(%1)\n\t" \
+         "sd $4, 0($29)\n\t" \
+         "ld $4, 8(%1)\n\t" \
+         "ld $5, 16(%1)\n\t" \
+         "ld $6, 24(%1)\n\t" \
+         "ld $7, 32(%1)\n\t" \
+         "ld $8, 40(%1)\n\t" \
+         "ld $9, 48(%1)\n\t" \
+         "ld $10, 56(%1)\n\t" \
+         "ld $11, 64(%1)\n\t" \
+         "ld $25, 0(%1)\n\t" /* target->t9 */ \
+         VALGRIND_CALL_NOREDIR_T9 \
+         "daddu $29, $29, 8\n\t" \
+         "move %0, $2\n" \
+         : /*out*/ "=r" (_res) \
+         : /*in*/ "r" (&_argvec[0]) \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS \
+      ); \
+      lval = (__typeof__(lval)) (long)_res; \
+   } while (0)
+
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+                      arg7,arg8,arg9,arg10) \
+   do { \
+      volatile OrigFn _orig = (orig); \
+      volatile unsigned long long _argvec[11]; \
+      volatile unsigned long long _res; \
+      _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \
+      _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \
+      _argvec[2] = MIPS64_LONG2REG_CAST(arg2); \
+      _argvec[3] = MIPS64_LONG2REG_CAST(arg3); \
+      _argvec[4] = MIPS64_LONG2REG_CAST(arg4); \
+      _argvec[5] = MIPS64_LONG2REG_CAST(arg5); \
+      _argvec[6] = MIPS64_LONG2REG_CAST(arg6); \
+      _argvec[7] = MIPS64_LONG2REG_CAST(arg7); \
+      _argvec[8] = MIPS64_LONG2REG_CAST(arg8); \
+      _argvec[9] = MIPS64_LONG2REG_CAST(arg9); \
+      _argvec[10] = MIPS64_LONG2REG_CAST(arg10); \
+      __asm__ volatile( \
+         "dsubu $29, $29, 16\n\t" \
+         "ld $4, 72(%1)\n\t" \
+         "sd $4, 0($29)\n\t" \
+         "ld $4, 80(%1)\n\t" \
+         "sd $4, 8($29)\n\t" \
+         "ld $4, 8(%1)\n\t" \
+         "ld $5, 16(%1)\n\t" \
+         "ld $6, 24(%1)\n\t" \
+         "ld $7, 32(%1)\n\t" \
+         "ld $8, 40(%1)\n\t" \
+         "ld $9, 48(%1)\n\t" \
+         "ld $10, 56(%1)\n\t" \
+         "ld $11, 64(%1)\n\t" \
+         "ld $25, 0(%1)\n\t" /* target->t9 */ \
+         VALGRIND_CALL_NOREDIR_T9 \
+         "daddu $29, $29, 16\n\t" \
+         "move %0, $2\n" \
+         : /*out*/ "=r" (_res) \
+         : /*in*/ "r" (&_argvec[0]) \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS \
+      ); \
+      lval = (__typeof__(lval)) (long)_res; \
+   } while (0)
+
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \
+                      arg6,arg7,arg8,arg9,arg10, \
+                      arg11) \
+   do { \
+      volatile OrigFn _orig = (orig); \
+      volatile unsigned long long _argvec[12]; \
+      volatile unsigned long long _res; \
+      _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \
+      _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \
+      _argvec[2] = MIPS64_LONG2REG_CAST(arg2); \
+      _argvec[3] = MIPS64_LONG2REG_CAST(arg3); \
+      _argvec[4] = MIPS64_LONG2REG_CAST(arg4); \
+      _argvec[5] = MIPS64_LONG2REG_CAST(arg5); \
+      _argvec[6] = MIPS64_LONG2REG_CAST(arg6); \
+      _argvec[7] = MIPS64_LONG2REG_CAST(arg7); \
+      _argvec[8] = MIPS64_LONG2REG_CAST(arg8); \
+      _argvec[9] = MIPS64_LONG2REG_CAST(arg9); \
+      _argvec[10] = MIPS64_LONG2REG_CAST(arg10); \
+      _argvec[11] = MIPS64_LONG2REG_CAST(arg11); \
+      __asm__ volatile( \
+         "dsubu $29, $29, 24\n\t" \
+         "ld $4, 72(%1)\n\t" \
+         "sd $4, 0($29)\n\t" \
+         "ld $4, 80(%1)\n\t" \
+         "sd $4, 8($29)\n\t" \
+         "ld $4, 88(%1)\n\t" \
+         "sd $4, 16($29)\n\t" \
+         "ld $4, 8(%1)\n\t" \
+         "ld $5, 16(%1)\n\t" \
+         "ld $6, 24(%1)\n\t" \
+         "ld $7, 32(%1)\n\t" \
+         "ld $8, 40(%1)\n\t" \
+         "ld $9, 48(%1)\n\t" \
+         "ld $10, 56(%1)\n\t" \
+         "ld $11, 64(%1)\n\t" \
+         "ld $25, 0(%1)\n\t" /* target->t9 */ \
+         VALGRIND_CALL_NOREDIR_T9 \
+         "daddu $29, $29, 24\n\t" \
+         "move %0, $2\n" \
+         : /*out*/ "=r" (_res) \
+         : /*in*/ "r" (&_argvec[0]) \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS \
+      ); \
+      lval = (__typeof__(lval)) (long)_res; \
+   } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5, \
+                      arg6,arg7,arg8,arg9,arg10, \
+                      arg11,arg12) \
+   do { \
+      volatile OrigFn _orig = (orig); \
+      volatile unsigned long long _argvec[13]; \
+      volatile unsigned long long _res; \
+      _argvec[0] = MIPS64_LONG2REG_CAST(_orig.nraddr); \
+      _argvec[1] = MIPS64_LONG2REG_CAST(arg1); \
+      _argvec[2] = MIPS64_LONG2REG_CAST(arg2); \
+      _argvec[3] = MIPS64_LONG2REG_CAST(arg3); \
+      _argvec[4] = MIPS64_LONG2REG_CAST(arg4); \
+      _argvec[5] = MIPS64_LONG2REG_CAST(arg5); \
+      _argvec[6] = MIPS64_LONG2REG_CAST(arg6); \
+      _argvec[7] = MIPS64_LONG2REG_CAST(arg7); \
+      _argvec[8] = MIPS64_LONG2REG_CAST(arg8); \
+      _argvec[9] = MIPS64_LONG2REG_CAST(arg9); \
+      _argvec[10] = MIPS64_LONG2REG_CAST(arg10); \
+      _argvec[11] = MIPS64_LONG2REG_CAST(arg11); \
+      _argvec[12] = MIPS64_LONG2REG_CAST(arg12); \
+      __asm__ volatile( \
+         "dsubu $29, $29, 32\n\t" \
+         "ld $4, 72(%1)\n\t" \
+         "sd $4, 0($29)\n\t" \
+         "ld $4, 80(%1)\n\t" \
+         "sd $4, 8($29)\n\t" \
+         "ld $4, 88(%1)\n\t" \
+         "sd $4, 16($29)\n\t" \
+         "ld $4, 96(%1)\n\t" \
+         "sd $4, 24($29)\n\t" \
+         "ld $4, 8(%1)\n\t" \
+         "ld $5, 16(%1)\n\t" \
+         "ld $6, 24(%1)\n\t" \
+         "ld $7, 32(%1)\n\t" \
+         "ld $8, 40(%1)\n\t" \
+         "ld $9, 48(%1)\n\t" \
+         "ld $10, 56(%1)\n\t" \
+         "ld $11, 64(%1)\n\t" \
+         "ld $25, 0(%1)\n\t" /* target->t9 */ \
+         VALGRIND_CALL_NOREDIR_T9 \
+         "daddu $29, $29, 32\n\t" \
+         "move %0, $2\n" \
+         : /*out*/ "=r" (_res) \
+         : /*in*/ "r" (&_argvec[0]) \
+         : /*trash*/ "memory", __CALLER_SAVED_REGS \
+      ); \
+      lval = (__typeof__(lval)) (long)_res; \
+   } while (0)
+
+#endif /* PLAT_mips64_linux */
+
+/* ------------------------------------------------------------------ */
+/* ARCHITECTURE INDEPENDENT MACROS for CLIENT REQUESTS.               */
+/*                                                                    */
+/* ------------------------------------------------------------------ */
+
+/* Some request codes.  There are many more of these, but most are not
+   exposed to end-user view.  These are the public ones, all of the
+   form 0x1000 + small_number.
+
+   Core ones are in the range 0x00000000--0x0000ffff.  The non-public
+   ones start at 0x2000.
+*/
+
+/* These macros are used by tools -- they must be public, but don't
+   embed them into other programs.
+
+   VG_USERREQ_TOOL_BASE(a,b) packs the low 8 bits of a into bits 31..24
+   and the low 8 bits of b into bits 23..16 of an unsigned int.  Each
+   byte is converted to unsigned int before shifting so that a code
+   >= 0x80 is never shifted into the sign bit of a signed int.
+
+   VG_IS_TOOL_USERREQ(a,b,v) is non-zero when the upper 16 bits of v
+   equal VG_USERREQ_TOOL_BASE(a,b). */
+#define VG_USERREQ_TOOL_BASE(a,b) \
+   ((unsigned int)((a)&0xff) << 24 | (unsigned int)((b)&0xff) << 16)
+#define VG_IS_TOOL_USERREQ(a, b, v) \
+   (VG_USERREQ_TOOL_BASE(a,b) == ((v) & 0xffff0000))
+
+/* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !!
+   This enum comprises an ABI exported by Valgrind to programs
+   which use client requests.  DO NOT CHANGE THE NUMERIC VALUES OF THESE
+   ENTRIES, NOR DELETE ANY -- add new ones at the end of the most
+   relevant group.
*/
+typedef
+   enum { VG_USERREQ__RUNNING_ON_VALGRIND  = 0x1001, /* issued by RUNNING_ON_VALGRIND */
+          VG_USERREQ__DISCARD_TRANSLATIONS = 0x1002, /* issued by VALGRIND_DISCARD_TRANSLATIONS */
+
+          /* These allow any function to be called from the simulated
+             CPU but run on the real CPU.  Nb: the first arg passed to
+             the function is always the ThreadId of the running
+             thread!  So CLIENT_CALL0 actually requires a 1 arg
+             function, etc.  The digit in the name is the number of
+             caller-supplied arguments. */
+          VG_USERREQ__CLIENT_CALL0 = 0x1101,
+          VG_USERREQ__CLIENT_CALL1 = 0x1102,
+          VG_USERREQ__CLIENT_CALL2 = 0x1103,
+          VG_USERREQ__CLIENT_CALL3 = 0x1104,
+
+          /* Can be useful in regression testing suites -- eg. can
+             send Valgrind's output to /dev/null and still count
+             errors. */
+          VG_USERREQ__COUNT_ERRORS = 0x1201,
+
+          /* Allows the client program and/or gdbserver to execute a monitor
+             command. */
+          VG_USERREQ__GDB_MONITOR_COMMAND = 0x1202,
+
+          /* These are useful and can be interpreted by any tool that
+             tracks malloc() et al, by using vg_replace_malloc.c. */
+          VG_USERREQ__MALLOCLIKE_BLOCK = 0x1301,
+          VG_USERREQ__RESIZEINPLACE_BLOCK = 0x130b, /* listed with its group; value is out of numeric sequence and must stay as is */
+          VG_USERREQ__FREELIKE_BLOCK   = 0x1302,
+          /* Memory pool support. */
+          VG_USERREQ__CREATE_MEMPOOL   = 0x1303,
+          VG_USERREQ__DESTROY_MEMPOOL  = 0x1304,
+          VG_USERREQ__MEMPOOL_ALLOC    = 0x1305,
+          VG_USERREQ__MEMPOOL_FREE     = 0x1306,
+          VG_USERREQ__MEMPOOL_TRIM     = 0x1307,
+          VG_USERREQ__MOVE_MEMPOOL     = 0x1308,
+          VG_USERREQ__MEMPOOL_CHANGE   = 0x1309,
+          VG_USERREQ__MEMPOOL_EXISTS   = 0x130a,
+
+          /* Allow printfs to valgrind log. */
+          /* The first two pass the va_list argument by value, which
+             assumes it is the same size as or smaller than a UWord,
+             which generally isn't the case.  Hence they are deprecated.
+             The second two pass the vargs by reference and so are
+             immune to this problem. */
+          /* both :: char* fmt, va_list vargs (DEPRECATED) */
+          VG_USERREQ__PRINTF           = 0x1401,
+          VG_USERREQ__PRINTF_BACKTRACE = 0x1402,
+          /* both :: char* fmt, va_list* vargs */
+          VG_USERREQ__PRINTF_VALIST_BY_REF = 0x1403,
+          VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF = 0x1404,
+
+          /* Stack support. */
+          VG_USERREQ__STACK_REGISTER   = 0x1501,
+          VG_USERREQ__STACK_DEREGISTER = 0x1502,
+          VG_USERREQ__STACK_CHANGE     = 0x1503,
+
+          /* Wine support */
+          VG_USERREQ__LOAD_PDB_DEBUGINFO = 0x1601,
+
+          /* Querying of debug info. */
+          VG_USERREQ__MAP_IP_TO_SRCLOC = 0x1701,
+
+          /* Disable/enable error reporting level.  Takes a single
+             Word arg which is the delta to this thread's error
+             disablement indicator.  Hence 1 disables or further
+             disables errors, and -1 moves back towards enablement.
+             Other values are not allowed. */
+          VG_USERREQ__CHANGE_ERR_DISABLEMENT = 0x1801,
+
+          /* Some requests used for Valgrind internal, such as
+             self-test or self-hosting. */
+          /* Initialise IR injection */
+          VG_USERREQ__VEX_INIT_FOR_IRI = 0x1901,
+          /* Used by Inner Valgrind to inform Outer Valgrind where to
+             find the list of inner guest threads */
+          VG_USERREQ__INNER_THREADS    = 0x1902
+   } Vg_ClientRequest;
+
+#if !defined(__GNUC__)
+#  define __extension__ /* expands to nothing when the compiler is not GNU C */
+#endif
+
+
+/* Returns the number of Valgrinds this code is running under.  That
+   is, 0 if running natively, 1 if running under Valgrind, 2 if
+   running under Valgrind which is running under another Valgrind,
+   etc.  The request's default value is 0 and the result is cast to
+   unsigned. */
+#define RUNNING_ON_VALGRIND \
+    (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* if not */, \
+                                    VG_USERREQ__RUNNING_ON_VALGRIND, \
+                                    0, 0, 0, 0, 0) /* the trailing backslash continues the macro onto the following empty line */ \
+
+
+/* Discard translation of code in the range [_qzz_addr .. _qzz_addr +
+   _qzz_len - 1].  Useful if you are debugging a JITter or some such,
+   since it provides a way to make sure valgrind will retranslate the
+   invalidated area.  Returns no value. */
+#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len) \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DISCARD_TRANSLATIONS, \
+                                    _qzz_addr, _qzz_len, 0, 0, 0)
+
+#define VALGRIND_INNER_THREADS(_qzz_addr) /* statement form of VG_USERREQ__INNER_THREADS; _qzz_addr is its only argument */ \
+   VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__INNER_THREADS, \
+                                   _qzz_addr, 0, 0, 0, 0)
+
+
+/* These requests are for getting Valgrind itself to print something.
+   Possibly with a backtrace.  This is a really ugly hack.
The return value
+   is the number of characters printed, excluding the "**** " part at the
+   start and the backtrace (if present).  When NVALGRIND is defined both
+   functions ignore their arguments and return 0. */
+
+#if defined(__GNUC__) || defined(__INTEL_COMPILER) && !defined(_MSC_VER) /* && binds tighter: GNUC || (INTEL && !MSC) */
+/* Modern GCC will optimize the static routine out if unused,
+   and unused attribute will shut down warnings about it.
+   The format attribute lets the compiler check the arguments
+   against the format string (argument 1, varargs from argument 2). */
+static int VALGRIND_PRINTF(const char *format, ...)
+   __attribute__((format(__printf__, 1, 2), __unused__));
+#endif
+static int
+#if defined(_MSC_VER)
+__inline
+#endif
+VALGRIND_PRINTF(const char *format, ...)
+{
+#if defined(NVALGRIND)
+   (void)format; /* silence the unused-parameter warning */
+   return 0;
+#else /* NVALGRIND */
+#if defined(_MSC_VER) || defined(__MINGW64__)
+   uintptr_t _qzz_res; /* pointer-sized result on these toolchains */
+#else
+   unsigned long _qzz_res;
+#endif
+   va_list vargs;
+   va_start(vargs, format);
+#if defined(_MSC_VER) || defined(__MINGW64__)
+   _qzz_res = VALGRIND_DO_CLIENT_REQUEST_EXPR(0,
+                              VG_USERREQ__PRINTF_VALIST_BY_REF,
+                              (uintptr_t)format,
+                              (uintptr_t)&vargs, /* va_list is passed by address */
+                              0, 0, 0);
+#else
+   _qzz_res = VALGRIND_DO_CLIENT_REQUEST_EXPR(0,
+                              VG_USERREQ__PRINTF_VALIST_BY_REF,
+                              (unsigned long)format,
+                              (unsigned long)&vargs, /* va_list is passed by address */
+                              0, 0, 0);
+#endif
+   va_end(vargs);
+   return (int)_qzz_res;
+#endif /* NVALGRIND */
+}
+
+#if defined(__GNUC__) || defined(__INTEL_COMPILER) && !defined(_MSC_VER) /* same condition as for VALGRIND_PRINTF */
+static int VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
+   __attribute__((format(__printf__, 1, 2), __unused__));
+#endif
+static int
+#if defined(_MSC_VER)
+__inline
+#endif
+VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
+{
+#if defined(NVALGRIND)
+   (void)format; /* silence the unused-parameter warning */
+   return 0;
+#else /* NVALGRIND */
+#if defined(_MSC_VER) || defined(__MINGW64__)
+   uintptr_t _qzz_res; /* pointer-sized result on these toolchains */
+#else
+   unsigned long _qzz_res;
+#endif
+   va_list vargs;
+   va_start(vargs, format);
+#if defined(_MSC_VER) || defined(__MINGW64__)
+   _qzz_res = VALGRIND_DO_CLIENT_REQUEST_EXPR(0,
+                              VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF,
+                              (uintptr_t)format,
+                              (uintptr_t)&vargs, /* va_list is passed by address */
+                              0, 0, 0);
+#else
+   _qzz_res = VALGRIND_DO_CLIENT_REQUEST_EXPR(0,
+                              VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF,
+                              (unsigned long)format,
+                              (unsigned long)&vargs, /* va_list is passed by address */
+                              0, 0, 0);
+#endif
+   va_end(vargs);
+   return (int)_qzz_res;
+#endif /* NVALGRIND */
+}
+
+
+/* These requests allow control to move from the simulated CPU to the
+   real CPU, calling an arbitrary function.
+
+   Note that the current ThreadId is inserted as the first argument.
+   So this call:
+
+     VALGRIND_NON_SIMD_CALL2(f, arg1, arg2)
+
+   requires f to have this signature:
+
+     Word f(Word tid, Word arg1, Word arg2)
+
+   where "Word" is a word-sized type.
+
+   Note that these client requests are not entirely reliable.  For example,
+   if you call a function with them that subsequently calls printf(),
+   there's a high chance Valgrind will crash.  Generally, your prospects of
+   these working are made higher if the called function does not refer to
+   any global variables, and does not refer to any libc or other functions
+   (printf et al).  Any kind of entanglement with libc or dynamic linking is
+   likely to have a bad outcome, for tricky reasons which we've grappled
+   with a lot in the past.
+*/
+#define VALGRIND_NON_SIMD_CALL0(_qyy_fn) \
+    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */, \
+                                    VG_USERREQ__CLIENT_CALL0, \
+                                    _qyy_fn, /* function to call */ \
+                                    0, 0, 0, 0)
+
+#define VALGRIND_NON_SIMD_CALL1(_qyy_fn, _qyy_arg1) \
+    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */, \
+                                    VG_USERREQ__CLIENT_CALL1, \
+                                    _qyy_fn, \
+                                    _qyy_arg1, 0, 0, 0)
+
+#define VALGRIND_NON_SIMD_CALL2(_qyy_fn, _qyy_arg1, _qyy_arg2) \
+    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */, \
+                                    VG_USERREQ__CLIENT_CALL2, \
+                                    _qyy_fn, \
+                                    _qyy_arg1, _qyy_arg2, 0, 0)
+
+#define VALGRIND_NON_SIMD_CALL3(_qyy_fn, _qyy_arg1, _qyy_arg2, _qyy_arg3) \
+    VALGRIND_DO_CLIENT_REQUEST_EXPR(0 /* default return */, \
+                                    VG_USERREQ__CLIENT_CALL3, \
+                                    _qyy_fn, \
+                                    _qyy_arg1, _qyy_arg2, \
+                                    _qyy_arg3, 0)
+
+
+/* Counts the number of errors that have been recorded by a tool.  Nb:
+   the tool must record the errors with VG_(maybe_record_error)() or
+   VG_(unique_error)() for them to be counted.  The request's default
+   value is 0 and the result is cast to unsigned. */
+#define VALGRIND_COUNT_ERRORS \
+    (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR( \
+                               0 /* default return */, \
+                               VG_USERREQ__COUNT_ERRORS, \
+                               0, 0, 0, 0, 0)
+
+/* Several Valgrind tools (Memcheck, Massif, Helgrind, DRD) rely on knowing
+   when heap blocks are allocated in order to give accurate results.  This
+   happens automatically for the standard allocator functions such as
+   malloc(), calloc(), realloc(), memalign(), new, new[], free(), delete,
+   delete[], etc.
+
+   But if your program uses a custom allocator, this doesn't automatically
+   happen, and Valgrind will not do as well.  For example, if you allocate
+   superblocks with mmap() and then allocate chunks of the superblocks, all
+   Valgrind's observations will be at the mmap() level and it won't know that
+   the chunks should be considered separate entities.  In Memcheck's case,
+   that means you probably won't get heap block overrun detection (because
+   there won't be redzones marked as unaddressable) and you definitely won't
+   get any leak detection.
+
+   The following client requests allow a custom allocator to be annotated so
+   that it can be handled accurately by Valgrind.
+
+   VALGRIND_MALLOCLIKE_BLOCK marks a region of memory as having been allocated
+   by a malloc()-like function.  For Memcheck (an illustrative case), this
+   does two things:
+
+   - It records that the block has been allocated.  This means any addresses
+     within the block mentioned in error messages will be
+     identified as belonging to the block.  It also means that if the block
+     isn't freed it will be detected by the leak checker.
+
+   - It marks the block as being addressable and undefined (if 'is_zeroed' is
+     not set), or addressable and defined (if 'is_zeroed' is set).  This
+     controls how accesses to the block by the program are handled.
+
+   'addr' is the start of the usable block (ie. after any
+   redzone), 'sizeB' is its size.  'rzB' is the redzone size if the allocator
+   can apply redzones -- these are blocks of padding at the start and end of
+   each block.  Adding redzones is recommended as it makes it much more likely
+   Valgrind will spot block overruns.  'is_zeroed' indicates if the memory is
+   zeroed (or filled with another predictable value), as is the case for
+   calloc().
+
+   VALGRIND_MALLOCLIKE_BLOCK should be put immediately after the point where a
+   heap block -- that will be used by the client program -- is allocated.
+   It's best to put it at the outermost level of the allocator if possible;
+   for example, if you have a function my_alloc() which calls
+   internal_alloc(), and the client request is put inside internal_alloc(),
+   stack traces relating to the heap block will contain entries for both
+   my_alloc() and internal_alloc(), which is probably not what you want.
+
+   For Memcheck users: if you use VALGRIND_MALLOCLIKE_BLOCK to carve out
+   custom blocks from within a heap block, B, that has been allocated with
+   malloc/calloc/new/etc, then block B will be *ignored* during leak-checking
+   -- the custom blocks will take precedence.
+
+   VALGRIND_FREELIKE_BLOCK is the partner to VALGRIND_MALLOCLIKE_BLOCK.  For
+   Memcheck, it does two things:
+
+   - It records that the block has been deallocated.  This assumes that the
+     block was annotated as having been allocated via
+     VALGRIND_MALLOCLIKE_BLOCK.  Otherwise, an error will be issued.
+
+   - It marks the block as being unaddressable.
+
+   VALGRIND_FREELIKE_BLOCK should be put immediately after the point where a
+   heap block is deallocated.
+
+   VALGRIND_RESIZEINPLACE_BLOCK informs a tool about reallocation.  For
+   Memcheck, it does four things:
+
+   - It records that the size of a block has been changed.  This assumes that
+     the block was annotated as having been allocated via
+     VALGRIND_MALLOCLIKE_BLOCK.  Otherwise, an error will be issued.
+
+   - If the block shrank, it marks the freed memory as being unaddressable.
+
+   - If the block grew, it marks the new area as undefined and defines a red
+     zone past the end of the new block.
+
+   - The V-bits of the overlap between the old and the new block are preserved.
+
+   VALGRIND_RESIZEINPLACE_BLOCK should be put after allocation of the new block
+   and before deallocation of the old block.
+
+   In many cases, these three client requests will not be enough to get your
+   allocator working well with Memcheck.  More specifically, if your allocator
+   writes to freed blocks in any way then a VALGRIND_MAKE_MEM_UNDEFINED call
+   will be necessary to mark the memory as addressable just before the zeroing
+   occurs, otherwise you'll get a lot of invalid write errors.  For example,
+   you'll need to do this if your allocator recycles freed blocks, but it
+   zeroes them before handing them back out (via VALGRIND_MALLOCLIKE_BLOCK).
+   Alternatively, if your allocator reuses freed blocks for allocator-internal
+   data structures, VALGRIND_MAKE_MEM_UNDEFINED calls will also be necessary.
+
+   Really, what's happening is a blurring of the lines between the client
+   program and the allocator...
after VALGRIND_FREELIKE_BLOCK is called, the
+   memory should be considered unaddressable to the client program, but the
+   allocator knows more than the rest of the client program and so may be able
+   to safely access it.  Extra client requests are necessary for Valgrind to
+   understand the distinction between the allocator and the rest of the
+   program.
+
+   Ignored if addr == 0.
+*/
+#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MALLOCLIKE_BLOCK, \
+                                    addr, sizeB, rzB, is_zeroed, 0)
+
+/* See the comment for VALGRIND_MALLOCLIKE_BLOCK for details.
+   Arguments are the block address, its old and new sizes, and the
+   redzone size.
+   Ignored if addr == 0.
+*/
+#define VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB) \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__RESIZEINPLACE_BLOCK, \
+                                    addr, oldSizeB, newSizeB, rzB, 0)
+
+/* See the comment for VALGRIND_MALLOCLIKE_BLOCK for details.
+   Arguments are the block address and the redzone size.
+   Ignored if addr == 0.
+*/
+#define VALGRIND_FREELIKE_BLOCK(addr, rzB) \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__FREELIKE_BLOCK, \
+                                    addr, rzB, 0, 0, 0)
+
+/* Create a memory pool.  Issues VG_USERREQ__CREATE_MEMPOOL with
+   pool, rzB and is_zeroed; the fourth request argument (the flags of
+   VALGRIND_CREATE_MEMPOOL_EXT) is 0. */
+#define VALGRIND_CREATE_MEMPOOL(pool, rzB, is_zeroed) \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__CREATE_MEMPOOL, \
+                                    pool, rzB, is_zeroed, 0, 0)
+
+/* Create a memory pool with some flags specifying extended behaviour.
+   When flags is zero, the behaviour is identical to VALGRIND_CREATE_MEMPOOL.
+
+   The flag VALGRIND_MEMPOOL_METAPOOL specifies that the pieces of memory
+   associated with the pool using VALGRIND_MEMPOOL_ALLOC will be used
+   by the application as superblocks to dole out MALLOC_LIKE blocks using
+   VALGRIND_MALLOCLIKE_BLOCK.  In other words, a meta pool is a "2 levels"
+   pool : first level is the blocks described by VALGRIND_MEMPOOL_ALLOC.
+   The second level blocks are described using VALGRIND_MALLOCLIKE_BLOCK.
+   Note that the association between the pool and the second level blocks
+   is implicit : second level blocks will be located inside first level
+   blocks.
It is necessary to use the VALGRIND_MEMPOOL_METAPOOL flag
+   for such 2 levels pools, as otherwise valgrind will detect overlapping
+   memory blocks, and will abort execution (e.g. during leak search).
+
+   Such a meta pool can also be marked as an 'auto free' pool using the flag
+   VALGRIND_MEMPOOL_AUTO_FREE, which must be OR-ed together with the
+   VALGRIND_MEMPOOL_METAPOOL.  For an 'auto free' pool, VALGRIND_MEMPOOL_FREE
+   will automatically free the second level blocks that are contained
+   inside the first level block freed with VALGRIND_MEMPOOL_FREE.
+   In other words, calling VALGRIND_MEMPOOL_FREE will cause implicit calls
+   to VALGRIND_FREELIKE_BLOCK for all the second level blocks included
+   in the first level block.
+   Note: it is an error to use the VALGRIND_MEMPOOL_AUTO_FREE flag
+   without the VALGRIND_MEMPOOL_METAPOOL flag.
+*/
+#define VALGRIND_MEMPOOL_AUTO_FREE  1 /* flag bit; only valid together with VALGRIND_MEMPOOL_METAPOOL */
+#define VALGRIND_MEMPOOL_METAPOOL   2 /* flag bit; marks a "2 levels" pool */
+#define VALGRIND_CREATE_MEMPOOL_EXT(pool, rzB, is_zeroed, flags) \
+   VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__CREATE_MEMPOOL, \
+                                   pool, rzB, is_zeroed, flags, 0)
+
+/* Destroy a memory pool. */
+#define VALGRIND_DESTROY_MEMPOOL(pool) \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__DESTROY_MEMPOOL, \
+                                    pool, 0, 0, 0, 0)
+
+/* Associate a piece of memory with a memory pool. */
+#define VALGRIND_MEMPOOL_ALLOC(pool, addr, size) \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MEMPOOL_ALLOC, \
+                                    pool, addr, size, 0, 0)
+
+/* Disassociate a piece of memory from a memory pool. */
+#define VALGRIND_MEMPOOL_FREE(pool, addr) \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MEMPOOL_FREE, \
+                                    pool, addr, 0, 0, 0)
+
+/* Disassociate any pieces outside a particular range. */
+#define VALGRIND_MEMPOOL_TRIM(pool, addr, size) \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MEMPOOL_TRIM, \
+                                    pool, addr, size, 0, 0)
+
+/* Move a memory pool: tells the tool that the pool previously identified
+   by poolA is now identified by poolB (see the Memcheck manual's section
+   on memory pools for the exact semantics). */
+#define VALGRIND_MOVE_MEMPOOL(poolA, poolB) \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MOVE_MEMPOOL, \
+                                    poolA, poolB, 0, 0, 0)
+
+/* Resize and/or move a piece associated with a memory pool. */
+#define VALGRIND_MEMPOOL_CHANGE(pool, addrA, addrB, size) \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__MEMPOOL_CHANGE, \
+                                    pool, addrA, addrB, size, 0)
+
+/* Return 1 if a mempool exists, else 0. */
+#define VALGRIND_MEMPOOL_EXISTS(pool) \
+    (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0, \
+                               VG_USERREQ__MEMPOOL_EXISTS, \
+                               pool, 0, 0, 0, 0)
+
+/* Mark a piece of memory as being a stack.  Returns a stack id.
+   start is the lowest addressable stack byte, end is the highest
+   addressable stack byte. */
+#define VALGRIND_STACK_REGISTER(start, end) \
+    (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0, \
+                               VG_USERREQ__STACK_REGISTER, \
+                               start, end, 0, 0, 0)
+
+/* Unmark the piece of memory associated with a stack id as being a
+   stack. */
+#define VALGRIND_STACK_DEREGISTER(id) \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__STACK_DEREGISTER, \
+                                    id, 0, 0, 0, 0)
+
+/* Change the start and end address of the stack id.
+   start is the new lowest addressable stack byte, end is the new highest
+   addressable stack byte. */
+#define VALGRIND_STACK_CHANGE(id, start, end) \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__STACK_CHANGE, \
+                                    id, start, end, 0, 0)
+
+/* Load PDB debug info for Wine PE image_map. */
+#define VALGRIND_LOAD_PDB_DEBUGINFO(fd, ptr, total_size, delta) \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__LOAD_PDB_DEBUGINFO, \
+                                    fd, ptr, total_size, delta, 0)
+
+/* Map a code address to a source file name and line number.  buf64
+   must point to a 64-byte buffer in the caller's address space.  The
+   result will be dumped in there and is guaranteed to be zero
+   terminated.  If no info is found, the first byte is set to zero.
*/
+#define VALGRIND_MAP_IP_TO_SRCLOC(addr, buf64) \
+    (unsigned)VALGRIND_DO_CLIENT_REQUEST_EXPR(0, \
+                               VG_USERREQ__MAP_IP_TO_SRCLOC, \
+                               addr, buf64, 0, 0, 0)
+
+/* Disable error reporting for this thread.  Behaves in a stack like
+   way, so you can safely call this multiple times provided that
+   VALGRIND_ENABLE_ERROR_REPORTING is called the same number of times
+   to re-enable reporting.  The first call of this macro disables
+   reporting.  Subsequent calls have no effect except to increase the
+   number of VALGRIND_ENABLE_ERROR_REPORTING calls needed to re-enable
+   reporting.  Child threads do not inherit this setting from their
+   parents -- they are always created with reporting enabled.
+   Implemented as VG_USERREQ__CHANGE_ERR_DISABLEMENT with a delta of 1. */
+#define VALGRIND_DISABLE_ERROR_REPORTING \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__CHANGE_ERR_DISABLEMENT, \
+                                    1, 0, 0, 0, 0)
+
+/* Re-enable error reporting, as per comments on
+   VALGRIND_DISABLE_ERROR_REPORTING.  Uses a delta of -1. */
+#define VALGRIND_ENABLE_ERROR_REPORTING \
+    VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__CHANGE_ERR_DISABLEMENT, \
+                                    -1, 0, 0, 0, 0)
+
+/* Execute a monitor command from the client program.
+   If a connection is opened with GDB, the output will be sent
+   according to the output mode set for vgdb.
+   If no connection is opened, output will go to the log output.
+   Returns 1 if command not recognised, 0 otherwise.
+   Unlike the other value-returning requests in this header, the
+   result is not cast to unsigned. */
+#define VALGRIND_MONITOR_COMMAND(command) \
+   VALGRIND_DO_CLIENT_REQUEST_EXPR(0, VG_USERREQ__GDB_MONITOR_COMMAND, \
+                                   command, 0, 0, 0, 0)
+
+
+#undef PLAT_x86_darwin /* the PLAT_* platform selectors are undefined here at the end of the header -- presumably so they do not leak into code that includes it */
+#undef PLAT_amd64_darwin
+#undef PLAT_x86_win32
+#undef PLAT_amd64_win64
+#undef PLAT_x86_linux
+#undef PLAT_amd64_linux
+#undef PLAT_ppc32_linux
+#undef PLAT_ppc64be_linux
+#undef PLAT_ppc64le_linux
+#undef PLAT_arm_linux
+#undef PLAT_s390x_linux
+#undef PLAT_mips32_linux
+#undef PLAT_mips64_linux
+#undef PLAT_x86_solaris
+#undef PLAT_amd64_solaris
+
+#endif   /* __VALGRIND_H */