diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..320600f --- /dev/null +++ b/.gitignore @@ -0,0 +1,17 @@ +*.cmi +*.cmo +*.cmx +*.o +*.obj +*.a +*.lib +*.cma +*.cmxa +*.cmxs +*.cmt +*.cmti +/src/extBytes.mli +/src/doc/*.html +/test/extlib_test +/test/extlib_test.exe +/test/util/zlib-test/zlib-test diff --git a/.merlin b/.merlin new file mode 100644 index 0000000..da9b150 --- /dev/null +++ b/.merlin @@ -0,0 +1,4 @@ +S src +S test +B src +B test diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..adef916 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,32 @@ +language: c +sudo: required +install: wget https://raw.githubusercontent.com/ocaml/ocaml-ci-scripts/master/.travis-opam.sh +script: bash -ex .travis-opam.sh +matrix: + include: + - env: OCAML_VERSION=4.02 + os: osx + - env: OCAML_VERSION=4.03 + os: osx + - env: OCAML_VERSION=4.04 + os: osx + - env: OCAML_VERSION=4.05 + os: osx + - env: OCAML_VERSION=4.06 + os: osx + - env: OCAML_VERSION=4.02 + os: linux + - env: OCAML_VERSION=4.03 + os: linux + - env: OCAML_VERSION=4.04 + os: linux + - env: OCAML_VERSION=4.05 + os: linux + - env: OCAML_VERSION=4.06 + os: linux + - env: OCAML_VERSION=4.06 OPAM_SWITCH=4.06.1+32bit + os: linux + addons: + apt: + packages: + - gcc-multilib diff --git a/CHANGES b/CHANGES new file mode 100644 index 0000000..e133334 --- /dev/null +++ b/CHANGES @@ -0,0 +1,81 @@ +1.7.5 (2018-07-08) + * IO: restore compilation on 32-bit (broken in 1.7.3) + * sync with OCaml 4.07 + * install.ml is now deprecated, install with Makefile + + IO: read_i32_as_int + +1.7.4 (2018-03-12) + * fix tests with OCaml 4.06 + +1.7.3 (2018-03-11) + * String: faster slice + * sync with OCaml 4.06 (thx test/std.ml) + * IO: fix sign bug in read_i31 + * IO: add write_i31 and bounds-checking on write_i32 + * base64: add encode_string decode_string + * install cmt and cmti + +1.7.2 (2017-03-29) + * sync with OCaml 4.05 + +1.7.1 (2016-11-11) + * sync with OCaml 4.04 + * add Hashtbl.Make for better compatibility + +1.7.0 (2015-08-29) + * Switch to git and move repo to github + * `-safe-string` compatibility + * use cppo instead of camlp4 + * allow to set the exit code in optParse + * add new upstream functions in String + +1.6.1 (2013-11-26 - trunk @ r436) + * Drop `extlib_min` package + * Choose at build-time whether to build full or reduced library + +1.6.0 (2013-11-25 - trunk @ r429) + * Fix OCaml 4 Hashtbl compatibility + * Install additionally `extlib_min` with reduced set of modules (to mitigate linking conflicts) + * Build with debugging information by default + * Fix signature for `ExtList.iteri` and `OptParse.OptParser.error` + * Speed up `String.nsplit` + * New functions: + * `String.find_from` (by Alexander Markov) + * `IO.output_strings` (by Mehdi Dogguy) + * `IO.read_float32` and `IO.write_float32` (by Philippe Strauss) + * `IO.scanf` (by Warren Harris) + * `UTF8.substring` (by Berke Durak) + * `Enum.next` + +1.5.4 (2013-05-08 - trunk @ r407) + * Fix installation + * Streamline release process + +1.5.3 (2012-08-12 - trunk @ r397) + * Bug fixes / improvements: + * OCaml 4.00 compatibility (Hashtbl) + * Std.dump: handle float array, never throw + * New functions: + * `Array.map2` + * Install `*`.cmx + * Build extLib.cmxs + +1.5.2 (2011-08-06 - trunk @ r389) + * Bug fixes / improvements: + * memory corruption in `DynArray.insert` + * `ExtList.make` is now tail-recursive + * stack overflow in `ExtString.nsplit` + * `ExtList.(@)` is now in scope after `open ExtLib` + * `DynArray` will not attempt to grow past `Sys.max_array_length` + * faster `ExtString.starts_with` and `ExtString.ends_with` + * some documentation comments + * New functions: + * `List.find_map` + +1.5.1 (2007-12-29 - trunk @ r363) + * First release from http://code.google.com/p/ocaml-extlib + * Bug fixes / improvements: + * Add `ExtArray.Array.iter2` + * `Unzip` module fix & test case as reported by Robert Atkey + * `BitSet.enum` problem reported by Pascal Zimmer + * `nsplit "" _` ==> `[]` diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..34e5027 --- /dev/null +++ b/LICENSE @@ -0,0 +1,199 @@ +The Library is distributed under the terms of the GNU Lesser General +Public License version 2 (included below). + +As a special exception to the GNU Lesser General Public License, you +may link, statically or dynamically, a "work that uses the Library" +with a publicly distributed version of the Library to produce an +executable file containing portions of the Library, and distribute +that executable file under terms of your choice, without any of the +additional requirements listed in clause 6 of the GNU Lesser General +Public License. By "a publicly distributed version of the Library", +we mean either the unmodified Library as distributed, or a +modified version of the Library that is distributed under the +conditions defined in clause 2 of the GNU Lesser General Public +License. This exception does not however invalidate any other reasons +why the executable file might be covered by the GNU Lesser General +Public License. + +------------ + +GNU LESSER GENERAL PUBLIC LICENSE +Version 2.1, February 1999 + + +Copyright (C) 1991, 1999 Free Software Foundation, Inc. +59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Everyone is permitted to copy and distribute verbatim copies +of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + +Preamble +The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. + +This license, the Lesser General Public License, applies to some specially designated software packages--typically libraries--of the Free Software Foundation and other authors who decide to use it. You can use it too, but we suggest you first think carefully about whether this license or the ordinary General Public License is the better strategy to use in any particular case, based on the explanations below. + +When we speak of free software, we are referring to freedom of use, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish); that you receive source code or can get it if you want it; that you can change the software and use pieces of it in new free programs; and that you are informed that you can do these things. + +To protect your rights, we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it. + +For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link other code with the library, you must provide complete object files to the recipients, so that they can relink them with the library after making changes to the library and recompiling it. And you must show them these terms so they know their rights. + +We protect your rights with a two-step method: (1) we copyright the library, and (2) we offer you this license, which gives you legal permission to copy, distribute and/or modify the library. + +To protect each distributor, we want to make it very clear that there is no warranty for the free library. Also, if the library is modified by someone else and passed on, the recipients should know that what they have is not the original version, so that the original author's reputation will not be affected by problems that might be introduced by others. + +Finally, software patents pose a constant threat to the existence of any free program. We wish to make sure that a company cannot effectively restrict the users of a free program by obtaining a restrictive license from a patent holder. Therefore, we insist that any patent license obtained for a version of the library must be consistent with the full freedom of use specified in this license. + +Most GNU software, including some libraries, is covered by the ordinary GNU General Public License. This license, the GNU Lesser General Public License, applies to certain designated libraries, and is quite different from the ordinary General Public License. We use this license for certain libraries in order to permit linking those libraries into non-free programs. + +When a program is linked with a library, whether statically or using a shared library, the combination of the two is legally speaking a combined work, a derivative of the original library. The ordinary General Public License therefore permits such linking only if the entire combination fits its criteria of freedom. The Lesser General Public License permits more lax criteria for linking other code with the library. + +We call this license the "Lesser" General Public License because it does Less to protect the user's freedom than the ordinary General Public License. It also provides other free software developers Less of an advantage over competing non-free programs. These disadvantages are the reason we use the ordinary General Public License for many libraries. However, the Lesser license provides advantages in certain special circumstances. + +For example, on rare occasions, there may be a special need to encourage the widest possible use of a certain library, so that it becomes a de-facto standard. To achieve this, non-free programs must be allowed to use the library. A more frequent case is that a free library does the same job as widely used non-free libraries. In this case, there is little to gain by limiting the free library to free software only, so we use the Lesser General Public License. + +In other cases, permission to use a particular library in non-free programs enables a greater number of people to use a large body of free software. For example, permission to use the GNU C Library in non-free programs enables many more people to use the whole GNU operating system, as well as its variant, the GNU/Linux operating system. + +Although the Lesser General Public License is Less protective of the users' freedom, it does ensure that the user of a program that is linked with the Library has the freedom and the wherewithal to run that program using a modified version of the Library. + +The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, whereas the latter must be combined with the library in order to run. + + +TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION +0. This License Agreement applies to any software library or other program which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Lesser General Public License (also called "this License"). Each licensee is addressed as "you". + +A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. + +The "Library", below, refers to any such software library or work which has been distributed under these terms. A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) + +"Source code" for a work means the preferred form of the work for making modifications to it. For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. + +Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does. + +1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. + +You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. + +2. You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: + + +a) The modified work must itself be a software library. +b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. +c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. +d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. +(For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. + +3. You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) Do not make any other change in these notices. + +Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. + +This option is useful when you wish to copy part of the code of the Library into a program that is not a library. + +4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. + +If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. + +5. A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. + +However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. + +When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. + +If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) + +Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. + +6. As an exception to the Sections above, you may also combine or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. + +You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. Also, you must do one of these things: + + +a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) +b) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (1) uses at run time a copy of the library already present on the user's computer system, rather than copying library functions into the executable, and (2) will operate properly with a modified version of the library, if the user installs one, as long as the modified version is interface-compatible with the version that the work was made with. +c) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. +d) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. +e) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. +For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, the materials to be distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. + +It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. + +7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: + + +a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. +b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. +8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. + +9. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. + +10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with this License. + +11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. + +This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. + +12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. + +13. The Free Software Foundation may publish revised and/or new versions of the Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. + +14. If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. + +NO WARRANTY + +15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + + +END OF TERMS AND CONDITIONS +How to Apply These Terms to Your New Libraries +If you develop a new library, and you want it to be of the greatest possible use to the public, we recommend making it free software that everyone can redistribute and change. You can do so by permitting redistribution under these terms (or, alternatively, under the terms of the ordinary General Public License). + +To apply these terms, attach the following notices to the library. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. + + +one line to give the library's name and an idea of what it does. +Copyright (C) year name of author + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the library, if necessary. Here is a sample; alter the names: + + +Yoyodyne, Inc., hereby disclaims all copyright interest in +the library `Frob' (a library for tweaking knobs) written +by James Random Hacker. + +signature of Ty Coon, 1 April 1990 +Ty Coon, President of Vice + +That's all there is to it! diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..7836f22 --- /dev/null +++ b/Makefile @@ -0,0 +1,35 @@ + +VERSION:=$(shell git --git-dir=.git describe --always --long) +RELEASE:=1.7.5 + +ifndef VERSION +VERSION:=$(RELEASE) +endif + +.NOTPARALLEL: +.SUFFIXES: +.PHONY: build clean test doc release install + +build: + $(MAKE) -C src build + +install: + $(MAKE) -C src VERSION=$(VERSION) install + +doc: + $(MAKE) -C src doc + +test: + $(MAKE) -C test all run + $(MAKE) -C test opt run + +clean: + $(MAKE) -C src clean + $(MAKE) -C test clean + +NAME=extlib-$(RELEASE) + +release: + git tag -a -m $(RELEASE) $(RELEASE) + git archive --prefix=$(NAME)/ $(RELEASE) | gzip > $(NAME).tar.gz + gpg -a -b $(NAME).tar.gz diff --git a/README.md b/README.md new file mode 100644 index 0000000..7f52df0 --- /dev/null +++ b/README.md @@ -0,0 +1,91 @@ +OCaml Extended standard Library - ExtLib. +========================================= + +[![Build Status](https://travis-ci.org/ygrek/ocaml-extlib.svg?branch=master)](https://travis-ci.org/ygrek/ocaml-extlib) +[![Build status](https://ci.appveyor.com/api/projects/status/6a3t5iq7ljbd25iq?svg=true)](https://ci.appveyor.com/project/ygrek/ocaml-extlib/branch/master) + +``` + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version,, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +``` + +What is ExtLib ? +---------------- + +ExtLib is a set of additional useful functions and modules for OCaml. + +Project page : + https://github.com/ygrek/ocaml-extlib + +Online API documentation : + http://ygrek.org.ua/p/extlib/doc/ + +Dependencies +------------ + +* `cppo` - enables conditional compilation to ensure compatibility with various OCaml versions +* `ocamlfind >= 1.5.1` - provides bytes package + +Installation +------------ + +Unzip or untar in any directory and run + + `make minimal=1 build install` + +This will build and install bytecode and native libraries. +On bytecode-only architecture run + + `make minimal=1 all install` + +`minimal=1` will exclude from build several modules (namely `Unzip` `UChar` `UTF8`) potentially +conflicting with other well established OCaml libraries. If your code is expecting to find +these modules in extlib - omit this parameter during build to produce the full library. + +Usage +----- + +Generate and read the documentation. + +Release +------- + +* Review `git log` and update CHANGES +* Update version in Makefile +* Commit +* `make release` +* upload tarball and make release on github +* opam publish + +Contributors +------------ + +* Nicolas Cannasse +* Brian Hurt +* Yamagata Yoriyuki +* Markus Mottl +* Jesse Guardiani +* John Skaller +* Bardur Arantsson +* Janne Hellsten +* Richard W.M. Jones +* ygrek +* Gabriel Scherer +* Pietro Abate + +License +------- + +See LICENSE diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 0000000..74ee06f --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,13 @@ +platform: + - x86 + +environment: + FORK_USER: ocaml + FORK_BRANCH: master + CYG_ROOT: C:\cygwin64 + +install: + - ps: iex ((new-object net.webclient).DownloadString("https://raw.githubusercontent.com/$env:FORK_USER/ocaml-ci-scripts/$env:FORK_BRANCH/appveyor-install.ps1")) + +build_script: + - call %CYG_ROOT%\bin\bash.exe -l %APPVEYOR_BUILD_FOLDER%\appveyor-opam.sh diff --git a/opam b/opam new file mode 100644 index 0000000..b6aec21 --- /dev/null +++ b/opam @@ -0,0 +1,35 @@ +opam-version: "1.2" +maintainer: "ygrek@autistici.org" +homepage: "https://github.com/ygrek/ocaml-extlib" +dev-repo: "git://github.com/ygrek/ocaml-extlib.git" +bug-reports: "https://github.com/ygrek/ocaml-extlib/issues" +doc: ["http://ygrek.org.ua/p/extlib/doc/"] +license: "LGPL-2.1 with OCaml linking exception" +authors: [ + "Nicolas Cannasse" + "Brian Hurt" + "Yamagata Yoriyuki" + "Markus Mottl" + "Jesse Guardiani" + "John Skaller" + "Bardur Arantsson" + "Janne Hellsten" + "Richard W.M. Jones" + "ygrek" + "Gabriel Scherer" + "Pietro Abate" +] +build: [ + [make "minimal=1" "build"] +] +install: [ [make "minimal=1" "install"] ] +build-doc: [ [make "doc"] ] +build-test: [ [make "test"] ] +remove: [ + ["ocamlfind" "remove" "extlib"] +] +depends: [ + "ocamlfind" {build} + "cppo" {build} + "base-bytes" {build} +] diff --git a/src/IO.ml b/src/IO.ml new file mode 100644 index 0000000..45b73a0 --- /dev/null +++ b/src/IO.ml @@ -0,0 +1,884 @@ +(* + * IO - Abstract input/output + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +open ExtBytes + +type input = { + mutable in_read : unit -> char; + mutable in_input : Bytes.t -> int -> int -> int; + mutable in_close : unit -> unit; +} + +type 'a output = { + mutable out_write : char -> unit; + mutable out_output : Bytes.t -> int -> int -> int; + mutable out_close : unit -> 'a; + mutable out_flush : unit -> unit; +} + +exception No_more_input +exception Input_closed +exception Output_closed + +(* -------------------------------------------------------------- *) +(* API *) + +let default_close = (fun () -> ()) + +let create_in ~read ~input ~close = + { + in_read = read; + in_input = input; + in_close = close; + } + +let create_out ~write ~output ~flush ~close = + { + out_write = write; + out_output = output; + out_close = close; + out_flush = flush; + } + +let read i = i.in_read() + +let nread i n = + if n < 0 then invalid_arg "IO.nread"; + if n = 0 then Bytes.empty + else + let s = Bytes.create n in + let l = ref n in + let p = ref 0 in + try + while !l > 0 do + let r = i.in_input s !p !l in + if r = 0 then raise No_more_input; + p := !p + r; + l := !l - r; + done; + s + with + No_more_input as e -> + if !p = 0 then raise e; + Bytes.sub s 0 !p + +let nread_string i n = + (* [nread] transfers ownership of the returned string, so + [unsafe_to_string] is safe here *) + Bytes.unsafe_to_string (nread i n) + +let really_output o s p l' = + let sl = Bytes.length s in + if p + l' > sl || p < 0 || l' < 0 then invalid_arg "IO.really_output"; + let l = ref l' in + let p = ref p in + while !l > 0 do + let w = o.out_output s !p !l in + if w = 0 then raise Sys_blocked_io; + p := !p + w; + l := !l - w; + done; + l' + +let input i s p l = + let sl = Bytes.length s in + if p + l > sl || p < 0 || l < 0 then invalid_arg "IO.input"; + if l = 0 then + 0 + else + i.in_input s p l + +let really_input i s p l' = + let sl = Bytes.length s in + if p + l' > sl || p < 0 || l' < 0 then invalid_arg "IO.really_input"; + let l = ref l' in + let p = ref p in + while !l > 0 do + let r = i.in_input s !p !l in + if r = 0 then raise Sys_blocked_io; + p := !p + r; + l := !l - r; + done; + l' + +let really_nread i n = + if n < 0 then invalid_arg "IO.really_nread"; + if n = 0 then Bytes.empty + else + let s = Bytes.create n + in + ignore(really_input i s 0 n); + s + +let really_nread_string i n = + (* [really_nread] transfers ownership of the returned string, + so [unsafe_to_string] is safe here *) + Bytes.unsafe_to_string (really_nread i n) + +let close_in i = + let f _ = raise Input_closed in + i.in_close(); + i.in_read <- f; + i.in_input <- f; + i.in_close <- f + +let write o x = o.out_write x + +let nwrite o s = + let p = ref 0 in + let l = ref (Bytes.length s) in + while !l > 0 do + let w = o.out_output s !p !l in + if w = 0 then raise Sys_blocked_io; + p := !p + w; + l := !l - w; + done + +let nwrite_string o s = + (* [nwrite] does not mutate or capture its [bytes] input, + so using [Bytes.unsafe_of_string] is safe here *) + nwrite o (Bytes.unsafe_of_string s) + +let output o s p l = + let sl = Bytes.length s in + if p + l > sl || p < 0 || l < 0 then invalid_arg "IO.output"; + o.out_output s p l + +let scanf i fmt = + let ib = Scanf.Scanning.from_function (fun () -> try read i with No_more_input -> raise End_of_file) in + Scanf.kscanf ib (fun _ exn -> raise exn) fmt + +let printf o fmt = + Printf.kprintf (fun s -> nwrite_string o s) fmt + +let flush o = o.out_flush() + +let close_out o = + let f _ = raise Output_closed in + let r = o.out_close() in + o.out_write <- f; + o.out_output <- f; + o.out_close <- f; + o.out_flush <- f; + r + +let read_all i = + let maxlen = 1024 in + let str = ref [] in + let pos = ref 0 in + let rec loop() = + let s = nread i maxlen in + str := (s,!pos) :: !str; + pos := !pos + Bytes.length s; + loop() + in + try + loop() + with + No_more_input -> + let buf = Bytes.create !pos in + List.iter (fun (s,p) -> + Bytes.blit s 0 buf p (Bytes.length s) + ) !str; + (* 'buf' doesn't escape, it won't be mutated again *) + Bytes.unsafe_to_string buf + +let pos_in i = + let p = ref 0 in + { + in_read = (fun () -> + let c = i.in_read() in + incr p; + c + ); + in_input = (fun s sp l -> + let n = i.in_input s sp l in + p := !p + n; + n + ); + in_close = i.in_close + } , (fun () -> !p) + +let pos_out o = + let p = ref 0 in + { + out_write = (fun c -> + o.out_write c; + incr p + ); + out_output = (fun s sp l -> + let n = o.out_output s sp l in + p := !p + n; + n + ); + out_close = o.out_close; + out_flush = o.out_flush; + } , (fun () -> !p) + +(* -------------------------------------------------------------- *) +(* Standard IO *) + +let input_bytes s = + let pos = ref 0 in + let len = Bytes.length s in + { + in_read = (fun () -> + if !pos >= len then raise No_more_input; + let c = Bytes.unsafe_get s !pos in + incr pos; + c + ); + in_input = (fun sout p l -> + if !pos >= len then raise No_more_input; + let n = (if !pos + l > len then len - !pos else l) in + Bytes.unsafe_blit s !pos sout p n; + pos := !pos + n; + n + ); + in_close = (fun () -> ()); + } + +let input_string s = + (* Bytes.unsafe_of_string is safe here as input_bytes does not + mutate the byte sequence *) + input_bytes (Bytes.unsafe_of_string s) + +open ExtBuffer + +let output_buffer close = + let b = Buffer.create 0 in + { + out_write = (fun c -> Buffer.add_char b c); + out_output = (fun s p l -> Buffer.add_subbytes b s p l; l); + out_close = (fun () -> close b); + out_flush = (fun () -> ()); + } + +let output_string () = output_buffer Buffer.contents +let output_bytes () = output_buffer Buffer.to_bytes + +let output_strings() = + let sl = ref [] in + let size = ref 0 in + let b = Buffer.create 0 in + { + out_write = (fun c -> + if !size = Sys.max_string_length then begin + sl := Buffer.contents b :: !sl; + Buffer.clear b; + size := 0; + end else incr size; + Buffer.add_char b c + ); + out_output = (fun s p l -> + if !size + l > Sys.max_string_length then begin + sl := Buffer.contents b :: !sl; + Buffer.clear b; + size := 0; + end else size := !size + l; + Buffer.add_subbytes b s p l; + l + ); + out_close = (fun () -> sl := Buffer.contents b :: !sl; List.rev (!sl)); + out_flush = (fun () -> ()); + } + + +let input_channel ch = + { + in_read = (fun () -> + try + input_char ch + with + End_of_file -> raise No_more_input + ); + in_input = (fun s p l -> + let n = Pervasives.input ch s p l in + if n = 0 then raise No_more_input; + n + ); + in_close = (fun () -> Pervasives.close_in ch); + } + +let output_channel ch = + { + out_write = (fun c -> output_char ch c); + out_output = (fun s p l -> Pervasives.output ch s p l; l); + out_close = (fun () -> Pervasives.close_out ch); + out_flush = (fun () -> Pervasives.flush ch); + } + +let input_enum e = + let pos = ref 0 in + { + in_read = (fun () -> + match Enum.get e with + | None -> raise No_more_input + | Some c -> + incr pos; + c + ); + in_input = (fun s p l -> + let rec loop p l = + if l = 0 then + 0 + else + match Enum.get e with + | None -> l + | Some c -> + Bytes.unsafe_set s p c; + loop (p + 1) (l - 1) + in + let k = loop p l in + if k = l then raise No_more_input; + l - k + ); + in_close = (fun () -> ()); + } + +let output_enum() = + let b = Buffer.create 0 in + { + out_write = (fun x -> + Buffer.add_char b x + ); + out_output = (fun s p l -> + Buffer.add_subbytes b s p l; + l + ); + out_close = (fun () -> + let s = Buffer.contents b in + ExtString.String.enum s + ); + out_flush = (fun () -> ()); + } + +let pipe() = + let input = ref "" in + let inpos = ref 0 in + let output = Buffer.create 0 in + let flush() = + input := Buffer.contents output; + inpos := 0; + Buffer.reset output; + if String.length !input = 0 then raise No_more_input + in + let read() = + if !inpos = String.length !input then flush(); + let c = String.unsafe_get !input !inpos in + incr inpos; + c + in + let input s p l = + if !inpos = String.length !input then flush(); + let r = (if !inpos + l > String.length !input then String.length !input - !inpos else l) in + String.unsafe_blit !input !inpos s p r; + inpos := !inpos + r; + r + in + let write c = + Buffer.add_char output c + in + let output s p l = + Buffer.add_subbytes output s p l; + l + in + let input = { + in_read = read; + in_input = input; + in_close = (fun () -> ()); + } in + let output = { + out_write = write; + out_output = output; + out_close = (fun () -> ()); + out_flush = (fun () -> ()); + } in + input , output + +external cast_output : 'a output -> unit output = "%identity" + +(* -------------------------------------------------------------- *) +(* BINARY APIs *) + +exception Overflow of string + +let read_byte i = int_of_char (i.in_read()) + +let read_signed_byte i = + let c = int_of_char (i.in_read()) in + if c land 128 <> 0 then + c - 256 + else + c + +let read_string_into_buffer i = + let b = Buffer.create 8 in + let rec loop() = + let c = i.in_read() in + if c <> '\000' then begin + Buffer.add_char b c; + loop(); + end; + in + loop(); + b + +let read_string i = + Buffer.contents + (read_string_into_buffer i) + +let read_bytes i = + Buffer.to_bytes + (read_string_into_buffer i) + +let read_line i = + let b = Buffer.create 8 in + let cr = ref false in + let rec loop() = + let c = i.in_read() in + match c with + | '\n' -> + () + | '\r' -> + cr := true; + loop() + | _ when !cr -> + cr := false; + Buffer.add_char b '\r'; + Buffer.add_char b c; + loop(); + | _ -> + Buffer.add_char b c; + loop(); + in + try + loop(); + Buffer.contents b + with + No_more_input -> + if !cr then Buffer.add_char b '\r'; + if Buffer.length b > 0 then + Buffer.contents b + else + raise No_more_input + +let read_ui16 i = + let ch1 = read_byte i in + let ch2 = read_byte i in + ch1 lor (ch2 lsl 8) + +let read_i16 i = + let ch1 = read_byte i in + let ch2 = read_byte i in + let n = ch1 lor (ch2 lsl 8) in + if ch2 land 128 <> 0 then + n - 65536 + else + n + +let sign_bit_i32 = lnot 0x7FFF_FFFF + +let read_32 ~i31 ch = + let ch1 = read_byte ch in + let ch2 = read_byte ch in + let ch3 = read_byte ch in + let ch4 = read_byte ch in + if ch4 land 128 <> 0 then begin + if i31 && ch4 land 64 = 0 then raise (Overflow "read_i31"); + ch1 lor (ch2 lsl 8) lor (ch3 lsl 16) lor ((ch4 land 127) lsl 24) lor sign_bit_i32 + end else begin + if i31 && ch4 land 64 <> 0 then raise (Overflow "read_i31"); + ch1 lor (ch2 lsl 8) lor (ch3 lsl 16) lor (ch4 lsl 24) + end + +let read_i31 ch = read_32 ~i31:true ch +let read_i32_as_int ch = read_32 ~i31:false ch + +let read_i32 = read_i31 + +let read_real_i32 ch = + let ch1 = read_byte ch in + let ch2 = read_byte ch in + let ch3 = read_byte ch in + let base = Int32.of_int (ch1 lor (ch2 lsl 8) lor (ch3 lsl 16)) in + let big = Int32.shift_left (Int32.of_int (read_byte ch)) 24 in + Int32.logor base big + +let read_i64 ch = + let ch1 = read_byte ch in + let ch2 = read_byte ch in + let ch3 = read_byte ch in + let ch4 = read_byte ch in + let base = Int64.of_int (ch1 lor (ch2 lsl 8) lor (ch3 lsl 16)) in + let small = Int64.logor base (Int64.shift_left (Int64.of_int ch4) 24) in + let big = Int64.of_int32 (read_real_i32 ch) in + Int64.logor (Int64.shift_left big 32) small + +let read_float32 ch = + Int32.float_of_bits (read_real_i32 ch) + +let read_double ch = + Int64.float_of_bits (read_i64 ch) + +let write_byte o n = + (* doesn't test bounds of n in order to keep semantics of Pervasives.output_byte *) + write o (Char.unsafe_chr (n land 0xFF)) + +let write_string o s = + nwrite_string o s; + write o '\000' + +let write_bytes o s = + nwrite o s; + write o '\000' + +let write_line o s = + nwrite_string o s; + write o '\n' + +let write_ui16 ch n = + if n < 0 || n > 0xFFFF then raise (Overflow "write_ui16"); + write_byte ch n; + write_byte ch (n lsr 8) + +let write_i16 ch n = + if n < -0x8000 || n > 0x7FFF then raise (Overflow "write_i16"); + if n < 0 then + write_ui16 ch (65536 + n) + else + write_ui16 ch n + +let write_32 ch n = + write_byte ch n; + write_byte ch (n lsr 8); + write_byte ch (n lsr 16); + write_byte ch (n asr 24) + +let write_i31 ch n = +#ifndef WORD_SIZE_32 + if n < -0x4000_0000 || n > 0x3FFF_FFFF then raise (Overflow "write_i31"); +#endif + write_32 ch n + +let write_i32 ch n = +#ifndef WORD_SIZE_32 + if n < -0x8000_0000 || n > 0x7FFF_FFFF then raise (Overflow "write_i32"); +#endif + write_32 ch n + +let write_real_i32 ch n = + let base = Int32.to_int n in + let big = Int32.to_int (Int32.shift_right_logical n 24) in + write_byte ch base; + write_byte ch (base lsr 8); + write_byte ch (base lsr 16); + write_byte ch big + +let write_i64 ch n = + write_real_i32 ch (Int64.to_int32 n); + write_real_i32 ch (Int64.to_int32 (Int64.shift_right_logical n 32)) + +let write_float32 ch f = + write_real_i32 ch (Int32.bits_of_float f) + +let write_double ch f = + write_i64 ch (Int64.bits_of_float f) + +(* -------------------------------------------------------------- *) +(* Big Endians *) + +module BigEndian = struct + +let read_ui16 i = + let ch2 = read_byte i in + let ch1 = read_byte i in + ch1 lor (ch2 lsl 8) + +let read_i16 i = + let ch2 = read_byte i in + let ch1 = read_byte i in + let n = ch1 lor (ch2 lsl 8) in + if ch2 land 128 <> 0 then + n - 65536 + else + n + +let sign_bit_i32 = lnot 0x7FFF_FFFF + +let read_32 ~i31 ch = + let ch4 = read_byte ch in + let ch3 = read_byte ch in + let ch2 = read_byte ch in + let ch1 = read_byte ch in + if ch4 land 128 <> 0 then begin + if i31 && ch4 land 64 = 0 then raise (Overflow "read_i31"); + ch1 lor (ch2 lsl 8) lor (ch3 lsl 16) lor ((ch4 land 127) lsl 24) lor sign_bit_i32 + end else begin + if i31 && ch4 land 64 <> 0 then raise (Overflow "read_i31"); + ch1 lor (ch2 lsl 8) lor (ch3 lsl 16) lor (ch4 lsl 24) + end + +let read_i31 ch = read_32 ~i31:true ch +let read_i32_as_int ch = read_32 ~i31:false ch + +let read_i32 = read_i31 + +let read_real_i32 ch = + let big = Int32.shift_left (Int32.of_int (read_byte ch)) 24 in + let ch3 = read_byte ch in + let ch2 = read_byte ch in + let ch1 = read_byte ch in + let base = Int32.of_int (ch1 lor (ch2 lsl 8) lor (ch3 lsl 16)) in + Int32.logor base big + +let read_i64 ch = + let big = Int64.of_int32 (read_real_i32 ch) in + let ch4 = read_byte ch in + let ch3 = read_byte ch in + let ch2 = read_byte ch in + let ch1 = read_byte ch in + let base = Int64.of_int (ch1 lor (ch2 lsl 8) lor (ch3 lsl 16)) in + let small = Int64.logor base (Int64.shift_left (Int64.of_int ch4) 24) in + Int64.logor (Int64.shift_left big 32) small + +let read_float32 ch = + Int32.float_of_bits (read_real_i32 ch) + +let read_double ch = + Int64.float_of_bits (read_i64 ch) + +let write_ui16 ch n = + if n < 0 || n > 0xFFFF then raise (Overflow "write_ui16"); + write_byte ch (n lsr 8); + write_byte ch n + +let write_i16 ch n = + if n < -0x8000 || n > 0x7FFF then raise (Overflow "write_i16"); + if n < 0 then + write_ui16 ch (65536 + n) + else + write_ui16 ch n + +let write_32 ch n = + write_byte ch (n asr 24); + write_byte ch (n lsr 16); + write_byte ch (n lsr 8); + write_byte ch n + +let write_i31 ch n = +#ifndef WORD_SIZE_32 + if n < -0x4000_0000 || n > 0x3FFF_FFFF then raise (Overflow "write_i31"); +#endif + write_32 ch n + +let write_i32 ch n = +#ifndef WORD_SIZE_32 + if n < -0x8000_0000 || n > 0x7FFF_FFFF then raise (Overflow "write_i32"); +#endif + write_32 ch n + +let write_real_i32 ch n = + let base = Int32.to_int n in + let big = Int32.to_int (Int32.shift_right_logical n 24) in + write_byte ch big; + write_byte ch (base lsr 16); + write_byte ch (base lsr 8); + write_byte ch base + +let write_i64 ch n = + write_real_i32 ch (Int64.to_int32 (Int64.shift_right_logical n 32)); + write_real_i32 ch (Int64.to_int32 n) + +let write_float32 ch f = + write_real_i32 ch (Int32.bits_of_float f) + +let write_double ch f = + write_i64 ch (Int64.bits_of_float f) + +end + +(* -------------------------------------------------------------- *) +(* Bits API *) + +type 'a bc = { + ch : 'a; + mutable nbits : int; + mutable bits : int; +} + +type in_bits = input bc +type out_bits = unit output bc + +exception Bits_error + +let input_bits ch = + { + ch = ch; + nbits = 0; + bits = 0; + } + +let output_bits ch = + { + ch = cast_output ch; + nbits = 0; + bits = 0; + } + +let rec read_bits b n = + if b.nbits >= n then begin + let c = b.nbits - n in + let k = (b.bits asr c) land ((1 lsl n) - 1) in + b.nbits <- c; + k + end else begin + let k = read_byte b.ch in + if b.nbits >= 24 then begin + if n >= 31 then raise Bits_error; + let c = 8 + b.nbits - n in + let d = b.bits land ((1 lsl b.nbits) - 1) in + let d = (d lsl (8 - c)) lor (k lsr c) in + b.bits <- k; + b.nbits <- c; + d + end else begin + b.bits <- (b.bits lsl 8) lor k; + b.nbits <- b.nbits + 8; + read_bits b n; + end + end + +let drop_bits b = + b.nbits <- 0 + +let rec write_bits b ~nbits x = + let n = nbits in + if n + b.nbits >= 32 then begin + if n > 31 then raise Bits_error; + let n2 = 32 - b.nbits - 1 in + let n3 = n - n2 in + write_bits b ~nbits:n2 (x asr n3); + write_bits b ~nbits:n3 (x land ((1 lsl n3) - 1)); + end else begin + if n < 0 then raise Bits_error; + if (x < 0 || x > (1 lsl n - 1)) && n <> 31 then raise Bits_error; + b.bits <- (b.bits lsl n) lor x; + b.nbits <- b.nbits + n; + while b.nbits >= 8 do + b.nbits <- b.nbits - 8; + write_byte b.ch (b.bits asr b.nbits) + done + end + +let flush_bits b = + if b.nbits > 0 then write_bits b (8 - b.nbits) 0 + +(* -------------------------------------------------------------- *) +(* Generic IO *) + +class in_channel ch = + object + method input s pos len = input ch s pos len + method close_in() = close_in ch + end + +class out_channel ch = + object + method output s pos len = output ch s pos len + method flush() = flush ch + method close_out() = ignore(close_out ch) + end + +class in_chars ch = + object + method get() = try read ch with No_more_input -> raise End_of_file + method close_in() = close_in ch + end + +class out_chars ch = + object + method put t = write ch t + method flush() = flush ch + method close_out() = ignore(close_out ch) + end + +let from_in_channel ch = + let cbuf = Bytes.create 1 in + let read() = + try + if ch#input cbuf 0 1 = 0 then raise Sys_blocked_io; + Bytes.unsafe_get cbuf 0 + with + End_of_file -> raise No_more_input + in + let input s p l = + ch#input s p l + in + create_in + ~read + ~input + ~close:ch#close_in + +let from_out_channel ch = + let cbuf = Bytes.create 1 in + let write c = + Bytes.unsafe_set cbuf 0 c; + if ch#output cbuf 0 1 = 0 then raise Sys_blocked_io; + in + let output s p l = + ch#output s p l + in + create_out + ~write + ~output + ~flush:ch#flush + ~close:ch#close_out + +let from_in_chars ch = + let input s p l = + let i = ref 0 in + try + while !i < l do + Bytes.unsafe_set s (p + !i) (ch#get()); + incr i + done; + l + with + End_of_file when !i > 0 -> + !i + in + create_in + ~read:ch#get + ~input + ~close:ch#close_in + +let from_out_chars ch = + let output s p l = + for i = p to p + l - 1 do + ch#put (Bytes.unsafe_get s i) + done; + l + in + create_out + ~write:ch#put + ~output + ~flush:ch#flush + ~close:ch#close_out diff --git a/src/IO.mli b/src/IO.mli new file mode 100644 index 0000000..f0236eb --- /dev/null +++ b/src/IO.mli @@ -0,0 +1,374 @@ +(* + * IO - Abstract input/output + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** High-order abstract I/O. + + IO module simply deals with abstract inputs/outputs. It provides a + set of methods for working with these IO as well as several + constructors that enable to write to an underlying channel, buffer, + or enum. +*) + +open ExtBytes + +type input +(** The abstract input type. *) + +type 'a output +(** The abstract output type, ['a] is the accumulator data, it is returned + when the [close_out] function is called. *) + +exception No_more_input +(** This exception is raised when reading on an input with the [read] or + [nread] functions while there is no available token to read. *) + +exception Input_closed +(** This exception is raised when reading on a closed input. *) + +exception Output_closed +(** This exception is raised when reading on a closed output. *) + +(** {6 Standard API} *) + +val read : input -> char +(** Read a single char from an input or raise [No_more_input] if + no input available. *) + +val nread : input -> int -> Bytes.t +(** [nread i n] reads a byte sequence of size up to [n] from an input. + The function will raise [No_more_input] if no input is available. + It will raise [Invalid_argument] if [n] < 0. *) + +val really_nread : input -> int -> Bytes.t +(** [really_nread i n] reads a byte sequence of exactly [n] characters + from the input. Raises [No_more_input] if at least [n] characters are + not available. Raises [Invalid_argument] if [n] < 0. *) + +val nread_string : input -> int -> string +(** as [nread], but reads a string. *) + +val really_nread_string : input -> int -> string +(** as [really_nread], but reads a string. *) + +val input : input -> Bytes.t -> int -> int -> int +(** [input i b p l] reads up to [l] characters from the given input, storing + them in buffer [b], starting at character number [p]. It returns the actual + number of characters read or raise [No_more_input] if no character can be + read. It will raise [Invalid_argument] if [p] and [l] do not designate a + valid subsequence of [b]. *) + +val really_input : input -> Bytes.t -> int -> int -> int +(** [really_input i b p l] reads exactly [l] characters from the given input, + storing them in the buffer [b], starting at position [p]. For consistency with + {!IO.input} it returns [l]. Raises [No_more_input] if at [l] characters are + not available. Raises [Invalid_argument] if [p] and [l] do not designate a + valid subsequence of [b]. *) + +val close_in : input -> unit +(** Close the input. It can no longer be read from. *) + +val write : 'a output -> char -> unit +(** Write a single char to an output. *) + +val nwrite : 'a output -> Bytes.t -> unit +(** Write a byte sequence to an output. *) + +val nwrite_string : 'a output -> string -> unit +(** Write a string to an output. *) + +val output : 'a output -> Bytes.t -> int -> int -> int +(** [output o b p l] writes up to [l] characters from byte sequence [b], starting at + offset [p]. It returns the number of characters written. It will raise + [Invalid_argument] if [p] and [l] do not designate a valid subsequence of [b]. *) + +val really_output : 'a output -> Bytes.t -> int -> int -> int +(** [really_output o b p l] writes exactly [l] characters from byte sequence [b] onto + the the output, starting with the character at offset [p]. For consistency with + {!IO.output} it returns [l]. Raises [Invalid_argument] if [p] and [l] do not + designate a valid subsequence of [b]. *) + +val flush : 'a output -> unit +(** Flush an output. *) + +val close_out : 'a output -> 'a +(** Close the output and return its accumulator data. + It can no longer be written. *) + +(** {6 Creation of IO Inputs/Outputs} *) + +val input_string : string -> input +(** Create an input that will read from a string. *) + +val input_bytes : Bytes.t -> input +(** Create an input that will read from a byte sequence. *) + +val output_string : unit -> string output +(** Create an output that will write into a string in an efficient way. + When closed, the output returns all the data written into it. *) + +val output_bytes : unit -> Bytes.t output +(** Create an output that will write into a byte sequence in an efficient way. + When closed, the output returns all the data written into it. *) + +val output_strings : unit -> string list output +(** Create an output that will write into a string in an efficient way. + When closed, the output returns all the data written into it. + Several strings are used in case the output size excess max_string_length +*) + +val input_channel : in_channel -> input +(** Create an input that will read from a channel. *) + +val output_channel : out_channel -> unit output +(** Create an output that will write into a channel. *) + +val input_enum : char Enum.t -> input +(** Create an input that will read from an [enum]. *) + +val output_enum : unit -> char Enum.t output +(** Create an output that will write into an [enum]. The + final enum is returned when the output is closed. *) + +val create_in : + read:(unit -> char) -> + input:(Bytes.t -> int -> int -> int) -> close:(unit -> unit) -> input +(** Fully create an input by giving all the needed functions. *) + +val create_out : + write:(char -> unit) -> + output:(Bytes.t -> int -> int -> int) -> + flush:(unit -> unit) -> close:(unit -> 'a) -> 'a output +(** Fully create an output by giving all the needed functions. *) + +(** {6 Utilities} *) + +val scanf : input -> ('a, 'b, 'c, 'd) Scanf.scanner +(** The scanf function works for any input. *) + +val printf : 'a output -> ('b, unit, string, unit) format4 -> 'b +(** The printf function works for any output. *) + +val read_all : input -> string +(** read all the contents of the input until [No_more_input] is raised. *) + +val pipe : unit -> input * unit output +(** Create a pipe between an input and an ouput. Data written from + the output can be read from the input. *) + +val pos_in : input -> input * (unit -> int) +(** Create an input that provide a count function of the number of Bytes.t + read from it. *) + +val pos_out : 'a output -> 'a output * (unit -> int) +(** Create an output that provide a count function of the number of Bytes.t + written through it. *) + +external cast_output : 'a output -> unit output = "%identity" +(** You can safely transform any output to an unit output in a safe way + by using this function. *) + +(** {6 Binary files API} + + Here is some API useful for working with binary files, in particular + binary files generated by C applications. By default, encoding of + multibyte integers is low-endian. The BigEndian module provide multibyte + operations with other encoding. +*) + +exception Overflow of string +(** Exception raised when a read or write operation cannot be completed. *) + +val read_byte : input -> int +(** Read an unsigned 8-bit integer. *) + +val read_signed_byte : input -> int +(** Read an signed 8-bit integer. *) + +val read_ui16 : input -> int +(** Read an unsigned 16-bit word. *) + +val read_i16 : input -> int +(** Read a signed 16-bit word. *) + +val read_i31 : input -> int +(** Read a signed 32-bit integer. Raise [Overflow] if the + read integer cannot be represented as an OCaml 31-bit integer. *) + +val read_i32 : input -> int +(** Deprecated, same as read_i31 *) + +val read_i32_as_int : input -> int +(** Read a signed 32-bit integer, represented as OCaml integer, wrapping around 31-bit int on 32-bit architecture *) + +val read_real_i32 : input -> int32 +(** Read a signed 32-bit integer as an OCaml int32. *) + +val read_i64 : input -> int64 +(** Read a signed 64-bit integer as an OCaml int64. *) + +val read_float32 : input -> float +(** Read an IEEE single precision floating point value (32 bits). *) + +val read_double : input -> float +(** Read an IEEE double precision floating point value (64 bits). *) + +val read_string : input -> string +(** Read a null-terminated string. *) + +val read_bytes : input -> Bytes.t +(** Read a null-terminated byte sequence. *) + +val read_line : input -> string +(** Read a LF or CRLF terminated string. *) + +val write_byte : 'a output -> int -> unit +(** Write an unsigned 8-bit byte. *) + +val write_ui16 : 'a output -> int -> unit +(** Write an unsigned 16-bit word. *) + +val write_i16 : 'a output -> int -> unit +(** Write a signed 16-bit word. *) + +val write_i31 : 'a output -> int -> unit +(** Write a signed 31-bit integer as 4 bytes. *) + +val write_i32 : 'a output -> int -> unit +(** Write a signed 32-bit integer. *) + +val write_real_i32 : 'a output -> int32 -> unit +(** Write an OCaml int32. *) + +val write_i64 : 'a output -> int64 -> unit +(** Write an OCaml int64. *) + +val write_float32 : 'a output -> float -> unit +(** Write an IEEE single precision floating point value (32 bits). *) + +val write_double : 'a output -> float -> unit +(** Write an IEEE double precision floating point value (64 bits). *) + +val write_string : 'a output -> string -> unit +(** Write a string and append an null character. *) + +val write_bytes : 'a output -> Bytes.t -> unit +(** Write a byte sequence and append an null character. *) + +val write_line : 'a output -> string -> unit +(** Write a line and append a LF (it might be converted + to CRLF on some systems depending on the underlying IO). *) + +(** Same as operations above, but use big-endian encoding *) +module BigEndian : +sig + + val read_ui16 : input -> int + val read_i16 : input -> int + val read_i31 : input -> int + val read_i32 : input -> int + val read_i32_as_int : input -> int + val read_real_i32 : input -> int32 + val read_i64 : input -> int64 + val read_float32 : input -> float + val read_double : input -> float + + val write_ui16 : 'a output -> int -> unit + val write_i16 : 'a output -> int -> unit + val write_i31 : 'a output -> int -> unit + val write_i32 : 'a output -> int -> unit + val write_real_i32 : 'a output -> int32 -> unit + val write_i64 : 'a output -> int64 -> unit + val write_float32 : 'a output -> float -> unit + val write_double : 'a output -> float -> unit + +end + +(** {6 Bits API} + + This enable you to read and write from an IO bit-by-bit or several bits + at the same time. +*) + +type in_bits +type out_bits + +exception Bits_error + +val input_bits : input -> in_bits +(** Read bits from an input *) + +val output_bits : 'a output -> out_bits +(** Write bits to an output *) + +val read_bits : in_bits -> int -> int +(** Read up to 31 bits, raise Bits_error if n < 0 or n > 31 *) + +val write_bits : out_bits -> nbits:int -> int -> unit +(** Write up to 31 bits represented as a value, raise Bits_error if nbits < 0 + or nbits > 31 or the value representation excess nbits. *) + +val flush_bits : out_bits -> unit +(** Flush remaining unwritten bits, adding up to 7 bits which values 0. *) + +val drop_bits : in_bits -> unit +(** Drop up to 7 buffered bits and restart to next input character. *) + +(** {6 Generic IO Object Wrappers} + + Theses OO Wrappers have been written to provide easy support of ExtLib + IO by external librairies. If you want your library to support ExtLib + IO without actually requiring ExtLib to compile, you can should implement + the classes [in_channel], [out_channel], [poly_in_channel] and/or + [poly_out_channel] which are the common IO specifications established + for ExtLib, OCamlNet and Camomile. + + (see http://www.ocaml-programming.de/tmp/IO-Classes.html for more details). +*) + +class in_channel : input -> + object + method input : Bytes.t -> int -> int -> int + method close_in : unit -> unit + end + +class out_channel : 'a output -> + object + method output : Bytes.t -> int -> int -> int + method flush : unit -> unit + method close_out : unit -> unit + end + +class in_chars : input -> + object + method get : unit -> char + method close_in : unit -> unit + end + +class out_chars : 'a output -> + object + method put : char -> unit + method flush : unit -> unit + method close_out : unit -> unit + end + +val from_in_channel : #in_channel -> input +val from_out_channel : #out_channel -> unit output +val from_in_chars : #in_chars -> input +val from_out_chars : #out_chars -> unit output diff --git a/src/META b/src/META new file mode 100644 index 0000000..5f6db55 --- /dev/null +++ b/src/META @@ -0,0 +1,7 @@ +description="Extended standard library" +requires="bytes" +archive(byte)="extLib.cma" +archive(native)="extLib.cmxa" +archive(byte, plugin) = "extLib.cma" +archive(native, plugin) = "extLib.cmxs" +exists_if = "extLib.cma" diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..696525f --- /dev/null +++ b/src/Makefile @@ -0,0 +1,67 @@ +# Makefile contributed by Alain Frisch + +# the list is topologically sorted +MODULES := \ + extBytes enum bitSet dynArray extArray extHashtbl extList extString extBuffer \ + global IO option pMap std uChar uTF8 base64 unzip refList optParse dllist extLib + +ifdef minimal +MODULES := $(filter-out unzip uChar uTF8, $(MODULES)) +endif + +CPPO_ARGS := $(shell ocaml configure.ml -cppo-args) +CPPO := cppo $(CPPO_ARGS) + +OCAML_ARGS := -pp "$(CPPO)" -g +OCAML_ARGS += $(shell ocaml configure.ml -compile-args) + +OCAMLC = ocamlfind ocamlc $(OCAML_ARGS) +OCAMLOPT = ocamlfind ocamlopt $(OCAML_ARGS) +OCAMLDOC = ocamldoc -pp "$(CPPO)" + +MLI = $(filter-out extLib.mli, $(MODULES:=.mli)) +CMI = $(MODULES:=.cmi) +CMO = $(MODULES:=.cmo) +CMX = $(MODULES:=.cmx) +CMT = $(MODULES:=.cmt) +CMTI = $(MODULES:=.cmti) + +.NOTPARALLEL: +.SUFFIXES: +.PHONY: build all opt cmxs doc install uninstall clean release + +build: all opt cmxs + +all: extLib.cma +opt: extLib.cmxa +cmxs: extLib.cmxs + +doc: $(MLI) + $(OCAMLC) -c $(MLI) + $(OCAMLDOC) -sort -html -d doc/ $(MLI) extLib.ml + +extLib.cma: $(CMO) + $(OCAMLC) -a -o $@ $^ +extLib.cmxa: $(CMX) + $(OCAMLOPT) -a -o $@ $^ +%.cmxs: %.cmxa + $(OCAMLOPT) -shared -linkall $< -o $@ +%.cmo: %.mli %.ml + $(OCAMLC) -c $^ +%.cmx: %.mli %.ml + $(OCAMLOPT) -c $^ +extLib.cmo: extLib.ml + $(OCAMLC) -c $< +extLib.cmx: extLib.ml + $(OCAMLOPT) -c $< +extBytes.mli: extBytes.ml + $(OCAMLC) -i $< > $@ + +install: + ocamlfind install -patch-version $(VERSION) extlib META extLib.cma $(MLI) $(CMI) -optional extLib.cmxa $(CMX) extLib.cmxs extLib.a extLib.lib $(CMT) $(CMTI) + +uninstall: + ocamlfind remove extlib + +clean: + rm -f *.cmo *.cmx *.o *.obj *.cmi *.cma *.cmxa *.cmxs *.a *.lib doc/*.html extBytes.mli diff --git a/src/base64.ml b/src/base64.ml new file mode 100644 index 0000000..e19cadc --- /dev/null +++ b/src/base64.ml @@ -0,0 +1,130 @@ +(* + * Base64 - Base64 codec + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +open ExtBytes + +exception Invalid_char +exception Invalid_table + +external unsafe_char_of_int : int -> char = "%identity" + +type encoding_table = char array +type decoding_table = int array + +let chars = [| + 'A';'B';'C';'D';'E';'F';'G';'H';'I';'J';'K';'L';'M';'N';'O';'P'; + 'Q';'R';'S';'T';'U';'V';'W';'X';'Y';'Z';'a';'b';'c';'d';'e';'f'; + 'g';'h';'i';'j';'k';'l';'m';'n';'o';'p';'q';'r';'s';'t';'u';'v'; + 'w';'x';'y';'z';'0';'1';'2';'3';'4';'5';'6';'7';'8';'9';'+';'/' +|] + +let make_decoding_table tbl = + if Array.length tbl <> 64 then raise Invalid_table; + let d = Array.make 256 (-1) in + for i = 0 to 63 do + Array.unsafe_set d (int_of_char (Array.unsafe_get tbl i)) i; + done; + d + +let inv_chars = make_decoding_table chars + +let encode ?(tbl=chars) ch = + if Array.length tbl <> 64 then raise Invalid_table; + let data = ref 0 in + let count = ref 0 in + let flush() = + if !count > 0 then begin + let d = (!data lsl (6 - !count)) land 63 in + IO.write ch (Array.unsafe_get tbl d); + end; + in + let write c = + let c = int_of_char c in + data := (!data lsl 8) lor c; + count := !count + 8; + while !count >= 6 do + count := !count - 6; + let d = (!data asr !count) land 63 in + IO.write ch (Array.unsafe_get tbl d) + done; + in + let output s p l = + for i = p to p + l - 1 do + write (Bytes.unsafe_get s i) + done; + l + in + IO.create_out ~write ~output + ~flush:(fun () -> flush(); IO.flush ch) + ~close:(fun() -> flush(); IO.close_out ch) + +let decode ?(tbl=inv_chars) ch = + if Array.length tbl <> 256 then raise Invalid_table; + let data = ref 0 in + let count = ref 0 in + let rec fetch() = + if !count >= 8 then begin + count := !count - 8; + let d = (!data asr !count) land 0xFF in + unsafe_char_of_int d + end else + let c = int_of_char (IO.read ch) in + let c = Array.unsafe_get tbl c in + if c = -1 then raise Invalid_char; + data := (!data lsl 6) lor c; + count := !count + 6; + fetch() + in + let read = fetch in + let input s p l = + let i = ref 0 in + try + while !i < l do + Bytes.unsafe_set s (p + !i) (fetch()); + incr i; + done; + l + with + IO.No_more_input when !i > 0 -> + !i + in + let close() = + count := 0; + IO.close_in ch + in + IO.create_in ~read ~input ~close + +let str_encode ?(tbl=chars) s = + let ch = encode ~tbl (IO.output_bytes()) in + IO.nwrite_string ch s; + IO.close_out ch + +let str_decode ?(tbl=inv_chars) s = + let ch = decode ~tbl (IO.input_bytes s) in + IO.nread_string ch ((Bytes.length s * 6) / 8) + +let encode_string ?(tbl=chars) s = + let ch = encode ~tbl (IO.output_string ()) in + IO.nwrite_string ch s; + IO.close_out ch + +let decode_string ?(tbl=inv_chars) s = + let ch = decode ~tbl (IO.input_string s) in + IO.nread_string ch ((String.length s * 6) / 8) diff --git a/src/base64.mli b/src/base64.mli new file mode 100644 index 0000000..bc40942 --- /dev/null +++ b/src/base64.mli @@ -0,0 +1,65 @@ +(* + * Base64 - Base64 codec + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** Base64 codec. + + 8-bit characters are encoded into 6-bit ones using ASCII lookup tables. + Default tables maps 0..63 values on characters A-Z, a-z, 0-9, '+' and '/' + (in that order). +*) + +open ExtBytes + +(** This exception is raised when reading an invalid character + from a base64 input. *) +exception Invalid_char + +(** This exception is raised if the encoding or decoding table + size is not correct. *) +exception Invalid_table + +(** An encoding table maps integers 0..63 to the corresponding char. *) +type encoding_table = char array + +(** A decoding table maps chars 0..255 to the corresponding 0..63 value + or -1 if the char is not accepted. *) +type decoding_table = int array + +(** erroneous interface, kept for compatibility use [encode_string] instead *) +val str_encode : ?tbl:encoding_table -> string -> Bytes.t + +(** erroneous interface, kept for compatibility use [decode_string] instead *) +val str_decode : ?tbl:decoding_table -> Bytes.t -> string + +(** Encode a string into Base64. *) +val encode_string : ?tbl:encoding_table -> string -> string + +(** Decode a string encoded into Base64, raise [Invalid_char] if a + character in the input string is not a valid one. *) +val decode_string : ?tbl:decoding_table -> string -> string + +(** Generic base64 encoding over an output. *) +val encode : ?tbl:encoding_table -> 'a IO.output -> 'a IO.output + +(** Generic base64 decoding over an input. *) +val decode : ?tbl:decoding_table -> IO.input -> IO.input + +(** Create a valid decoding table from an encoding one. *) +val make_decoding_table : encoding_table -> decoding_table diff --git a/src/bitSet.ml b/src/bitSet.ml new file mode 100644 index 0000000..653e663 --- /dev/null +++ b/src/bitSet.ml @@ -0,0 +1,326 @@ +(* + * Bitset - Efficient bit sets + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +open ExtBytes + +type intern + +let bcreate : int -> intern = Obj.magic Bytes.create +external fast_get : intern -> int -> int = "%string_unsafe_get" +let fast_set : intern -> int -> int -> unit = Obj.magic Bytes.unsafe_set +external fast_bool : int -> bool = "%identity" +let fast_blit : intern -> int -> intern -> int -> int -> unit = Obj.magic Bytes.blit +let fast_fill : intern -> int -> int -> int -> unit = Obj.magic Bytes.fill +let fast_length : intern -> int= Obj.magic Bytes.length + +let bget s ndx = + assert (ndx >= 0 && ndx < fast_length s); + fast_get s ndx + +let bset s ndx v = + assert (ndx >= 0 && ndx < fast_length s); + fast_set s ndx v + +let bblit src srcoff dst dstoff len = + assert (srcoff >= 0 && dstoff >= 0 && len >= 0); + fast_blit src srcoff dst dstoff len + +let bfill dst start len c = + assert (start >= 0 && len >= 0); + fast_fill dst start len c + +exception Negative_index of string + +type t = { + mutable data : intern; + mutable len : int; +} + +let error fname = raise (Negative_index fname) + +let empty() = + { + data = bcreate 0; + len = 0; + } + +let int_size = 7 (* value used to round up index *) +let log_int_size = 3 (* number of shifts *) + +let create n = + if n < 0 then error "create"; + let size = (n+int_size) lsr log_int_size in + let b = bcreate size in + bfill b 0 size 0; + { + data = b; + len = size; + } + +let copy t = + let b = bcreate t.len in + bblit t.data 0 b 0 t.len; + { + data = b; + len = t.len + } + +let clone = copy + +let set t x = + if x < 0 then error "set"; + let pos = x lsr log_int_size and delta = x land int_size in + let size = t.len in + if pos >= size then begin + let b = bcreate (pos+1) in + bblit t.data 0 b 0 size; + bfill b size (pos - size + 1) 0; + t.len <- pos + 1; + t.data <- b; + end; + bset t.data pos ((bget t.data pos) lor (1 lsl delta)) + +let unset t x = + if x < 0 then error "unset"; + let pos = x lsr log_int_size and delta = x land int_size in + if pos < t.len then + bset t.data pos ((bget t.data pos) land (0xFF lxor (1 lsl delta))) + +let toggle t x = + if x < 0 then error "toggle"; + let pos = x lsr log_int_size and delta = x land int_size in + let size = t.len in + if pos >= size then begin + let b = bcreate (pos+1) in + bblit t.data 0 b 0 size; + bfill b size (pos - size + 1) 0; + t.len <- pos + 1; + t.data <- b; + end; + bset t.data pos ((bget t.data pos) lxor (1 lsl delta)) + +let put t = function + | true -> set t + | false -> unset t + +let is_set t x = + if x < 0 then error "is_set"; + let pos = x lsr log_int_size and delta = x land int_size in + let size = t.len in + if pos < size then + fast_bool (((bget t.data pos) lsr delta) land 1) + else + false + + +exception Break_int of int + +(* Find highest set element or raise Not_found *) +let find_msb t = + (* Find highest set bit in a byte. Does not work with zero. *) + let byte_msb b = + assert (b <> 0); + let rec loop n = + if b land (1 lsl n) = 0 then + loop (n-1) + else n in + loop 7 in + let n = t.len - 1 + and buf = t.data in + try + for i = n downto 0 do + let byte = bget buf i in + if byte <> 0 then raise (Break_int ((i lsl log_int_size)+(byte_msb byte))) + done; + raise Not_found + with + Break_int n -> n + | _ -> raise Not_found + +let compare t1 t2 = + let some_msb b = try Some (find_msb b) with Not_found -> None in + match (some_msb t1, some_msb t2) with + (None, Some _) -> -1 (* 0-y -> -1 *) + | (Some _, None) -> 1 (* x-0 -> 1 *) + | (None, None) -> 0 (* 0-0 -> 0 *) + | (Some a, Some b) -> (* x-y *) + if a < b then -1 + else if a > b then 1 + else + begin + (* MSBs differ, we need to scan arrays until we find a + difference *) + let ndx = a lsr log_int_size in + assert (ndx < t1.len && ndx < t2.len); + try + for i = ndx downto 0 do + let b1 = bget t1.data i + and b2 = bget t2.data i in + if b1 <> b2 then raise (Break_int (compare b1 b2)) + done; + 0 + with + Break_int res -> res + end + +let equals t1 t2 = + compare t1 t2 = 0 + +let partial_count t x = + let rec nbits x = + if x = 0 then + 0 + else if fast_bool (x land 1) then + 1 + (nbits (x lsr 1)) + else + nbits (x lsr 1) + in + let size = t.len in + let pos = x lsr log_int_size and delta = x land int_size in + let rec loop n acc = + if n = size then + acc + else + let x = bget t.data n in + loop (n+1) (acc + nbits x) + in + if pos >= size then + 0 + else + loop (pos+1) (nbits ((bget t.data pos) lsr delta)) + +let count t = + partial_count t 0 + +(* Find the first set bit in the bit array *) +let find_first_set b n = + (* TODO there are many ways to speed this up. Lookup table would be + one way to speed this up. *) + let find_lsb b = + assert (b <> 0); + let rec loop n = + if b land (1 lsl n) <> 0 then n else loop (n+1) in + loop 0 in + + let buf = b.data in + let rec find_bit byte_ndx bit_offs = + if byte_ndx >= b.len then + None + else + let byte = (bget buf byte_ndx) lsr bit_offs in + if byte = 0 then + find_bit (byte_ndx + 1) 0 + else + Some ((find_lsb byte) + (byte_ndx lsl log_int_size) + bit_offs) in + find_bit (n lsr log_int_size) (n land int_size) + +let enum t = + let rec make n = + let cur = ref n in + let rec next () = + match find_first_set t !cur with + Some elem -> + cur := (elem+1); + elem + | None -> + raise Enum.No_more_elements in + Enum.make + ~next + ~count:(fun () -> partial_count t !cur) + ~clone:(fun () -> make !cur) + in + make 0 + +let raw_create size = + let b = bcreate size in + bfill b 0 size 0; + { data = b; len = size } + +let inter a b = + let max_size = max a.len b.len in + let d = raw_create max_size in + let sl = min a.len b.len in + let abuf = a.data + and bbuf = b.data in + (* Note: rest of the array is set to zero automatically *) + for i = 0 to sl-1 do + bset d.data i ((bget abuf i) land (bget bbuf i)) + done; + d + +(* Note: rest of the array is handled automatically correct, since we + took a copy of the bigger set. *) +let union a b = + let d = if a.len > b.len then copy a else copy b in + let sl = min a.len b.len in + let abuf = a.data + and bbuf = b.data in + for i = 0 to sl-1 do + bset d.data i ((bget abuf i) lor (bget bbuf i)) + done; + d + +let diff a b = + let maxlen = max a.len b.len in + let buf = bcreate maxlen in + bblit a.data 0 buf 0 a.len; + let sl = min a.len b.len in + let abuf = a.data + and bbuf = b.data in + for i = 0 to sl-1 do + bset buf i ((bget abuf i) land (lnot (bget bbuf i))) + done; + { data = buf; len = maxlen } + +let sym_diff a b = + let maxlen = max a.len b.len in + let buf = bcreate maxlen in + (* Copy larger (assumes missing bits are zero) *) + bblit (if a.len > b.len then a.data else b.data) 0 buf 0 maxlen; + let sl = min a.len b.len in + let abuf = a.data + and bbuf = b.data in + for i = 0 to sl-1 do + bset buf i ((bget abuf i) lxor (bget bbuf i)) + done; + { data = buf; len = maxlen } + +(* TODO the following set operations can be made faster if you do the + set operation in-place instead of taking a copy. But be careful + when the sizes of the bitvector strings differ. *) +let intersect t t' = + let d = inter t t' in + t.data <- d.data; + t.len <- d.len + +let differentiate t t' = + let d = diff t t' in + t.data <- d.data; + t.len <- d.len + +let unite t t' = + let d = union t t' in + t.data <- d.data; + t.len <- d.len + +let differentiate_sym t t' = + let d = sym_diff t t' in + t.data <- d.data; + t.len <- d.len diff --git a/src/bitSet.mli b/src/bitSet.mli new file mode 100644 index 0000000..aeff7f7 --- /dev/null +++ b/src/bitSet.mli @@ -0,0 +1,101 @@ +(* + * Bitset - Efficient bit sets + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** Efficient bit sets. + + A bitset is an array of boolean values that can be accessed with indexes + like an array but provides a better memory usage (divided by 8) for a + very small speed trade-off. *) + +type t + +exception Negative_index of string +(** When a negative bit value is used for one of the BitSet functions, + this exception is raised with the name of the function. *) + +val empty : unit -> t +(** Create an empty bitset of size 0, the bitset will automatically expand + when needed. *) + +val create : int -> t +(** Create an empty bitset with an initial size (in number of bits). *) + +val copy : t -> t +(** Copy a bitset : further modifications of first one will not affect the + copy. *) + +val clone : t -> t +(** Same as [copy] *) + +val set : t -> int -> unit +(** [set s n] sets the nth-bit in the bitset [s] to true. *) + +val unset : t -> int -> unit +(** [unset s n] sets the nth-bit in the bitset [s] to false. *) + +val put : t -> bool -> int -> unit +(** [put s v n] sets the nth-bit in the bitset [s] to [v]. *) + +val toggle : t -> int -> unit +(** [toggle s n] changes the nth-bit value in the bitset [s]. *) + +val is_set : t -> int -> bool +(** [is_set s n] returns true if nth-bit in the bitset [s] is set, + or false otherwise. *) + +val compare : t -> t -> int +(** [compare s1 s2] compares two bitsets. Highest bit indexes are + compared first. *) + +val equals : t -> t -> bool +(** [equals s1 s2] returns true if, and only if, all bits values in s1 are + the same as in s2. *) + +val count : t -> int +(** [count s] returns the number of bits set in the bitset [s]. *) + +val enum : t -> int Enum.t +(** [enum s] returns an enumeration of bits which are set + in the bitset [s]. *) + +val intersect : t -> t -> unit +(** [intersect s t] sets [s] to the intersection of the sets [s] and [t]. *) + +val unite : t -> t -> unit +(** [unite s t] sets [s] to the union of the sets [s] and [t]. *) + +val differentiate : t -> t -> unit +(** [differentiate s t] removes the elements of [t] from [s]. *) + +val differentiate_sym : t -> t -> unit +(** [differentiate_sym s t] sets [s] to the symmetrical difference of the + sets [s] and [t]. *) + +val inter : t -> t -> t +(** [inter s t] returns the intersection of sets [s] and [t]. *) + +val union : t -> t -> t +(** [union s t] return the union of sets [s] and [t]. *) + +val diff : t -> t -> t +(** [diff s t] returns [s]-[t]. *) + +val sym_diff : t -> t -> t +(** [sym_diff s t] returns the symmetrical difference of [s] and [t]. *) diff --git a/src/configure.ml b/src/configure.ml new file mode 100644 index 0000000..f13d63e --- /dev/null +++ b/src/configure.ml @@ -0,0 +1,18 @@ +open Printf + +let show_bytes s = + let (_:int) = Sys.command (sprintf "ocamlfind query -format %s bytes" (Filename.quote s)) in () + +let () = + match Sys.argv with + | [|_;"-cppo-args"|] -> + let version = Scanf.sscanf Sys.ocaml_version "%d.%d." (fun major minor -> major * 100 + minor) in + printf "-D \\\"OCAML %d\\\"\n" version; + print_endline (if Sys.word_size = 32 then "-D WORD_SIZE_32 " else ""); + show_bytes "-D WITH_BYTES"; + exit 0 + | [|_;"-compile-args"|] -> + if Sys.ocaml_version >= "4.00.0" then print_endline "-bin-annot"; + show_bytes "-package bytes"; + exit 0 + | _ -> failwith "not gonna happen" diff --git a/src/dllist.ml b/src/dllist.ml new file mode 100644 index 0000000..cd21e67 --- /dev/null +++ b/src/dllist.ml @@ -0,0 +1,287 @@ +(* + * Dllist- a mutable, circular, doubly linked list library + * Copyright (C) 2004 Brian Hurt, Jesse Guardiani + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +type 'a node_t = { + mutable data : 'a; + mutable next : 'a node_t; + mutable prev : 'a node_t +} + +type 'a enum_t = { + mutable curr : 'a node_t; + mutable valid : bool +} + +exception Empty + +let create x = let rec nn = { data = x; next = nn; prev = nn} in nn + +let length node = + let rec loop cnt n = + if n == node then + cnt + else + loop (cnt + 1) n.next + in + loop 1 node.next + +let add node elem = + let nn = { data = elem; next = node.next; prev = node } in + node.next.prev <- nn; + node.next <- nn + +let append node elem = + let nn = { data = elem; next = node.next; prev = node } in + node.next.prev <- nn; + node.next <- nn; + nn + +let prepend node elem = + let nn = { data = elem; next = node; prev = node.prev } in + node.prev.next <- nn; + node.prev <- nn; + nn + +let promote node = + let next = node.next in + let prev = node.prev in + if next != prev then begin + next.next.prev <- node; + node.next <- next.next; + node.prev <- next; + next.next <- node; + next.prev <- prev; + prev.next <- next + end + +let demote node = + let next = node.next in + let prev = node.prev in + if next != prev then begin + prev.prev.next <- node; + node.prev <- prev.prev; + node.next <- prev; + prev.prev <- node; + prev.next <- next; + next.prev <- prev + end + +let remove node = + let next = node.next in + let prev = node.prev in + prev.next <- next; + next.prev <- prev; + node.next <- node; + node.prev <- node + +let drop node = + let next = node.next in + let prev = node.prev in + prev.next <- next; + next.prev <- prev; + node.next <- node; + node.prev <- node; + next + +let rev_drop node = + let next = node.next in + let prev = node.prev in + prev.next <- next; + next.prev <- prev; + node.next <- node; + node.prev <- node; + prev + +let splice node1 node2 = + let next = node1.next in + let prev = node2.prev in + node1.next <- node2; + node2.prev <- node1; + next.prev <- prev; + prev.next <- next + +let set node data = node.data <- data + +let get node = node.data + +let next node = node.next + +let prev node = node.prev + +let skip node idx = + let m = if idx > 0 then -1 else 1 in + let rec loop idx n = + if idx == 0 then + n + else + loop (idx + m) n.next + in + loop idx node + +let rev node = + let rec loop next n = + begin + let prev = n.prev in + n.next <- prev; + n.prev <- next; + + if n != node then + loop n prev + end + in + loop node node.prev + +let iter f node = + let () = f node.data in + let rec loop n = + if n != node then + let () = f n.data in + loop n.next + in + loop node.next + +let fold_left f init node = + let rec loop accu n = + if n == node then + accu + else + loop (f accu n.data) n.next + in + loop (f init node.data) node.next + +let fold_right f node init = + let rec loop accu n = + if n == node then + f n.data accu + else + loop (f n.data accu) n.prev + in + loop init node.prev + +let map f node = + let first = create (f node.data) in + let rec loop last n = + if n == node then + begin + first.prev <- last; + first + end + else + begin + let nn = { data = f n.data; next = first; prev = last } in + last.next <- nn; + loop nn n.next + end + in + loop first node.next + +let copy node = map (fun x -> x) node + +let to_list node = fold_right (fun d l -> d::l) node [] + +let of_list lst = + match lst with + | [] -> raise Empty + | h :: t -> + let first = create h in + let rec loop last = function + | [] -> + last.next <- first; + first.prev <- last; + first + | h :: t -> + let nn = { data = h; next = first; prev = last } in + last.next <- nn; + loop nn t + in + loop first t + +let enum node = + let next e () = + if e.valid == false then + raise Enum.No_more_elements + else + begin + let rval = e.curr.data in + e.curr <- e.curr.next; + + if (e.curr == node) then + e.valid <- false; + rval + end + and count e () = + if e.valid == false then + 0 + else + let rec loop cnt n = + if n == node then + cnt + else + loop (cnt + 1) (n.next) + in + loop 1 (e.curr.next) + in + let rec clone e () = + let e' = { curr = e.curr; valid = e.valid } in + Enum.make ~next:(next e') ~count:(count e') ~clone:(clone e') + in + let e = { curr = node; valid = true } in + Enum.make ~next:(next e) ~count:(count e) ~clone:(clone e) + +let rev_enum node = + let prev e () = + if e.valid == false then + raise Enum.No_more_elements + else + begin + let rval = e.curr.data in + e.curr <- e.curr.prev; + + if (e.curr == node) then + e.valid <- false; + rval + end + and count e () = + if e.valid == false then + 0 + else + let rec loop cnt n = + if n == node then + cnt + else + loop (cnt + 1) (n.prev) + in + loop 1 (e.curr.prev) + in + let rec clone e () = + let e' = { curr = e.curr; valid = e.valid } in + Enum.make ~next:(prev e') ~count:(count e') ~clone:(clone e') + in + let e = { curr = node; valid = true } in + Enum.make ~next:(prev e) ~count:(count e) ~clone:(clone e) + +let of_enum enm = + match Enum.get enm with + | None -> raise Empty + | Some(d) -> + let first = create d in + let f d n = append n d in + ignore(Enum.fold f first enm); + first diff --git a/src/dllist.mli b/src/dllist.mli new file mode 100644 index 0000000..0e0990e --- /dev/null +++ b/src/dllist.mli @@ -0,0 +1,182 @@ +(* + * Dllist- a mutable, circular, doubly linked list library + * Copyright (C) 2004 Brian Hurt, Jesse Guardiani + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** A mutable, imperative, circular, doubly linked list library + + This module implements a doubly linked list in a mutable or imperitive + style (changes to the list are visible to all copies of the list). +*) + + +type 'a node_t (* abstract *) + +exception Empty + +(** {6 node functions } *) + +(** Creates a node. This is an O(1) operation. *) +val create : 'a -> 'a node_t + +(** Copy the list attached to the given node and return the copy of the given + node. This is an O(N) operation. +*) +val copy : 'a node_t -> 'a node_t + +(** Returns the length of the list. This is an O(N) operation. *) +val length : 'a node_t -> int + +(** List reversal. This is an O(N) operation. +*) +val rev : 'a node_t -> unit + +(** [add n a] Creates a new node containing data [a] and inserts it into + the list after node [n]. This is an O(1) operation. +*) +val add : 'a node_t -> 'a -> unit + +(** [append n a] Creates a new node containing data [a] and inserts it into + the list after node [n]. Returns new node. This is an O(1) operation. +*) +val append : 'a node_t -> 'a -> 'a node_t + +(** [prepend n a] Creates a new node containing data [a] and inserts it into + the list before node [n]. Returns new node. This is an O(1) operation. +*) +val prepend : 'a node_t -> 'a -> 'a node_t + +(** [promote n] Swaps [n] with [next n]. This is an O(1) operation. +*) +val promote : 'a node_t -> unit + +(** [demote n] Swaps [n] with [prev n]. This is an O(1) operation. +*) +val demote : 'a node_t -> unit + +(** Remove node from the list no matter where it is. This is an O(1) operation. +*) +val remove : 'a node_t -> unit + +(** Remove node from the list no matter where it is. Return next node. This is + an O(1) operation. +*) +val drop : 'a node_t -> 'a node_t + +(** Remove node from the list no matter where it is. Return previous node. This + is an O(1) operation. +*) +val rev_drop : 'a node_t -> 'a node_t + +(** [splice n1 n2] Connects [n1] and [n2] so that + [next n1 == n2 && prev n2 == n1]. This can be used to connect two discrete + lists, or, if used on two nodes within the same list, it can be used to + separate the nodes between [n1] and [n2] from the rest of the list. In this + case, those nodes become a discrete list by themselves. This is an O(1) + operation. +*) +val splice : 'a node_t -> 'a node_t -> unit + +(** Given a node, get the data associated with that node. This is an + O(1) operation. +*) +val get : 'a node_t -> 'a + +(** Given a node, set the data associated with that node. This is an O(1) + operation. +*) +val set : 'a node_t -> 'a -> unit + +(** Given a node, get the next element in the list after the node. + + The list is circular, so the last node of the list returns the first + node of the list as it's next node. + + This is an O(1) operation. +*) +val next : 'a node_t -> 'a node_t + +(** Given a node, get the previous element in the list before the node. + + The list is circular, so the first node of the list returns the + last element of the list as it's previous node. + + This is an O(1) operation. +*) +val prev : 'a node_t -> 'a node_t + +(** [skip n i] Return the node that is [i] nodes after node [n] in the list. + If [i] is negative then return the node that is [i] nodes before node [n] + in the list. This is an O(N) operation. +*) +val skip : 'a node_t -> int -> 'a node_t + +(** [iter f n] Apply [f] to every element in the list, starting at [n]. This + is an O(N) operation. +*) +val iter : ('a -> unit) -> 'a node_t -> unit + +(** Accumulate a value over the entire list. + This works like List.fold_left. This is an O(N) operation. +*) +val fold_left : ('a -> 'b -> 'a) -> 'a -> 'b node_t -> 'a + +(** Accumulate a value over the entire list. + This works like List.fold_right, but since the list is bidirectional, + it doesn't suffer the performance problems of List.fold_right. + This is an O(N) operation. +*) +val fold_right : ('a -> 'b -> 'b) -> 'a node_t -> 'b -> 'b + +(** Allocate a new list, with entirely new nodes, whose values are + the transforms of the values of the original list. Note that this + does not modify the given list. This is an O(N) operation. +*) +val map : ('a -> 'b) -> 'a node_t -> 'b node_t + + +(** {6 list conversion } *) + +(** Converts a dllist to a normal list. This is an O(N) operation. *) +val to_list : 'a node_t -> 'a list + +(** Converts from a normal list to a Dllist and returns the first node. Raises + [Empty] if given list is empty. This is an O(N) operation. +*) +val of_list : 'a list -> 'a node_t + + +(** {6 enums } *) + +(** Create an enum of the list. + Note that modifying the list while the enum exists will have undefined + effects. This is an O(1) operation. +*) +val enum : 'a node_t -> 'a Enum.t + +(** Create a reverse enum of the list. + Note that modifying the list while the enum exists will have undefined + effects. This is an O(1) operation. +*) +val rev_enum : 'a node_t -> 'a Enum.t + +(** Create a dllist from an enum. + This consumes the enum, and allocates a whole new dllist. Raises + [Empty] if given enum is empty. This is an O(N) operation. +*) +val of_enum : 'a Enum.t -> 'a node_t diff --git a/src/doc/style.css b/src/doc/style.css new file mode 100644 index 0000000..e1db9ad --- /dev/null +++ b/src/doc/style.css @@ -0,0 +1,24 @@ +body { padding: 0px 20px 0px 26px; background: #ffffff; color: #000000; font-family: Verdana, Arial, Helvetica, sans-serif; font-size: 90%; } +h1 { padding : 5px 0px 5px 0px; font-size : 16pt; font-weight : normal; background-color : #E0E0FF } +h6 { padding : 5px 0px 5px 20px; font-size : 16pt; font-weight : normal; background-color : #E0E0FF } +a:link, a:visited, a:active { text-decoration: none; } +a:link { color: #000077; } +a:visited { color: #000077; } +a:hover { color: #cc9900; } +.keyword { font-weight : bold ; color : Blue } +.keywordsign { color : #606060 } +.superscript { font-size : 4 } +.subscript { font-size : 4 } +.comment { color : #606060 } +.constructor { color : #808080; } +.type { color : #606060 } +.string { color : Red } +.warning { color : Red ; font-weight : bold } +.info { margin-left : 3em; margin-right : 3em } +.code { color : #606060 ; } +.title1 { font-size : 16pt ; background-color : #E0E0E0 } +.title2 { font-size : 16pt ; background-color : #E0E0E0 } +.title3 { font-size : 16pt ; background-color : #E0E0E0 } +.title4 { font-size : 16pt ; background-color : #E0E0E0 } +.title5 { font-size : 16pt ; background-color : #E0E0E0 } +.title6 { font-size : 16pt ; background-color : #E0E0E0; } \ No newline at end of file diff --git a/src/dynArray.ml b/src/dynArray.ml new file mode 100644 index 0000000..02c3f49 --- /dev/null +++ b/src/dynArray.ml @@ -0,0 +1,451 @@ +(* + * DynArray - Resizeable Ocaml arrays + * Copyright (C) 2003 Brian Hurt + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +type resizer_t = currslots:int -> oldlength:int -> newlength:int -> int + +type 'a intern + +external ilen : 'a intern -> int = "%obj_size" +let idup (x : 'a intern) = if ilen x = 0 then x else (Obj.magic (Obj.dup (Obj.repr x)) : 'a intern) +let imake tag len = (Obj.magic (Obj.new_block tag len) : 'a intern) +external iget : 'a intern -> int -> 'a = "%obj_field" +external iset : 'a intern -> int -> 'a -> unit = "%obj_set_field" + +type 'a t = { + mutable arr : 'a intern; + mutable len : int; + mutable resize: resizer_t; +} + +exception Invalid_arg of int * string * string + +let invalid_arg n f p = raise (Invalid_arg (n,f,p)) + +let length d = d.len + +let exponential_resizer ~currslots ~oldlength ~newlength = + let rec doubler x = if x >= newlength then x else doubler (x * 2) in + let rec halfer x = if x / 2 < newlength then x else halfer (x / 2) in + if newlength = 1 then + 1 + else if currslots = 0 then + doubler 1 + else if currslots < newlength then + doubler currslots + else + halfer currslots + +let step_resizer step = + if step <= 0 then invalid_arg step "step_resizer" "step"; + (fun ~currslots ~oldlength ~newlength -> + if currslots < newlength || newlength < (currslots - step) + then + (newlength + step - (newlength mod step)) + else + currslots) + +let conservative_exponential_resizer ~currslots ~oldlength ~newlength = + let rec doubler x = if x >= newlength then x else doubler (x * 2) in + let rec halfer x = if x / 2 < newlength then x else halfer (x / 2) in + if currslots < newlength then begin + if newlength = 1 then + 1 + else if currslots = 0 then + doubler 1 + else + doubler currslots + end else if oldlength < newlength then + halfer currslots + else + currslots + +let default_resizer = conservative_exponential_resizer + +let changelen (d : 'a t) newlen = + if newlen > Sys.max_array_length then invalid_arg newlen "changelen" "newlen"; + + let oldsize = ilen d.arr in + let r = d.resize + ~currslots:oldsize + ~oldlength:d.len + ~newlength:newlen + in + (* We require the size to be at least large enough to hold the number + * of elements we know we need! + * Also be sure not to exceed max_array_length + *) + let newsize = if r < newlen then newlen else min Sys.max_array_length r in + if newsize <> oldsize then begin + let newarr = imake 0 newsize in + let cpylen = (if newlen < d.len then newlen else d.len) in + for i = 0 to cpylen - 1 do + iset newarr i (iget d.arr i); + done; + d.arr <- newarr; + end; + d.len <- newlen + +let compact d = + if d.len <> ilen d.arr then begin + let newarr = imake 0 d.len in + for i = 0 to d.len - 1 do + iset newarr i (iget d.arr i) + done; + d.arr <- newarr; + end + +let create() = + { + resize = default_resizer; + len = 0; + arr = imake 0 0; + } + +let make initsize = + if initsize < 0 then invalid_arg initsize "make" "size"; + { + resize = default_resizer; + len = 0; + arr = imake 0 initsize; + } + +let init initlen f = + if initlen < 0 then invalid_arg initlen "init" "len"; + let arr = imake 0 initlen in + for i = 0 to initlen-1 do + iset arr i (f i) + done; + { + resize = default_resizer; + len = initlen; + arr = arr; + } + +let set_resizer d resizer = + d.resize <- resizer + +let get_resizer d = + d.resize + +let empty d = + d.len = 0 + +let get d idx = + if idx < 0 || idx >= d.len then invalid_arg idx "get" "index"; + iget d.arr idx + +let last d = + if d.len = 0 then invalid_arg 0 "last" ""; + iget d.arr (d.len - 1) + +let set d idx v = + if idx < 0 || idx >= d.len then invalid_arg idx "set" "index"; + iset d.arr idx v + +let insert d idx v = + if idx < 0 || idx > d.len then invalid_arg idx "insert" "index"; + if d.len = ilen d.arr then changelen d (d.len + 1) else d.len <- d.len + 1; + if idx < d.len - 1 then begin + for i = d.len - 2 downto idx do + iset d.arr (i+1) (iget d.arr i) + done; + end; + iset d.arr idx v + +let add d v = + if d.len = ilen d.arr then changelen d (d.len + 1) else d.len <- d.len + 1; + iset d.arr (d.len - 1) v + +let delete d idx = + if idx < 0 || idx >= d.len then invalid_arg idx "delete" "index"; + let oldsize = ilen d.arr in + (* we don't call changelen because we want to blit *) + let r = d.resize + ~currslots:oldsize + ~oldlength:d.len + ~newlength:(d.len - 1) + in + let newsize = (if r < d.len - 1 then d.len - 1 else r) in + if oldsize <> newsize then begin + let newarr = imake 0 newsize in + for i = 0 to idx - 1 do + iset newarr i (iget d.arr i); + done; + for i = idx to d.len - 2 do + iset newarr i (iget d.arr (i+1)); + done; + d.arr <- newarr; + end else begin + for i = idx to d.len - 2 do + iset d.arr i (iget d.arr (i+1)); + done; + iset d.arr (d.len - 1) (Obj.magic 0) + end; + d.len <- d.len - 1 + + +let delete_range d idx len = + if len < 0 then invalid_arg len "delete_range" "length"; + if idx < 0 || idx + len > d.len then invalid_arg idx "delete_range" "index"; + let oldsize = ilen d.arr in + (* we don't call changelen because we want to blit *) + let r = d.resize + ~currslots:oldsize + ~oldlength:d.len + ~newlength:(d.len - len) + in + let newsize = (if r < d.len - len then d.len - len else r) in + if oldsize <> newsize then begin + let newarr = imake 0 newsize in + for i = 0 to idx - 1 do + iset newarr i (iget d.arr i); + done; + for i = idx to d.len - len - 1 do + iset newarr i (iget d.arr (i+len)); + done; + d.arr <- newarr; + end else begin + for i = idx to d.len - len - 1 do + iset d.arr i (iget d.arr (i+len)); + done; + for i = d.len - len to d.len - 1 do + iset d.arr i (Obj.magic 0) + done; + end; + d.len <- d.len - len + +let clear d = + d.len <- 0; + d.arr <- imake 0 0 + +let delete_last d = + if d.len <= 0 then invalid_arg 0 "delete_last" ""; + (* erase for GC, in case changelen don't resize our array *) + iset d.arr (d.len - 1) (Obj.magic 0); + changelen d (d.len - 1) + +let rec blit src srcidx dst dstidx len = + if len < 0 then invalid_arg len "blit" "len"; + if srcidx < 0 || srcidx + len > src.len then invalid_arg srcidx "blit" "source index"; + if dstidx < 0 || dstidx > dst.len then invalid_arg dstidx "blit" "dest index"; + let newlen = dstidx + len in + if newlen > ilen dst.arr then begin + (* this case could be inlined so we don't blit on just-copied elements *) + changelen dst newlen + end else begin + if newlen > dst.len then dst.len <- newlen; + end; + (* same array ! we need to copy in reverse order *) + if src.arr == dst.arr && dstidx > srcidx then + for i = len - 1 downto 0 do + iset dst.arr (dstidx+i) (iget src.arr (srcidx+i)); + done + else + for i = 0 to len - 1 do + iset dst.arr (dstidx+i) (iget src.arr (srcidx+i)); + done + +let append src dst = + blit src 0 dst dst.len src.len + +let to_list d = + let rec loop idx accum = + if idx < 0 then accum else loop (idx - 1) (iget d.arr idx :: accum) + in + loop (d.len - 1) [] + +let to_array d = + if d.len = 0 then begin + (* since the empty array is an atom, we don't care if float or not *) + [||] + end else begin + let arr = Array.make d.len (iget d.arr 0) in + for i = 1 to d.len - 1 do + Array.unsafe_set arr i (iget d.arr i) + done; + arr; + end + +let of_list lst = + let size = List.length lst in + let arr = imake 0 size in + let rec loop idx = function + | h :: t -> iset arr idx h; loop (idx + 1) t + | [] -> () + in + loop 0 lst; + { + resize = default_resizer; + len = size; + arr = arr; + } + +let of_array src = + let size = Array.length src in + let is_float = Obj.tag (Obj.repr src) = Obj.double_array_tag in + let arr = (if is_float then begin + let arr = imake 0 size in + for i = 0 to size - 1 do + iset arr i (Array.unsafe_get src i); + done; + arr + end else + (* copy the fields *) + idup (Obj.magic src : 'a intern)) + in + { + resize = default_resizer; + len = size; + arr = arr; + } + +let copy src = + { + resize = src.resize; + len = src.len; + arr = idup src.arr; + } + +let sub src start len = + if len < 0 then invalid_arg len "sub" "len"; + if start < 0 || start + len > src.len then invalid_arg start "sub" "start"; + let arr = imake 0 len in + for i = 0 to len - 1 do + iset arr i (iget src.arr (i+start)); + done; + { + resize = src.resize; + len = len; + arr = arr; + } + +let iter f d = + for i = 0 to d.len - 1 do + f (iget d.arr i) + done + +let iteri f d = + for i = 0 to d.len - 1 do + f i (iget d.arr i) + done + +let filter f d = + let l = d.len in + let a = imake 0 l in + let a2 = d.arr in + let p = ref 0 in + for i = 0 to l - 1 do + let x = iget a2 i in + if f x then begin + iset a !p x; + incr p; + end; + done; + d.len <- !p; + d.arr <- a + +let index_of f d = + let rec loop i = + if i >= d.len then + raise Not_found + else + if f (iget d.arr i) then + i + else + loop (i+1) + in + loop 0 + +let map f src = + let arr = imake 0 src.len in + for i = 0 to src.len - 1 do + iset arr i (f (iget src.arr i)) + done; + { + resize = src.resize; + len = src.len; + arr = arr; + } + +let mapi f src = + let arr = imake 0 src.len in + for i = 0 to src.len - 1 do + iset arr i (f i (iget src.arr i)) + done; + { + resize = src.resize; + len = src.len; + arr = arr; + } + +let fold_left f x a = + let rec loop idx x = + if idx >= a.len then x else loop (idx + 1) (f x (iget a.arr idx)) + in + loop 0 x + +let fold_right f a x = + let rec loop idx x = + if idx < 0 then x + else loop (idx - 1) (f (iget a.arr idx) x) + in + loop (a.len - 1) x + +let enum d = + let rec make start = + let idxref = ref 0 in + let next () = + if !idxref >= d.len then + raise Enum.No_more_elements + else + let retval = iget d.arr !idxref in + incr idxref; + retval + and count () = + if !idxref >= d.len then 0 + else d.len - !idxref + and clone () = + make !idxref + in + Enum.make ~next:next ~count:count ~clone:clone + in + make 0 + +let of_enum e = + if Enum.fast_count e then begin + let c = Enum.count e in + let arr = imake 0 c in + Enum.iteri (fun i x -> iset arr i x) e; + { + resize = default_resizer; + len = c; + arr = arr; + } + end else + let d = make 0 in + Enum.iter (add d) e; + d + +let unsafe_get a n = + iget a.arr n + +let unsafe_set a n x = + iset a.arr n x diff --git a/src/dynArray.mli b/src/dynArray.mli new file mode 100644 index 0000000..31996e0 --- /dev/null +++ b/src/dynArray.mli @@ -0,0 +1,281 @@ +(* + * DynArray - Resizeable Ocaml arrays + * Copyright (C) 2003 Brian Hurt + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** Dynamic arrays. + + A dynamic array is equivalent to a OCaml array that will resize itself + when elements are added or removed, except that floats are boxed and + that no initialization element is required. +*) + +type 'a t + +exception Invalid_arg of int * string * string +(** When an operation on an array fails, [Invalid_arg] is raised. The + integer is the value that made the operation fail, the first string + contains the function name that has been called and the second string + contains the parameter name that made the operation fail. +*) + +(** {6 Array creation} *) + +val create : unit -> 'a t +(** [create()] returns a new empty dynamic array. *) + +val make : int -> 'a t +(** [make count] returns an array with some memory already allocated so + up to [count] elements can be stored into it without resizing. *) + +val init : int -> (int -> 'a) -> 'a t +(** [init n f] returns an array of [n] elements filled with values + returned by [f 0 , f 1, ... f (n-1)]. *) + +(** {6 Array manipulation functions} *) + +val empty : 'a t -> bool +(** Return true if the number of elements in the array is 0. *) + +val length : 'a t -> int +(** Return the number of elements in the array. *) + +val get : 'a t -> int -> 'a +(** [get darr idx] gets the element in [darr] at index [idx]. If [darr] has + [len] elements in it, then the valid indexes range from [0] to [len-1]. *) + +val last : 'a t -> 'a +(** [last darr] returns the last element of [darr]. *) + +val set : 'a t -> int -> 'a -> unit +(** [set darr idx v] sets the element of [darr] at index [idx] to value + [v]. The previous value is overwritten. *) + +val insert : 'a t -> int -> 'a -> unit +(** [insert darr idx v] inserts [v] into [darr] at index [idx]. All elements + of [darr] with an index greater than or equal to [idx] have their + index incremented (are moved up one place) to make room for the new + element. *) + +val add : 'a t -> 'a -> unit +(** [add darr v] appends [v] onto [darr]. [v] becomes the new + last element of [darr]. *) + +val append : 'a t -> 'a t -> unit +(** [append src dst] adds all elements of [src] to the end of [dst]. *) + +val delete : 'a t -> int -> unit +(** [delete darr idx] deletes the element of [darr] at [idx]. All elements + with an index greater than [idx] have their index decremented (are + moved down one place) to fill in the hole. *) + +val delete_last : 'a t -> unit +(** [delete_last darr] deletes the last element of [darr]. This is equivalent + of doing [delete darr ((length darr) - 1)]. *) + +val delete_range : 'a t -> int -> int -> unit +(** [delete_range darr p len] deletes [len] elements starting at index [p]. + All elements with an index greater than [p+len] are moved to fill + in the hole. *) + +val clear : 'a t -> unit +(** remove all elements from the array and resize it to 0. *) + +val blit : 'a t -> int -> 'a t -> int -> int -> unit +(** [blit src srcidx dst dstidx len] copies [len] elements from [src] + starting with index [srcidx] to [dst] starting at [dstidx]. *) + +val compact : 'a t -> unit +(** [compact darr] ensures that the space allocated by the array is minimal.*) + +(** {6 Array copy and conversion} *) + +val to_list : 'a t -> 'a list +(** [to_list darr] returns the elements of [darr] in order as a list. *) + +val to_array : 'a t -> 'a array +(** [to_array darr] returns the elements of [darr] in order as an array. *) + +val enum : 'a t -> 'a Enum.t +(** [enum darr] returns the enumeration of [darr] elements. *) + +val of_list : 'a list -> 'a t +(** [of_list lst] returns a dynamic array with the elements of [lst] in + it in order. *) + +val of_array : 'a array -> 'a t +(** [of_array arr] returns an array with the elements of [arr] in it + in order. *) + +val of_enum : 'a Enum.t -> 'a t +(** [of_enum e] returns an array that holds, in order, the elements of [e]. *) + +val copy : 'a t -> 'a t +(** [copy src] returns a fresh copy of [src], such that no modification of + [src] affects the copy, or vice versa (all new memory is allocated for + the copy). *) + +val sub : 'a t -> int -> int -> 'a t +(** [sub darr start len] returns an array holding the subset of [len] + elements from [darr] starting with the element at index [idx]. *) + +(** {6 Array functional support} *) + +val iter : ('a -> unit) -> 'a t -> unit +(** [iter f darr] calls the function [f] on every element of [darr]. It + is equivalent to [for i = 0 to length darr - 1 do f (get darr i) done;] *) + +val iteri : (int -> 'a -> unit) -> 'a t -> unit +(** [iteri f darr] calls the function [f] on every element of [darr]. It + is equivalent to [for i = 0 to length darr - 1 do f i (get darr i) done;] + *) + +val map : ('a -> 'b) -> 'a t -> 'b t +(** [map f darr] applies the function [f] to every element of [darr] + and creates a dynamic array from the results - similar to [List.map] or + [Array.map]. *) + +val mapi : (int -> 'a -> 'b) -> 'a t -> 'b t +(** [mapi f darr] applies the function [f] to every element of [darr] + and creates a dynamic array from the results - similar to [List.mapi] or + [Array.mapi]. *) + +val fold_left : ('a -> 'b -> 'a) -> 'a -> 'b t -> 'a +(** [fold_left f x darr] computes + [f ( ... ( f ( f (get darr 0) x) (get darr 1) ) ... ) (get darr n-1)], + similar to [Array.fold_left] or [List.fold_left]. *) + +val fold_right : ('a -> 'b -> 'b) -> 'a t -> 'b -> 'b +(** [fold_right f darr x] computes + [ f (get darr 0) (f (get darr 1) ( ... ( f (get darr n-1) x ) ... ) ) ] + similar to [Array.fold_right] or [List.fold_right]. *) + +val index_of : ('a -> bool) -> 'a t -> int +(** [index_of f darr] returns the index of the first element [x] in darr such + as [f x] returns [true] or raise [Not_found] if not found. *) + +val filter : ('a -> bool) -> 'a t -> unit + +(** {6 Array resizers} *) + +type resizer_t = currslots:int -> oldlength:int -> newlength:int -> int +(** The type of a resizer function. + + Resizer functions are called whenever elements are added to + or removed from the dynamic array to determine what the current number of + storage spaces in the array should be. The three named arguments + passed to a resizer are the current number of storage spaces in + the array, the length of the array before the elements are + added or removed, and the length the array will be after the + elements are added or removed. If elements are being added, newlength + will be larger than oldlength, if elements are being removed, + newlength will be smaller than oldlength. If the resizer function + returns exactly oldlength, the size of the array is only changed when + adding an element while there is not enough space for it. + + By default, all dynamic arrays are created with the [default_resizer]. + When a dynamic array is created from another dynamic array (using [copy], + [map] , etc. ) the resizer of the copy will be the same as the original + dynamic array resizer. To change the resizer, use the [set_resizer] + function. +*) + +val set_resizer : 'a t -> resizer_t -> unit +(** Change the resizer for this array. *) + +val get_resizer : 'a t -> resizer_t +(** Get the current resizer function for a given array *) + +val default_resizer : resizer_t +(** The default resizer function the library is using - in this version + of DynArray, this is the [exponential_resizer] but should change in + next versions. *) + +val exponential_resizer : resizer_t +(** The exponential resizer- The default resizer except when the resizer + is being copied from some other darray. + + [exponential_resizer] works by doubling or halving the number of + slots until they "fit". If the number of slots is less than the + new length, the number of slots is doubled until it is greater + than the new length (or Sys.max_array_size is reached). + + If the number of slots is more than four times the new length, + the number of slots is halved until it is less than four times the + new length. + + Allowing darrays to fall below 25% utilization before shrinking them + prevents "thrashing". Consider the case where the caller is constantly + adding a few elements, and then removing a few elements, causing + the length to constantly cross above and below a power of two. + Shrinking the array when it falls below 50% would causing the + underlying array to be constantly allocated and deallocated. + A few elements would be added, causing the array to be reallocated + and have a usage of just above 50%. Then a few elements would be + remove, and the array would fall below 50% utilization and be + reallocated yet again. The bulk of the array, untouched, would be + copied and copied again. By setting the threshold at 25% instead, + such "thrashing" only occurs with wild swings- adding and removing + huge numbers of elements (more than half of the elements in the array). + + [exponential_resizer] is a good performing resizer for most + applications. A list allocates 2 words for every element, while an + array (with large numbers of elements) allocates only 1 word per + element (ignoring unboxed floats). On insert, [exponential_resizer] + keeps the amount of wasted "extra" array elements below 50%, meaning + that less than 2 words per element are used. Even on removals + where the amount of wasted space is allowed to rise to 75%, that + only means that darray is using 4 words per element. This is + generally not a significant overhead. + + Furthermore, [exponential_resizer] minimizes the number of copies + needed- appending n elements into an empty darray with initial size + 0 requires between n and 2n elements of the array be copied- O(n) + work, or O(1) work per element (on average). A similar argument + can be made that deletes from the end of the array are O(1) as + well (obviously deletes from anywhere else are O(n) work- you + have to move the n or so elements above the deleted element down). + +*) + +val step_resizer : int -> resizer_t +(** The stepwise resizer- another example of a resizer function, this + time of a parameterized resizer. + + The resizer returned by [step_resizer step] returns the smallest + multiple of [step] larger than [newlength] if [currslots] is less + then [newlength]-[step] or greater than [newlength]. + + For example, to make an darray with a step of 10, a length + of len, and a null of null, you would do: + [make] ~resizer:([step_resizer] 10) len null +*) + +val conservative_exponential_resizer : resizer_t +(** [conservative_exponential_resizer] is an example resizer function + which uses the oldlength parameter. It only shrinks the array + on inserts- no deletes shrink the array, only inserts. It does + this by comparing the oldlength and newlength parameters. Other + than that, it acts like [exponential_resizer]. +*) + +(** {6 Unsafe operations} **) + +val unsafe_get : 'a t -> int -> 'a +val unsafe_set : 'a t -> int -> 'a -> unit diff --git a/src/enum.ml b/src/enum.ml new file mode 100644 index 0000000..8e4028c --- /dev/null +++ b/src/enum.ml @@ -0,0 +1,378 @@ +(* + * Enum - Enumeration over abstract collection of elements. + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +type 'a t = { + mutable count : unit -> int; + mutable next : unit -> 'a; + mutable clone : unit -> 'a t; + mutable fast : bool; +} + +(* raised by 'next' functions, should NOT go outside the API *) +exception No_more_elements + +let _dummy () = assert false + +let make ~next ~count ~clone = + { + count = count; + next = next; + clone = clone; + fast = true; + } + +let rec init n f = + if n < 0 then invalid_arg "Enum.init"; + let count = ref n in + { + count = (fun () -> !count); + next = (fun () -> + match !count with + | 0 -> raise No_more_elements + | _ -> + decr count; + f (n - 1 - !count)); + clone = (fun () -> init !count f); + fast = true; + } + +let rec empty () = + { + count = (fun () -> 0); + next = (fun () -> raise No_more_elements); + clone = (fun () -> empty()); + fast = true; + } + +type 'a _mut_list = { + hd : 'a; + mutable tl : 'a _mut_list; +} + +let force t = + let rec clone enum count = + let enum = ref !enum + and count = ref !count in + { + count = (fun () -> !count); + next = (fun () -> + match !enum with + | [] -> raise No_more_elements + | h :: t -> decr count; enum := t; h); + clone = (fun () -> + let enum = ref !enum + and count = ref !count in + clone enum count); + fast = true; + } + in + let count = ref 0 in + let _empty = Obj.magic [] in + let rec loop dst = + let x = { hd = t.next(); tl = _empty } in + incr count; + dst.tl <- x; + loop x + in + let enum = ref _empty in + (try + enum := { hd = t.next(); tl = _empty }; + incr count; + loop !enum; + with No_more_elements -> ()); + let tc = clone (Obj.magic enum) count in + t.clone <- tc.clone; + t.next <- tc.next; + t.count <- tc.count; + t.fast <- true + +let from f = + let e = { + next = f; + count = _dummy; + clone = _dummy; + fast = false; + } in + e.count <- (fun () -> force e; e.count()); + e.clone <- (fun () -> force e; e.clone()); + e + +let from2 next clone = + let e = { + next = next; + count = _dummy; + clone = clone; + fast = false; + } in + e.count <- (fun () -> force e; e.count()); + e + +let next t = t.next () + +let get t = + try + Some (t.next()) + with + No_more_elements -> None + +let push t e = + let rec make t = + let fnext = t.next in + let fcount = t.count in + let fclone = t.clone in + let next_called = ref false in + t.next <- (fun () -> + next_called := true; + t.next <- fnext; + t.count <- fcount; + t.clone <- fclone; + e); + t.count <- (fun () -> + let n = fcount() in + if !next_called then n else n+1); + t.clone <- (fun () -> + let tc = fclone() in + if not !next_called then make tc; + tc); + in + make t + +let peek t = + match get t with + | None -> None + | Some x -> + push t x; + Some x + +let junk t = + try + ignore(t.next()) + with + No_more_elements -> () + +let is_empty t = + if t.fast then + t.count() = 0 + else + peek t = None + +let count t = + t.count() + +let fast_count t = + t.fast + +let clone t = + t.clone() + +let iter f t = + let rec loop () = + f (t.next()); + loop(); + in + try + loop(); + with + No_more_elements -> () + +let iteri f t = + let rec loop idx = + f idx (t.next()); + loop (idx+1); + in + try + loop 0; + with + No_more_elements -> () + +let iter2 f t u = + let push_t = ref None in + let rec loop () = + push_t := None; + let e = t.next() in + push_t := Some e; + f e (u.next()); + loop () + in + try + loop () + with + No_more_elements -> + match !push_t with + | None -> () + | Some e -> + push t e + +let iter2i f t u = + let push_t = ref None in + let rec loop idx = + push_t := None; + let e = t.next() in + push_t := Some e; + f idx e (u.next()); + loop (idx + 1) + in + try + loop 0 + with + No_more_elements -> + match !push_t with + | None -> () + | Some e -> push t e + +let fold f init t = + let acc = ref init in + let rec loop() = + acc := f (t.next()) !acc; + loop() + in + try + loop() + with + No_more_elements -> !acc + +let foldi f init t = + let acc = ref init in + let rec loop idx = + acc := f idx (t.next()) !acc; + loop (idx + 1) + in + try + loop 0 + with + No_more_elements -> !acc + +let fold2 f init t u = + let acc = ref init in + let push_t = ref None in + let rec loop() = + push_t := None; + let e = t.next() in + push_t := Some e; + acc := f e (u.next()) !acc; + loop() + in + try + loop() + with + No_more_elements -> + match !push_t with + | None -> !acc + | Some e -> + push t e; + !acc + +let fold2i f init t u = + let acc = ref init in + let push_t = ref None in + let rec loop idx = + push_t := None; + let e = t.next() in + push_t := Some e; + acc := f idx e (u.next()) !acc; + loop (idx + 1) + in + try + loop 0 + with + No_more_elements -> + match !push_t with + | None -> !acc + | Some e -> + push t e; + !acc + +let find f t = + let rec loop () = + let x = t.next() in + if f x then x else loop() + in + try + loop() + with + No_more_elements -> raise Not_found + +let rec map f t = + { + count = t.count; + next = (fun () -> f (t.next())); + clone = (fun () -> map f (t.clone())); + fast = t.fast; + } + +let rec mapi f t = + let idx = ref (-1) in + { + count = t.count; + next = (fun () -> incr idx; f !idx (t.next())); + clone = (fun () -> mapi f (t.clone())); + fast = t.fast; + } + +let rec filter f t = + let rec next() = + let x = t.next() in + if f x then x else next() + in + from2 next (fun () -> filter f (t.clone())) + +let rec filter_map f t = + let rec next () = + match f (t.next()) with + | None -> next() + | Some x -> x + in + from2 next (fun () -> filter_map f (t.clone())) + +let rec append ta tb = + let t = { + count = (fun () -> ta.count() + tb.count()); + next = _dummy; + clone = (fun () -> append (ta.clone()) (tb.clone())); + fast = ta.fast && tb.fast; + } in + t.next <- (fun () -> + try + ta.next() + with + No_more_elements -> + (* add one indirection because tb can mute *) + t.next <- (fun () -> tb.next()); + t.count <- (fun () -> tb.count()); + t.clone <- (fun () -> tb.clone()); + t.fast <- tb.fast; + t.next() + ); + t + +let rec concat t = + let concat_ref = ref _dummy in + let rec concat_next() = + let tn = t.next() in + concat_ref := (fun () -> + try + tn.next() + with + No_more_elements -> + concat_next()); + !concat_ref () + in + concat_ref := concat_next; + from2 (fun () -> !concat_ref ()) (fun () -> concat (t.clone())) diff --git a/src/enum.mli b/src/enum.mli new file mode 100644 index 0000000..1f0fd84 --- /dev/null +++ b/src/enum.mli @@ -0,0 +1,205 @@ +(* + * Enum - enumeration over abstract collection of elements. + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** Enumeration over abstract collection of elements. + + Enumerations are entirely functional and most of the operations do not + actually require the allocation of data structures. Using enumerations + to manipulate data is therefore efficient and simple. All data structures in + ExtLib such as lists, arrays, etc. have support to convert from and to + enumerations. +*) + + +type 'a t + +(** {6 Final functions} + + These functions consume the enumeration until + it ends or an exception is raised by the first + argument function. +*) + +val iter : ('a -> unit) -> 'a t -> unit +(** [iter f e] calls the function [f] with each elements of [e] in turn. *) + +val iter2 : ('a -> 'b -> unit) -> 'a t -> 'b t -> unit +(** [iter2 f e1 e2] calls the function [f] with the next elements of [e] and + [e2] repeatedly until one of the two enumerations ends. *) + +val fold : ('a -> 'b -> 'b) -> 'b -> 'a t -> 'b +(** [fold f v e] returns [v] if [e] is empty, + otherwise [f aN (... (f a2 (f a1 v)) ...)] where a1..N are + the elements of [e]. +*) + +val fold2 : ('a -> 'b -> 'c -> 'c) -> 'c -> 'a t -> 'b t -> 'c +(** [fold2] is similar to [fold] but will fold over two enumerations at the + same time until one of the two enumerations ends. *) + +(** Indexed functions : these functions are similar to previous ones + except that they call the function with one additional argument which + is an index starting at 0 and incremented after each call to the function. *) + +val iteri : (int -> 'a -> unit) -> 'a t -> unit + +val iter2i : ( int -> 'a -> 'b -> unit) -> 'a t -> 'b t -> unit + +val foldi : (int -> 'a -> 'b -> 'b) -> 'b -> 'a t -> 'b + +val fold2i : (int -> 'a -> 'b -> 'c -> 'c) -> 'c -> 'a t -> 'b t -> 'c + +(** {6 Useful functions} *) + +val find : ('a -> bool) -> 'a t -> 'a +(** [find f e] returns the first element [x] of [e] such that [f x] returns + [true], consuming the enumeration up to and including the + found element, or, raises [Not_found] if no such element exists + in the enumeration, consuming the whole enumeration in the search. + + Since [find] consumes a prefix of the enumeration, it can be used several + times on the same enumeration to find the next element. *) + +val is_empty : 'a t -> bool +(** [is_empty e] returns true if [e] does not contains any element. *) + +val peek : 'a t -> 'a option +(** [peek e] returns [None] if [e] is empty or [Some x] where [x] is + the next element of [e]. The element is not removed from the enumeration. *) + +val get : 'a t -> 'a option +(** [get e] returns [None] if [e] is empty or [Some x] where [x] is + the next element of [e], in which case the element is removed from the enumeration. *) + +val next : 'a t -> 'a +(** [next e] returns the next element of [e] (and removes it from enumeration). + @raise No_more_elements if enumeration is empty *) + +val push : 'a t -> 'a -> unit +(** [push e x] will add [x] at the beginning of [e]. *) + +val junk : 'a t -> unit +(** [junk e] removes the first element from the enumeration, if any. *) + +val clone : 'a t -> 'a t +(** [clone e] creates a new enumeration that is copy of [e]. If [e] + is consumed by later operations, the clone will not get affected. *) + +val force : 'a t -> unit +(** [force e] forces the application of all lazy functions and the + enumeration of all elements, exhausting the enumeration. + + An efficient intermediate data structure + of enumerated elements is constructed and [e] will now enumerate over + that data structure. *) + +(** {6 Lazy constructors} + + These functions are lazy which means that they will create a new modified + enumeration without actually enumerating any element until they are asked + to do so by the programmer (using one of the functions above). + + When the resulting enumerations of these functions are consumed, the + underlying enumerations they were created from are also consumed. *) + +val map : ('a -> 'b) -> 'a t -> 'b t +(** [map f e] returns an enumeration over [(f a1, f a2, ... , f aN)] where + a1...N are the elements of [e]. *) + +val mapi : (int -> 'a -> 'b) -> 'a t -> 'b t +(** [mapi] is similar to [map] except that [f] is passed one extra argument + which is the index of the element in the enumeration, starting from 0. *) + +val filter : ('a -> bool) -> 'a t -> 'a t +(** [filter f e] returns an enumeration over all elements [x] of [e] such + as [f x] returns [true]. *) + +val filter_map : ('a -> 'b option) -> 'a t -> 'b t +(** [filter_map f e] returns an enumeration over all elements [x] such as + [f y] returns [Some x] , where [y] is an element of [e]. *) + +val append : 'a t -> 'a t -> 'a t +(** [append e1 e2] returns an enumeration that will enumerate over all + elements of [e1] followed by all elements of [e2]. *) + +val concat : 'a t t -> 'a t +(** [concat e] returns an enumeration over all elements of all enumerations + of [e]. *) + +(** {6 Constructors} + + In this section the word {i shall} denotes a semantic + requirement. The correct operation + of the functions in this interface are conditional + on the client meeting these requirements. +*) + +exception No_more_elements +(** This exception {i shall} be raised by the [next] function of [make] + or [from] when no more elements can be enumerated, it {i shall not} + be raised by any function which is an argument to any + other function specified in the interface. +*) + +val empty : unit -> 'a t +(** The empty enumeration : contains no element *) + +val make : next:(unit -> 'a) -> count:(unit -> int) -> clone:(unit -> 'a t) -> 'a t +(** This function creates a fully defined enumeration. + {ul {li the [next] function {i shall} return the next element of the + enumeration or raise [No_more_elements] if the underlying data structure + does not have any more elements to enumerate.} + {li the [count] function {i shall} return the actual number of remaining + elements in the enumeration.} + {li the [clone] function {i shall} create a clone of the enumeration + such as operations on the original enumeration will not affect the + clone. }} + + For some samples on how to correctly use [make], you can have a look + at implementation of [ExtList.enum]. +*) + +val from : (unit -> 'a) -> 'a t +(** [from next] creates an enumeration from the [next] function. + [next] {i shall} return the next element of the enumeration or raise + [No_more_elements] when no more elements can be enumerated. Since the + enumeration definition is incomplete, a call to [clone] or [count] will + result in a call to [force] that will enumerate all elements in order to + return a correct value. *) + +val init : int -> (int -> 'a) -> 'a t +(** [init n f] creates a new enumeration over elements + [f 0, f 1, ..., f (n-1)] *) + +(** {6 Counting} *) + +val count : 'a t -> int +(** [count e] returns the number of remaining elements in [e] without + consuming the enumeration. + +Depending of the underlying data structure that is implementing the +enumeration functions, the count operation can be costly, and even sometimes +can cause a call to [force]. *) + +val fast_count : 'a t -> bool +(** For users worried about the speed of [count] you can call the [fast_count] + function that will give an hint about [count] implementation. Basically, if + the enumeration has been created with [make] or [init] or if [force] has + been called on it, then [fast_count] will return true. *) diff --git a/src/extArray.ml b/src/extArray.ml new file mode 100644 index 0000000..b633a88 --- /dev/null +++ b/src/extArray.ml @@ -0,0 +1,191 @@ +(* + * ExtList - additional and modified functions for lists. + * Copyright (C) 2005 Richard W.M. Jones (rich @ annexia.org) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +module Array = struct + +include Array + +let rev_in_place xs = + let n = length xs in + let j = ref (n-1) in + for i = 0 to n/2-1 do + let c = xs.(i) in + xs.(i) <- xs.(!j); + xs.(!j) <- c; + decr j + done + +let rev xs = + let ys = Array.copy xs in + rev_in_place ys; + ys + +#if OCAML < 403 +let for_all p xs = + let n = length xs in + let rec loop i = + if i = n then true + else if p xs.(i) then loop (succ i) + else false + in + loop 0 + +exception Exists + +let exists p xs = + try + for i = 0 to Array.length xs - 1 do + if p xs.(i) then raise Exists + done; false + with Exists -> true + +let mem a xs = + let n = length xs in + let rec loop i = + if i = n then false + else if a = xs.(i) then true + else loop (succ i) + in + loop 0 + +let memq a xs = + let n = length xs in + let rec loop i = + if i = n then false + else if a == xs.(i) then true + else loop (succ i) + in + loop 0 +#endif + +let findi p xs = + let n = length xs in + let rec loop i = + if i = n then raise Not_found + else if p xs.(i) then i + else loop (succ i) + in + loop 0 + +let find p xs = xs.(findi p xs) + +(* Use of BitSet suggested by Brian Hurt. *) +let filter p xs = + let n = length xs in + (* Use a bitset to store which elements will be in the final array. *) + let bs = BitSet.create n in + for i = 0 to n-1 do + if p xs.(i) then BitSet.set bs i + done; + (* Allocate the final array and copy elements into it. *) + let n' = BitSet.count bs in + let j = ref 0 in + let xs' = init n' + (fun _ -> + (* Find the next set bit in the BitSet. *) + while not (BitSet.is_set bs !j) do incr j done; + let r = xs.(!j) in + incr j; + r) in + xs' + +let find_all = filter + +let partition p xs = + let n = length xs in + (* Use a bitset to store which elements will be in which final array. *) + let bs = BitSet.create n in + for i = 0 to n-1 do + if p xs.(i) then BitSet.set bs i + done; + (* Allocate the final arrays and copy elements into them. *) + let n1 = BitSet.count bs in + let n2 = n - n1 in + let j = ref 0 in + let xs1 = init n1 + (fun _ -> + (* Find the next set bit in the BitSet. *) + while not (BitSet.is_set bs !j) do incr j done; + let r = xs.(!j) in + incr j; + r) in + let j = ref 0 in + let xs2 = init n2 + (fun _ -> + (* Find the next clear bit in the BitSet. *) + while BitSet.is_set bs !j do incr j done; + let r = xs.(!j) in + incr j; + r) in + xs1, xs2 + +let enum xs = + let rec make start xs = + let n = length xs in + Enum.make + ~next:(fun () -> + if !start < n then ( + let r = xs.(!start) in + incr start; + r + ) else + raise Enum.No_more_elements) + ~count:(fun () -> + n - !start) + ~clone:(fun () -> + let xs' = Array.sub xs !start (n - !start) in + make (ref 0) xs') + in + make (ref 0) xs + +let of_enum e = + let n = Enum.count e in + (* This assumes, reasonably, that init traverses the array in order. *) + Array.init n + (fun i -> + match Enum.get e with + | Some x -> x + | None -> assert false) + +#if OCAML < 403 +let iter2 f a1 a2 = + if Array.length a1 <> Array.length a2 + then raise (Invalid_argument "Array.iter2"); + for i = 0 to Array.length a1 - 1 do + f a1.(i) a2.(i); + done + +let map2 f a1 a2 = + if Array.length a1 <> Array.length a2 + then raise (Invalid_argument "Array.map2"); + Array.init (Array.length a1) (fun i -> f a1.(i) a2.(i)) +#endif + +#if OCAML >= 403 +#else +#if OCAML >= 402 +let create_float = make_float +#else +let make_float n = make n 0. +let create_float = make_float +#endif +#endif + +end diff --git a/src/extArray.mli b/src/extArray.mli new file mode 100644 index 0000000..4d6d08f --- /dev/null +++ b/src/extArray.mli @@ -0,0 +1,174 @@ +(* + * ExtArray - additional and modified functions for arrays. + * Copyright (C) 2005 Richard W.M. Jones (rich @ annexia.org) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** Additional and modified functions for arrays. + + The OCaml standard library provides a module of array functions. + This ExtArray module can be used to override the Array module or + as a standalone module. It provides some additional functions. +*) + +module Array : +sig + + (** {6 New functions} *) + val rev : 'a array -> 'a array + (** Array reversal. *) + + val rev_in_place : 'a array -> unit + (** In-place array reversal. The array argument is updated. *) + + val iter2 : ('a -> 'b -> unit) -> 'a array -> 'b array -> unit + (** [Array.iter2 f [|a1; ...; an|] [|b1; ...; bn|]] performs + calls [f a1 b1; ...; f an bn] in that order. + + @raise Invalid_argument if the length of [a1] does not equal the + length of [a2]. *) + + val map2 : ('a -> 'b -> 'c) -> 'a array -> 'b array -> 'c array + (** [Array.map2 f [|a1; ...; an|] [|b1; ...; bn|]] creates new array + [[|f a1 b1; ...; f an bn|]]. + + @raise Invalid_argument if the length of [a1] does not equal the + length of [a2]. *) + + val for_all : ('a -> bool) -> 'a array -> bool + (** [for_all p [a1; ...; an]] checks if all elements of the array + satisfy the predicate [p]. That is, it returns + [ (p a1) && (p a2) && ... && (p an)]. + *) + + val exists : ('a -> bool) -> 'a array -> bool + (** [exists p [a1; ...; an]] checks if at least one element of + the array satisfies the predicate [p]. That is, it returns + [ (p a1) || (p a2) || ... || (p an)]. + *) + + val mem : 'a -> 'a array -> bool + (** [mem m a] is true if and only if [m] is equal to an element of [a]. *) + + val memq : 'a -> 'a array -> bool + (** Same as {!Array.mem} but uses physical equality instead of + structural equality to compare array elements. + *) + + val find : ('a -> bool) -> 'a array -> 'a + (** [find p a] returns the first element of array [a] + that satisfies the predicate [p]. + Raise [Not_found] if there is no value that satisfies [p] in the + array [a]. + *) + + val findi : ('a -> bool) -> 'a array -> int + (** [findi p a] returns the index of the first element of array [a] + that satisfies the predicate [p]. + Raise [Not_found] if there is no value that satisfies [p] in the + array [a]. + *) + + val filter : ('a -> bool) -> 'a array -> 'a array + (** [filter p a] returns all the elements of the array [a] + that satisfy the predicate [p]. The order of the elements + in the input array is preserved. *) + + val find_all : ('a -> bool) -> 'a array -> 'a array + (** [find_all] is another name for {!Array.filter}. *) + + val partition : ('a -> bool) -> 'a array -> 'a array * 'a array + (** [partition p a] returns a pair of arrays [(a1, a2)], where + [a1] is the array of all the elements of [a] that + satisfy the predicate [p], and [a2] is the array of all the + elements of [a] that do not satisfy [p]. + The order of the elements in the input array is preserved. *) + + (** {6 Enumerations} *) + + val enum : 'a array -> 'a Enum.t + (** Returns an enumeration of the elements of an array. *) + + val of_enum : 'a Enum.t -> 'a array + (** Build an array from an enumeration. *) + + (** {6 Compatibility functions} *) + + (** These functions are reimplemented in extlib when they are missing from the stdlib *) + +#if OCAML >= 403 + external create_float : int -> float array = "caml_make_float_vect" +#else + val create_float : int -> float array +#endif + + val make_float : int -> float array + +#if OCAML >= 406 + module Floatarray : + sig + external create : int -> floatarray = "caml_floatarray_create" + external length : floatarray -> int = "%floatarray_length" + external get : floatarray -> int -> float = "%floatarray_safe_get" + external set : floatarray -> int -> float -> unit = "%floatarray_safe_set" + external unsafe_get : floatarray -> int -> float = "%floatarray_unsafe_get" + external unsafe_set : floatarray -> int -> float -> unit = "%floatarray_unsafe_set" + end +#endif + + (** {6 Old functions} *) + + (** These functions are already part of the Ocaml standard library + and have not been modified. Please refer to the Ocaml Manual for + documentation. *) + + external length : 'a array -> int = "%array_length" + external get : 'a array -> int -> 'a = "%array_safe_get" + external set : 'a array -> int -> 'a -> unit = "%array_safe_set" + external make : int -> 'a -> 'a array = "caml_make_vect" + external create : int -> 'a -> 'a array = "caml_make_vect" + val init : int -> (int -> 'a) -> 'a array + val make_matrix : int -> int -> 'a -> 'a array array + val create_matrix : int -> int -> 'a -> 'a array array + val append : 'a array -> 'a array -> 'a array + val concat : 'a array list -> 'a array + val sub : 'a array -> int -> int -> 'a array + val copy : 'a array -> 'a array + val fill : 'a array -> int -> int -> 'a -> unit + val blit : 'a array -> int -> 'a array -> int -> int -> unit + val to_list : 'a array -> 'a list + val of_list : 'a list -> 'a array + val iter : ('a -> unit) -> 'a array -> unit + val map : ('a -> 'b) -> 'a array -> 'b array + val iteri : (int -> 'a -> unit) -> 'a array -> unit + val mapi : (int -> 'a -> 'b) -> 'a array -> 'b array + val fold_left : ('a -> 'b -> 'a) -> 'a -> 'b array -> 'a + val fold_right : ('b -> 'a -> 'a) -> 'b array -> 'a -> 'a + val sort : ('a -> 'a -> int) -> 'a array -> unit + val stable_sort : ('a -> 'a -> int) -> 'a array -> unit + val fast_sort : ('a -> 'a -> int) -> 'a array -> unit + external unsafe_get : 'a array -> int -> 'a = "%array_unsafe_get" + external unsafe_set : 'a array -> int -> 'a -> unit = "%array_unsafe_set" + +#if OCAML >= 407 + (** [*_seq] functions were introduced in OCaml 4.07.0, and are _not_ implemented in extlib for older OCaml versions *) + val to_seq : 'a array -> 'a Seq.t + val to_seqi : 'a array -> (int * 'a) Seq.t + val of_seq : 'a Seq.t -> 'a array +#endif + +end diff --git a/src/extBuffer.ml b/src/extBuffer.ml new file mode 100644 index 0000000..7bcb694 --- /dev/null +++ b/src/extBuffer.ml @@ -0,0 +1,24 @@ +open ExtBytes + +module Buffer = struct + + include Buffer + +#if OCAML < 402 + (* The uses of unsafe_{of,to}_string above are not semantically + justified, as the Buffer implementation may very well capture and + share parts of its internal buffer, or of input string given as + input. + + They are however correct with respect to the implementation being + used in OCaml 4.02.0; this implementation must be revisited if + the string representation changes. *) + let to_bytes b = + Bytes.unsafe_of_string (contents b) + + let add_subbytes b s offset len = + add_substring b (Bytes.unsafe_to_string s) offset len + + let add_bytes b s = add_string b (Bytes.unsafe_to_string s) +#endif +end diff --git a/src/extBuffer.mli b/src/extBuffer.mli new file mode 100644 index 0000000..cc9c7fc --- /dev/null +++ b/src/extBuffer.mli @@ -0,0 +1,92 @@ +(* + * ExtBuffer - extra functions over buffers. + * Copyright (C) 2014 Gabriel Scherer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** Extra functions over text buffers. + + We in fact provide the exact same interface as Buffer on 4.02 OCaml + versions, with the implementation for the 4.02-and-above + bytes-specific functions backported. +*) + +open ExtBytes + +module Buffer : sig + +type t = Buffer.t + +val create : int -> t + +val contents : t -> string + +val to_bytes : t -> Bytes.t + +val sub : t -> int -> int -> string + +val blit : t -> int -> Bytes.t -> int -> int -> unit + +val nth : t -> int -> char + +val length : t -> int + +val clear : t -> unit + +val reset : t -> unit + +val add_char : t -> char -> unit + +val add_string : t -> string -> unit + +val add_bytes : t -> Bytes.t -> unit + +val add_substring : t -> string -> int -> int -> unit + +val add_subbytes : t -> Bytes.t -> int -> int -> unit + +val add_substitute : t -> (string -> string) -> string -> unit + +val add_buffer : t -> t -> unit + +val add_channel : t -> in_channel -> int -> unit + +val output_buffer : out_channel -> t -> unit + +#if OCAML >= 405 + +val truncate : t -> int -> unit + +#endif + +#if OCAML >= 406 + +val add_utf_8_uchar : t -> Uchar.t -> unit +val add_utf_16le_uchar : t -> Uchar.t -> unit +val add_utf_16be_uchar : t -> Uchar.t -> unit + +#endif + +#if OCAML >= 407 +(** [*_seq] functions were introduced in OCaml 4.07.0, and are _not_ implemented in extlib for older OCaml versions *) +val to_seq : t -> char Seq.t +val to_seqi : t -> (int * char) Seq.t +val add_seq : t -> char Seq.t -> unit +val of_seq : char Seq.t -> t +#endif + +end diff --git a/src/extBytes.ml b/src/extBytes.ml new file mode 100644 index 0000000..ec5c932 --- /dev/null +++ b/src/extBytes.ml @@ -0,0 +1,19 @@ +#if OCAML >= 402 || defined WITH_BYTES +module Bytes = Bytes +#else +module Bytes = struct + +include String + +let empty = "" +let of_string = copy +let to_string = copy + +let sub_string = sub +let blit_string = blit + +let unsafe_to_string : t -> string = fun s -> s +let unsafe_of_string : string -> t = fun s -> s + +end +#endif diff --git a/src/extHashtbl.ml b/src/extHashtbl.ml new file mode 100644 index 0000000..140e9c2 --- /dev/null +++ b/src/extHashtbl.ml @@ -0,0 +1,166 @@ +(* + * ExtHashtbl, extra functions over hashtables. + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + + +module Hashtbl = + struct + +#if OCAML >= 400 + external old_hash_param : + int -> int -> 'a -> int = "caml_hash_univ_param" "noalloc" +#endif + + type ('a, 'b) h_bucketlist = + | Empty + | Cons of 'a * 'b * ('a, 'b) h_bucketlist + +#if OCAML >= 400 + type ('a, 'b) h_t = { + mutable size: int; + mutable data: ('a, 'b) h_bucketlist array; + mutable seed: int; + initial_size: int; + } +#else + type ('a, 'b) h_t = { + mutable size: int; + mutable data: ('a, 'b) h_bucketlist array + } +#endif + + include Hashtbl + +#if OCAML < 400 + let create ?random:_ n = Hashtbl.create (* no seed *) n +#endif + + external h_conv : ('a, 'b) t -> ('a, 'b) h_t = "%identity" + external h_make : ('a, 'b) h_t -> ('a, 'b) t = "%identity" + + let exists = mem + + let enum h = + let rec make ipos ibuck idata icount = + let pos = ref ipos in + let buck = ref ibuck in + let hdata = ref idata in + let hcount = ref icount in + let force() = + (** this is a hack in order to keep an O(1) enum constructor **) + if !hcount = -1 then begin + hcount := (h_conv h).size; + hdata := Array.copy (h_conv h).data; + end; + in + let rec next() = + force(); + match !buck with + | Empty -> + if !hcount = 0 then raise Enum.No_more_elements; + incr pos; + buck := Array.unsafe_get !hdata !pos; + next() + | Cons (k,i,next_buck) -> + buck := next_buck; + decr hcount; + (k,i) + in + let count() = + if !hcount = -1 then (h_conv h).size else !hcount + in + let clone() = + force(); + make !pos !buck !hdata !hcount + in + Enum.make ~next ~count ~clone + in + make (-1) Empty (Obj.magic()) (-1) + + let keys h = + Enum.map (fun (k,_) -> k) (enum h) + + let values h = + Enum.map (fun (_,v) -> v) (enum h) + + let map f h = + let rec loop = function + | Empty -> Empty + | Cons (k,v,next) -> Cons (k,f v,loop next) + in + h_make { (h_conv h) with + data = Array.map loop (h_conv h).data; + } + +#if OCAML >= 400 + (* copied from stdlib :( *) + let key_index h key = + (* compatibility with old hash tables *) + if Obj.size (Obj.repr h) >= 3 + then (seeded_hash_param 10 100 (h_conv h).seed key) land (Array.length (h_conv h).data - 1) + else (old_hash_param 10 100 key) mod (Array.length (h_conv h).data) +#else + let key_index h key = (hash key) mod (Array.length (h_conv h).data) +#endif + + let remove_all h key = + let hc = h_conv h in + let rec loop = function + | Empty -> Empty + | Cons(k,v,next) -> + if k = key then begin + hc.size <- pred hc.size; + loop next + end else + Cons(k,v,loop next) + in + let pos = key_index h key in + Array.unsafe_set hc.data pos (loop (Array.unsafe_get hc.data pos)) + + let find_default h key defval = + let rec loop = function + | Empty -> defval + | Cons (k,v,next) -> + if k = key then v else loop next + in + let pos = key_index h key in + loop (Array.unsafe_get (h_conv h).data pos) + +#if OCAML < 405 + let find_opt h key = + let rec loop = function + | Empty -> None + | Cons (k,v,next) -> + if k = key then Some v else loop next + in + let pos = key_index h key in + loop (Array.unsafe_get (h_conv h).data pos) +#endif + + let find_option = find_opt + + let of_enum e = + let h = create (if Enum.fast_count e then Enum.count e else 0) in + Enum.iter (fun (k,v) -> add h k v) e; + h + + let length h = + (h_conv h).size + + end diff --git a/src/extHashtbl.mli b/src/extHashtbl.mli new file mode 100644 index 0000000..5153fd5 --- /dev/null +++ b/src/extHashtbl.mli @@ -0,0 +1,217 @@ +(* + * ExtHashtbl - extra functions over hashtables. + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** Extra functions over hashtables. *) + +module Hashtbl : + (** The wrapper module *) + sig + + type ('a,'b) t = ('a,'b) Hashtbl.t + (** The type of a hashtable. *) + + (** {6 New Functions} *) + + val exists : ('a,'b) t -> 'a -> bool + (** [exists h k] returns true is at least one item with key [k] is + found in the hashtable. *) + + val keys : ('a,'b) t -> 'a Enum.t + (** Return an enumeration of all the keys of a hashtable. + If the key is in the Hashtable multiple times, all occurrences + will be returned. *) + + val values : ('a,'b) t -> 'b Enum.t + (** Return an enumeration of all the values of a hashtable. *) + + val enum : ('a, 'b) t -> ('a * 'b) Enum.t + (** Return an enumeration of (key,value) pairs of a hashtable. *) + + val of_enum : ('a * 'b) Enum.t -> ('a, 'b) t + (** Create a hashtable from a (key,value) enumeration. *) + + val find_default : ('a,'b) t -> 'a -> 'b -> 'b + (** Find a binding for the key, and return a default + value if not found *) + + val find_opt : ('a,'b) Hashtbl.t -> 'a -> 'b option + (** Find a binding for the key, or return [None] if no + value is found *) + + val find_option : ('a,'b) Hashtbl.t -> 'a -> 'b option + (** compatibility, use [find_opt] *) + + val remove_all : ('a,'b) t -> 'a -> unit + (** Remove all bindings for the given key *) + + val map : ('b -> 'c) -> ('a,'b) t -> ('a,'c) t + (** [map f x] creates a new hashtable with the same + keys as [x], but with the function [f] applied to + all the values *) + + val length : ('a,'b) t -> int + (** Return the number of elements inserted into the Hashtbl + (including duplicates) *) + +#if OCAML >= 400 + val reset : ('a,'b) t -> unit + val randomize : unit -> unit + + type statistics = Hashtbl.statistics = { + num_bindings: int; + num_buckets: int; + max_bucket_length: int; + bucket_histogram: int array; + } + + val stats : ('a,'b) t -> statistics + + val seeded_hash_param : int -> int -> int -> 'a -> int + val seeded_hash : int -> 'a -> int +#endif + +#if OCAML >= 403 + val is_randomized : unit -> bool + val filter_map_inplace : ('a -> 'b -> 'b option) -> ('a, 'b) t -> unit +#endif + + (** {6 Older Functions} *) + + (** Please refer to the Ocaml Manual for documentation of these + functions. *) + + (** @before 4.00.0 [random] is ignored *) + val create : ?random:bool -> int -> ('a, 'b) t + val clear : ('a, 'b) t -> unit + val add : ('a, 'b) t -> 'a -> 'b -> unit + val copy : ('a, 'b) t -> ('a, 'b) t + val find : ('a, 'b) t -> 'a -> 'b + val find_all : ('a, 'b) t -> 'a -> 'b list + val mem : ('a, 'b) t -> 'a -> bool + val remove : ('a, 'b) t -> 'a -> unit + val replace : ('a, 'b) t -> 'a -> 'b -> unit + val iter : ('a -> 'b -> unit) -> ('a, 'b) t -> unit + val fold : ('a -> 'b -> 'c -> 'c) -> ('a, 'b) t -> 'c -> 'c + val hash : 'a -> int + val hash_param : int -> int -> 'a -> int + +#if OCAML >= 407 + (** [*_seq] functions were introduced in OCaml 4.07.0, and are _not_ implemented in extlib for older OCaml versions *) + val to_seq : ('a,'b) t -> ('a * 'b) Seq.t + val to_seq_keys : ('a,_) t -> 'a Seq.t + val to_seq_values : (_,'b) t -> 'b Seq.t + val add_seq : ('a,'b) t -> ('a * 'b) Seq.t -> unit + val replace_seq : ('a,'b) t -> ('a * 'b) Seq.t -> unit + val of_seq : ('a * 'b) Seq.t -> ('a, 'b) t +#endif + +(** Functor interface forwards directly to stdlib implementation (i.e. no enum functions) *) + +#if OCAML >= 407 + +module type HashedType = Hashtbl.HashedType +module type S = Hashtbl.S +module Make = Hashtbl.Make + +module type SeededHashedType = Hashtbl.SeededHashedType +module type SeededS = Hashtbl.SeededS +module MakeSeeded = Hashtbl.MakeSeeded + +#else + +module type HashedType = + sig + type t + val equal : t -> t -> bool + val hash : t -> int + end + +module type S = + sig + type key + type 'a t + val create : int -> 'a t + val clear : 'a t -> unit +#if OCAML >= 400 + val reset : 'a t -> unit +#endif + val copy : 'a t -> 'a t + val add : 'a t -> key -> 'a -> unit + val remove : 'a t -> key -> unit + val find : 'a t -> key -> 'a +#if OCAML >= 405 + val find_opt : 'a t -> key -> 'a option +#endif + val find_all : 'a t -> key -> 'a list + val replace : 'a t -> key -> 'a -> unit + val mem : 'a t -> key -> bool + val iter : (key -> 'a -> unit) -> 'a t -> unit +#if OCAML >= 403 + val filter_map_inplace: (key -> 'a -> 'a option) -> 'a t -> unit +#endif + val fold : (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b + val length : 'a t -> int +#if OCAML >= 400 + val stats: 'a t -> statistics +#endif + end + +module Make (H : HashedType) : S with type key = H.t + +#if OCAML >= 400 +module type SeededHashedType = + sig + type t + val equal: t -> t -> bool + val hash: int -> t -> int + end + +module type SeededS = + sig + type key + type 'a t + val create : ?random:bool -> int -> 'a t + val clear : 'a t -> unit + val reset : 'a t -> unit + val copy : 'a t -> 'a t + val add : 'a t -> key -> 'a -> unit + val remove : 'a t -> key -> unit + val find : 'a t -> key -> 'a +#if OCAML >= 405 + val find_opt : 'a t -> key -> 'a option +#endif + val find_all : 'a t -> key -> 'a list + val replace : 'a t -> key -> 'a -> unit + val mem : 'a t -> key -> bool + val iter : (key -> 'a -> unit) -> 'a t -> unit +#if OCAML >= 403 + val filter_map_inplace: (key -> 'a -> 'a option) -> 'a t -> unit +#endif + val fold : (key -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b + val length : 'a t -> int + val stats: 'a t -> statistics + end + +module MakeSeeded (H : SeededHashedType) : SeededS with type key = H.t +#endif + +#endif + + end diff --git a/src/extLib.ml b/src/extLib.ml new file mode 100644 index 0000000..92c02d6 --- /dev/null +++ b/src/extLib.ml @@ -0,0 +1,46 @@ +(* + * ExtLib - use extensions as separate modules + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(* + Note: + + Since ExtLib is provided for namespace convenience for + users who wants to keep the usage of the original + Ocaml Standard Library, no MLI CMI nor documentation will + be provided for this module. + + Users can simply do an "open ExtLib" to import all Ext* + namespaces instead of doing "open ExtList" for example. + + The trade-off is that they'll have to link all the modules + included below so the resulting binary is bigger. +*) + +module List = ExtList.List +module String = ExtString.String +module Hashtbl = ExtHashtbl.Hashtbl +module Array = ExtArray.Array +module Buffer = ExtBuffer.Buffer + +exception Invalid_string = ExtString.Invalid_string + +let (@) = ExtList.(@) + +include Std diff --git a/src/extList.ml b/src/extList.ml new file mode 100644 index 0000000..2ec5cee --- /dev/null +++ b/src/extList.ml @@ -0,0 +1,562 @@ +(* + * ExtList - additional and modified functions for lists. + * Copyright (C) 2003 Brian Hurt + * Copyright (C) 2003 Nicolas Cannasse + * Copyright (C) 2008 Red Hat Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +module List = struct + +exception Empty_list +exception Invalid_index of int +exception Different_list_size of string + +include List + +(* Thanks to Jacques Garrigue for suggesting the following structure *) +type 'a mut_list = { + hd: 'a; + mutable tl: 'a list +} +external inj : 'a mut_list -> 'a list = "%identity" + + +let dummy_node () = { hd = Obj.magic (); tl = [] } + +let hd = function + | [] -> raise Empty_list + | h :: t -> h + +let tl = function + | [] -> raise Empty_list + | h :: t -> t + +let nth l index = + if index < 0 then raise (Invalid_index index); + let rec loop n = function + | [] -> raise (Invalid_index index); + | h :: t -> + if n = 0 then h else loop (n - 1) t + in + loop index l + +let append l1 l2 = + match l1 with + | [] -> l2 + | h :: t -> + let rec loop dst = function + | [] -> + dst.tl <- l2 + | h :: t -> + let cell = { hd = h; tl = [] } in + dst.tl <- inj cell; + loop cell t + in + let r = { hd = h; tl = [] } in + loop r t; + inj r + +let rec flatten l = + let rec inner dst = function + | [] -> dst + | h :: t -> + let r = { hd = h; tl = [] } in + dst.tl <- inj r; + inner r t + in + let rec outer dst = function + | [] -> () + | h :: t -> outer (inner dst h) t + in + let r = dummy_node () in + outer r l; + r.tl + +let concat = flatten + +let map f = function + | [] -> [] + | h :: t -> + let rec loop dst = function + | [] -> () + | h :: t -> + let r = { hd = f h; tl = [] } in + dst.tl <- inj r; + loop r t + in + let r = { hd = f h; tl = [] } in + loop r t; + inj r + +let rec drop n = function + | _ :: l when n > 0 -> drop (n-1) l + | l -> l + +let take n l = + let rec loop n dst = function + | h :: t when n > 0 -> + let r = { hd = h; tl = [] } in + dst.tl <- inj r; + loop (n-1) r t + | _ -> + () + in + let dummy = dummy_node() in + loop n dummy l; + dummy.tl + +(* takewhile and dropwhile by Richard W.M. Jones. *) +let rec takewhile f = function + | [] -> [] + | x :: xs when f x -> x :: takewhile f xs + | _ -> [] + +let rec dropwhile f = function + | [] -> [] + | x :: xs when f x -> dropwhile f xs + | xs -> xs + + +let rec unique ?(cmp = ( = )) l = + let rec loop dst = function + | [] -> () + | h :: t -> + match exists (cmp h) t with + | true -> loop dst t + | false -> + let r = { hd = h; tl = [] } in + dst.tl <- inj r; + loop r t + in + let dummy = dummy_node() in + loop dummy l; + dummy.tl + +let filter_map f l = + let rec loop dst = function + | [] -> () + | h :: t -> + match f h with + | None -> loop dst t + | Some x -> + let r = { hd = x; tl = [] } in + dst.tl <- inj r; + loop r t + in + let dummy = dummy_node() in + loop dummy l; + dummy.tl + +let rec find_map f = function + | [] -> raise Not_found + | x :: xs -> + match f x with + | Some y -> y + | None -> find_map f xs + +let fold_right_max = 1000 + +let fold_right f l init = + let rec tail_loop acc = function + | [] -> acc + | h :: t -> tail_loop (f h acc) t + in + let rec loop n = function + | [] -> init + | h :: t -> + if n < fold_right_max then + f h (loop (n+1) t) + else + f h (tail_loop init (rev t)) + in + loop 0 l + +let map2 f l1 l2 = + let rec loop dst src1 src2 = + match src1, src2 with + | [], [] -> () + | h1 :: t1, h2 :: t2 -> + let r = { hd = f h1 h2; tl = [] } in + dst.tl <- inj r; + loop r t1 t2 + | _ -> raise (Different_list_size "map2") + in + let dummy = dummy_node () in + loop dummy l1 l2; + dummy.tl + +let rev_map2 f l1 l2 = + let rec loop acc l1 l2 = + match l1, l2 with + | [], [] -> acc + | h1 :: t1, h2 :: t2 -> loop (f h1 h2 :: acc) t1 t2 + | _ -> raise (Different_list_size "rev_map2") + in + loop [] l1 l2 + +let rec iter2 f l1 l2 = + match l1, l2 with + | [], [] -> () + | h1 :: t1, h2 :: t2 -> f h1 h2; iter2 f t1 t2 + | _ -> raise (Different_list_size "iter2") + +let rec fold_left2 f accum l1 l2 = + match l1, l2 with + | [], [] -> accum + | h1 :: t1, h2 :: t2 -> fold_left2 f (f accum h1 h2) t1 t2 + | _ -> raise (Different_list_size "fold_left2") + +let fold_right2 f l1 l2 init = + let rec tail_loop acc l1 l2 = + match l1, l2 with + | [] , [] -> acc + | h1 :: t1 , h2 :: t2 -> tail_loop (f h1 h2 acc) t1 t2 + | _ -> raise (Different_list_size "fold_right2") + in + let rec loop n l1 l2 = + match l1, l2 with + | [], [] -> init + | h1 :: t1, h2 :: t2 -> + if n < fold_right_max then + f h1 h2 (loop (n+1) t1 t2) + else + f h1 h2 (tail_loop init (rev t1) (rev t2)) + | _ -> raise (Different_list_size "fold_right2") + in + loop 0 l1 l2 + +let for_all2 p l1 l2 = + let rec loop l1 l2 = + match l1, l2 with + | [], [] -> true + | h1 :: t1, h2 :: t2 -> if p h1 h2 then loop t1 t2 else false + | _ -> raise (Different_list_size "for_all2") + in + loop l1 l2 + +let exists2 p l1 l2 = + let rec loop l1 l2 = + match l1, l2 with + | [], [] -> false + | h1 :: t1, h2 :: t2 -> if p h1 h2 then true else loop t1 t2 + | _ -> raise (Different_list_size "exists2") + in + loop l1 l2 + +let remove_assoc x lst = + let rec loop dst = function + | [] -> () + | (a, _ as pair) :: t -> + if a = x then + dst.tl <- t + else + let r = { hd = pair; tl = [] } in + dst.tl <- inj r; + loop r t + in + let dummy = dummy_node () in + loop dummy lst; + dummy.tl + +let remove_assq x lst = + let rec loop dst = function + | [] -> () + | (a, _ as pair) :: t -> + if a == x then + dst.tl <- t + else + let r = { hd = pair; tl = [] } in + dst.tl <- inj r; + loop r t + in + let dummy = dummy_node() in + loop dummy lst; + dummy.tl + +let rfind p l = find p (rev l) + +let find_all p l = + let rec findnext dst = function + | [] -> () + | h :: t -> + if p h then + let r = { hd = h; tl = [] } in + dst.tl <- inj r; + findnext r t + else + findnext dst t + in + let dummy = dummy_node () in + findnext dummy l; + dummy.tl + +let rec findi p l = + let rec loop n = function + | [] -> raise Not_found + | h :: t -> + if p n h then (n,h) else loop (n+1) t + in + loop 0 l + +let filter = find_all + +let partition p lst = + let rec loop yesdst nodst = function + | [] -> () + | h :: t -> + let r = { hd = h; tl = [] } in + if p h then + begin + yesdst.tl <- inj r; + loop r nodst t + end + else + begin + nodst.tl <- inj r; + loop yesdst r t + end + in + let yesdummy = dummy_node() + and nodummy = dummy_node() + in + loop yesdummy nodummy lst; + yesdummy.tl, nodummy.tl + +let split lst = + let rec loop adst bdst = function + | [] -> () + | (a, b) :: t -> + let x = { hd = a; tl = [] } + and y = { hd = b; tl = [] } in + adst.tl <- inj x; + bdst.tl <- inj y; + loop x y t + in + let adummy = dummy_node () + and bdummy = dummy_node () + in + loop adummy bdummy lst; + adummy.tl, bdummy.tl + +let combine l1 l2 = + let rec loop dst l1 l2 = + match l1, l2 with + | [], [] -> () + | h1 :: t1, h2 :: t2 -> + let r = { hd = h1, h2; tl = [] } in + dst.tl <- inj r; + loop r t1 t2 + | _, _ -> raise (Different_list_size "combine") + in + let dummy = dummy_node () in + loop dummy l1 l2; + dummy.tl + +let sort ?(cmp=compare) = List.sort cmp + +#if OCAML < 406 +let rec init size f = + if size = 0 then [] + else if size < 0 then invalid_arg "ExtList.init" + else + let rec loop dst n = + if n < size then + let r = { hd = f n; tl = [] } in + dst.tl <- inj r; + loop r (n+1) + in + let r = { hd = f 0; tl = [] } in + loop r 1; + inj r +#endif + +let make i x = + if i < 0 then invalid_arg "ExtList.List.make"; + let rec loop acc x = function + | 0 -> acc + | i -> loop (x::acc) x (i-1) + in + loop [] x i + +let mapi f = function + | [] -> [] + | h :: t -> + let rec loop dst n = function + | [] -> () + | h :: t -> + let r = { hd = f n h; tl = [] } in + dst.tl <- inj r; + loop r (n+1) t + in + let r = { hd = f 0 h; tl = [] } in + loop r 1 t; + inj r + +#if OCAML < 400 +let iteri f l = + let rec loop n = function + | [] -> () + | h :: t -> + f n h; + loop (n+1) t + in + loop 0 l +#endif + +let first = hd + +let rec last = function + | [] -> raise Empty_list + | h :: [] -> h + | _ :: t -> last t + +let split_nth index = function + | [] -> if index = 0 then [],[] else raise (Invalid_index index) + | (h :: t as l) -> + if index = 0 then [],l + else if index < 0 then raise (Invalid_index index) + else + let rec loop n dst l = + if n = 0 then l else + match l with + | [] -> raise (Invalid_index index) + | h :: t -> + let r = { hd = h; tl = [] } in + dst.tl <- inj r; + loop (n-1) r t + in + let r = { hd = h; tl = [] } in + inj r, loop (index-1) r t + +let find_exc f e l = + try + find f l + with + Not_found -> raise e + +let remove l x = + let rec loop dst = function + | [] -> () + | h :: t -> + if x = h then + dst.tl <- t + else + let r = { hd = h; tl = [] } in + dst.tl <- inj r; + loop r t + in + let dummy = dummy_node () in + loop dummy l; + dummy.tl + +let rec remove_if f lst = + let rec loop dst = function + | [] -> () + | x :: l -> + if f x then + dst.tl <- l + else + let r = { hd = x; tl = [] } in + dst.tl <- inj r; + loop r l + in + let dummy = dummy_node () in + loop dummy lst; + dummy.tl + +let rec remove_all l x = + let rec loop dst = function + | [] -> () + | h :: t -> + if x = h then + loop dst t + else + let r = { hd = h; tl = [] } in + dst.tl <- inj r; + loop r t + in + let dummy = dummy_node () in + loop dummy l; + dummy.tl + +let enum l = + let rec make lr count = + Enum.make + ~next:(fun () -> + match !lr with + | [] -> raise Enum.No_more_elements + | h :: t -> + decr count; + lr := t; + h + ) + ~count:(fun () -> + if !count < 0 then count := length !lr; + !count + ) + ~clone:(fun () -> + make (ref !lr) (ref !count) + ) + in + make (ref l) (ref (-1)) + +let of_enum e = + let h = dummy_node() in + let _ = Enum.fold (fun x acc -> + let r = { hd = x; tl = [] } in + acc.tl <- inj r; + r) h e in + h.tl + +#if OCAML < 403 +let cons x l = x :: l +#endif + +#if OCAML < 405 + +let assoc_opt k l = try Some (assoc k l) with Not_found -> None +let assq_opt k l = try Some (assq k l) with Not_found -> None +let find_opt p l = try Some (find p l) with Not_found -> None + +let nth_opt = + let rec loop n = function + | [] -> None + | h :: t -> + if n = 0 then Some h else loop (n - 1) t + in + fun l index -> if index < 0 then None else loop index l + +let rec compare_lengths l1 l2 = + match l1, l2 with + | [], [] -> 0 + | [], _ -> -1 + | _, [] -> 1 + | _ :: l1, _ :: l2 -> compare_lengths l1 l2 + +let rec compare_length_with l n = + match l, n with + | [], 0 -> 0 + | [], _ -> if n > 0 then -1 else 1 + | _, 0 -> 1 + | _ :: l, n -> compare_length_with l (n-1) + +#endif + +end + +let ( @ ) = List.append diff --git a/src/extList.mli b/src/extList.mli new file mode 100644 index 0000000..57cbd00 --- /dev/null +++ b/src/extList.mli @@ -0,0 +1,272 @@ +(* + * ExtList - additional and modified functions for lists. + * Copyright (C) 2003 Brian Hurt + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** Additional and modified functions for lists. + + The OCaml standard library provides a module for list functions. + This ExtList module can be used to override the List module or + as a standalone module. It provides new functions and modify + the behavior of some other ones (in particular all functions + are now {b tail-recursive}). +*) + +module List : + sig + + (** {6 New functions} *) + + val init : int -> (int -> 'a) -> 'a list + (** Similar to [Array.init], [init n f] returns the list containing + the results of (f 0),(f 1).... (f (n-1)). + Raise [Invalid_arg "ExtList.init"] if n < 0. + Uses stdlib implementation in OCaml 4.06.0 and newer. + *) + + val make : int -> 'a -> 'a list + (** Similar to [String.make], [make n x] returns a + * list containing [n] elements [x]. + *) + + val first : 'a list -> 'a + (** Returns the first element of the list, or raise [Empty_list] if + the list is empty (similar to [hd]). *) + + val last : 'a list -> 'a + (** Returns the last element of the list, or raise [Empty_list] if + the list is empty. This function takes linear time. *) + + val iteri : (int -> 'a -> unit) -> 'a list -> unit + (** [iteri f l] will call [(f 0 a0);(f 1 a1) ... (f n an)] where + [a0..an] are the elements of the list [l]. *) + + val mapi : (int -> 'a -> 'b) -> 'a list -> 'b list + (** [mapi f l] will build the list containing + [(f 0 a0);(f 1 a1) ... (f n an)] where [a0..an] are the elements of + the list [l]. *) + + val rfind : ('a -> bool) -> 'a list -> 'a + (** [rfind p l] returns the last element [x] of [l] such as [p x] returns + [true] or raises [Not_found] if such element as not been found. *) + + val find_exc : ('a -> bool) -> exn -> 'a list -> 'a + (** [find_exc p e l] returns the first element of [l] such as [p x] + returns [true] or raises [e] if such element as not been found. *) + + val findi : (int -> 'a -> bool) -> 'a list -> (int * 'a) + (** [findi p e l] returns the first element [ai] of [l] along with its + index [i] such that [p i ai] is true, or raises [Not_found] if no + such element has been found. *) + + val unique : ?cmp:('a -> 'a -> bool) -> 'a list -> 'a list + (** [unique cmp l] returns the list [l] without any duplicate element. + Default comparator ( = ) is used if no comparison function specified. *) + + val filter_map : ('a -> 'b option) -> 'a list -> 'b list + (** [filter_map f l] call [(f a0) (f a1).... (f an)] where [a0..an] are + the elements of [l]. It returns the list of elements [bi] such as + [f ai = Some bi] (when [f] returns [None], the corresponding element of + [l] is discarded). *) + + val find_map : ('a -> 'b option) -> 'a list -> 'b + (** [find_map pred list] finds the first element of [list] for which + [pred element] returns [Some r]. It returns [r] immediately + once found or raises [Not_found] if no element matches the + predicate. See also {!filter_map}. *) + + val split_nth : int -> 'a list -> 'a list * 'a list + (** [split_nth n l] returns two lists [l1] and [l2], [l1] containing the + first [n] elements of [l] and [l2] the others. Raise [Invalid_index] if + [n] is outside of [l] size bounds. *) + + val remove : 'a list -> 'a -> 'a list + (** [remove l x] returns the list [l] without the first element [x] found + or returns [l] if no element is equal to [x]. Elements are compared + using ( = ). *) + + val remove_if : ('a -> bool) -> 'a list -> 'a list + (** [remove_if cmp l] is similar to [remove], but with [cmp] used + instead of ( = ). *) + + val remove_all : 'a list -> 'a -> 'a list + (** [remove_all l x] is similar to [remove] but removes all elements that + are equal to [x] and not only the first one. *) + + val take : int -> 'a list -> 'a list + (** [take n l] returns up to the [n] first elements from list [l], if + available. *) + + val drop : int -> 'a list -> 'a list + (** [drop n l] returns [l] without the first [n] elements, or the empty + list if [l] have less than [n] elements. *) + + val takewhile : ('a -> bool) -> 'a list -> 'a list + (** [takewhile f xs] returns the first elements of list [xs] + which satisfy the predicate [f]. *) + + val dropwhile : ('a -> bool) -> 'a list -> 'a list + (** [dropwhile f xs] returns the list [xs] with the first + elements satisfying the predicate [f] dropped. *) + + (** {6 Enum functions} *) + + (** Enumerations are important in ExtLib, they are a good way to work with + abstract enumeration of elements, regardless if they are located in a list, + an array, or a file. *) + + val enum : 'a list -> 'a Enum.t + (** Returns an enumeration of the elements of a list. *) + + val of_enum : 'a Enum.t -> 'a list + (** Build a list from an enumeration. *) + + (** {6 Compatibility functions} *) + + val cons : 'a -> 'a list -> 'a list + + val assoc_opt : 'a -> ('a * 'b) list -> 'b option + val assq_opt : 'a -> ('a * 'b) list -> 'b option + + val find_opt : ('a -> bool) -> 'a list -> 'a option + val nth_opt : 'a list -> int -> 'a option + + val compare_lengths : 'a list -> 'b list -> int + val compare_length_with : 'a list -> int -> int + + (** {6 Modified functions} *) + + (** Some minor modifications have been made to the specification of some + functions, especially concerning exceptions raised. *) + + val hd : 'a list -> 'a + (** Returns the first element of the list or raise [Empty_list] if the + list is empty. *) + + val tl : 'a list -> 'a list + (** Returns the list without its first elements or raise [Empty_list] if + the list is empty. *) + + val nth : 'a list -> int -> 'a + (** [nth l n] returns the n-th element of the list [l] or raise + [Invalid_index] is the index is outside of [l] bounds. *) + + val sort : ?cmp:('a -> 'a -> int) -> 'a list -> 'a list + (** Sort the list using optional comparator (by default [compare]). *) + + (** The following functions have been improved so all of them are + tail-recursive. They have also been modified so they no longer + raise [Invalid_arg] but [Different_list_size] when used on two + lists having a different number of elements. *) + + val map2 : ('a -> 'b -> 'c) -> 'a list -> 'b list -> 'c list + val rev_map2 : ('a -> 'b -> 'c) -> 'a list -> 'b list -> 'c list + val iter2 : ('a -> 'b -> unit) -> 'a list -> 'b list -> unit + val fold_left2 : ('a -> 'b -> 'c -> 'a) -> 'a -> 'b list -> 'c list -> 'a + val fold_right2 : ('a -> 'b -> 'c -> 'c) -> 'a list -> 'b list -> 'c -> 'c + val for_all2 : ('a -> 'b -> bool) -> 'a list -> 'b list -> bool + val exists2 : ('a -> 'b -> bool) -> 'a list -> 'b list -> bool + val combine : 'a list -> 'b list -> ('a * 'b) list + + + (** {6 Improved functions} *) + + (** The following functions have the same behavior as the [List] + module ones but are tail-recursive. That means they will not + cause a [Stack_overflow] when used on very long list. + + The implementation might be a little more slow in bytecode, + but compiling in native code will not affect performances. *) + + val map : ('a -> 'b) -> 'a list -> 'b list + val append : 'a list -> 'a list -> 'a list + val flatten : 'a list list -> 'a list + val concat : 'a list list -> 'a list + val fold_right : ('a -> 'b -> 'b) -> 'a list -> 'b -> 'b + val remove_assoc : 'a -> ('a * 'b) list -> ('a * 'b) list + val remove_assq : 'a -> ('a * 'b) list -> ('a * 'b) list + val split : ('a * 'b) list -> 'a list * 'b list + + (** The following functions were already tail-recursive in the [List] + module but were using [List.rev] calls. The new implementations + have better performances. *) + + val filter : ('a -> bool) -> 'a list -> 'a list + val find_all : ('a -> bool) -> 'a list -> 'a list + val partition : ('a -> bool) -> 'a list -> 'a list * 'a list + + (** {6 Older functions} *) + + (** These functions are already part of the Ocaml standard library + and have not been modified. Please refer to the Ocaml Manual for + documentation. *) + + val length : 'a list -> int + val rev_append : 'a list -> 'a list -> 'a list + val rev : 'a list -> 'a list + val rev_map : ('a -> 'b) -> 'a list -> 'b list + val iter : ('a -> unit) -> 'a list -> unit + val fold_left : ('b -> 'a -> 'b) -> 'b -> 'a list -> 'b + val for_all : ('a -> bool) -> 'a list -> bool + val exists : ('a -> bool) -> 'a list -> bool + val find : ('a -> bool) -> 'a list -> 'a + + val mem : 'a -> 'a list -> bool + val memq : 'a -> 'a list -> bool + val assoc : 'a -> ('a * 'b) list -> 'b + val assq : 'a -> ('a * 'b) list -> 'b + val mem_assoc : 'a -> ('a * 'b) list -> bool + val mem_assq : 'a -> ('a * 'b) list -> bool + + + val stable_sort : ('a -> 'a -> int) -> 'a list -> 'a list + val fast_sort : ('a -> 'a -> int) -> 'a list -> 'a list + val merge : ('a -> 'a -> int) -> 'a list -> 'a list -> 'a list + +#if OCAML >= 402 + val sort_uniq : ('a -> 'a -> int) -> 'a list -> 'a list + (** Same as {!List.sort}, but also remove duplicates. + @since 4.02.0 *) +#endif + +#if OCAML >= 407 + (** [*_seq] functions were introduced in OCaml 4.07.0, and are _not_ implemented in extlib for older OCaml versions *) + val to_seq : 'a list -> 'a Seq.t + val of_seq : 'a Seq.t -> 'a list +#endif + + (** {6 Exceptions} *) + + exception Empty_list + (** [Empty_list] is raised when an operation applied on an empty list + is invalid : [hd] for example. *) + + exception Invalid_index of int + (** [Invalid_index] is raised when an indexed access on a list is + out of list bounds. *) + + exception Different_list_size of string + (** [Different_list_size] is raised when applying functions such as + [iter2] on two lists having different size. *) + + +end + +val ( @ ) : 'a list -> 'a list -> 'a list +(** the new implementation for ( @ ) operator, see [List.append]. *) diff --git a/src/extString.ml b/src/extString.ml new file mode 100644 index 0000000..b000dd4 --- /dev/null +++ b/src/extString.ml @@ -0,0 +1,333 @@ +(* + * ExtString - Additional functions for string manipulations. + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +exception Invalid_string + +open ExtBytes + +module String = struct + +include String + +#if OCAML < 402 +let init len f = + let s = Bytes.create len in + for i = 0 to len - 1 do + Bytes.unsafe_set s i (f i) + done; + (* 's' doesn't escape and will never be mutated again *) + Bytes.unsafe_to_string s +#endif + +let starts_with str p = + if length str < length p then + false + else + let rec loop str p i = + if i = length p then true else + if unsafe_get str i <> unsafe_get p i then false + else loop str p (i+1) + in + loop str p 0 + +let ends_with s e = + if length s < length e then + false + else + let rec loop s e i = + if i = length e then true else + if unsafe_get s (length s - length e + i) <> unsafe_get e i then false + else loop s e (i+1) + in + loop s e 0 + +let find_from str pos sub = + let sublen = length sub in + if sublen = 0 then + 0 + else + let found = ref 0 in + let len = length str in + try + for i = pos to len - sublen do + let j = ref 0 in + while unsafe_get str (i + !j) = unsafe_get sub !j do + incr j; + if !j = sublen then begin found := i; raise Exit; end; + done; + done; + raise Invalid_string + with + Exit -> !found + +let find str sub = find_from str 0 sub + +let exists str sub = + try + ignore(find str sub); + true + with + Invalid_string -> false + +let strip ?(chars=" \t\r\n") s = + let p = ref 0 in + let l = length s in + while !p < l && contains chars (unsafe_get s !p) do + incr p; + done; + let p = !p in + let l = ref (l - 1) in + while !l >= p && contains chars (unsafe_get s !l) do + decr l; + done; + sub s p (!l - p + 1) + +#if OCAML < 400 +let trim s = strip ~chars:" \t\r\n\012" s +#endif + +let split str sep = + let p = find str sep in + let len = length sep in + let slen = length str in + sub str 0 p, sub str (p + len) (slen - p - len) + +let nsplit str sep = + if str = "" then [] + else if sep = "" then raise Invalid_string + else + let rec loop acc pos = + if pos > String.length str then + List.rev acc + else + let i = try find_from str pos sep with Invalid_string -> String.length str in + loop (String.sub str pos (i - pos) :: acc) (i + String.length sep) + in + loop [] 0 + +let join = concat + +let slice = + let clip max x = if x > max then max else if x < 0 then 0 else x in + fun ?(first=0) ?(last=Sys.max_string_length) s -> + let len = String.length s in + let i = if first = 0 then 0 else clip len (if first < 0 then len + first else first) in + let j = if last = Sys.max_string_length then len else clip len (if last < 0 then len + last else last) in + if i>=j || i=len then + make 0 ' ' + else + sub s i (j-i) + +let lchop s = + if s = "" then "" else sub s 1 (length s - 1) + +let rchop s = + if s = "" then "" else sub s 0 (length s - 1) + +let of_int = string_of_int + +let of_float = string_of_float + +let of_char = make 1 + +let to_int s = + try + int_of_string s + with + _ -> raise Invalid_string + +let to_float s = + try + float_of_string s + with + _ -> raise Invalid_string + +let enum s = + let l = length s in + let rec make i = + Enum.make + ~next:(fun () -> + if !i = l then + raise Enum.No_more_elements + else + let p = !i in + incr i; + unsafe_get s p + ) + ~count:(fun () -> l - !i) + ~clone:(fun () -> make (ref !i)) + in + make (ref 0) + +let of_enum e = + let l = Enum.count e in + let s = Bytes.create l in + let i = ref 0 in + Enum.iter (fun c -> Bytes.unsafe_set s !i c; incr i) e; + (* 's' doesn't escape and will never be mutated again *) + Bytes.unsafe_to_string s + +#if OCAML < 400 +let map f s = + let len = length s in + let sc = Bytes.create len in + for i = 0 to len - 1 do + Bytes.unsafe_set sc i (f (unsafe_get s i)) + done; + (* 'sc' doesn't escape and will never be mutated again *) + Bytes.unsafe_to_string sc +#endif + +#if OCAML < 402 +let mapi f s = + let len = length s in + let sc = Bytes.create len in + for i = 0 to len - 1 do + Bytes.unsafe_set sc i (f i (unsafe_get s i)) + done; + (* 'sc' doesn't escape and will never be mutated again *) + Bytes.unsafe_to_string sc +#endif + +#if OCAML < 400 +let iteri f s = + for i = 0 to length s - 1 do + let () = f i (unsafe_get s i) in () + done +#endif + +(* fold_left and fold_right by Eric C. Cooper *) +let fold_left f init str = + let n = String.length str in + let rec loop i result = + if i = n then result + else loop (i + 1) (f result str.[i]) + in + loop 0 init + +let fold_right f str init = + let n = String.length str in + let rec loop i result = + if i = 0 then result + else + let i' = i - 1 in + loop i' (f str.[i'] result) + in + loop n init + +(* explode and implode from the OCaml Expert FAQ. *) +let explode s = + let rec exp i l = + if i < 0 then l else exp (i - 1) (s.[i] :: l) in + exp (String.length s - 1) [] + +let implode l = + let res = Bytes.create (List.length l) in + let rec imp i = function + | [] -> res + | c :: l -> Bytes.set res i c; imp (i + 1) l in + let s = imp 0 l in + (* 's' doesn't escape and will never be mutated again *) + Bytes.unsafe_to_string s + +let replace_chars f s = + let len = String.length s in + let tlen = ref 0 in + let rec loop i acc = + if i = len then + acc + else + let s = f (unsafe_get s i) in + tlen := !tlen + length s; + loop (i+1) (s :: acc) + in + let strs = loop 0 [] in + let sbuf = Bytes.create !tlen in + let pos = ref !tlen in + let rec loop2 = function + | [] -> () + | s :: acc -> + let len = length s in + pos := !pos - len; + blit s 0 sbuf !pos len; + loop2 acc + in + loop2 strs; + (* 'sbuf' doesn't escape and will never be mutated again *) + Bytes.unsafe_to_string sbuf + +let replace ~str ~sub ~by = + try + let i = find str sub in + (true, (slice ~last:i str) ^ by ^ + (slice ~first:(i+(String.length sub)) str)) + with + Invalid_string -> (false, String.sub str 0 (String.length str)) + +#if OCAML < 403 +let uppercase_ascii = uppercase +let lowercase_ascii = lowercase +let capitalize_ascii = capitalize +let uncapitalize_ascii = uncapitalize + +let equal = (=) +#endif + +#if OCAML < 404 +let split_on_char sep s = + let r = ref [] in + let j = ref (length s) in + for i = length s - 1 downto 0 do + if unsafe_get s i = sep then begin + r := sub s (i + 1) (!j - i - 1) :: !r; + j := i + end + done; + sub s 0 !j :: !r +#endif + +#if OCAML < 405 + +let rec index_rec_opt s lim i c = + if i >= lim then None else + if unsafe_get s i = c then Some i else index_rec_opt s lim (i + 1) c + +let index_opt s c = index_rec_opt s (length s) 0 c + +let index_from_opt s i c = + let l = length s in + if i < 0 || i > l then invalid_arg "ExtString.index_from_opt" else + index_rec_opt s l i c + +let rec rindex_rec_opt s i c = + if i < 0 then None else + if unsafe_get s i = c then Some i else rindex_rec_opt s (i - 1) c + +let rindex_opt s c = rindex_rec_opt s (length s - 1) c + +let rindex_from_opt s i c = + if i < -1 || i >= length s then + invalid_arg "ExtString.rindex_from_opt" + else + rindex_rec_opt s i c + +#endif + +end diff --git a/src/extString.mli b/src/extString.mli new file mode 100644 index 0000000..a92a583 --- /dev/null +++ b/src/extString.mli @@ -0,0 +1,223 @@ +(* + * ExtString - Additional functions for string manipulations. + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** Additional functions for string manipulations. *) + +open ExtBytes + +exception Invalid_string + +module String : + sig + + (** {6 New Functions} *) + + val init : int -> (int -> char) -> string + (** [init l f] returns the string of length [l] with the chars + f 0 , f 1 , f 2 ... f (l-1). *) + + val find : string -> string -> int + (** [find s x] returns the starting index of the string [x] + within the string [s] or raises [Invalid_string] if [x] + is not a substring of [s]. *) + + val find_from : string -> int -> string -> int + (** [find s i x] returns the starting index of the string [x] + within the string [s] (starting search from position [i]) or + raises [Invalid_string] if no such substring exists. + [find s x] is equivalent to [find_from s 0 x]. *) + + val split : string -> string -> string * string + (** [split s sep] splits the string [s] between the first + occurrence of [sep]. + raises [Invalid_string] if the separator is not found. *) + + val nsplit : string -> string -> string list + (** [nsplit s sep] splits the string [s] into a list of strings + which are separated by [sep]. + [nsplit "" _] returns the empty list. + @raise Invalid_string if [sep] is empty string. *) + + val join : string -> string list -> string + (** Same as [concat] *) + + val slice : ?first:int -> ?last:int -> string -> string + (** [slice ?first ?last s] returns a "slice" of the string + which corresponds to the characters [s.[first]], + [s.[first+1]], ..., [s[last-1]]. Note that the character at + index [last] is {b not} included! If [first] is omitted it + defaults to the start of the string, i.e. index 0, and if + [last] is omitted is defaults to point just past the end of + [s], i.e. [length s]. Thus, [slice s] is equivalent to + [copy s]. + + Negative indexes are interpreted as counting from the end of + the string. For example, [slice ~last:-2 s] will return the + string [s], but without the last two characters. + + This function {b never} raises any exceptions. If the + indexes are out of bounds they are automatically clipped. + *) + + val lchop : string -> string + (** Returns the same string but without the first character. + does nothing if the string is empty. *) + + val rchop : string -> string + (** Returns the same string but without the last character. + does nothing if the string is empty. *) + + val of_int : int -> string + (** Returns the string representation of an int. *) + + val of_float : float -> string + (** Returns the string representation of an float. *) + + val of_char : char -> string + (** Returns a string containing one given character. *) + + val to_int : string -> int + (** Returns the integer represented by the given string or + raises [Invalid_string] if the string does not represent an integer.*) + + val to_float : string -> float + (** Returns the float represented by the given string or + raises Invalid_string if the string does not represent a float. *) + + val ends_with : string -> string -> bool + (** [ends_with s x] returns true if the string [s] is ending with [x]. *) + + val starts_with : string -> string -> bool + (** [starts_with s x] return true if [s] is starting with [x]. *) + + val enum : string -> char Enum.t + (** Returns an enumeration of the characters of a string.*) + + val of_enum : char Enum.t -> string + (** Creates a string from a character enumeration. *) + + val map : (char -> char) -> string -> string + (** [map f s] returns a string where all characters [c] in [s] have been + replaced by [f c]. **) + + val mapi : (int -> char -> char) -> string -> string + (** [map f s] returns a string where all characters [c] in [s] have been replaced by [f i s.\[i\]]. **) + + val iteri : (int -> char -> unit) -> string -> unit + (** Call [f i s.\[i\]] for every position [i] in string *) + + val fold_left : ('a -> char -> 'a) -> 'a -> string -> 'a + (** [fold_left f a s] is + [f (... (f (f a s.[0]) s.[1]) ...) s.[n-1]] *) + val fold_right : (char -> 'a -> 'a) -> string -> 'a -> 'a + (** [fold_right f s b] is + [f s.[0] (f s.[1] (... (f s.[n-1] b) ...))] *) + + val explode : string -> char list + (** [explode s] returns the list of characters in the string [s]. *) + val implode : char list -> string + (** [implode cs] returns a string resulting from concatenating + the characters in the list [cs]. *) + + val strip : ?chars:string -> string -> string + (** Returns the string without the chars if they are at the beginning or + at the end of the string. By default chars are " \t\r\n". *) + + val exists : string -> string -> bool + (** [exists str sub] returns true if [sub] is a substring of [str] or + false otherwise. *) + + val replace_chars : (char -> string) -> string -> string + (** [replace_chars f s] returns a string where all chars [c] of [s] have been + replaced by the string returned by [f c]. *) + + val replace : str:string -> sub:string -> by:string -> bool * string + (** [replace ~str ~sub ~by] returns a tuple constisting of a boolean + and a string where the first occurrence of the string [sub] + within [str] has been replaced by the string [by]. The boolean + is true if a subtitution has taken place. *) + + (** Return a copy of the argument, without leading and trailing + whitespace. The characters regarded as whitespace are: [' '], + ['\012'], ['\n'], ['\r'], and ['\t']. + (Note that it is different from {!strip} defaults). *) + val trim : string -> string + + (** {6 Compatibility Functions} *) + + val uppercase_ascii : string -> string + val lowercase_ascii : string -> string + val capitalize_ascii : string -> string + val uncapitalize_ascii : string -> string + + val split_on_char : char -> string -> string list + + (** {6 Older Functions} *) + + (** Please refer to the Ocaml Manual for documentation of these + functions. *) + + val length : string -> int + val get : string -> int -> char + val set : Bytes.t -> int -> char -> unit + val create : int -> Bytes.t + val make : int -> char -> string + val copy : string -> string + val sub : string -> int -> int -> string + val fill : Bytes.t -> int -> int -> char -> unit + val blit : string -> int -> Bytes.t -> int -> int -> unit + val concat : string -> string list -> string + val iter : (char -> unit) -> string -> unit + val escaped : string -> string + val index : string -> char -> int + val index_opt : string -> char -> int option + val rindex : string -> char -> int + val rindex_opt : string -> char -> int option + val index_from : string -> int -> char -> int + val index_from_opt : string -> int -> char -> int option + val rindex_from : string -> int -> char -> int + val rindex_from_opt : string -> int -> char -> int option + val contains : string -> char -> bool + val contains_from : string -> int -> char -> bool + val rcontains_from : string -> int -> char -> bool + val uppercase : string -> string + val lowercase : string -> string + val capitalize : string -> string + val uncapitalize : string -> string + + type t = string + val compare : t -> t -> int + val equal : t -> t -> bool + +#if OCAML >= 407 + (** [*_seq] functions were introduced in OCaml 4.07.0, and are _not_ implemented in extlib for older OCaml versions *) + val to_seq : t -> char Seq.t + val to_seqi : t -> (int * char) Seq.t + val of_seq : char Seq.t -> t +#endif + + (**/**) + + external unsafe_get : string -> int -> char = "%string_unsafe_get" + val unsafe_set : Bytes.t -> int -> char -> unit + val unsafe_blit : string -> int -> Bytes.t -> int -> int -> unit + val unsafe_fill : Bytes.t -> int -> int -> char -> unit + + end diff --git a/src/global.ml b/src/global.ml new file mode 100644 index 0000000..b15aaf0 --- /dev/null +++ b/src/global.ml @@ -0,0 +1,40 @@ +(* + * Global - Mutable global variable + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +exception Global_not_initialized of string + +type 'a t = ('a option ref * string) + +let empty name = ref None,name + +let name = snd + +let set (r,_) v = r := Some v + +let get (r,name) = + match !r with + | None -> raise (Global_not_initialized name) + | Some v -> v + +let undef (r,_) = r := None + +let isdef (r,_) = !r <> None + +let opt (r,_) = !r diff --git a/src/global.mli b/src/global.mli new file mode 100644 index 0000000..584c49e --- /dev/null +++ b/src/global.mli @@ -0,0 +1,58 @@ +(* + * Global - Mutable global variable + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** Mutable global variable. + + Often in OCaml you want to have a global variable, which is mutable + and uninitialized when declared. You can use a ['a option ref] but + this is not very convenient. The Global module provides functions + to easily create and manipulate such variables. +*) + +type 'a t +(** Abstract type of a global *) + +exception Global_not_initialized of string +(** Raised when a global variable is accessed without first having been + assigned a value. The parameter contains the name of the global. *) + +val empty : string -> 'a t +(** Returns an new named empty global. The name of the global can be any + string. It identifies the global and makes debugging easier. *) + +val name : 'a t -> string +(** Retrieve the name of a global. *) + +val set : 'a t -> 'a -> unit +(** Set the global value contents. *) + +val get : 'a t -> 'a +(** Get the global value contents - raise Global_not_initialized if not + defined. *) + +val undef : 'a t -> unit +(** Reset the global value contents to undefined. *) + +val isdef : 'a t -> bool + (** Return [true] if the global value has been set. *) + +val opt : 'a t -> 'a option + (** Return [None] if the global is undefined, else [Some v] where v is the + current global value contents. *) diff --git a/src/install.ml b/src/install.ml new file mode 100644 index 0000000..5406cbe --- /dev/null +++ b/src/install.ml @@ -0,0 +1,262 @@ +(* + * Install - ExtLib installation + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +open Printf + +type path = + | PathUnix + | PathDos + +let modules_min = [ + "extBytes"; + "enum"; + "bitSet"; + "dynArray"; + "extArray"; + "extHashtbl"; + "extList"; + "extString"; + "global"; + "extBuffer"; + "IO"; + "option"; + "pMap"; + "std"; + "base64"; + "refList"; + "optParse"; + "dllist"; +] + +let modules_compat = [ + "uChar"; + "uTF8"; + "unzip"; +] + +(* ocaml/mingw uses unix extensions but will have Sys.os_type = "Win32" :( *) +let obj_ext , lib_ext , cp_cmd , path_type = match Sys.os_type with + | "Unix" | "Cygwin" | "MacOS" -> ".o" , ".a" , "cp", PathUnix + | "Win32" -> ".obj" , ".lib" , "copy", PathDos + | _ -> failwith "Unknown OS" + +let run cmd = + print_endline cmd; + let ecode = Sys.command cmd in + if ecode <> 0 then failwith (sprintf "Exit Code %d - Stopped" ecode) + +let copy file dest = + if dest <> "" && dest <> "." then begin + print_endline ("Installing " ^ file); + let path = dest ^ file in + (try Sys.remove path with _ -> ()); + try + Sys.rename file path; + with + _ -> failwith "Aborted" + end + +let get_version () = + let ch = open_in "../Makefile" in + let rec loop () = + let s = input_line ch in + try + Scanf.sscanf s " RELEASE := %s %!" (fun s -> s) + with + _ -> loop () + in + try + let s = loop () in close_in_noerr ch; s + with _ -> + close_in_noerr ch; + failwith "No RELEASE present in ../Makefile" + +let complete_path p = + if p = "" then + p + else + let c = p.[String.length p - 1] in + if c = '/' || c = '\\' then + p + else + p ^ (match path_type with PathUnix -> "/" | PathDos -> "\\") + +let remove file = + try + Sys.remove file + with + _ -> prerr_endline ("Warning : failed to delete " ^ file) + +let is_findlib() = + let findlib = Sys.command (if Sys.os_type = "Win32" then "ocamlfind printconf 2>NUL" else "ocamlfind printconf") = 0 in + if findlib then print_endline "Using Findlib"; + findlib + +type install_dir = Findlib | Dir of string + +let install() = + let autodir = ref None in + let autodoc = ref None in + let autobyte = ref false in + let autonative = ref false in + let autofull = ref None in + let version = get_version () in + let usage = sprintf "ExtLib installation program v%s\n(C) 2003 Nicolas Cannasse" version in + Arg.parse [ + ("-d", Arg.String (fun s -> autodir := Some s) , " : install in target directory"); + ("-b", Arg.Unit (fun () -> autobyte := true) , ": byte code installation"); + ("-n", Arg.Unit (fun () -> autonative := true) , ": native code installation"); + ("-min", Arg.Unit (fun () -> autofull := Some false) , ": exclude potentially conflicting modules (recommended)"); + ("-full", Arg.Unit (fun () -> autofull := Some true) , ": include all modules (compatibility)"); + ("-doc", Arg.Unit (fun () -> autodoc := Some true) , ": documentation installation"); + ("-nodoc", Arg.Unit (fun () -> autodoc := Some false) , ": disable documentation installation"); + ] (fun s -> raise (Arg.Bad s)) usage; + let findlib = is_findlib () in + let install_dir = ( + match !autodir with + | Some dir -> + if not !autobyte && not !autonative && !autodoc = None then failwith "Nothing to do."; + Dir (complete_path dir) + | None -> + let byte, native = + if !autobyte || !autonative then + (!autobyte, !autonative) + else begin + printf "Choose one of the following :\n1- Bytecode installation only\n2- Native installation only\n[3]- Both Native and Bytecode installation\n> "; + (match read_line() with + | "1" -> true, false + | "2" -> false, true + | "" | "3" -> true, true + | _ -> failwith "Invalid choice, exit.") + end + in + let dest = + if not findlib then begin + printf "Choose installation directory :\n> "; + let dest = complete_path (read_line()) in + (try + close_out (open_out (dest ^ "test.file")); + Sys.remove (dest ^ "test.file"); + with + _ -> failwith ("Directory " ^ dest ^ " does not exists or cannot be written.")); + Dir dest; + end else Findlib in + autobyte := byte; + autonative := native; + dest + ) in + let modules = + let full = + match !autofull with + | Some f -> f + | None -> + printf "Do you want to exclude potentially conflicting modules (Unzip UChar UTF8) from build ([Y]/N) ?\n> "; + (match read_line() with + | "" | "y" | "Y" -> false + | "n" | "N" -> true + | _ -> failwith "Invalid choice, exit.") + in + match full with + | true -> modules_min @ modules_compat + | false -> modules_min + in + let m_list suffix = String.concat " " (List.map (fun m -> m ^ "." ^ suffix) modules) in + let doc = + match !autodoc with + | Some doc -> doc + | None -> + printf "Do you want to generate ocamldoc documentation ([Y]/N) ?\n> "; + (match read_line() with + | "" | "y" | "Y" -> true + | "n" | "N" -> false + | _ -> failwith "Invalid choice, exit.") + in + let doc_dir = + match install_dir with + | Findlib -> "doc" + | Dir install_dir -> Filename.concat install_dir "extlib-doc" + in + if doc && not (Sys.file_exists doc_dir) then run (sprintf "mkdir %s" doc_dir); + (* generate *) + let defines = + let version = Scanf.sscanf Sys.ocaml_version "%d.%d." (fun major minor -> major * 100 + minor) in + sprintf "-D 'OCAML %d' %s" version (if Sys.word_size = 32 then "-D WORD_SIZE_32 " else ""); + in + let pp = sprintf "-pp \"cppo %s\"" defines in + let ocamlc fmt = ksprintf (fun s -> run (sprintf "ocamlc %s %s" pp s)) fmt in + let ocamlopt fmt = ksprintf (fun s -> run (sprintf "ocamlopt %s %s" pp s)) fmt in + ocamlc "-i extBytes.ml > extBytes.mli"; + (* compile mli *) + ocamlc "-c %s" (m_list "mli"); + (* compile ml *) + if !autobyte then begin + List.iter (fun m -> ocamlc "-g -c %s.ml" m) modules; + ocamlc "-g -a -o extLib.cma %s extLib.ml" (m_list "cmo"); + List.iter (fun m -> remove (m ^ ".cmo")) modules; + remove "extLib.cmo"; + end; + if !autonative then begin + List.iter (fun m -> ocamlopt "-g -c %s.ml" m) modules; + ocamlopt "-g -a -o extLib.cmxa %s extLib.ml" (m_list "cmx"); + List.iter (fun m -> remove (m ^ obj_ext)) modules; + remove ("extLib" ^ obj_ext); + end; + if doc then begin + run (sprintf "ocamldoc %s -sort -html -d %s %s" pp doc_dir (m_list "mli")); + if doc_dir <> "doc" then (* style.css is already there *) + run ((match path_type with + | PathDos -> sprintf "%s doc\\style.css %s\\style.css"; + | PathUnix -> sprintf "%s doc/style.css %s/style.css") cp_cmd doc_dir); + end; + match install_dir with + Findlib -> + let files = Buffer.create 0 in + List.iter (fun m -> + Buffer.add_string files (m ^ ".cmi "); + Buffer.add_string files (m ^ ".mli ")) + modules; + Buffer.add_string files "extLib.cmi "; + if !autobyte then Buffer.add_string files "extLib.cma "; + if !autonative then begin + Buffer.add_string files "extLib.cmxa extLib.cmx "; + List.iter (fun m -> Buffer.add_string files (m ^ ".cmx ")) modules; + end; + let optional = if !autonative then "-optional extLib.lib extLib.a" else "" in + let files = Buffer.contents files in + run (sprintf "ocamlfind install -patch-version %s extlib META %s %s" version files optional); + | Dir install_dir -> + List.iter (fun m -> + copy (m ^ ".cmi") install_dir; + if !autonative then copy (m ^ ".cmx") install_dir + ) ("extLib" :: modules); + if !autobyte then copy "extLib.cma" install_dir; + if !autonative then begin + copy "extLib.cmxa" install_dir; + copy ("extLib" ^ lib_ext) install_dir; + end; +;; +try + print_endline "\nATTENTION! install.ml is deprecated and will be removed in next release. Please use Makefile instead.\n"; + install(); + print_endline "Done."; +with + Failure msg -> + prerr_endline msg; + exit 1 diff --git a/src/optParse.ml b/src/optParse.ml new file mode 100644 index 0000000..a3d8ef3 --- /dev/null +++ b/src/optParse.ml @@ -0,0 +1,722 @@ +(* + * optParse - Functions for parsing command line arguments. + * Copyright (C) 2004 Bardur Arantsson + * + * Heavily influenced by the optparse.py module from the Python + * standard library, but with lots of adaptation to the 'Ocaml Way' + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) +open Printf +open ExtString +open ExtList + + +let terminal_width = + try + int_of_string (Sys.getenv "COLUMNS") (* Might as well use it if it's there... *) + with + Failure _ -> 80 + | Not_found -> 80 + +module GetOpt = + struct + + type action = string -> string list -> unit + type long_opt = string * int * action + type short_opt = char * int * action + + exception Error of (string * string) + + let split1 haystack needle = + try + let (h, x) = String.split haystack needle in h, [x] + with + Invalid_string -> haystack, [] + + let find_opt format_name options s = + let rec loop l = + match l with + (x, y, z) :: t -> if x = s then x, y, z else loop t + | [] -> raise (Error (format_name s, "no such option")) + in + loop options + + let find_short_opt options = find_opt (fun c -> sprintf "-%c" c) options + + let find_long_opt options = find_opt (fun s -> "--" ^ s) options + + let parse other find_short_opt find_long_opt args = + let rec loop args = + let rec gather_args name n args = + try + List.split_nth n args + with + List.Invalid_index _ -> + raise (Error (name, "missing required arguments")) + in + let gather_long_opt s args = + let (h, t) = split1 s "=" in + let (_, nargs, action) = find_long_opt (String.slice ~first:2 h) in + let (accum, args') = gather_args h (nargs - List.length t) args in + action h (t @ accum); args' + in + let rec gather_short_opt_concat seen_args s k args = + if k < String.length s then + let ostr = sprintf "-%c" s.[k] + and (_, nargs, action) = find_short_opt s.[k] in + if nargs = 0 then + begin + action ostr []; + gather_short_opt_concat seen_args s (k + 1) args + end + else if not seen_args then + let (accum, args') = gather_args ostr nargs args in + action ostr accum; gather_short_opt_concat true s (k + 1) args' + else + raise + (Error + (sprintf "-%c" s.[k], + sprintf "option list '%s' already contains an option requiring an argument" + s)) + else args + in + let gather_short_opt s k args = + let ostr = sprintf "-%c" s.[k] in + let (_, nargs, action) = find_short_opt s.[k] in + if nargs = 0 then gather_short_opt_concat false s k args + else + let (accum, args') = + let h = String.slice ~first:(k+1) s in + if String.length h = 0 then gather_args ostr nargs args + else + let (t, args'') = gather_args ostr (nargs - 1) args in + h :: t, args'' + in + action ostr accum; args' + in + match args with + [] -> [] + | arg :: args' -> + if arg = "--" then args' + else if String.starts_with arg "--" then + loop (gather_long_opt arg args') + else if arg = "-" then begin other arg; loop args' end + else if String.starts_with arg "-" then + loop (gather_short_opt arg 1 args') + else begin other arg; loop args' end + in + let args' = loop args in List.iter other args' + end + + +module Opt = + struct + + exception No_value + exception Option_error of string * string + exception Option_help + + type 'a t = { + option_set : string -> string list -> unit; + option_set_value : 'a -> unit; + option_get : unit -> 'a option; + option_metavars : string list; + option_defhelp : string option + } + + let get opt = + match opt.option_get () with + Some x -> x + | None -> raise No_value + + let set opt v = + opt.option_set_value v + + let is_set opt = Option.is_some (opt.option_get ()) + + let opt opt = opt.option_get () + + let value_option metavar default coerce errfmt = + let data = ref default in + { + option_metavars = [metavar]; + option_defhelp = None; + option_get = (fun _ -> !data); + option_set_value = (fun x -> data := Some x); + option_set = + (fun option args -> + let arg = List.hd args in + try + data := Some (coerce arg) + with + exn -> raise (Option_error (option, errfmt exn arg))) + } + + let callback_option metavar coerce errfmt f = + { + option_metavars = [metavar]; + option_defhelp = None; + option_get = (fun _ -> Some ()); + option_set_value = (fun () -> ()); + option_set = + (fun option args -> + let arg = List.hd args in + let datum = ref None in + begin + try + datum := Some (coerce arg) + with + exn -> raise (Option_error (option, errfmt exn arg)) + end; + + Option.may f !datum) + } + end + +module StdOpt = + struct + + open Opt + + let store_const ?default const = + let data = ref default in + { + option_metavars = []; + option_defhelp = None; + option_get = (fun _ -> !data); + option_set_value = (fun x -> data := Some x); + option_set = fun _ _ -> data := Some const + } + + let store_true () = store_const ~default:false true + + let store_false () = store_const ~default:true false + + let int_option ?default ?(metavar = "INT") () = + value_option metavar default int_of_string + (fun _ s -> sprintf "invalid integer value '%s'" s) + + let int_callback ?(metavar = "INT") = + callback_option metavar int_of_string + (fun _ s -> sprintf "invalid integer value '%s'" s) + + let float_option ?default ?(metavar = "FLOAT") () = + value_option metavar default float_of_string + (fun _ s -> sprintf "invalid floating point value '%s'" s) + + let float_callback ?(metavar = "FLOAT") = + callback_option metavar float_of_string + (fun _ s -> sprintf "invalid floating point value '%s'" s) + + let str_option ?default ?(metavar = "STR") () = + value_option metavar default (fun s -> s) (fun _ _ -> "cannot happen") + + let str_callback ?(metavar = "STR") = + callback_option metavar (fun s -> s) (fun _ _ -> "cannot happen") + + let count_option ?(dest = ref 0) ?(increment = 1) () = + { + option_metavars = []; + option_defhelp = None; + option_get = (fun _ -> Some !dest); + option_set_value = (fun x -> dest := x); + option_set = fun _ _ -> dest := !dest + increment + } + + let incr_option ?(dest = ref 0) = + count_option ~dest ~increment:1 + + let decr_option ?(dest = ref 0) = + count_option ~dest ~increment:(-1) + + let help_option () = + { + option_metavars = []; + option_defhelp = Some "show this help message and exit"; + option_get = (fun _ -> raise No_value); + option_set_value = (fun _ -> ()); + option_set = fun _ _ -> raise Option_help + } + + let version_option vfunc = + { + option_metavars = []; + option_defhelp = Some "show program's version and exit"; + option_get = (fun _ -> raise No_value); + option_set_value = (fun _ -> ()); + option_set = fun _ _ -> print_endline (vfunc ()); exit 0 + } + end + + + + +module Formatter = + struct + + (* Note that the whitespace regexps must NOT treat the non-breaking + space character as whitespace. *) + let whitespace = "\t\n\013\014\r " + + let split_into_chunks s = + let buf = Buffer.create (String.length s) in + let flush () = + let s = Buffer.contents buf + in + Buffer.clear buf; + s + in + let rec loop state accum i = + if (i 0 then + loop (not state) (flush () :: accum) i + else + loop (not state) accum i + else + begin + Buffer.add_char buf s.[i]; + loop state accum (i+1) + end + else + if Buffer.length buf > 0 then + flush () :: accum + else + accum + in + List.rev (loop false [] 0) + + let is_whitespace s = + let rec loop i = + if i + let n = tab_size - col mod tab_size in + Buffer.add_string b (spaces n); + expand (i + 1) (col + n) + | '\n' -> + Buffer.add_string b "\n"; + expand (i + 1) 0 + | c -> + Buffer.add_char b c; + expand (i + 1) (col + 1) + in + expand 0 0; + Buffer.contents b + + let wrap ?(initial_indent = 0) ?(subsequent_indent = 0) text _width = + let wrap_chunks_line width acc = + let rec wrap (chunks, cur_line, cur_len) = + match chunks with + [] -> [], cur_line, cur_len + | hd :: tl -> + let l = String.length hd in + if cur_len + l <= width then + wrap (tl, hd :: cur_line, cur_len + l) + else chunks, cur_line, cur_len + in + wrap acc + in + let wrap_long_last_word width (chunks, cur_line, cur_len) = + match chunks with + [] -> [], cur_line, cur_len + | hd :: tl -> + let l = String.length hd in + if l > width then + match cur_line with + [] -> tl, [hd], cur_len + l + | _ -> chunks, cur_line, cur_len + else chunks, cur_line, cur_len + in + let wrap_remove_last_ws (chunks, cur_line, cur_len) = + match cur_line with + [] -> chunks, cur_line, cur_len + | hd :: tl -> + if is_whitespace hd then chunks, tl, cur_len - String.length hd + else chunks, cur_line, cur_len + in + let rec wrap_chunks_lines chunks lines = + let indent = + match lines with + [] -> initial_indent + | _ -> subsequent_indent + in + let width = _width - indent in + match chunks with + hd :: tl -> + if is_whitespace hd && lines <> [] then wrap_chunks_lines tl lines + else (* skip *) + let (chunks', cur_line, _) = + wrap_remove_last_ws + (wrap_long_last_word width + (wrap_chunks_line width (chunks, [], 0))) + in + wrap_chunks_lines chunks' + ((String.make indent ' ' ^ + String.concat "" (List.rev cur_line)) :: + lines) + | [] -> List.rev lines + in + let chunks = split_into_chunks (expand_tabs text) in + wrap_chunks_lines chunks [] + + + let fill ?(initial_indent = 0) ?(subsequent_indent = 0) text width = + String.concat "\n" (wrap ~initial_indent ~subsequent_indent text width) + + + + type t = { + indent : unit -> unit; + dedent : unit -> unit; + format_usage : string -> string; + format_heading : string -> string; + format_description : string -> string; + format_option : char list * string list -> string list -> + string option -> string + } + + let format_option_strings short_first (snames, lnames) metavars = + let metavar = String.concat " " metavars in + let lopts = + List.map + (match metavar with + "" -> (fun z -> sprintf "--%s" z) + | _ -> fun z -> sprintf "--%s=%s" z metavar) + lnames + and sopts = List.map (fun x -> sprintf "-%c%s" x metavar) snames in + match short_first with + true -> String.concat ", " (sopts @ lopts) + | false -> String.concat ", " (lopts @ sopts) + + + let indented_formatter ?level:(extlevel = ref 0) + ?indent:(extindent = ref 0) ?(indent_increment = 2) + ?(max_help_position = 24) ?(width = terminal_width - 1) + ?(short_first = true) () = + let indent = ref 0 + and level = ref 0 in + let help_position = ref max_help_position + and help_width = ref (width - max_help_position) in + { + indent = + (fun () -> + indent := !indent + indent_increment; + level := !level + 1; + extindent := !indent; + extlevel := !level); + + dedent = + (fun () -> + indent := !indent - indent_increment; + level := !level - 1; + assert (!level >= 0); + extindent := !indent; + extlevel := !level); + + format_usage = (fun usage -> sprintf "usage: %s\n" usage); + + format_heading = + (fun heading -> sprintf "%*s%s:\n\n" !indent "" heading); + + format_description = + (fun description -> + let x = + fill ~initial_indent:(!indent) ~subsequent_indent:(!indent) + description (width - !indent) + in + if not (String.ends_with x "\n") then x ^ "\n\n" else x ^ "\n"); + + format_option = + fun names metavars help -> + let opt_width = !help_position - !indent - 2 in + let opt_strings = + format_option_strings short_first names metavars + in + let buf = Buffer.create 256 in + let indent_first = + if String.length opt_strings > opt_width then + begin + bprintf buf "%*s%s\n" !indent "" opt_strings; !help_position + end + else + begin + bprintf buf "%*s%-*s " !indent "" opt_width opt_strings; 0 + end + in + Option.may + (fun option_help -> + let lines = wrap option_help !help_width in + match lines with + h :: t -> + bprintf buf "%*s%s\n" indent_first "" h; + List.iter + (fun x -> bprintf buf "%*s%s\n" !help_position "" x) t + | [] -> ()) + help; + + let contents = + Buffer.contents buf + in + if String.length contents > 0 && not (String.ends_with contents "\n") then + contents ^ "\n" + else + contents + } + + let titled_formatter ?(level = ref 0) ?(indent = ref 0) + ?(indent_increment = 0) ?(max_help_position = 24) + ?(width = terminal_width - 1) ?(short_first = true) + () = + let formatter = + indented_formatter ~level ~indent ~indent_increment ~max_help_position + ~width ~short_first () + in + let format_heading h = + let c = + match !level with + 0 -> '=' + | 1 -> '-' + | _ -> failwith "titled_formatter: Too much indentation" + in + sprintf "%*s%s\n%*s%s\n\n" !indent "" (String.capitalize h) !indent "" + (String.make (String.length h) c) + in + let format_usage usage = + sprintf "%s %s\n" (format_heading "Usage") usage + in + { formatter with + format_usage = format_usage; + format_heading = format_heading + } + end + + + +open Opt +open Formatter + +module OptParser = + struct + + exception Option_conflict of string + + type group = { + og_heading : string; + og_description : string option; + og_options : + ((char list * string list) * string list * string option) RefList.t; + og_children : group RefList.t + } + + type t = { + op_usage : string; + op_status : int; + op_suppress_usage : bool; + op_prog : string; + + op_formatter : Formatter.t; + + op_long_options : GetOpt.long_opt RefList.t; + op_short_options : GetOpt.short_opt RefList.t; + + op_groups : group + } + + let unprogify optparser s = + (snd (String.replace ~str:s ~sub:"%prog" ~by:optparser.op_prog)) + + let add optparser ?(group = optparser.op_groups) ?help ?(hide = false) + ?short_name ?(short_names = []) ?long_name ?(long_names = []) opt = + let lnames = + match long_name with + None -> long_names + | Some x -> x :: long_names + and snames = + match short_name with + None -> short_names + | Some x -> x :: short_names + in + if lnames = [] && snames = [] then + failwith "Options must have at least one name" + else + (* Checking for duplicates: *) + let snames' = + List.fold_left (fun r (x, _, _) -> x :: r) [] + (RefList.to_list optparser.op_short_options) + and lnames' = + List.fold_left (fun r (x, _, _) -> x :: r) [] + (RefList.to_list optparser.op_long_options) + in + let sconf = + List.filter (fun e -> List.exists (( = ) e) snames') snames + and lconf = + List.filter (fun e -> List.exists (( = ) e) lnames') lnames + in + if List.length sconf > 0 then + raise (Option_conflict (sprintf "-%c" (List.hd sconf))) + else if List.length lconf > 0 then + raise (Option_conflict (sprintf "--%s" (List.hd lconf))); + + (* Add to display list. *) + if not hide then + RefList.add group.og_options + ((snames, lnames), opt.option_metavars, + (match help with + None -> opt.option_defhelp + | Some _ -> help)); + + (* Getopt: *) + let nargs = List.length opt.option_metavars in + List.iter + (fun short -> + RefList.add optparser.op_short_options + (short, nargs, opt.option_set)) + snames; + List.iter + (fun long -> + RefList.add optparser.op_long_options + (long, nargs, opt.option_set)) + lnames + + let add_group optparser ?(parent = optparser.op_groups) ?description heading = + let g = + { + og_heading = heading; + og_description = description; + og_options = RefList.empty (); + og_children = RefList.empty () + } + in + RefList.add parent.og_children g; g + + let make ?(usage = "%prog [options]") ?(status = 1) ?description ?version + ?(suppress_usage = false) ?(suppress_help = false) ?prog + ?(formatter = Formatter.indented_formatter ()) () = + let optparser = + { + op_usage = usage; + op_status = status; + op_suppress_usage = suppress_usage; + op_prog = Option.default (Filename.basename Sys.argv.(0)) prog; + op_formatter = formatter; + op_short_options = RefList.empty (); + op_long_options = RefList.empty (); + op_groups = { + og_heading = "options"; + og_options = RefList.empty (); + og_children = RefList.empty (); + og_description = description + } + } + in + Option.may (* Add version option? *) + (fun version -> + add optparser ~long_name:"version" + (StdOpt.version_option + (fun () -> unprogify optparser version))) + version; + if not suppress_help then (* Add help option? *) + add optparser ~short_name:'h' ~long_name:"help" + (StdOpt.help_option ()); + + optparser + + let format_usage optparser eol = + match optparser.op_suppress_usage with + true -> "" + | false -> + unprogify optparser + (optparser.op_formatter.format_usage optparser.op_usage) ^ eol + + let error optparser ?(chn = stderr) ?status message = + fprintf chn "%s%s: %s\n" (format_usage optparser "\n") optparser.op_prog + message; + flush chn; + exit (Option.default optparser.op_status status) + + let usage optparser ?(chn = stdout) () = + let rec loop g = + (* Heading: *) + output_string chn + (optparser.op_formatter.format_heading g.og_heading); + + optparser.op_formatter.indent (); + (* Description: *) + Option.may + (fun x -> + output_string chn (optparser.op_formatter.format_description x)) + g.og_description; + (* Options: *) + RefList.iter + (fun (names, metavars, help) -> + output_string chn + (optparser.op_formatter.format_option names metavars help)) + g.og_options; + (* Child groups: *) + output_string chn "\n"; + RefList.iter loop g.og_children; + + optparser.op_formatter.dedent () + in + output_string chn (format_usage optparser "\n"); + loop optparser.op_groups; + flush chn + + let parse optparser ?(first = 0) ?last argv = + let args = RefList.empty () + and n = + match last with + None -> Array.length argv - first + | Some m -> m - first + 1 + in + begin + try + GetOpt.parse (RefList.push args) + (GetOpt.find_short_opt + (RefList.to_list optparser.op_short_options)) + (GetOpt.find_long_opt (RefList.to_list optparser.op_long_options)) + (Array.to_list (Array.sub argv first n)) + with + GetOpt.Error (opt, errmsg) -> + error optparser (sprintf "option '%s': %s" opt errmsg) + | Option_error (opt, errmsg) -> + error optparser (sprintf "option '%s': %s" opt errmsg) + | Option_help -> usage optparser (); exit 0 + end; + List.rev (RefList.to_list args) + + let parse_argv optparser = + parse optparser ~first:1 Sys.argv + end diff --git a/src/optParse.mli b/src/optParse.mli new file mode 100644 index 0000000..00a45a2 --- /dev/null +++ b/src/optParse.mli @@ -0,0 +1,468 @@ +(* + * optParse - Functions for parsing command line arguments. + * Copyright (C) 2004 Bardur Arantsson + * + * Heavily influenced by the optparse.py module from the Python + * standard library, but with lots of adaptation to the 'Ocaml Way' + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** Modules for GNU [getopt(3)]-style command line parsing. *) + + +(** This module contains the basic functions and types for defining + new option types and accessing the values of options. *) +module Opt : + sig + + (** {6 Exceptions} *) + + exception No_value + (** [No_value] gets raised by {!OptParse.Opt.get} when an option + value is not available. *) + + exception Option_error of string * string + (** This exception signals that an option value is invalid. The + first string contains the option string ('-x' or '--long-name') + and the second string contains an error message. + + This exception is only used when implementing custom option types + and can never "escape" the scope of a {!OptParse.OptParser.parse}. + The user should therefore not attempt to catch it. *) + + exception Option_help + (** When an option wants to display a usage message, this exception + may be raised. It can never "escape" the scope of a + {!OptParse.OptParser.parse} call and the user should therefore not + attempt to catch it. *) + + + (** {6 Types} *) + + type 'a t = { + option_set : string -> string list -> unit; + option_set_value : 'a -> unit; + option_get : unit -> 'a option; + option_metavars : string list; + option_defhelp : string option + } + (** Option type. + + [option_set] is a closure which converts and records the value of + an option so that it can be retrieved with a later call to the + [option_get] closure. It is called with the option name which was + given on the command line and a list of strings, each representing + one of the argument values given on the command line. It may raise + [Option_error] if the value is invalid (for whatever reason). + + [option_set_value] is a closure which sets the value of an option + to a particular value. + + [option_get] is a closure which retrieves the recorded value + of the option. If the option value has not been set from the + command line, the default value is used. If there is no default + value, then [None] should be returned. + + [option_metavars] is a list of "meta-variables" (arguments) + which this option accepts. This is mainly for display purposes, + but the length of this list determines how many arguments the + option parser accepts for this option (currently only lists of + length 0 or 1 are supported). + + [option_defhelp] is the default help string (if any). It is + used for displaying help messages whenever the user does {b + not} specify a help string manually when adding this + option. Using a non-None value here only makes sense for + completely generic options like {!OptParse.StdOpt.help_option}. + + *) + + + (** {6 Option value retrieval} *) + + val get : 'a t -> 'a + (** Get the value of an option. + + @return the value of the option. If the option has not been + encountered while parsing the command line, the default value is + returned. + + @raise No_value if no default values has been given + and the option value has not been set from the command line. + + *) + + val set : 'a t -> 'a -> unit + (** Set the value of an option. *) + + val opt : 'a t -> 'a option + (** Get the value of an option as an optional value. + + @return [Some x] if the option has value [x] (either by default or + from the command line). If the option doesn't have a value [None] + is returned. *) + + val is_set : 'a t -> bool + (** Find out if the option has a value (either by default or + from the command line). + + @return [True] iff the option has a value. + *) + + + + (** {6 Option creation} *) + + val value_option : + string -> 'a option -> (string -> 'a) -> (exn -> string -> string) -> + 'a t + (** Make an option which takes a single argument. + + [value_option metavar default coerce errfmt] returns an option + which takes a single argument from the command line and calls + [coerce] to coerce it to the proper type. If [coerce] raises an + exception, [exn], then [errfmt exn argval] is called to generate + an error message for display. [metavar] is the name of the + metavariable of the option. + + [default] is the default value of the option. If [None], the the + option has no default value. + + @return the newly created option. + + *) + + val callback_option : + string -> (string -> 'a) -> (exn -> string -> string) -> ('a -> unit) -> + unit t + (** Make a callback option which takes a single argument. + + [callback_option metavar coerce errfmt f] returns an option which + takes a single argument from the command line and calls [coerce] + to coerce it to the proper type. If [coerce] raises an exception + [errfmt exn argval] is called to format an error message for + display. If [coerce] succeeds, the callback function [f] is called + with the coerced value. Finally, [metavar] is the name of the + metavariable of the option. + + @return the newly created option. + *) + + + end + + +(** This module contains various standard options. *) +module StdOpt : + sig + + (** {6 Flag options} *) + + val store_const : ?default: 'a -> 'a -> 'a Opt.t + (** [store_const ?default const] returns a flag option which + stores the constant value [const] when the option is + encountered on the command line. *) + + val store_true : unit -> bool Opt.t + (** [store_true ()] returns an option which is set to true when + it is encountered on the command line. The default value is + false. *) + + val store_false : unit -> bool Opt.t + (** [store_false ()] returns an option which is set to false when + it is encountered on the command line. The default value is + true. *) + + val count_option : ?dest: int ref -> ?increment: int -> unit -> int Opt.t + (** Create a counting option which increments its value each time the + option is encountered on the command line. + + @param increment Increment to add to the option value each + time the option is encountered. + + @param dest Reference to the option value. Useful for making + options like '--quiet' and '--verbose' sharing a single value. + + @return the newly created option. + *) + + val incr_option : ?dest: int ref -> unit -> int Opt.t + (** Exactly identical to [count_option ~dest:dest ~increment:1 ()]. *) + + val decr_option : ?dest: int ref -> unit -> int Opt.t + (** Exactly identical to [count_option ~dest:dest ~increment:(-1) ()]. *) + + + (** {6 Value options} *) + + val int_option : ?default: int -> ?metavar: string -> unit -> int Opt.t + (** [int_option ?default ?metavar ()] returns an option which takes + a single integer argument. If [~default] is given it is the + default value returned when the option has not been encountered + on the command line. *) + + val float_option : + ?default: float -> ?metavar: string -> unit -> float Opt.t + (** See {!OptParse.StdOpt.int_option}. *) + + val str_option : + ?default: string -> ?metavar: string -> unit -> string Opt.t + (** See {!OptParse.StdOpt.int_option}. *) + + + (** {6 Callback options} *) + + val int_callback : ?metavar: string -> (int -> unit) -> unit Opt.t + (** [int_callback ?metavar f] returns an option which takes a single + integer argument and calls [f] with that argument when encountered + on the command line. *) + + val float_callback : ?metavar: string -> (float -> unit) -> unit Opt.t + (** See {!OptParse.StdOpt.int_callback}. *) + + val str_callback : ?metavar: string -> (string -> unit) -> unit Opt.t + (** See {!OptParse.StdOpt.int_callback}. *) + + + (** {6 Special options} *) + + val help_option : unit -> 'a Opt.t + (** [help_option ()] returns the standard help option which + displays a usage message and exits the program when encountered + on the command line. *) + + val version_option : (unit -> string) -> 'a Opt.t + (** [version_option f] returns the standard version option which + displays the string returned by [f ()] (and nothing else) on + standard output and exits. *) + + end + + +(** This module contains the types and functions for implementing + custom usage message formatters. *) +module Formatter : + sig + type t = { + indent : unit -> unit; (** Increase the indentation level. *) + dedent : unit -> unit; (** Decrease the indentation level. *) + format_usage : string -> string; (** Format usage string into style of this formatter. *) + format_heading : string -> string; (** Format heading into style of this formatter. *) + format_description : string -> string; (** Format description into style of this formatter. *) + format_option : + char list * string list -> string list -> string option -> string (** Format option into style of this formatter (see explanation below). *) + } + + (** This is the type of a formatter. The [format_option] has + signature [format_option (snames,lnames) metavars help], where + [snames] is a list of the short option names, [lnames] is a + list of the long option names, [metavars] is a list of the + metavars the option takes as arguments, and [help] is the help + string supplied by the user. *) + + + (** {6 Standard formatters} *) + + + val indented_formatter : + ?level: int ref -> ?indent: int ref -> ?indent_increment: int -> + ?max_help_position: int -> ?width: int -> ?short_first: bool -> + unit -> t + (** Create an "indented" formatter with the given options. + + @param width Total with of the usage messages printed. + + @param max_help_position Maximum starting column for the help + messages relating to each option. + + @param short_first List all the short option names first? + + @param indent_increment Number of columns to indent by when + more indentation is required. + + @param indent Reference to the current indentation amount. Its + value reflects changes in indentation level. + + @param level Reference to the current indentation level. Its + value reflects changes in indentation level. *) + + val titled_formatter : ?level: int ref -> ?indent: int ref -> + ?indent_increment: int -> ?max_help_position: int -> + ?width: int -> ?short_first: bool -> unit -> t + (** Creates a titled formatter which is quite similar to the + indented formatter. See + {!OptParse.Formatter.indented_formatter} for a description of + the options. *) + + + (** {6 Low-level formatting} *) + + + val wrap : ?initial_indent: int -> ?subsequent_indent: int -> + string -> int -> string list + (** [wrap text width] reflows the given text paragraph into lines + of width at most [width] (lines may exceed this if the are + single words that exceed this limit). + + @param initial_indent Indentation of the first line. + + @param subsequent_indent Indentation of the following lines. + + @return a list of lines making up the reformatted paragraph. *) + + val fill : ?initial_indent: int -> ?subsequent_indent: int -> + string -> int -> string + (** See {!OptParse.Formatter.wrap}. + + @return a string containing the reformatted paragraph. *) + + end + + + +(** This module contains the option parser itself. + + It provides functions to create, populate and use option parsers to + parse command line arguments. *) +module OptParser : + sig + + (** {6 Exceptions} *) + + + exception Option_conflict of string + (** [Option_conflict name] is raised by {!OptParse.OptParser.add} + when two different options are added with identical + names. Usually this doesn't need to be caught since this error + is usually easily fixed permanently by removing/renaming the + conflicting option names. *) + + + (** {6 Types} *) + + + type t + (** The type of an option parser. *) + + type group + (** The type of an option group. *) + + + (** {6 Option parser creation} *) + + val make : ?usage: string -> ?status: int -> ?description: string -> ?version: string -> + ?suppress_usage: bool -> ?suppress_help: bool -> ?prog: string -> + ?formatter: Formatter.t -> unit -> t + (** Creates a new option parser with the given options. + + @param usage Usage message. The default is a reasonable usage + message for most programs. Any occurrence of the substring + ["%prog"] in [usage] is replaced with the name of the program + (see [prog]). + + @param prog Program name. The default is the base name of the + executable. + + @param suppress_usage Suppress the usage message if set. + + @param status Set the program exit status (default is 1). + + @param suppress_help Suppress the 'help' option which is + otherwise added by default. + + @param version Version string. If set, a '--version' option is + automatically added. When encountered on the command line it + causes [version] to be printed to the standard output and the + program to exit. + + @param description: description of the main purpose of the + program. + + @return the new option parser. + + *) + + + val add : t -> ?group: group -> ?help: string -> ?hide: bool -> + ?short_name: char -> ?short_names: char list -> ?long_name: string -> + ?long_names: string list -> 'a Opt.t -> unit + (** Add an option to the option parser. + + @raise Option_conflict if the short name(s) or long name(s) + have alread been used for some other option. + + @param help Short help message describing the option (for the usage message). + + @param hide If true, hide the option from the usage + message. This can be used to implement "secret" options which + are not shown, but work just the same as regular options in all + other respects. + + @param short_name is the name for the short form of the option + (e.g. ['x'] means that the option is invoked with [-x] on the + command line). + + @param short_names is a list of names for the short form of the + option (see [short_name]). + + @param long_name is the name for the long form of the option + (e.g. ["xyzzy"] means that the option is invoked with [--xyzzy] + on the command line). + + @param long_names is a list of names for the long form of the + option (see [long_name]). + *) + + + val add_group : t -> ?parent: group -> ?description: string -> + string -> group + (** Add a group to the option parser. + + @param parent is the parent group (if any). + + @param description is a description of the group. + + @return the new group. + + *) + + (** {6 Output and error handling} *) + + val error : t -> ?chn: out_channel -> ?status: int -> string -> 'a + (** Display an error message and exit the program. The error + message is printed to the channel [chn] (default is + [Pervasives.stderr]) and the program exits with exit status + [status] (default depends on [t] : see [make]). *) + + val usage : t -> ?chn: out_channel -> unit -> unit + (** Display the usage message to the channel [chn] (default is + [Pervasives.stdout]) and return. *) + + + (** {6 Option parsing} *) + + val parse : t -> ?first: int -> ?last: int -> string array -> string list + (** Parse arguments as if the arguments [args.(first)], + [args.(first+1)], ..., [args.(last)] had been given on the + command line. By default [first] is 0 and [last] is the index + of the last element of the array. *) + + val parse_argv : t -> string list + (** Parse all the arguments in [Sys.argv]. *) + + end diff --git a/src/option.ml b/src/option.ml new file mode 100644 index 0000000..3fa33fb --- /dev/null +++ b/src/option.ml @@ -0,0 +1,49 @@ +(* + * Option - functions for the option type + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +exception No_value + +let may f = function + | None -> () + | Some v -> f v + +let map f = function + | None -> None + | Some v -> Some (f v) + +let default v = function + | None -> v + | Some v -> v + +let is_some = function + | None -> false + | _ -> true + +let is_none = function + | None -> true + | _ -> false + +let get = function + | None -> raise No_value + | Some v -> v + +let map_default f v = function + | None -> v + | Some v2 -> f v2 diff --git a/src/option.mli b/src/option.mli new file mode 100644 index 0000000..ada369e --- /dev/null +++ b/src/option.mli @@ -0,0 +1,53 @@ +(* + * Options - functions for the option type + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** Functions for the option type. + + Options are an Ocaml standard type that can be either [None] (undefined) + or [Some x] where x can be any value. Options are widely used in Ocaml + to represent undefined values (a little like NULL in C, but in a type + and memory safe way). This module adds some functions for working with + options. +*) + +val may : ('a -> unit) -> 'a option -> unit +(** [may f (Some x)] calls [f x] and [may f None] does nothing. *) + +val map : ('a -> 'b) -> 'a option -> 'b option +(** [map f (Some x)] returns [Some (f x)] and [map None] returns [None]. *) + +val default : 'a -> 'a option -> 'a +(** [default x (Some v)] returns [v] and [default x None] returns [x]. *) + +val map_default : ('a -> 'b) -> 'b -> 'a option -> 'b +(** [map_default f x (Some v)] returns [f v] and [map_default f x None] + returns [x]. *) + +val is_none : 'a option -> bool +(** [is_none None] returns [true] otherwise it returns [false]. *) + +val is_some : 'a option -> bool +(** [is_some (Some x)] returns [true] otherwise it returns [false]. *) + +val get : 'a option -> 'a +(** [get (Some x)] returns [x] and [get None] raises [No_value]. *) + +exception No_value +(** Raised when calling [get None]. *) diff --git a/src/pMap.ml b/src/pMap.ml new file mode 100644 index 0000000..9532461 --- /dev/null +++ b/src/pMap.ml @@ -0,0 +1,197 @@ +(* + * PMap - Polymorphic maps + * Copyright (C) 1996-2003 Xavier Leroy, Nicolas Cannasse, Markus Mottl + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +type ('k, 'v) map = + | Empty + | Node of ('k, 'v) map * 'k * 'v * ('k, 'v) map * int + +type ('k, 'v) t = + { + cmp : 'k -> 'k -> int; + map : ('k, 'v) map; + } + +let height = function + | Node (_, _, _, _, h) -> h + | Empty -> 0 + +let make l k v r = Node (l, k, v, r, max (height l) (height r) + 1) + +let bal l k v r = + let hl = height l in + let hr = height r in + if hl > hr + 2 then + match l with + | Node (ll, lk, lv, lr, _) -> + if height ll >= height lr then make ll lk lv (make lr k v r) + else + (match lr with + | Node (lrl, lrk, lrv, lrr, _) -> + make (make ll lk lv lrl) lrk lrv (make lrr k v r) + | Empty -> assert false) + | Empty -> assert false + else if hr > hl + 2 then + match r with + | Node (rl, rk, rv, rr, _) -> + if height rr >= height rl then make (make l k v rl) rk rv rr + else + (match rl with + | Node (rll, rlk, rlv, rlr, _) -> + make (make l k v rll) rlk rlv (make rlr rk rv rr) + | Empty -> assert false) + | Empty -> assert false + else Node (l, k, v, r, max hl hr + 1) + +let rec min_binding = function + | Node (Empty, k, v, _, _) -> k, v + | Node (l, _, _, _, _) -> min_binding l + | Empty -> raise Not_found + +let rec remove_min_binding = function + | Node (Empty, _, _, r, _) -> r + | Node (l, k, v, r, _) -> bal (remove_min_binding l) k v r + | Empty -> invalid_arg "PMap.remove_min_binding" + +let merge t1 t2 = + match t1, t2 with + | Empty, _ -> t2 + | _, Empty -> t1 + | _ -> + let k, v = min_binding t2 in + bal t1 k v (remove_min_binding t2) + +let create cmp = { cmp = cmp; map = Empty } +let empty = { cmp = compare; map = Empty } + +let is_empty x = + x.map = Empty + +let add x d { cmp = cmp; map = map } = + let rec loop = function + | Node (l, k, v, r, h) -> + let c = cmp x k in + if c = 0 then Node (l, x, d, r, h) + else if c < 0 then + let nl = loop l in + bal nl k v r + else + let nr = loop r in + bal l k v nr + | Empty -> Node (Empty, x, d, Empty, 1) in + { cmp = cmp; map = loop map } + +let find x { cmp = cmp; map = map } = + let rec loop = function + | Node (l, k, v, r, _) -> + let c = cmp x k in + if c < 0 then loop l + else if c > 0 then loop r + else v + | Empty -> raise Not_found in + loop map + +let remove x { cmp = cmp; map = map } = + let rec loop = function + | Node (l, k, v, r, _) -> + let c = cmp x k in + if c = 0 then merge l r else + if c < 0 then bal (loop l) k v r else bal l k v (loop r) + | Empty -> Empty in + { cmp = cmp; map = loop map } + +let mem x { cmp = cmp; map = map } = + let rec loop = function + | Node (l, k, v, r, _) -> + let c = cmp x k in + c = 0 || loop (if c < 0 then l else r) + | Empty -> false in + loop map + +let exists = mem + +let iter f { map = map } = + let rec loop = function + | Empty -> () + | Node (l, k, v, r, _) -> loop l; f k v; loop r in + loop map + +let map f { cmp = cmp; map = map } = + let rec loop = function + | Empty -> Empty + | Node (l, k, v, r, h) -> + let l = loop l in + let r = loop r in + Node (l, k, f v, r, h) in + { cmp = cmp; map = loop map } + +let mapi f { cmp = cmp; map = map } = + let rec loop = function + | Empty -> Empty + | Node (l, k, v, r, h) -> + let l = loop l in + let r = loop r in + Node (l, k, f k v, r, h) in + { cmp = cmp; map = loop map } + +let fold f { cmp = cmp; map = map } acc = + let rec loop acc = function + | Empty -> acc + | Node (l, k, v, r, _) -> + loop (f v (loop acc l)) r in + loop acc map + +let foldi f { cmp = cmp; map = map } acc = + let rec loop acc = function + | Empty -> acc + | Node (l, k, v, r, _) -> + loop (f k v (loop acc l)) r in + loop acc map + +let rec enum m = + let rec make l = + let l = ref l in + let rec next() = + match !l with + | [] -> raise Enum.No_more_elements + | Empty :: tl -> l := tl; next() + | Node (m1, key, data, m2, h) :: tl -> + l := m1 :: m2 :: tl; + (key, data) + in + let count() = + let n = ref 0 in + let r = !l in + try + while true do + ignore (next()); + incr n + done; + assert false + with + Enum.No_more_elements -> l := r; !n + in + let clone() = make !l in + Enum.make ~next ~count ~clone + in + make [m.map] + + +let uncurry_add (k, v) m = add k v m +let of_enum ?(cmp = compare) e = Enum.fold uncurry_add (create cmp) e diff --git a/src/pMap.mli b/src/pMap.mli new file mode 100644 index 0000000..b3bc756 --- /dev/null +++ b/src/pMap.mli @@ -0,0 +1,92 @@ +(* + * PMap - Polymorphic maps + * Copyright (C) 1996-2003 Xavier Leroy, Nicolas Cannasse, Markus Mottl + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** Polymorphic Map. + + This is a polymorphic map, similar to standard library [Map] module + but in a defunctorized style. +*) + +type ('a, 'b) t + +val empty : ('a, 'b) t +(** The empty map, using [compare] as key comparison function. *) + +val is_empty : ('a, 'b) t -> bool +(** returns true if the map is empty. *) + +val create : ('a -> 'a -> int) -> ('a, 'b) t +(** creates a new empty map, using the provided function for key comparison.*) + +val add : 'a -> 'b -> ('a, 'b) t -> ('a, 'b) t +(** [add x y m] returns a map containing the same bindings as + [m], plus a binding of [x] to [y]. If [x] was already bound + in [m], its previous binding disappears. *) + +val find : 'a -> ('a, 'b) t -> 'b +(** [find x m] returns the current binding of [x] in [m], + or raises [Not_found] if no such binding exists. *) + +val remove : 'a -> ('a, 'b) t -> ('a, 'b) t +(** [remove x m] returns a map containing the same bindings as + [m], except for [x] which is unbound in the returned map. *) + +val mem : 'a -> ('a, 'b) t -> bool +(** [mem x m] returns [true] if [m] contains a binding for [x], + and [false] otherwise. *) + +val exists : 'a -> ('a, 'b) t -> bool +(** same as [mem]. *) + +val iter : ('a -> 'b -> unit) -> ('a, 'b) t -> unit +(** [iter f m] applies [f] to all bindings in map [m]. + [f] receives the key as first argument, and the associated value + as second argument. The order in which the bindings are passed to + [f] is unspecified. Only current bindings are presented to [f]: + bindings hidden by more recent bindings are not passed to [f]. *) + +val map : ('b -> 'c) -> ('a, 'b) t -> ('a, 'c) t +(** [map f m] returns a map with same domain as [m], where the + associated value [a] of all bindings of [m] has been + replaced by the result of the application of [f] to [a]. + The order in which the associated values are passed to [f] + is unspecified. *) + +val mapi : ('a -> 'b -> 'c) -> ('a, 'b) t -> ('a, 'c) t +(** Same as [map], but the function receives as arguments both the + key and the associated value for each binding of the map. *) + +val fold : ('b -> 'c -> 'c) -> ('a , 'b) t -> 'c -> 'c +(** [fold f m a] computes [(f kN dN ... (f k1 d1 a)...)], + where [k1 ... kN] are the keys of all bindings in [m], + and [d1 ... dN] are the associated data. + The order in which the bindings are presented to [f] is + unspecified. *) + +val foldi : ('a -> 'b -> 'c -> 'c) -> ('a , 'b) t -> 'c -> 'c +(** Same as [fold], but the function receives as arguments both the + key and the associated value for each binding of the map. *) + +val enum : ('a, 'b) t -> ('a * 'b) Enum.t +(** creates an enumeration for this map. *) + +val of_enum : ?cmp:('a -> 'a -> int) -> ('a * 'b) Enum.t -> ('a, 'b) t +(** creates a map from an enumeration, using the specified function + for key comparison or [compare] by default. *) diff --git a/src/refList.ml b/src/refList.ml new file mode 100644 index 0000000..7f1ae6e --- /dev/null +++ b/src/refList.ml @@ -0,0 +1,139 @@ +(* + * RefList - List reference + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) +open ExtList + +exception Empty_list +exception Invalid_index of int + +type 'a t = 'a list ref + +let empty () = ref [] + +let is_empty x = + match !x with + | [] -> true + | _ -> false + +let of_list l = ref l +let to_list rl = !rl +let copy ~dst ~src = dst := !src +let copy_list ~dst ~src = dst := src + +let add rl item = rl := List.append !rl [item] +let push rl item = rl := item::!rl + +let clear rl = rl := [] + +let length rl = List.length !rl +let hd rl = try List.hd !rl with _ -> raise Empty_list +let tl rl = try ref (List.tl !rl) with _ -> raise Empty_list +let iter f rl = List.iter f !rl +let for_all f rl = List.for_all f !rl +let map f rl = ref (List.map f !rl) +let transform f rl = rl := List.map f !rl +let map_list f rl = List.map f !rl +let find f rl = List.find f !rl +let rev rl = rl := List.rev !rl +let find_exc f exn rl = try List.find f !rl with _ -> raise exn +let exists f rl = List.exists f !rl +let sort ?(cmp=compare) rl = rl := List.sort ~cmp !rl + +let rfind f rl = List.rfind f !rl + +let first = hd + +let last rl = + let rec loop = function + | x :: [] -> x + | x :: l -> loop l + | [] -> assert false + in + match !rl with + | [] -> raise Empty_list + | l -> loop l + +let remove rl item = rl := List.remove !rl item +let remove_if pred rl = rl := List.remove_if pred !rl +let remove_all rl item = rl := List.remove_all !rl item +let filter pred rl = rl := List.filter pred !rl + +let add_sort ?(cmp=compare) rl item = + let rec add_aux = function + | x::lnext as l -> + let r = cmp x item in + if r < 0 then item::l else x::(add_aux lnext) + | [] -> [item] + in + rl := add_aux !rl + +let pop rl = + match !rl with + | [] -> raise Empty_list + | e::l -> rl := l; e + +let npop rl n = + let rec pop_aux l n = + if n = 0 then begin + rl := l; + [] + end else + match l with + | [] -> raise Empty_list + | x::l -> x::(pop_aux l (n-1)) + in + pop_aux !rl n + +let copy_enum ~dst ~src = dst := List.of_enum src +let enum rl = List.enum !rl +let of_enum e = ref (List.of_enum e) + +module Index = struct + + let remove_at rl pos = + let p = ref (-1) in + let rec del_aux = function + | x::l -> incr p; if !p = pos then l else x::(del_aux l) + | [] -> raise (Invalid_index pos) + in + rl := del_aux !rl + + let index pred rl = + let index = ref (-1) in + List.find (fun it -> incr index; pred it; ) !rl; + !index + + let index_of rl item = + let index = ref (-1) in + List.find (fun it -> incr index; it = item; ) !rl; + !index + + let at_index rl pos = + try + List.nth !rl pos + with + _ -> raise (Invalid_index pos) + + let set rl pos newitem = + let p = ref (-1) in + rl := List.map (fun item -> incr p; if !p = pos then newitem else item) !rl; + if !p < pos || pos < 0 then raise (Invalid_index pos) + + +end diff --git a/src/refList.mli b/src/refList.mli new file mode 100644 index 0000000..e2c0b5f --- /dev/null +++ b/src/refList.mli @@ -0,0 +1,201 @@ +(* + * RefList - List reference + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** Reference on lists. + + RefList is a extended set of functions that manipulate list + references. +*) + +exception Empty_list +exception Invalid_index of int + +type 'a t + +val empty : unit -> 'a t +(** Returns a new empty ref list *) + +val is_empty : 'a t -> bool +(** Return [true] if a ref list is empty *) + +val clear : 'a t -> unit +(** Removes all elements *) + +val length : 'a t -> int +(** Returns the number of elements - O(n) *) + +val copy : dst:'a t -> src:'a t -> unit +(** Makes a copy of a ref list - O(1) *) + +val copy_list : dst:'a t -> src:'a list -> unit +(** Makes a copy of a list - O(1) *) + +val copy_enum : dst:'a t -> src:'a Enum.t -> unit +(** Makes a copy of a enum *) + +val of_list : 'a list -> 'a t +(** Creates a ref list from a list - O(1) *) + +val to_list : 'a t -> 'a list +(** Returns the current elements as a list - O(1) *) + +val of_enum : 'a Enum.t -> 'a t +(** Creates a ref list from an enumeration *) + +val enum : 'a t -> 'a Enum.t +(** Returns an enumeration of current elements in the ref list *) + +val add : 'a t -> 'a -> unit +(** Adds an element at the end - O(n) *) + +val push : 'a t -> 'a -> unit +(** Adds an element at the head - O(1) *) + +val add_sort : ?cmp:('a -> 'a -> int) -> 'a t -> 'a -> unit +(** Adds an element in a sorted list, using optional comparator + or 'compare' as default. *) + +val first : 'a t -> 'a +(** Returns the first element or + raises [Empty_list] if the ref list is empty *) + +val last : 'a t -> 'a +(** Returns the last element - O(n) or + raises Empty_list if the ref list is empty *) + +val pop : 'a t -> 'a +(** Removes and returns the first element or + raises [Empty_list] if the ref list is empty *) + +val npop : 'a t -> int -> 'a list +(** Removes and returns the n first elements or + raises [Empty_list] if the ref list does not + contain enough elements *) + +val hd : 'a t -> 'a +(** same as [first] *) + +val tl : 'a t -> 'a t +(** Returns a ref list containing the same elements + but without the first one or + raises [Empty_list] if the ref list is empty *) + +val rev : 'a t -> unit +(** Reverses the ref list - O(n) *) + +(** {6 Functional Operations} *) + +val iter : ('a -> unit) -> 'a t -> unit +(** Apply the given function to all elements of the + ref list, in respect with the order of the list *) + +val find : ('a -> bool) -> 'a t -> 'a +(** Find the first element matching + the specified predicate + raise [Not_found] if no element is found *) + +val rfind : ('a -> bool) -> 'a t -> 'a +(** Find the first element in the reversed ref list matching + the specified predicate + raise [Not_found] if no element is found *) + +val find_exc : ('a -> bool) -> exn -> 'a t -> 'a +(** Same as find but takes an exception to be raised when + no element is found as additional parameter *) + +val exists : ('a -> bool) -> 'a t -> bool +(** Return [true] if an element matches the specified + predicate *) + +val for_all : ('a -> bool) -> 'a t -> bool +(** Return [true] if all elements match the specified + predicate *) + +val map : ('a -> 'b) -> 'a t -> 'b t +(** Apply a function to all elements + and return the ref list constructed with + the function returned values *) + +val transform : ('a -> 'a) -> 'a t -> unit +(** transform all elements in the ref list + using a function. *) + +val map_list : ('a -> 'b) -> 'a t -> 'b list +(** Apply a function to all elements + and return the list constructed with + the function returned values *) + +val sort : ?cmp:('a -> 'a -> int) -> 'a t -> unit +(** Sort elements using the specified comparator + or compare as default comparator *) + +val filter : ('a -> bool) -> 'a t -> unit +(** Remove all elements that do not match the + specified predicate *) + +val remove : 'a t -> 'a -> unit +(** Remove an element from the ref list + raise [Not_found] if the element is not found *) + +val remove_if : ('a -> bool) -> 'a t -> unit +(** Remove the first element matching the + specified predicate + raise [Not_found] if no element has been removed *) + +val remove_all : 'a t -> 'a -> unit +(** Remove all elements equal to the specified + element from the ref list *) + + + +(** Functions that operate on the [i]th element of a list. + + While it is sometimes necessary to perform these + operations on lists (hence their inclusion here), the + functions were moved to an inner module to prevent + their overuse: all functions work in O(n) time. You + might prefer to use [Array] or [DynArray] for constant + time indexed element access. +*) +module Index : sig + + val index_of : 'a t -> 'a -> int + (** Return the index (position : 0 starting) of an element in + a ref list, using ( = ) for testing element equality + raise [Not_found] if no element was found *) + + val index : ('a -> bool) -> 'a t -> int + (** Return the index (position : 0 starting) of an element in + a ref list, using the specified comparator + raise [Not_found] if no element was found *) + + val at_index : 'a t -> int -> 'a + (** Return the element of ref list at the specified index + raise [Invalid_index] if the index is outside [0 ; length-1] *) + + val set : 'a t -> int -> 'a -> unit + (** Change the element at the specified index + raise [Invalid_index] if the index is outside [0 ; length-1] *) + + val remove_at : 'a t -> int -> unit + (** Remove the element at the specified index + raise [Invalid_index] if the index is outside [0 ; length-1] *) + +end diff --git a/src/std.ml b/src/std.ml new file mode 100644 index 0000000..3a82654 --- /dev/null +++ b/src/std.ml @@ -0,0 +1,186 @@ +(* + * Std - Additional functions + * Copyright (C) 2003 Nicolas Cannasse and Markus Mottl + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +open ExtBytes + +let finally handler f x = + let r = ( + try + f x + with + e -> handler (); raise e + ) in + handler (); + r + +let input_lines ch = + Enum.from (fun () -> + try input_line ch with End_of_file -> raise Enum.No_more_elements) + +let input_chars ch = + Enum.from (fun () -> + try input_char ch with End_of_file -> raise Enum.No_more_elements) + +type 'a _mut_list = { + hd : 'a; + mutable tl : 'a _mut_list; +} + +let input_list ch = + let _empty = Obj.magic [] in + let rec loop dst = + let r = { hd = input_line ch; tl = _empty } in + dst.tl <- r; + loop r in + let r = { hd = Obj.magic(); tl = _empty } in + try loop r + with + End_of_file -> + Obj.magic r.tl + +let buf_len = 8192 + +let input_all ic = + let rec loop acc total buf ofs = + let n = input ic buf ofs (buf_len - ofs) in + if n = 0 then + let res = Bytes.create total in + let pos = total - ofs in + let _ = Bytes.blit buf 0 res pos ofs in + let coll pos buf = + let new_pos = pos - buf_len in + Bytes.blit buf 0 res new_pos buf_len; + new_pos in + let _ = List.fold_left coll pos acc in + (* [res] doesn't escape and will not be mutated again *) + Bytes.unsafe_to_string res + else + let new_ofs = ofs + n in + let new_total = total + n in + if new_ofs = buf_len then + loop (buf :: acc) new_total (Bytes.create buf_len) 0 + else loop acc new_total buf new_ofs in + loop [] 0 (Bytes.create buf_len) 0 + +let input_file ?(bin=false) fname = + let ch = (if bin then open_in_bin else open_in) fname in + finally (fun () -> close_in ch) input_all ch + +let output_file ~filename ~text = + let ch = open_out filename in + finally (fun () -> close_out ch) (output_string ch) text + +let print_bool = function + | true -> print_string "true" + | false -> print_string "false" + +let prerr_bool = function + | true -> prerr_string "true" + | false -> prerr_string "false" + +let string_of_char c = String.make 1 c + +external identity : 'a -> 'a = "%identity" + +let rec dump r = + if Obj.is_int r then + string_of_int (Obj.magic r : int) + else (* Block. *) + let rec get_fields acc = function + | 0 -> acc + | n -> let n = n-1 in get_fields (Obj.field r n :: acc) n + in + let rec is_list r = + if Obj.is_int r then + r = Obj.repr 0 (* [] *) + else + let s = Obj.size r and t = Obj.tag r in + t = 0 && s = 2 && is_list (Obj.field r 1) (* h :: t *) + in + let rec get_list r = + if Obj.is_int r then + [] + else + let h = Obj.field r 0 and t = get_list (Obj.field r 1) in + h :: t + in + let opaque name = + (* XXX In future, print the address of value 'r'. Not possible in + * pure OCaml at the moment. + *) + "<" ^ name ^ ">" + in + let s = Obj.size r and t = Obj.tag r in + (* From the tag, determine the type of block. *) + match t with + | _ when is_list r -> + let fields = get_list r in + "[" ^ String.concat "; " (List.map dump fields) ^ "]" + | 0 -> + let fields = get_fields [] s in + "(" ^ String.concat ", " (List.map dump fields) ^ ")" + | x when x = Obj.lazy_tag -> + (* Note that [lazy_tag .. forward_tag] are < no_scan_tag. Not + * clear if very large constructed values could have the same + * tag. XXX *) + opaque "lazy" + | x when x = Obj.closure_tag -> + opaque "closure" + | x when x = Obj.object_tag -> + let fields = get_fields [] s in + let clasz, id, slots = + match fields with + | h::h'::t -> h, h', t + | _ -> assert false + in + (* No information on decoding the class (first field). So just print + * out the ID and the slots. *) + "Object #" ^ dump id ^ " (" ^ String.concat ", " (List.map dump slots) ^ ")" + | x when x = Obj.infix_tag -> + opaque "infix" + | x when x = Obj.forward_tag -> + opaque "forward" + | x when x < Obj.no_scan_tag -> + let fields = get_fields [] s in + "Tag" ^ string_of_int t ^ + " (" ^ String.concat ", " (List.map dump fields) ^ ")" + | x when x = Obj.string_tag -> + "\"" ^ String.escaped (Obj.magic r : string) ^ "\"" + | x when x = Obj.double_tag -> + string_of_float (Obj.magic r : float) + | x when x = Obj.abstract_tag -> + opaque "abstract" + | x when x = Obj.custom_tag -> + opaque "custom" + | x when x = Obj.double_array_tag -> + let l = ExtList.List.init s (fun i -> string_of_float (Obj.double_field r i)) in + "[| " ^ String.concat "; " l ^ " |]" + | _ -> + opaque (Printf.sprintf "unknown: tag %d size %d" t s) + +let dump v = dump (Obj.repr v) + +let print v = print_endline (dump v) + +let __unique_counter = ref 0 + +let unique () = + incr __unique_counter; + !__unique_counter diff --git a/src/std.mli b/src/std.mli new file mode 100644 index 0000000..96000d2 --- /dev/null +++ b/src/std.mli @@ -0,0 +1,69 @@ +(* + * Std - Additional functions + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** Additional functions. *) + +val input_lines : in_channel -> string Enum.t +(** Returns an enumeration over lines of an input channel, as read by the + [input_line] function. *) + +val input_chars : in_channel -> char Enum.t +(** Returns an enumeration over characters of an input channel. *) + +val input_list : in_channel -> string list +(** Returns the list of lines read from an input channel. *) + +val input_all : in_channel -> string +(** Return the whole contents of an input channel as a single + string. *) + +val print_bool : bool -> unit +(** Print a boolean to stdout. *) + +val prerr_bool : bool -> unit +(** Print a boolean to stderr. *) + +val input_file : ?bin:bool -> string -> string +(** returns the data of a given filename. *) + +val output_file : filename:string -> text:string -> unit +(** creates a filename, write text into it and close it. *) + +val string_of_char : char -> string +(** creates a string from a char. *) + +external identity : 'a -> 'a = "%identity" +(** the identity function. *) + +val unique : unit -> int +(** returns an unique identifier every time it is called. *) + +val dump : 'a -> string +(** represent a runtime value as a string. Since types are lost at compile + time, the representation might not match your type. For example, None + will be printed 0 since they share the same runtime representation. *) + +val print : 'a -> unit +(** print the representation of a runtime value on stdout. + See remarks for [dump]. *) + +val finally : (unit -> unit) -> ('a -> 'b) -> 'a -> 'b +(** [finally fend f x] calls [f x] and then [fend()] even if [f x] raised + an exception. *) diff --git a/src/uChar.ml b/src/uChar.ml new file mode 100644 index 0000000..194a753 --- /dev/null +++ b/src/uChar.ml @@ -0,0 +1,48 @@ +(* + * UChar - Unicode (ISO-UCS) characters + * Copyright (C) 2002, 2003 Yamagata Yoriyuki + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +type t = int + +exception Out_of_range + +external unsafe_chr_of_uint : int -> t = "%identity" +external uint_code : t -> int = "%identity" + +let char_of c = + if c >= 0 && c < 0x100 then Char.chr c else raise Out_of_range + +let of_char = Char.code + +let code c = if c >= 0 then c else raise Out_of_range + +let chr n = + if n >= 0 && n lsr 31 = 0 then n else invalid_arg "UChar.chr" + +let chr_of_uint n = if n lsr 31 = 0 then n else invalid_arg "UChar.uint_chr" + +let eq (u1 : t) (u2 : t) = u1 = u2 +let compare u1 u2 = + let sgn = (u1 lsr 16) - (u2 lsr 16) in + if sgn = 0 then (u1 land 0xFFFF) - (u2 land 0xFFFF) else sgn + +type uchar = t + +let int_of_uchar u = uint_code u +let uchar_of_int n = chr_of_uint n diff --git a/src/uChar.mli b/src/uChar.mli new file mode 100644 index 0000000..2301505 --- /dev/null +++ b/src/uChar.mli @@ -0,0 +1,79 @@ +(* + * UChar - Unicode (ISO-UCS) characters + * Copyright (C) 2002, 2003 Yamagata Yoriyuki + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** Unicode (ISO-UCS) characters. + + This module implements Unicode (actually ISO-UCS) characters. All + 31-bit code points are allowed. +*) + +(** Unicode characters. All 31-bit code points are allowed.*) +type t + +exception Out_of_range + +(** [char_of u] returns the Latin-1 representation of [u]. + If [u] can not be represented by Latin-1, raises Out_of_range *) +val char_of : t -> char + +(** [of_char c] returns the Unicode character of the Latin-1 character [c] *) +val of_char : char -> t + +(** [code u] returns the Unicode code number of [u]. + If the value can not be represented by a positive integer, + raise Out_of_range *) +val code : t -> int + +(** [code n] returns the Unicode character with the code number [n]. + If n >= 2^32 or n < 0, raises [invalid_arg] *) +val chr : int -> t + +(** [uint_code u] returns the Unicode code number of [u]. + The returned int is unsigned, that is, on 32-bit platforms, + the sign bit is used for storing the 31-th bit of the code number. *) +external uint_code : t -> int = "%identity" + +(** [chr_of_uint n] returns the Unicode character of the code number [n]. + [n] is interpreted as unsigned, that is, on 32-bit platforms, + the sign bit is treated as the 31-th bit of the code number. + If n exceeds 31-bit values, then raise [Invalid_arg]. *) +val chr_of_uint : int -> t + +(** Unsafe version of {!UChar.chr_of_uint}. + No check of its argument is performed. *) +external unsafe_chr_of_uint : int -> t = "%identity" + +(** Equality by code point comparison *) +val eq : t -> t -> bool + +(** [compare u1 u2] returns, + a value > 0 if [u1] has a larger Unicode code number than [u2], + 0 if [u1] and [u2] are the same Unicode character, + a value < 0 if [u1] has a smaller Unicode code number than [u2]. *) +val compare : t -> t -> int + +(** Aliases of [type t] *) +type uchar = t + +(** Alias of [uint_code] *) +val int_of_uchar : uchar -> int + +(** Alias of [chr_of_uint] *) +val uchar_of_int : int -> uchar diff --git a/src/uTF8.ml b/src/uTF8.ml new file mode 100644 index 0000000..06c58f9 --- /dev/null +++ b/src/uTF8.ml @@ -0,0 +1,225 @@ +(* + * UTF-8 - UTF-8 encoded Unicode string + * Copyright 2002, 2003 (C) Yamagata Yoriyuki. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +open UChar + +type t = string +type index = int + +let look s i = + let n' = + let n = Char.code s.[i] in + if n < 0x80 then n else + if n <= 0xdf then + (n - 0xc0) lsl 6 lor (0x7f land (Char.code s.[i + 1])) + else if n <= 0xef then + let n' = n - 0xe0 in + let m0 = Char.code s.[i + 2] in + let m = Char.code (String.unsafe_get s (i + 1)) in + let n' = n' lsl 6 lor (0x7f land m) in + n' lsl 6 lor (0x7f land m0) + else if n <= 0xf7 then + let n' = n - 0xf0 in + let m0 = Char.code s.[i + 3] in + let m = Char.code (String.unsafe_get s (i + 1)) in + let n' = n' lsl 6 lor (0x7f land m) in + let m = Char.code (String.unsafe_get s (i + 2)) in + let n' = n' lsl 6 lor (0x7f land m) in + n' lsl 6 lor (0x7f land m0) + else if n <= 0xfb then + let n' = n - 0xf8 in + let m0 = Char.code s.[i + 4] in + let m = Char.code (String.unsafe_get s (i + 1)) in + let n' = n' lsl 6 lor (0x7f land m) in + let m = Char.code (String.unsafe_get s (i + 2)) in + let n' = n' lsl 6 lor (0x7f land m) in + let m = Char.code (String.unsafe_get s (i + 3)) in + let n' = n' lsl 6 lor (0x7f land m) in + n' lsl 6 lor (0x7f land m0) + else if n <= 0xfd then + let n' = n - 0xfc in + let m0 = Char.code s.[i + 5] in + let m = Char.code (String.unsafe_get s (i + 1)) in + let n' = n' lsl 6 lor (0x7f land m) in + let m = Char.code (String.unsafe_get s (i + 2)) in + let n' = n' lsl 6 lor (0x7f land m) in + let m = Char.code (String.unsafe_get s (i + 3)) in + let n' = n' lsl 6 lor (0x7f land m) in + let m = Char.code (String.unsafe_get s (i + 4)) in + let n' = n' lsl 6 lor (0x7f land m) in + n' lsl 6 lor (0x7f land m0) + else invalid_arg "UTF8.look" + in + Obj.magic n' + +let rec search_head s i = + if i >= String.length s then i else + let n = Char.code (String.unsafe_get s i) in + if n < 0x80 || n >= 0xc2 then i else + search_head s (i + 1) + +let next s i = + let n = Char.code s.[i] in + if n < 0x80 then i + 1 else + if n < 0xc0 then search_head s (i + 1) else + if n <= 0xdf then i + 2 + else if n <= 0xef then i + 3 + else if n <= 0xf7 then i + 4 + else if n <= 0xfb then i + 5 + else if n <= 0xfd then i + 6 + else invalid_arg "UTF8.next" + +let rec search_head_backward s i = + if i < 0 then -1 else + let n = Char.code s.[i] in + if n < 0x80 || n >= 0xc2 then i else + search_head_backward s (i - 1) + +let prev s i = search_head_backward s (i - 1) + +let move s i n = + if n >= 0 then + let rec loop i n = if n <= 0 then i else loop (next s i) (n - 1) in + loop i n + else + let rec loop i n = if n >= 0 then i else loop (prev s i) (n + 1) in + loop i n + +let rec nth_aux s i n = + if n = 0 then i else + nth_aux s (next s i) (n - 1) + +let nth s n = nth_aux s 0 n + +let substring s i n = + let j = nth s i in + let j' = (nth_aux s j n) - 1 in + String.sub s j (j' - j + 1) + +let last s = search_head_backward s (String.length s - 1) + +let out_of_range s i = i < 0 || i >= String.length s + +let compare_index _ i j = i - j + +let get s n = look s (nth s n) + +let add_uchar buf u = + let masq = 0b111111 in + let k = int_of_uchar u in + if k < 0 || k >= 0x4000000 then begin + Buffer.add_char buf (Char.chr (0xfc + (k lsr 30))); + Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 24) land masq))); + Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 18) land masq))); + Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 12) land masq))); + Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 6) land masq))); + Buffer.add_char buf (Char.unsafe_chr (0x80 lor (k land masq))); + end else if k <= 0x7f then + Buffer.add_char buf (Char.unsafe_chr k) + else if k <= 0x7ff then begin + Buffer.add_char buf (Char.unsafe_chr (0xc0 lor (k lsr 6))); + Buffer.add_char buf (Char.unsafe_chr (0x80 lor (k land masq))) + end else if k <= 0xffff then begin + Buffer.add_char buf (Char.unsafe_chr (0xe0 lor (k lsr 12))); + Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 6) land masq))); + Buffer.add_char buf (Char.unsafe_chr (0x80 lor (k land masq))); + end else if k <= 0x1fffff then begin + Buffer.add_char buf (Char.unsafe_chr (0xf0 + (k lsr 18))); + Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 12) land masq))); + Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 6) land masq))); + Buffer.add_char buf (Char.unsafe_chr (0x80 lor (k land masq))); + end else begin + Buffer.add_char buf (Char.unsafe_chr (0xf8 + (k lsr 24))); + Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 18) land masq))); + Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 12) land masq))); + Buffer.add_char buf (Char.unsafe_chr (0x80 lor ((k lsr 6) land masq))); + Buffer.add_char buf (Char.unsafe_chr (0x80 lor (k land masq))); + end + +let init len f = + let buf = Buffer.create len in + for c = 0 to len - 1 do add_uchar buf (f c) done; + Buffer.contents buf + +let rec length_aux s c i = + if i >= String.length s then c else + let n = Char.code (String.unsafe_get s i) in + let k = + if n < 0x80 then 1 else + if n < 0xc0 then invalid_arg "UTF8.length" else + if n < 0xe0 then 2 else + if n < 0xf0 then 3 else + if n < 0xf8 then 4 else + if n < 0xfc then 5 else + if n < 0xfe then 6 else + invalid_arg "UTF8.length" in + length_aux s (c + 1) (i + k) + +let length s = length_aux s 0 0 + +let rec iter_aux proc s i = + if i >= String.length s then () else + let u = look s i in + proc u; + iter_aux proc s (next s i) + +let iter proc s = iter_aux proc s 0 + +let compare s1 s2 = Pervasives.compare s1 s2 + +exception Malformed_code + +let validate s = + let rec trail c i a = + if c = 0 then a else + if i >= String.length s then raise Malformed_code else + let n = Char.code (String.unsafe_get s i) in + if n < 0x80 || n >= 0xc0 then raise Malformed_code else + trail (c - 1) (i + 1) (a lsl 6 lor (n - 0x80)) in + let rec main i = + if i >= String.length s then () else + let n = Char.code (String.unsafe_get s i) in + if n < 0x80 then main (i + 1) else + if n < 0xc2 then raise Malformed_code else + if n <= 0xdf then + if trail 1 (i + 1) (n - 0xc0) < 0x80 then raise Malformed_code else + main (i + 2) + else if n <= 0xef then + if trail 2 (i + 1) (n - 0xe0) < 0x800 then raise Malformed_code else + main (i + 3) + else if n <= 0xf7 then + if trail 3 (i + 1) (n - 0xf0) < 0x10000 then raise Malformed_code else + main (i + 4) + else if n <= 0xfb then + if trail 4 (i + 1) (n - 0xf8) < 0x200000 then raise Malformed_code else + main (i + 5) + else if n <= 0xfd then + let n = trail 5 (i + 1) (n - 0xfc) in + if n lsr 16 < 0x400 then raise Malformed_code else + main (i + 6) + else raise Malformed_code in + main 0 + +module Buf = + struct + include Buffer + type buf = t + let add_char = add_uchar + end diff --git a/src/uTF8.mli b/src/uTF8.mli new file mode 100644 index 0000000..a864ab6 --- /dev/null +++ b/src/uTF8.mli @@ -0,0 +1,148 @@ +(* + * UTF-8 - UTF-8 encoded Unicode string + * Copyright 2002, 2003 (C) Yamagata Yoriyuki. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** UTF-8 encoded Unicode strings. + + The Module for UTF-8 encoded Unicode strings. +*) + +open UChar + +(** UTF-8 encoded Unicode strings. the type is normal string. *) +type t = string + +exception Malformed_code + +(** [validate s] + Succeeds if s is valid UTF-8, otherwise raises Malformed_code. + Other functions assume strings are valid UTF-8, so it is prudent + to test their validity for strings from untrusted origins. *) +val validate : t -> unit + +(* All functions below assume string are valid UTF-8. If not, + * the result is unspecified. *) + +(** [get s n] returns [n]-th Unicode character of [s]. + The call requires O(n)-time. *) +val get : t -> int -> uchar + +(** [init len f] + returns a new string which contains [len] Unicode characters. + The i-th Unicode character is initialized by [f i] *) +val init : int -> (int -> uchar) -> t + +(** [length s] returns the number of Unicode characters contained in s *) +val length : t -> int + +(** Positions in the string represented by the number of bytes from the head. + The location of the first character is [0] *) +type index = int + +(** [nth s n] returns the position of the [n]-th Unicode character. + The call requires O(n)-time *) +val nth : t -> int -> index + +(** The position of the head of the last Unicode character. *) +val last : t -> index + +(** [look s i] + returns the Unicode character of the location [i] in the string [s]. *) +val look : t -> index -> uchar + +(** [substring s i len] returns the substring made of the Unicode locations [i] to [i + len - 1] inclusive. + The string is always copied *) +val substring : t -> int -> int -> t + +(** [out_of_range s i] + tests whether [i] is a position inside of [s]. *) +val out_of_range : t -> index -> bool + +(** [compare_index s i1 i2] returns + a value < 0 if [i1] is the position located before [i2], + 0 if [i1] and [i2] points the same location, + a value > 0 if [i1] is the position located after [i2]. *) +val compare_index : t -> index -> index -> int + +(** [next s i] + returns the position of the head of the Unicode character + located immediately after [i]. + If [i] is inside of [s], the function always successes. + If [i] is inside of [s] and there is no Unicode character after [i], + the position outside [s] is returned. + If [i] is not inside of [s], the behaviour is unspecified. *) +val next : t -> index -> index + +(** [prev s i] + returns the position of the head of the Unicode character + located immediately before [i]. + If [i] is inside of [s], the function always successes. + If [i] is inside of [s] and there is no Unicode character before [i], + the position outside [s] is returned. + If [i] is not inside of [s], the behaviour is unspecified. *) +val prev : t -> index -> index + +(** [move s i n] + returns [n]-th Unicode character after [i] if n >= 0, + [n]-th Unicode character before [i] if n < 0. + If there is no such character, the result is unspecified. *) +val move : t -> index -> int -> index + +(** [iter f s] + applies [f] to all Unicode characters in [s]. + The order of application is same to the order + of the Unicode characters in [s]. *) +val iter : (uchar -> unit) -> t -> unit + +(** Code point comparison by the lexicographic order. + [compare s1 s2] returns + a positive integer if [s1] > [s2], + 0 if [s1] = [s2], + a negative integer if [s1] < [s2]. *) +val compare : t -> t -> int + +(** Buffer module for UTF-8 strings *) +module Buf : sig + (** Buffers for UTF-8 strings. *) + type buf + + (** [create n] creates a buffer with the initial size [n]-bytes. *) + val create : int -> buf + + (* The rest of functions is similar to the ones of Buffer in stdlib. *) + (** [contents buf] returns the contents of the buffer. *) + val contents : buf -> t + + (** Empty the buffer, + but retains the internal storage which was holding the contents *) + val clear : buf -> unit + + (** Empty the buffer and de-allocate the internal storage. *) + val reset : buf -> unit + + (** Add one Unicode character to the buffer. *) + val add_char : buf -> uchar -> unit + + (** Add the UTF-8 string to the buffer. *) + val add_string : buf -> t -> unit + + (** [add_buffer b1 b2] adds the contents of [b2] to [b1]. + The contents of [b2] is not changed. *) + val add_buffer : buf -> buf -> unit +end diff --git a/src/unzip.ml b/src/unzip.ml new file mode 100644 index 0000000..a5fbca5 --- /dev/null +++ b/src/unzip.ml @@ -0,0 +1,451 @@ +(* + * Unzip - inflate format decompression algorithm + * Copyright (C) 2004 Nicolas Cannasse + * Compliant with RFC 1950 and 1951 + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +open ExtBytes + +type huffman = + | Found of int + | NeedBit of huffman * huffman + | NeedBits of int * huffman array + + +type adler32 = { + mutable a1 : int; + mutable a2 : int; +} + +type window = { + mutable wbuffer : Bytes.t; + mutable wpos : int; + wcrc : adler32; +} + +type state = + | Head + | Block + | CData + | Flat + | Crc + | Dist + | DistOne + | Done + +type t = { + mutable znbits : int; + mutable zbits : int; + mutable zstate : state; + mutable zfinal : bool; + mutable zhuffman : huffman; + mutable zhuffdist : huffman option; + mutable zlen : int; + mutable zdist : int; + mutable zneeded : int; + mutable zoutput : Bytes.t; + mutable zoutpos : int; + zinput : IO.input; + zlengths : int array; + zwindow : window; +} + +type error_msg = + | Invalid_huffman + | Invalid_data + | Invalid_crc + | Truncated_data + | Unsupported_dictionary + +exception Error of error_msg + +let error msg = raise (Error msg) + +(* ************************************************************************ *) +(* HUFFMAN TREES *) + +let rec tree_depth = function + | Found _ -> 0 + | NeedBits _ -> assert false + | NeedBit (a,b) -> + 1 + min (tree_depth a) (tree_depth b) + +let rec tree_compress t = + match tree_depth t with + | 0 -> t + | 1 -> + (match t with + | NeedBit (a,b) -> NeedBit (tree_compress a,tree_compress b) + | _ -> assert false) + | d -> + let size = 1 lsl d in + let tbl = Array.make size (Found (-1)) in + tree_walk tbl 0 0 d t; + NeedBits (d,tbl) + +and tree_walk tbl p cd d = function + | NeedBit (a,b) when d > 0 -> + tree_walk tbl p (cd + 1) (d-1) a; + tree_walk tbl (p lor (1 lsl cd)) (cd + 1) (d-1) b; + | t -> + Array.set tbl p (tree_compress t) + +let make_huffman lengths pos nlengths maxbits = + let counts = Array.make maxbits 0 in + for i = 0 to nlengths - 1 do + let p = Array.unsafe_get lengths (i + pos) in + if p >= maxbits then error Invalid_huffman; + Array.unsafe_set counts p (Array.unsafe_get counts p + 1); + done; + let code = ref 0 in + let tmp = Array.make maxbits 0 in + for i = 1 to maxbits - 2 do + code := (!code + Array.unsafe_get counts i) lsl 1; + Array.unsafe_set tmp i !code; + done; + let bits = Hashtbl.create 0 in + for i = 0 to nlengths - 1 do + let l = Array.unsafe_get lengths (i + pos) in + if l <> 0 then begin + let n = Array.unsafe_get tmp (l - 1) in + Array.unsafe_set tmp (l - 1) (n + 1); + Hashtbl.add bits (n,l) i; + end; + done; + let rec tree_make v l = + if l > maxbits then error Invalid_huffman; + try + Found (Hashtbl.find bits (v,l)) + with + Not_found -> + NeedBit (tree_make (v lsl 1) (l + 1) , tree_make (v lsl 1 lor 1) (l + 1)) + in + tree_compress (NeedBit (tree_make 0 1 , tree_make 1 1)) + +(* ************************************************************************ *) +(* ADLER32 (CRC) *) + +let adler32_create() = { + a1 = 1; + a2 = 0; +} + +let adler32_update a s p l = + let p = ref p in + for i = 0 to l - 1 do + let c = int_of_char (Bytes.unsafe_get s !p) in + a.a1 <- (a.a1 + c) mod 65521; + a.a2 <- (a.a2 + a.a1) mod 65521; + incr p; + done + +let adler32_read ch = + let a2a = IO.read_byte ch in + let a2b = IO.read_byte ch in + let a1a = IO.read_byte ch in + let a1b = IO.read_byte ch in + { + a1 = (a1a lsl 8) lor a1b; + a2 = (a2a lsl 8) lor a2b; + } + +(* ************************************************************************ *) +(* WINDOW *) + +let window_size = 1 lsl 15 +let buffer_size = 1 lsl 16 + +let window_create size = { + wbuffer = Bytes.create buffer_size; + wpos = 0; + wcrc = adler32_create() + } + +let window_slide w = + adler32_update w.wcrc w.wbuffer 0 window_size; + let b = Bytes.create buffer_size in + w.wpos <- w.wpos - window_size; + Bytes.unsafe_blit w.wbuffer window_size b 0 w.wpos; + w.wbuffer <- b + +let window_add_bytes w s p len = + if w.wpos + len > buffer_size then window_slide w; + Bytes.unsafe_blit s p w.wbuffer w.wpos len; + w.wpos <- w.wpos + len + +let window_add_char w c = + if w.wpos = buffer_size then window_slide w; + Bytes.unsafe_set w.wbuffer w.wpos c; + w.wpos <- w.wpos + 1 + +let window_get_last_char w = + Bytes.unsafe_get w.wbuffer (w.wpos - 1) + +let window_available w = + w.wpos + +let window_checksum w = + adler32_update w.wcrc w.wbuffer 0 w.wpos; + w.wcrc + +(* ************************************************************************ *) + +let len_extra_bits_tbl = [|0;0;0;0;0;0;0;0;1;1;1;1;2;2;2;2;3;3;3;3;4;4;4;4;5;5;5;5;0;-1;-1|] +let len_base_val_tbl = [|3;4;5;6;7;8;9;10;11;13;15;17;19;23;27;31;35;43;51;59;67;83;99;115;131;163;195;227;258|] +let dist_extra_bits_tbl = [|0;0;0;0;1;1;2;2;3;3;4;4;5;5;6;6;7;7;8;8;9;9;10;10;11;11;12;12;13;13;-1;-1|] +let dist_base_val_tbl = [|1;2;3;4;5;7;9;13;17;25;33;49;65;97;129;193;257;385;513;769;1025;1537;2049;3073;4097;6145;8193;12289;16385;24577|] +let code_lengths_pos = [|16;17;18;0;8;7;9;6;10;5;11;4;12;3;13;2;14;1;15|] + +let fixed_huffman = make_huffman (Array.init 288 (fun n -> + if n <= 143 then 8 + else if n <= 255 then 9 + else if n <= 279 then 7 + else 8 + )) 0 288 10 + +let get_bits z n = + while z.znbits < n do + z.zbits <- z.zbits lor ((IO.read_byte z.zinput) lsl z.znbits); + z.znbits <- z.znbits + 8; + done; + let b = z.zbits land (1 lsl n - 1) in + z.znbits <- z.znbits - n; + z.zbits <- z.zbits lsr n; + b + +let get_bit z = + if z.znbits = 0 then begin + z.znbits <- 8; + z.zbits <- IO.read_byte z.zinput; + end; + let b = z.zbits land 1 = 1 in + z.znbits <- z.znbits - 1; + z.zbits <- z.zbits lsr 1; + b + +let rec get_rev_bits z n = + if n = 0 then + 0 + else if get_bit z then + (1 lsl (n - 1)) lor (get_rev_bits z (n-1)) + else + get_rev_bits z (n-1) + +let reset_bits z = + z.zbits <- 0; + z.znbits <- 0 + +let add_bytes z s p l = + window_add_bytes z.zwindow s p l; + Bytes.unsafe_blit s p z.zoutput z.zoutpos l; + z.zneeded <- z.zneeded - l; + z.zoutpos <- z.zoutpos + l + +let add_char z c = + window_add_char z.zwindow c; + Bytes.unsafe_set z.zoutput z.zoutpos c; + z.zneeded <- z.zneeded - 1; + z.zoutpos <- z.zoutpos + 1 + +let add_dist_one z n = + let c = window_get_last_char z.zwindow in + let s = Bytes.make n c in + add_bytes z s 0 n + +let add_dist z d l = + add_bytes z z.zwindow.wbuffer (z.zwindow.wpos - d) l + +let rec apply_huffman z = function + | Found n -> n + | NeedBit (a,b) -> apply_huffman z (if get_bit z then b else a) + | NeedBits (n,t) -> apply_huffman z (Array.unsafe_get t (get_bits z n)) + +let inflate_lengths z a max = + let i = ref 0 in + let prev = ref 0 in + while !i < max do + match apply_huffman z z.zhuffman with + | n when n <= 15 -> + prev := n; + Array.unsafe_set a !i n; + incr i + | 16 -> + let n = 3 + get_bits z 2 in + if !i + n > max then error Invalid_data; + for k = 0 to n - 1 do + Array.unsafe_set a !i !prev; + incr i; + done; + | 17 -> + let n = 3 + get_bits z 3 in + i := !i + n; + if !i > max then error Invalid_data; + | 18 -> + let n = 11 + get_bits z 7 in + i := !i + n; + if !i > max then error Invalid_data; + | _ -> + error Invalid_data + done + +let rec inflate_loop z = + match z.zstate with + | Head -> + let cmf = IO.read_byte z.zinput in + let cm = cmf land 15 in + let cinfo = cmf lsr 4 in + if cm <> 8 || cinfo <> 7 then error Invalid_data; + let flg = IO.read_byte z.zinput in + (*let fcheck = flg land 31 in*) + let fdict = flg land 32 <> 0 in + (*let flevel = flg lsr 6 in*) + if (cmf lsl 8 + flg) mod 31 <> 0 then error Invalid_data; + if fdict then error Unsupported_dictionary; + z.zstate <- Block; + inflate_loop z + | Crc -> + let calc = window_checksum z.zwindow in + let crc = adler32_read z.zinput in + if calc <> crc then error Invalid_crc; + z.zstate <- Done; + inflate_loop z + | Done -> + () + | Block -> + z.zfinal <- get_bit z; + let btype = get_bits z 2 in + (match btype with + | 0 -> (* no compression *) + z.zlen <- IO.read_ui16 z.zinput; + let nlen = IO.read_ui16 z.zinput in + if nlen <> 0xffff - z.zlen then error Invalid_data; + z.zstate <- Flat; + inflate_loop z; + reset_bits z + | 1 -> (* fixed Huffman *) + z.zhuffman <- fixed_huffman; + z.zhuffdist <- None; + z.zstate <- CData; + inflate_loop z + | 2 -> (* dynamic Huffman *) + let hlit = get_bits z 5 + 257 in + let hdist = get_bits z 5 + 1 in + let hclen = get_bits z 4 + 4 in + for i = 0 to hclen - 1 do + Array.unsafe_set z.zlengths (Array.unsafe_get code_lengths_pos i) (get_bits z 3); + done; + for i = hclen to 18 do + Array.unsafe_set z.zlengths (Array.unsafe_get code_lengths_pos i) 0; + done; + z.zhuffman <- make_huffman z.zlengths 0 19 8; + let lengths = Array.make (hlit + hdist) 0 in + inflate_lengths z lengths (hlit + hdist); + z.zhuffdist <- Some (make_huffman lengths hlit hdist 16); + z.zhuffman <- make_huffman lengths 0 hlit 16; + z.zstate <- CData; + inflate_loop z + | _ -> + error Invalid_data) + | Flat -> + let rlen = min z.zlen z.zneeded in + let str = IO.nread z.zinput rlen in + let len = Bytes.length str in + z.zlen <- z.zlen - len; + add_bytes z str 0 len; + if z.zlen = 0 then z.zstate <- (if z.zfinal then Crc else Block); + if z.zneeded > 0 then inflate_loop z + | DistOne -> + let len = min z.zlen z.zneeded in + add_dist_one z len; + z.zlen <- z.zlen - len; + if z.zlen = 0 then z.zstate <- CData; + if z.zneeded > 0 then inflate_loop z + | Dist -> + while z.zlen > 0 && z.zneeded > 0 do + let len = min z.zneeded (min z.zlen z.zdist) in + add_dist z z.zdist len; + z.zlen <- z.zlen - len; + done; + if z.zlen = 0 then z.zstate <- CData; + if z.zneeded > 0 then inflate_loop z + | CData -> + match apply_huffman z z.zhuffman with + | n when n < 256 -> + add_char z (Char.unsafe_chr n); + if z.zneeded > 0 then inflate_loop z + | 256 -> + z.zstate <- if z.zfinal then Crc else Block; + inflate_loop z + | n -> + let n = n - 257 in + let extra_bits = Array.unsafe_get len_extra_bits_tbl n in + if extra_bits = -1 then error Invalid_data; + z.zlen <- (Array.unsafe_get len_base_val_tbl n) + (get_bits z extra_bits); + let dist_code = (match z.zhuffdist with None -> get_rev_bits z 5 | Some h -> apply_huffman z h) in + let extra_bits = Array.unsafe_get dist_extra_bits_tbl dist_code in + if extra_bits = -1 then error Invalid_data; + z.zdist <- (Array.unsafe_get dist_base_val_tbl dist_code) + (get_bits z extra_bits); + if z.zdist > window_available z.zwindow then error Invalid_data; + z.zstate <- (if z.zdist = 1 then DistOne else Dist); + inflate_loop z + +let inflate_data z s pos len = + if pos < 0 || len < 0 || pos + len > Bytes.length s then invalid_arg "inflate_data"; + z.zneeded <- len; + z.zoutpos <- pos; + z.zoutput <- s; + try + if len > 0 then inflate_loop z; + len - z.zneeded + with + IO.No_more_input -> error Truncated_data + +let inflate_init ?(header=true) ch = + { + zfinal = false; + zhuffman = fixed_huffman; + zhuffdist = None; + zlen = 0; + zdist = 0; + zstate = (if header then Head else Block); + zinput = ch; + zbits = 0; + znbits = 0; + zneeded = 0; + zoutput = Bytes.empty; + zoutpos = 0; + zlengths = Array.make 19 (-1); + zwindow = window_create (1 lsl 15) + } + +let inflate ?(header=true) ch = + let z = inflate_init ~header ch in + let s = Bytes.create 1 in + IO.create_in + ~read:(fun() -> + let l = inflate_data z s 0 1 in + if l = 1 then Bytes.unsafe_get s 0 else raise IO.No_more_input + ) + ~input:(fun s p l -> + let n = inflate_data z s p l in + if n = 0 then raise IO.No_more_input; + n + ) + ~close:(fun () -> + IO.close_in ch + ) diff --git a/src/unzip.mli b/src/unzip.mli new file mode 100644 index 0000000..0002682 --- /dev/null +++ b/src/unzip.mli @@ -0,0 +1,47 @@ +(* + * Unzip - inflate format decompression algorithm + * Copyright (C) 2004 Nicolas Cannasse + * Compliant with RFC 1950 and 1951 + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** Decompression algorithm. + + Unzip decompression algorithm is compliant with RFC 1950 and 1951 which + are describing the "inflate" algorithm used in most popular file formats. + This format is also the one used by the popular ZLib library. +*) + +open ExtBytes + +type error_msg = + | Invalid_huffman + | Invalid_data + | Invalid_crc + | Truncated_data + | Unsupported_dictionary + +exception Error of error_msg + +val inflate : ?header:bool -> IO.input -> IO.input +(** wrap an input using "inflate" decompression algorithm. raises [Error] if + an error occurs (this can only be caused by malformed input data). *) + +type t + +val inflate_init : ?header:bool -> IO.input -> t +val inflate_data : t -> Bytes.t -> int -> int -> int diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 0000000..a21a7fd --- /dev/null +++ b/test/Makefile @@ -0,0 +1,32 @@ + +.NOTPARALLEL: +.SUFFIXES: +.PHONY: all opt run clean + +TESTS := $(wildcard test_*.ml) + +ifdef minimal +TESTS := $(filter-out test_Unzip.ml test_UChar.ml test_UTF8.ml, $(TESTS)) +endif + +all: +ifndef INSTALLED + $(MAKE) -C ../src all minimal=$(minimal) + ocamlfind ocamlc -linkall -linkpkg -package bytes -I ../src extLib.cma util.ml $(TESTS) runner.ml -o extlib_test +else + ocamlfind ocamlc -linkall -linkpkg -package extlib util.ml $(TESTS) runner.ml -o extlib_test +endif + +opt: +ifndef INSTALLED + $(MAKE) -C ../src opt minimal=$(minimal) + ocamlfind ocamlopt -linkall -linkpkg -package bytes -I ../src extLib.cmxa util.ml $(TESTS) runner.ml -o extlib_test +else + ocamlfind ocamlopt -linkall -linkpkg -package extlib util.ml $(TESTS) runner.ml -o extlib_test +endif + +run: + ./extlib_test + +clean: + rm -rf *.cm* *.o *.obj extlib_test diff --git a/test/runner.ml b/test/runner.ml new file mode 100644 index 0000000..ec29560 --- /dev/null +++ b/test/runner.ml @@ -0,0 +1,9 @@ +(** test runner *) + +let () = + let filter = + match Array.to_list Sys.argv with + | [] | [_] -> None + | _::l -> Some (List.map String.lowercase l) + in + exit (if Util.run_all filter then 0 else 1) diff --git a/test/std.ml b/test/std.ml new file mode 100644 index 0000000..8b737a9 --- /dev/null +++ b/test/std.ml @@ -0,0 +1,24 @@ +(* check compatibility of interfaces *) + +#directory "src";; +#load "extLib.cma";; + +module XS = (struct + include ExtLib.String + external length : string -> int = "%string_length" + external get : string -> int -> char = "%string_safe_get" + external set : bytes -> int -> char -> unit = "%string_safe_set" + external create : int -> bytes = "caml_create_string" + external unsafe_set : bytes -> int -> char -> unit = "%string_unsafe_set" + external unsafe_blit : string -> int -> bytes -> int -> int -> unit = "caml_blit_string" [@@noalloc] + external unsafe_fill : bytes -> int -> int -> char -> unit = "caml_fill_string" [@@noalloc] +end : module type of String) + +module XL = (struct + include ExtLib.List + let sort = List.sort +end : module type of List) + +module XA = (ExtLib.Array : module type of Array) +module XB = (ExtLib.Buffer : module type of Buffer) +module XH = (ExtLib.Hashtbl : module type of Hashtbl) diff --git a/test/test_Base64.ml b/test/test_Base64.ml new file mode 100644 index 0000000..ffb1e7e --- /dev/null +++ b/test/test_Base64.ml @@ -0,0 +1,48 @@ +(* + * ExtLib Testing Suite + * Copyright (C) 2004 Janne Hellsten + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +let in_range c a b = + let i = int_of_char c + and ai = int_of_char a + and bi = int_of_char b in + (i >= ai && i <= bi) + +let check_chars s = + let len = String.length s in + if len > 0 then + begin + for i = 0 to len-1 do + let c = s.[i] in + if not (in_range c 'A' 'Z') then + if not (in_range c 'a' 'z') then + if not (in_range c '0' '9') then + assert (c = '/' || c = '+') + done + end + +let () = + Util.register1 "Base64" "random" + (fun () -> + for i = 0 to 64 do + let s = Util.random_string () in + let enc = Base64.encode_string s in + assert ((Base64.decode_string enc) = s); + check_chars enc + done) diff --git a/test/test_BitSet.ml b/test/test_BitSet.ml new file mode 100644 index 0000000..2567b84 --- /dev/null +++ b/test/test_BitSet.ml @@ -0,0 +1,407 @@ +(* + * ExtLib Testing Suite + * Copyright (C) 2004 Janne Hellsten + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +open ExtList + +module B = BitSet + +let biased_rnd_28 () = + let n_bits = [| 4; 8; 16; 28 |] in + let n = n_bits.(Random.int (Array.length n_bits)) in + Random.int (1 lsl n) + +let popcount n = + let p = ref 0 in + for i = 0 to 29 do + if n land (1 lsl i) <> 0 then + incr p + done; + !p + +let set_bitset s n = + for i = 0 to 29 do + if (n land (1 lsl i)) <> 0 then + B.set s i + done; + assert (popcount n = B.count s) + +let bitset_of_int n = + assert (n <= (1 lsl 29)); + let s = B.create 30 in + set_bitset s n; + s + +let int_of_bitset s = + let n = ref 0 in + for i = 0 to 29 do + if B.is_set s i then + n := !n lor (1 lsl i) + done; + !n + +let bitset_of_int_scale n scl = + assert (n <= (1 lsl 29)); + let s = B.create 30 in + for i = 0 to 29 do + if (n land (1 lsl i)) <> 0 then + B.set s (i*scl) + done; + assert (popcount n = B.count s); + s + +let int_of_bitset_scale s scl = + let n = ref 0 in + for i = 0 to 29 do + if B.is_set s (i*scl) then + n := !n lor (1 lsl i) + done; + !n + +let test_rnd_creation () = + for i = 0 to 255 do + let r1 = biased_rnd_28 () in + let s = bitset_of_int r1 in + let c = B.copy s in + assert (int_of_bitset s = r1); + assert (c = s); + assert (B.compare c s = 0); + B.unite c s; + assert (c = s); + B.intersect c (B.empty ()); + assert (B.count c = 0); + done + +let test_intersect () = + for i = 0 to 255 do + let s = bitset_of_int (biased_rnd_28 ()) in + B.intersect s (B.empty ()); + assert (B.count s = 0) + done + +let test_diff () = + for i = 0 to 255 do + let r = biased_rnd_28 () in + let s = bitset_of_int r in + if r <> 0 then + assert (B.count s <> 0); + assert (B.count ((B.diff s s)) = 0); + done + +let test_sym_diff () = + for i = 0 to 255 do + let s = (Random.int 3)+1 in + let r1 = biased_rnd_28 () in + let r2 = biased_rnd_28 () in + let s1 = bitset_of_int_scale r1 s in + let s2 = bitset_of_int_scale r2 s in + assert (int_of_bitset_scale (bitset_of_int_scale r1 s) s = r1); + assert (int_of_bitset_scale (B.sym_diff s1 s2) s = r1 lxor r2); + assert (int_of_bitset_scale (B.sym_diff s2 s1) s = r1 lxor r2); + assert (B.count (B.sym_diff s1 s1) = 0); + assert (B.count (B.sym_diff s2 s2) = 0); + done + +let test_compare () = + assert (B.compare (B.empty ()) (B.empty ()) = 0); + for i = 0 to 255 do + let r1 = biased_rnd_28 () in + let r2 = biased_rnd_28 () in + let s1 = bitset_of_int r1 + and s2 = bitset_of_int r2 in + let sr = B.compare s1 s2 + and ir = compare r1 r2 in + assert (sr = ir); + assert (B.compare s1 s1 = 0); + assert (B.compare s2 s2 = 0) + done; + for i = 0 to 255 do + let scl = (Random.int 15)+1 in + let r1 = biased_rnd_28 () in + let r2 = biased_rnd_28 () in + let s1 = bitset_of_int_scale r1 scl + and s2 = bitset_of_int_scale r2 scl in + let sr = B.compare s1 s2 + and ir = compare r1 r2 in + assert (sr = ir) + done; + for i = 1 to 255 do + let s1 = bitset_of_int (i-1) in + let s2 = bitset_of_int i in + assert (B.compare s1 s2 = -1) + done; + for i = 1 to 255 do + let s1 = bitset_of_int (i-1) in + let s2 = bitset_of_int i in + assert (B.compare s1 s2 = -1) + done + +module BSSet = Set.Make (struct type t = BitSet.t let compare = B.compare end) + +let test_compare_2 () = + let nums = List.init 256 Std.identity in + let num_set = + List.fold_left (fun acc e -> BSSet.add (bitset_of_int e) acc) BSSet.empty nums in + List.iter + (fun e -> + let bs = bitset_of_int e in + assert (BSSet.mem bs num_set)) nums + +let test_compare_3 () = + for i = 0 to 63 do + for j = 0 to 63 do + let b1 = B.create ((Random.int 128)+32) in + let b2 = B.create ((Random.int 128)+32) in + set_bitset b1 i; + set_bitset b2 j; + assert (B.compare b1 b2 = compare i j) + done + done + +let test_empty () = + for len = 0 to 63 do + let s = B.empty () in + for i = 0 to len do + assert (not (B.is_set s i)); + B.set s i + done; + assert (not (B.is_set s (len+1))); + for i = 0 to len do + assert (B.is_set s i) + done + done + +let test_exceptions () = + let expect_exn f = + try + f (); + false (* Should've raised an exception! *) + with B.Negative_index _ -> true in + let s = B.create 100 in + assert (expect_exn (fun () -> B.set s (-15))); + assert (expect_exn (fun () -> B.unset s (-15))); + assert (expect_exn (fun () -> B.toggle s (-15))); + assert (expect_exn + (fun () -> + let s = B.create 8 in + B.is_set s (-19))) + +module IS = Set.Make (struct type t = int let compare = compare end) + +let set_of_int n = + let rec loop accu i = + if i < 30 then + if ((1 lsl i) land n) <> 0 then + loop (IS.add i accu) (i+1) + else + loop accu (i+1) + else accu in + loop IS.empty 0 + +let int_of_set s = + IS.fold (fun i acc -> (1 lsl i) lor acc) s 0 + +let test_set_opers () = + let rnd_oper () = + match Random.int 3 with + 0 -> (IS.inter, B.inter) + | 1 -> (IS.diff, B.diff) + | 2 -> (IS.union, B.union) + | _ -> assert false in + for i = 0 to 255 do + let r1 = biased_rnd_28 () in + let r2 = biased_rnd_28 () in + let s1 = set_of_int r1 + and s2 = set_of_int r2 + and bs1 = bitset_of_int r1 + and bs2 = bitset_of_int r2 in + assert (int_of_set s1 = r1); + assert (int_of_set s2 = r2); + assert (int_of_bitset bs1 = r1); + assert (int_of_bitset bs2 = r2); + let (isop,bsop) = rnd_oper () in + let is = isop s1 s2 + and bs = bsop bs1 bs2 in + let is_int = int_of_set is in + let bs_int = int_of_bitset bs in + assert (is_int = bs_int); + done + +let test_unite () = + for i = 0 to 255 do + let r1 = biased_rnd_28 () in + let s = bitset_of_int r1 in + let c = B.copy s in + assert (int_of_bitset s = r1); + let pop = B.count c in + B.unite c (B.empty ()); + assert (B.count c = pop); + done + +let test_intersect_2 () = + for i = 0 to 255 do + let r1 = biased_rnd_28 () in + let s = bitset_of_int r1 in + let c = B.copy s in + assert (int_of_bitset s = r1); + B.intersect c (B.empty ()); + assert (B.count c = 0); + done + +let test_differentiate () = + for i = 0 to 255 do + let r1 = biased_rnd_28 () in + let s = bitset_of_int r1 in + let d = B.copy s in + B.differentiate d s; + assert (B.count d = 0); + for j = 0 to 32 do + B.set s (Random.int 256) + done; + let d = B.copy s in + B.differentiate d (B.empty ()); + assert (B.count s = B.count d); + assert (B.compare d s = 0); + B.differentiate d s; + assert (B.count d = 0); + done + +(* TODO *) +let test_differentiate_sym () = + for i = 0 to 255 do + let r1 = biased_rnd_28 () in + let r2 = biased_rnd_28 () in + let s = bitset_of_int r1 in + let d = B.copy s in + B.differentiate_sym d s; + assert (B.count d = 0); + for j = 0 to 32 do + B.set s (Random.int 256) + done; + let d = B.copy s in + B.differentiate_sym d (B.empty ()); + assert (B.count s = B.count d); + assert (B.compare d s = 0); + B.differentiate_sym d s; + assert (B.count d = 0); + + let s1 = bitset_of_int r1 + and s2 = bitset_of_int r2 in + let d1 = B.copy s1 in + B.differentiate_sym d1 s2; + assert (r1 lxor r2 = int_of_bitset d1); + done + +let test_bs_1 () = + let b = BitSet.empty () in + BitSet.set b 8; + BitSet.set b 9; + assert (not (BitSet.is_set b 7)); + assert (BitSet.is_set b 8); + assert (BitSet.is_set b 9); + assert (not (BitSet.is_set b 10)); + () + +let test_enum_1 () = + let b = BitSet.empty () in + BitSet.set b 0; + BitSet.set b 1; + let e = BitSet.enum b in + let a = Enum.get e in + let b = Enum.get e in + assert (Option.get a = 0); + assert (Option.get b = 1); + () + +let test_enum_2 () = + let n = 13 in + let b = BitSet.empty () in + for i = 0 to n do + BitSet.set b i + done; + let e = BitSet.enum b in + for i = 0 to n do + let a = Enum.get e in + match a with + Some v -> assert (v = i) + | None -> assert false + done; + assert (Enum.get e = None); + () + +let test_enum_3 () = + let b = BitSet.empty () in + BitSet.set b 9; + BitSet.set b 10; + let e = BitSet.enum b in + let i = Enum.get e in + let j = Enum.get e in + assert (Option.get i = 9); + begin + match j with + Some v -> + assert (v = 10); + | None -> + assert false (* Should NOT come here! *) + end; + assert (Enum.get e = None); + () + +(* Bug reported by Pascal Zimmer on Feb 27, 2007. The latter assert + returned None when it should've returned Some 9. *) +let test_enum_regr_pz () = + let b = BitSet.empty () in + BitSet.set b 8; + BitSet.set b 9; + let e = BitSet.enum b in + let i = Enum.get e in + let j = Enum.get e in + assert (Option.get i = 8); + begin + match j with + Some v -> + assert (v = 9); + | None -> + assert false (* Should NOT come here! *) + end; + () + + +let () = + Util.register "BitSet" [ + "basic", test_bs_1; + "enum_1", test_enum_1; + "enum_2", test_enum_2; + "enum_3", test_enum_3; + "enum_regr_pz", test_enum_regr_pz; + "intersect", test_intersect; + "diff", test_diff; + "sym_diff", test_sym_diff; + "rnd_creation", test_rnd_creation; + "empty", test_empty; + "exceptions", test_exceptions; + "compare", test_compare; + "compare_2", test_compare_2; + "compare_3", test_compare_3; + "set_opers", test_set_opers; + "unite",test_unite; + "intersect_2",test_intersect_2; + "differentiate",test_differentiate; + "differentiate_sym", test_differentiate_sym; + ] diff --git a/test/test_Dllist.ml b/test/test_Dllist.ml new file mode 100644 index 0000000..ee8e8b5 --- /dev/null +++ b/test/test_Dllist.ml @@ -0,0 +1,55 @@ +(* + * ExtLib Testing Suite + * Copyright (C) 2004 Janne Hellsten + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +let test_simple () = + for i = 1 to 5 do + let rec make_lst accu n = + if n < i then make_lst (i::accu) (n+1) + else accu in + let lst = make_lst [] 0 in + let dlst = Dllist.of_list lst in + assert (List.length lst = Dllist.length dlst); + List.iter + (fun e -> + let dl_elem = Dllist.get dlst in + assert (e = dl_elem); + Dllist.remove dlst) lst; + done + +(* Failure case reported by Christopher Wedman on extlib mailing list 2005/Feb/12. *) +let test_regression_1 () = + let lst = Dllist.create 1 in + ignore (Dllist.append lst 2); + ignore (Dllist.demote lst); + ignore (Dllist.length lst) (* <-- hangs here *) + +(* Failure case reported by Christopher Wedman on extlib mailing list 2005/Feb/12. *) +let test_regression_2 () = + let lst = Dllist.create 1 in + ignore (Dllist.append lst 2); + ignore (Dllist.promote lst); + assert (Dllist.length lst = 2) (* returned 1, but should return 2 *) + +let () = + Util.register "Dllist" [ + "simple", test_simple; + "regression_1", test_regression_1; + "regression_2", test_regression_2; + ] diff --git a/test/test_DynArray.ml b/test/test_DynArray.ml new file mode 100644 index 0000000..b643d01 --- /dev/null +++ b/test/test_DynArray.ml @@ -0,0 +1,74 @@ +(* + * ExtLib Testing Suite + * Copyright (C) 2004 John Skaller + * Copyright (C) 2004 Janne Hellsten + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(* NOTE this is a copy of Skaller's trivial DynArray test. Apparently + he hit the nail on the head with a first try, since test_triv causes a + segfault if you run Gc.major () after test_triv. If you change the + initial size of the DynArray to one or bigger, the crash does not + occur. *) + +open DynArray + +let test_triv () = + let a = make 0 in (* ZERO here causes a segfault later in GC??? *) + let b = copy a in + assert (length a == 0); + assert (length b == 0); + Gc.major () + +(* Failure reported by Jeff Henrikson. Should be fixed in CVS + already? JH 2005/Mar/01 *) +let test_regr_1 () = + for i = 0 to 30 do + ignore (DynArray.of_array [||]) + done + +(* Memory corruption in DynArray.insert; fixed. BD 2009/Jun/18. *) +let test_insert () = + let d = ref (DynArray.create ()) and n = 4100 in + for i = 0 to n do + assert (i = DynArray.length !d); + (* This is needed in order to expose the memory corruption. *) + Printf.ifprintf stdout "%d %d\n" i (DynArray.length !d); flush stdout; + DynArray.insert !d 0 (Array.create 42 "") + done + +(* Issue 2: Error in DynArray exponential resizer *) +let test_dynarray1 () = + let a = DynArray.create () in + for i = 1 to 2817131 do + DynArray.add a i + done + +let test_dynarray2 () = + let a = DynArray.make 2817131 in + for i = 1 to 2817131 do + DynArray.add a i + done + +let () = + Util.register "DynArray" [ + "triv", test_triv; + "regr_1", test_regr_1; + "insert", test_insert; + "simple_1",test_dynarray1; + "simple_2",test_dynarray2; + ] diff --git a/test/test_ExtArray.ml b/test/test_ExtArray.ml new file mode 100644 index 0000000..1fbb25b --- /dev/null +++ b/test/test_ExtArray.ml @@ -0,0 +1,154 @@ +(* + * ExtLib Testing Suite + * Copyright (C) 2005 Richard W.M. Jones + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(* Standard library array. *) +module StdArray = Array + +open ExtArray + +let test_rev () = + assert ([| 1; 2; 3; 4; 5 |] = Array.rev [| 5; 4; 3; 2; 1 |]); + assert ([| 1; 2; 3; 4; 5; 6 |] = Array.rev [| 6; 5; 4; 3; 2; 1 |]); + assert ([| "a"; "b"; "c" |] = Array.rev [| "c"; "b"; "a" |]); + assert ([| "a"; "b" |] = Array.rev [| "b"; "a" |]); + assert ([| "a" |] = Array.rev [| "a" |]); + assert ([| |] = Array.rev [| |]) + +let test_rev_in_place () = + let a = [| 5; 4; 3; 2; 1 |] in + Array.rev_in_place a; + assert ([| 1; 2; 3; 4; 5 |] = a); + let a = [| 6; 5; 4; 3; 2; 1 |] in + Array.rev_in_place a; + assert ([| 1; 2; 3; 4; 5; 6 |] = a); + let a = [| "c"; "b"; "a" |] in + Array.rev_in_place a; + assert ([| "a"; "b"; "c" |] = a); + let a = [| "b"; "a" |] in + Array.rev_in_place a; + assert ([| "a"; "b" |] = a); + let a = [| "a" |] in + Array.rev_in_place a; + assert ([| "a" |] = a); + let a = [| |] in + Array.rev_in_place a; + assert ([| |] = a) + +let test_for_all () = + let a = [| 0; 2; 4; 6; 8; 10; 12 |] in + let is_even i = 0 = (i land 1) in + assert (Array.for_all is_even a); + assert (Array.for_all is_even [| |]) + +let test_exists () = + let a = [| 0; 2; 4; 6; 8; 10; 11; 12 |] in + let b = [| 0; 2; 4; 6; 8; 10; 12 |] in + let is_even i = 0 = (i land 1) in + let is_odd i = 1 = (i land 1) in + assert (Array.exists is_odd a); + assert (not (Array.exists is_odd b)); + assert (not (Array.exists is_even [| |])) + +let test_mem () = + let a = [| 0; 2; 4; 6; 8; 10; 11; 12 |] in + assert (Array.mem 11 a); + assert (Array.mem 12 a); + assert (not (Array.mem 13 a)); + assert (not (Array.mem 13 [| |])) + +let test_memq () = + let a = [| 0; 2; 4; 6; 8; 10; 11; 12 |] in + assert (Array.memq 11 a); + assert (Array.memq 12 a); + assert (not (Array.memq 13 a)); + assert (not (Array.memq 13 [| |])) + +let test_find () = + let a = [| 0; 2; 4; 6; 8; 10; 11; 12 |] in + assert (11 = Array.find ((=) 11) a); + assert (12 = Array.find ((=) 12) a); + assert (try ignore (Array.find ((=) 13) a); false with Not_found -> true); + assert (try ignore (Array.find ((=) 13) [| |]); false with Not_found -> true) + +let test_findi () = + let a = [| 0; 2; 4; 6; 8; 10; 11; 12 |] in + assert (6 = Array.findi ((=) 11) a); + assert (7 = Array.findi ((=) 12) a); + assert (try ignore (Array.findi ((=) 13) a); false with Not_found -> true); + assert (try ignore (Array.findi ((=) 13) [| |]); false + with Not_found -> true) + +let test_filter () = + let a = [| 0; 1; 2; 3; 4; 5; 6; 7; 8; 9 |] in + let is_even i = 0 = (i land 1) in + let is_odd i = 1 = (i land 1) in + assert ([| 0; 2; 4; 6; 8 |] = Array.filter is_even a); + assert ([| 1; 3; 5; 7; 9 |] = Array.filter is_odd a); + let a = Array.init 10_000 (fun i -> i) in + let b = Array.init 5_000 (fun i -> i * 2) in + let c = Array.init 5_000 (fun i -> i * 2 + 1) in + assert (b = Array.filter is_even a); + assert (c = Array.filter is_odd a); + assert ([| |] = Array.filter is_even [| |]) + +let test_partition () = + let a = [| 0; 1; 2; 3; 4; 5; 6; 7; 8; 9 |] in + let is_even i = 0 = (i land 1) in + let is_odd i = 1 = (i land 1) in + let x, y = Array.partition is_even a in + assert ([| 0; 2; 4; 6; 8 |] = x); + assert ([| 1; 3; 5; 7; 9 |] = y); + let x, y = Array.partition is_odd a in + assert ([| 1; 3; 5; 7; 9 |] = x); + assert ([| 0; 2; 4; 6; 8 |] = y); + assert (([| |], [| |]) = Array.partition is_even [| |]) + +let test_enum () = + let a = Array.init 1000 (fun i -> i) in + let e = Array.enum a in + let l = ExtList.List.of_enum e in + assert (l = Array.to_list a); + let l = ExtList.List.init 2000 (fun i -> i) in + let e = ExtList.List.enum l in + let a = Array.of_enum e in + assert (a = Array.of_list l) + +let test_map2 () = + let a = Array.init 100 (fun i -> i) in + let b = Array.init 100 (fun i -> 99 - i) in + assert (Array.make 100 99 = Array.map2 (+) a b); + assert (try let _ = Array.map2 (+) [||] [|1|] in false with Invalid_argument _ -> true); + assert (Array.map2 (-) a b = Array.of_list (List.map2 (-) (Array.to_list a) (Array.to_list b))) + +let () = + Util.register "ExtArray" [ + "rev", test_rev; + "rev_in_place", test_rev_in_place; + "for_all", test_for_all; + "exists", test_exists; + "mem", test_mem; + "memq", test_memq; + "find", test_find; + "findi", test_findi; + "filter", test_filter; + "partition", test_partition; + "enum", test_enum; + "map2", test_map2; + ] diff --git a/test/test_ExtHashtbl.ml b/test/test_ExtHashtbl.ml new file mode 100644 index 0000000..01070f3 --- /dev/null +++ b/test/test_ExtHashtbl.ml @@ -0,0 +1,33 @@ +(* + * ExtLib Testing Suite + * Copyright (C) 2013 ygrek + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +open ExtHashtbl + +(* Issue 26: Hashtbl.map is broken in OCaml >= 4.00 *) +let test_map () = + let h = Hashtbl.create 1 in + Hashtbl.add h "test" 1; + let h1 = Hashtbl.map (fun x -> x + 1) h in + let find h k = try Some (Hashtbl.find h k) with Not_found -> None in + assert (find h "test" = Some 1); + assert (find h1 "test" = Some 2) + +let () = + Util.register1 "ExtHashtbl" "map" test_map diff --git a/test/test_ExtList.ml b/test/test_ExtList.ml new file mode 100644 index 0000000..46f7aa1 --- /dev/null +++ b/test/test_ExtList.ml @@ -0,0 +1,177 @@ +(* + * ExtLib Testing Suite + * Copyright (C) 2004 Janne Hellsten + * Copyright (C) 2008 Red Hat, Inc. + * Copyright (C) 2010 ygrek + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(* Standard library list *) +module StdList = List + +open ExtList + +exception Test_Exception + +let check_empty_list_exn f = + try f (); false with List.Empty_list -> true + +(** Random length list with [0;1;2;..n] contents. *) +let rnd_list () = + let len = Random.int 3 in + List.init len Std.identity + +let test_iteri () = + for i = 0 to 15 do + List.iteri (fun i e -> assert (i = e)) (rnd_list ()); + done + +let test_mapi () = + for i = 0 to 15 do + let rnd_list = rnd_list () in + let lst = List.mapi (fun n e -> (e,"foo")) rnd_list in + let lst' = + List.mapi (fun n (e,s) -> assert (s = "foo"); assert (n = e); n) lst in + List.iteri (fun i e -> assert (i = e)) lst' + done + +let test_exceptions () = + assert (check_empty_list_exn (fun () -> List.hd [])); + assert (check_empty_list_exn (fun () -> List.first [])); + assert (check_empty_list_exn (fun () -> List.last [])) + +let test_find_exc () = + let check_exn f = try f (); false with Test_Exception -> true | _ -> false in + assert (check_exn (fun () -> (List.find_exc (fun _ -> true) Test_Exception []))); + try + for i = 0 to 15 do + let rnd_lst = rnd_list () in + begin + match rnd_lst with + [] -> () + | lst -> + let rnd_elem = Random.int (List.length lst) in + assert (check_exn + (fun () -> + List.find_exc (fun e -> e = List.length lst) Test_Exception lst)); + assert (not (check_exn + (fun () -> + List.find_exc (fun e -> e = rnd_elem) Test_Exception lst))) + end + done + with _ -> assert false + +let test_findi () = + let check_fn f = try (let e,i = f () in e<>i) with Not_found -> true in + try + for i = 0 to 15 do + let rnd_lst = rnd_list () in + begin + match rnd_lst with + [] -> () + | lst -> + let rnd_elem = Random.int (List.length lst) in + assert (check_fn + (fun () -> + List.findi (fun i e -> e = List.length lst) lst)); + assert (not (check_fn + (fun () -> + List.findi (fun i e -> e = rnd_elem) lst))) + end + done + with _ -> assert false + +let test_fold_right () = + let maxlen = 2000 in + (* NOTE assuming we will not blow the stack with 2000 elements *) + let lst = List.init maxlen Std.identity in + let a = StdList.fold_right (fun e a -> e::a) lst [] in + let b = List.fold_right (fun e a -> e::a) lst [] in + assert (a = b) + +let test_fold_right2 () = + let len = 2000 in + let cnt = ref 0 in + let lst = List.init len Std.identity in + ignore (StdList.fold_right (fun e a -> incr cnt; e::a) lst []); + let cnt_std = !cnt in + cnt := 0; + ignore (List.fold_right (fun e a -> incr cnt; e::a) lst []); + assert (cnt_std = len); + assert (!cnt = cnt_std) + +let test_map () = + for i = 0 to 10 do + let f = ( * ) 2 in + let lst = rnd_list () in + let a = StdList.map f lst in + let b = List.map f lst in + assert (a = b) + done + +let test_find_map () = + let f = function "this", v -> Some v | _ -> None in + (try + let r = List.find_map f [ "a", 1; "b", 2; "this", 3; "d", 4 ] in + assert (3 = r); + let r = List.find_map f [ "this", 1; "b", 2; "c", 3; "d", 4 ] in + assert (1 = r); + let r = List.find_map f [ "a", 1; "b", 2; "c", 3; "this", 4 ] in + assert (4 = r); + let r = List.find_map f [ "this", 1; "b", 2; "c", 3; "this", 4 ] in + assert (1 = r); + let r = List.find_map f [ "a", 1; "b", 2; "this", 3; "this", 4 ] in + assert (3 = r); + let r = List.find_map f [ "this", 5 ] in + assert (5 = r) + with + Not_found -> assert false + ); + (try + ignore (List.find_map f []); assert false + with + Not_found -> () + ); + (try + ignore (List.find_map f [ "a", 1 ]); assert false + with + Not_found -> () + ); + (try + ignore (List.find_map f [ "a", 1; "b", 2 ]); assert false + with + Not_found -> () + ) + +(* Issue 12: List.make not tail-recursive *) +let test_make () = + let l = List.make 10_000_000 1 in + assert (List.length l = 10_000_000) + +let () = + Util.register "ExtList" [ + "iteri", test_iteri; + "mapi", test_mapi; + "exceptions", test_exceptions; + "find_exc", test_find_exc; + "findi", test_findi; + "fold_right", test_fold_right; + "fold_right2", test_fold_right2; + "map", test_map; + "find_map", test_find_map; + "make", test_make; + ] diff --git a/test/test_ExtString.ml b/test/test_ExtString.ml new file mode 100644 index 0000000..b8fcfa8 --- /dev/null +++ b/test/test_ExtString.ml @@ -0,0 +1,137 @@ +(* + * ExtLib Testing Suite + * Copyright (C) 2004 Janne Hellsten + * Copyright (C) 2011 ygrek + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +open ExtString + +module S = String + +let t_starts_with () = + let s0 = "foo" in + assert (S.starts_with s0 s0); + assert (S.starts_with s0 "f"); + assert (not (S.starts_with s0 "bo")); + assert (not (S.starts_with "" "foo")); + assert (S.starts_with s0 ""); + assert (S.starts_with "" "") + +let t_ends_with () = + let s0 = "foo" in + assert (S.ends_with s0 "foo"); + assert (S.ends_with s0 "oo"); + assert (S.ends_with s0 "o"); + assert (S.ends_with s0 ""); + assert (S.ends_with "" ""); + assert (not (S.ends_with "" "b")); + assert (not (S.ends_with s0 "f")) + +let t_map () = + let s0 = "foobar" in + assert (S.map Std.identity s0 = s0) + +let t_lchop () = + for len = 0 to 15 do + let s0 = Util.random_string_len len in + let s0len = String.length s0 + and s0r = ref (String.copy s0) in + for i = 0 to s0len-1 do + assert (!s0r.[0] = s0.[i]); + s0r := String.lchop !s0r + done; + done + +let t_rchop () = + for len = 0 to 15 do + let s0 = Util.random_string_len len in + let s0len = String.length s0 + and s0r = ref (String.copy s0) in + for i = 0 to s0len-1 do + assert (!s0r.[String.length !s0r - 1] = s0.[s0len-1-i]); + s0r := String.rchop !s0r + done; + done + +let t_split () = + for i = 0 to 64 do + let s = Util.random_string () in + let s' = String.replace_chars + (fun c -> if c = '|' then "_" else String.of_char c) s in + let len = String.length s' in + if len > 0 then + begin + let rpos = Random.int len in + (* Insert separator and split based on that *) + let modified = + let b = Bytes.of_string s' in + b.[rpos] <- '|'; + Bytes.to_string b + in + let (half1, half2) = String.split modified "|" in + if rpos > 1 then + begin + assert (String.length half1 = rpos); + assert (String.sub s' 0 rpos = half1) + end; + if rpos < len-1 then + begin + assert (String.length half2 = len-rpos-1); + assert (String.sub s' (rpos+1) (len-rpos-1) = half2); + end; + assert (String.join "|" [half1; half2] = modified); + end + done + +let t_replace1 () = + let s = "karhupullo" in + assert (String.replace s "karhu" "kalja" = (true, "kaljapullo")); + assert (String.replace s "kalja" "karhu" = (false, s)); + (* TODO is this correct? Is "" supposed to always match? *) + assert (String.replace s "" "karhu" = (true, "karhu"^s)); + assert (String.replace "" "" "karhu" = (true, "karhu")) + +let t_strip () = + let s = "1234abcd5678" in + assert (S.strip ~chars:"" s = s); + assert (S.strip ~chars:"1" s = String.sub s 1 (String.length s-1)); + assert (S.strip ~chars:"12" s = String.sub s 2 (String.length s-2)); + assert (S.strip ~chars:"1234" s = "abcd5678"); + assert (S.ends_with (S.strip ~chars:"8" s) "567"); + assert (S.ends_with (S.strip ~chars:"87" s) "56"); + assert (S.ends_with (S.strip ~chars:"86" s) "567"); + assert (S.ends_with (S.strip ~chars:"" s) "5678") + +let t_nsplit () = + let s = "testsuite" in + assert (S.nsplit s "t" = ["";"es";"sui";"e"]); + assert (S.nsplit s "te" = ["";"stsui";""]); + assert (try let _ = S.nsplit s "" in false with Invalid_string -> true) + +let () = + Util.register "ExtString" [ + "starts_with", t_starts_with; + "ends_with", t_ends_with; + "map", t_map; + "lchop", t_lchop; + "rchop", t_rchop; + "split", t_split; + "replace_1", t_replace1; + "strip", t_strip; + "nsplit", t_nsplit; + ] diff --git a/test/test_IO.ml b/test/test_IO.ml new file mode 100644 index 0000000..f2988fc --- /dev/null +++ b/test/test_IO.ml @@ -0,0 +1,108 @@ +(* + * ExtLib Testing Suite + * Copyright (C) 2004, 2007 Janne Hellsten + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(* NOTE The IO module test case was contributed by Robert Atkey on + ocaml-lib-devel@lists.sourceforge.net on Nov 26, 2007. + Thanks Rob! *) + +let fail fmt = Printf.ksprintf failwith fmt + +let test_write_read values invalid show write read = + let s = IO.output_string () in + List.iter begin fun x -> + if try write s x; true with IO.Overflow _ -> false then fail "write %s expected to fail, but didn't" (show x) + end invalid; + List.iter begin fun i -> + try write s i with exn -> fail "failed to write %s : %s" (show i) (Printexc.to_string exn) + end values; + let s = IO.close_out s in + let s = IO.input_string s in + List.iter begin fun expect -> + let i = read s in + if i <> expect then fail "failed to read %s : got %s" (show expect) (show i) + end values; + match IO.read_all s with + | "" -> () + | s -> fail "expected empty input, got %S" s + +let test_i8 () = + let values = [~-0x80;-1;0;1;0x7F] in + let invalid = [] in (* never fails - truncates *) + test_write_read values invalid string_of_int IO.write_byte IO.read_signed_byte; + () + +let test_u8 () = + let values = [0;1;0xFF] in + let invalid = [] in (* never fails *) + test_write_read values invalid string_of_int IO.write_byte IO.read_byte; + () + +let test_i16 () = + (* Bug was that write_i16 did not accept -0x8000 *) + let values = [~-0x8000;-1;0;1;0x7FFF] in + let invalid = [~-0x8001;0x8000] in + let test = test_write_read values invalid string_of_int in + test IO.write_i16 IO.read_i16; + test IO.BigEndian.write_i16 IO.BigEndian.read_i16; + () + +let test_u16 () = + let values = [0;1;0xFFFF] in + let invalid = [~-1;0x10000] in + let test = test_write_read values invalid string_of_int in + test IO.write_ui16 IO.read_ui16; + test IO.BigEndian.write_ui16 IO.BigEndian.read_ui16; + () + +let test_i31 () = + let values = [~-0x4000_0000;-1;0;1;0x3FFF_FFFF] in + let invalid = if Sys.word_size = 32 then [] else [~-0x4000_0001;0x4000_0000] in + let test = test_write_read values invalid string_of_int in + test IO.write_i31 IO.read_i31; + test IO.BigEndian.write_i31 IO.BigEndian.read_i31; + () + +let test_i32 () = + let min_i32 = Int32.to_int Int32.min_int in + let max_i32 = Int32.to_int Int32.max_int in + let values = [~-0x4000_0000;-1;0;1;0x3FFF_FFFF] @ if Sys.word_size = 32 then [] else [min_i32;max_i32] in + let invalid = if Sys.word_size = 32 then [] else [min_i32-1;max_i32+1] in + let test = test_write_read values invalid string_of_int in + test IO.write_i32 IO.read_i32_as_int; + test IO.BigEndian.write_i32 IO.BigEndian.read_i32_as_int; + () + +let test_real_i32 () = + let values = [Int32.min_int;-1l;0l;1l;Int32.max_int] in + let invalid = [] in + let test = test_write_read values invalid Int32.to_string in + test IO.write_real_i32 IO.read_real_i32; + test IO.BigEndian.write_real_i32 IO.BigEndian.read_real_i32; + () + +let () = + Util.register1 "IO" "i32" test_i32; + Util.register1 "IO" "real_i32" test_real_i32; + Util.register1 "IO" "i31" test_i31; + Util.register1 "IO" "u16" test_u16; + Util.register1 "IO" "i16" test_i16; + Util.register1 "IO" "u8" test_u8; + Util.register1 "IO" "i8" test_i8; + () diff --git a/test/test_UTF8.ml b/test/test_UTF8.ml new file mode 100644 index 0000000..dd8676a --- /dev/null +++ b/test/test_UTF8.ml @@ -0,0 +1,75 @@ +let substring_inputs = +[ + [| + ""; + "⟿"; + "⟿ቄ"; + "⟿ቄş"; + "⟿ቄş龟"; + "⟿ቄş龟¯"; + |]; + [| + ""; + "ç"; + "çe"; + "çek"; + "çeko"; + "çekos"; + "çekosl"; + "çekoslo"; + "çekoslov"; + "çekoslova"; + "çekoslovak"; + "çekoslovaky"; + "çekoslovakya"; + "çekoslovakyal"; + "çekoslovakyala"; + "çekoslovakyalaş"; + "çekoslovakyalaşt"; + "çekoslovakyalaştı"; + "çekoslovakyalaştır"; + "çekoslovakyalaştıra"; + "çekoslovakyalaştıram"; + "çekoslovakyalaştırama"; + "çekoslovakyalaştıramad"; + "çekoslovakyalaştıramadı"; + "çekoslovakyalaştıramadık"; + "çekoslovakyalaştıramadıkl"; + "çekoslovakyalaştıramadıkla"; + "çekoslovakyalaştıramadıklar"; + "çekoslovakyalaştıramadıkları"; + "çekoslovakyalaştıramadıklarım"; + "çekoslovakyalaştıramadıklarımı"; + "çekoslovakyalaştıramadıklarımız"; + "çekoslovakyalaştıramadıklarımızd"; + "çekoslovakyalaştıramadıklarımızda"; + "çekoslovakyalaştıramadıklarımızdan"; + "çekoslovakyalaştıramadıklarımızdanm"; + "çekoslovakyalaştıramadıklarımızdanmı"; + "çekoslovakyalaştıramadıklarımızdanmıs"; + "çekoslovakyalaştıramadıklarımızdanmısı"; + "çekoslovakyalaştıramadıklarımızdanmısın"; + "çekoslovakyalaştıramadıklarımızdanmısını"; + "çekoslovakyalaştıramadıklarımızdanmısınız"; + |] +] + +let test_substring () = + let test a = + let m = Array.length a - 1 in + let v = a.(m) in + assert(UTF8.length v = m); + for i = 0 to m do + assert(a.(i) = UTF8.substring v 0 i); + done; + for i = 0 to m - 1 do + for j = i to m - 1 do + let u = UTF8.substring v i (j - i + 1) in + UTF8.validate u + done + done + in + List.iter test substring_inputs + +let () = + Util.register1 "UTF" "substring" test_substring diff --git a/test/test_Unzip.ml b/test/test_Unzip.ml new file mode 100644 index 0000000..7b0bc73 --- /dev/null +++ b/test/test_Unzip.ml @@ -0,0 +1,109 @@ +(* + * ExtLib Testing Suite + * Copyright (C) 2004, 2007 Janne Hellsten + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(* test_unzip_bug1 test case was contributed by Robert Atkey on + ocaml-lib-devel@lists.sourceforge.net on Nov 26, 2007. + Thanks Rob! *) + +let test_unzip_bug1 () = + let test data = + let input = IO.input_string data in + let unzipped = Unzip.inflate input in + try + let str = IO.read_all unzipped in + assert (str = "XY") + with Unzip.Error Unzip.Invalid_data -> assert false + in + (* this is "XY" compressed by zlib at level 9 *) + test "\x78\xda\x8b\x88\x04\x00\x01\x0b\x00\xb2"; + (* this is "XY" compressed by zlib at level 0 *) + test "\x78\x01\x01\x02\x00\xfd\xff\x58\x59\x01\x0b\x00\xb2" + +(* Some zlib compressed strings with various compression levels. See + extlib-test/util/zlib-test/gen_ml.sh for more info. This is not + very exhaustive but is still better than nothing. The generation + script should be pretty easy to extend to cover longer strings. *) +let inputs = + [("a", "\x78\x01\x4b\x04\x00\x00\x62\x00\x62"); + ("a", "\x78\x5e\x4b\x04\x00\x00\x62\x00\x62"); + ("a", "\x78\x9c\x4b\x04\x00\x00\x62\x00\x62"); + ("a", "\x78\xda\x4b\x04\x00\x00\x62\x00\x62"); + ("b", "\x78\x01\x4b\x02\x00\x00\x63\x00\x63"); + ("b", "\x78\x5e\x4b\x02\x00\x00\x63\x00\x63"); + ("b", "\x78\x9c\x4b\x02\x00\x00\x63\x00\x63"); + ("b", "\x78\xda\x4b\x02\x00\x00\x63\x00\x63"); + ("c", "\x78\x01\x4b\x06\x00\x00\x64\x00\x64"); + ("c", "\x78\x5e\x4b\x06\x00\x00\x64\x00\x64"); + ("c", "\x78\x9c\x4b\x06\x00\x00\x64\x00\x64"); + ("c", "\x78\xda\x4b\x06\x00\x00\x64\x00\x64"); + ("aa", "\x78\x01\x4b\x4c\x04\x00\x01\x25\x00\xc3"); + ("aa", "\x78\x5e\x4b\x4c\x04\x00\x01\x25\x00\xc3"); + ("aa", "\x78\x9c\x4b\x4c\x04\x00\x01\x25\x00\xc3"); + ("aa", "\x78\xda\x4b\x4c\x04\x00\x01\x25\x00\xc3"); + ("aaa", "\x78\x01\x4b\x4c\x4c\x04\x00\x02\x49\x01\x24"); + ("aaa", "\x78\x5e\x4b\x4c\x4c\x04\x00\x02\x49\x01\x24"); + ("aaa", "\x78\x9c\x4b\x4c\x4c\x04\x00\x02\x49\x01\x24"); + ("aaa", "\x78\xda\x4b\x4c\x4c\x04\x00\x02\x49\x01\x24"); + ("aaaa", "\x78\x01\x4b\x4c\x4c\x4c\x04\x00\x03\xce\x01\x85"); + ("aaaa", "\x78\x5e\x4b\x4c\x4c\x4c\x04\x00\x03\xce\x01\x85"); + ("aaaa", "\x78\x9c\x4b\x4c\x4c\x4c\x04\x00\x03\xce\x01\x85"); + ("aaaa", "\x78\xda\x4b\x4c\x4c\x4c\x04\x00\x03\xce\x01\x85"); + ("foobar", "\x78\x01\x4b\xcb\xcf\x4f\x4a\x2c\x02\x00\x08\xab\x02\x7a"); + ("foobar", "\x78\x5e\x4b\xcb\xcf\x4f\x4a\x2c\x02\x00\x08\xab\x02\x7a"); + ("foobar", "\x78\x9c\x4b\xcb\xcf\x4f\x4a\x2c\x02\x00\x08\xab\x02\x7a"); + ("foobar", "\x78\xda\x4b\xcb\xcf\x4f\x4a\x2c\x02\x00\x08\xab\x02\x7a"); + ("012345678", "\x78\x01\x33\x30\x34\x32\x36\x31\x35\x33\xb7\x00\x00\x08\xf1\x01\xd5"); + ("012345678", "\x78\x5e\x33\x30\x34\x32\x36\x31\x35\x33\xb7\x00\x00\x08\xf1\x01\xd5"); + ("012345678", "\x78\x9c\x33\x30\x34\x32\x36\x31\x35\x33\xb7\x00\x00\x08\xf1\x01\xd5"); + ("012345678", "\x78\xda\x33\x30\x34\x32\x36\x31\x35\x33\xb7\x00\x00\x08\xf1\x01\xd5"); + ("00000000", "\x78\x01\x33\x30\x80\x00\x00\x06\xc8\x01\x81"); + ("00000000", "\x78\x5e\x33\x30\x80\x00\x00\x06\xc8\x01\x81"); + ("00000000", "\x78\x9c\x33\x30\x80\x00\x00\x06\xc8\x01\x81"); + ("00000000", "\x78\xda\x33\x30\x80\x00\x00\x06\xc8\x01\x81"); + ("aaaaaaaaaa", "\x78\x01\x4b\x4c\x84\x01\x00\x14\xe1\x03\xcb"); + ("aaaaaaaaaa", "\x78\x5e\x4b\x4c\x84\x01\x00\x14\xe1\x03\xcb"); + ("aaaaaaaaaa", "\x78\x9c\x4b\x4c\x84\x01\x00\x14\xe1\x03\xcb"); + ("aaaaaaaaaa", "\x78\xda\x4b\x4c\x84\x01\x00\x14\xe1\x03\xcb"); + ("-------------------------aaaaaaaaa------------------", "\x78\x01\xd3\xd5\xc5\x01\x12\x61\x00\x53\x1e\x00\x1c\x89\x0a\xf9"); + ("-------------------------aaaaaaaaa------------------", "\x78\x5e\xd3\xd5\xc5\x01\x12\x61\x00\x53\x1e\x00\x1c\x89\x0a\xf9"); + ("-------------------------aaaaaaaaa------------------", "\x78\x5e\xd3\xd5\xc5\x01\x12\x61\x00\x5d\x42\x57\x17\x00\x1c\x89\x0a\xf9"); + ("-------------------------aaaaaaaaa------------------", "\x78\x5e\xd3\xd5\xc5\x01\x12\x61\x00\x53\x0a\x00\x1c\x89\x0a\xf9"); + ("-------------------------aaaaaaaaa------------------", "\x78\x9c\xd3\xd5\xc5\x01\x12\x61\x00\x53\x0a\x00\x1c\x89\x0a\xf9"); + ("-------------------------aaaaaaaaa------------------", "\x78\xda\xd3\xd5\xc5\x01\x12\x61\x00\x53\x0a\x00\x1c\x89\x0a\xf9")] + +let test_unzip_gen_inputs () = + let test orig data = + let input = IO.input_string data in + let unzipped = Unzip.inflate input in + try + let str = IO.read_all unzipped in + assert (str = orig) + with Unzip.Error Unzip.Invalid_data -> assert false + in + List.iter + (fun (orig,compressed) -> + test orig compressed) + inputs + +let () = + Util.register "Unzip" [ + "bug1", test_unzip_bug1; + "gen_inputs", test_unzip_gen_inputs; + ] diff --git a/test/util.ml b/test/util.ml new file mode 100644 index 0000000..1220f40 --- /dev/null +++ b/test/util.ml @@ -0,0 +1,108 @@ +(* + * ExtLib Testing Suite + * Copyright (C) 2004 Janne Hellsten + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +module P = Printf + +let log s = + P.printf "%s\n" s; + flush stdout + +let random_char () = + char_of_int (Random.int 256) + +let random_string () = + let len = Random.int 256 in + let str = String.create len in + if len > 0 then + for i = 0 to (len-1) do + str.[i] <- random_char () + done; + Bytes.unsafe_to_string str + + +let random_string_len len = + let len = len in + let str = String.create len in + if len > 0 then + for i = 0 to (len-1) do + str.[i] <- random_char () + done; + Bytes.unsafe_to_string str + +(* For counting the success ratio *) +let test_run_count = ref 0 +let test_success_count = ref 0 +let g_test_run_count = ref 0 +let g_test_success_count = ref 0 + +let test_module name f = + P.printf "%s\n" name; + flush stdout; + test_run_count := 0; + test_success_count := 0; + f (); + if !test_run_count <> 0 then + P.printf " %i/%i tests succeeded.\n" + !test_success_count !test_run_count + +let run_test ~test_name f = + try + incr g_test_run_count; + incr test_run_count; + P.printf " %s" test_name; + flush stdout; + let () = f () in + incr g_test_success_count; + incr test_success_count; + P.printf " - OK\n" + with + Assert_failure (file,line,column) -> + P.printf " - FAILED\n reason: "; + P.printf " %s:%i:%i\n" file line column; + flush stdout + +let all_tests = Hashtbl.create 10 + +let register modname l = + let existing = try Hashtbl.find all_tests modname with Not_found -> [] in + Hashtbl.replace all_tests modname (l @ existing) + +let register1 modname name f = register modname [name,f] + +let run_all filter = + let allowed name = + match filter with + | None -> true + | Some l -> List.mem (String.lowercase name) l + in + g_test_run_count := 0; + g_test_success_count := 0; + Hashtbl.iter begin fun modname tests -> + let allowed_module = allowed modname in + test_module modname begin fun () -> + List.iter begin fun (test_name,f) -> + if allowed_module || allowed (modname^"."^test_name) then run_test ~test_name f + end tests + end + end all_tests; + if !g_test_run_count <> 0 then + P.printf "\nOverall %i/%i tests succeeded.\n" + !g_test_success_count !g_test_run_count; + !g_test_run_count = !g_test_success_count diff --git a/test/util/zlib-test/gen_ml.sh b/test/util/zlib-test/gen_ml.sh new file mode 100755 index 0000000..31a8002 --- /dev/null +++ b/test/util/zlib-test/gen_ml.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +gcc zlib-test.c -lz -o zlib-test + +compression_levels="1 2 3 4 5 6 7 8 9" +strings="a b c aa aaa aaaa foobar 012345678 00000000 aaaaaaaaaa -------------------------aaaaaaaaa------------------" + +for s in $strings; do +for cl in $compression_levels; do + compressed=`./zlib-test $cl "$s"` + echo "$s, $compressed" +done; +done diff --git a/test/util/zlib-test/zlib-test.c b/test/util/zlib-test/zlib-test.c new file mode 100644 index 0000000..ded75dd --- /dev/null +++ b/test/util/zlib-test/zlib-test.c @@ -0,0 +1,43 @@ +/* compile using gcc -o zlib-test zlib-test.c -lz + + usage: zlib-test + + where compression-level is from 0 to 9 +*/ +#include +#include +#include +#include + +int +main (int argc, char *argv[]) +{ + unsigned char dest[32]; + unsigned long destLen = 31; + int retval, i; + int level; + char *endptr; + + if (argc != 3) { + fprintf (stderr, "Usage: %s: \n", argv[0]); + return 1; + } + + level = strtol (argv[1], &endptr, 10); + if (endptr == argv[1] || level < 0 || level > 9) { + fprintf (stderr, "Invalid compression level\n"); + return 1; + } + + retval = compress2 (dest, &destLen, (unsigned char *)argv[2], strlen (argv[2]), level); + if (retval != Z_OK) { + fprintf (stderr, "Error calling zlib compress2 function: %d\n", retval); + return 1; + } + + for (i = 0; i < destLen; i++) + printf ("\\x%02hhx", dest[i]); + printf ("\n"); + + return 0; +}