diff options
-rw-r--r-- | Cargo.toml | 19 | ||||
-rw-r--r-- | LICENSE.txt | 661 | ||||
-rw-r--r-- | README.md | 26 | ||||
-rw-r--r-- | src/errors.rs | 57 | ||||
-rw-r--r-- | src/lib.rs | 151 | ||||
-rw-r--r-- | src/parser.rs | 182 | ||||
-rw-r--r-- | src/pattern.rs | 59 | ||||
-rw-r--r-- | src/vm.rs | 1248 | ||||
-rw-r--r-- | tests/basic_match.rs | 284 | ||||
-rw-r--r-- | tests/common/mod.rs | 189 | ||||
-rw-r--r-- | tests/parser_prop.rs | 28 |
11 files changed, 922 insertions, 1982 deletions
diff --git a/Cargo.toml b/Cargo.toml index fc15218..a0532df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,10 +1,10 @@ [package] name = "datafu" -version = "0.0.7" +version = "0.1.0" authors = ["SoniEx2 <endermoneymod@gmail.com>"] -license = "AGPL-3.0-or-later" +license = "MIT OR Apache-2.0" description = "A Rust library for extracting data from config objects and other arbitrary object graphs." -edition = "2015" +edition = "2021" repository = "https://soniex2.autistic.space/git-repos/dfu.git" readme = "README.md" homepage = "https://soniex2.github.io/ganarchy/project/c0b4a8a326a320ac33c5d9d6bac2f7ea7eb703ce/" @@ -13,14 +13,15 @@ homepage = "https://soniex2.github.io/ganarchy/project/c0b4a8a326a320ac33c5d9d6b [dependencies] regex = "1" -impl_trait = "0.1.2" - -[target.'cfg(target_os = "linux")'.dependencies] -totally-safe-transmute = "0.0.3" +impl_trait = "0.1.7" +serde = "1.0.140" +erased-serde = "0.3.21" [dev-dependencies] -proptest = "0.10.1" +proptest = "1.0.0" +serde_json = "1.0.82" +serde = {version = "1.0.140", features = ["derive"]} [features] -default = [] +default = ['stable'] stable = [] diff --git a/LICENSE.txt b/LICENSE.txt deleted file mode 100644 index be3f7b2..0000000 --- a/LICENSE.txt +++ /dev/null @@ -1,661 +0,0 @@ - GNU AFFERO GENERAL PUBLIC LICENSE - Version 3, 19 November 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU Affero General Public License is a free, copyleft license for -software and other kinds of works, specifically designed to ensure -cooperation with the community in the case of network server software. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. 
By contrast, -our General Public Licenses are intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - Developers that use our General Public Licenses protect your rights -with two steps: (1) assert copyright on the software, and (2) offer -you this License which gives you legal permission to copy, distribute -and/or modify the software. - - A secondary benefit of defending all users' freedom is that -improvements made in alternate versions of the program, if they -receive widespread use, become available for other developers to -incorporate. Many developers of free software are heartened and -encouraged by the resulting cooperation. However, in the case of -software used on network servers, this result may fail to come about. -The GNU General Public License permits making a modified version and -letting the public access it on a server without ever releasing its -source code to the public. - - The GNU Affero General Public License is designed specifically to -ensure that, in such cases, the modified source code becomes available -to the community. It requires the operator of a network server to -provide the source code of the modified version running there to the -users of that server. Therefore, public use of a modified version, on -a publicly accessible server, gives the public access to the source -code of the modified version. - - An older license, called the Affero General Public License and -published by Affero, was designed to accomplish similar goals. 
This is -a different license, not a version of the Affero GPL, but Affero has -released a new version of the Affero GPL which permits relicensing under -this license. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU Affero General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. 
- - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. 
However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. 
- - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. 
- - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. 
- - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. 
- - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. 
- - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. 
If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. 
If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. 
- - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. 
For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. 
- - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Remote Network Interaction; Use with the GNU General Public License. 
- - Notwithstanding any other provision of this License, if you modify the -Program, your modified version must prominently offer all users -interacting with it remotely through a computer network (if your version -supports such interaction) an opportunity to receive the Corresponding -Source of your version by providing access to the Corresponding Source -from a network server at no charge, through some standard or customary -means of facilitating copying of software. This Corresponding Source -shall include the Corresponding Source for any work covered by version 3 -of the GNU General Public License that is incorporated pursuant to the -following paragraph. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU General Public License into a single -combined work, and to convey the resulting work. The terms of this -License will continue to apply to the part which is the covered work, -but the work with which it is combined will remain governed by version -3 of the GNU General Public License. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU Affero General Public License from time to time. Such new versions -will be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU Affero General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU Affero General Public License, you may choose any version ever published -by the Free Software Foundation. 
- - If the Program specifies that a proxy can decide which future -versions of the GNU Affero General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. 
- - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If your software can interact with users remotely through a computer -network, you should also make sure that it provides a way for users to -get its source. 
For example, if your program is a web application, its -interface could display a "Source" link that leads users to an archive -of the code. There are many ways you could offer source, and different -solutions will be better for different programs; see section 13 for the -specific requirements. - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU AGPL, see -<https://www.gnu.org/licenses/>. diff --git a/README.md b/README.md index 9567cbc..764f4e7 100644 --- a/README.md +++ b/README.md @@ -30,23 +30,9 @@ your tool of choice. https://en.wikipedia.org/wiki/Halting_problem "Halting problem (Wikipedia)" -License -------- - -```text -Datafu - Rust library for extracting data from object graphs. -Copyright (C) 2021 Soni L. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with this program. If not, see <https://www.gnu.org/licenses/>. -``` +Comparison with Serde +--------------------- + +As of Datafu 0.1.0, Datafu is powered by Serde. This enables relying on Serde +for deserialization and Datafu for efficiency. Note, however, that Datafu is +stateful. diff --git a/src/errors.rs b/src/errors.rs index f29d635..b41b225 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,25 +1,10 @@ -/* - * This file is part of Datafu - * Copyright (C) 2021 Soni L. 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <https://www.gnu.org/licenses/>. - */ +// Copyright (C) 2021-2022 Soni L. +// SPDX-License-Identifier: MIT OR Apache-2.0 //! <!-- TODO figure out module-level docs for this --> -#[cfg(doc)] -use crate::PatternTypes; +// #[cfg(doc)] +// use crate::PatternTypes; /// These are errors that may be returned by the pattern compiler when /// compiling a pattern. @@ -51,20 +36,20 @@ pub enum PatternError<'a> { Regex(usize, &'a str, ::regex::Error), } -/// These are errors that may be returned by the matcher when matching a -/// pattern. -#[derive(Clone, Debug)] -pub enum MatchError { - /// Returned if the pattern nests too deeply. - StackOverflow, - /// Returned if the pattern rejects the input. - ValidationError, - /// Returned if the pattern attempts an unsupported operation. - /// - /// In particular, if the [`PatternTypes`] doesn't support `get` or `pairs` - /// for a given value, this error will be returned. It can be treated as a - /// ValidationError, or as a bug in the pattern, at the user's discretion. - UnsupportedOperation, - /// Returned if an unspecified non-critical error occurred. - Other -} +// /// These are errors that may be returned by the matcher when matching a +// /// pattern. +// #[derive(Clone, Debug)] +// pub enum MatchError { +// /// Returned if the pattern nests too deeply. 
+// StackOverflow, +// /// Returned if the pattern rejects the input. +// ValidationError, +// /// Returned if the pattern attempts an unsupported operation. +// /// +// /// In particular, if the [`PatternTypes`] doesn't support `get` or `pairs` +// /// for a given value, this error will be returned. It can be treated as a +// /// ValidationError, or as a bug in the pattern, at the user's discretion. +// UnsupportedOperation, +// /// Returned if an unspecified non-critical error occurred. +// Other +// } diff --git a/src/lib.rs b/src/lib.rs index 3fc542f..8fa727f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,26 +1,9 @@ -/* - * Datafu - Rust library for extracting data from object graphs. - * Copyright (C) 2021 Soni L. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <https://www.gnu.org/licenses/>. - */ -#![warn(rust_2018_idioms)] -#![cfg_attr(not(feature = "stable"), feature(label_break_value))] +// Copyright (C) 2021-2022 Soni L. +// SPDX-License-Identifier: MIT OR Apache-2.0 //! Datafu is a regex-inspired query language. It was primarily //! designed for processing object trees parsed from configuration files, but -//! can also be used with JSON APIs, and even XML. +//! can be used with anything that supports serde. //! //! # Languge Reference //! @@ -34,8 +17,7 @@ //! being used by default. //! //! 
A variable is a sequence of alphanumeric characters, not starting with -//! a digit. A `(key, value)` tuple containing the respective matched -//! element will be identified by this name in the results map. +//! a digit. The value of the matched element will be identified by this name. //! //! A literal is a sequence of characters delimited by `'`, optionally //! followed by `?`, with `%` as the escape character, and defines a @@ -65,11 +47,12 @@ //! //! A key match is a datafu expression (including, but not limited to, the //! empty datafu expression) enclosed within `[` and `]`, optionally -//! prefixed with one or more predicates, and applies the enclosed -//! predicates and datafu expression to the key (or index) being processed. -//! A key match enables additional validation of keys and/or extraction of -//! values from keys, and accepts a key if and only if the enclosed -//! predicates accept the key and the enclosed expression matches the key. +//! prefixed with an identifier and zero or more predicates, and applies the +//! enclosed predicates and datafu expression to the key (or index) being +//! processed. A key match enables additional validation of keys and/or +//! extraction of values from keys, and accepts a key if and only if the +//! enclosed predicates accept the key and the enclosed expression matches the +//! key. The matched key is stored in the identifier. //! //! A subvalue is a datafu expression (including, but not limited to, the //! empty datafu expression) enclosed within `(` and `)`, and applies @@ -104,7 +87,7 @@ //! arg ::= parameter | literal | regex | keymatch //! //! arrow ::= '->' -//! keymatch ::= '[' {predicate} expression ']' +//! keymatch ::= '[' {tag} {predicate} expression ']' //! subvalue ::= '(' {predicate} expression ')' ['?'] //! ``` //! @@ -115,12 +98,6 @@ //! //! 
<!-- TODO --> -extern crate impl_trait; -extern crate regex; - -#[cfg(test)] -extern crate proptest; - pub mod errors; mod parser; mod pattern; @@ -128,103 +105,17 @@ mod vm; pub use pattern::Pattern; -pub use vm::Matcher; +/// A predicate. +pub type Predicate = dyn (for<'x, 'de, 'a> Fn( + &'x (dyn 'a + erased_serde::Deserializer<'de>) +) -> bool) + Send + Sync; -// TODO replace with GATs -/// A borrowed or owned value of various types. -/// -/// This exists purely as a workaround for Rust not having GATs yet. -#[derive(Debug)] -pub enum RefOwn<'b, T: ?Sized, U> { - /// Borrowed T. - Ref(&'b T), - /// Borrowed string. - Str(&'b str), - /// Owned U. - Own(U), -} - -impl<'b, T, U> PartialEq for RefOwn<'b, T, U> +/// Helper to build predicates because HRTB inference is the worst. +pub fn pred<F>(f: F) -> Box<Predicate> where - T: ?Sized + PartialEq<T> + PartialEq<U> + PartialEq<str>, - U: PartialEq<T> + PartialEq<U> + PartialEq<str>, - str: PartialEq<T> + PartialEq<U> + PartialEq<str> + F: (for<'x, 'de, 'a> Fn( + &'x (dyn 'a + erased_serde::Deserializer<'de>) + ) -> bool) + Send + Sync + 'static, { - fn eq(&self, other: &Self) -> bool { - match (self, other) { - (RefOwn::Ref(l), RefOwn::Ref(r)) => l.eq(r), - (RefOwn::Own(l), RefOwn::Own(r)) => l.eq(r), - (RefOwn::Str(l), RefOwn::Str(r)) => l.eq(r), - (RefOwn::Ref(l), RefOwn::Own(r)) => PartialEq::eq(*l, r), - (RefOwn::Own(l), RefOwn::Str(r)) => PartialEq::eq(l, *r), - (RefOwn::Str(l), RefOwn::Ref(r)) => l.eq(r), - (RefOwn::Ref(l), RefOwn::Str(r)) => l.eq(r), - (RefOwn::Own(l), RefOwn::Ref(r)) => PartialEq::eq(l, *r), - (RefOwn::Str(l), RefOwn::Own(r)) => PartialEq::eq(*l, r), - } - } -} - -impl<'b, T: ?Sized, U: Copy> Copy for RefOwn<'b, T, U> { -} - -impl<'b, T: ?Sized, U: Clone> Clone for RefOwn<'b, T, U> { - fn clone(&self) -> Self { - match self { - RefOwn::Ref(r) => RefOwn::Ref(r), - RefOwn::Str(r) => RefOwn::Str(r), - RefOwn::Own(v) => RefOwn::Own(v.clone()), - } - } + Box::new(f) } - -/// A tuple representing a 
key-value pair. -pub type KVPair<'b, T> = (RefOwn<'b, <T as PatternTypes>::Ref, <T as PatternTypes>::Own>, RefOwn<'b, <T as PatternTypes>::Ref, <T as PatternTypes>::Own>); - -impl<'b, T, U> From<&'b T> for RefOwn<'b, T, U> { - fn from(x: &'b T) -> RefOwn<'b, T, U> { - RefOwn::Ref(x) - } -} - -// TODO investigate if this should be PatternTypes: Default -/// Defines the types and operations used for matching. -pub trait PatternTypes { - /// The borrowed type. - type Ref: ?Sized; - - // TODO replace with GATs. - // TODO potentially relax with Clone? - /// The owned type. - type Own: Copy + 'static; - - /// Returns an iterator over key-value pairs contained within an item, or - /// None if this operation is unsupported for the given value. - fn pairs<'b>( - item: RefOwn<'b, Self::Ref, Self::Own> - ) -> Option<Box<dyn Iterator<Item=KVPair<'b, Self>> + 'b>>; - - /// Returns an optional key-value pair keyed by the given key, or None if - /// this operation is unsupported for the given value. - fn get<'a, 'b>( - item: RefOwn<'b, Self::Ref, Self::Own>, - key: RefOwn<'a, Self::Ref, Self::Own> - ) -> Option<Option<KVPair<'b, Self>>>; - - // TODO replace with GATs + newtypes - /// Returns whether two keys/values are the same/equivalent. This must provide - /// the same guarantees as PartialEq. In fact, this is a replacement for - /// PartialEq for cases where it's not possible to just use PartialEq. - fn matches( - left: RefOwn<'_, Self::Ref, Self::Own>, - right: RefOwn<'_, Self::Ref, Self::Own> - ) -> bool; - - /// Returns the value as an &str. - fn as_str<'b>( - value: RefOwn<'b, Self::Ref, Self::Own> - ) -> Option<&'b str>; -} - -/// A predicate for keys and values. 
-pub type Predicate<T> = dyn (Fn(RefOwn<'_, <T as PatternTypes>::Ref, <T as PatternTypes>::Own>) -> bool) + Send + Sync; diff --git a/src/parser.rs b/src/parser.rs index ff3407a..c929653 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,20 +1,7 @@ -/* - * This file is part of Datafu - * Copyright (C) 2021 Soni L. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <https://www.gnu.org/licenses/>. - */ +// Copyright (C) 2021-2022 Soni L. +// SPDX-License-Identifier: MIT OR Apache-2.0 + +//! The recursive-descent datafu language parser. use std::borrow::Borrow; use std::collections::BTreeMap; @@ -22,14 +9,13 @@ use std::mem::ManuallyDrop; use impl_trait::impl_trait; use regex::Regex; +use serde::Serialize; -use crate::PatternTypes; use crate::Predicate; use crate::errors::PatternError; use crate::vm::PatternConstants; use crate::vm::PatternElement; - /// try! with bools. (the b comes from bool.) macro_rules! bry { ($l:lifetime $e:expr) => { @@ -47,6 +33,7 @@ macro_rules! bry { // the following macros rely on unlabeled-break-through-labeled-block being an // error. // NOTE: always test changes to this module on nightly! +// still waiting for label-break-value stabilization... #[cfg(not(feature = "stable"))] /// labeled block. on nightly: better compile errors. but also works on stable. @@ -67,23 +54,45 @@ macro_rules! 
lblock { } } +/// Attempts to shift `s` forward by removing `prefix`. +/// +/// Returns whether `s` has had `prefix` removed. // can't use Pattern here :( fn strip_prefix(s: &mut &str, prefix: &str) -> bool { s.strip_prefix(prefix).map(|ns| *s = ns).is_some() } +/// Returns the position (index) of `sub` within `base`, in bytes. +/// +/// Returns bogus results if `base` and `sub` are unrelated. fn pos_of<'a>(base: &'a str, sub: &'a str) -> Option<usize> { - // FIXME + // FIXME is there any non-UB way to check if `sub` is in `base`? Some((sub.as_ptr() as usize) - (base.as_ptr() as usize)) } -struct SubtreeHelper<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> where Self: 'r { - root: &'r mut Parser<'s, P, O, T>, +/// Helper to collect "subtree" sections of the pattern. +/// +/// This is a RAII-like guard which handles cleaning up the parsed pattern when +/// dropped. +struct SubtreeHelper<'r, 's, PKey, OKey, O> +where + Self: 'r, + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, + O: Serialize, +{ + root: &'r mut Parser<'s, PKey, OKey, O>, } impl_trait! { - impl<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> SubtreeHelper<'r, 's, P, O, T> where Self: 'r { - fn start(value: &'r mut Parser<'s, P, O, T>) -> Self { + impl<'r, 's, PKey, OKey, O> SubtreeHelper<'r, 's, PKey, OKey, O> + where + Self: 'r, + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, + O: Serialize, + { + fn start(value: &'r mut Parser<'s, PKey, OKey, O>) -> Self { value.consts.protos.push(Default::default()); Self { root: value, @@ -99,7 +108,7 @@ impl_trait! { } impl trait std::ops::Deref { - type Target = Parser<'s, P, O, T>; + type Target = Parser<'s, PKey, OKey, O>; fn deref(&self) -> &Self::Target { &*self.root @@ -121,14 +130,30 @@ impl_trait! { } } -struct TagHelper<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> where Self: 'r { - root: &'r mut Parser<'s, P, O, T>, +/// Helper to collect "tag" sections of the pattern. 
+/// +/// This is a RAII-like guard which handles cleaning up the parsed pattern when +/// dropped. +struct TagHelper<'r, 's, PKey, OKey, O> +where + Self: 'r, + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, + O: Serialize, +{ + root: &'r mut Parser<'s, PKey, OKey, O>, len: usize, } impl_trait! { - impl<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> TagHelper<'r, 's, P, O, T> where Self: 'r { - fn start(value: &'r mut Parser<'s, P, O, T>) -> Self { + impl<'r, 's, PKey, OKey, O> TagHelper<'r, 's, PKey, OKey, O> + where + Self: 'r, + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, + O: Serialize, + { + fn start(value: &'r mut Parser<'s, PKey, OKey, O>) -> Self { let len = value.consts.protos.last().unwrap().len(); Self { root: value, @@ -141,7 +166,7 @@ impl_trait! { } impl trait std::ops::Deref { - type Target = Parser<'s, P, O, T>; + type Target = Parser<'s, PKey, OKey, O>; fn deref(&self) -> &Self::Target { &*self.root @@ -166,20 +191,30 @@ impl_trait! { } } -struct Parser<'s, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> { +struct Parser<'s, PKey, OKey, O> +where + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, + O: Serialize, +{ base: &'s str, - preds: Option<BTreeMap<P, Box<Predicate<T>>>>, - objs: Option<BTreeMap<O, T::Own>>, - pred_ids: BTreeMap<P, usize>, - obj_ids: BTreeMap<O, usize>, - consts: PatternConstants<T>, + preds: Option<BTreeMap<PKey, Box<Predicate>>>, + objs: Option<BTreeMap<OKey, O>>, + pred_ids: BTreeMap<PKey, usize>, + obj_ids: BTreeMap<OKey, usize>, + consts: PatternConstants<O>, closed_subtrees: std::ops::RangeFrom<usize>, } // These are documented using LPeg.re syntax // http://www.inf.puc-rio.br/~roberto/lpeg/re.html #[rustfmt::skip] -impl<'s, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> Parser<'s, P, O, T> { +impl<'s, PKey, OKey, O> Parser<'s, PKey, OKey, O> +where + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, + O: Serialize, +{ /// str_literal <- sp ( ( "'" 
str_char* ( "'" / ( !. -> ErrorStrEnd ) ) ( '?' -> MarkSkippable ) ) -> String ) sp /// str_char <- ( str_escape / [^%'] ) /// str_escape <- '%' ( '%' / "'" ) / ( ( '%' .? ) -> ErrorStrEscape ) @@ -451,7 +486,7 @@ impl<'s, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> Parser<'s, })) } - /// key_subtree <- sp '[' sp predicate* sp subtree sp ( ']' / unexpected_token / unexpected_end ) sp ( '?'? -> MarkSkippable ) + /// key_subtree <- sp '[' sp name? sp predicate* sp subtree sp ( ']' / unexpected_token / unexpected_end ) sp ( '?'? -> MarkSkippable ) fn key_subtree(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { let mut cursor = *s; Ok(lblock!('matches: { @@ -459,6 +494,8 @@ impl<'s, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> Parser<'s, bry!('matches strip_prefix(&mut cursor, "[")); self.sp(&mut cursor); let mut subtree = SubtreeHelper::start(&mut *self); + subtree.name(&mut cursor)?; + subtree.sp(&mut cursor); while subtree.predicate(&mut cursor)? { } subtree.sp(&mut cursor); @@ -566,17 +603,20 @@ impl<'s, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> Parser<'s, } } -pub(crate) fn parse<'s, P, O, T>( +/// Parses a DFU expression. 
+/// +/// The given `preds` and `objs` are the named predicates and objects the expression may use. +pub(crate) fn parse<'s, PKey, OKey, O>( input: &'s str, - preds: Option<BTreeMap<P, Box<Predicate<T>>>>, - objs: Option<BTreeMap<O, T::Own>> -) -> Result<PatternConstants<T>, PatternError<'s>> - where - P: Borrow<str> + Ord, - O: Borrow<str> + Ord, - T: PatternTypes, + preds: Option<BTreeMap<PKey, Box<Predicate>>>, + objs: Option<BTreeMap<OKey, O>> +) -> Result<PatternConstants<O>, PatternError<'s>> +where + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, + O: Serialize, { - let mut parser = Parser::<'s, P, O, T> { + let mut parser = Parser::<'s, PKey, OKey, O> { base: input, preds: preds, objs: objs, @@ -598,57 +638,15 @@ pub(crate) fn parse<'s, P, O, T>( #[cfg(test)] mod tests { - use crate::PatternTypes; - use crate::RefOwn; - use crate::KVPair; use crate::errors::PatternError; use super::Parser; use proptest::prelude::*; - struct Dummy; - impl PatternTypes for Dummy { - type Ref = (); - type Own = (); - fn pairs<'b>( - item: RefOwn<'b, Self::Ref, Self::Own> - ) -> Option<Box<dyn Iterator<Item=KVPair<'b, Self>> + 'b>> { - let _ = item; - None - } - - fn get<'a, 'b>( - item: RefOwn<'b, Self::Ref, Self::Own>, - key: RefOwn<'a, Self::Ref, Self::Own> - ) -> Option<Option<KVPair<'b, Self>>> { - let _ = item; - let _ = key; - None - } - - fn matches( - left: RefOwn<'_, Self::Ref, Self::Own>, - right: RefOwn<'_, Self::Ref, Self::Own> - ) -> bool { - let _ = left; - let _ = right; - false - } - - fn as_str<'b>( - item: RefOwn<'b, Self::Ref, Self::Own> - ) -> Option<&'b str> { - match item { - RefOwn::Str(key) => Some(key), - _ => None, - } - } - } - #[test] fn test_identifier() { fn identifier_input<'s>(s: &mut &'s str) -> Result<bool, PatternError<'s>> { - let mut parser = Parser::<'s, &'static str, &'static str, Dummy> { + let mut parser = Parser::<'s, &'static str, &'static str, ()> { base: *s, preds: None, objs: None, @@ -674,8 +672,8 @@ mod tests { proptest! 
{ #[test] fn test_no_crash(s in ".{0,4096}") { - fn prep_parser<'s>(s: &'s str) -> Parser<'s, &'static str, &'static str, Dummy> { - let mut parser = Parser::<'s, &'static str, &'static str, Dummy> { + fn prep_parser<'s>(s: &'s str) -> Parser<'s, &'static str, &'static str, ()> { + let mut parser = Parser::<'s, &'static str, &'static str, ()> { base: s, preds: None, objs: None, diff --git a/src/pattern.rs b/src/pattern.rs index 3349db8..3a8c91f 100644 --- a/src/pattern.rs +++ b/src/pattern.rs @@ -1,57 +1,46 @@ -/* - * This file is part of Datafu - * Copyright (C) 2021 Soni L. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <https://www.gnu.org/licenses/>. - */ +// Copyright (C) 2021-2022 Soni L. 
+// SPDX-License-Identifier: MIT OR Apache-2.0 use std::borrow::Borrow; use std::collections::BTreeMap; -use crate::PatternTypes; -use crate::RefOwn; +use serde::Deserialize; +use serde::Deserializer; +use serde::Serialize; + use crate::Predicate; use crate::errors::PatternError; use crate::parser::parse; -use crate::vm::Matcher; +//use crate::vm::Matcher; use crate::vm::PatternConstants; -use crate::vm::MAX_CALLS; +//use crate::vm::MAX_CALLS; -pub struct Pattern<T: PatternTypes> { - consts: PatternConstants<T>, +pub struct Pattern<O: Serialize> { + consts: PatternConstants<O>, } -impl<T: PatternTypes> Pattern<T> { +impl<O: Serialize> Pattern<O> { /// Compiles the input into a pattern. - pub fn compile<'s, P, O>( + pub fn compile<'s, PKey, OKey>( input: &'s str, - preds: Option<BTreeMap<P, Box<Predicate<T>>>>, - objs: Option<BTreeMap<O, T::Own>> + preds: Option<BTreeMap<PKey, Box<Predicate>>>, + objs: Option<BTreeMap<OKey, O>> ) -> Result<Self, PatternError<'s>> - where - P: Borrow<str> + Ord, - O: Borrow<str> + Ord, + where + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, { Ok(Self { consts: parse(input, preds, objs)? }) } - pub fn attempt_match<'a, 'b>( - &'a self, - value: impl Into<RefOwn<'b, T::Ref, T::Own>> - ) -> Matcher<'a, 'b, T> { - Matcher::new(value.into(), &self.consts, self.consts.protos.len() - 1, MAX_CALLS).ok().expect("datafu internal error: MAX_CALLS must not be 0") + /// Matches the pattern against an input. + pub fn deserialize<'de, Der, De>(&self, de: Der) -> Result<De, Der::Error> + where + Der: Deserializer<'de>, + De: Deserialize<'de>, + { + todo!() } } diff --git a/src/vm.rs b/src/vm.rs index dd48752..6bcbf70 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -1,53 +1,33 @@ -/* - * This file is part of Datafu - * Copyright (C) 2021 Soni L. 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <https://www.gnu.org/licenses/>. - */ - -use std::collections::BTreeMap; -use std::iter::Peekable; +// Copyright (C) 2021-2022 Soni L. +// SPDX-License-Identifier: MIT OR Apache-2.0 use regex::Regex; +use serde::Serialize; -use crate::KVPair; -use crate::RefOwn; -use crate::PatternTypes; use crate::Predicate; -use crate::errors::MatchError; +//use crate::errors::MatchError; pub(crate) const MAX_CALLS: usize = 250; -type Matches<'a, 'b, T> = BTreeMap<&'a str, KVPair<'b, T>>; +//type Matches<'a, 'b, T> = BTreeMap<&'a str, KVPair<'b, T>>; // TODO: use a builder for this? /// The constant pool for a pattern. -pub(crate) struct PatternConstants<T: PatternTypes> { +pub(crate) struct PatternConstants<O: Serialize> { // last proto is implicitly the whole pattern. pub(crate) protos: Vec<Vec<PatternElement>>, // Note that we can borrow these when creating the output map. // https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f pub(crate) strings: Vec<String>, pub(crate) regices: Vec<Regex>, - pub(crate) predicates: Vec<Box<Predicate<T>>>, + pub(crate) predicates: Vec<Box<Predicate>>, // NOTE these are part of the constant pool and so have lifetime analogous // to 'a (consistently used to indicate constant pool lifetime) when used // elsewhere. 
In particular, they can't be yielded by the iterator. - pub(crate) defs: Vec<T::Own>, + pub(crate) defs: Vec<O>, } -impl<T: PatternTypes> Default for PatternConstants<T> { +impl<O: Serialize> Default for PatternConstants<O> { fn default() -> Self { Self { protos: Default::default(), @@ -76,612 +56,612 @@ pub(crate) enum PatternElement { End } -struct Frame<'a, 'b, T: PatternTypes> { - //obj: RefOwn<'b, T::Ref, T::Own>, - ops: &'a [PatternElement], - iar: Option<usize>, - depth: usize, - path: Vec<Holder<'a, 'b, T>>, - in_key: bool, -} - -impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> { - /// Advances the instruction address register. - /// - /// # Returns - /// - /// `true` if successful, `false` otherwise. - fn next(&mut self) -> bool { - let new = self.iar.map_or(0, |v| v + 1); - new < self.ops.len() && { - self.iar = Some(new); - true - } - } - - /// Returns the current instruction. - fn op(&self) -> PatternElement { - self.ops[self.iar.expect("ops[iar]")] - } - - /// Rewinds the instruction address register. - /// - /// # Returns - /// - /// `true` if successful, `false` otherwise. - fn prev(&mut self) -> bool { - let new = self.iar.expect("iar").checked_sub(1); - new.is_some() && { - self.iar = new; - true - } - } -} - -/// Stores a single match. -/// -/// See also Holder. -enum HolderState<'a, 'b, T: PatternTypes> { - /// Empty holder, for a key-value pair. - EmptyKey, - /// Empty holder, for a Matcher and a key-value pair. - EmptyKeySubtree, - // /// Empty holder, for a Matcher and a value. - // EmptyValueSubtree, - /// Occupied holder, for a key-value pair.. - Key(KVPair<'b, T>), - /// Occupied holder, for a Matcher and a key-value pair. - KeySubtree(Peekable<Matcher<'a, 'b, T>>, KVPair<'b, T>), - /// Occupied holder, for a Matcher and a value. The empty variant is - /// omitted as it would never be used otherwise. - ValueSubtree(Peekable<Matcher<'a, 'b, T>>, RefOwn<'b, T::Ref, T::Own>), - /// Occupied holder, for a value. 
The empty variant is omitted as it would - /// never be used otherwise. - Value(RefOwn<'b, T::Ref, T::Own>), -} - -/// Helper enum for HolderState. -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -enum HolderKind { - Key, - KeySubtree, - ValueSubtree, - Value -} - -//impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> { -// fn clone(&self) -> Self { +//struct Frame<'a, 'b, T: PatternTypes> { +// //obj: RefOwn<'b, T::Ref, T::Own>, +// ops: &'a [PatternElement], +// iar: Option<usize>, +// depth: usize, +// path: Vec<Holder<'a, 'b, T>>, +// in_key: bool, +//} +// +//impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> { +// /// Advances the instruction address register. +// /// +// /// # Returns +// /// +// /// `true` if successful, `false` otherwise. +// fn next(&mut self) -> bool { +// let new = self.iar.map_or(0, |v| v + 1); +// new < self.ops.len() && { +// self.iar = Some(new); +// true +// } +// } +// +// /// Returns the current instruction. +// fn op(&self) -> PatternElement { +// self.ops[self.iar.expect("ops[iar]")] +// } +// +// /// Rewinds the instruction address register. +// /// +// /// # Returns +// /// +// /// `true` if successful, `false` otherwise. +// fn prev(&mut self) -> bool { +// let new = self.iar.expect("iar").checked_sub(1); +// new.is_some() && { +// self.iar = new; +// true +// } +// } +//} +// +///// Stores a single match. +///// +///// See also Holder. +//enum HolderState<'a, 'b, T: PatternTypes> { +// /// Empty holder, for a key-value pair. +// EmptyKey, +// /// Empty holder, for a Matcher and a key-value pair. +// EmptyKeySubtree, +// // /// Empty holder, for a Matcher and a value. +// // EmptyValueSubtree, +// /// Occupied holder, for a key-value pair.. +// Key(KVPair<'b, T>), +// /// Occupied holder, for a Matcher and a key-value pair. +// KeySubtree(Peekable<Matcher<'a, 'b, T>>, KVPair<'b, T>), +// /// Occupied holder, for a Matcher and a value. The empty variant is +// /// omitted as it would never be used otherwise. 
+// ValueSubtree(Peekable<Matcher<'a, 'b, T>>, RefOwn<'b, T::Ref, T::Own>), +// /// Occupied holder, for a value. The empty variant is omitted as it would +// /// never be used otherwise. +// Value(RefOwn<'b, T::Ref, T::Own>), +//} +// +///// Helper enum for HolderState. +//#[derive(Copy, Clone, Debug, Eq, PartialEq)] +//enum HolderKind { +// Key, +// KeySubtree, +// ValueSubtree, +// Value +//} +// +////impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> { +//// fn clone(&self) -> Self { +//// match self { +//// HolderState::EmptyKey => HolderState::EmptyKey, +//// HolderState::EmptySubtree => HolderState::EmptySubtree, +//// HolderState::Key(v) => HolderState::Key(*v), +//// HolderState::KeySubtree(m, v) => HolderState::KeySubtree(m.clone(), *v), +//// HolderState::ValueSubtree(m, v) => HolderState::ValueSubtree(m.clone(), *v), +//// HolderState::Value(v) => HolderState::Value(*v), +//// } +//// } +////} +// +//impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> { +// #[rustfmt::skip] +// fn is_empty(&self) -> bool { // match self { -// HolderState::EmptyKey => HolderState::EmptyKey, -// HolderState::EmptySubtree => HolderState::EmptySubtree, -// HolderState::Key(v) => HolderState::Key(*v), -// HolderState::KeySubtree(m, v) => HolderState::KeySubtree(m.clone(), *v), -// HolderState::ValueSubtree(m, v) => HolderState::ValueSubtree(m.clone(), *v), -// HolderState::Value(v) => HolderState::Value(*v), +// | HolderState::EmptyKey +// | HolderState::EmptyKeySubtree +// //| HolderState::EmptyValueSubtree +// => true, _ => false +// } +// } +// +// fn has_value(&self) -> bool { +// !self.is_empty() +// } +// +// fn kind(&self) -> HolderKind { +// match self { +// | HolderState::EmptyKey +// | HolderState::Key(_) +// => HolderKind::Key, +// | HolderState::EmptyKeySubtree +// | HolderState::KeySubtree(_, _) +// => HolderKind::KeySubtree, +// //| HolderState::EmptyValueSubtree +// | HolderState::ValueSubtree(_, _) +// => HolderKind::ValueSubtree, +// | 
HolderState::Value(_) +// => HolderKind::Value, +// } +// } +// +// fn value(&self) -> Option<RefOwn<'b, T::Ref, T::Own>> { +// match *self { +// HolderState::Key((_, value)) => Some(value), +// HolderState::KeySubtree(_, (_, value)) => Some(value), +// HolderState::ValueSubtree(_, value) => Some(value), +// HolderState::Value(value) => Some(value), +// _ => None +// } +// } +// +// fn key(&self) -> Option<RefOwn<'b, T::Ref, T::Own>> { +// match *self { +// HolderState::Key((key, _)) => Some(key), +// HolderState::KeySubtree(_, (key, _)) => Some(key), +// _ => None +// } +// } +// +// fn pair(&self) -> Option<KVPair<'b, T>> { +// match *self { +// HolderState::Key(pair) => Some(pair), +// HolderState::KeySubtree(_, pair) => Some(pair), +// _ => None // } // } +// +// fn subtree(&mut self) -> Option<&mut Peekable<Matcher<'a, 'b, T>>> { +// match *self { +// HolderState::KeySubtree(ref mut subtree, _) => Some(subtree), +// HolderState::ValueSubtree(ref mut subtree, _) => Some(subtree), +// _ => None +// } +// } +// +// fn clear(&mut self) { +// *self = match self.kind() { +// HolderKind::Key => HolderState::EmptyKey, +// HolderKind::KeySubtree => HolderState::EmptyKeySubtree, +// HolderKind::ValueSubtree => unreachable!(), //HolderState::EmptyValueSubtree, +// HolderKind::Value => unreachable!(), +// }; +// assert!(self.is_empty()); +// } +//} +// +///// Stores a single match and associated metadata. +///// +///// A single match is generally a key-value pair, but may be a collection of +///// named pairs in the case of subtree matches, or just a value for the initial +///// holder. 
+//struct Holder<'a, 'b, T: PatternTypes> { +// name: Option<&'a str>, +// value: HolderState<'a, 'b, T>, +// parent: Option<RefOwn<'b, T::Ref, T::Own>>, +// iterator: Option<Box<dyn Iterator<Item=KVPair<'b, T>> + 'b>>, +// filters: Vec<Box<dyn (for<'c> Fn(&'c mut HolderState<'a, 'b, T>) -> Result<(), MatchError>) + 'a>>, +//} +// +//impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> { +// fn next(&mut self) -> Result<bool, MatchError> { +// self.ensure_iterator()?; +// if let Self { +// value: ref mut v, +// iterator: Some(ref mut it), +// ref filters, +// .. +// } = self { +// // check if we're in a subtree and (not) done. +// if let Some(matcher) = v.subtree() { +// if let Some(res) = matcher.peek() { +// // report any errors +// return res.as_ref().map(|_| true).map_err(|e| e.clone()); +// } +// } +// let kind = v.kind(); +// let mut next_v; +// loop { +// next_v = match it.next() { +// Some(pair) => HolderState::Key(pair), +// None => return Ok(false) +// }; +// for filter in filters { +// filter(&mut next_v)?; +// if next_v.is_empty() { +// break; +// } +// } +// if next_v.has_value() { +// break; +// } +// } +// assert!(next_v.has_value()); +// assert_eq!(next_v.kind(), kind); +// *v = next_v; +// Ok(true) +// } else { +// unreachable!() +// } +// } +// +// /// Ensure `self.iterator.is_some()`, creating an iterator if necessary. 
+// fn ensure_iterator(&mut self) -> Result<(), MatchError> { +// if self.iterator.is_none() { +// let iter = T::pairs(self.parent.unwrap()); +// if iter.is_none() { +// return Err(MatchError::UnsupportedOperation); +// } +// self.iterator = iter; +// } +// assert!(self.iterator.is_some()); +// Ok(()) +// } +//} +// +//impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> { +// fn default() -> Self { +// Self { +// name: Default::default(), +// value: HolderState::EmptyKey, +// parent: Default::default(), +// iterator: Default::default(), +// filters: Default::default(), +// } +// } +//} +// +//pub struct Matcher<'a, 'b, T: PatternTypes> { +// defs: &'a PatternConstants<T>, +// frame: Frame<'a, 'b, T>, +//} +// +//// TODO: +//// +//// [x] Arrow +//// [x] StringKey +//// [x] RegexKey +//// [x] KeySubtree +//// [x] ValueSubtree +//// [x] Ident +//// [x] Param (untested) +//// [x] ApplyPredicate +//// [x] End +// +///// Helper for `PatternElement::StringKey`. +//fn on_string_key<'a, 'b, T: PatternTypes>( +// matcher: &mut Matcher<'a, 'b, T>, +// id: usize, +// skippable: bool, +//) -> Result<bool, MatchError> { +// let path = matcher.frame.path.last_mut().unwrap(); +// assert!(path.iterator.is_none()); +// let key = &matcher.defs.strings[id]; +// let iter = T::get(path.parent.unwrap(), RefOwn::Str(key)); +// match iter { +// Some(None) if !skippable => Err(MatchError::ValidationError), +// Some(opt) => { +// path.iterator = Some(Box::new(opt.into_iter())); +// Ok(true) +// } +// None => Err(MatchError::UnsupportedOperation), +// } +//} +// +///// Helper for `PatternElement::ParameterKey`. 
+//fn on_parameter_key<'a, 'b, T: PatternTypes>( +// matcher: &mut Matcher<'a, 'b, T>, +// id: usize, +// skippable: bool, +//) -> Result<bool, MatchError> { +// let path = matcher.frame.path.last_mut().unwrap(); +// assert!(path.iterator.is_none()); +// let key = matcher.defs.defs[id]; +// let iter = T::get(path.parent.unwrap(), RefOwn::Own(key)); +// match iter { +// Some(None) if !skippable => Err(MatchError::ValidationError), +// Some(opt) => { +// path.iterator = Some(Box::new(opt.into_iter())); +// Ok(true) +// } +// None => Err(MatchError::UnsupportedOperation), +// } +//} +// +///// Helper for `PatternElement::RegexKey`. +//fn on_regex_key<'a, 'b, T: PatternTypes>( +// matcher: &mut Matcher<'a, 'b, T>, +// id: usize, +// skippable: bool, +//) -> Result<bool, MatchError> { +// matcher.frame.path.last_mut().unwrap().ensure_iterator()?; +// let re = &matcher.defs.regices[id]; +// let path = matcher.frame.path.last_mut().unwrap(); +// path.filters.push(Box::new(move |value| { +// let s = T::as_str(value.key().unwrap()); +// match (s.map_or(false, |s| re.is_match(s)), skippable) { +// (true, _) => Ok(()), +// (false, true) => { +// value.clear(); +// Ok(()) +// }, +// (false, false) => Err(MatchError::ValidationError), +// } +// })); +// Ok(true) +//} +// +///// Helper for `PatternElement::KeySubtree`. +//fn on_key_subtree<'a, 'b, T: PatternTypes>( +// matcher: &mut Matcher<'a, 'b, T>, +// id: usize, +// skippable: bool, +//) -> Result<bool, MatchError> { +// let _ = skippable; // FIXME what should a skippable KeySubtree even do?! 
+// matcher.frame.path.last_mut().unwrap().ensure_iterator()?; +// let defs = matcher.defs; +// let rlimit: usize = matcher.frame.depth; +// let path = matcher.frame.path.last_mut().unwrap(); +// assert!(path.value.is_empty()); +// assert_eq!(path.value.kind(), HolderKind::Key); +// path.value = HolderState::EmptyKeySubtree; +// path.filters.push(Box::new(move |value| { +// let key = value.key().unwrap(); +// let mut subtree = Matcher::new(key, defs, id, rlimit)?.peekable(); +// match subtree.peek() { +// Some(&Ok(_)) => { +// *value = HolderState::KeySubtree(subtree, value.pair().unwrap()); +// Ok(()) +// }, +// Some(&Err(ref e)) => { +// Err(e.clone()) +// }, +// None => { +// value.clear(); +// Ok(()) +// } +// } +// })); +// Ok(true) +//} +// +//const DUMMY_OPS: &'static [PatternElement] = &[]; +// +//impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { +// pub(crate) fn new(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants<T>, proto: usize, rlimit: usize) -> Result<Self, MatchError> { +// let ops: &[_] = &defs.protos[proto]; +// Self::with_ops(obj, defs, ops, rlimit) +// } +// +// /// Constructs a Matcher that yields a single dummy result. +// fn with_ops(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants<T>, ops: &'a [PatternElement], rlimit: usize) -> Result<Self, MatchError> { +// let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?; +// Ok(Self { +// defs: defs, +// frame: Frame { +// //obj: obj, +// ops: ops, +// iar: None, +// depth: depth, +// path: { +// let mut holder = Holder::default(); +// holder.value = HolderState::Value(obj); +// holder.iterator = Some(Box::new(std::iter::empty())); +// vec![holder] +// }, +// in_key: false, +// }, +// }) +// } +// +// fn on_in_key(&mut self) -> Result<bool, MatchError> { +// match self.frame.op() { +// PatternElement::End => { +// let path = self.frame.path.last_mut().unwrap(); +// if path.next()? 
{ +// Ok(false) +// } else { +// drop(path); +// self.frame.path.pop().unwrap(); +// // stop at previous End, or start of frame +// while self.frame.prev() { +// if matches!(self.frame.op(), PatternElement::End) { +// break; +// } +// } +// // is start of frame? +// if !self.frame.prev() { +// self.frame.path.clear(); +// } +// Ok(true) +// } +// }, +// PatternElement::ApplyPredicate(id, skippable) => { +// // failing on T::get() is already handled, but we may need a +// // T::pairs(). construct it here. +// self.frame.path.last_mut().unwrap().ensure_iterator()?; +// let pred = &self.defs.predicates[id]; +// let path = self.frame.path.last_mut().unwrap(); +// path.filters.push(Box::new(move |value| { +// match (pred(value.value().unwrap()), skippable) { +// (true, _) => Ok(()), +// (false, true) => { +// value.clear(); +// Ok(()) +// }, +// (false, false) => Err(MatchError::ValidationError), +// } +// })); +// Ok(true) +// }, +// PatternElement::StringKey(id, skippable) => { +// on_string_key(self, id, skippable) +// }, +// PatternElement::ParameterKey(id, skippable) => { +// on_parameter_key(self, id, skippable) +// }, +// PatternElement::RegexKey(id, skippable) => { +// on_regex_key(self, id, skippable) +// }, +// PatternElement::KeySubtree(id, skippable) => { +// on_key_subtree(self, id, skippable) +// }, +// _ => unreachable!("on_in_key") +// } +// } +// +// fn on_not_in_key(&mut self) -> Result<bool, MatchError> { +// match self.frame.op() { +// PatternElement::Arrow => { +// // this *should* always pass. 
+// assert!(self.frame.path.last().unwrap().iterator.is_some()); +// let mut holder = Holder::default(); +// holder.parent = self.frame.path.last().unwrap().value.value(); +// assert!(holder.parent.is_some()); +// self.frame.path.push(holder); +// Ok(false) +// }, +// PatternElement::Identifier(id) => { +// let name = self.defs.strings.get(id).map(|s| &**s); +// let path = self.frame.path.last_mut().unwrap(); +// path.name = name; +// assert!(path.iterator.is_none()); +// // we don't actually create the iterator here, +// // as we may still wanna use T::get() instead. +// Ok(true) +// }, +// PatternElement::ApplyPredicate(id, skippable) => { +// assert!(self.frame.path.len() == 1); +// let pred = &self.defs.predicates[id]; +// let value = self.frame.path.last().unwrap().value.value(); +// match (pred(value.unwrap()), skippable) { +// (true, _) => Ok(false), +// (false, true) => { +// self.frame.path.clear(); +// // any Ok(_) will do +// Ok(false) +// }, +// (false, false) => Err(MatchError::ValidationError), +// } +// }, +// PatternElement::StringKey(id, skippable) => { +// on_string_key(self, id, skippable) +// }, +// PatternElement::ParameterKey(id, skippable) => { +// on_parameter_key(self, id, skippable) +// }, +// PatternElement::RegexKey(id, skippable) => { +// on_regex_key(self, id, skippable) +// }, +// PatternElement::KeySubtree(id, skippable) => { +// on_key_subtree(self, id, skippable) +// }, +// PatternElement::ValueSubtree(id, skippable) => { +// let value = self.frame.path.last().unwrap().value.value().unwrap(); +// let mut subtree = Matcher::new( +// value, +// self.defs, +// id, +// self.frame.depth +// )?.peekable(); +// let mut dummy = Matcher::with_ops( +// value, +// self.defs, +// DUMMY_OPS, +// self.frame.depth +// )?.peekable(); +// // may panic. +// let peeked = subtree.peek(); +// // shouldn't panic. +// let _ = dummy.peek(); +// // push Holder after peek. 
+// self.frame.path.push(Holder::default()); +// let mut holder = self.frame.path.last_mut().unwrap(); +// holder.parent = Some(value); +// holder.iterator = Some(Box::new(std::iter::empty())); +// match peeked { +// None if skippable => { +// holder.value = HolderState::ValueSubtree(dummy, value); +// Ok(true) +// }, +// Some(&Ok(_)) | None => { +// drop(peeked); +// holder.value = HolderState::ValueSubtree(subtree, value); +// Ok(true) +// }, +// Some(&Err(ref e)) => { +// Err(e.clone()) +// }, +// } +// }, +// _ => unreachable!("on_not_in_key") +// } +// } +// +// fn collect_results(&mut self) -> Matches<'a, 'b, T> { +// let mut res: Matches<'a, 'b, T> = Default::default(); +// for holder in &mut self.frame.path { +// // make sure it's not empty. +// assert!(holder.value.has_value()); +// // handle subtrees. +// if let Some(matcher) = holder.value.subtree() { +// if let Some(matches) = matcher.next() { +// // NOTE: we have checked these already. +// // (and if we haven't, that's a bug.) +// res.extend(matches.unwrap()); +// } +// } +// // handle pairs. +// if let Some(pair) = holder.value.pair() { +// if let Some(name) = holder.name { +// res.insert(name, pair); +// } +// } +// } +// res +// } +// +// fn on_end(&mut self) -> (bool, Matches<'a, 'b, T>) { +// match self.frame.op() { +// PatternElement::End => { +// assert!(!self.frame.path.last().expect("path").value.is_empty()); +// let res = self.collect_results(); +// if !self.frame.prev() { +// // NOTE: frame.prev() must always be called, even if this +// // gets replaced with debug_assert!() in the future. 
+// assert!(false, "frame.prev()"); +// } +// (true, res) +// } +// PatternElement::ApplyPredicate {..} => { +// assert!(!self.frame.in_key); +// let res = self.collect_results(); +// self.frame.path.clear(); +// (false, res) +// } +// _ => unreachable!("on_end") +// } +// } +//} +// +//impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> { +// type Item = Result<BTreeMap<&'a str, KVPair<'b, T>>, MatchError>; +// +// fn next(&mut self) -> Option<Self::Item> { +// if self.frame.ops.is_empty() { +// if !self.frame.path.is_empty() { +// self.frame.path.clear(); +// return Some(Ok(Default::default())); +// } +// } +// while !self.frame.path.is_empty() { +// if !self.frame.next() { +// let (in_key, res) = self.on_end(); +// self.frame.in_key = in_key; +// return Some(Ok(res)); +// } else { +// let in_key = if self.frame.in_key { +// self.on_in_key() +// } else { +// self.on_not_in_key() +// }; +// match in_key { +// Ok(in_key) => self.frame.in_key = in_key, +// Err(e) => { +// self.frame.path.clear(); +// return Some(Err(e)) +// }, +// } +// } +// } +// None +// } //} - -impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> { - #[rustfmt::skip] - fn is_empty(&self) -> bool { - match self { - | HolderState::EmptyKey - | HolderState::EmptyKeySubtree - //| HolderState::EmptyValueSubtree - => true, _ => false - } - } - - fn has_value(&self) -> bool { - !self.is_empty() - } - - fn kind(&self) -> HolderKind { - match self { - | HolderState::EmptyKey - | HolderState::Key(_) - => HolderKind::Key, - | HolderState::EmptyKeySubtree - | HolderState::KeySubtree(_, _) - => HolderKind::KeySubtree, - //| HolderState::EmptyValueSubtree - | HolderState::ValueSubtree(_, _) - => HolderKind::ValueSubtree, - | HolderState::Value(_) - => HolderKind::Value, - } - } - - fn value(&self) -> Option<RefOwn<'b, T::Ref, T::Own>> { - match *self { - HolderState::Key((_, value)) => Some(value), - HolderState::KeySubtree(_, (_, value)) => Some(value), - HolderState::ValueSubtree(_, value) => 
Some(value), - HolderState::Value(value) => Some(value), - _ => None - } - } - - fn key(&self) -> Option<RefOwn<'b, T::Ref, T::Own>> { - match *self { - HolderState::Key((key, _)) => Some(key), - HolderState::KeySubtree(_, (key, _)) => Some(key), - _ => None - } - } - - fn pair(&self) -> Option<KVPair<'b, T>> { - match *self { - HolderState::Key(pair) => Some(pair), - HolderState::KeySubtree(_, pair) => Some(pair), - _ => None - } - } - - fn subtree(&mut self) -> Option<&mut Peekable<Matcher<'a, 'b, T>>> { - match *self { - HolderState::KeySubtree(ref mut subtree, _) => Some(subtree), - HolderState::ValueSubtree(ref mut subtree, _) => Some(subtree), - _ => None - } - } - - fn clear(&mut self) { - *self = match self.kind() { - HolderKind::Key => HolderState::EmptyKey, - HolderKind::KeySubtree => HolderState::EmptyKeySubtree, - HolderKind::ValueSubtree => unreachable!(), //HolderState::EmptyValueSubtree, - HolderKind::Value => unreachable!(), - }; - assert!(self.is_empty()); - } -} - -/// Stores a single match and associated metadata. -/// -/// A single match is generally a key-value pair, but may be a collection of -/// named pairs in the case of subtree matches, or just a value for the initial -/// holder. -struct Holder<'a, 'b, T: PatternTypes> { - name: Option<&'a str>, - value: HolderState<'a, 'b, T>, - parent: Option<RefOwn<'b, T::Ref, T::Own>>, - iterator: Option<Box<dyn Iterator<Item=KVPair<'b, T>> + 'b>>, - filters: Vec<Box<dyn (for<'c> Fn(&'c mut HolderState<'a, 'b, T>) -> Result<(), MatchError>) + 'a>>, -} - -impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> { - fn next(&mut self) -> Result<bool, MatchError> { - self.ensure_iterator()?; - if let Self { - value: ref mut v, - iterator: Some(ref mut it), - ref filters, - .. - } = self { - // check if we're in a subtree and (not) done. 
- if let Some(matcher) = v.subtree() { - if let Some(res) = matcher.peek() { - // report any errors - return res.as_ref().map(|_| true).map_err(|e| e.clone()); - } - } - let kind = v.kind(); - let mut next_v; - loop { - next_v = match it.next() { - Some(pair) => HolderState::Key(pair), - None => return Ok(false) - }; - for filter in filters { - filter(&mut next_v)?; - if next_v.is_empty() { - break; - } - } - if next_v.has_value() { - break; - } - } - assert!(next_v.has_value()); - assert_eq!(next_v.kind(), kind); - *v = next_v; - Ok(true) - } else { - unreachable!() - } - } - - /// Ensure `self.iterator.is_some()`, creating an iterator if necessary. - fn ensure_iterator(&mut self) -> Result<(), MatchError> { - if self.iterator.is_none() { - let iter = T::pairs(self.parent.unwrap()); - if iter.is_none() { - return Err(MatchError::UnsupportedOperation); - } - self.iterator = iter; - } - assert!(self.iterator.is_some()); - Ok(()) - } -} - -impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> { - fn default() -> Self { - Self { - name: Default::default(), - value: HolderState::EmptyKey, - parent: Default::default(), - iterator: Default::default(), - filters: Default::default(), - } - } -} - -pub struct Matcher<'a, 'b, T: PatternTypes> { - defs: &'a PatternConstants<T>, - frame: Frame<'a, 'b, T>, -} - -// TODO: -// -// [x] Arrow -// [x] StringKey -// [x] RegexKey -// [x] KeySubtree -// [x] ValueSubtree -// [x] Ident -// [x] Param (untested) -// [x] ApplyPredicate -// [x] End - -/// Helper for `PatternElement::StringKey`. 
-fn on_string_key<'a, 'b, T: PatternTypes>( - matcher: &mut Matcher<'a, 'b, T>, - id: usize, - skippable: bool, -) -> Result<bool, MatchError> { - let path = matcher.frame.path.last_mut().unwrap(); - assert!(path.iterator.is_none()); - let key = &matcher.defs.strings[id]; - let iter = T::get(path.parent.unwrap(), RefOwn::Str(key)); - match iter { - Some(None) if !skippable => Err(MatchError::ValidationError), - Some(opt) => { - path.iterator = Some(Box::new(opt.into_iter())); - Ok(true) - } - None => Err(MatchError::UnsupportedOperation), - } -} - -/// Helper for `PatternElement::ParameterKey`. -fn on_parameter_key<'a, 'b, T: PatternTypes>( - matcher: &mut Matcher<'a, 'b, T>, - id: usize, - skippable: bool, -) -> Result<bool, MatchError> { - let path = matcher.frame.path.last_mut().unwrap(); - assert!(path.iterator.is_none()); - let key = matcher.defs.defs[id]; - let iter = T::get(path.parent.unwrap(), RefOwn::Own(key)); - match iter { - Some(None) if !skippable => Err(MatchError::ValidationError), - Some(opt) => { - path.iterator = Some(Box::new(opt.into_iter())); - Ok(true) - } - None => Err(MatchError::UnsupportedOperation), - } -} - -/// Helper for `PatternElement::RegexKey`. -fn on_regex_key<'a, 'b, T: PatternTypes>( - matcher: &mut Matcher<'a, 'b, T>, - id: usize, - skippable: bool, -) -> Result<bool, MatchError> { - matcher.frame.path.last_mut().unwrap().ensure_iterator()?; - let re = &matcher.defs.regices[id]; - let path = matcher.frame.path.last_mut().unwrap(); - path.filters.push(Box::new(move |value| { - let s = T::as_str(value.key().unwrap()); - match (s.map_or(false, |s| re.is_match(s)), skippable) { - (true, _) => Ok(()), - (false, true) => { - value.clear(); - Ok(()) - }, - (false, false) => Err(MatchError::ValidationError), - } - })); - Ok(true) -} - -/// Helper for `PatternElement::KeySubtree`. 
-fn on_key_subtree<'a, 'b, T: PatternTypes>( - matcher: &mut Matcher<'a, 'b, T>, - id: usize, - skippable: bool, -) -> Result<bool, MatchError> { - let _ = skippable; // FIXME what should a skippable KeySubtree even do?! - matcher.frame.path.last_mut().unwrap().ensure_iterator()?; - let defs = matcher.defs; - let rlimit: usize = matcher.frame.depth; - let path = matcher.frame.path.last_mut().unwrap(); - assert!(path.value.is_empty()); - assert_eq!(path.value.kind(), HolderKind::Key); - path.value = HolderState::EmptyKeySubtree; - path.filters.push(Box::new(move |value| { - let key = value.key().unwrap(); - let mut subtree = Matcher::new(key, defs, id, rlimit)?.peekable(); - match subtree.peek() { - Some(&Ok(_)) => { - *value = HolderState::KeySubtree(subtree, value.pair().unwrap()); - Ok(()) - }, - Some(&Err(ref e)) => { - Err(e.clone()) - }, - None => { - value.clear(); - Ok(()) - } - } - })); - Ok(true) -} - -const DUMMY_OPS: &'static [PatternElement] = &[]; - -impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { - pub(crate) fn new(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants<T>, proto: usize, rlimit: usize) -> Result<Self, MatchError> { - let ops: &[_] = &defs.protos[proto]; - Self::with_ops(obj, defs, ops, rlimit) - } - - /// Constructs a Matcher that yields a single dummy result. 
- fn with_ops(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants<T>, ops: &'a [PatternElement], rlimit: usize) -> Result<Self, MatchError> { - let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?; - Ok(Self { - defs: defs, - frame: Frame { - //obj: obj, - ops: ops, - iar: None, - depth: depth, - path: { - let mut holder = Holder::default(); - holder.value = HolderState::Value(obj); - holder.iterator = Some(Box::new(std::iter::empty())); - vec![holder] - }, - in_key: false, - }, - }) - } - - fn on_in_key(&mut self) -> Result<bool, MatchError> { - match self.frame.op() { - PatternElement::End => { - let path = self.frame.path.last_mut().unwrap(); - if path.next()? { - Ok(false) - } else { - drop(path); - self.frame.path.pop().unwrap(); - // stop at previous End, or start of frame - while self.frame.prev() { - if matches!(self.frame.op(), PatternElement::End) { - break; - } - } - // is start of frame? - if !self.frame.prev() { - self.frame.path.clear(); - } - Ok(true) - } - }, - PatternElement::ApplyPredicate(id, skippable) => { - // failing on T::get() is already handled, but we may need a - // T::pairs(). construct it here. 
- self.frame.path.last_mut().unwrap().ensure_iterator()?; - let pred = &self.defs.predicates[id]; - let path = self.frame.path.last_mut().unwrap(); - path.filters.push(Box::new(move |value| { - match (pred(value.value().unwrap()), skippable) { - (true, _) => Ok(()), - (false, true) => { - value.clear(); - Ok(()) - }, - (false, false) => Err(MatchError::ValidationError), - } - })); - Ok(true) - }, - PatternElement::StringKey(id, skippable) => { - on_string_key(self, id, skippable) - }, - PatternElement::ParameterKey(id, skippable) => { - on_parameter_key(self, id, skippable) - }, - PatternElement::RegexKey(id, skippable) => { - on_regex_key(self, id, skippable) - }, - PatternElement::KeySubtree(id, skippable) => { - on_key_subtree(self, id, skippable) - }, - _ => unreachable!("on_in_key") - } - } - - fn on_not_in_key(&mut self) -> Result<bool, MatchError> { - match self.frame.op() { - PatternElement::Arrow => { - // this *should* always pass. - assert!(self.frame.path.last().unwrap().iterator.is_some()); - let mut holder = Holder::default(); - holder.parent = self.frame.path.last().unwrap().value.value(); - assert!(holder.parent.is_some()); - self.frame.path.push(holder); - Ok(false) - }, - PatternElement::Identifier(id) => { - let name = self.defs.strings.get(id).map(|s| &**s); - let path = self.frame.path.last_mut().unwrap(); - path.name = name; - assert!(path.iterator.is_none()); - // we don't actually create the iterator here, - // as we may still wanna use T::get() instead. 
- Ok(true) - }, - PatternElement::ApplyPredicate(id, skippable) => { - assert!(self.frame.path.len() == 1); - let pred = &self.defs.predicates[id]; - let value = self.frame.path.last().unwrap().value.value(); - match (pred(value.unwrap()), skippable) { - (true, _) => Ok(false), - (false, true) => { - self.frame.path.clear(); - // any Ok(_) will do - Ok(false) - }, - (false, false) => Err(MatchError::ValidationError), - } - }, - PatternElement::StringKey(id, skippable) => { - on_string_key(self, id, skippable) - }, - PatternElement::ParameterKey(id, skippable) => { - on_parameter_key(self, id, skippable) - }, - PatternElement::RegexKey(id, skippable) => { - on_regex_key(self, id, skippable) - }, - PatternElement::KeySubtree(id, skippable) => { - on_key_subtree(self, id, skippable) - }, - PatternElement::ValueSubtree(id, skippable) => { - let value = self.frame.path.last().unwrap().value.value().unwrap(); - let mut subtree = Matcher::new( - value, - self.defs, - id, - self.frame.depth - )?.peekable(); - let mut dummy = Matcher::with_ops( - value, - self.defs, - DUMMY_OPS, - self.frame.depth - )?.peekable(); - // may panic. - let peeked = subtree.peek(); - // shouldn't panic. - let _ = dummy.peek(); - // push Holder after peek. - self.frame.path.push(Holder::default()); - let mut holder = self.frame.path.last_mut().unwrap(); - holder.parent = Some(value); - holder.iterator = Some(Box::new(std::iter::empty())); - match peeked { - None if skippable => { - holder.value = HolderState::ValueSubtree(dummy, value); - Ok(true) - }, - Some(&Ok(_)) | None => { - drop(peeked); - holder.value = HolderState::ValueSubtree(subtree, value); - Ok(true) - }, - Some(&Err(ref e)) => { - Err(e.clone()) - }, - } - }, - _ => unreachable!("on_not_in_key") - } - } - - fn collect_results(&mut self) -> Matches<'a, 'b, T> { - let mut res: Matches<'a, 'b, T> = Default::default(); - for holder in &mut self.frame.path { - // make sure it's not empty. 
- assert!(holder.value.has_value()); - // handle subtrees. - if let Some(matcher) = holder.value.subtree() { - if let Some(matches) = matcher.next() { - // NOTE: we have checked these already. - // (and if we haven't, that's a bug.) - res.extend(matches.unwrap()); - } - } - // handle pairs. - if let Some(pair) = holder.value.pair() { - if let Some(name) = holder.name { - res.insert(name, pair); - } - } - } - res - } - - fn on_end(&mut self) -> (bool, Matches<'a, 'b, T>) { - match self.frame.op() { - PatternElement::End => { - assert!(!self.frame.path.last().expect("path").value.is_empty()); - let res = self.collect_results(); - if !self.frame.prev() { - // NOTE: frame.prev() must always be called, even if this - // gets replaced with debug_assert!() in the future. - assert!(false, "frame.prev()"); - } - (true, res) - } - PatternElement::ApplyPredicate {..} => { - assert!(!self.frame.in_key); - let res = self.collect_results(); - self.frame.path.clear(); - (false, res) - } - _ => unreachable!("on_end") - } - } -} - -impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> { - type Item = Result<BTreeMap<&'a str, KVPair<'b, T>>, MatchError>; - - fn next(&mut self) -> Option<Self::Item> { - if self.frame.ops.is_empty() { - if !self.frame.path.is_empty() { - self.frame.path.clear(); - return Some(Ok(Default::default())); - } - } - while !self.frame.path.is_empty() { - if !self.frame.next() { - let (in_key, res) = self.on_end(); - self.frame.in_key = in_key; - return Some(Ok(res)); - } else { - let in_key = if self.frame.in_key { - self.on_in_key() - } else { - self.on_not_in_key() - }; - match in_key { - Ok(in_key) => self.frame.in_key = in_key, - Err(e) => { - self.frame.path.clear(); - return Some(Err(e)) - }, - } - } - } - None - } -} diff --git a/tests/basic_match.rs b/tests/basic_match.rs index 4697c63..1500356 100644 --- a/tests/basic_match.rs +++ b/tests/basic_match.rs @@ -1,157 +1,139 @@ -/* - * This file is part of Datafu - * Copyright (C) 2021 Soni L. 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <https://www.gnu.org/licenses/>. - */ +// Copyright (C) 2021-2022 Soni L. +// SPDX-License-Identifier: MIT OR Apache-2.0 -extern crate datafu; - -mod common; - -use common::Value; - -use datafu::RefOwn; +use serde_json::Deserializer as JsonDer; +use serde::Deserialize; #[test] fn test_basic() { - let tree = Value::M(vec![ - ("foo".into(), Value::U(1)), - ("bar".into(), Value::M(vec![ - ("baz".into(), Value::U(2)), - ].into_iter().collect())), - ].into_iter().collect()); - let preds = vec![("dict", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::M(_)))) as Box<datafu::Predicate<Value>>)].into_iter().collect(); - let pat = datafu::Pattern::<Value>::compile::<&str, &str>("->X:?$dict->Y", Some(preds), None).unwrap(); - let mut matcher = pat.attempt_match(&tree); - let m = matcher.next().unwrap().unwrap(); - assert_eq!(m["X"].0, RefOwn::Ref(&Value::from("bar"))); - assert_eq!(m["Y"].0, RefOwn::Ref(&Value::from("baz"))); - assert_eq!(m["Y"].1, RefOwn::Ref(&Value::U(2))); - assert!(matcher.next().is_none()); -} - -#[test] -fn test_str() { - let tree = Value::M(vec![ - ("foo".into(), Value::U(1)), - ("bar".into(), Value::M(vec![ - ("baz".into(), Value::U(2)), - ].into_iter().collect())), - ].into_iter().collect()); - let preds = vec![("dict", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::M(_)))) as 
Box<datafu::Predicate<Value>>)].into_iter().collect(); - let pat = datafu::Pattern::<Value>::compile::<&str, &str>("->X'bar'->Y", Some(preds), None).unwrap(); - let mut matcher = pat.attempt_match(&tree); - let m = matcher.next().unwrap().unwrap(); - assert_eq!(m["X"].0, RefOwn::Ref(&Value::from("bar"))); - assert_eq!(m["Y"].0, RefOwn::Ref(&Value::from("baz"))); - assert_eq!(m["Y"].1, RefOwn::Ref(&Value::U(2))); - assert!(matcher.next().is_none()); -} - -#[test] -fn test_basic_2() { - let tree = Value::M(vec![ - ("projects".into(), Value::M(vec![ - ("385e734a52e13949a7a5c71827f6de920dbfea43".into(), Value::M(vec![ - ("https://soniex2.autistic.space/git-repos/ganarchy.git".into(), Value::M(vec![ - ("HEAD".into(), Value::M(vec![ - ("active".into(), Value::B(true)), - ].into_iter().collect())), - ].into_iter().collect())), - ].into_iter().collect())), - ].into_iter().collect())), - ].into_iter().collect()); - let preds = vec![("d", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::M(_)))) as Box<datafu::Predicate<Value>>)].into_iter().collect(); - let pat = datafu::Pattern::<Value>::compile::<&str, &str>("->'projects':?$d->P/[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/?:?$d->U:?$d->B", Some(preds), None).unwrap(); - let mut matcher = pat.attempt_match(&tree); - let m = matcher.next().unwrap().unwrap(); - assert_eq!(m["P"].0, RefOwn::Ref(&Value::from("385e734a52e13949a7a5c71827f6de920dbfea43"))); - assert_eq!(m["U"].0, RefOwn::Ref(&Value::from("https://soniex2.autistic.space/git-repos/ganarchy.git"))); - assert_eq!(m["B"].0, RefOwn::Ref(&Value::from("HEAD"))); - assert_eq!(m["B"].1, RefOwn::Ref(&Value::M(vec![(Value::from("active"), Value::B(true))].into_iter().collect()))); - assert!(matcher.next().is_none()); + let mut der = JsonDer::from_str(r#"{"foo": 1, "bar": {"baz": 2}}"#); + let preds = vec![("dict", datafu::pred(|v| { todo!() }))].into_iter().collect(); + let pat = datafu::Pattern::<()>::compile::<&str, &str>("->[x]:?$dict->y[yk]", Some(preds), 
None).unwrap(); + #[derive(Deserialize)] + struct Values { + x: String, + yk: String, + y: usize, + } + // should this error about needing multiple results/Vec requirement? + let matches: Values = pat.deserialize(&mut der).unwrap(); + assert_eq!(matches.x, "bar"); + assert_eq!(matches.yk, "baz"); + assert_eq!(matches.y, 2); + assert!(der.end().is_ok()); } -#[test] -fn test_spaces() { - let tree = Value::M(vec![ - ("projects".into(), Value::M(vec![ - ("385e734a52e13949a7a5c71827f6de920dbfea43".into(), Value::M(vec![ - ("https://soniex2.autistic.space/git-repos/ganarchy.git".into(), Value::M(vec![ - ("HEAD".into(), Value::M(vec![ - ("active".into(), Value::B(true)), - ].into_iter().collect())), - ].into_iter().collect())), - ].into_iter().collect())), - ].into_iter().collect())), - ].into_iter().collect()); - let preds = vec![("dict", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::M(_)))) as Box<datafu::Predicate<Value>>)].into_iter().collect(); - let pat = datafu::Pattern::<Value>::compile::<_, &str>(" - -> 'projects'? - -> commit /[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/? 
:?$dict - -> url :?$dict - -> branch :?$dict", - Some(preds), - None, - ).unwrap(); - let mut matcher = pat.attempt_match(&tree); - let m = matcher.next().unwrap().unwrap(); - assert_eq!(m["commit"].0, RefOwn::Ref(&Value::from("385e734a52e13949a7a5c71827f6de920dbfea43"))); - assert_eq!(m["url"].0, RefOwn::Ref(&Value::from("https://soniex2.autistic.space/git-repos/ganarchy.git"))); - assert_eq!(m["branch"].0, RefOwn::Ref(&Value::from("HEAD"))); - assert_eq!(m["branch"].1, RefOwn::Ref(&Value::M(vec![(Value::from("active"), Value::B(true))].into_iter().collect()))); - assert!(matcher.next().is_none()); -} - -#[test] -fn test_harder() { - let tree = Value::M(vec![ - ("projects".into(), Value::M(vec![ - ("385e734a52e13949a7a5c71827f6de920dbfea43".into(), Value::M(vec![ - ("https://soniex2.autistic.space/git-repos/ganarchy.git".into(), Value::M(vec![ - ("HEAD".into(), Value::M(vec![ - ("active".into(), Value::B(true)), - ].into_iter().collect())), - ].into_iter().collect())), - ].into_iter().collect())), - ].into_iter().collect())), - ].into_iter().collect()); - let preds = vec![ - ("dict", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::M(_)))) as Box<datafu::Predicate<Value>>), - ("str", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::S(_)))) as Box<datafu::Predicate<Value>>), - ("bool", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::B(_)))) as Box<datafu::Predicate<Value>>), - // stubs, we don't particularly need to test these - ("commit", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::S(_)))) as Box<datafu::Predicate<Value>>), - ("uri", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::S(_)))) as Box<datafu::Predicate<Value>>), - ].into_iter().collect(); - let pat = datafu::Pattern::<Value>::compile::<_, &str>(" - ->'projects':$dict - ->commit[:?$str:?$commit]:?$dict - ->url[:?$str:?$uri]:?$dict - ->branch:?$dict - (->active'active'?:?$bool) - (->federate'federate'?:?$bool)?", - Some(preds), - 
None, - ).unwrap(); - let mut matcher = pat.attempt_match(&tree); - let m = matcher.next().unwrap().unwrap(); - assert_eq!(m["commit"].0, RefOwn::Ref(&Value::from("385e734a52e13949a7a5c71827f6de920dbfea43"))); - assert_eq!(m["url"].0, RefOwn::Ref(&Value::from("https://soniex2.autistic.space/git-repos/ganarchy.git"))); - assert_eq!(m["branch"].0, RefOwn::Ref(&Value::from("HEAD"))); - assert_eq!(m["active"].1, RefOwn::Ref(&Value::B(true))); - assert_eq!(m.get("federate"), None); - assert!(matcher.next().is_none()); -} +//#[test] +//fn test_str() { +// //let tree = Value::M(vec![ +// // ("foo".into(), Value::U(1)), +// // ("bar".into(), Value::M(vec![ +// // ("baz".into(), Value::U(2)), +// // ].into_iter().collect())), +// //].into_iter().collect()); +// let der = JsonDer::from_str(r#"{"foo": 1, "bar": {"baz": 2}}"#) +// let preds = vec![("dict", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::M(_)))) as Box<datafu::Predicate<Value>>)].into_iter().collect(); +// let pat = datafu::Pattern::<Value>::compile::<&str, &str>("->X'bar'->Y", Some(preds), None).unwrap(); +// let mut matcher = pat.attempt_match(&tree); +// let m = matcher.next().unwrap().unwrap(); +// assert_eq!(m["X"].0, RefOwn::Ref(&Value::from("bar"))); +// assert_eq!(m["Y"].0, RefOwn::Ref(&Value::from("baz"))); +// assert_eq!(m["Y"].1, RefOwn::Ref(&Value::U(2))); +// assert!(matcher.next().is_none()); +//} +// +//#[test] +//fn test_basic_2() { +// let tree = Value::M(vec![ +// ("projects".into(), Value::M(vec![ +// ("385e734a52e13949a7a5c71827f6de920dbfea43".into(), Value::M(vec![ +// ("https://soniex2.autistic.space/git-repos/ganarchy.git".into(), Value::M(vec![ +// ("HEAD".into(), Value::M(vec![ +// ("active".into(), Value::B(true)), +// ].into_iter().collect())), +// ].into_iter().collect())), +// ].into_iter().collect())), +// ].into_iter().collect())), +// ].into_iter().collect()); +// let preds = vec![("d", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::M(_)))) as 
Box<datafu::Predicate<Value>>)].into_iter().collect(); +// let pat = datafu::Pattern::<Value>::compile::<&str, &str>("->'projects':?$d->P/[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/?:?$d->U:?$d->B", Some(preds), None).unwrap(); +// let mut matcher = pat.attempt_match(&tree); +// let m = matcher.next().unwrap().unwrap(); +// assert_eq!(m["P"].0, RefOwn::Ref(&Value::from("385e734a52e13949a7a5c71827f6de920dbfea43"))); +// assert_eq!(m["U"].0, RefOwn::Ref(&Value::from("https://soniex2.autistic.space/git-repos/ganarchy.git"))); +// assert_eq!(m["B"].0, RefOwn::Ref(&Value::from("HEAD"))); +// assert_eq!(m["B"].1, RefOwn::Ref(&Value::M(vec![(Value::from("active"), Value::B(true))].into_iter().collect()))); +// assert!(matcher.next().is_none()); +//} +// +//#[test] +//fn test_spaces() { +// let tree = Value::M(vec![ +// ("projects".into(), Value::M(vec![ +// ("385e734a52e13949a7a5c71827f6de920dbfea43".into(), Value::M(vec![ +// ("https://soniex2.autistic.space/git-repos/ganarchy.git".into(), Value::M(vec![ +// ("HEAD".into(), Value::M(vec![ +// ("active".into(), Value::B(true)), +// ].into_iter().collect())), +// ].into_iter().collect())), +// ].into_iter().collect())), +// ].into_iter().collect())), +// ].into_iter().collect()); +// let preds = vec![("dict", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::M(_)))) as Box<datafu::Predicate<Value>>)].into_iter().collect(); +// let pat = datafu::Pattern::<Value>::compile::<_, &str>(" +// -> 'projects'? +// -> commit /[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/? 
:?$dict +// -> url :?$dict +// -> branch :?$dict", +// Some(preds), +// None, +// ).unwrap(); +// let mut matcher = pat.attempt_match(&tree); +// let m = matcher.next().unwrap().unwrap(); +// assert_eq!(m["commit"].0, RefOwn::Ref(&Value::from("385e734a52e13949a7a5c71827f6de920dbfea43"))); +// assert_eq!(m["url"].0, RefOwn::Ref(&Value::from("https://soniex2.autistic.space/git-repos/ganarchy.git"))); +// assert_eq!(m["branch"].0, RefOwn::Ref(&Value::from("HEAD"))); +// assert_eq!(m["branch"].1, RefOwn::Ref(&Value::M(vec![(Value::from("active"), Value::B(true))].into_iter().collect()))); +// assert!(matcher.next().is_none()); +//} +// +//#[test] +//fn test_harder() { +// let tree = Value::M(vec![ +// ("projects".into(), Value::M(vec![ +// ("385e734a52e13949a7a5c71827f6de920dbfea43".into(), Value::M(vec![ +// ("https://soniex2.autistic.space/git-repos/ganarchy.git".into(), Value::M(vec![ +// ("HEAD".into(), Value::M(vec![ +// ("active".into(), Value::B(true)), +// ].into_iter().collect())), +// ].into_iter().collect())), +// ].into_iter().collect())), +// ].into_iter().collect())), +// ].into_iter().collect()); +// let preds = vec![ +// ("dict", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::M(_)))) as Box<datafu::Predicate<Value>>), +// ("str", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::S(_)))) as Box<datafu::Predicate<Value>>), +// ("bool", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::B(_)))) as Box<datafu::Predicate<Value>>), +// // stubs, we don't particularly need to test these +// ("commit", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::S(_)))) as Box<datafu::Predicate<Value>>), +// ("uri", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::S(_)))) as Box<datafu::Predicate<Value>>), +// ].into_iter().collect(); +// let pat = datafu::Pattern::<Value>::compile::<_, &str>(" +// ->'projects':$dict +// ->commit[:?$str:?$commit]:?$dict +// ->url[:?$str:?$uri]:?$dict +// ->branch:?$dict 
+// (->active'active'?:?$bool) +// (->federate'federate'?:?$bool)?", +// Some(preds), +// None, +// ).unwrap(); +// let mut matcher = pat.attempt_match(&tree); +// let m = matcher.next().unwrap().unwrap(); +// assert_eq!(m["commit"].0, RefOwn::Ref(&Value::from("385e734a52e13949a7a5c71827f6de920dbfea43"))); +// assert_eq!(m["url"].0, RefOwn::Ref(&Value::from("https://soniex2.autistic.space/git-repos/ganarchy.git"))); +// assert_eq!(m["branch"].0, RefOwn::Ref(&Value::from("HEAD"))); +// assert_eq!(m["active"].1, RefOwn::Ref(&Value::B(true))); +// assert_eq!(m.get("federate"), None); +// assert!(matcher.next().is_none()); +//} diff --git a/tests/common/mod.rs b/tests/common/mod.rs deleted file mode 100644 index d0b9e89..0000000 --- a/tests/common/mod.rs +++ /dev/null @@ -1,189 +0,0 @@ -/* - * This file is part of Datafu - * Copyright (C) 2021 Soni L. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <https://www.gnu.org/licenses/>. 
- */ - -use std::borrow::Borrow; -use std::borrow::Cow; -use std::collections::BTreeMap; - -use datafu::RefOwn; -use datafu::PatternTypes; -use datafu::KVPair; - -#[derive(PartialEq, Eq, PartialOrd, Ord, Debug)] -pub enum Value { - U(usize), - B(bool), - M(BTreeMap<Value, Value>), - S(Cow<'static, str>), -} - -#[derive(Copy, Clone, Eq, PartialEq, Debug)] -pub enum Dummy { -} - -trait ValueHelper { - fn as_value(&self) -> Result<&Value, &str>; -} - -impl ValueHelper for Value { - fn as_value(&self) -> Result<&Value, &str> { - Ok(self) - } -} - -impl ValueHelper for &str { - fn as_value(&self) -> Result<&Value, &str> { - Err(self) - } -} - -impl PartialEq<str> for Value { - fn eq(&self, other: &str) -> bool { - matches!(self, Value::S(l) if l == other) - } -} - -impl PartialEq<Value> for str { - fn eq(&self, other: &Value) -> bool { - matches!(other, Value::S(r) if self == r) - } -} - -impl PartialEq<Dummy> for Value { - fn eq(&self, other: &Dummy) -> bool { - let _ = other; - unreachable!() - } -} - -impl PartialEq<Value> for Dummy { - fn eq(&self, other: &Value) -> bool { - let _ = other; - unreachable!() - } -} - -impl PartialEq<str> for Dummy { - fn eq(&self, other: &str) -> bool { - let _ = other; - unreachable!() - } -} - -impl PartialEq<Dummy> for str { - fn eq(&self, other: &Dummy) -> bool { - let _ = other; - unreachable!() - } -} - -impl<'a> PartialEq for dyn ValueHelper + 'a { - fn eq(&self, other: &(dyn ValueHelper + 'a)) -> bool { - match (self.as_value(), other.as_value()) { - (a, b) if a == b => true, - (Ok(Value::S(a)), Err(b)) | (Err(b), Ok(Value::S(a))) => { - a.eq(b) - }, - _ => false, - } - } -} - -impl<'a> PartialOrd for dyn ValueHelper + 'a { - fn partial_cmp(&self, other: &(dyn ValueHelper + 'a)) -> Option<std::cmp::Ordering> { - Some(self.cmp(other)) - } -} - -impl<'a> Eq for dyn ValueHelper + 'a { -} - -impl<'a> Ord for dyn ValueHelper + 'a { - fn cmp(&self, other: &(dyn ValueHelper + 'a)) -> std::cmp::Ordering { - match (self.as_value(), 
other.as_value()) { - (Ok(a), Ok(b)) => a.cmp(b), - (Err(a), Err(b)) => a.cmp(b), - (Ok(Value::S(a)), Err(b)) => (**a).cmp(b), - (Err(a), Ok(Value::S(b))) => a.cmp(b.borrow()), - (Ok(_), Err(_)) => std::cmp::Ordering::Less, - (Err(_), Ok(_)) => std::cmp::Ordering::Greater, - } - } -} - -impl From<&'static str> for Value { - fn from(x: &'static str) -> Value { - Value::S(x.into()) - } -} - - -impl<'a> Borrow<dyn ValueHelper + 'a> for Value { - fn borrow(&self) -> &(dyn ValueHelper + 'a) { - self - } -} - -impl PatternTypes for Value { - type Ref = Self; - type Own = Dummy; - - fn pairs<'b>( - item: RefOwn<'b, Self, Dummy> - ) -> Option<Box<dyn Iterator<Item=KVPair<'b, Self>> + 'b>> { - match item { - RefOwn::Ref(Value::M(map)) => { - Some(Box::new(map.iter().map(|(a, b)| (a.into(), b.into())))) - }, - _ => None - } - } - - fn get<'a, 'b>( - item: RefOwn<'b, Self, Dummy>, - key: RefOwn<'a, Self, Dummy> - ) -> Option<Option<KVPair<'b, Self>>> { - match item { - RefOwn::Ref(Value::M(map)) => { - Some(match key { - RefOwn::Ref(key) => map.get_key_value(key), - RefOwn::Own(_key) => unreachable!(), - RefOwn::Str(key) => map.get_key_value(&key as &dyn ValueHelper), - }.map(|(k,v)| (k.into(), v.into()))) - }, - _ => None - } - } - - fn matches( - left: RefOwn<'_, Self, Dummy>, - right: RefOwn<'_, Self, Dummy> - ) -> bool { - left == right - } - - fn as_str<'b>( - item: RefOwn<'b, Self, Dummy> - ) -> Option<&'b str> { - match item { - RefOwn::Str(key) => Some(key), - RefOwn::Ref(Value::S(key)) => Some(key), - _ => None, - } - } -} diff --git a/tests/parser_prop.rs b/tests/parser_prop.rs index 57976cb..0b57171 100644 --- a/tests/parser_prop.rs +++ b/tests/parser_prop.rs @@ -1,33 +1,11 @@ -/* - * This file is part of Datafu - * Copyright (C) 2021 Soni L. 
- * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <https://www.gnu.org/licenses/>. - */ - -extern crate proptest; -extern crate datafu; - -mod common; - -use common::Value; +// Copyright (C) 2021 Soni L. +// SPDX-License-Identifier: MIT OR Apache-2.0 use proptest::prelude::*; proptest! { #[test] fn doesnt_panic(s in "\\PC*") { - let _ = datafu::Pattern::<Value>::compile::<&str, &str>(&s, None, None); + let _ = datafu::Pattern::<Box<dyn erased_serde::Serialize>>::compile::<&str, &str>(&s, None, None); } } |