/* * Datafu - Rust library for extracting data from object graphs. * Copyright (C) 2021 Soni L. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ #![warn(rust_2018_idioms)] #![cfg_attr(not(feature = "stable"), feature(label_break_value))] //! Datafu is a regex-inspired query language. It was primarily //! designed for processing object trees parsed from configuration files, but //! can also be used with JSON APIs, and even XML. //! //! # Languge Reference //! //! Datafu expressions have the ability to iterate, index, validate and filter //! data structures, through the use of the syntax elements below. //! //! ## Syntax Elements of Datafu Expressions //! //! An arrow is `->` and indicates indexing/iteration. Whether indexing or //! iteration is used is defined by the elements that follow, with iteration //! being used by default. //! //! A variable is a sequence of alphanumeric characters, not starting with //! a digit. A `(key, value)` tuple containing the respective matched //! element will be identified by this name in the results map. //! //! A literal is a sequence of characters delimited by `'`, optionally //! followed by `?`, with `%` as the escape character, and defines a //! string-keyed indexing operation. A literal can contain any character, //! except unescaped `%` or `'` symbols, which must be escaped as //! `%%` and `%'`, respectively. The sequence of characters defined by //! a literal is used as the string object in the indexing operation. //! //! A parameter is `$`, optionally followed by `?`, followed by a //! sequence of alphanumeric characters, not starting with a digit, and //! defines an object-keyed indexing operation. The sequence of characters //! defined by a parameter is used to retrieve, from the pattern's //! definitions, the object to be used in the indexing operation. //! //! A regex is a sequence of characters delimited by `/`, optionally //! followed by `?`, with `%` as the escape character. A regex can //! contain any character, except unescaped `%` or `/` symbols, which //! must be escaped as `%%` and `%/`, respectively. The sequence of //! characters defined by a regex is passed to the `regex` crate, which //! may apply further restrictions on the characters used, and is used to //! accept the respective keys processed by the iterator. //! //! A predicate is `:`, optionally followed by `?`, followed by an //! `$` and a sequence of alphanumeric characters, not starting with a //! digit, and is used to accept values to be processed based on an //! external [`Predicate`]. //! //! A key match is a datafu expression (including, but not limited to, the //! empty datafu expression) enclosed within `[` and `]`, optionally //! prefixed with one or more predicates, and applies the enclosed //! predicates and datafu expression to the key (or index) being processed. //! A key match enables additional validation of keys and/or extraction of //! values from keys, and accepts a key if and only if the enclosed //! predicates accept the key and the enclosed expression matches the key. //! //! A subvalue is a datafu expression (including, but not limited to, the //! empty datafu expression) enclosed within `(` and `)`, and applies //! the enclosed datafu expression to the value (or index) being processed. //! A subvalue enables the ability to match multiple values on the same //! object, and accepts a value if and only the enclosed expression //! matches the value. A subvalue can be made optional by the presence of //! a `?` after the subvalue - in case of no match, it will just omit //! the relevant keys in the result. Optional subvalues are unrelated to //! non-validating syntax elements (see below), they just use the same //! syntax. //! //! Some syntax elements can be validating or non-validating. Validating //! syntax elements will return a [`errors::MatchError::ValidationError`] //! whenever a non-accepted element is encountered, whereas non-validating //! ones will skip them. Whether an element is validating is determined by //! the absence of an optional `?` in the documented position. Note that //! it is possible for a validating syntax element to still yield results //! before returning a [`errors::MatchError::ValidationError`], so one //! needs to be careful when writing code where such behaviour could //! result in a security vulnerability. //! //! The empty pattern matches anything, but only does so once. //! //! ## Syntax of Datafu Expressions //! //! Datafu Expressions follow the given syntax, in (pseudo-)extended BNF: //! //! ```text //! expression ::= {arrow tag} {subvalue} //! tag ::= identifier [arg] {predicate} | arg {predicate} //! arg ::= parameter | literal | regex | keymatch //! //! arrow ::= '->' //! keymatch ::= '[' {predicate} expression ']' //! subvalue ::= '(' {predicate} expression ')' ['?'] //! ``` //! //! For a description of the terminals "parameter", "literal", "regex" and //! "predicate", see "Syntax Elements of Datafu Expressions" above. //! //! # Examples //! //! extern crate regex; #[cfg(test)] extern crate proptest; pub mod errors; mod parser; mod pattern; mod vm; pub use pattern::Pattern; pub use vm::Matcher; // TODO replace with GATs /// A borrowed or owned value of various types. /// /// This exists purely as a workaround for Rust not having GATs yet. #[derive(Debug)] pub enum RefOwn<'b, T: ?Sized, U> { /// Borrowed T. Ref(&'b T), /// Borrowed string. Str(&'b str), /// Owned U. Own(U), } impl<'b, T, U> PartialEq for RefOwn<'b, T, U> where T: ?Sized + PartialEq + PartialEq + PartialEq, U: PartialEq + PartialEq + PartialEq, str: PartialEq + PartialEq + PartialEq { fn eq(&self, other: &Self) -> bool { match (self, other) { (RefOwn::Ref(l), RefOwn::Ref(r)) => l.eq(r), (RefOwn::Own(l), RefOwn::Own(r)) => l.eq(r), (RefOwn::Str(l), RefOwn::Str(r)) => l.eq(r), (RefOwn::Ref(l), RefOwn::Own(r)) => PartialEq::eq(*l, r), (RefOwn::Own(l), RefOwn::Str(r)) => PartialEq::eq(l, *r), (RefOwn::Str(l), RefOwn::Ref(r)) => l.eq(r), (RefOwn::Ref(l), RefOwn::Str(r)) => l.eq(r), (RefOwn::Own(l), RefOwn::Ref(r)) => PartialEq::eq(l, *r), (RefOwn::Str(l), RefOwn::Own(r)) => PartialEq::eq(*l, r), } } } impl<'b, T: ?Sized, U: Copy> Copy for RefOwn<'b, T, U> { } impl<'b, T: ?Sized, U: Clone> Clone for RefOwn<'b, T, U> { fn clone(&self) -> Self { match self { RefOwn::Ref(r) => RefOwn::Ref(r), RefOwn::Str(r) => RefOwn::Str(r), RefOwn::Own(v) => RefOwn::Own(v.clone()), } } } /// A tuple representing a key-value pair. pub type KVPair<'b, T> = (RefOwn<'b, ::Ref, ::Own>, RefOwn<'b, ::Ref, ::Own>); impl<'b, T, U> From<&'b T> for RefOwn<'b, T, U> { fn from(x: &'b T) -> RefOwn<'b, T, U> { RefOwn::Ref(x) } } // TODO investigate if this should be PatternTypes: Default /// Defines the types and operations used for matching. pub trait PatternTypes { /// The borrowed type. type Ref: ?Sized; // TODO replace with GATs. // TODO potentially relax with Clone? /// The owned type. type Own: Copy + 'static; /// Returns an iterator over key-value pairs contained within an item, or /// None if this operation is unsupported for the given value. fn pairs<'b>( item: RefOwn<'b, Self::Ref, Self::Own> ) -> Option> + 'b>>; /// Returns an optional key-value pair keyed by the given key, or None if /// this operation is unsupported for the given value. fn get<'a, 'b>( item: RefOwn<'b, Self::Ref, Self::Own>, key: RefOwn<'a, Self::Ref, Self::Own> ) -> Option>>; // TODO replace with GATs + newtypes /// Returns whether two keys/values are the same/equivalent. This must provide /// the same guarantees as PartialEq. In fact, this is a replacement for /// PartialEq for cases where it's not possible to just use PartialEq. fn matches( left: RefOwn<'_, Self::Ref, Self::Own>, right: RefOwn<'_, Self::Ref, Self::Own> ) -> bool; /// Returns the value as an &str. fn as_str<'b>( value: RefOwn<'b, Self::Ref, Self::Own> ) -> Option<&'b str>; } /// A predicate for keys and values. pub type Predicate = dyn (Fn(RefOwn<'_, ::Ref, ::Own>) -> bool) + Send + Sync;