diff options
-rw-r--r-- | Cargo.toml | 2 | ||||
-rw-r--r-- | src/errors.rs | 28 | ||||
-rw-r--r-- | src/lib.rs | 104 |
3 files changed, 118 insertions, 16 deletions
diff --git a/Cargo.toml b/Cargo.toml index 0ad46cd..ea4342e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "datafu" -version = "0.0.4" +version = "0.0.5" authors = ["SoniEx2 <endermoneymod@gmail.com>"] license = "AGPL-3.0-or-later" description = "A Rust library for extracting data from config objects and other arbitrary object graphs." diff --git a/src/errors.rs b/src/errors.rs index e98483f..f29d635 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -16,37 +16,43 @@ * along with this program. If not, see <https://www.gnu.org/licenses/>. */ +//! <!-- TODO figure out module-level docs for this --> + +#[cfg(doc)] +use crate::PatternTypes; + /// These are errors that may be returned by the pattern compiler when /// compiling a pattern. /// /// "String" here refers to a string literal in the pattern, not the input -/// string. The input string is referred to as "the input". +/// string. The input string is referred to as "the pattern". #[derive(Debug)] pub enum PatternError<'a> { // Syntax Errors: - /// The input contains an invalid string escape. + /// The pattern contains an invalid string escape. StringEscape(usize, &'a str), - /// The input ends in the middle of a string literal. + /// The pattern ends in the middle of a string literal. StringEnd(usize, &'a str), - /// The input contains an invalid regex escape. + /// The pattern contains an invalid regex escape. RegexEscape(usize, &'a str), - /// The input ends in the middle of a regex literal. + /// The pattern ends in the middle of a regex literal. RegexEnd(usize, &'a str), - /// The input contains characters that don't make up a token. + /// The pattern contains characters that don't make up a token. Token(usize, &'a str), // Link Errors: - /// The input requests a parameter that wasn't provided. + /// The pattern requests a parameter that wasn't provided. UnknownParameter(usize, &'a str), - /// The input requests a predicate that wasn't provided. 
+ /// The pattern requests a predicate that wasn't provided. UnknownPredicate(usize, &'a str), - /// The input contains an invalid regex. + /// The pattern contains an invalid regex. Regex(usize, &'a str, ::regex::Error), } -/// Error type returned by the matcher. +/// These are errors that may be returned by the matcher when matching a +/// pattern. #[derive(Clone, Debug)] pub enum MatchError { /// Returned if the pattern nests too deeply. @@ -55,7 +61,7 @@ pub enum MatchError { ValidationError, /// Returned if the pattern attempts an unsupported operation. /// - /// In particular, if the PatternTypes doesn't support `get` or `pairs` + /// In particular, if the [`PatternTypes`] doesn't support `get` or `pairs` /// for a given value, this error will be returned. It can be treated as a /// ValidationError, or as a bug in the pattern, at the user's discretion. UnsupportedOperation, diff --git a/src/lib.rs b/src/lib.rs index 099b268..005e9bf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,9 +18,102 @@ #![warn(rust_2018_idioms)] #![cfg_attr(not(feature = "stable"), feature(label_break_value))] -//! Datafu is a more-or-less simple query language of sorts. It was primarily -//! designed for dealing with object trees parsed from configuration files, -//! but can also be used with JSON APIs and whatnot. +//! Datafu is a regex-inspired query language. It was primarily +//! designed for processing object trees parsed from configuration files, but +//! can also be used with JSON APIs, and even XML. +//! +//! # Language Reference +//! +//! Datafu expressions have the ability to iterate, index, validate and filter +//! data structures, through the use of the syntax elements below. +//! +//! ## Syntax Elements of Datafu Expressions +//! +//! An arrow is `->` and indicates indexing/iteration. Whether indexing or +//! iteration is used is defined by the elements that follow, with iteration +//! being used by default. +//! +//! 
A variable is a sequence of alphanumeric characters, not starting with +//! a digit. A `(key, value)` tuple containing the respective matched +//! element will be identified by this name in the results map. +//! +//! A literal is a sequence of characters delimited by `'`, optionally +//! followed by `?`, with `%` as the escape character, and defines a +//! string-keyed indexing operation. A literal can contain any character, +//! except unescaped `%` or `'` symbols, which must be escaped as +//! `%%` and `%'`, respectively. The sequence of characters defined by +//! a literal is used as the string object in the indexing operation. +//! +//! A parameter is `$`, optionally followed by `?`, followed by a +//! sequence of alphanumeric characters, not starting with a digit, and +//! defines an object-keyed indexing operation. The sequence of characters +//! defined by a parameter is used to retrieve, from the pattern's +//! definitions, the object to be used in the indexing operation. +//! +//! A regex is a sequence of characters delimited by `/`, optionally +//! followed by `?`, with `%` as the escape character. A regex can +//! contain any character, except unescaped `%` or `/` symbols, which +//! must be escaped as `%%` and `%/`, respectively. The sequence of +//! characters defined by a regex is passed to the `regex` crate, which +//! may apply further restrictions on the characters used, and is used to +//! accept the respective keys processed by the iterator. +//! +//! A predicate is `:`, optionally followed by `?`, followed by a +//! `$` and a sequence of alphanumeric characters, not starting with a +//! digit, and is used to accept values to be processed based on an +//! external [`Predicate`]. +//! +//! A key match is a datafu expression (including, but not limited to, the +//! empty datafu expression) enclosed within `[` and `]`, optionally +//! prefixed with one or more predicates, and applies the enclosed +//! 
predicates and datafu expression to the key (or index) being processed. +//! A key match enables additional validation of keys and/or extraction of +//! values from keys, and accepts a key if and only if the enclosed +//! predicates accept the key and the enclosed expression matches the key. +//! +//! A subvalue is a datafu expression (including, but not limited to, the +//! empty datafu expression) enclosed within `(` and `)`, and applies +//! the enclosed datafu expression to the value (or index) being processed. +//! A subvalue enables the ability to match multiple values on the same +//! object, and accepts a value if and only if the enclosed expression +//! matches the value. A subvalue can be made optional by the presence of +//! a `?` after the subvalue - in case of no match, it will just omit +//! the relevant keys in the result. Optional subvalues are unrelated to +//! non-validating syntax elements (see below), they just use the same +//! syntax. +//! +//! Some syntax elements can be validating or non-validating. Validating +//! syntax elements will return a [`errors::MatchError::ValidationError`] +//! whenever a non-accepted element is encountered, whereas non-validating +//! ones will skip them. Whether an element is validating is determined by +//! the absence of an optional `?` in the documented position. Note that +//! it is possible for a validating syntax element to still yield results +//! before returning a [`errors::MatchError::ValidationError`], so one +//! needs to be careful when writing code where such behaviour could +//! result in a security vulnerability. +//! +//! The empty pattern matches anything, but only does so once. +//! +//! ## Syntax of Datafu Expressions +//! +//! Datafu Expressions follow the given syntax, in (pseudo-)extended BNF: +//! +//! ```text +//! expression ::= {arrow tag} {subvalue} +//! tag ::= identifier [arg] {predicate} | arg {predicate} +//! arg ::= parameter | literal | regex | keymatch +//! +//! arrow ::= '->' +//! 
keymatch ::= '[' {predicate} expression ']' +//! subvalue ::= '(' {predicate} expression ')' ['?'] +//! ``` +//! +//! For a description of the terminals "parameter", "literal", "regex" and +//! "predicate", see "Syntax Elements of Datafu Expressions" above. +//! +//! # Examples +//! +//! <!-- TODO --> extern crate regex; @@ -36,6 +129,8 @@ pub use pattern::Pattern; // TODO replace with GATs /// A borrowed or owned value of various types. +/// +/// This exists purely as a workaround for Rust not having GATs yet. #[derive(Debug)] pub enum RefOwn<'b, T: ?Sized, U> { /// Borrowed T. @@ -80,6 +175,7 @@ impl<'b, T: ?Sized, U: Clone> Clone for RefOwn<'b, T, U> { } } +/// A tuple representing a key-value pair. pub type KVPair<'b, T> = (RefOwn<'b, <T as PatternTypes>::Ref, <T as PatternTypes>::Own>, RefOwn<'b, <T as PatternTypes>::Ref, <T as PatternTypes>::Own>); impl<'b, T, U> From<&'b T> for RefOwn<'b, T, U> { @@ -137,5 +233,5 @@ pub trait PatternTypes { ) -> Option<&'b str>; } -// TODO +/// A predicate for keys and values. pub type Predicate<T> = dyn (Fn(RefOwn<'_, <T as PatternTypes>::Ref, <T as PatternTypes>::Own>) -> bool) + Send + Sync; |