summary refs log tree commit diff stats
diff options
context:
space:
mode:
author SoniEx2 <endermoneymod@gmail.com> 2021-02-17 23:30:06 -0300
committer SoniEx2 <endermoneymod@gmail.com> 2021-02-17 23:30:06 -0300
commit 6d6426743298a23a4e084684778b623b9e71882c (patch)
tree 16ba2affdd57aaea7d72b3f649050f0e4f5db9ac
parent 81a1d3ca72d9f28605bd22687ab20cb61a4ceb1b (diff)
Improve main documentation
-rw-r--r-- Cargo.toml 2
-rw-r--r-- src/errors.rs 28
-rw-r--r-- src/lib.rs 104
3 files changed, 118 insertions, 16 deletions
diff --git a/Cargo.toml b/Cargo.toml
index 0ad46cd..ea4342e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "datafu"
-version = "0.0.4"
+version = "0.0.5"
 authors = ["SoniEx2 <endermoneymod@gmail.com>"]
 license = "AGPL-3.0-or-later"
 description = "A Rust library for extracting data from config objects and other arbitrary object graphs."
diff --git a/src/errors.rs b/src/errors.rs
index e98483f..f29d635 100644
--- a/src/errors.rs
+++ b/src/errors.rs
@@ -16,37 +16,43 @@
  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
  */
 
+//! <!-- TODO figure out module-level docs for this -->
+
+#[cfg(doc)]
+use crate::PatternTypes;
+
 /// These are errors that may be returned by the pattern compiler when
 /// compiling a pattern.
 ///
 /// "String" here refers to a string literal in the pattern, not the input
-/// string. The input string is referred to as "the input".
+/// string. The input string is referred to as "the pattern".
 #[derive(Debug)]
 pub enum PatternError<'a> {
     // Syntax Errors:
 
-    /// The input contains an invalid string escape.
+    /// The pattern contains an invalid string escape.
     StringEscape(usize, &'a str),
-    /// The input ends in the middle of a string literal.
+    /// The pattern ends in the middle of a string literal.
     StringEnd(usize, &'a str),
-    /// The input contains an invalid regex escape.
+    /// The pattern contains an invalid regex escape.
     RegexEscape(usize, &'a str),
-    /// The input ends in the middle of a regex literal.
+    /// The pattern ends in the middle of a regex literal.
     RegexEnd(usize, &'a str),
-    /// The input contains characters that don't make up a token.
+    /// The pattern contains characters that don't make up a token.
     Token(usize, &'a str),
 
     // Link Errors:
 
-    /// The input requests a parameter that wasn't provided.
+    /// The pattern requests a parameter that wasn't provided.
     UnknownParameter(usize, &'a str),
-    /// The input requests a predicate that wasn't provided.
+    /// The pattern requests a predicate that wasn't provided.
     UnknownPredicate(usize, &'a str),
-    /// The input contains an invalid regex.
+    /// The pattern contains an invalid regex.
     Regex(usize, &'a str, ::regex::Error),
 }
 
-/// Error type returned by the matcher.
+/// These are errors that may be returned by the matcher when matching a
+/// pattern.
 #[derive(Clone, Debug)]
 pub enum MatchError {
     /// Returned if the pattern nests too deeply.
@@ -55,7 +61,7 @@ pub enum MatchError {
     ValidationError,
     /// Returned if the pattern attempts an unsupported operation.
     ///
-    /// In particular, if the PatternTypes doesn't support `get` or `pairs`
+    /// In particular, if the [`PatternTypes`] doesn't support `get` or `pairs`
     /// for a given value, this error will be returned. It can be treated as a
     /// ValidationError, or as a bug in the pattern, at the user's discretion.
     UnsupportedOperation,
diff --git a/src/lib.rs b/src/lib.rs
index 099b268..005e9bf 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -18,9 +18,102 @@
 #![warn(rust_2018_idioms)]
 #![cfg_attr(not(feature = "stable"), feature(label_break_value))]
 
-//! Datafu is a more-or-less simple query language of sorts. It was primarily
-//! designed for dealing with object trees parsed from configuration files,
-//! but can also be used with JSON APIs and whatnot.
+//! Datafu is a regex-inspired query language. It was primarily
+//! designed for processing object trees parsed from configuration files, but
+//! can also be used with JSON APIs, and even XML.
+//!
+//! # Language Reference
+//!
+//! Datafu expressions have the ability to iterate, index, validate and filter
+//! data structures, through the use of the syntax elements below.
+//!
+//! ## Syntax Elements of Datafu Expressions
+//!
+//! An arrow is `->` and indicates indexing/iteration. Whether indexing or
+//! iteration is used is defined by the elements that follow, with iteration
+//! being used by default.
+//!
+//! A variable is a sequence of alphanumeric characters, not starting with
+//! a digit. A `(key, value)` tuple containing the respective matched
+//! element will be identified by this name in the results map.
+//!
+//! A literal is a sequence of characters delimited by `'`, optionally
+//! followed by `?`, with `%` as the escape character, and defines a
+//! string-keyed indexing operation. A literal can contain any character,
+//! except unescaped `%` or `'` symbols, which must be escaped as
+//! `%%` and `%'`, respectively. The sequence of characters defined by
+//! a literal is used as the string object in the indexing operation.
+//!
+//! A parameter is `$`, optionally followed by `?`, followed by a
+//! sequence of alphanumeric characters, not starting with a digit, and
+//! defines an object-keyed indexing operation. The sequence of characters
+//! defined by a parameter is used to retrieve, from the pattern's
+//! definitions, the object to be used in the indexing operation.
+//!
+//! A regex is a sequence of characters delimited by `/`, optionally
+//! followed by `?`, with `%` as the escape character. A regex can
+//! contain any character, except unescaped `%` or `/` symbols, which
+//! must be escaped as `%%` and `%/`, respectively. The sequence of
+//! characters defined by a regex is passed to the `regex` crate, which
+//! may apply further restrictions on the characters used, and is used to
+//! accept the respective keys processed by the iterator.
+//!
+//! A predicate is `:`, optionally followed by `?`, followed by a
+//! `$` and a sequence of alphanumeric characters, not starting with a
+//! digit, and is used to accept values to be processed based on an
+//! external [`Predicate`].
+//!
+//! A key match is a datafu expression (including, but not limited to, the
+//! empty datafu expression) enclosed within `[` and `]`, optionally
+//! prefixed with one or more predicates, and applies the enclosed
+//! predicates and datafu expression to the key (or index) being processed.
+//! A key match enables additional validation of keys and/or extraction of
+//! values from keys, and accepts a key if and only if the enclosed
+//! predicates accept the key and the enclosed expression matches the key.
+//!
+//! A subvalue is a datafu expression (including, but not limited to, the
+//! empty datafu expression) enclosed within `(` and `)`, and applies
+//! the enclosed datafu expression to the value (or index) being processed.
+//! A subvalue enables the ability to match multiple values on the same
+//! object, and accepts a value if and only if the enclosed expression
+//! matches the value. A subvalue can be made optional by the presence of
+//! a `?` after the subvalue - in case of no match, it will just omit
+//! the relevant keys in the result. Optional subvalues are unrelated to
+//! non-validating syntax elements (see below); they just use the same
+//! syntax.
+//!
+//! Some syntax elements can be validating or non-validating. Validating
+//! syntax elements will return a [`errors::MatchError::ValidationError`]
+//! whenever a non-accepted element is encountered, whereas non-validating
+//! ones will skip them. Whether an element is validating is determined by
+//! the absence of an optional `?` in the documented position. Note that
+//! it is possible for a validating syntax element to still yield results
+//! before returning a [`errors::MatchError::ValidationError`], so one
+//! needs to be careful when writing code where such behaviour could
+//! result in a security vulnerability.
+//!
+//! The empty pattern matches anything, but only does so once.
+//!
+//! ## Syntax of Datafu Expressions
+//!
+//! Datafu Expressions follow the given syntax, in (pseudo-)extended BNF:
+//!
+//! ```text
+//! expression ::= {arrow tag} {subvalue}
+//! tag ::= identifier [arg] {predicate} | arg {predicate}
+//! arg ::= parameter | literal | regex | keymatch
+//!
+//! arrow ::= '->'
+//! keymatch ::= '[' {predicate} expression ']'
+//! subvalue ::= '(' {predicate} expression ')' ['?']
+//! ```
+//!
+//! For a description of the terminals "parameter", "literal", "regex" and
+//! "predicate", see "Syntax Elements of Datafu Expressions" above.
+//!
+//! # Examples
+//!
+//! <!-- TODO -->
 
 extern crate regex;
 
@@ -36,6 +129,8 @@ pub use pattern::Pattern;
 
 // TODO replace with GATs
 /// A borrowed or owned value of various types.
+///
+/// This exists purely as a workaround for Rust not having GATs yet.
 #[derive(Debug)]
 pub enum RefOwn<'b, T: ?Sized, U> {
     /// Borrowed T.
@@ -80,6 +175,7 @@ impl<'b, T: ?Sized, U: Clone> Clone for RefOwn<'b, T, U> {
     }
 }
 
+/// A tuple representing a key-value pair.
 pub type KVPair<'b, T> = (RefOwn<'b, <T as PatternTypes>::Ref, <T as PatternTypes>::Own>, RefOwn<'b, <T as PatternTypes>::Ref, <T as PatternTypes>::Own>);
 
 impl<'b, T, U> From<&'b T> for RefOwn<'b, T, U> {
@@ -137,5 +233,5 @@ pub trait PatternTypes {
     ) -> Option<&'b str>;
 }
 
-// TODO
+/// A predicate for keys and values.
 pub type Predicate<T> = dyn (Fn(RefOwn<'_, <T as PatternTypes>::Ref, <T as PatternTypes>::Own>) -> bool) + Send + Sync;