/*
* Datafu - Rust library for extracting data from object graphs.
* Copyright (C) 2021 Soni L.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#![warn(rust_2018_idioms)]
#![cfg_attr(not(feature = "stable"), feature(label_break_value))]
//! Datafu is a regex-inspired query language. It was primarily
//! designed for processing object trees parsed from configuration files, but
//! can also be used with JSON APIs, and even XML.
//!
//! # Language Reference
//!
//! Datafu expressions have the ability to iterate, index, validate and filter
//! data structures, through the use of the syntax elements below.
//!
//! ## Syntax Elements of Datafu Expressions
//!
//! An arrow is `->` and indicates indexing/iteration. Whether indexing or
//! iteration is used is defined by the elements that follow, with iteration
//! being used by default.
//!
//! A variable is a sequence of alphanumeric characters, not starting with
//! a digit. A `(key, value)` tuple containing the respective matched
//! element will be identified by this name in the results map.
//!
//! A literal is a sequence of characters delimited by `'`, optionally
//! followed by `?`, with `%` as the escape character, and defines a
//! string-keyed indexing operation. A literal can contain any character,
//! except unescaped `%` or `'` symbols, which must be escaped as
//! `%%` and `%'`, respectively. The sequence of characters defined by
//! a literal is used as the string object in the indexing operation.
//!
//! A parameter is `$`, optionally followed by `?`, followed by a
//! sequence of alphanumeric characters, not starting with a digit, and
//! defines an object-keyed indexing operation. The sequence of characters
//! defined by a parameter is used to retrieve, from the pattern's
//! definitions, the object to be used in the indexing operation.
//!
//! A regex is a sequence of characters delimited by `/`, optionally
//! followed by `?`, with `%` as the escape character. A regex can
//! contain any character, except unescaped `%` or `/` symbols, which
//! must be escaped as `%%` and `%/`, respectively. The sequence of
//! characters defined by a regex is passed to the `regex` crate, which
//! may apply further restrictions on the characters used, and is used to
//! accept the respective keys processed by the iterator.
//!
//! A predicate is `:`, optionally followed by `?`, followed by a
//! `$` and a sequence of alphanumeric characters, not starting with a
//! digit, and is used to accept values to be processed based on an
//! external [`Predicate`].
//!
//! A key match is a datafu expression (including, but not limited to, the
//! empty datafu expression) enclosed within `[` and `]`, optionally
//! prefixed with one or more predicates, and applies the enclosed
//! predicates and datafu expression to the key (or index) being processed.
//! A key match enables additional validation of keys and/or extraction of
//! values from keys, and accepts a key if and only if the enclosed
//! predicates accept the key and the enclosed expression matches the key.
//!
//! A subvalue is a datafu expression (including, but not limited to, the
//! empty datafu expression) enclosed within `(` and `)`, and applies
//! the enclosed datafu expression to the value (or index) being processed.
//! A subvalue enables the ability to match multiple values on the same
//! object, and accepts a value if and only if the enclosed expression
//! matches the value. A subvalue can be made optional by the presence of
//! a `?` after the subvalue - in case of no match, it will just omit
//! the relevant keys in the result. Optional subvalues are unrelated to
//! non-validating syntax elements (see below), they just use the same
//! syntax.
//!
//! Some syntax elements can be validating or non-validating. Validating
//! syntax elements will return a [`errors::MatchError::ValidationError`]
//! whenever a non-accepted element is encountered, whereas non-validating
//! ones will skip them. Whether an element is validating is determined by
//! the absence of an optional `?` in the documented position. Note that
//! it is possible for a validating syntax element to still yield results
//! before returning a [`errors::MatchError::ValidationError`], so one
//! needs to be careful when writing code where such behaviour could
//! result in a security vulnerability.
//!
//! The empty pattern matches anything, but only does so once.
//!
//! ## Syntax of Datafu Expressions
//!
//! Datafu Expressions follow the given syntax, in (pseudo-)extended BNF:
//!
//! ```text
//! expression ::= {arrow tag} {subvalue}
//! tag ::= identifier [arg] {predicate} | arg {predicate}
//! arg ::= parameter | literal | regex | keymatch
//!
//! arrow ::= '->'
//! keymatch ::= '[' {predicate} expression ']'
//! subvalue ::= '(' {predicate} expression ')' ['?']
//! ```
//!
//! For a description of the terminals "parameter", "literal", "regex" and
//! "predicate", see "Syntax Elements of Datafu Expressions" above.
//!
//! # Examples
//!
//! <!-- TODO -->
extern crate regex;
#[cfg(test)]
extern crate proptest;
pub mod errors;
mod parser;
mod pattern;
mod vm;
pub use pattern::Pattern;
// TODO replace with GATs
/// A value that is either borrowed (as a `T` or as a string slice) or owned
/// (as a `U`).
///
/// Both borrowing variants share the lifetime `'b`, and `T` is allowed to be
/// unsized.
///
/// This exists purely as a workaround for Rust not having GATs yet.
#[derive(Debug)]
pub enum RefOwn<'b, T, U>
where
    T: ?Sized,
{
    /// A shared reference to a `T`.
    Ref(&'b T),
    /// A borrowed string slice.
    Str(&'b str),
    /// A `U` held by value.
    Own(U),
}
/// Payload equality across every combination of variants.
///
/// The variant holding a payload does not matter for the comparison, which is
/// why each of `T`, `U` and `str` has to be comparable with itself and with
/// the other two.
impl<'b, T, U> PartialEq for RefOwn<'b, T, U>
where
    T: ?Sized + PartialEq<T> + PartialEq<U> + PartialEq<str>,
    U: PartialEq<T> + PartialEq<U> + PartialEq<str>,
    str: PartialEq<T> + PartialEq<U> + PartialEq<str>,
{
    fn eq(&self, other: &Self) -> bool {
        // Every arm dereferences down to the bare payloads (`T`, `U` or
        // `str`) and lets `==` select the matching `PartialEq` bound.
        match (self, other) {
            // Left-hand side borrows a `T`.
            (RefOwn::Ref(lhs), RefOwn::Ref(rhs)) => **lhs == **rhs,
            (RefOwn::Ref(lhs), RefOwn::Str(rhs)) => **lhs == **rhs,
            (RefOwn::Ref(lhs), RefOwn::Own(rhs)) => **lhs == *rhs,
            // Left-hand side borrows a `str`.
            (RefOwn::Str(lhs), RefOwn::Ref(rhs)) => **lhs == **rhs,
            (RefOwn::Str(lhs), RefOwn::Str(rhs)) => **lhs == **rhs,
            (RefOwn::Str(lhs), RefOwn::Own(rhs)) => **lhs == *rhs,
            // Left-hand side owns a `U`.
            (RefOwn::Own(lhs), RefOwn::Ref(rhs)) => *lhs == **rhs,
            (RefOwn::Own(lhs), RefOwn::Str(rhs)) => *lhs == **rhs,
            (RefOwn::Own(lhs), RefOwn::Own(rhs)) => *lhs == *rhs,
        }
    }
}
/// A `RefOwn` is `Copy` whenever its owned payload `U` is; the borrowing
/// variants only hold shared references.
impl<'b, T, U> Copy for RefOwn<'b, T, U>
where
    T: ?Sized,
    U: Copy,
{
}
/// Cloning duplicates the reference for the borrowing variants and clones the
/// payload for the owned one.
impl<'b, T, U> Clone for RefOwn<'b, T, U>
where
    T: ?Sized,
    U: Clone,
{
    fn clone(&self) -> Self {
        match *self {
            // Shared references are `Copy`, so they can be lifted straight
            // out of the borrowed enum.
            RefOwn::Ref(borrowed) => RefOwn::Ref(borrowed),
            RefOwn::Str(text) => RefOwn::Str(text),
            // The owned payload is only required to be `Clone`.
            RefOwn::Own(ref owned) => RefOwn::Own(owned.clone()),
        }
    }
}
/// A key-value pair, represented as a two-element tuple.
///
/// Both elements are [`RefOwn`] values over the `Ref` and `Own` types of the
/// [`PatternTypes`] implementation `T`.
pub type KVPair<'b, T> = (
    RefOwn<'b, <T as PatternTypes>::Ref, <T as PatternTypes>::Own>,
    RefOwn<'b, <T as PatternTypes>::Ref, <T as PatternTypes>::Own>,
);
impl<'b, T, U> From<&'b T> for RefOwn<'b, T, U> {
fn from(x: &'b T) -> RefOwn<'b, T, U> {
RefOwn::Ref(x)
}
}
// TODO investigate if this should be PatternTypes: Default
/// Defines the types and operations used for matching.
///
/// Every operation is an associated function (there is no `self` receiver)
/// that receives the values it works on as [`RefOwn`]s over `Self::Ref` and
/// `Self::Own`.
pub trait PatternTypes {
    /// The borrowed type. It is allowed to be unsized.
    type Ref: ?Sized;

    // TODO replace with GATs.
    // TODO potentially relax with Clone?
    /// The owned type.
    type Own: Copy + 'static;

    /// Returns an iterator over the key-value pairs contained within `item`,
    /// or `None` if this operation is unsupported for the given value.
    ///
    /// The provided implementation ignores `item` and returns an iterator
    /// that yields nothing.
    fn pairs<'b>(
        item: RefOwn<'b, Self::Ref, Self::Own>,
    ) -> Option<Box<dyn Iterator<Item = KVPair<'b, Self>> + 'b>> {
        // TODO remove these default impls that only exist for testing purposes
        let _ = item;
        Some(Box::new(std::iter::empty::<KVPair<'b, Self>>()))
    }

    /// Looks up `key` within `item`.
    ///
    /// The outer `Option` is `None` if this operation is unsupported for the
    /// given value; the inner `Option` carries the key-value pair keyed by
    /// `key`, if any.
    ///
    /// The provided implementation ignores both arguments and returns
    /// `Some(None)`.
    fn get<'a, 'b>(
        item: RefOwn<'b, Self::Ref, Self::Own>,
        key: RefOwn<'a, Self::Ref, Self::Own>,
    ) -> Option<Option<KVPair<'b, Self>>> {
        // TODO remove these default impls that only exist for testing purposes
        let _ = (item, key);
        Some(None)
    }

    // TODO replace with GATs + newtypes
    /// Returns whether two keys/values are the same/equivalent.
    ///
    /// This must provide the same guarantees as `PartialEq`. In fact, this is
    /// a replacement for `PartialEq` for cases where it's not possible to
    /// just use `PartialEq`.
    fn matches(
        left: RefOwn<'_, Self::Ref, Self::Own>,
        right: RefOwn<'_, Self::Ref, Self::Own>,
    ) -> bool;

    /// Returns the value as an `&str`, wrapped in an `Option`.
    ///
    /// When `None` is returned is left to the implementation.
    fn as_str<'b>(value: RefOwn<'b, Self::Ref, Self::Own>) -> Option<&'b str>;
}
/// A predicate for keys and values.
///
/// This is an unsized trait-object type: a `Send + Sync` function that takes
/// a [`RefOwn`] over the `Ref` and `Own` types of the [`PatternTypes`]
/// implementation `T` and answers with a `bool` (presumably `true` to accept
/// the key or value; confirm against the matcher).
pub type Predicate<T> = dyn (Fn(RefOwn<'_, <T as PatternTypes>::Ref, <T as PatternTypes>::Own>) -> bool)
    + Send
    + Sync;