From a8778ff35bde88bb63d9fec769edf66e68d7969e Mon Sep 17 00:00:00 2001 From: SoniEx2 Date: Sat, 30 Jul 2022 11:22:23 -0300 Subject: Initial work on 0.1.0 Missing the VM. --- src/lib.rs | 151 +++++++++---------------------------------------------------- 1 file changed, 21 insertions(+), 130 deletions(-) (limited to 'src/lib.rs') diff --git a/src/lib.rs b/src/lib.rs index 3fc542f..8fa727f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,26 +1,9 @@ -/* - * Datafu - Rust library for extracting data from object graphs. - * Copyright (C) 2021 Soni L. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <https://www.gnu.org/licenses/>. - */ -#![warn(rust_2018_idioms)] -#![cfg_attr(not(feature = "stable"), feature(label_break_value))] +// Copyright (C) 2021-2022 Soni L. +// SPDX-License-Identifier: MIT OR Apache-2.0 //! Datafu is a regex-inspired query language. It was primarily //! designed for processing object trees parsed from configuration files, but -//! can also be used with JSON APIs, and even XML. +//! can be used with anything that supports serde. //! //! # Language Reference //! @@ -34,8 +17,7 @@ //! being used by default. //! //! A variable is a sequence of alphanumeric characters, not starting with -//! a digit. A `(key, value)` tuple containing the respective matched -//! element will be identified by this name in the results map. +//! a digit. 
The value of the matched element will be identified by this name. //! //! A literal is a sequence of characters delimited by `'`, optionally //! followed by `?`, with `%` as the escape character, and defines a @@ -65,11 +47,12 @@ //! //! A key match is a datafu expression (including, but not limited to, the //! empty datafu expression) enclosed within `[` and `]`, optionally -//! prefixed with one or more predicates, and applies the enclosed -//! predicates and datafu expression to the key (or index) being processed. -//! A key match enables additional validation of keys and/or extraction of -//! values from keys, and accepts a key if and only if the enclosed -//! predicates accept the key and the enclosed expression matches the key. +//! prefixed with an identifier and zero or more predicates, and applies the +//! enclosed predicates and datafu expression to the key (or index) being +//! processed. A key match enables additional validation of keys and/or +//! extraction of values from keys, and accepts a key if and only if the +//! enclosed predicates accept the key and the enclosed expression matches the +//! key. The matched key is stored in the identifier. //! //! A subvalue is a datafu expression (including, but not limited to, the //! empty datafu expression) enclosed within `(` and `)`, and applies @@ -104,7 +87,7 @@ //! arg ::= parameter | literal | regex | keymatch //! //! arrow ::= '->' -//! keymatch ::= '[' {predicate} expression ']' +//! keymatch ::= '[' {tag} {predicate} expression ']' //! subvalue ::= '(' {predicate} expression ')' ['?'] //! ``` //! @@ -115,12 +98,6 @@ //! //! -extern crate impl_trait; -extern crate regex; - -#[cfg(test)] -extern crate proptest; - pub mod errors; mod parser; mod pattern; @@ -128,103 +105,17 @@ mod vm; pub use pattern::Pattern; -pub use vm::Matcher; +/// A predicate. 
+pub type Predicate = dyn (for<'x, 'de, 'a> Fn( + &'x (dyn 'a + erased_serde::Deserializer<'de>) +) -> bool) + Send + Sync; -// TODO replace with GATs -/// A borrowed or owned value of various types. -/// -/// This exists purely as a workaround for Rust not having GATs yet. -#[derive(Debug)] -pub enum RefOwn<'b, T: ?Sized, U> { - /// Borrowed T. - Ref(&'b T), - /// Borrowed string. - Str(&'b str), - /// Owned U. - Own(U), -} - -impl<'b, T, U> PartialEq for RefOwn<'b, T, U> +/// Helper to build predicates because HRTB inference is the worst. +pub fn pred(f: F) -> Box where - T: ?Sized + PartialEq + PartialEq + PartialEq, - U: PartialEq + PartialEq + PartialEq, - str: PartialEq + PartialEq + PartialEq + F: (for<'x, 'de, 'a> Fn( + &'x (dyn 'a + erased_serde::Deserializer<'de>) + ) -> bool) + Send + Sync + 'static, { - fn eq(&self, other: &Self) -> bool { - match (self, other) { - (RefOwn::Ref(l), RefOwn::Ref(r)) => l.eq(r), - (RefOwn::Own(l), RefOwn::Own(r)) => l.eq(r), - (RefOwn::Str(l), RefOwn::Str(r)) => l.eq(r), - (RefOwn::Ref(l), RefOwn::Own(r)) => PartialEq::eq(*l, r), - (RefOwn::Own(l), RefOwn::Str(r)) => PartialEq::eq(l, *r), - (RefOwn::Str(l), RefOwn::Ref(r)) => l.eq(r), - (RefOwn::Ref(l), RefOwn::Str(r)) => l.eq(r), - (RefOwn::Own(l), RefOwn::Ref(r)) => PartialEq::eq(l, *r), - (RefOwn::Str(l), RefOwn::Own(r)) => PartialEq::eq(*l, r), - } - } -} - -impl<'b, T: ?Sized, U: Copy> Copy for RefOwn<'b, T, U> { -} - -impl<'b, T: ?Sized, U: Clone> Clone for RefOwn<'b, T, U> { - fn clone(&self) -> Self { - match self { - RefOwn::Ref(r) => RefOwn::Ref(r), - RefOwn::Str(r) => RefOwn::Str(r), - RefOwn::Own(v) => RefOwn::Own(v.clone()), - } - } + Box::new(f) } - -/// A tuple representing a key-value pair. 
-pub type KVPair<'b, T> = (RefOwn<'b, ::Ref, ::Own>, RefOwn<'b, ::Ref, ::Own>); - -impl<'b, T, U> From<&'b T> for RefOwn<'b, T, U> { - fn from(x: &'b T) -> RefOwn<'b, T, U> { - RefOwn::Ref(x) - } -} - -// TODO investigate if this should be PatternTypes: Default -/// Defines the types and operations used for matching. -pub trait PatternTypes { - /// The borrowed type. - type Ref: ?Sized; - - // TODO replace with GATs. - // TODO potentially relax with Clone? - /// The owned type. - type Own: Copy + 'static; - - /// Returns an iterator over key-value pairs contained within an item, or - /// None if this operation is unsupported for the given value. - fn pairs<'b>( - item: RefOwn<'b, Self::Ref, Self::Own> - ) -> Option> + 'b>>; - - /// Returns an optional key-value pair keyed by the given key, or None if - /// this operation is unsupported for the given value. - fn get<'a, 'b>( - item: RefOwn<'b, Self::Ref, Self::Own>, - key: RefOwn<'a, Self::Ref, Self::Own> - ) -> Option>>; - - // TODO replace with GATs + newtypes - /// Returns whether two keys/values are the same/equivalent. This must provide - /// the same guarantees as PartialEq. In fact, this is a replacement for - /// PartialEq for cases where it's not possible to just use PartialEq. - fn matches( - left: RefOwn<'_, Self::Ref, Self::Own>, - right: RefOwn<'_, Self::Ref, Self::Own> - ) -> bool; - - /// Returns the value as an &str. - fn as_str<'b>( - value: RefOwn<'b, Self::Ref, Self::Own> - ) -> Option<&'b str>; -} - -/// A predicate for keys and values. -pub type Predicate = dyn (Fn(RefOwn<'_, ::Ref, ::Own>) -> bool) + Send + Sync; -- cgit 1.4.1