diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/errors.rs | 57 | ||||
-rw-r--r-- | src/lib.rs | 151 | ||||
-rw-r--r-- | src/parser.rs | 182 | ||||
-rw-r--r-- | src/pattern.rs | 59 | ||||
-rw-r--r-- | src/vm.rs | 1248 |
5 files changed, 770 insertions, 927 deletions
diff --git a/src/errors.rs b/src/errors.rs index f29d635..b41b225 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,25 +1,10 @@ -/* - * This file is part of Datafu - * Copyright (C) 2021 Soni L. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <https://www.gnu.org/licenses/>. - */ +// Copyright (C) 2021-2022 Soni L. +// SPDX-License-Identifier: MIT OR Apache-2.0 //! <!-- TODO figure out module-level docs for this --> -#[cfg(doc)] -use crate::PatternTypes; +// #[cfg(doc)] +// use crate::PatternTypes; /// These are errors that may be returned by the pattern compiler when /// compiling a pattern. @@ -51,20 +36,20 @@ pub enum PatternError<'a> { Regex(usize, &'a str, ::regex::Error), } -/// These are errors that may be returned by the matcher when matching a -/// pattern. -#[derive(Clone, Debug)] -pub enum MatchError { - /// Returned if the pattern nests too deeply. - StackOverflow, - /// Returned if the pattern rejects the input. - ValidationError, - /// Returned if the pattern attempts an unsupported operation. - /// - /// In particular, if the [`PatternTypes`] doesn't support `get` or `pairs` - /// for a given value, this error will be returned. It can be treated as a - /// ValidationError, or as a bug in the pattern, at the user's discretion. - UnsupportedOperation, - /// Returned if an unspecified non-critical error occurred. - Other -} +// /// These are errors that may be returned by the matcher when matching a +// /// pattern. +// #[derive(Clone, Debug)] +// pub enum MatchError { +// /// Returned if the pattern nests too deeply. +// StackOverflow, +// /// Returned if the pattern rejects the input. +// ValidationError, +// /// Returned if the pattern attempts an unsupported operation. +// /// +// /// In particular, if the [`PatternTypes`] doesn't support `get` or `pairs` +// /// for a given value, this error will be returned. It can be treated as a +// /// ValidationError, or as a bug in the pattern, at the user's discretion. +// UnsupportedOperation, +// /// Returned if an unspecified non-critical error occurred. +// Other +// } diff --git a/src/lib.rs b/src/lib.rs index 3fc542f..8fa727f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,26 +1,9 @@ -/* - * Datafu - Rust library for extracting data from object graphs. - * Copyright (C) 2021 Soni L. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <https://www.gnu.org/licenses/>. - */ -#![warn(rust_2018_idioms)] -#![cfg_attr(not(feature = "stable"), feature(label_break_value))] +// Copyright (C) 2021-2022 Soni L. +// SPDX-License-Identifier: MIT OR Apache-2.0 //! Datafu is a regex-inspired query language. It was primarily //! designed for processing object trees parsed from configuration files, but -//! can also be used with JSON APIs, and even XML. +//! can be used with anything that supports serde. //! //! # Languge Reference //! @@ -34,8 +17,7 @@ //! being used by default. //! //! A variable is a sequence of alphanumeric characters, not starting with -//! a digit. A `(key, value)` tuple containing the respective matched -//! element will be identified by this name in the results map. +//! a digit. The value of the matched element will be identified by this name. //! //! A literal is a sequence of characters delimited by `'`, optionally //! followed by `?`, with `%` as the escape character, and defines a @@ -65,11 +47,12 @@ //! //! A key match is a datafu expression (including, but not limited to, the //! empty datafu expression) enclosed within `[` and `]`, optionally -//! prefixed with one or more predicates, and applies the enclosed -//! predicates and datafu expression to the key (or index) being processed. -//! A key match enables additional validation of keys and/or extraction of -//! values from keys, and accepts a key if and only if the enclosed -//! predicates accept the key and the enclosed expression matches the key. +//! prefixed with an identifier and zero or more predicates, and applies the +//! enclosed predicates and datafu expression to the key (or index) being +//! processed. A key match enables additional validation of keys and/or +//! extraction of values from keys, and accepts a key if and only if the +//! enclosed predicates accept the key and the enclosed expression matches the +//! key. The matched key is stored in the identifier. //! //! A subvalue is a datafu expression (including, but not limited to, the //! empty datafu expression) enclosed within `(` and `)`, and applies @@ -104,7 +87,7 @@ //! arg ::= parameter | literal | regex | keymatch //! //! arrow ::= '->' -//! keymatch ::= '[' {predicate} expression ']' +//! keymatch ::= '[' {tag} {predicate} expression ']' //! subvalue ::= '(' {predicate} expression ')' ['?'] //! ``` //! @@ -115,12 +98,6 @@ //! //! <!-- TODO --> -extern crate impl_trait; -extern crate regex; - -#[cfg(test)] -extern crate proptest; - pub mod errors; mod parser; mod pattern; @@ -128,103 +105,17 @@ mod vm; pub use pattern::Pattern; -pub use vm::Matcher; +/// A predicate. +pub type Predicate = dyn (for<'x, 'de, 'a> Fn( + &'x (dyn 'a + erased_serde::Deserializer<'de>) +) -> bool) + Send + Sync; -// TODO replace with GATs -/// A borrowed or owned value of various types. -/// -/// This exists purely as a workaround for Rust not having GATs yet. -#[derive(Debug)] -pub enum RefOwn<'b, T: ?Sized, U> { - /// Borrowed T. - Ref(&'b T), - /// Borrowed string. - Str(&'b str), - /// Owned U. - Own(U), -} - -impl<'b, T, U> PartialEq for RefOwn<'b, T, U> +/// Helper to build predicates because HRTB inference is the worst. +pub fn pred<F>(f: F) -> Box<Predicate> where - T: ?Sized + PartialEq<T> + PartialEq<U> + PartialEq<str>, - U: PartialEq<T> + PartialEq<U> + PartialEq<str>, - str: PartialEq<T> + PartialEq<U> + PartialEq<str> + F: (for<'x, 'de, 'a> Fn( + &'x (dyn 'a + erased_serde::Deserializer<'de>) + ) -> bool) + Send + Sync + 'static, { - fn eq(&self, other: &Self) -> bool { - match (self, other) { - (RefOwn::Ref(l), RefOwn::Ref(r)) => l.eq(r), - (RefOwn::Own(l), RefOwn::Own(r)) => l.eq(r), - (RefOwn::Str(l), RefOwn::Str(r)) => l.eq(r), - (RefOwn::Ref(l), RefOwn::Own(r)) => PartialEq::eq(*l, r), - (RefOwn::Own(l), RefOwn::Str(r)) => PartialEq::eq(l, *r), - (RefOwn::Str(l), RefOwn::Ref(r)) => l.eq(r), - (RefOwn::Ref(l), RefOwn::Str(r)) => l.eq(r), - (RefOwn::Own(l), RefOwn::Ref(r)) => PartialEq::eq(l, *r), - (RefOwn::Str(l), RefOwn::Own(r)) => PartialEq::eq(*l, r), - } - } -} - -impl<'b, T: ?Sized, U: Copy> Copy for RefOwn<'b, T, U> { -} - -impl<'b, T: ?Sized, U: Clone> Clone for RefOwn<'b, T, U> { - fn clone(&self) -> Self { - match self { - RefOwn::Ref(r) => RefOwn::Ref(r), - RefOwn::Str(r) => RefOwn::Str(r), - RefOwn::Own(v) => RefOwn::Own(v.clone()), - } - } + Box::new(f) } - -/// A tuple representing a key-value pair. -pub type KVPair<'b, T> = (RefOwn<'b, <T as PatternTypes>::Ref, <T as PatternTypes>::Own>, RefOwn<'b, <T as PatternTypes>::Ref, <T as PatternTypes>::Own>); - -impl<'b, T, U> From<&'b T> for RefOwn<'b, T, U> { - fn from(x: &'b T) -> RefOwn<'b, T, U> { - RefOwn::Ref(x) - } -} - -// TODO investigate if this should be PatternTypes: Default -/// Defines the types and operations used for matching. -pub trait PatternTypes { - /// The borrowed type. - type Ref: ?Sized; - - // TODO replace with GATs. - // TODO potentially relax with Clone? - /// The owned type. - type Own: Copy + 'static; - - /// Returns an iterator over key-value pairs contained within an item, or - /// None if this operation is unsupported for the given value. - fn pairs<'b>( - item: RefOwn<'b, Self::Ref, Self::Own> - ) -> Option<Box<dyn Iterator<Item=KVPair<'b, Self>> + 'b>>; - - /// Returns an optional key-value pair keyed by the given key, or None if - /// this operation is unsupported for the given value. - fn get<'a, 'b>( - item: RefOwn<'b, Self::Ref, Self::Own>, - key: RefOwn<'a, Self::Ref, Self::Own> - ) -> Option<Option<KVPair<'b, Self>>>; - - // TODO replace with GATs + newtypes - /// Returns whether two keys/values are the same/equivalent. This must provide - /// the same guarantees as PartialEq. In fact, this is a replacement for - /// PartialEq for cases where it's not possible to just use PartialEq. - fn matches( - left: RefOwn<'_, Self::Ref, Self::Own>, - right: RefOwn<'_, Self::Ref, Self::Own> - ) -> bool; - - /// Returns the value as an &str. - fn as_str<'b>( - value: RefOwn<'b, Self::Ref, Self::Own> - ) -> Option<&'b str>; -} - -/// A predicate for keys and values. -pub type Predicate<T> = dyn (Fn(RefOwn<'_, <T as PatternTypes>::Ref, <T as PatternTypes>::Own>) -> bool) + Send + Sync; diff --git a/src/parser.rs b/src/parser.rs index ff3407a..c929653 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,20 +1,7 @@ -/* - * This file is part of Datafu - * Copyright (C) 2021 Soni L. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <https://www.gnu.org/licenses/>. - */ +// Copyright (C) 2021-2022 Soni L. +// SPDX-License-Identifier: MIT OR Apache-2.0 + +//! The recursive-descent datafu language parser. use std::borrow::Borrow; use std::collections::BTreeMap; @@ -22,14 +9,13 @@ use std::mem::ManuallyDrop; use impl_trait::impl_trait; use regex::Regex; +use serde::Serialize; -use crate::PatternTypes; use crate::Predicate; use crate::errors::PatternError; use crate::vm::PatternConstants; use crate::vm::PatternElement; - /// try! with bools. (the b comes from bool.) macro_rules! bry { ($l:lifetime $e:expr) => { @@ -47,6 +33,7 @@ macro_rules! bry { // the following macros rely on unlabeled-break-through-labeled-block being an // error. // NOTE: always test changes to this module on nightly! +// still waiting for label-break-value stabilization... #[cfg(not(feature = "stable"))] /// labeled block. on nightly: better compile errors. but also works on stable. @@ -67,23 +54,45 @@ macro_rules! lblock { } } +/// Attempts to shift `s` forward by removing `prefix`. +/// +/// Returns whether `s` has had `prefix` removed. // can't use Pattern here :( fn strip_prefix(s: &mut &str, prefix: &str) -> bool { s.strip_prefix(prefix).map(|ns| *s = ns).is_some() } +/// Returns the position (index) of `sub` within `base`, in bytes. +/// +/// Returns bogus results if `base` and `sub` are unrelated. fn pos_of<'a>(base: &'a str, sub: &'a str) -> Option<usize> { - // FIXME + // FIXME is there any non-UB way to check if `sub` is in `base`? Some((sub.as_ptr() as usize) - (base.as_ptr() as usize)) } -struct SubtreeHelper<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> where Self: 'r { - root: &'r mut Parser<'s, P, O, T>, +/// Helper to collect "subtree" sections of the pattern. +/// +/// This is a RAII-like guard which handles cleaning up the parsed pattern when +/// dropped. +struct SubtreeHelper<'r, 's, PKey, OKey, O> +where + Self: 'r, + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, + O: Serialize, +{ + root: &'r mut Parser<'s, PKey, OKey, O>, } impl_trait! { - impl<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> SubtreeHelper<'r, 's, P, O, T> where Self: 'r { - fn start(value: &'r mut Parser<'s, P, O, T>) -> Self { + impl<'r, 's, PKey, OKey, O> SubtreeHelper<'r, 's, PKey, OKey, O> + where + Self: 'r, + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, + O: Serialize, + { + fn start(value: &'r mut Parser<'s, PKey, OKey, O>) -> Self { value.consts.protos.push(Default::default()); Self { root: value, @@ -99,7 +108,7 @@ impl_trait! { } impl trait std::ops::Deref { - type Target = Parser<'s, P, O, T>; + type Target = Parser<'s, PKey, OKey, O>; fn deref(&self) -> &Self::Target { &*self.root @@ -121,14 +130,30 @@ impl_trait! { } } -struct TagHelper<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> where Self: 'r { - root: &'r mut Parser<'s, P, O, T>, +/// Helper to collect "tag" sections of the pattern. +/// +/// This is a RAII-like guard which handles cleaning up the parsed pattern when +/// dropped. +struct TagHelper<'r, 's, PKey, OKey, O> +where + Self: 'r, + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, + O: Serialize, +{ + root: &'r mut Parser<'s, PKey, OKey, O>, len: usize, } impl_trait! { - impl<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> TagHelper<'r, 's, P, O, T> where Self: 'r { - fn start(value: &'r mut Parser<'s, P, O, T>) -> Self { + impl<'r, 's, PKey, OKey, O> TagHelper<'r, 's, PKey, OKey, O> + where + Self: 'r, + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, + O: Serialize, + { + fn start(value: &'r mut Parser<'s, PKey, OKey, O>) -> Self { let len = value.consts.protos.last().unwrap().len(); Self { root: value, @@ -141,7 +166,7 @@ impl_trait! { } impl trait std::ops::Deref { - type Target = Parser<'s, P, O, T>; + type Target = Parser<'s, PKey, OKey, O>; fn deref(&self) -> &Self::Target { &*self.root @@ -166,20 +191,30 @@ impl_trait! { } } -struct Parser<'s, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> { +struct Parser<'s, PKey, OKey, O> +where + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, + O: Serialize, +{ base: &'s str, - preds: Option<BTreeMap<P, Box<Predicate<T>>>>, - objs: Option<BTreeMap<O, T::Own>>, - pred_ids: BTreeMap<P, usize>, - obj_ids: BTreeMap<O, usize>, - consts: PatternConstants<T>, + preds: Option<BTreeMap<PKey, Box<Predicate>>>, + objs: Option<BTreeMap<OKey, O>>, + pred_ids: BTreeMap<PKey, usize>, + obj_ids: BTreeMap<OKey, usize>, + consts: PatternConstants<O>, closed_subtrees: std::ops::RangeFrom<usize>, } // These are documented using LPeg.re syntax // http://www.inf.puc-rio.br/~roberto/lpeg/re.html #[rustfmt::skip] -impl<'s, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> Parser<'s, P, O, T> { +impl<'s, PKey, OKey, O> Parser<'s, PKey, OKey, O> +where + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, + O: Serialize, +{ /// str_literal <- sp ( ( "'" str_char* ( "'" / ( !. -> ErrorStrEnd ) ) ( '?' -> MarkSkippable ) ) -> String ) sp /// str_char <- ( str_escape / [^%'] ) /// str_escape <- '%' ( '%' / "'" ) / ( ( '%' .? ) -> ErrorStrEscape ) @@ -451,7 +486,7 @@ impl<'s, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> Parser<'s, })) } - /// key_subtree <- sp '[' sp predicate* sp subtree sp ( ']' / unexpected_token / unexpected_end ) sp ( '?'? -> MarkSkippable ) + /// key_subtree <- sp '[' sp name? sp predicate* sp subtree sp ( ']' / unexpected_token / unexpected_end ) sp ( '?'? -> MarkSkippable ) fn key_subtree(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { let mut cursor = *s; Ok(lblock!('matches: { @@ -459,6 +494,8 @@ impl<'s, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> Parser<'s, bry!('matches strip_prefix(&mut cursor, "[")); self.sp(&mut cursor); let mut subtree = SubtreeHelper::start(&mut *self); + subtree.name(&mut cursor)?; + subtree.sp(&mut cursor); while subtree.predicate(&mut cursor)? { } subtree.sp(&mut cursor); @@ -566,17 +603,20 @@ impl<'s, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> Parser<'s, } } -pub(crate) fn parse<'s, P, O, T>( +/// Parses a DFU expression. +/// +/// The given +pub(crate) fn parse<'s, PKey, OKey, O>( input: &'s str, - preds: Option<BTreeMap<P, Box<Predicate<T>>>>, - objs: Option<BTreeMap<O, T::Own>> -) -> Result<PatternConstants<T>, PatternError<'s>> - where - P: Borrow<str> + Ord, - O: Borrow<str> + Ord, - T: PatternTypes, + preds: Option<BTreeMap<PKey, Box<Predicate>>>, + objs: Option<BTreeMap<OKey, O>> +) -> Result<PatternConstants<O>, PatternError<'s>> +where + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, + O: Serialize, { - let mut parser = Parser::<'s, P, O, T> { + let mut parser = Parser::<'s, PKey, OKey, O> { base: input, preds: preds, objs: objs, @@ -598,57 +638,15 @@ pub(crate) fn parse<'s, P, O, T>( #[cfg(test)] mod tests { - use crate::PatternTypes; - use crate::RefOwn; - use crate::KVPair; use crate::errors::PatternError; use super::Parser; use proptest::prelude::*; - struct Dummy; - impl PatternTypes for Dummy { - type Ref = (); - type Own = (); - fn pairs<'b>( - item: RefOwn<'b, Self::Ref, Self::Own> - ) -> Option<Box<dyn Iterator<Item=KVPair<'b, Self>> + 'b>> { - let _ = item; - None - } - - fn get<'a, 'b>( - item: RefOwn<'b, Self::Ref, Self::Own>, - key: RefOwn<'a, Self::Ref, Self::Own> - ) -> Option<Option<KVPair<'b, Self>>> { - let _ = item; - let _ = key; - None - } - - fn matches( - left: RefOwn<'_, Self::Ref, Self::Own>, - right: RefOwn<'_, Self::Ref, Self::Own> - ) -> bool { - let _ = left; - let _ = right; - false - } - - fn as_str<'b>( - item: RefOwn<'b, Self::Ref, Self::Own> - ) -> Option<&'b str> { - match item { - RefOwn::Str(key) => Some(key), - _ => None, - } - } - } - #[test] fn test_identifier() { fn identifier_input<'s>(s: &mut &'s str) -> Result<bool, PatternError<'s>> { - let mut parser = Parser::<'s, &'static str, &'static str, Dummy> { + let mut parser = Parser::<'s, &'static str, &'static str, ()> { base: *s, preds: None, objs: None, @@ -674,8 +672,8 @@ mod tests { proptest! { #[test] fn test_no_crash(s in ".{0,4096}") { - fn prep_parser<'s>(s: &'s str) -> Parser<'s, &'static str, &'static str, Dummy> { - let mut parser = Parser::<'s, &'static str, &'static str, Dummy> { + fn prep_parser<'s>(s: &'s str) -> Parser<'s, &'static str, &'static str, ()> { + let mut parser = Parser::<'s, &'static str, &'static str, ()> { base: s, preds: None, objs: None, diff --git a/src/pattern.rs b/src/pattern.rs index 3349db8..3a8c91f 100644 --- a/src/pattern.rs +++ b/src/pattern.rs @@ -1,57 +1,46 @@ -/* - * This file is part of Datafu - * Copyright (C) 2021 Soni L. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <https://www.gnu.org/licenses/>. - */ +// Copyright (C) 2021-2022 Soni L. +// SPDX-License-Identifier: MIT OR Apache-2.0 use std::borrow::Borrow; use std::collections::BTreeMap; -use crate::PatternTypes; -use crate::RefOwn; +use serde::Deserialize; +use serde::Deserializer; +use serde::Serialize; + use crate::Predicate; use crate::errors::PatternError; use crate::parser::parse; -use crate::vm::Matcher; +//use crate::vm::Matcher; use crate::vm::PatternConstants; -use crate::vm::MAX_CALLS; +//use crate::vm::MAX_CALLS; -pub struct Pattern<T: PatternTypes> { - consts: PatternConstants<T>, +pub struct Pattern<O: Serialize> { + consts: PatternConstants<O>, } -impl<T: PatternTypes> Pattern<T> { +impl<O: Serialize> Pattern<O> { /// Compiles the input into a pattern. - pub fn compile<'s, P, O>( + pub fn compile<'s, PKey, OKey>( input: &'s str, - preds: Option<BTreeMap<P, Box<Predicate<T>>>>, - objs: Option<BTreeMap<O, T::Own>> + preds: Option<BTreeMap<PKey, Box<Predicate>>>, + objs: Option<BTreeMap<OKey, O>> ) -> Result<Self, PatternError<'s>> - where - P: Borrow<str> + Ord, - O: Borrow<str> + Ord, + where + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, { Ok(Self { consts: parse(input, preds, objs)? }) } - pub fn attempt_match<'a, 'b>( - &'a self, - value: impl Into<RefOwn<'b, T::Ref, T::Own>> - ) -> Matcher<'a, 'b, T> { - Matcher::new(value.into(), &self.consts, self.consts.protos.len() - 1, MAX_CALLS).ok().expect("datafu internal error: MAX_CALLS must not be 0") + /// Matches the pattern against an input. + pub fn deserialize<'de, Der, De>(&self, de: Der) -> Result<De, Der::Error> + where + Der: Deserializer<'de>, + De: Deserialize<'de>, + { + todo!() } } diff --git a/src/vm.rs b/src/vm.rs index dd48752..6bcbf70 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -1,53 +1,33 @@ -/* - * This file is part of Datafu - * Copyright (C) 2021 Soni L. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <https://www.gnu.org/licenses/>. - */ - -use std::collections::BTreeMap; -use std::iter::Peekable; +// Copyright (C) 2021-2022 Soni L. +// SPDX-License-Identifier: MIT OR Apache-2.0 use regex::Regex; +use serde::Serialize; -use crate::KVPair; -use crate::RefOwn; -use crate::PatternTypes; use crate::Predicate; -use crate::errors::MatchError; +//use crate::errors::MatchError; pub(crate) const MAX_CALLS: usize = 250; -type Matches<'a, 'b, T> = BTreeMap<&'a str, KVPair<'b, T>>; +//type Matches<'a, 'b, T> = BTreeMap<&'a str, KVPair<'b, T>>; // TODO: use a builder for this? /// The constant pool for a pattern. -pub(crate) struct PatternConstants<T: PatternTypes> { +pub(crate) struct PatternConstants<O: Serialize> { // last proto is implicitly the whole pattern. pub(crate) protos: Vec<Vec<PatternElement>>, // Note that we can borrow these when creating the output map. // https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f pub(crate) strings: Vec<String>, pub(crate) regices: Vec<Regex>, - pub(crate) predicates: Vec<Box<Predicate<T>>>, + pub(crate) predicates: Vec<Box<Predicate>>, // NOTE these are part of the constant pool and so have lifetime analogous // to 'a (consistently used to indicate constant pool lifetime) when used // elsewhere. In particular, they can't be yielded by the iterator. - pub(crate) defs: Vec<T::Own>, + pub(crate) defs: Vec<O>, } -impl<T: PatternTypes> Default for PatternConstants<T> { +impl<O: Serialize> Default for PatternConstants<O> { fn default() -> Self { Self { protos: Default::default(), @@ -76,612 +56,612 @@ pub(crate) enum PatternElement { End } -struct Frame<'a, 'b, T: PatternTypes> { - //obj: RefOwn<'b, T::Ref, T::Own>, - ops: &'a [PatternElement], - iar: Option<usize>, - depth: usize, - path: Vec<Holder<'a, 'b, T>>, - in_key: bool, -} - -impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> { - /// Advances the instruction address register. - /// - /// # Returns - /// - /// `true` if successful, `false` otherwise. - fn next(&mut self) -> bool { - let new = self.iar.map_or(0, |v| v + 1); - new < self.ops.len() && { - self.iar = Some(new); - true - } - } - - /// Returns the current instruction. - fn op(&self) -> PatternElement { - self.ops[self.iar.expect("ops[iar]")] - } - - /// Rewinds the instruction address register. - /// - /// # Returns - /// - /// `true` if successful, `false` otherwise. - fn prev(&mut self) -> bool { - let new = self.iar.expect("iar").checked_sub(1); - new.is_some() && { - self.iar = new; - true - } - } -} - -/// Stores a single match. -/// -/// See also Holder. -enum HolderState<'a, 'b, T: PatternTypes> { - /// Empty holder, for a key-value pair. - EmptyKey, - /// Empty holder, for a Matcher and a key-value pair. - EmptyKeySubtree, - // /// Empty holder, for a Matcher and a value. - // EmptyValueSubtree, - /// Occupied holder, for a key-value pair.. - Key(KVPair<'b, T>), - /// Occupied holder, for a Matcher and a key-value pair. - KeySubtree(Peekable<Matcher<'a, 'b, T>>, KVPair<'b, T>), - /// Occupied holder, for a Matcher and a value. The empty variant is - /// omitted as it would never be used otherwise. - ValueSubtree(Peekable<Matcher<'a, 'b, T>>, RefOwn<'b, T::Ref, T::Own>), - /// Occupied holder, for a value. The empty variant is omitted as it would - /// never be used otherwise. - Value(RefOwn<'b, T::Ref, T::Own>), -} - -/// Helper enum for HolderState. -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -enum HolderKind { - Key, - KeySubtree, - ValueSubtree, - Value -} - -//impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> { -// fn clone(&self) -> Self { +//struct Frame<'a, 'b, T: PatternTypes> { +// //obj: RefOwn<'b, T::Ref, T::Own>, +// ops: &'a [PatternElement], +// iar: Option<usize>, +// depth: usize, +// path: Vec<Holder<'a, 'b, T>>, +// in_key: bool, +//} +// +//impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> { +// /// Advances the instruction address register. +// /// +// /// # Returns +// /// +// /// `true` if successful, `false` otherwise. +// fn next(&mut self) -> bool { +// let new = self.iar.map_or(0, |v| v + 1); +// new < self.ops.len() && { +// self.iar = Some(new); +// true +// } +// } +// +// /// Returns the current instruction. +// fn op(&self) -> PatternElement { +// self.ops[self.iar.expect("ops[iar]")] +// } +// +// /// Rewinds the instruction address register. +// /// +// /// # Returns +// /// +// /// `true` if successful, `false` otherwise. +// fn prev(&mut self) -> bool { +// let new = self.iar.expect("iar").checked_sub(1); +// new.is_some() && { +// self.iar = new; +// true +// } +// } +//} +// +///// Stores a single match. +///// +///// See also Holder. +//enum HolderState<'a, 'b, T: PatternTypes> { +// /// Empty holder, for a key-value pair. +// EmptyKey, +// /// Empty holder, for a Matcher and a key-value pair. +// EmptyKeySubtree, +// // /// Empty holder, for a Matcher and a value. +// // EmptyValueSubtree, +// /// Occupied holder, for a key-value pair.. +// Key(KVPair<'b, T>), +// /// Occupied holder, for a Matcher and a key-value pair. +// KeySubtree(Peekable<Matcher<'a, 'b, T>>, KVPair<'b, T>), +// /// Occupied holder, for a Matcher and a value. The empty variant is +// /// omitted as it would never be used otherwise. +// ValueSubtree(Peekable<Matcher<'a, 'b, T>>, RefOwn<'b, T::Ref, T::Own>), +// /// Occupied holder, for a value. The empty variant is omitted as it would +// /// never be used otherwise. +// Value(RefOwn<'b, T::Ref, T::Own>), +//} +// +///// Helper enum for HolderState. +//#[derive(Copy, Clone, Debug, Eq, PartialEq)] +//enum HolderKind { +// Key, +// KeySubtree, +// ValueSubtree, +// Value +//} +// +////impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> { +//// fn clone(&self) -> Self { +//// match self { +//// HolderState::EmptyKey => HolderState::EmptyKey, +//// HolderState::EmptySubtree => HolderState::EmptySubtree, +//// HolderState::Key(v) => HolderState::Key(*v), +//// HolderState::KeySubtree(m, v) => HolderState::KeySubtree(m.clone(), *v), +//// HolderState::ValueSubtree(m, v) => HolderState::ValueSubtree(m.clone(), *v), +//// HolderState::Value(v) => HolderState::Value(*v), +//// } +//// } +////} +// +//impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> { +// #[rustfmt::skip] +// fn is_empty(&self) -> bool { // match self { -// HolderState::EmptyKey => HolderState::EmptyKey, -// HolderState::EmptySubtree => HolderState::EmptySubtree, -// HolderState::Key(v) => HolderState::Key(*v), -// HolderState::KeySubtree(m, v) => HolderState::KeySubtree(m.clone(), *v), -// HolderState::ValueSubtree(m, v) => HolderState::ValueSubtree(m.clone(), *v), -// HolderState::Value(v) => HolderState::Value(*v), +// | HolderState::EmptyKey +// | HolderState::EmptyKeySubtree +// //| HolderState::EmptyValueSubtree +// => true, _ => false +// } +// } +// +// fn has_value(&self) -> bool { +// !self.is_empty() +// } +// +// fn kind(&self) -> HolderKind { +// match self { +// | HolderState::EmptyKey +// | HolderState::Key(_) +// => HolderKind::Key, +// | HolderState::EmptyKeySubtree +// | HolderState::KeySubtree(_, _) +// => HolderKind::KeySubtree, +// //| HolderState::EmptyValueSubtree +// | HolderState::ValueSubtree(_, _) +// => HolderKind::ValueSubtree, +// | HolderState::Value(_) +// => HolderKind::Value, +// } +// } +// +// fn value(&self) -> Option<RefOwn<'b, T::Ref, T::Own>> { +// match *self { +// HolderState::Key((_, value)) => Some(value), +// HolderState::KeySubtree(_, (_, value)) => Some(value), +// HolderState::ValueSubtree(_, value) => Some(value), +// HolderState::Value(value) => Some(value), +// _ => None +// } +// } +// +// fn key(&self) -> Option<RefOwn<'b, T::Ref, T::Own>> { +// match *self { +// HolderState::Key((key, _)) => Some(key), +// HolderState::KeySubtree(_, (key, _)) => Some(key), +// _ => None +// } +// } +// +// fn pair(&self) -> Option<KVPair<'b, T>> { +// match *self { +// HolderState::Key(pair) => Some(pair), +// HolderState::KeySubtree(_, pair) => Some(pair), +// _ => None // } // } +// +// fn subtree(&mut self) -> Option<&mut Peekable<Matcher<'a, 'b, T>>> { +// match *self { +// HolderState::KeySubtree(ref mut subtree, _) => Some(subtree), +// HolderState::ValueSubtree(ref mut subtree, _) => Some(subtree), +// _ => None +// } +// } +// +// fn clear(&mut self) { +// *self = match self.kind() { +// HolderKind::Key => HolderState::EmptyKey, +// HolderKind::KeySubtree => HolderState::EmptyKeySubtree, +// HolderKind::ValueSubtree => unreachable!(), //HolderState::EmptyValueSubtree, +// HolderKind::Value => unreachable!(), +// }; +// assert!(self.is_empty()); +// } +//} +// +///// Stores a single match and associated metadata. +///// +///// A single match is generally a key-value pair, but may be a collection of +///// named pairs in the case of subtree matches, or just a value for the initial +///// holder. +//struct Holder<'a, 'b, T: PatternTypes> { +// name: Option<&'a str>, +// value: HolderState<'a, 'b, T>, +// parent: Option<RefOwn<'b, T::Ref, T::Own>>, +// iterator: Option<Box<dyn Iterator<Item=KVPair<'b, T>> + 'b>>, +// filters: Vec<Box<dyn (for<'c> Fn(&'c mut HolderState<'a, 'b, T>) -> Result<(), MatchError>) + 'a>>, +//} +// +//impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> { +// fn next(&mut self) -> Result<bool, MatchError> { +// self.ensure_iterator()?; +// if let Self { +// value: ref mut v, +// iterator: Some(ref mut it), +// ref filters, +// .. +// } = self { +// // check if we're in a subtree and (not) done. +// if let Some(matcher) = v.subtree() { +// if let Some(res) = matcher.peek() { +// // report any errors +// return res.as_ref().map(|_| true).map_err(|e| e.clone()); +// } +// } +// let kind = v.kind(); +// let mut next_v; +// loop { +// next_v = match it.next() { +// Some(pair) => HolderState::Key(pair), +// None => return Ok(false) +// }; +// for filter in filters { +// filter(&mut next_v)?; +// if next_v.is_empty() { +// break; +// } +// } +// if next_v.has_value() { +// break; +// } +// } +// assert!(next_v.has_value()); +// assert_eq!(next_v.kind(), kind); +// *v = next_v; +// Ok(true) +// } else { +// unreachable!() +// } +// } +// +// /// Ensure `self.iterator.is_some()`, creating an iterator if necessary. +// fn ensure_iterator(&mut self) -> Result<(), MatchError> { +// if self.iterator.is_none() { +// let iter = T::pairs(self.parent.unwrap()); +// if iter.is_none() { +// return Err(MatchError::UnsupportedOperation); +// } +// self.iterator = iter; +// } +// assert!(self.iterator.is_some()); +// Ok(()) +// } +//} +// +//impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> { +// fn default() -> Self { +// Self { +// name: Default::default(), +// value: HolderState::EmptyKey, +// parent: Default::default(), +// iterator: Default::default(), +// filters: Default::default(), +// } +// } +//} +// +//pub struct Matcher<'a, 'b, T: PatternTypes> { +// defs: &'a PatternConstants<T>, +// frame: Frame<'a, 'b, T>, +//} +// +//// TODO: +//// +//// [x] Arrow +//// [x] StringKey +//// [x] RegexKey +//// [x] KeySubtree +//// [x] ValueSubtree +//// [x] Ident +//// [x] Param (untested) +//// [x] ApplyPredicate +//// [x] End +// +///// Helper for `PatternElement::StringKey`. +//fn on_string_key<'a, 'b, T: PatternTypes>( +// matcher: &mut Matcher<'a, 'b, T>, +// id: usize, +// skippable: bool, +//) -> Result<bool, MatchError> { +// let path = matcher.frame.path.last_mut().unwrap(); +// assert!(path.iterator.is_none()); +// let key = &matcher.defs.strings[id]; +// let iter = T::get(path.parent.unwrap(), RefOwn::Str(key)); +// match iter { +// Some(None) if !skippable => Err(MatchError::ValidationError), +// Some(opt) => { +// path.iterator = Some(Box::new(opt.into_iter())); +// Ok(true) +// } +// None => Err(MatchError::UnsupportedOperation), +// } +//} +// +///// Helper for `PatternElement::ParameterKey`. +//fn on_parameter_key<'a, 'b, T: PatternTypes>( +// matcher: &mut Matcher<'a, 'b, T>, +// id: usize, +// skippable: bool, +//) -> Result<bool, MatchError> { +// let path = matcher.frame.path.last_mut().unwrap(); +// assert!(path.iterator.is_none()); +// let key = matcher.defs.defs[id]; +// let iter = T::get(path.parent.unwrap(), RefOwn::Own(key)); +// match iter { +// Some(None) if !skippable => Err(MatchError::ValidationError), +// Some(opt) => { +// path.iterator = Some(Box::new(opt.into_iter())); +// Ok(true) +// } +// None => Err(MatchError::UnsupportedOperation), +// } +//} +// +///// Helper for `PatternElement::RegexKey`. +//fn on_regex_key<'a, 'b, T: PatternTypes>( +// matcher: &mut Matcher<'a, 'b, T>, +// id: usize, +// skippable: bool, +//) -> Result<bool, MatchError> { +// matcher.frame.path.last_mut().unwrap().ensure_iterator()?; +// let re = &matcher.defs.regices[id]; +// let path = matcher.frame.path.last_mut().unwrap(); +// path.filters.push(Box::new(move |value| { +// let s = T::as_str(value.key().unwrap()); +// match (s.map_or(false, |s| re.is_match(s)), skippable) { +// (true, _) => Ok(()), +// (false, true) => { +// value.clear(); +// Ok(()) +// }, +// (false, false) => Err(MatchError::ValidationError), +// } +// })); +// Ok(true) +//} +// +///// Helper for `PatternElement::KeySubtree`. +//fn on_key_subtree<'a, 'b, T: PatternTypes>( +// matcher: &mut Matcher<'a, 'b, T>, +// id: usize, +// skippable: bool, +//) -> Result<bool, MatchError> { +// let _ = skippable; // FIXME what should a skippable KeySubtree even do?! +// matcher.frame.path.last_mut().unwrap().ensure_iterator()?; +// let defs = matcher.defs; +// let rlimit: usize = matcher.frame.depth; +// let path = matcher.frame.path.last_mut().unwrap(); +// assert!(path.value.is_empty()); +// assert_eq!(path.value.kind(), HolderKind::Key); +// path.value = HolderState::EmptyKeySubtree; +// path.filters.push(Box::new(move |value| { +// let key = value.key().unwrap(); +// let mut subtree = Matcher::new(key, defs, id, rlimit)?.peekable(); +// match subtree.peek() { +// Some(&Ok(_)) => { +// *value = HolderState::KeySubtree(subtree, value.pair().unwrap()); +// Ok(()) +// }, +// Some(&Err(ref e)) => { +// Err(e.clone()) +// }, +// None => { +// value.clear(); +// Ok(()) +// } +// } +// })); +// Ok(true) +//} +// +//const DUMMY_OPS: &'static [PatternElement] = &[]; +// +//impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { +// pub(crate) fn new(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants<T>, proto: usize, rlimit: usize) -> Result<Self, MatchError> { +// let ops: &[_] = &defs.protos[proto]; +// Self::with_ops(obj, defs, ops, rlimit) +// } +// +// /// Constructs a Matcher that yields a single dummy result. +// fn with_ops(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants<T>, ops: &'a [PatternElement], rlimit: usize) -> Result<Self, MatchError> { +// let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?; +// Ok(Self { +// defs: defs, +// frame: Frame { +// //obj: obj, +// ops: ops, +// iar: None, +// depth: depth, +// path: { +// let mut holder = Holder::default(); +// holder.value = HolderState::Value(obj); +// holder.iterator = Some(Box::new(std::iter::empty())); +// vec![holder] +// }, +// in_key: false, +// }, +// }) +// } +// +// fn on_in_key(&mut self) -> Result<bool, MatchError> { +// match self.frame.op() { +// PatternElement::End => { +// let path = self.frame.path.last_mut().unwrap(); +// if path.next()? { +// Ok(false) +// } else { +// drop(path); +// self.frame.path.pop().unwrap(); +// // stop at previous End, or start of frame +// while self.frame.prev() { +// if matches!(self.frame.op(), PatternElement::End) { +// break; +// } +// } +// // is start of frame? +// if !self.frame.prev() { +// self.frame.path.clear(); +// } +// Ok(true) +// } +// }, +// PatternElement::ApplyPredicate(id, skippable) => { +// // failing on T::get() is already handled, but we may need a +// // T::pairs(). construct it here. +// self.frame.path.last_mut().unwrap().ensure_iterator()?; +// let pred = &self.defs.predicates[id]; +// let path = self.frame.path.last_mut().unwrap(); +// path.filters.push(Box::new(move |value| { +// match (pred(value.value().unwrap()), skippable) { +// (true, _) => Ok(()), +// (false, true) => { +// value.clear(); +// Ok(()) +// }, +// (false, false) => Err(MatchError::ValidationError), +// } +// })); +// Ok(true) +// }, +// PatternElement::StringKey(id, skippable) => { +// on_string_key(self, id, skippable) +// }, +// PatternElement::ParameterKey(id, skippable) => { +// on_parameter_key(self, id, skippable) +// }, +// PatternElement::RegexKey(id, skippable) => { +// on_regex_key(self, id, skippable) +// }, +// PatternElement::KeySubtree(id, skippable) => { +// on_key_subtree(self, id, skippable) +// }, +// _ => unreachable!("on_in_key") +// } +// } +// +// fn on_not_in_key(&mut self) -> Result<bool, MatchError> { +// match self.frame.op() { +// PatternElement::Arrow => { +// // this *should* always pass. +// assert!(self.frame.path.last().unwrap().iterator.is_some()); +// let mut holder = Holder::default(); +// holder.parent = self.frame.path.last().unwrap().value.value(); +// assert!(holder.parent.is_some()); +// self.frame.path.push(holder); +// Ok(false) +// }, +// PatternElement::Identifier(id) => { +// let name = self.defs.strings.get(id).map(|s| &**s); +// let path = self.frame.path.last_mut().unwrap(); +// path.name = name; +// assert!(path.iterator.is_none()); +// // we don't actually create the iterator here, +// // as we may still wanna use T::get() instead. +// Ok(true) +// }, +// PatternElement::ApplyPredicate(id, skippable) => { +// assert!(self.frame.path.len() == 1); +// let pred = &self.defs.predicates[id]; +// let value = self.frame.path.last().unwrap().value.value(); +// match (pred(value.unwrap()), skippable) { +// (true, _) => Ok(false), +// (false, true) => { +// self.frame.path.clear(); +// // any Ok(_) will do +// Ok(false) +// }, +// (false, false) => Err(MatchError::ValidationError), +// } +// }, +// PatternElement::StringKey(id, skippable) => { +// on_string_key(self, id, skippable) +// }, +// PatternElement::ParameterKey(id, skippable) => { +// on_parameter_key(self, id, skippable) +// }, +// PatternElement::RegexKey(id, skippable) => { +// on_regex_key(self, id, skippable) +// }, +// PatternElement::KeySubtree(id, skippable) => { +// on_key_subtree(self, id, skippable) +// }, +// PatternElement::ValueSubtree(id, skippable) => { +// let value = self.frame.path.last().unwrap().value.value().unwrap(); +// let mut subtree = Matcher::new( +// value, +// self.defs, +// id, +// self.frame.depth +// )?.peekable(); +// let mut dummy = Matcher::with_ops( +// value, +// self.defs, +// DUMMY_OPS, +// self.frame.depth +// )?.peekable(); +// // may panic. +// let peeked = subtree.peek(); +// // shouldn't panic. +// let _ = dummy.peek(); +// // push Holder after peek. +// self.frame.path.push(Holder::default()); +// let mut holder = self.frame.path.last_mut().unwrap(); +// holder.parent = Some(value); +// holder.iterator = Some(Box::new(std::iter::empty())); +// match peeked { +// None if skippable => { +// holder.value = HolderState::ValueSubtree(dummy, value); +// Ok(true) +// }, +// Some(&Ok(_)) | None => { +// drop(peeked); +// holder.value = HolderState::ValueSubtree(subtree, value); +// Ok(true) +// }, +// Some(&Err(ref e)) => { +// Err(e.clone()) +// }, +// } +// }, +// _ => unreachable!("on_not_in_key") +// } +// } +// +// fn collect_results(&mut self) -> Matches<'a, 'b, T> { +// let mut res: Matches<'a, 'b, T> = Default::default(); +// for holder in &mut self.frame.path { +// // make sure it's not empty. +// assert!(holder.value.has_value()); +// // handle subtrees. +// if let Some(matcher) = holder.value.subtree() { +// if let Some(matches) = matcher.next() { +// // NOTE: we have checked these already. +// // (and if we haven't, that's a bug.) +// res.extend(matches.unwrap()); +// } +// } +// // handle pairs. +// if let Some(pair) = holder.value.pair() { +// if let Some(name) = holder.name { +// res.insert(name, pair); +// } +// } +// } +// res +// } +// +// fn on_end(&mut self) -> (bool, Matches<'a, 'b, T>) { +// match self.frame.op() { +// PatternElement::End => { +// assert!(!self.frame.path.last().expect("path").value.is_empty()); +// let res = self.collect_results(); +// if !self.frame.prev() { +// // NOTE: frame.prev() must always be called, even if this +// // gets replaced with debug_assert!() in the future. +// assert!(false, "frame.prev()"); +// } +// (true, res) +// } +// PatternElement::ApplyPredicate {..} => { +// assert!(!self.frame.in_key); +// let res = self.collect_results(); +// self.frame.path.clear(); +// (false, res) +// } +// _ => unreachable!("on_end") +// } +// } +//} +// +//impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> { +// type Item = Result<BTreeMap<&'a str, KVPair<'b, T>>, MatchError>; +// +// fn next(&mut self) -> Option<Self::Item> { +// if self.frame.ops.is_empty() { +// if !self.frame.path.is_empty() { +// self.frame.path.clear(); +// return Some(Ok(Default::default())); +// } +// } +// while !self.frame.path.is_empty() { +// if !self.frame.next() { +// let (in_key, res) = self.on_end(); +// self.frame.in_key = in_key; +// return Some(Ok(res)); +// } else { +// let in_key = if self.frame.in_key { +// self.on_in_key() +// } else { +// self.on_not_in_key() +// }; +// match in_key { +// Ok(in_key) => self.frame.in_key = in_key, +// Err(e) => { +// self.frame.path.clear(); +// return Some(Err(e)) +// }, +// } +// } +// } +// None +// } //} - -impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> { - #[rustfmt::skip] - fn is_empty(&self) -> bool { - match self { - | HolderState::EmptyKey - | HolderState::EmptyKeySubtree - //| HolderState::EmptyValueSubtree - => true, _ => false - } - } - - fn has_value(&self) -> bool { - !self.is_empty() - } - - fn kind(&self) -> HolderKind { - match self { - | HolderState::EmptyKey - | HolderState::Key(_) - => HolderKind::Key, - | HolderState::EmptyKeySubtree - | HolderState::KeySubtree(_, _) - => HolderKind::KeySubtree, - //| HolderState::EmptyValueSubtree - | HolderState::ValueSubtree(_, _) - => HolderKind::ValueSubtree, - | HolderState::Value(_) - => HolderKind::Value, - } - } - - fn value(&self) -> Option<RefOwn<'b, T::Ref, T::Own>> { - match *self { - HolderState::Key((_, value)) => Some(value), - HolderState::KeySubtree(_, (_, value)) => Some(value), - HolderState::ValueSubtree(_, value) => Some(value), - HolderState::Value(value) => Some(value), - _ => None - } - } - - fn key(&self) -> Option<RefOwn<'b, T::Ref, T::Own>> { - match *self { - HolderState::Key((key, _)) => Some(key), - HolderState::KeySubtree(_, (key, _)) => Some(key), - _ => None - } - } - - fn pair(&self) -> Option<KVPair<'b, T>> { - match *self { - HolderState::Key(pair) => Some(pair), - HolderState::KeySubtree(_, pair) => Some(pair), - _ => None - } - } - - fn subtree(&mut self) -> Option<&mut Peekable<Matcher<'a, 'b, T>>> { - match *self { - HolderState::KeySubtree(ref mut subtree, _) => Some(subtree), - HolderState::ValueSubtree(ref mut subtree, _) => Some(subtree), - _ => None - } - } - - fn clear(&mut self) { - *self = match self.kind() { - HolderKind::Key => HolderState::EmptyKey, - HolderKind::KeySubtree => HolderState::EmptyKeySubtree, - HolderKind::ValueSubtree => unreachable!(), //HolderState::EmptyValueSubtree, - HolderKind::Value => unreachable!(), - }; - assert!(self.is_empty()); - } -} - -/// Stores a single match and associated metadata. -/// -/// A single match is generally a key-value pair, but may be a collection of -/// named pairs in the case of subtree matches, or just a value for the initial -/// holder. -struct Holder<'a, 'b, T: PatternTypes> { - name: Option<&'a str>, - value: HolderState<'a, 'b, T>, - parent: Option<RefOwn<'b, T::Ref, T::Own>>, - iterator: Option<Box<dyn Iterator<Item=KVPair<'b, T>> + 'b>>, - filters: Vec<Box<dyn (for<'c> Fn(&'c mut HolderState<'a, 'b, T>) -> Result<(), MatchError>) + 'a>>, -} - -impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> { - fn next(&mut self) -> Result<bool, MatchError> { - self.ensure_iterator()?; - if let Self { - value: ref mut v, - iterator: Some(ref mut it), - ref filters, - .. - } = self { - // check if we're in a subtree and (not) done. - if let Some(matcher) = v.subtree() { - if let Some(res) = matcher.peek() { - // report any errors - return res.as_ref().map(|_| true).map_err(|e| e.clone()); - } - } - let kind = v.kind(); - let mut next_v; - loop { - next_v = match it.next() { - Some(pair) => HolderState::Key(pair), - None => return Ok(false) - }; - for filter in filters { - filter(&mut next_v)?; - if next_v.is_empty() { - break; - } - } - if next_v.has_value() { - break; - } - } - assert!(next_v.has_value()); - assert_eq!(next_v.kind(), kind); - *v = next_v; - Ok(true) - } else { - unreachable!() - } - } - - /// Ensure `self.iterator.is_some()`, creating an iterator if necessary. - fn ensure_iterator(&mut self) -> Result<(), MatchError> { - if self.iterator.is_none() { - let iter = T::pairs(self.parent.unwrap()); - if iter.is_none() { - return Err(MatchError::UnsupportedOperation); - } - self.iterator = iter; - } - assert!(self.iterator.is_some()); - Ok(()) - } -} - -impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> { - fn default() -> Self { - Self { - name: Default::default(), - value: HolderState::EmptyKey, - parent: Default::default(), - iterator: Default::default(), - filters: Default::default(), - } - } -} - -pub struct Matcher<'a, 'b, T: PatternTypes> { - defs: &'a PatternConstants<T>, - frame: Frame<'a, 'b, T>, -} - -// TODO: -// -// [x] Arrow -// [x] StringKey -// [x] RegexKey -// [x] KeySubtree -// [x] ValueSubtree -// [x] Ident -// [x] Param (untested) -// [x] ApplyPredicate -// [x] End - -/// Helper for `PatternElement::StringKey`. -fn on_string_key<'a, 'b, T: PatternTypes>( - matcher: &mut Matcher<'a, 'b, T>, - id: usize, - skippable: bool, -) -> Result<bool, MatchError> { - let path = matcher.frame.path.last_mut().unwrap(); - assert!(path.iterator.is_none()); - let key = &matcher.defs.strings[id]; - let iter = T::get(path.parent.unwrap(), RefOwn::Str(key)); - match iter { - Some(None) if !skippable => Err(MatchError::ValidationError), - Some(opt) => { - path.iterator = Some(Box::new(opt.into_iter())); - Ok(true) - } - None => Err(MatchError::UnsupportedOperation), - } -} - -/// Helper for `PatternElement::ParameterKey`. -fn on_parameter_key<'a, 'b, T: PatternTypes>( - matcher: &mut Matcher<'a, 'b, T>, - id: usize, - skippable: bool, -) -> Result<bool, MatchError> { - let path = matcher.frame.path.last_mut().unwrap(); - assert!(path.iterator.is_none()); - let key = matcher.defs.defs[id]; - let iter = T::get(path.parent.unwrap(), RefOwn::Own(key)); - match iter { - Some(None) if !skippable => Err(MatchError::ValidationError), - Some(opt) => { - path.iterator = Some(Box::new(opt.into_iter())); - Ok(true) - } - None => Err(MatchError::UnsupportedOperation), - } -} - -/// Helper for `PatternElement::RegexKey`. -fn on_regex_key<'a, 'b, T: PatternTypes>( - matcher: &mut Matcher<'a, 'b, T>, - id: usize, - skippable: bool, -) -> Result<bool, MatchError> { - matcher.frame.path.last_mut().unwrap().ensure_iterator()?; - let re = &matcher.defs.regices[id]; - let path = matcher.frame.path.last_mut().unwrap(); - path.filters.push(Box::new(move |value| { - let s = T::as_str(value.key().unwrap()); - match (s.map_or(false, |s| re.is_match(s)), skippable) { - (true, _) => Ok(()), - (false, true) => { - value.clear(); - Ok(()) - }, - (false, false) => Err(MatchError::ValidationError), - } - })); - Ok(true) -} - -/// Helper for `PatternElement::KeySubtree`. -fn on_key_subtree<'a, 'b, T: PatternTypes>( - matcher: &mut Matcher<'a, 'b, T>, - id: usize, - skippable: bool, -) -> Result<bool, MatchError> { - let _ = skippable; // FIXME what should a skippable KeySubtree even do?! - matcher.frame.path.last_mut().unwrap().ensure_iterator()?; - let defs = matcher.defs; - let rlimit: usize = matcher.frame.depth; - let path = matcher.frame.path.last_mut().unwrap(); - assert!(path.value.is_empty()); - assert_eq!(path.value.kind(), HolderKind::Key); - path.value = HolderState::EmptyKeySubtree; - path.filters.push(Box::new(move |value| { - let key = value.key().unwrap(); - let mut subtree = Matcher::new(key, defs, id, rlimit)?.peekable(); - match subtree.peek() { - Some(&Ok(_)) => { - *value = HolderState::KeySubtree(subtree, value.pair().unwrap()); - Ok(()) - }, - Some(&Err(ref e)) => { - Err(e.clone()) - }, - None => { - value.clear(); - Ok(()) - } - } - })); - Ok(true) -} - -const DUMMY_OPS: &'static [PatternElement] = &[]; - -impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { - pub(crate) fn new(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants<T>, proto: usize, rlimit: usize) -> Result<Self, MatchError> { - let ops: &[_] = &defs.protos[proto]; - Self::with_ops(obj, defs, ops, rlimit) - } - - /// Constructs a Matcher that yields a single dummy result. - fn with_ops(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants<T>, ops: &'a [PatternElement], rlimit: usize) -> Result<Self, MatchError> { - let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?; - Ok(Self { - defs: defs, - frame: Frame { - //obj: obj, - ops: ops, - iar: None, - depth: depth, - path: { - let mut holder = Holder::default(); - holder.value = HolderState::Value(obj); - holder.iterator = Some(Box::new(std::iter::empty())); - vec![holder] - }, - in_key: false, - }, - }) - } - - fn on_in_key(&mut self) -> Result<bool, MatchError> { - match self.frame.op() { - PatternElement::End => { - let path = self.frame.path.last_mut().unwrap(); - if path.next()? { - Ok(false) - } else { - drop(path); - self.frame.path.pop().unwrap(); - // stop at previous End, or start of frame - while self.frame.prev() { - if matches!(self.frame.op(), PatternElement::End) { - break; - } - } - // is start of frame? - if !self.frame.prev() { - self.frame.path.clear(); - } - Ok(true) - } - }, - PatternElement::ApplyPredicate(id, skippable) => { - // failing on T::get() is already handled, but we may need a - // T::pairs(). construct it here. - self.frame.path.last_mut().unwrap().ensure_iterator()?; - let pred = &self.defs.predicates[id]; - let path = self.frame.path.last_mut().unwrap(); - path.filters.push(Box::new(move |value| { - match (pred(value.value().unwrap()), skippable) { - (true, _) => Ok(()), - (false, true) => { - value.clear(); - Ok(()) - }, - (false, false) => Err(MatchError::ValidationError), - } - })); - Ok(true) - }, - PatternElement::StringKey(id, skippable) => { - on_string_key(self, id, skippable) - }, - PatternElement::ParameterKey(id, skippable) => { - on_parameter_key(self, id, skippable) - }, - PatternElement::RegexKey(id, skippable) => { - on_regex_key(self, id, skippable) - }, - PatternElement::KeySubtree(id, skippable) => { - on_key_subtree(self, id, skippable) - }, - _ => unreachable!("on_in_key") - } - } - - fn on_not_in_key(&mut self) -> Result<bool, MatchError> { - match self.frame.op() { - PatternElement::Arrow => { - // this *should* always pass. - assert!(self.frame.path.last().unwrap().iterator.is_some()); - let mut holder = Holder::default(); - holder.parent = self.frame.path.last().unwrap().value.value(); - assert!(holder.parent.is_some()); - self.frame.path.push(holder); - Ok(false) - }, - PatternElement::Identifier(id) => { - let name = self.defs.strings.get(id).map(|s| &**s); - let path = self.frame.path.last_mut().unwrap(); - path.name = name; - assert!(path.iterator.is_none()); - // we don't actually create the iterator here, - // as we may still wanna use T::get() instead. - Ok(true) - }, - PatternElement::ApplyPredicate(id, skippable) => { - assert!(self.frame.path.len() == 1); - let pred = &self.defs.predicates[id]; - let value = self.frame.path.last().unwrap().value.value(); - match (pred(value.unwrap()), skippable) { - (true, _) => Ok(false), - (false, true) => { - self.frame.path.clear(); - // any Ok(_) will do - Ok(false) - }, - (false, false) => Err(MatchError::ValidationError), - } - }, - PatternElement::StringKey(id, skippable) => { - on_string_key(self, id, skippable) - }, - PatternElement::ParameterKey(id, skippable) => { - on_parameter_key(self, id, skippable) - }, - PatternElement::RegexKey(id, skippable) => { - on_regex_key(self, id, skippable) - }, - PatternElement::KeySubtree(id, skippable) => { - on_key_subtree(self, id, skippable) - }, - PatternElement::ValueSubtree(id, skippable) => { - let value = self.frame.path.last().unwrap().value.value().unwrap(); - let mut subtree = Matcher::new( - value, - self.defs, - id, - self.frame.depth - )?.peekable(); - let mut dummy = Matcher::with_ops( - value, - self.defs, - DUMMY_OPS, - self.frame.depth - )?.peekable(); - // may panic. - let peeked = subtree.peek(); - // shouldn't panic. - let _ = dummy.peek(); - // push Holder after peek. - self.frame.path.push(Holder::default()); - let mut holder = self.frame.path.last_mut().unwrap(); - holder.parent = Some(value); - holder.iterator = Some(Box::new(std::iter::empty())); - match peeked { - None if skippable => { - holder.value = HolderState::ValueSubtree(dummy, value); - Ok(true) - }, - Some(&Ok(_)) | None => { - drop(peeked); - holder.value = HolderState::ValueSubtree(subtree, value); - Ok(true) - }, - Some(&Err(ref e)) => { - Err(e.clone()) - }, - } - }, - _ => unreachable!("on_not_in_key") - } - } - - fn collect_results(&mut self) -> Matches<'a, 'b, T> { - let mut res: Matches<'a, 'b, T> = Default::default(); - for holder in &mut self.frame.path { - // make sure it's not empty. - assert!(holder.value.has_value()); - // handle subtrees. - if let Some(matcher) = holder.value.subtree() { - if let Some(matches) = matcher.next() { - // NOTE: we have checked these already. - // (and if we haven't, that's a bug.) - res.extend(matches.unwrap()); - } - } - // handle pairs. - if let Some(pair) = holder.value.pair() { - if let Some(name) = holder.name { - res.insert(name, pair); - } - } - } - res - } - - fn on_end(&mut self) -> (bool, Matches<'a, 'b, T>) { - match self.frame.op() { - PatternElement::End => { - assert!(!self.frame.path.last().expect("path").value.is_empty()); - let res = self.collect_results(); - if !self.frame.prev() { - // NOTE: frame.prev() must always be called, even if this - // gets replaced with debug_assert!() in the future. - assert!(false, "frame.prev()"); - } - (true, res) - } - PatternElement::ApplyPredicate {..} => { - assert!(!self.frame.in_key); - let res = self.collect_results(); - self.frame.path.clear(); - (false, res) - } - _ => unreachable!("on_end") - } - } -} - -impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> { - type Item = Result<BTreeMap<&'a str, KVPair<'b, T>>, MatchError>; - - fn next(&mut self) -> Option<Self::Item> { - if self.frame.ops.is_empty() { - if !self.frame.path.is_empty() { - self.frame.path.clear(); - return Some(Ok(Default::default())); - } - } - while !self.frame.path.is_empty() { - if !self.frame.next() { - let (in_key, res) = self.on_end(); - self.frame.in_key = in_key; - return Some(Ok(res)); - } else { - let in_key = if self.frame.in_key { - self.on_in_key() - } else { - self.on_not_in_key() - }; - match in_key { - Ok(in_key) => self.frame.in_key = in_key, - Err(e) => { - self.frame.path.clear(); - return Some(Err(e)) - }, - } - } - } - None - } -} |