From 937774ee1d172cf47a7fc12e435b7dd7c6464aaf Mon Sep 17 00:00:00 2001 From: SoniEx2 Date: Sun, 4 Sep 2022 20:45:19 -0300 Subject: Initial work on Serde VM stuff --- Cargo.toml | 3 +- src/errors.rs | 6 +- src/lib.rs | 7 +- src/pattern.rs | 15 +- src/type_tree.rs | 70 +++++ src/vm.rs | 674 ------------------------------------------------ src/vm/de.rs | 135 +++++++++- src/vm/mod.rs | 765 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 988 insertions(+), 687 deletions(-) create mode 100644 src/type_tree.rs delete mode 100644 src/vm.rs create mode 100644 src/vm/mod.rs diff --git a/Cargo.toml b/Cargo.toml index 1a85a35..a9e6de1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,7 @@ homepage = "https://soniex2.github.io/ganarchy/project/c0b4a8a326a320ac33c5d9d6b [dependencies] regex = "1" impl_trait = "0.1.7" +serde_transmute = "0.1.4" serde = "1.0.140" erased-serde = "0.3.21" @@ -21,7 +22,7 @@ erased-serde = "0.3.21" proptest = "1.0.0" serde_json = "1.0.82" serde = {version = "1.0.140", features = ["derive"]} -charx = "1" +charx = "1.0.0" [features] default = ['stable'] diff --git a/src/errors.rs b/src/errors.rs index b41b225..9b0025e 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -39,11 +39,11 @@ pub enum PatternError<'a> { // /// These are errors that may be returned by the matcher when matching a // /// pattern. // #[derive(Clone, Debug)] -// pub enum MatchError { +pub enum MatchError { // /// Returned if the pattern nests too deeply. // StackOverflow, // /// Returned if the pattern rejects the input. -// ValidationError, + ValidationError, // /// Returned if the pattern attempts an unsupported operation. // /// // /// In particular, if the [`PatternTypes`] doesn't support `get` or `pairs` @@ -52,4 +52,4 @@ pub enum PatternError<'a> { // UnsupportedOperation, // /// Returned if an unspecified non-critical error occurred. // Other -// } +} diff --git a/src/lib.rs b/src/lib.rs index a3a6e1e..f71d81c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,8 @@ // Copyright (C) 2021-2022 Soni L. // SPDX-License-Identifier: MIT OR Apache-2.0 +#![warn(elided_lifetimes_in_paths)] + //! Datafu is a regex-inspired query language. It was primarily //! designed for processing object trees parsed from configuration files, but //! can be used with anything that supports serde. @@ -99,6 +101,7 @@ //! pub mod errors; +pub mod type_tree; mod parser; mod pattern; mod vm; @@ -107,7 +110,7 @@ pub use pattern::Pattern; /// A predicate. pub type Predicate = dyn (Fn( - &mut dyn erased_serde::Deserializer<> + &mut dyn erased_serde::Deserializer<'_> ) -> bool) + Send + Sync; /// Helper to build predicates because closure inference is the worst. @@ -133,7 +136,7 @@ pub type Predicate = dyn (Fn( pub fn pred(f: F) -> Box where F: (Fn( - &mut dyn erased_serde::Deserializer<> + &mut dyn erased_serde::Deserializer<'_> ) -> bool) + Send + Sync + 'static, { Box::new(f) diff --git a/src/pattern.rs b/src/pattern.rs index 2e69714..fc3c8a7 100644 --- a/src/pattern.rs +++ b/src/pattern.rs @@ -6,16 +6,17 @@ use std::borrow::Borrow; use std::collections::BTreeMap; -use serde::Deserialize; -use serde::Deserializer; -use serde::Serialize; +use serde::de::Deserialize; +use serde::de::DeserializeSeed; +use serde::de::Deserializer; +use serde::ser::Serialize; use crate::Predicate; use crate::errors::PatternError; use crate::parser::parse; -//use crate::vm::Matcher; +use crate::vm; use crate::vm::PatternConstants; -//use crate::vm::MAX_CALLS; +use crate::vm::MAX_CALLS; /// A compiled Datafu pattern. /// @@ -83,11 +84,13 @@ impl Pattern { } /// Matches the pattern against an input. - pub fn deserialize<'de, Der, De>(&self, de: Der) -> Result + pub fn deserialize<'de, Der, De>(&self, der: Der) -> Result where Der: Deserializer<'de>, De: Deserialize<'de>, { + let pack = vm::Packer::new(&self.consts, MAX_CALLS).deserialize(der)?; + let de = De::deserialize(vm::Unpacker::new(pack, MAX_CALLS)); todo!() } } diff --git a/src/type_tree.rs b/src/type_tree.rs new file mode 100644 index 0000000..8e8098b --- /dev/null +++ b/src/type_tree.rs @@ -0,0 +1,70 @@ +// Copyright (C) 2021-2022 Soni L. +// SPDX-License-Identifier: MIT OR Apache-2.0 + +//! Type Tree support. +//! +//! Type Trees are a Datafu feature for extracting types from a `serde`-based +//! `Deserialize` in such a way that it can be used with Datafu patterns. +//! +//! They work by matching the `Deserialize` against some data, with the help of +//! `serde_transmute`. Datafu then collects the relevant `Deserialize` calls, +//! and uses them to infer an appropriate type tree for dynamic +//! deserialization. +//! +//! When introspecting the `Deserialize`, all matching parts are extracted, and +//! non-matching parts are ignored. Even if an error occurs, Datafu will gladly +//! infer a type tree for what it could match. +//! +//! For example, given a struct and the corresponding data: +//! +//! ``` +//! struct Foo { +//! bar: i32, +//! } +//! +//! let data = Foo { bar: 0 }; +//! ``` +//! +//! Building a type tree will first inspect the struct like so: +//! +//! 1. call `deserialize()` on `Foo`. +//! 2. inspect the `deserialize_struct` from `Foo`, storing the name and +//! fields. +//! 3. give `Foo` the appropriate visitor (from `data`), through +//! `serde_transmute`. +//! 4. inspect the `deserialize_i32` etc, also storing those. +//! +//! The resulting type tree can then be used in any pattern to effectively +//! match a `Foo`, but more efficiently than with a predicate. Another big +//! difference between predicates and type trees is how predicates are eager, +//! and can consume values that would otherwise be matched by the rest of a +//! pattern. +//! +//! Type trees are pretty flexible. Consider the following example: +//! +//! ``` +//! struct Foo { +//! bar: Vec, +//! } +//! +//! let data = Foo { bar: vec![1, 2, 3] }; +//! ``` +//! +//! This will actually produce a type tree which checks that the first 3 items +//! are `u32`! Further, when using different types for the predicate and the +//! data, you can get even more flexiblity. For example, with the following +//! struct and data: +//! +//! ``` +//! struct Foo { +//! bar: Vec, +//! } +//! +//! let data = (); +//! ``` +//! +//! Datafu will actually inspect the `deserialize_struct`, and then the +//! struct visitor will error. But despite the error, it'll still create a type +//! tree for the `deserialize_struct`! + +// TODO diff --git a/src/vm.rs b/src/vm.rs deleted file mode 100644 index ac5c95d..0000000 --- a/src/vm.rs +++ /dev/null @@ -1,674 +0,0 @@ -// Copyright (C) 2021-2022 Soni L. -// SPDX-License-Identifier: MIT OR Apache-2.0 - -//! The Datafu Virtual Machine. -//! -//! This is the stuff that actually matches the pattern. - -use regex::Regex; -use serde::Serialize; - -use crate::Predicate; -//use crate::errors::MatchError; - -mod de; - -/// Max depth for VM/serde recursion. -pub(crate) const MAX_CALLS: usize = 250; - -//type Matches<'a, 'b, T> = BTreeMap<&'a str, KVPair<'b, T>>; - -// maybe we should use a builder for this? -/// The constant pool for a pattern. -pub(crate) struct PatternConstants { - // last proto is implicitly the whole pattern. - pub(crate) protos: Vec>, - // Note that we can borrow these when creating the output map. - // https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f - pub(crate) strings: Vec, - pub(crate) regices: Vec, - pub(crate) predicates: Vec>, - // NOTE these are part of the constant pool and so have lifetime analogous - // to 'a (consistently used to indicate constant pool lifetime) when used - // elsewhere. In particular, they can't be yielded by the iterator. - pub(crate) defs: Vec, -} - -impl Default for PatternConstants { - fn default() -> Self { - Self { - protos: Default::default(), - strings: Default::default(), - regices: Default::default(), - predicates: Default::default(), - defs: Default::default(), - } - } -} - -/// A pattern element. -#[derive(Copy, Clone)] -pub(crate) enum PatternElement { - Arrow, - - Identifier(usize), - - StringKey(usize, bool), - RegexKey(usize, bool), - ParameterKey(usize, bool), - KeySubtree(usize, bool), - ValueSubtree(usize, bool), - ApplyPredicate(usize, bool), - - End -} - -//struct Frame<'a, 'b, T: PatternTypes> { -// //obj: RefOwn<'b, T::Ref, T::Own>, -// ops: &'a [PatternElement], -// iar: Option, -// depth: usize, -// path: Vec>, -// in_key: bool, -//} -// -//impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> { -// /// Advances the instruction address register. -// /// -// /// # Returns -// /// -// /// `true` if successful, `false` otherwise. -// fn next(&mut self) -> bool { -// let new = self.iar.map_or(0, |v| v + 1); -// new < self.ops.len() && { -// self.iar = Some(new); -// true -// } -// } -// -// /// Returns the current instruction. -// fn op(&self) -> PatternElement { -// self.ops[self.iar.expect("ops[iar]")] -// } -// -// /// Rewinds the instruction address register. -// /// -// /// # Returns -// /// -// /// `true` if successful, `false` otherwise. -// fn prev(&mut self) -> bool { -// let new = self.iar.expect("iar").checked_sub(1); -// new.is_some() && { -// self.iar = new; -// true -// } -// } -//} -// -///// Stores a single match. -///// -///// See also Holder. -//enum HolderState<'a, 'b, T: PatternTypes> { -// /// Empty holder, for a key-value pair. -// EmptyKey, -// /// Empty holder, for a Matcher and a key-value pair. -// EmptyKeySubtree, -// // /// Empty holder, for a Matcher and a value. -// // EmptyValueSubtree, -// /// Occupied holder, for a key-value pair.. -// Key(KVPair<'b, T>), -// /// Occupied holder, for a Matcher and a key-value pair. -// KeySubtree(Peekable>, KVPair<'b, T>), -// /// Occupied holder, for a Matcher and a value. The empty variant is -// /// omitted as it would never be used otherwise. -// ValueSubtree(Peekable>, RefOwn<'b, T::Ref, T::Own>), -// /// Occupied holder, for a value. The empty variant is omitted as it would -// /// never be used otherwise. -// Value(RefOwn<'b, T::Ref, T::Own>), -//} -// -///// Helper enum for HolderState. -//#[derive(Copy, Clone, Debug, Eq, PartialEq)] -//enum HolderKind { -// Key, -// KeySubtree, -// ValueSubtree, -// Value -//} -// -////impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> { -//// fn clone(&self) -> Self { -//// match self { -//// HolderState::EmptyKey => HolderState::EmptyKey, -//// HolderState::EmptySubtree => HolderState::EmptySubtree, -//// HolderState::Key(v) => HolderState::Key(*v), -//// HolderState::KeySubtree(m, v) => HolderState::KeySubtree(m.clone(), *v), -//// HolderState::ValueSubtree(m, v) => HolderState::ValueSubtree(m.clone(), *v), -//// HolderState::Value(v) => HolderState::Value(*v), -//// } -//// } -////} -// -//impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> { -// #[rustfmt::skip] -// fn is_empty(&self) -> bool { -// match self { -// | HolderState::EmptyKey -// | HolderState::EmptyKeySubtree -// //| HolderState::EmptyValueSubtree -// => true, _ => false -// } -// } -// -// fn has_value(&self) -> bool { -// !self.is_empty() -// } -// -// fn kind(&self) -> HolderKind { -// match self { -// | HolderState::EmptyKey -// | HolderState::Key(_) -// => HolderKind::Key, -// | HolderState::EmptyKeySubtree -// | HolderState::KeySubtree(_, _) -// => HolderKind::KeySubtree, -// //| HolderState::EmptyValueSubtree -// | HolderState::ValueSubtree(_, _) -// => HolderKind::ValueSubtree, -// | HolderState::Value(_) -// => HolderKind::Value, -// } -// } -// -// fn value(&self) -> Option> { -// match *self { -// HolderState::Key((_, value)) => Some(value), -// HolderState::KeySubtree(_, (_, value)) => Some(value), -// HolderState::ValueSubtree(_, value) => Some(value), -// HolderState::Value(value) => Some(value), -// _ => None -// } -// } -// -// fn key(&self) -> Option> { -// match *self { -// HolderState::Key((key, _)) => Some(key), -// HolderState::KeySubtree(_, (key, _)) => Some(key), -// _ => None -// } -// } -// -// fn pair(&self) -> Option> { -// match *self { -// HolderState::Key(pair) => Some(pair), -// HolderState::KeySubtree(_, pair) => Some(pair), -// _ => None -// } -// } -// -// fn subtree(&mut self) -> Option<&mut Peekable>> { -// match *self { -// HolderState::KeySubtree(ref mut subtree, _) => Some(subtree), -// HolderState::ValueSubtree(ref mut subtree, _) => Some(subtree), -// _ => None -// } -// } -// -// fn clear(&mut self) { -// *self = match self.kind() { -// HolderKind::Key => HolderState::EmptyKey, -// HolderKind::KeySubtree => HolderState::EmptyKeySubtree, -// HolderKind::ValueSubtree => unreachable!(), //HolderState::EmptyValueSubtree, -// HolderKind::Value => unreachable!(), -// }; -// assert!(self.is_empty()); -// } -//} -// -///// Stores a single match and associated metadata. -///// -///// A single match is generally a key-value pair, but may be a collection of -///// named pairs in the case of subtree matches, or just a value for the initial -///// holder. -//struct Holder<'a, 'b, T: PatternTypes> { -// name: Option<&'a str>, -// value: HolderState<'a, 'b, T>, -// parent: Option>, -// iterator: Option> + 'b>>, -// filters: Vec Fn(&'c mut HolderState<'a, 'b, T>) -> Result<(), MatchError>) + 'a>>, -//} -// -//impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> { -// fn next(&mut self) -> Result { -// self.ensure_iterator()?; -// if let Self { -// value: ref mut v, -// iterator: Some(ref mut it), -// ref filters, -// .. -// } = self { -// // check if we're in a subtree and (not) done. -// if let Some(matcher) = v.subtree() { -// if let Some(res) = matcher.peek() { -// // report any errors -// return res.as_ref().map(|_| true).map_err(|e| e.clone()); -// } -// } -// let kind = v.kind(); -// let mut next_v; -// loop { -// next_v = match it.next() { -// Some(pair) => HolderState::Key(pair), -// None => return Ok(false) -// }; -// for filter in filters { -// filter(&mut next_v)?; -// if next_v.is_empty() { -// break; -// } -// } -// if next_v.has_value() { -// break; -// } -// } -// assert!(next_v.has_value()); -// assert_eq!(next_v.kind(), kind); -// *v = next_v; -// Ok(true) -// } else { -// unreachable!() -// } -// } -// -// /// Ensure `self.iterator.is_some()`, creating an iterator if necessary. -// fn ensure_iterator(&mut self) -> Result<(), MatchError> { -// if self.iterator.is_none() { -// let iter = T::pairs(self.parent.unwrap()); -// if iter.is_none() { -// return Err(MatchError::UnsupportedOperation); -// } -// self.iterator = iter; -// } -// assert!(self.iterator.is_some()); -// Ok(()) -// } -//} -// -//impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> { -// fn default() -> Self { -// Self { -// name: Default::default(), -// value: HolderState::EmptyKey, -// parent: Default::default(), -// iterator: Default::default(), -// filters: Default::default(), -// } -// } -//} -// -//pub struct Matcher<'a, 'b, T: PatternTypes> { -// defs: &'a PatternConstants, -// frame: Frame<'a, 'b, T>, -//} -// -//// TODO: -//// -//// [x] Arrow -//// [x] StringKey -//// [x] RegexKey -//// [x] KeySubtree -//// [x] ValueSubtree -//// [x] Ident -//// [x] Param (untested) -//// [x] ApplyPredicate -//// [x] End -// -///// Helper for `PatternElement::StringKey`. -//fn on_string_key<'a, 'b, T: PatternTypes>( -// matcher: &mut Matcher<'a, 'b, T>, -// id: usize, -// skippable: bool, -//) -> Result { -// let path = matcher.frame.path.last_mut().unwrap(); -// assert!(path.iterator.is_none()); -// let key = &matcher.defs.strings[id]; -// let iter = T::get(path.parent.unwrap(), RefOwn::Str(key)); -// match iter { -// Some(None) if !skippable => Err(MatchError::ValidationError), -// Some(opt) => { -// path.iterator = Some(Box::new(opt.into_iter())); -// Ok(true) -// } -// None => Err(MatchError::UnsupportedOperation), -// } -//} -// -///// Helper for `PatternElement::ParameterKey`. -//fn on_parameter_key<'a, 'b, T: PatternTypes>( -// matcher: &mut Matcher<'a, 'b, T>, -// id: usize, -// skippable: bool, -//) -> Result { -// let path = matcher.frame.path.last_mut().unwrap(); -// assert!(path.iterator.is_none()); -// let key = matcher.defs.defs[id]; -// let iter = T::get(path.parent.unwrap(), RefOwn::Own(key)); -// match iter { -// Some(None) if !skippable => Err(MatchError::ValidationError), -// Some(opt) => { -// path.iterator = Some(Box::new(opt.into_iter())); -// Ok(true) -// } -// None => Err(MatchError::UnsupportedOperation), -// } -//} -// -///// Helper for `PatternElement::RegexKey`. -//fn on_regex_key<'a, 'b, T: PatternTypes>( -// matcher: &mut Matcher<'a, 'b, T>, -// id: usize, -// skippable: bool, -//) -> Result { -// matcher.frame.path.last_mut().unwrap().ensure_iterator()?; -// let re = &matcher.defs.regices[id]; -// let path = matcher.frame.path.last_mut().unwrap(); -// path.filters.push(Box::new(move |value| { -// let s = T::as_str(value.key().unwrap()); -// match (s.map_or(false, |s| re.is_match(s)), skippable) { -// (true, _) => Ok(()), -// (false, true) => { -// value.clear(); -// Ok(()) -// }, -// (false, false) => Err(MatchError::ValidationError), -// } -// })); -// Ok(true) -//} -// -///// Helper for `PatternElement::KeySubtree`. -//fn on_key_subtree<'a, 'b, T: PatternTypes>( -// matcher: &mut Matcher<'a, 'b, T>, -// id: usize, -// skippable: bool, -//) -> Result { -// let _ = skippable; // FIXME what should a skippable KeySubtree even do?! -// matcher.frame.path.last_mut().unwrap().ensure_iterator()?; -// let defs = matcher.defs; -// let rlimit: usize = matcher.frame.depth; -// let path = matcher.frame.path.last_mut().unwrap(); -// assert!(path.value.is_empty()); -// assert_eq!(path.value.kind(), HolderKind::Key); -// path.value = HolderState::EmptyKeySubtree; -// path.filters.push(Box::new(move |value| { -// let key = value.key().unwrap(); -// let mut subtree = Matcher::new(key, defs, id, rlimit)?.peekable(); -// match subtree.peek() { -// Some(&Ok(_)) => { -// *value = HolderState::KeySubtree(subtree, value.pair().unwrap()); -// Ok(()) -// }, -// Some(&Err(ref e)) => { -// Err(e.clone()) -// }, -// None => { -// value.clear(); -// Ok(()) -// } -// } -// })); -// Ok(true) -//} -// -//const DUMMY_OPS: &'static [PatternElement] = &[]; -// -//impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { -// pub(crate) fn new(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants, proto: usize, rlimit: usize) -> Result { -// let ops: &[_] = &defs.protos[proto]; -// Self::with_ops(obj, defs, ops, rlimit) -// } -// -// /// Constructs a Matcher that yields a single dummy result. -// fn with_ops(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants, ops: &'a [PatternElement], rlimit: usize) -> Result { -// let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?; -// Ok(Self { -// defs: defs, -// frame: Frame { -// //obj: obj, -// ops: ops, -// iar: None, -// depth: depth, -// path: { -// let mut holder = Holder::default(); -// holder.value = HolderState::Value(obj); -// holder.iterator = Some(Box::new(std::iter::empty())); -// vec![holder] -// }, -// in_key: false, -// }, -// }) -// } -// -// fn on_in_key(&mut self) -> Result { -// match self.frame.op() { -// PatternElement::End => { -// let path = self.frame.path.last_mut().unwrap(); -// if path.next()? { -// Ok(false) -// } else { -// drop(path); -// self.frame.path.pop().unwrap(); -// // stop at previous End, or start of frame -// while self.frame.prev() { -// if matches!(self.frame.op(), PatternElement::End) { -// break; -// } -// } -// // is start of frame? -// if !self.frame.prev() { -// self.frame.path.clear(); -// } -// Ok(true) -// } -// }, -// PatternElement::ApplyPredicate(id, skippable) => { -// // failing on T::get() is already handled, but we may need a -// // T::pairs(). construct it here. -// self.frame.path.last_mut().unwrap().ensure_iterator()?; -// let pred = &self.defs.predicates[id]; -// let path = self.frame.path.last_mut().unwrap(); -// path.filters.push(Box::new(move |value| { -// match (pred(value.value().unwrap()), skippable) { -// (true, _) => Ok(()), -// (false, true) => { -// value.clear(); -// Ok(()) -// }, -// (false, false) => Err(MatchError::ValidationError), -// } -// })); -// Ok(true) -// }, -// PatternElement::StringKey(id, skippable) => { -// on_string_key(self, id, skippable) -// }, -// PatternElement::ParameterKey(id, skippable) => { -// on_parameter_key(self, id, skippable) -// }, -// PatternElement::RegexKey(id, skippable) => { -// on_regex_key(self, id, skippable) -// }, -// PatternElement::KeySubtree(id, skippable) => { -// on_key_subtree(self, id, skippable) -// }, -// _ => unreachable!("on_in_key") -// } -// } -// -// fn on_not_in_key(&mut self) -> Result { -// match self.frame.op() { -// PatternElement::Arrow => { -// // this *should* always pass. -// assert!(self.frame.path.last().unwrap().iterator.is_some()); -// let mut holder = Holder::default(); -// holder.parent = self.frame.path.last().unwrap().value.value(); -// assert!(holder.parent.is_some()); -// self.frame.path.push(holder); -// Ok(false) -// }, -// PatternElement::Identifier(id) => { -// let name = self.defs.strings.get(id).map(|s| &**s); -// let path = self.frame.path.last_mut().unwrap(); -// path.name = name; -// assert!(path.iterator.is_none()); -// // we don't actually create the iterator here, -// // as we may still wanna use T::get() instead. -// Ok(true) -// }, -// PatternElement::ApplyPredicate(id, skippable) => { -// assert!(self.frame.path.len() == 1); -// let pred = &self.defs.predicates[id]; -// let value = self.frame.path.last().unwrap().value.value(); -// match (pred(value.unwrap()), skippable) { -// (true, _) => Ok(false), -// (false, true) => { -// self.frame.path.clear(); -// // any Ok(_) will do -// Ok(false) -// }, -// (false, false) => Err(MatchError::ValidationError), -// } -// }, -// PatternElement::StringKey(id, skippable) => { -// on_string_key(self, id, skippable) -// }, -// PatternElement::ParameterKey(id, skippable) => { -// on_parameter_key(self, id, skippable) -// }, -// PatternElement::RegexKey(id, skippable) => { -// on_regex_key(self, id, skippable) -// }, -// PatternElement::KeySubtree(id, skippable) => { -// on_key_subtree(self, id, skippable) -// }, -// PatternElement::ValueSubtree(id, skippable) => { -// let value = self.frame.path.last().unwrap().value.value().unwrap(); -// let mut subtree = Matcher::new( -// value, -// self.defs, -// id, -// self.frame.depth -// )?.peekable(); -// let mut dummy = Matcher::with_ops( -// value, -// self.defs, -// DUMMY_OPS, -// self.frame.depth -// )?.peekable(); -// // may panic. -// let peeked = subtree.peek(); -// // shouldn't panic. -// let _ = dummy.peek(); -// // push Holder after peek. -// self.frame.path.push(Holder::default()); -// let mut holder = self.frame.path.last_mut().unwrap(); -// holder.parent = Some(value); -// holder.iterator = Some(Box::new(std::iter::empty())); -// match peeked { -// None if skippable => { -// holder.value = HolderState::ValueSubtree(dummy, value); -// Ok(true) -// }, -// Some(&Ok(_)) | None => { -// drop(peeked); -// holder.value = HolderState::ValueSubtree(subtree, value); -// Ok(true) -// }, -// Some(&Err(ref e)) => { -// Err(e.clone()) -// }, -// } -// }, -// _ => unreachable!("on_not_in_key") -// } -// } -// -// fn collect_results(&mut self) -> Matches<'a, 'b, T> { -// let mut res: Matches<'a, 'b, T> = Default::default(); -// for holder in &mut self.frame.path { -// // make sure it's not empty. -// assert!(holder.value.has_value()); -// // handle subtrees. -// if let Some(matcher) = holder.value.subtree() { -// if let Some(matches) = matcher.next() { -// // NOTE: we have checked these already. -// // (and if we haven't, that's a bug.) -// res.extend(matches.unwrap()); -// } -// } -// // handle pairs. -// if let Some(pair) = holder.value.pair() { -// if let Some(name) = holder.name { -// res.insert(name, pair); -// } -// } -// } -// res -// } -// -// fn on_end(&mut self) -> (bool, Matches<'a, 'b, T>) { -// match self.frame.op() { -// PatternElement::End => { -// assert!(!self.frame.path.last().expect("path").value.is_empty()); -// let res = self.collect_results(); -// if !self.frame.prev() { -// // NOTE: frame.prev() must always be called, even if this -// // gets replaced with debug_assert!() in the future. -// assert!(false, "frame.prev()"); -// } -// (true, res) -// } -// PatternElement::ApplyPredicate {..} => { -// assert!(!self.frame.in_key); -// let res = self.collect_results(); -// self.frame.path.clear(); -// (false, res) -// } -// _ => unreachable!("on_end") -// } -// } -//} -// -//impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> { -// type Item = Result>, MatchError>; -// -// fn next(&mut self) -> Option { -// if self.frame.ops.is_empty() { -// if !self.frame.path.is_empty() { -// self.frame.path.clear(); -// return Some(Ok(Default::default())); -// } -// } -// while !self.frame.path.is_empty() { -// if !self.frame.next() { -// let (in_key, res) = self.on_end(); -// self.frame.in_key = in_key; -// return Some(Ok(res)); -// } else { -// let in_key = if self.frame.in_key { -// self.on_in_key() -// } else { -// self.on_not_in_key() -// }; -// match in_key { -// Ok(in_key) => self.frame.in_key = in_key, -// Err(e) => { -// self.frame.path.clear(); -// return Some(Err(e)) -// }, -// } -// } -// } -// None -// } -//} diff --git a/src/vm/de.rs b/src/vm/de.rs index 7039ea9..4d0d097 100644 --- a/src/vm/de.rs +++ b/src/vm/de.rs @@ -1,6 +1,139 @@ // Copyright (C) 2022 Soni L. // SPDX-License-Identifier: MIT OR Apache-2.0 -use crate::vm; +//! Deserialization-related parts of the VM. +use serde::Serialize; +use serde::de::Error as _; +use super::PatternConstants; +use super::PatternElement; +use super::Pack; + +/// A `DeserializeSeed` for Datafu input. +/// +/// This converts from Serde to Datafu's internal representation (a "pack"). +pub struct Packer<'pat, O: Serialize> { + /// The pattern currently being processed. + pat: &'pat PatternConstants, + /// The instructions/function currently being processed. + ops: &'pat [PatternElement], + /// Maximum number of calls. + call_limit: usize, +} + +impl<'pat, O: Serialize> Packer<'pat, O> { + pub(crate) fn new( + pat: &'pat PatternConstants, + call_limit: usize, + ) -> Self { + Self { + pat, call_limit, ops: &pat.protos.last().unwrap()[..], + } + } +} + +impl<'pat, 'de, O> serde::de::DeserializeSeed<'de> for Packer<'pat, O> +where + O: Serialize, +{ + type Value = Pack; + fn deserialize(self, deserializer: D) -> Result + where + D: serde::Deserializer<'de> + { + // check the first op + let first = self.ops.first(); + match first { + Some(PatternElement::ApplyPredicate(id, skippable)) => { + let predicate = &self.pat.predicates[*id]; + let ok = predicate(todo!()); + match (ok, skippable) { + (true, _) => { + todo!() + }, + (false, false) => { + return Err(D::Error::custom("predicate didn't match")); + }, + (false, true) => { + todo!() + }, + } + }, + _ => { + dbg!(first); + todo!() + }, + } + } +} + +/// A `Deserializer` for Datafu output. +/// +/// This converts from Datafu's internal representation (a "pack") into the +/// desired output type. +pub struct Unpacker { + pack: Pack, + call_limit: usize, +} + +impl Unpacker { + /// Unpacks a Datafu "pack". + pub fn new(pack: Pack, call_limit: usize) -> Self { + Self { + pack, call_limit, + } + } +} + +impl<'de> serde::Deserializer<'de> for Unpacker { + // TODO datafu errors + type Error = serde::de::value::Error; + fn deserialize_any(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_bool(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_i8(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_i16(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_i32(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_i64(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_u8(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_u16(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_u32(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_u64(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_f32(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_f64(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_char(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_str(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_string(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_bytes(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_byte_buf(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_option(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_unit(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_unit_struct(self, _: &'static str, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_newtype_struct(self, _: &'static str, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_seq(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_tuple(self, _: usize, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_tuple_struct(self, _: &'static str, _: usize, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_map(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_struct( + self, + _: &'static str, + fields: &'static [&'static str], + visitor: V, + ) -> Result + where + V: serde::de::Visitor<'de>, + { + todo!() + } + fn deserialize_enum(self, _: &'static str, _: &'static [&'static str], _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_identifier(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_ignored_any(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } +} + +/// A Deserializer for collecting matches from [`crate::Predicate`]s. +/// +/// What are we doing? +/// +/// We certainly have regrets. +pub struct PredicateCollector { +} diff --git a/src/vm/mod.rs b/src/vm/mod.rs new file mode 100644 index 0000000..92a99d7 --- /dev/null +++ b/src/vm/mod.rs @@ -0,0 +1,765 @@ +// Copyright (C) 2021-2022 Soni L. +// SPDX-License-Identifier: MIT OR Apache-2.0 + +//! The Datafu Virtual Machine. +//! +//! This is the stuff that actually matches the pattern. + +use regex::Regex; +use serde::Serialize; + +use crate::Predicate; +//use crate::errors::MatchError; + +mod de; + +pub use de::Unpacker; +pub use de::Packer; + +/// Max depth for VM/serde recursion. +pub(crate) const MAX_CALLS: usize = 250; + +//type Matches<'a, 'b, T> = BTreeMap<&'a str, KVPair<'b, T>>; + +// maybe we should use a builder for this? +/// The constant pool for a pattern. +pub(crate) struct PatternConstants { + // last proto is implicitly the whole pattern. + pub(crate) protos: Vec>, + // Note that we can borrow these when creating the output map. + // https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f + pub(crate) strings: Vec, + pub(crate) regices: Vec, + pub(crate) predicates: Vec>, + // NOTE these are part of the constant pool and so have lifetime analogous + // to 'a (consistently used to indicate constant pool lifetime) when used + // elsewhere. In particular, they can't be yielded by the iterator. + pub(crate) defs: Vec, +} + +impl Default for PatternConstants { + fn default() -> Self { + Self { + protos: Default::default(), + strings: Default::default(), + regices: Default::default(), + predicates: Default::default(), + defs: Default::default(), + } + } +} + +impl std::fmt::Debug for PatternConstants { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct( + "PatternConstants" + ).field( + "protos", &self.protos, + ).field( + "strings", &self.strings, + ).field( + "regices", &self.regices, + ).field( + "predicates", + &format_args!("({} predicates)", self.predicates.len()), + ).field( + "defs", + &format_args!("FIXME"), + ).finish() + } +} + +/// A pattern element. +#[derive(Copy, Clone, Debug)] +pub(crate) enum PatternElement { + Arrow, + + Identifier(usize), + + StringKey(usize, bool), + RegexKey(usize, bool), + ParameterKey(usize, bool), + KeySubtree(usize, bool), + ValueSubtree(usize, bool), + + /// Represents a predicate which must be applied. + /// + /// These are custom, arbitrary predicates, powered by serde. They're + /// represented by `:$foo` in a pattern. + ApplyPredicate( + /// The predicate index (in `PatternConstants.predicates`). + usize, + /// Whether to skip non-matching values, instead of erroring. + bool, + ), + + /// Represents a type expectation. + /// + /// These are similar to predicates. They're represented by `:foo`, but are + /// built-in and provide functionality not supported by predicates. + /// + /// Specifically, predicates cannot ask serde for a map or a list directly. + /// Instead, they'd be required to parse a whole map/list/etc, which could + /// cause issues which datafu is designed to avoid. (Datafu is designed to + /// resist malicious input more so than arbitrary serde deserializers.) + Type( + /// The expected type. + Type, + /// Whether to skip non-matching values, instead of erroring. + bool, + ), + + End, +} + +/// The types datafu and serde currently support. +/// +/// These are used as expectations for serde (e.g. +/// `Deserializer::deserialize_string`). +#[derive(Copy, Clone, Debug)] +pub(crate) enum Type { + Bool, + I8, + I16, + I32, + I64, + I128, + U8, + U16, + U32, + U64, + U128, + F32, + F64, + Char, + Str, + String, + Bytes, + ByteBuf, + Option, + Unit, + Seq, + Tuple(usize), + Map, + // these aren't really supported: + // UnitStruct, UnitVariant, NewtypeStruct, NewtypeVariant, TupleStruct, + // TupleVariant, Struct, StructVariant + // instead we use type trees for that. + /// Adapter for Type Trees. See `crate::type_tree` for more details. + Of { + /// The type tree index (in `PatternConstants.type_trees`). + type_tree: usize, + }, +} + +pub struct Pack; + +//struct Frame<'a, 'b, T: PatternTypes> { +// //obj: RefOwn<'b, T::Ref, T::Own>, +// ops: &'a [PatternElement], +// iar: Option, +// depth: usize, +// path: Vec>, +// in_key: bool, +//} +// +//impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> { +// /// Advances the instruction address register. +// /// +// /// # Returns +// /// +// /// `true` if successful, `false` otherwise. +// fn next(&mut self) -> bool { +// let new = self.iar.map_or(0, |v| v + 1); +// new < self.ops.len() && { +// self.iar = Some(new); +// true +// } +// } +// +// /// Returns the current instruction. +// fn op(&self) -> PatternElement { +// self.ops[self.iar.expect("ops[iar]")] +// } +// +// /// Rewinds the instruction address register. +// /// +// /// # Returns +// /// +// /// `true` if successful, `false` otherwise. +// fn prev(&mut self) -> bool { +// let new = self.iar.expect("iar").checked_sub(1); +// new.is_some() && { +// self.iar = new; +// true +// } +// } +//} +// +///// Stores a single match. +///// +///// See also Holder. +//enum HolderState<'a, 'b, T: PatternTypes> { +// /// Empty holder, for a key-value pair. +// EmptyKey, +// /// Empty holder, for a Matcher and a key-value pair. +// EmptyKeySubtree, +// // /// Empty holder, for a Matcher and a value. +// // EmptyValueSubtree, +// /// Occupied holder, for a key-value pair.. +// Key(KVPair<'b, T>), +// /// Occupied holder, for a Matcher and a key-value pair. +// KeySubtree(Peekable>, KVPair<'b, T>), +// /// Occupied holder, for a Matcher and a value. The empty variant is +// /// omitted as it would never be used otherwise. +// ValueSubtree(Peekable>, RefOwn<'b, T::Ref, T::Own>), +// /// Occupied holder, for a value. The empty variant is omitted as it would +// /// never be used otherwise. +// Value(RefOwn<'b, T::Ref, T::Own>), +//} +// +///// Helper enum for HolderState. +//#[derive(Copy, Clone, Debug, Eq, PartialEq)] +//enum HolderKind { +// Key, +// KeySubtree, +// ValueSubtree, +// Value +//} +// +////impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> { +//// fn clone(&self) -> Self { +//// match self { +//// HolderState::EmptyKey => HolderState::EmptyKey, +//// HolderState::EmptySubtree => HolderState::EmptySubtree, +//// HolderState::Key(v) => HolderState::Key(*v), +//// HolderState::KeySubtree(m, v) => HolderState::KeySubtree(m.clone(), *v), +//// HolderState::ValueSubtree(m, v) => HolderState::ValueSubtree(m.clone(), *v), +//// HolderState::Value(v) => HolderState::Value(*v), +//// } +//// } +////} +// +//impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> { +// #[rustfmt::skip] +// fn is_empty(&self) -> bool { +// match self { +// | HolderState::EmptyKey +// | HolderState::EmptyKeySubtree +// //| HolderState::EmptyValueSubtree +// => true, _ => false +// } +// } +// +// fn has_value(&self) -> bool { +// !self.is_empty() +// } +// +// fn kind(&self) -> HolderKind { +// match self { +// | HolderState::EmptyKey +// | HolderState::Key(_) +// => HolderKind::Key, +// | HolderState::EmptyKeySubtree +// | HolderState::KeySubtree(_, _) +// => HolderKind::KeySubtree, +// //| HolderState::EmptyValueSubtree +// | HolderState::ValueSubtree(_, _) +// => HolderKind::ValueSubtree, +// | HolderState::Value(_) +// => HolderKind::Value, +// } +// } +// +// fn value(&self) -> Option> { +// match *self { +// HolderState::Key((_, value)) => Some(value), +// HolderState::KeySubtree(_, (_, value)) => Some(value), +// HolderState::ValueSubtree(_, value) => Some(value), +// HolderState::Value(value) => Some(value), +// _ => None +// } +// } +// +// fn key(&self) -> Option> { +// match *self { +// HolderState::Key((key, _)) => Some(key), +// HolderState::KeySubtree(_, (key, _)) => Some(key), +// _ => None +// } +// } +// +// fn pair(&self) -> Option> { +// match *self { +// HolderState::Key(pair) => Some(pair), +// HolderState::KeySubtree(_, pair) => Some(pair), +// _ => None +// } +// } +// +// fn subtree(&mut self) -> Option<&mut Peekable>> { +// match *self { +// HolderState::KeySubtree(ref mut subtree, _) => Some(subtree), +// HolderState::ValueSubtree(ref mut subtree, _) => Some(subtree), +// _ => None +// } +// } +// +// fn clear(&mut self) { +// *self = match self.kind() { +// HolderKind::Key => HolderState::EmptyKey, +// HolderKind::KeySubtree => HolderState::EmptyKeySubtree, +// HolderKind::ValueSubtree => unreachable!(), //HolderState::EmptyValueSubtree, +// HolderKind::Value => unreachable!(), +// }; +// assert!(self.is_empty()); +// } +//} +// +///// Stores a single match and associated metadata. +///// +///// A single match is generally a key-value pair, but may be a collection of +///// named pairs in the case of subtree matches, or just a value for the initial +///// holder. +//struct Holder<'a, 'b, T: PatternTypes> { +// name: Option<&'a str>, +// value: HolderState<'a, 'b, T>, +// parent: Option>, +// iterator: Option> + 'b>>, +// filters: Vec Fn(&'c mut HolderState<'a, 'b, T>) -> Result<(), MatchError>) + 'a>>, +//} +// +//impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> { +// fn next(&mut self) -> Result { +// self.ensure_iterator()?; +// if let Self { +// value: ref mut v, +// iterator: Some(ref mut it), +// ref filters, +// .. +// } = self { +// // check if we're in a subtree and (not) done. +// if let Some(matcher) = v.subtree() { +// if let Some(res) = matcher.peek() { +// // report any errors +// return res.as_ref().map(|_| true).map_err(|e| e.clone()); +// } +// } +// let kind = v.kind(); +// let mut next_v; +// loop { +// next_v = match it.next() { +// Some(pair) => HolderState::Key(pair), +// None => return Ok(false) +// }; +// for filter in filters { +// filter(&mut next_v)?; +// if next_v.is_empty() { +// break; +// } +// } +// if next_v.has_value() { +// break; +// } +// } +// assert!(next_v.has_value()); +// assert_eq!(next_v.kind(), kind); +// *v = next_v; +// Ok(true) +// } else { +// unreachable!() +// } +// } +// +// /// Ensure `self.iterator.is_some()`, creating an iterator if necessary. +// fn ensure_iterator(&mut self) -> Result<(), MatchError> { +// if self.iterator.is_none() { +// let iter = T::pairs(self.parent.unwrap()); +// if iter.is_none() { +// return Err(MatchError::UnsupportedOperation); +// } +// self.iterator = iter; +// } +// assert!(self.iterator.is_some()); +// Ok(()) +// } +//} +// +//impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> { +// fn default() -> Self { +// Self { +// name: Default::default(), +// value: HolderState::EmptyKey, +// parent: Default::default(), +// iterator: Default::default(), +// filters: Default::default(), +// } +// } +//} +// +//pub struct Matcher<'a, 'b, T: PatternTypes> { +// defs: &'a PatternConstants, +// frame: Frame<'a, 'b, T>, +//} +// +//// TODO: +//// +//// [x] Arrow +//// [x] StringKey +//// [x] RegexKey +//// [x] KeySubtree +//// [x] ValueSubtree +//// [x] Ident +//// [x] Param (untested) +//// [x] ApplyPredicate +//// [x] End +// +///// Helper for `PatternElement::StringKey`. +//fn on_string_key<'a, 'b, T: PatternTypes>( +// matcher: &mut Matcher<'a, 'b, T>, +// id: usize, +// skippable: bool, +//) -> Result { +// let path = matcher.frame.path.last_mut().unwrap(); +// assert!(path.iterator.is_none()); +// let key = &matcher.defs.strings[id]; +// let iter = T::get(path.parent.unwrap(), RefOwn::Str(key)); +// match iter { +// Some(None) if !skippable => Err(MatchError::ValidationError), +// Some(opt) => { +// path.iterator = Some(Box::new(opt.into_iter())); +// Ok(true) +// } +// None => Err(MatchError::UnsupportedOperation), +// } +//} +// +///// Helper for `PatternElement::ParameterKey`. +//fn on_parameter_key<'a, 'b, T: PatternTypes>( +// matcher: &mut Matcher<'a, 'b, T>, +// id: usize, +// skippable: bool, +//) -> Result { +// let path = matcher.frame.path.last_mut().unwrap(); +// assert!(path.iterator.is_none()); +// let key = matcher.defs.defs[id]; +// let iter = T::get(path.parent.unwrap(), RefOwn::Own(key)); +// match iter { +// Some(None) if !skippable => Err(MatchError::ValidationError), +// Some(opt) => { +// path.iterator = Some(Box::new(opt.into_iter())); +// Ok(true) +// } +// None => Err(MatchError::UnsupportedOperation), +// } +//} +// +///// Helper for `PatternElement::RegexKey`. +//fn on_regex_key<'a, 'b, T: PatternTypes>( +// matcher: &mut Matcher<'a, 'b, T>, +// id: usize, +// skippable: bool, +//) -> Result { +// matcher.frame.path.last_mut().unwrap().ensure_iterator()?; +// let re = &matcher.defs.regices[id]; +// let path = matcher.frame.path.last_mut().unwrap(); +// path.filters.push(Box::new(move |value| { +// let s = T::as_str(value.key().unwrap()); +// match (s.map_or(false, |s| re.is_match(s)), skippable) { +// (true, _) => Ok(()), +// (false, true) => { +// value.clear(); +// Ok(()) +// }, +// (false, false) => Err(MatchError::ValidationError), +// } +// })); +// Ok(true) +//} +// +///// Helper for `PatternElement::KeySubtree`. +//fn on_key_subtree<'a, 'b, T: PatternTypes>( +// matcher: &mut Matcher<'a, 'b, T>, +// id: usize, +// skippable: bool, +//) -> Result { +// let _ = skippable; // FIXME what should a skippable KeySubtree even do?! +// matcher.frame.path.last_mut().unwrap().ensure_iterator()?; +// let defs = matcher.defs; +// let rlimit: usize = matcher.frame.depth; +// let path = matcher.frame.path.last_mut().unwrap(); +// assert!(path.value.is_empty()); +// assert_eq!(path.value.kind(), HolderKind::Key); +// path.value = HolderState::EmptyKeySubtree; +// path.filters.push(Box::new(move |value| { +// let key = value.key().unwrap(); +// let mut subtree = Matcher::new(key, defs, id, rlimit)?.peekable(); +// match subtree.peek() { +// Some(&Ok(_)) => { +// *value = HolderState::KeySubtree(subtree, value.pair().unwrap()); +// Ok(()) +// }, +// Some(&Err(ref e)) => { +// Err(e.clone()) +// }, +// None => { +// value.clear(); +// Ok(()) +// } +// } +// })); +// Ok(true) +//} +// +//const DUMMY_OPS: &'static [PatternElement] = &[]; +// +//impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { +// pub(crate) fn new(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants, proto: usize, rlimit: usize) -> Result { +// let ops: &[_] = &defs.protos[proto]; +// Self::with_ops(obj, defs, ops, rlimit) +// } +// +// /// Constructs a Matcher that yields a single dummy result. +// fn with_ops(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants, ops: &'a [PatternElement], rlimit: usize) -> Result { +// let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?; +// Ok(Self { +// defs: defs, +// frame: Frame { +// //obj: obj, +// ops: ops, +// iar: None, +// depth: depth, +// path: { +// let mut holder = Holder::default(); +// holder.value = HolderState::Value(obj); +// holder.iterator = Some(Box::new(std::iter::empty())); +// vec![holder] +// }, +// in_key: false, +// }, +// }) +// } +// +// fn on_in_key(&mut self) -> Result { +// match self.frame.op() { +// PatternElement::End => { +// let path = self.frame.path.last_mut().unwrap(); +// if path.next()? { +// Ok(false) +// } else { +// drop(path); +// self.frame.path.pop().unwrap(); +// // stop at previous End, or start of frame +// while self.frame.prev() { +// if matches!(self.frame.op(), PatternElement::End) { +// break; +// } +// } +// // is start of frame? +// if !self.frame.prev() { +// self.frame.path.clear(); +// } +// Ok(true) +// } +// }, +// PatternElement::ApplyPredicate(id, skippable) => { +// // failing on T::get() is already handled, but we may need a +// // T::pairs(). construct it here. +// self.frame.path.last_mut().unwrap().ensure_iterator()?; +// let pred = &self.defs.predicates[id]; +// let path = self.frame.path.last_mut().unwrap(); +// path.filters.push(Box::new(move |value| { +// match (pred(value.value().unwrap()), skippable) { +// (true, _) => Ok(()), +// (false, true) => { +// value.clear(); +// Ok(()) +// }, +// (false, false) => Err(MatchError::ValidationError), +// } +// })); +// Ok(true) +// }, +// PatternElement::StringKey(id, skippable) => { +// on_string_key(self, id, skippable) +// }, +// PatternElement::ParameterKey(id, skippable) => { +// on_parameter_key(self, id, skippable) +// }, +// PatternElement::RegexKey(id, skippable) => { +// on_regex_key(self, id, skippable) +// }, +// PatternElement::KeySubtree(id, skippable) => { +// on_key_subtree(self, id, skippable) +// }, +// _ => unreachable!("on_in_key") +// } +// } +// +// fn on_not_in_key(&mut self) -> Result { +// match self.frame.op() { +// PatternElement::Arrow => { +// // this *should* always pass. +// assert!(self.frame.path.last().unwrap().iterator.is_some()); +// let mut holder = Holder::default(); +// holder.parent = self.frame.path.last().unwrap().value.value(); +// assert!(holder.parent.is_some()); +// self.frame.path.push(holder); +// Ok(false) +// }, +// PatternElement::Identifier(id) => { +// let name = self.defs.strings.get(id).map(|s| &**s); +// let path = self.frame.path.last_mut().unwrap(); +// path.name = name; +// assert!(path.iterator.is_none()); +// // we don't actually create the iterator here, +// // as we may still wanna use T::get() instead. +// Ok(true) +// }, +// PatternElement::ApplyPredicate(id, skippable) => { +// assert!(self.frame.path.len() == 1); +// let pred = &self.defs.predicates[id]; +// let value = self.frame.path.last().unwrap().value.value(); +// match (pred(value.unwrap()), skippable) { +// (true, _) => Ok(false), +// (false, true) => { +// self.frame.path.clear(); +// // any Ok(_) will do +// Ok(false) +// }, +// (false, false) => Err(MatchError::ValidationError), +// } +// }, +// PatternElement::StringKey(id, skippable) => { +// on_string_key(self, id, skippable) +// }, +// PatternElement::ParameterKey(id, skippable) => { +// on_parameter_key(self, id, skippable) +// }, +// PatternElement::RegexKey(id, skippable) => { +// on_regex_key(self, id, skippable) +// }, +// PatternElement::KeySubtree(id, skippable) => { +// on_key_subtree(self, id, skippable) +// }, +// PatternElement::ValueSubtree(id, skippable) => { +// let value = self.frame.path.last().unwrap().value.value().unwrap(); +// let mut subtree = Matcher::new( +// value, +// self.defs, +// id, +// self.frame.depth +// )?.peekable(); +// let mut dummy = Matcher::with_ops( +// value, +// self.defs, +// DUMMY_OPS, +// self.frame.depth +// )?.peekable(); +// // may panic. +// let peeked = subtree.peek(); +// // shouldn't panic. +// let _ = dummy.peek(); +// // push Holder after peek. +// self.frame.path.push(Holder::default()); +// let mut holder = self.frame.path.last_mut().unwrap(); +// holder.parent = Some(value); +// holder.iterator = Some(Box::new(std::iter::empty())); +// match peeked { +// None if skippable => { +// holder.value = HolderState::ValueSubtree(dummy, value); +// Ok(true) +// }, +// Some(&Ok(_)) | None => { +// drop(peeked); +// holder.value = HolderState::ValueSubtree(subtree, value); +// Ok(true) +// }, +// Some(&Err(ref e)) => { +// Err(e.clone()) +// }, +// } +// }, +// _ => unreachable!("on_not_in_key") +// } +// } +// +// fn collect_results(&mut self) -> Matches<'a, 'b, T> { +// let mut res: Matches<'a, 'b, T> = Default::default(); +// for holder in &mut self.frame.path { +// // make sure it's not empty. +// assert!(holder.value.has_value()); +// // handle subtrees. +// if let Some(matcher) = holder.value.subtree() { +// if let Some(matches) = matcher.next() { +// // NOTE: we have checked these already. +// // (and if we haven't, that's a bug.) +// res.extend(matches.unwrap()); +// } +// } +// // handle pairs. +// if let Some(pair) = holder.value.pair() { +// if let Some(name) = holder.name { +// res.insert(name, pair); +// } +// } +// } +// res +// } +// +// fn on_end(&mut self) -> (bool, Matches<'a, 'b, T>) { +// match self.frame.op() { +// PatternElement::End => { +// assert!(!self.frame.path.last().expect("path").value.is_empty()); +// let res = self.collect_results(); +// if !self.frame.prev() { +// // NOTE: frame.prev() must always be called, even if this +// // gets replaced with debug_assert!() in the future. +// assert!(false, "frame.prev()"); +// } +// (true, res) +// } +// PatternElement::ApplyPredicate {..} => { +// assert!(!self.frame.in_key); +// let res = self.collect_results(); +// self.frame.path.clear(); +// (false, res) +// } +// _ => unreachable!("on_end") +// } +// } +//} +// +//impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> { +// type Item = Result>, MatchError>; +// +// fn next(&mut self) -> Option { +// if self.frame.ops.is_empty() { +// if !self.frame.path.is_empty() { +// self.frame.path.clear(); +// return Some(Ok(Default::default())); +// } +// } +// while !self.frame.path.is_empty() { +// if !self.frame.next() { +// let (in_key, res) = self.on_end(); +// self.frame.in_key = in_key; +// return Some(Ok(res)); +// } else { +// let in_key = if self.frame.in_key { +// self.on_in_key() +// } else { +// self.on_not_in_key() +// }; +// match in_key { +// Ok(in_key) => self.frame.in_key = in_key, +// Err(e) => { +// self.frame.path.clear(); +// return Some(Err(e)) +// }, +// } +// } +// } +// None +// } +//} -- cgit 1.4.1