// Copyright (C) 2021, 2022, 2023 Soni L. // SPDX-License-Identifier: MIT OR Apache-2.0 //! The Datafu Virtual Machine. //! //! This is the stuff that actually matches the pattern. use std::borrow::Cow; use std::cell::Cell; use std::cell::RefCell; use std::collections::BTreeMap; use std::collections::VecDeque; use std::marker::PhantomData; use indexmap::IndexMap; use indexmap::IndexSet; use regex::Regex; use serde::Serialize; use crate::Predicate; //use crate::errors::MatchError; mod de; pub(crate) use de::Unpacker; pub(crate) use de::Packer; /// Max depth for VM/serde recursion. pub(crate) const MAX_CALLS: usize = 250; //type Matches<'a, 'b, T> = BTreeMap<&'a str, KVPair<'b, T>>; // maybe we should use a builder for this? /// The constant pool for a pattern. pub(crate) struct PatternConstants { /// The protos ("functions") in a pattern. /// /// The last proto is implicitly the main function/entry point. pub(crate) protos: Vec>, // Note that we can borrow these when creating the output map. // https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f pub(crate) strings: IndexSet, pub(crate) regices: Vec, pub(crate) predicates: Vec>, pub(crate) defs: Vec, } impl Default for PatternConstants { fn default() -> Self { Self { protos: Default::default(), strings: Default::default(), regices: Default::default(), predicates: Default::default(), defs: Default::default(), } } } impl std::fmt::Debug for PatternConstants { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct( "PatternConstants" ).field( "protos", &self.protos, ).field( "strings", &self.strings, ).field( "regices", &self.regices, ).field( "predicates", &format_args!("({} predicates)", self.predicates.len()), ).field( "defs", &self.defs, ).finish() } } /// A datafu pattern element. #[derive(Copy, Clone, Debug)] pub(crate) enum PatternElement { /// A value is the core capturing element. Value { /// The index of the (string) name to apply to this value. name: Option, /// The expected value of this entry. value: Option, }, /// A tag is the core iterative element. It is always followed by a value. /// /// This one is empty. EmptyTag, /// A tag is the core iterative element. It is always followed by a value. Tag { /// The index of the (proto) key to match against. key_subtree: usize, /// Whether to allow this tree subtree to match nothing. /// /// By default, a datafu pattern only matches a tree if every branch of /// the tree matches something. This enables opting out of that. // TODO this isn't currently implemented. optional: bool, }, /// Marks the end of pattern iteration and the start of subtrees (if any). SubtreeMarker, /// A value subtree is a subtree for values. /// /// It is applied *after* tags, and thus any value subtrees come last in /// a pattern's elements. ValueSubtree { /// The proto index of the subtree. index: usize, /// Whether to allow this value subtree to match nothing. /// /// By default, a datafu pattern only matches a tree if every branch of /// the tree matches something. This enables opting out of that. optional: bool, }, } /// A value matcher. #[derive(Copy, Clone, Debug)] pub(crate) enum Value { /// The value must match the specified string. String { /// The index of the string. index: usize, /// Whether to skip non-matching values, instead of erroring. skippable: bool, }, /// The value must match the specified regex. Regex { /// The index of the regex. index: usize, /// Whether to skip non-matching values, instead of erroring. skippable: bool, }, // /// The value must match the specified integer. // Integer { // /// The integer. // value: usize, // /// Whether to skip non-matching values, instead of erroring. // skippable: bool, // }, // /// The value must match the specified integer range. // Range { // /// The range. // value: Range, // /// Whether to skip non-matching values, instead of erroring. // skippable: bool, // }, // /// The value must match the specified predicate. // Predicate { // /// The index of the predicate. // index: usize, // /// Whether to skip non-matching values, instead of erroring. // skippable: bool, // }, // /// The value must match the specified parameter. // Paameter { // /// The index of the parameter. // index: usize, // /// Whether to skip non-matching values, instead of erroring. // skippable: bool, // }, /// The value must have the specified type. Type { /// The expected type. ty: Type, /// Whether to skip non-matching values, instead of erroring. skippable: bool, }, } /// A pattern token. // TODO docs #[derive(Copy, Clone, Debug)] pub(crate) enum PatternToken { /// Start of a tag. Arrow, Identifier(usize), String(usize, bool), Regex(usize, bool), Parameter(usize, bool), KeySubtree(usize, bool), ValueSubtree(usize, bool), /// Represents a predicate which must be applied. /// /// These are custom, arbitrary predicates, powered by serde. They're /// represented by `:$foo` in a pattern. ApplyPredicate( /// The predicate index (in `PatternConstants.predicates`). usize, /// Whether to skip non-matching values, instead of erroring. bool, ), /// Represents a type expectation. /// /// These are similar to predicates. They're represented by `:foo`, but are /// built-in and provide functionality not supported by predicates. /// /// Specifically, predicates cannot ask serde for a map or a list directly. /// Instead, they'd be required to parse a whole map/list/etc, which could /// cause issues which datafu is designed to avoid. (Datafu is designed to /// resist malicious input more so than arbitrary serde deserializers.) Type( /// The expected type. Type, /// Whether to skip non-matching values, instead of erroring. bool, ), /// End of a tag. End, } /// The types datafu and serde currently support. /// /// These are used as expectations for serde (e.g. /// `Deserializer::deserialize_string`). #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub(crate) enum Type { Any, IgnoredAny, Bool, I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, F32, F64, Char, Str, String, Bytes, ByteBuf, Option, Unit, Seq, Tuple(usize), Map, UnitStruct(&'static str), NewtypeStruct(&'static str), TupleStruct { name: &'static str, len: usize, }, Identifier, Struct { name: &'static str, fields: &'static [&'static str], }, Enum { name: &'static str, variants: &'static [&'static str], }, } /// The types which can be deserialized by serde. /// /// We guess this is basically the same thing as a serde_value? #[derive(Clone, Debug, PartialEq)] pub(crate) enum SerdeObject<'de> { Bool(bool), I8(i8), I16(i16), I32(i32), I64(i64), I128(i128), U8(u8), U16(u16), U32(u32), U64(u64), U128(u128), F32(f32), F64(f64), Char(char), Str(Cow<'de, str>), Bytes(Cow<'de, [u8]>), Some(Box>), None, Unit, Seq(Vec>), // NOTE: support for multimaps! Map(Vec<(SerdeObject<'de>, SerdeObject<'de>)>), NewtypeStruct(Box>), Enum { variant: Box>, data: Box>, }, } impl<'de> SerdeObject<'de> { ///// Checks the type of this object. //fn check(&self, ty: Option) -> Result<(), E> { // let ty = match ty { // None => return Ok(()), // Some(ty) => ty, // }; // match (ty, self) { // | (Type::Any, v) // | (Type::IgnoredAny, v) // => Ok(()), // | (Type::Bool, v @ SerdeObject::Bool(_)) // | (Type::I8, v @ SerdeObject::I8(_)) // | (Type::I16, v @ SerdeObject::I16(_)) // | (Type::I32, v @ SerdeObject::I32(_)) // | (Type::I64, v @ SerdeObject::I64(_)) // | (Type::I128, v @ SerdeObject::I128(_)) // | (Type::U8, v @ SerdeObject::U8(_)) // | (Type::U16, v @ SerdeObject::U16(_)) // | (Type::U32, v @ SerdeObject::U32(_)) // | (Type::U64, v @ SerdeObject::U64(_)) // | (Type::U128, v @ SerdeObject::U128(_)) // | (Type::F32, v @ SerdeObject::F32(_)) // | (Type::F64, v @ SerdeObject::F64(_)) // | (Type::Char, v @ SerdeObject::Char(_)) // | (Type::Str, v @ SerdeObject::Str(_)) // | (Type::String, v @ SerdeObject::Str(_)) // | (Type::Bytes, v @ SerdeObject::Bytes(_)) // | (Type::ByteBuf, v @ SerdeObject::Bytes(_)) // | (Type::Option, v @ SerdeObject::None) // | (Type::Option, v @ SerdeObject::Some(_)) // | (Type::Unit, v @ SerdeObject::Unit) // | (Type::Seq, v @ SerdeObject::Seq(_)) // | (Type::Map, v @ SerdeObject::Map(_)) // => Ok(()), // _ => todo!(), // } //} } impl<'de, E> serde::de::IntoDeserializer<'de, E> for SerdeObject<'de> where E: serde::de::Error, { type Deserializer = self::de::SerdeObjectDeserializer<'de, E>; fn into_deserializer(self) -> Self::Deserializer { Self::Deserializer { obj: self, _e: PhantomData, } } } /// Packed serde objects and datafu internal representation. /// /// This is an iterative store of key-value pairs. /// /// It's effectively a tree node. #[derive(Clone, Debug, Default)] pub struct Pack<'pat, 'de> { subpacks: VecDeque<(IndexMap<&'pat str, SerdeObject<'de>>, Pack<'pat, 'de>)>, } impl<'pat, 'de> Pack<'pat, 'de> { /// Adds the elements of the `other` pack to the matching iterarions of the /// current pack. If either pack is empty, pick the non-empty pack. /// /// The current pack will have the same number of iterations, but will /// contain captures from both packs. In case of captures of the same name, /// `other` will override `self`. /// /// # Panics /// /// Panics if the packs have different iteration lengths. fn zip(&mut self, mut other: Self) { match (self.subpacks.len(), other.subpacks.len()) { (0, _) => { *self = other; }, (_, 0) => {}, (a, b) if a == b => { for (l, r) in self.subpacks.iter_mut().zip(other.subpacks) { // note that we can't actually recurse deeper than the VM // actually does itself, so we don't need to worry about // blowing the stack. l.0.extend(r.0); l.1.zip(r.1); } }, _ => unreachable!("zip unbalanced iterations"), } } /// Adds the elements of the `other` pack to all iterations captured by /// this pack, such as to form a cartesian product. If either pack is /// empty, pick the non-empty pack. If both packs have a length of 1, merge /// their captures. /// /// The current pack will contain captures from both packs. In case of /// captures of the same name, `other` will override `self`. fn cartesian_product(&mut self, mut other: Self) { match (self.subpacks.len(), other.subpacks.len()) { (_, 0) => { return; }, (0, _) => { *self = other; return; }, (1, 1) => { let (robjects, rpack) = other.subpacks.pop_back().unwrap(); let (ref mut lobjects, ref mut lpack) = self.subpacks[0]; lobjects.extend(robjects); lpack.cartesian_product(rpack); return; }, (1, _) => { self.subpacks[0].1.cartesian_product(other); return; }, (_, 1) => { // FIXME: need to be careful with this one. // we want `other` to override entries from `self`, so we need // to scan `self` for captures of the same name as those in // `other`, and remove those captures. only then can we swap // `self` and `other` and merge them. // for now we can just do the inefficient thing tho. }, _ => {}, } // FIXME instead of doing this, perhaps we should find the smaller one, // and put clones of the larger one into it? self.subpacks.iter_mut().for_each(|&mut (_, ref mut lpack)| { lpack.cartesian_product(other.clone()) }); } /// Returns the serde object with the given name at the given iteration of /// this pack. #[cfg(test)] fn get_object_at( &self, iter: usize, name: &str, ) -> Option<&SerdeObject<'de>> { self.subpacks.get(iter).map(|x| &x.0).and_then(|x| x.get(name)) } /// Returns the subpack related to the given name at the given iteration of /// this pack. #[cfg(test)] fn get_subpack_at( &self, iter: usize, name: &str, ) -> Option<&Pack<'pat, 'de>> { let _ = name; self.subpacks.get(iter).map(|x| &x.1) } } /// The Datafu interpreter, sorta. #[derive(Debug)] pub(crate) struct Interpreter<'pat, 'state, O: Serialize> { /// The pattern currently being processed. pat: &'pat PatternConstants, /// The error override (if any). error: &'state mut Option, /// The current interpreter frames. frames: &'state mut Vec>, } #[derive(Debug)] pub(crate) struct Frame<'pat> { /// The instructions/function currently being processed. ops: &'pat [PatternElement], /// The instruction index being processed. iar: Option, /// How many steps this frame has not actually advanced for. /// /// This is used at end of frame and on match failure. overstep: usize, /// Whether this frame matches the data so far. matches: bool, /// Whether this frame must not be allowed to match in the key step. poison: bool, } impl<'pat, 'state, O: Serialize> Interpreter<'pat, 'state, O> { pub(crate) fn new( pat: &'pat PatternConstants, error: &'state mut Option, frames: &'state mut Vec>, ) -> Self { debug_assert!(frames.is_empty()); frames.push(Frame { ops: &pat.protos.last().unwrap(), iar: None, overstep: 0, matches: true, poison: false, }); Self { pat: pat, error: error, frames: frames, } } } impl<'pat> Frame<'pat> { /// Gets the type currently associated with this frame. /// /// Returns the type and whether it is required to match. fn get_type( &self, ) -> Option<(Type, bool)> { match self.op() { PatternElement::Value { value: Some(value), .. } => { match value { | Value::String { skippable, .. } | Value::Regex { skippable, .. } => { Some((Type::Str, !skippable)) }, Value::Type { ty, skippable } => { Some((ty, !skippable)) }, } }, | PatternElement::EmptyTag | PatternElement::Tag { .. } => panic!("attempt to get type of tag"), _ => None, } } /// Gets the name currently associated with this frame. fn get_name( &self, pat: &'pat PatternConstants, ) -> Option<&'pat str> { let strings = &pat.strings; match self.op() { PatternElement::Value { name: Some(name), .. } => { Some(&*strings[name]) }, | PatternElement::EmptyTag | PatternElement::Tag { .. } => panic!("attempt to get name of tag"), _ => None, } } /// Advances the instruction address register. /// /// # Returns /// /// `true` if successful, `false` otherwise. fn next(&mut self) -> bool { let new = self.iar.map_or(0, |v| v + 1); new < self.ops.len() && { self.iar = Some(new); true } } /// Returns the current instruction. /// /// # Panics /// /// Panics if called on a non-matching frame or if iteration hasn't begun. fn op(&self) -> PatternElement { assert!(self.active(), "op() called on inactive frame"); self.raw_op() } /// Counts the number of *active* subtrees, if any, and whether any /// subtrees have been unwound. /// /// # Panics /// /// Panics if iteration hasn't begun. fn num_subtrees(&self) -> Option<(usize, bool)> { let iar = self.iar.unwrap(); // check if there are any subtrees matches!( self.ops[iar], | PatternElement::ValueSubtree { .. } | PatternElement::SubtreeMarker ).then(|| { // count the number of subtrees ( self.ops[0..=iar].iter().rev().take_while(|x| { matches!(x, PatternElement::ValueSubtree { .. }) }).count(), self.ops.len() - 1 != iar, ) }) } /// Returns whether this key has a subtree, and if so, its index and /// whether it is optional, as an `(index, optional)` pair. /// /// # Panics /// /// Panics if iteration hasn't begun, or this isn't a key. fn key_subtree(&self) -> Option<(usize, bool)> { match self.op() { PatternElement::Tag { key_subtree, optional } => { Some((key_subtree, optional)) }, PatternElement::EmptyTag => None, _ => unreachable!(), } } /// Returns whether this frame is in a value operation. /// /// # Panics /// /// Panics if the frame isn't active or iteraction hasn't begun. #[inline] fn is_value(&self) -> bool { self.active() && matches!( self.raw_op(), PatternElement::Value { .. }, ) } /// Returns this value subtree, as an `(index, optional)` pair. /// /// # Panics /// /// Panics if iteration hasn't begun, or this isn't a value subtree. fn value_subtree(&self) -> (usize, bool) { if let PatternElement::ValueSubtree { index, optional, } = self.raw_op() { (index, optional) } else { unreachable!() } } /// Returns the raw instruction. /// /// # Panics /// /// Panics if iteration hasn't begun. fn raw_op(&self) -> PatternElement { self.ops[self.iar.expect("ops[iar]")] } /// Returns whether this frame is active (not overstepped). #[inline] fn active(&self) -> bool { self.overstep == 0 } /// Rewinds the instruction address register. /// /// # Returns /// /// `true` if successful, `false` otherwise. fn prev(&mut self) -> bool { let new = self.iar.expect("iar").checked_sub(1); new.is_some() && { self.iar = new; true } } } ///// Stores a single match. ///// ///// See also Holder. //enum HolderState<'a, 'b, T: PatternTypes> { // /// Empty holder, for a key-value pair. // EmptyKey, // /// Empty holder, for a Matcher and a key-value pair. // EmptyKeySubtree, // // /// Empty holder, for a Matcher and a value. // // EmptyValueSubtree, // /// Occupied holder, for a key-value pair.. // Key(KVPair<'b, T>), // /// Occupied holder, for a Matcher and a key-value pair. // KeySubtree(Peekable>, KVPair<'b, T>), // /// Occupied holder, for a Matcher and a value. The empty variant is // /// omitted as it would never be used otherwise. // ValueSubtree(Peekable>, RefOwn<'b, T::Ref, T::Own>), // /// Occupied holder, for a value. The empty variant is omitted as it would // /// never be used otherwise. // Value(RefOwn<'b, T::Ref, T::Own>), //} // ///// Helper enum for HolderState. //#[derive(Copy, Clone, Debug, Eq, PartialEq)] //enum HolderKind { // Key, // KeySubtree, // ValueSubtree, // Value //} // ////impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> { //// fn clone(&self) -> Self { //// match self { //// HolderState::EmptyKey => HolderState::EmptyKey, //// HolderState::EmptySubtree => HolderState::EmptySubtree, //// HolderState::Key(v) => HolderState::Key(*v), //// HolderState::KeySubtree(m, v) => HolderState::KeySubtree(m.clone(), *v), //// HolderState::ValueSubtree(m, v) => HolderState::ValueSubtree(m.clone(), *v), //// HolderState::Value(v) => HolderState::Value(*v), //// } //// } ////} // //impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> { // #[rustfmt::skip] // fn is_empty(&self) -> bool { // match self { // | HolderState::EmptyKey // | HolderState::EmptyKeySubtree // //| HolderState::EmptyValueSubtree // => true, _ => false // } // } // // fn has_value(&self) -> bool { // !self.is_empty() // } // // fn kind(&self) -> HolderKind { // match self { // | HolderState::EmptyKey // | HolderState::Key(_) // => HolderKind::Key, // | HolderState::EmptyKeySubtree // | HolderState::KeySubtree(_, _) // => HolderKind::KeySubtree, // //| HolderState::EmptyValueSubtree // | HolderState::ValueSubtree(_, _) // => HolderKind::ValueSubtree, // | HolderState::Value(_) // => HolderKind::Value, // } // } // // fn value(&self) -> Option> { // match *self { // HolderState::Key((_, value)) => Some(value), // HolderState::KeySubtree(_, (_, value)) => Some(value), // HolderState::ValueSubtree(_, value) => Some(value), // HolderState::Value(value) => Some(value), // _ => None // } // } // // fn key(&self) -> Option> { // match *self { // HolderState::Key((key, _)) => Some(key), // HolderState::KeySubtree(_, (key, _)) => Some(key), // _ => None // } // } // // fn pair(&self) -> Option> { // match *self { // HolderState::Key(pair) => Some(pair), // HolderState::KeySubtree(_, pair) => Some(pair), // _ => None // } // } // // fn subtree(&mut self) -> Option<&mut Peekable>> { // match *self { // HolderState::KeySubtree(ref mut subtree, _) => Some(subtree), // HolderState::ValueSubtree(ref mut subtree, _) => Some(subtree), // _ => None // } // } // // fn clear(&mut self) { // *self = match self.kind() { // HolderKind::Key => HolderState::EmptyKey, // HolderKind::KeySubtree => HolderState::EmptyKeySubtree, // HolderKind::ValueSubtree => unreachable!(), //HolderState::EmptyValueSubtree, // HolderKind::Value => unreachable!(), // }; // assert!(self.is_empty()); // } //} // ///// Stores a single match and associated metadata. ///// ///// A single match is generally a key-value pair, but may be a collection of ///// named pairs in the case of subtree matches, or just a value for the initial ///// holder. //struct Holder<'a, 'b, T: PatternTypes> { // name: Option<&'a str>, // value: HolderState<'a, 'b, T>, // parent: Option>, // iterator: Option> + 'b>>, // filters: Vec Fn(&'c mut HolderState<'a, 'b, T>) -> Result<(), MatchError>) + 'a>>, //} // //impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> { // fn next(&mut self) -> Result { // self.ensure_iterator()?; // if let Self { // value: ref mut v, // iterator: Some(ref mut it), // ref filters, // .. // } = self { // // check if we're in a subtree and (not) done. // if let Some(matcher) = v.subtree() { // if let Some(res) = matcher.peek() { // // report any errors // return res.as_ref().map(|_| true).map_err(|e| e.clone()); // } // } // let kind = v.kind(); // let mut next_v; // loop { // next_v = match it.next() { // Some(pair) => HolderState::Key(pair), // None => return Ok(false) // }; // for filter in filters { // filter(&mut next_v)?; // if next_v.is_empty() { // break; // } // } // if next_v.has_value() { // break; // } // } // assert!(next_v.has_value()); // assert_eq!(next_v.kind(), kind); // *v = next_v; // Ok(true) // } else { // unreachable!() // } // } // // /// Ensure `self.iterator.is_some()`, creating an iterator if necessary. // fn ensure_iterator(&mut self) -> Result<(), MatchError> { // if self.iterator.is_none() { // let iter = T::pairs(self.parent.unwrap()); // if iter.is_none() { // return Err(MatchError::UnsupportedOperation); // } // self.iterator = iter; // } // assert!(self.iterator.is_some()); // Ok(()) // } //} // //impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> { // fn default() -> Self { // Self { // name: Default::default(), // value: HolderState::EmptyKey, // parent: Default::default(), // iterator: Default::default(), // filters: Default::default(), // } // } //} // //pub struct Matcher<'a, 'b, T: PatternTypes> { // defs: &'a PatternConstants, // frame: Frame<'a, 'b, T>, //} // //// TODO: //// //// [x] Arrow //// [x] StringKey //// [x] RegexKey //// [x] KeySubtree //// [x] ValueSubtree //// [x] Ident //// [x] Param (untested) //// [x] ApplyPredicate //// [x] End // ///// Helper for `PatternElement::StringKey`. //fn on_string_key<'a, 'b, T: PatternTypes>( // matcher: &mut Matcher<'a, 'b, T>, // id: usize, // skippable: bool, //) -> Result { // let path = matcher.frame.path.last_mut().unwrap(); // assert!(path.iterator.is_none()); // let key = &matcher.defs.strings[id]; // let iter = T::get(path.parent.unwrap(), RefOwn::Str(key)); // match iter { // Some(None) if !skippable => Err(MatchError::ValidationError), // Some(opt) => { // path.iterator = Some(Box::new(opt.into_iter())); // Ok(true) // } // None => Err(MatchError::UnsupportedOperation), // } //} // ///// Helper for `PatternElement::ParameterKey`. //fn on_parameter_key<'a, 'b, T: PatternTypes>( // matcher: &mut Matcher<'a, 'b, T>, // id: usize, // skippable: bool, //) -> Result { // let path = matcher.frame.path.last_mut().unwrap(); // assert!(path.iterator.is_none()); // let key = matcher.defs.defs[id]; // let iter = T::get(path.parent.unwrap(), RefOwn::Own(key)); // match iter { // Some(None) if !skippable => Err(MatchError::ValidationError), // Some(opt) => { // path.iterator = Some(Box::new(opt.into_iter())); // Ok(true) // } // None => Err(MatchError::UnsupportedOperation), // } //} // ///// Helper for `PatternElement::RegexKey`. //fn on_regex_key<'a, 'b, T: PatternTypes>( // matcher: &mut Matcher<'a, 'b, T>, // id: usize, // skippable: bool, //) -> Result { // matcher.frame.path.last_mut().unwrap().ensure_iterator()?; // let re = &matcher.defs.regices[id]; // let path = matcher.frame.path.last_mut().unwrap(); // path.filters.push(Box::new(move |value| { // let s = T::as_str(value.key().unwrap()); // match (s.map_or(false, |s| re.is_match(s)), skippable) { // (true, _) => Ok(()), // (false, true) => { // value.clear(); // Ok(()) // }, // (false, false) => Err(MatchError::ValidationError), // } // })); // Ok(true) //} // ///// Helper for `PatternElement::KeySubtree`. //fn on_key_subtree<'a, 'b, T: PatternTypes>( // matcher: &mut Matcher<'a, 'b, T>, // id: usize, // skippable: bool, //) -> Result { // let _ = skippable; // FIXME what should a skippable KeySubtree even do?! // matcher.frame.path.last_mut().unwrap().ensure_iterator()?; // let defs = matcher.defs; // let rlimit: usize = matcher.frame.depth; // let path = matcher.frame.path.last_mut().unwrap(); // assert!(path.value.is_empty()); // assert_eq!(path.value.kind(), HolderKind::Key); // path.value = HolderState::EmptyKeySubtree; // path.filters.push(Box::new(move |value| { // let key = value.key().unwrap(); // let mut subtree = Matcher::new(key, defs, id, rlimit)?.peekable(); // match subtree.peek() { // Some(&Ok(_)) => { // *value = HolderState::KeySubtree(subtree, value.pair().unwrap()); // Ok(()) // }, // Some(&Err(ref e)) => { // Err(e.clone()) // }, // None => { // value.clear(); // Ok(()) // } // } // })); // Ok(true) //} // //const DUMMY_OPS: &'static [PatternElement] = &[]; // //impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { // pub(crate) fn new(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants, proto: usize, rlimit: usize) -> Result { // let ops: &[_] = &defs.protos[proto]; // Self::with_ops(obj, defs, ops, rlimit) // } // // /// Constructs a Matcher that yields a single dummy result. // fn with_ops(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants, ops: &'a [PatternElement], rlimit: usize) -> Result { // let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?; // Ok(Self { // defs: defs, // frame: Frame { // //obj: obj, // ops: ops, // iar: None, // depth: depth, // path: { // let mut holder = Holder::default(); // holder.value = HolderState::Value(obj); // holder.iterator = Some(Box::new(std::iter::empty())); // vec![holder] // }, // in_key: false, // }, // }) // } // // fn on_in_key(&mut self) -> Result { // match self.frame.op() { // PatternElement::End => { // let path = self.frame.path.last_mut().unwrap(); // if path.next()? { // Ok(false) // } else { // drop(path); // self.frame.path.pop().unwrap(); // // stop at previous End, or start of frame // while self.frame.prev() { // if matches!(self.frame.op(), PatternElement::End) { // break; // } // } // // is start of frame? // if !self.frame.prev() { // self.frame.path.clear(); // } // Ok(true) // } // }, // PatternElement::ApplyPredicate(id, skippable) => { // // failing on T::get() is already handled, but we may need a // // T::pairs(). construct it here. // self.frame.path.last_mut().unwrap().ensure_iterator()?; // let pred = &self.defs.predicates[id]; // let path = self.frame.path.last_mut().unwrap(); // path.filters.push(Box::new(move |value| { // match (pred(value.value().unwrap()), skippable) { // (true, _) => Ok(()), // (false, true) => { // value.clear(); // Ok(()) // }, // (false, false) => Err(MatchError::ValidationError), // } // })); // Ok(true) // }, // PatternElement::StringKey(id, skippable) => { // on_string_key(self, id, skippable) // }, // PatternElement::ParameterKey(id, skippable) => { // on_parameter_key(self, id, skippable) // }, // PatternElement::RegexKey(id, skippable) => { // on_regex_key(self, id, skippable) // }, // PatternElement::KeySubtree(id, skippable) => { // on_key_subtree(self, id, skippable) // }, // _ => unreachable!("on_in_key") // } // } // // fn on_not_in_key(&mut self) -> Result { // match self.frame.op() { // PatternElement::Arrow => { // // this *should* always pass. // assert!(self.frame.path.last().unwrap().iterator.is_some()); // let mut holder = Holder::default(); // holder.parent = self.frame.path.last().unwrap().value.value(); // assert!(holder.parent.is_some()); // self.frame.path.push(holder); // Ok(false) // }, // PatternElement::Identifier(id) => { // let name = self.defs.strings.get(id).map(|s| &**s); // let path = self.frame.path.last_mut().unwrap(); // path.name = name; // assert!(path.iterator.is_none()); // // we don't actually create the iterator here, // // as we may still wanna use T::get() instead. // Ok(true) // }, // PatternElement::ApplyPredicate(id, skippable) => { // assert!(self.frame.path.len() == 1); // let pred = &self.defs.predicates[id]; // let value = self.frame.path.last().unwrap().value.value(); // match (pred(value.unwrap()), skippable) { // (true, _) => Ok(false), // (false, true) => { // self.frame.path.clear(); // // any Ok(_) will do // Ok(false) // }, // (false, false) => Err(MatchError::ValidationError), // } // }, // PatternElement::StringKey(id, skippable) => { // on_string_key(self, id, skippable) // }, // PatternElement::ParameterKey(id, skippable) => { // on_parameter_key(self, id, skippable) // }, // PatternElement::RegexKey(id, skippable) => { // on_regex_key(self, id, skippable) // }, // PatternElement::KeySubtree(id, skippable) => { // on_key_subtree(self, id, skippable) // }, // PatternElement::ValueSubtree(id, skippable) => { // let value = self.frame.path.last().unwrap().value.value().unwrap(); // let mut subtree = Matcher::new( // value, // self.defs, // id, // self.frame.depth // )?.peekable(); // let mut dummy = Matcher::with_ops( // value, // self.defs, // DUMMY_OPS, // self.frame.depth // )?.peekable(); // // may panic. // let peeked = subtree.peek(); // // shouldn't panic. // let _ = dummy.peek(); // // push Holder after peek. // self.frame.path.push(Holder::default()); // let mut holder = self.frame.path.last_mut().unwrap(); // holder.parent = Some(value); // holder.iterator = Some(Box::new(std::iter::empty())); // match peeked { // None if skippable => { // holder.value = HolderState::ValueSubtree(dummy, value); // Ok(true) // }, // Some(&Ok(_)) | None => { // drop(peeked); // holder.value = HolderState::ValueSubtree(subtree, value); // Ok(true) // }, // Some(&Err(ref e)) => { // Err(e.clone()) // }, // } // }, // _ => unreachable!("on_not_in_key") // } // } // // fn collect_results(&mut self) -> Matches<'a, 'b, T> { // let mut res: Matches<'a, 'b, T> = Default::default(); // for holder in &mut self.frame.path { // // make sure it's not empty. // assert!(holder.value.has_value()); // // handle subtrees. // if let Some(matcher) = holder.value.subtree() { // if let Some(matches) = matcher.next() { // // NOTE: we have checked these already. // // (and if we haven't, that's a bug.) // res.extend(matches.unwrap()); // } // } // // handle pairs. // if let Some(pair) = holder.value.pair() { // if let Some(name) = holder.name { // res.insert(name, pair); // } // } // } // res // } // // fn on_end(&mut self) -> (bool, Matches<'a, 'b, T>) { // match self.frame.op() { // PatternElement::End => { // assert!(!self.frame.path.last().expect("path").value.is_empty()); // let res = self.collect_results(); // if !self.frame.prev() { // // NOTE: frame.prev() must always be called, even if this // // gets replaced with debug_assert!() in the future. // assert!(false, "frame.prev()"); // } // (true, res) // } // PatternElement::ApplyPredicate {..} => { // assert!(!self.frame.in_key); // let res = self.collect_results(); // self.frame.path.clear(); // (false, res) // } // _ => unreachable!("on_end") // } // } //} // //impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> { // type Item = Result>, MatchError>; // // fn next(&mut self) -> Option { // if self.frame.ops.is_empty() { // if !self.frame.path.is_empty() { // self.frame.path.clear(); // return Some(Ok(Default::default())); // } // } // while !self.frame.path.is_empty() { // if !self.frame.next() { // let (in_key, res) = self.on_end(); // self.frame.in_key = in_key; // return Some(Ok(res)); // } else { // let in_key = if self.frame.in_key { // self.on_in_key() // } else { // self.on_not_in_key() // }; // match in_key { // Ok(in_key) => self.frame.in_key = in_key, // Err(e) => { // self.frame.path.clear(); // return Some(Err(e)) // }, // } // } // } // None // } //}