// Copyright (C) 2021-2022 Soni L. // SPDX-License-Identifier: MIT OR Apache-2.0 //! The Datafu Virtual Machine. //! //! This is the stuff that actually matches the pattern. use regex::Regex; use serde::Serialize; use crate::Predicate; //use crate::errors::MatchError; mod de; pub use de::Unpacker; pub use de::Packer; /// Max depth for VM/serde recursion. pub(crate) const MAX_CALLS: usize = 250; //type Matches<'a, 'b, T> = BTreeMap<&'a str, KVPair<'b, T>>; // maybe we should use a builder for this? /// The constant pool for a pattern. pub(crate) struct PatternConstants { // last proto is implicitly the whole pattern. pub(crate) protos: Vec>, // Note that we can borrow these when creating the output map. // https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f pub(crate) strings: Vec, pub(crate) regices: Vec, pub(crate) predicates: Vec>, // NOTE these are part of the constant pool and so have lifetime analogous // to 'a (consistently used to indicate constant pool lifetime) when used // elsewhere. In particular, they can't be yielded by the iterator. pub(crate) defs: Vec, } impl Default for PatternConstants { fn default() -> Self { Self { protos: Default::default(), strings: Default::default(), regices: Default::default(), predicates: Default::default(), defs: Default::default(), } } } impl std::fmt::Debug for PatternConstants { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct( "PatternConstants" ).field( "protos", &self.protos, ).field( "strings", &self.strings, ).field( "regices", &self.regices, ).field( "predicates", &format_args!("({} predicates)", self.predicates.len()), ).field( "defs", &format_args!("FIXME"), ).finish() } } /// A pattern element. #[derive(Copy, Clone, Debug)] pub(crate) enum PatternElement { Arrow, Identifier(usize), StringKey(usize, bool), RegexKey(usize, bool), ParameterKey(usize, bool), KeySubtree(usize, bool), ValueSubtree(usize, bool), /// Represents a predicate which must be applied. /// /// These are custom, arbitrary predicates, powered by serde. They're /// represented by `:$foo` in a pattern. ApplyPredicate( /// The predicate index (in `PatternConstants.predicates`). usize, /// Whether to skip non-matching values, instead of erroring. bool, ), /// Represents a type expectation. /// /// These are similar to predicates. They're represented by `:foo`, but are /// built-in and provide functionality not supported by predicates. /// /// Specifically, predicates cannot ask serde for a map or a list directly. /// Instead, they'd be required to parse a whole map/list/etc, which could /// cause issues which datafu is designed to avoid. (Datafu is designed to /// resist malicious input more so than arbitrary serde deserializers.) Type( /// The expected type. Type, /// Whether to skip non-matching values, instead of erroring. bool, ), End, } /// The types datafu and serde currently support. /// /// These are used as expectations for serde (e.g. /// `Deserializer::deserialize_string`). #[derive(Copy, Clone, Debug)] pub(crate) enum Type { Bool, I8, I16, I32, I64, I128, U8, U16, U32, U64, U128, F32, F64, Char, Str, String, Bytes, ByteBuf, Option, Unit, Seq, Tuple(usize), Map, // these aren't really supported: // UnitStruct, UnitVariant, NewtypeStruct, NewtypeVariant, TupleStruct, // TupleVariant, Struct, StructVariant // instead we use type trees for that. /// Adapter for Type Trees. See `crate::type_tree` for more details. Of { /// The type tree index (in `PatternConstants.type_trees`). type_tree: usize, }, } pub struct Pack; //struct Frame<'a, 'b, T: PatternTypes> { // //obj: RefOwn<'b, T::Ref, T::Own>, // ops: &'a [PatternElement], // iar: Option, // depth: usize, // path: Vec>, // in_key: bool, //} // //impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> { // /// Advances the instruction address register. // /// // /// # Returns // /// // /// `true` if successful, `false` otherwise. // fn next(&mut self) -> bool { // let new = self.iar.map_or(0, |v| v + 1); // new < self.ops.len() && { // self.iar = Some(new); // true // } // } // // /// Returns the current instruction. // fn op(&self) -> PatternElement { // self.ops[self.iar.expect("ops[iar]")] // } // // /// Rewinds the instruction address register. // /// // /// # Returns // /// // /// `true` if successful, `false` otherwise. // fn prev(&mut self) -> bool { // let new = self.iar.expect("iar").checked_sub(1); // new.is_some() && { // self.iar = new; // true // } // } //} // ///// Stores a single match. ///// ///// See also Holder. //enum HolderState<'a, 'b, T: PatternTypes> { // /// Empty holder, for a key-value pair. // EmptyKey, // /// Empty holder, for a Matcher and a key-value pair. // EmptyKeySubtree, // // /// Empty holder, for a Matcher and a value. // // EmptyValueSubtree, // /// Occupied holder, for a key-value pair.. // Key(KVPair<'b, T>), // /// Occupied holder, for a Matcher and a key-value pair. // KeySubtree(Peekable>, KVPair<'b, T>), // /// Occupied holder, for a Matcher and a value. The empty variant is // /// omitted as it would never be used otherwise. // ValueSubtree(Peekable>, RefOwn<'b, T::Ref, T::Own>), // /// Occupied holder, for a value. The empty variant is omitted as it would // /// never be used otherwise. // Value(RefOwn<'b, T::Ref, T::Own>), //} // ///// Helper enum for HolderState. //#[derive(Copy, Clone, Debug, Eq, PartialEq)] //enum HolderKind { // Key, // KeySubtree, // ValueSubtree, // Value //} // ////impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> { //// fn clone(&self) -> Self { //// match self { //// HolderState::EmptyKey => HolderState::EmptyKey, //// HolderState::EmptySubtree => HolderState::EmptySubtree, //// HolderState::Key(v) => HolderState::Key(*v), //// HolderState::KeySubtree(m, v) => HolderState::KeySubtree(m.clone(), *v), //// HolderState::ValueSubtree(m, v) => HolderState::ValueSubtree(m.clone(), *v), //// HolderState::Value(v) => HolderState::Value(*v), //// } //// } ////} // //impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> { // #[rustfmt::skip] // fn is_empty(&self) -> bool { // match self { // | HolderState::EmptyKey // | HolderState::EmptyKeySubtree // //| HolderState::EmptyValueSubtree // => true, _ => false // } // } // // fn has_value(&self) -> bool { // !self.is_empty() // } // // fn kind(&self) -> HolderKind { // match self { // | HolderState::EmptyKey // | HolderState::Key(_) // => HolderKind::Key, // | HolderState::EmptyKeySubtree // | HolderState::KeySubtree(_, _) // => HolderKind::KeySubtree, // //| HolderState::EmptyValueSubtree // | HolderState::ValueSubtree(_, _) // => HolderKind::ValueSubtree, // | HolderState::Value(_) // => HolderKind::Value, // } // } // // fn value(&self) -> Option> { // match *self { // HolderState::Key((_, value)) => Some(value), // HolderState::KeySubtree(_, (_, value)) => Some(value), // HolderState::ValueSubtree(_, value) => Some(value), // HolderState::Value(value) => Some(value), // _ => None // } // } // // fn key(&self) -> Option> { // match *self { // HolderState::Key((key, _)) => Some(key), // HolderState::KeySubtree(_, (key, _)) => Some(key), // _ => None // } // } // // fn pair(&self) -> Option> { // match *self { // HolderState::Key(pair) => Some(pair), // HolderState::KeySubtree(_, pair) => Some(pair), // _ => None // } // } // // fn subtree(&mut self) -> Option<&mut Peekable>> { // match *self { // HolderState::KeySubtree(ref mut subtree, _) => Some(subtree), // HolderState::ValueSubtree(ref mut subtree, _) => Some(subtree), // _ => None // } // } // // fn clear(&mut self) { // *self = match self.kind() { // HolderKind::Key => HolderState::EmptyKey, // HolderKind::KeySubtree => HolderState::EmptyKeySubtree, // HolderKind::ValueSubtree => unreachable!(), //HolderState::EmptyValueSubtree, // HolderKind::Value => unreachable!(), // }; // assert!(self.is_empty()); // } //} // ///// Stores a single match and associated metadata. ///// ///// A single match is generally a key-value pair, but may be a collection of ///// named pairs in the case of subtree matches, or just a value for the initial ///// holder. //struct Holder<'a, 'b, T: PatternTypes> { // name: Option<&'a str>, // value: HolderState<'a, 'b, T>, // parent: Option>, // iterator: Option> + 'b>>, // filters: Vec Fn(&'c mut HolderState<'a, 'b, T>) -> Result<(), MatchError>) + 'a>>, //} // //impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> { // fn next(&mut self) -> Result { // self.ensure_iterator()?; // if let Self { // value: ref mut v, // iterator: Some(ref mut it), // ref filters, // .. // } = self { // // check if we're in a subtree and (not) done. // if let Some(matcher) = v.subtree() { // if let Some(res) = matcher.peek() { // // report any errors // return res.as_ref().map(|_| true).map_err(|e| e.clone()); // } // } // let kind = v.kind(); // let mut next_v; // loop { // next_v = match it.next() { // Some(pair) => HolderState::Key(pair), // None => return Ok(false) // }; // for filter in filters { // filter(&mut next_v)?; // if next_v.is_empty() { // break; // } // } // if next_v.has_value() { // break; // } // } // assert!(next_v.has_value()); // assert_eq!(next_v.kind(), kind); // *v = next_v; // Ok(true) // } else { // unreachable!() // } // } // // /// Ensure `self.iterator.is_some()`, creating an iterator if necessary. // fn ensure_iterator(&mut self) -> Result<(), MatchError> { // if self.iterator.is_none() { // let iter = T::pairs(self.parent.unwrap()); // if iter.is_none() { // return Err(MatchError::UnsupportedOperation); // } // self.iterator = iter; // } // assert!(self.iterator.is_some()); // Ok(()) // } //} // //impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> { // fn default() -> Self { // Self { // name: Default::default(), // value: HolderState::EmptyKey, // parent: Default::default(), // iterator: Default::default(), // filters: Default::default(), // } // } //} // //pub struct Matcher<'a, 'b, T: PatternTypes> { // defs: &'a PatternConstants, // frame: Frame<'a, 'b, T>, //} // //// TODO: //// //// [x] Arrow //// [x] StringKey //// [x] RegexKey //// [x] KeySubtree //// [x] ValueSubtree //// [x] Ident //// [x] Param (untested) //// [x] ApplyPredicate //// [x] End // ///// Helper for `PatternElement::StringKey`. //fn on_string_key<'a, 'b, T: PatternTypes>( // matcher: &mut Matcher<'a, 'b, T>, // id: usize, // skippable: bool, //) -> Result { // let path = matcher.frame.path.last_mut().unwrap(); // assert!(path.iterator.is_none()); // let key = &matcher.defs.strings[id]; // let iter = T::get(path.parent.unwrap(), RefOwn::Str(key)); // match iter { // Some(None) if !skippable => Err(MatchError::ValidationError), // Some(opt) => { // path.iterator = Some(Box::new(opt.into_iter())); // Ok(true) // } // None => Err(MatchError::UnsupportedOperation), // } //} // ///// Helper for `PatternElement::ParameterKey`. //fn on_parameter_key<'a, 'b, T: PatternTypes>( // matcher: &mut Matcher<'a, 'b, T>, // id: usize, // skippable: bool, //) -> Result { // let path = matcher.frame.path.last_mut().unwrap(); // assert!(path.iterator.is_none()); // let key = matcher.defs.defs[id]; // let iter = T::get(path.parent.unwrap(), RefOwn::Own(key)); // match iter { // Some(None) if !skippable => Err(MatchError::ValidationError), // Some(opt) => { // path.iterator = Some(Box::new(opt.into_iter())); // Ok(true) // } // None => Err(MatchError::UnsupportedOperation), // } //} // ///// Helper for `PatternElement::RegexKey`. //fn on_regex_key<'a, 'b, T: PatternTypes>( // matcher: &mut Matcher<'a, 'b, T>, // id: usize, // skippable: bool, //) -> Result { // matcher.frame.path.last_mut().unwrap().ensure_iterator()?; // let re = &matcher.defs.regices[id]; // let path = matcher.frame.path.last_mut().unwrap(); // path.filters.push(Box::new(move |value| { // let s = T::as_str(value.key().unwrap()); // match (s.map_or(false, |s| re.is_match(s)), skippable) { // (true, _) => Ok(()), // (false, true) => { // value.clear(); // Ok(()) // }, // (false, false) => Err(MatchError::ValidationError), // } // })); // Ok(true) //} // ///// Helper for `PatternElement::KeySubtree`. //fn on_key_subtree<'a, 'b, T: PatternTypes>( // matcher: &mut Matcher<'a, 'b, T>, // id: usize, // skippable: bool, //) -> Result { // let _ = skippable; // FIXME what should a skippable KeySubtree even do?! // matcher.frame.path.last_mut().unwrap().ensure_iterator()?; // let defs = matcher.defs; // let rlimit: usize = matcher.frame.depth; // let path = matcher.frame.path.last_mut().unwrap(); // assert!(path.value.is_empty()); // assert_eq!(path.value.kind(), HolderKind::Key); // path.value = HolderState::EmptyKeySubtree; // path.filters.push(Box::new(move |value| { // let key = value.key().unwrap(); // let mut subtree = Matcher::new(key, defs, id, rlimit)?.peekable(); // match subtree.peek() { // Some(&Ok(_)) => { // *value = HolderState::KeySubtree(subtree, value.pair().unwrap()); // Ok(()) // }, // Some(&Err(ref e)) => { // Err(e.clone()) // }, // None => { // value.clear(); // Ok(()) // } // } // })); // Ok(true) //} // //const DUMMY_OPS: &'static [PatternElement] = &[]; // //impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { // pub(crate) fn new(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants, proto: usize, rlimit: usize) -> Result { // let ops: &[_] = &defs.protos[proto]; // Self::with_ops(obj, defs, ops, rlimit) // } // // /// Constructs a Matcher that yields a single dummy result. // fn with_ops(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants, ops: &'a [PatternElement], rlimit: usize) -> Result { // let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?; // Ok(Self { // defs: defs, // frame: Frame { // //obj: obj, // ops: ops, // iar: None, // depth: depth, // path: { // let mut holder = Holder::default(); // holder.value = HolderState::Value(obj); // holder.iterator = Some(Box::new(std::iter::empty())); // vec![holder] // }, // in_key: false, // }, // }) // } // // fn on_in_key(&mut self) -> Result { // match self.frame.op() { // PatternElement::End => { // let path = self.frame.path.last_mut().unwrap(); // if path.next()? { // Ok(false) // } else { // drop(path); // self.frame.path.pop().unwrap(); // // stop at previous End, or start of frame // while self.frame.prev() { // if matches!(self.frame.op(), PatternElement::End) { // break; // } // } // // is start of frame? // if !self.frame.prev() { // self.frame.path.clear(); // } // Ok(true) // } // }, // PatternElement::ApplyPredicate(id, skippable) => { // // failing on T::get() is already handled, but we may need a // // T::pairs(). construct it here. // self.frame.path.last_mut().unwrap().ensure_iterator()?; // let pred = &self.defs.predicates[id]; // let path = self.frame.path.last_mut().unwrap(); // path.filters.push(Box::new(move |value| { // match (pred(value.value().unwrap()), skippable) { // (true, _) => Ok(()), // (false, true) => { // value.clear(); // Ok(()) // }, // (false, false) => Err(MatchError::ValidationError), // } // })); // Ok(true) // }, // PatternElement::StringKey(id, skippable) => { // on_string_key(self, id, skippable) // }, // PatternElement::ParameterKey(id, skippable) => { // on_parameter_key(self, id, skippable) // }, // PatternElement::RegexKey(id, skippable) => { // on_regex_key(self, id, skippable) // }, // PatternElement::KeySubtree(id, skippable) => { // on_key_subtree(self, id, skippable) // }, // _ => unreachable!("on_in_key") // } // } // // fn on_not_in_key(&mut self) -> Result { // match self.frame.op() { // PatternElement::Arrow => { // // this *should* always pass. // assert!(self.frame.path.last().unwrap().iterator.is_some()); // let mut holder = Holder::default(); // holder.parent = self.frame.path.last().unwrap().value.value(); // assert!(holder.parent.is_some()); // self.frame.path.push(holder); // Ok(false) // }, // PatternElement::Identifier(id) => { // let name = self.defs.strings.get(id).map(|s| &**s); // let path = self.frame.path.last_mut().unwrap(); // path.name = name; // assert!(path.iterator.is_none()); // // we don't actually create the iterator here, // // as we may still wanna use T::get() instead. // Ok(true) // }, // PatternElement::ApplyPredicate(id, skippable) => { // assert!(self.frame.path.len() == 1); // let pred = &self.defs.predicates[id]; // let value = self.frame.path.last().unwrap().value.value(); // match (pred(value.unwrap()), skippable) { // (true, _) => Ok(false), // (false, true) => { // self.frame.path.clear(); // // any Ok(_) will do // Ok(false) // }, // (false, false) => Err(MatchError::ValidationError), // } // }, // PatternElement::StringKey(id, skippable) => { // on_string_key(self, id, skippable) // }, // PatternElement::ParameterKey(id, skippable) => { // on_parameter_key(self, id, skippable) // }, // PatternElement::RegexKey(id, skippable) => { // on_regex_key(self, id, skippable) // }, // PatternElement::KeySubtree(id, skippable) => { // on_key_subtree(self, id, skippable) // }, // PatternElement::ValueSubtree(id, skippable) => { // let value = self.frame.path.last().unwrap().value.value().unwrap(); // let mut subtree = Matcher::new( // value, // self.defs, // id, // self.frame.depth // )?.peekable(); // let mut dummy = Matcher::with_ops( // value, // self.defs, // DUMMY_OPS, // self.frame.depth // )?.peekable(); // // may panic. // let peeked = subtree.peek(); // // shouldn't panic. // let _ = dummy.peek(); // // push Holder after peek. // self.frame.path.push(Holder::default()); // let mut holder = self.frame.path.last_mut().unwrap(); // holder.parent = Some(value); // holder.iterator = Some(Box::new(std::iter::empty())); // match peeked { // None if skippable => { // holder.value = HolderState::ValueSubtree(dummy, value); // Ok(true) // }, // Some(&Ok(_)) | None => { // drop(peeked); // holder.value = HolderState::ValueSubtree(subtree, value); // Ok(true) // }, // Some(&Err(ref e)) => { // Err(e.clone()) // }, // } // }, // _ => unreachable!("on_not_in_key") // } // } // // fn collect_results(&mut self) -> Matches<'a, 'b, T> { // let mut res: Matches<'a, 'b, T> = Default::default(); // for holder in &mut self.frame.path { // // make sure it's not empty. // assert!(holder.value.has_value()); // // handle subtrees. // if let Some(matcher) = holder.value.subtree() { // if let Some(matches) = matcher.next() { // // NOTE: we have checked these already. // // (and if we haven't, that's a bug.) // res.extend(matches.unwrap()); // } // } // // handle pairs. // if let Some(pair) = holder.value.pair() { // if let Some(name) = holder.name { // res.insert(name, pair); // } // } // } // res // } // // fn on_end(&mut self) -> (bool, Matches<'a, 'b, T>) { // match self.frame.op() { // PatternElement::End => { // assert!(!self.frame.path.last().expect("path").value.is_empty()); // let res = self.collect_results(); // if !self.frame.prev() { // // NOTE: frame.prev() must always be called, even if this // // gets replaced with debug_assert!() in the future. // assert!(false, "frame.prev()"); // } // (true, res) // } // PatternElement::ApplyPredicate {..} => { // assert!(!self.frame.in_key); // let res = self.collect_results(); // self.frame.path.clear(); // (false, res) // } // _ => unreachable!("on_end") // } // } //} // //impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> { // type Item = Result>, MatchError>; // // fn next(&mut self) -> Option { // if self.frame.ops.is_empty() { // if !self.frame.path.is_empty() { // self.frame.path.clear(); // return Some(Ok(Default::default())); // } // } // while !self.frame.path.is_empty() { // if !self.frame.next() { // let (in_key, res) = self.on_end(); // self.frame.in_key = in_key; // return Some(Ok(res)); // } else { // let in_key = if self.frame.in_key { // self.on_in_key() // } else { // self.on_not_in_key() // }; // match in_key { // Ok(in_key) => self.frame.in_key = in_key, // Err(e) => { // self.frame.path.clear(); // return Some(Err(e)) // }, // } // } // } // None // } //}