// Copyright (C) 2021-2022 Soni L.
// SPDX-License-Identifier: MIT OR Apache-2.0
//! The Datafu Virtual Machine.
//!
//! This is the stuff that actually matches the pattern.
use regex::Regex;
use serde::Serialize;
use crate::Predicate;
//use crate::errors::MatchError;
mod de;
pub use de::Unpacker;
pub use de::Packer;
/// Max depth for VM/serde recursion.
pub(crate) const MAX_CALLS: usize = 250;
//type Matches<'a, 'b, T> = BTreeMap<&'a str, KVPair<'b, T>>;
// maybe we should use a builder for this?
/// The constant pool for a pattern.
pub(crate) struct PatternConstants<O: Serialize> {
// last proto is implicitly the whole pattern.
pub(crate) protos: Vec<Vec<PatternElement>>,
// Note that we can borrow these when creating the output map.
// https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f
pub(crate) strings: Vec<String>,
pub(crate) regices: Vec<Regex>,
pub(crate) predicates: Vec<Box<Predicate>>,
// NOTE these are part of the constant pool and so have lifetime analogous
// to 'a (consistently used to indicate constant pool lifetime) when used
// elsewhere. In particular, they can't be yielded by the iterator.
pub(crate) defs: Vec<O>,
}
impl<O: Serialize> Default for PatternConstants<O> {
fn default() -> Self {
Self {
protos: Default::default(),
strings: Default::default(),
regices: Default::default(),
predicates: Default::default(),
defs: Default::default(),
}
}
}
impl<O: Serialize> std::fmt::Debug for PatternConstants<O> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct(
"PatternConstants"
).field(
"protos", &self.protos,
).field(
"strings", &self.strings,
).field(
"regices", &self.regices,
).field(
"predicates",
&format_args!("({} predicates)", self.predicates.len()),
).field(
"defs",
&format_args!("FIXME"),
).finish()
}
}
/// A pattern element.
#[derive(Copy, Clone, Debug)]
pub(crate) enum PatternElement {
Arrow,
Identifier(usize),
StringKey(usize, bool),
RegexKey(usize, bool),
ParameterKey(usize, bool),
KeySubtree(usize, bool),
ValueSubtree(usize, bool),
/// Represents a predicate which must be applied.
///
/// These are custom, arbitrary predicates, powered by serde. They're
/// represented by `:$foo` in a pattern.
ApplyPredicate(
/// The predicate index (in `PatternConstants.predicates`).
usize,
/// Whether to skip non-matching values, instead of erroring.
bool,
),
/// Represents a type expectation.
///
/// These are similar to predicates. They're represented by `:foo`, but are
/// built-in and provide functionality not supported by predicates.
///
/// Specifically, predicates cannot ask serde for a map or a list directly.
/// Instead, they'd be required to parse a whole map/list/etc, which could
/// cause issues which datafu is designed to avoid. (Datafu is designed to
/// resist malicious input more so than arbitrary serde deserializers.)
Type(
/// The expected type.
Type,
/// Whether to skip non-matching values, instead of erroring.
bool,
),
End,
}
/// The types datafu and serde currently support.
///
/// These are used as expectations for serde (e.g.
/// `Deserializer::deserialize_string`).
#[derive(Copy, Clone, Debug)]
pub(crate) enum Type {
Bool,
I8,
I16,
I32,
I64,
I128,
U8,
U16,
U32,
U64,
U128,
F32,
F64,
Char,
Str,
String,
Bytes,
ByteBuf,
Option,
Unit,
Seq,
Tuple(usize),
Map,
// these aren't really supported:
// UnitStruct, UnitVariant, NewtypeStruct, NewtypeVariant, TupleStruct,
// TupleVariant, Struct, StructVariant
// instead we use type trees for that.
/// Adapter for Type Trees. See `crate::type_tree` for more details.
Of {
/// The type tree index (in `PatternConstants.type_trees`).
type_tree: usize,
},
}
pub struct Pack;
//struct Frame<'a, 'b, T: PatternTypes> {
// //obj: RefOwn<'b, T::Ref, T::Own>,
// ops: &'a [PatternElement],
// iar: Option<usize>,
// depth: usize,
// path: Vec<Holder<'a, 'b, T>>,
// in_key: bool,
//}
//
//impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> {
// /// Advances the instruction address register.
// ///
// /// # Returns
// ///
// /// `true` if successful, `false` otherwise.
// fn next(&mut self) -> bool {
// let new = self.iar.map_or(0, |v| v + 1);
// new < self.ops.len() && {
// self.iar = Some(new);
// true
// }
// }
//
// /// Returns the current instruction.
// fn op(&self) -> PatternElement {
// self.ops[self.iar.expect("ops[iar]")]
// }
//
// /// Rewinds the instruction address register.
// ///
// /// # Returns
// ///
// /// `true` if successful, `false` otherwise.
// fn prev(&mut self) -> bool {
// let new = self.iar.expect("iar").checked_sub(1);
// new.is_some() && {
// self.iar = new;
// true
// }
// }
//}
//
///// Stores a single match.
/////
///// See also Holder.
//enum HolderState<'a, 'b, T: PatternTypes> {
// /// Empty holder, for a key-value pair.
// EmptyKey,
// /// Empty holder, for a Matcher and a key-value pair.
// EmptyKeySubtree,
// // /// Empty holder, for a Matcher and a value.
// // EmptyValueSubtree,
// /// Occupied holder, for a key-value pair..
// Key(KVPair<'b, T>),
// /// Occupied holder, for a Matcher and a key-value pair.
// KeySubtree(Peekable<Matcher<'a, 'b, T>>, KVPair<'b, T>),
// /// Occupied holder, for a Matcher and a value. The empty variant is
// /// omitted as it would never be used otherwise.
// ValueSubtree(Peekable<Matcher<'a, 'b, T>>, RefOwn<'b, T::Ref, T::Own>),
// /// Occupied holder, for a value. The empty variant is omitted as it would
// /// never be used otherwise.
// Value(RefOwn<'b, T::Ref, T::Own>),
//}
//
///// Helper enum for HolderState.
//#[derive(Copy, Clone, Debug, Eq, PartialEq)]
//enum HolderKind {
// Key,
// KeySubtree,
// ValueSubtree,
// Value
//}
//
////impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> {
//// fn clone(&self) -> Self {
//// match self {
//// HolderState::EmptyKey => HolderState::EmptyKey,
//// HolderState::EmptySubtree => HolderState::EmptySubtree,
//// HolderState::Key(v) => HolderState::Key(*v),
//// HolderState::KeySubtree(m, v) => HolderState::KeySubtree(m.clone(), *v),
//// HolderState::ValueSubtree(m, v) => HolderState::ValueSubtree(m.clone(), *v),
//// HolderState::Value(v) => HolderState::Value(*v),
//// }
//// }
////}
//
//impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> {
// #[rustfmt::skip]
// fn is_empty(&self) -> bool {
// match self {
// | HolderState::EmptyKey
// | HolderState::EmptyKeySubtree
// //| HolderState::EmptyValueSubtree
// => true, _ => false
// }
// }
//
// fn has_value(&self) -> bool {
// !self.is_empty()
// }
//
// fn kind(&self) -> HolderKind {
// match self {
// | HolderState::EmptyKey
// | HolderState::Key(_)
// => HolderKind::Key,
// | HolderState::EmptyKeySubtree
// | HolderState::KeySubtree(_, _)
// => HolderKind::KeySubtree,
// //| HolderState::EmptyValueSubtree
// | HolderState::ValueSubtree(_, _)
// => HolderKind::ValueSubtree,
// | HolderState::Value(_)
// => HolderKind::Value,
// }
// }
//
// fn value(&self) -> Option<RefOwn<'b, T::Ref, T::Own>> {
// match *self {
// HolderState::Key((_, value)) => Some(value),
// HolderState::KeySubtree(_, (_, value)) => Some(value),
// HolderState::ValueSubtree(_, value) => Some(value),
// HolderState::Value(value) => Some(value),
// _ => None
// }
// }
//
// fn key(&self) -> Option<RefOwn<'b, T::Ref, T::Own>> {
// match *self {
// HolderState::Key((key, _)) => Some(key),
// HolderState::KeySubtree(_, (key, _)) => Some(key),
// _ => None
// }
// }
//
// fn pair(&self) -> Option<KVPair<'b, T>> {
// match *self {
// HolderState::Key(pair) => Some(pair),
// HolderState::KeySubtree(_, pair) => Some(pair),
// _ => None
// }
// }
//
// fn subtree(&mut self) -> Option<&mut Peekable<Matcher<'a, 'b, T>>> {
// match *self {
// HolderState::KeySubtree(ref mut subtree, _) => Some(subtree),
// HolderState::ValueSubtree(ref mut subtree, _) => Some(subtree),
// _ => None
// }
// }
//
// fn clear(&mut self) {
// *self = match self.kind() {
// HolderKind::Key => HolderState::EmptyKey,
// HolderKind::KeySubtree => HolderState::EmptyKeySubtree,
// HolderKind::ValueSubtree => unreachable!(), //HolderState::EmptyValueSubtree,
// HolderKind::Value => unreachable!(),
// };
// assert!(self.is_empty());
// }
//}
//
///// Stores a single match and associated metadata.
/////
///// A single match is generally a key-value pair, but may be a collection of
///// named pairs in the case of subtree matches, or just a value for the initial
///// holder.
//struct Holder<'a, 'b, T: PatternTypes> {
// name: Option<&'a str>,
// value: HolderState<'a, 'b, T>,
// parent: Option<RefOwn<'b, T::Ref, T::Own>>,
// iterator: Option<Box<dyn Iterator<Item=KVPair<'b, T>> + 'b>>,
// filters: Vec<Box<dyn (for<'c> Fn(&'c mut HolderState<'a, 'b, T>) -> Result<(), MatchError>) + 'a>>,
//}
//
//impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> {
// fn next(&mut self) -> Result<bool, MatchError> {
// self.ensure_iterator()?;
// if let Self {
// value: ref mut v,
// iterator: Some(ref mut it),
// ref filters,
// ..
// } = self {
// // check if we're in a subtree and (not) done.
// if let Some(matcher) = v.subtree() {
// if let Some(res) = matcher.peek() {
// // report any errors
// return res.as_ref().map(|_| true).map_err(|e| e.clone());
// }
// }
// let kind = v.kind();
// let mut next_v;
// loop {
// next_v = match it.next() {
// Some(pair) => HolderState::Key(pair),
// None => return Ok(false)
// };
// for filter in filters {
// filter(&mut next_v)?;
// if next_v.is_empty() {
// break;
// }
// }
// if next_v.has_value() {
// break;
// }
// }
// assert!(next_v.has_value());
// assert_eq!(next_v.kind(), kind);
// *v = next_v;
// Ok(true)
// } else {
// unreachable!()
// }
// }
//
// /// Ensure `self.iterator.is_some()`, creating an iterator if necessary.
// fn ensure_iterator(&mut self) -> Result<(), MatchError> {
// if self.iterator.is_none() {
// let iter = T::pairs(self.parent.unwrap());
// if iter.is_none() {
// return Err(MatchError::UnsupportedOperation);
// }
// self.iterator = iter;
// }
// assert!(self.iterator.is_some());
// Ok(())
// }
//}
//
//impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> {
// fn default() -> Self {
// Self {
// name: Default::default(),
// value: HolderState::EmptyKey,
// parent: Default::default(),
// iterator: Default::default(),
// filters: Default::default(),
// }
// }
//}
//
//pub struct Matcher<'a, 'b, T: PatternTypes> {
// defs: &'a PatternConstants<T>,
// frame: Frame<'a, 'b, T>,
//}
//
//// TODO:
////
//// [x] Arrow
//// [x] StringKey
//// [x] RegexKey
//// [x] KeySubtree
//// [x] ValueSubtree
//// [x] Ident
//// [x] Param (untested)
//// [x] ApplyPredicate
//// [x] End
//
///// Helper for `PatternElement::StringKey`.
//fn on_string_key<'a, 'b, T: PatternTypes>(
// matcher: &mut Matcher<'a, 'b, T>,
// id: usize,
// skippable: bool,
//) -> Result<bool, MatchError> {
// let path = matcher.frame.path.last_mut().unwrap();
// assert!(path.iterator.is_none());
// let key = &matcher.defs.strings[id];
// let iter = T::get(path.parent.unwrap(), RefOwn::Str(key));
// match iter {
// Some(None) if !skippable => Err(MatchError::ValidationError),
// Some(opt) => {
// path.iterator = Some(Box::new(opt.into_iter()));
// Ok(true)
// }
// None => Err(MatchError::UnsupportedOperation),
// }
//}
//
///// Helper for `PatternElement::ParameterKey`.
//fn on_parameter_key<'a, 'b, T: PatternTypes>(
// matcher: &mut Matcher<'a, 'b, T>,
// id: usize,
// skippable: bool,
//) -> Result<bool, MatchError> {
// let path = matcher.frame.path.last_mut().unwrap();
// assert!(path.iterator.is_none());
// let key = matcher.defs.defs[id];
// let iter = T::get(path.parent.unwrap(), RefOwn::Own(key));
// match iter {
// Some(None) if !skippable => Err(MatchError::ValidationError),
// Some(opt) => {
// path.iterator = Some(Box::new(opt.into_iter()));
// Ok(true)
// }
// None => Err(MatchError::UnsupportedOperation),
// }
//}
//
///// Helper for `PatternElement::RegexKey`.
//fn on_regex_key<'a, 'b, T: PatternTypes>(
// matcher: &mut Matcher<'a, 'b, T>,
// id: usize,
// skippable: bool,
//) -> Result<bool, MatchError> {
// matcher.frame.path.last_mut().unwrap().ensure_iterator()?;
// let re = &matcher.defs.regices[id];
// let path = matcher.frame.path.last_mut().unwrap();
// path.filters.push(Box::new(move |value| {
// let s = T::as_str(value.key().unwrap());
// match (s.map_or(false, |s| re.is_match(s)), skippable) {
// (true, _) => Ok(()),
// (false, true) => {
// value.clear();
// Ok(())
// },
// (false, false) => Err(MatchError::ValidationError),
// }
// }));
// Ok(true)
//}
//
///// Helper for `PatternElement::KeySubtree`.
//fn on_key_subtree<'a, 'b, T: PatternTypes>(
// matcher: &mut Matcher<'a, 'b, T>,
// id: usize,
// skippable: bool,
//) -> Result<bool, MatchError> {
// let _ = skippable; // FIXME what should a skippable KeySubtree even do?!
// matcher.frame.path.last_mut().unwrap().ensure_iterator()?;
// let defs = matcher.defs;
// let rlimit: usize = matcher.frame.depth;
// let path = matcher.frame.path.last_mut().unwrap();
// assert!(path.value.is_empty());
// assert_eq!(path.value.kind(), HolderKind::Key);
// path.value = HolderState::EmptyKeySubtree;
// path.filters.push(Box::new(move |value| {
// let key = value.key().unwrap();
// let mut subtree = Matcher::new(key, defs, id, rlimit)?.peekable();
// match subtree.peek() {
// Some(&Ok(_)) => {
// *value = HolderState::KeySubtree(subtree, value.pair().unwrap());
// Ok(())
// },
// Some(&Err(ref e)) => {
// Err(e.clone())
// },
// None => {
// value.clear();
// Ok(())
// }
// }
// }));
// Ok(true)
//}
//
//const DUMMY_OPS: &'static [PatternElement] = &[];
//
//impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> {
// pub(crate) fn new(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants<T>, proto: usize, rlimit: usize) -> Result<Self, MatchError> {
// let ops: &[_] = &defs.protos[proto];
// Self::with_ops(obj, defs, ops, rlimit)
// }
//
// /// Constructs a Matcher that yields a single dummy result.
// fn with_ops(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants<T>, ops: &'a [PatternElement], rlimit: usize) -> Result<Self, MatchError> {
// let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?;
// Ok(Self {
// defs: defs,
// frame: Frame {
// //obj: obj,
// ops: ops,
// iar: None,
// depth: depth,
// path: {
// let mut holder = Holder::default();
// holder.value = HolderState::Value(obj);
// holder.iterator = Some(Box::new(std::iter::empty()));
// vec![holder]
// },
// in_key: false,
// },
// })
// }
//
// fn on_in_key(&mut self) -> Result<bool, MatchError> {
// match self.frame.op() {
// PatternElement::End => {
// let path = self.frame.path.last_mut().unwrap();
// if path.next()? {
// Ok(false)
// } else {
// drop(path);
// self.frame.path.pop().unwrap();
// // stop at previous End, or start of frame
// while self.frame.prev() {
// if matches!(self.frame.op(), PatternElement::End) {
// break;
// }
// }
// // is start of frame?
// if !self.frame.prev() {
// self.frame.path.clear();
// }
// Ok(true)
// }
// },
// PatternElement::ApplyPredicate(id, skippable) => {
// // failing on T::get() is already handled, but we may need a
// // T::pairs(). construct it here.
// self.frame.path.last_mut().unwrap().ensure_iterator()?;
// let pred = &self.defs.predicates[id];
// let path = self.frame.path.last_mut().unwrap();
// path.filters.push(Box::new(move |value| {
// match (pred(value.value().unwrap()), skippable) {
// (true, _) => Ok(()),
// (false, true) => {
// value.clear();
// Ok(())
// },
// (false, false) => Err(MatchError::ValidationError),
// }
// }));
// Ok(true)
// },
// PatternElement::StringKey(id, skippable) => {
// on_string_key(self, id, skippable)
// },
// PatternElement::ParameterKey(id, skippable) => {
// on_parameter_key(self, id, skippable)
// },
// PatternElement::RegexKey(id, skippable) => {
// on_regex_key(self, id, skippable)
// },
// PatternElement::KeySubtree(id, skippable) => {
// on_key_subtree(self, id, skippable)
// },
// _ => unreachable!("on_in_key")
// }
// }
//
// fn on_not_in_key(&mut self) -> Result<bool, MatchError> {
// match self.frame.op() {
// PatternElement::Arrow => {
// // this *should* always pass.
// assert!(self.frame.path.last().unwrap().iterator.is_some());
// let mut holder = Holder::default();
// holder.parent = self.frame.path.last().unwrap().value.value();
// assert!(holder.parent.is_some());
// self.frame.path.push(holder);
// Ok(false)
// },
// PatternElement::Identifier(id) => {
// let name = self.defs.strings.get(id).map(|s| &**s);
// let path = self.frame.path.last_mut().unwrap();
// path.name = name;
// assert!(path.iterator.is_none());
// // we don't actually create the iterator here,
// // as we may still wanna use T::get() instead.
// Ok(true)
// },
// PatternElement::ApplyPredicate(id, skippable) => {
// assert!(self.frame.path.len() == 1);
// let pred = &self.defs.predicates[id];
// let value = self.frame.path.last().unwrap().value.value();
// match (pred(value.unwrap()), skippable) {
// (true, _) => Ok(false),
// (false, true) => {
// self.frame.path.clear();
// // any Ok(_) will do
// Ok(false)
// },
// (false, false) => Err(MatchError::ValidationError),
// }
// },
// PatternElement::StringKey(id, skippable) => {
// on_string_key(self, id, skippable)
// },
// PatternElement::ParameterKey(id, skippable) => {
// on_parameter_key(self, id, skippable)
// },
// PatternElement::RegexKey(id, skippable) => {
// on_regex_key(self, id, skippable)
// },
// PatternElement::KeySubtree(id, skippable) => {
// on_key_subtree(self, id, skippable)
// },
// PatternElement::ValueSubtree(id, skippable) => {
// let value = self.frame.path.last().unwrap().value.value().unwrap();
// let mut subtree = Matcher::new(
// value,
// self.defs,
// id,
// self.frame.depth
// )?.peekable();
// let mut dummy = Matcher::with_ops(
// value,
// self.defs,
// DUMMY_OPS,
// self.frame.depth
// )?.peekable();
// // may panic.
// let peeked = subtree.peek();
// // shouldn't panic.
// let _ = dummy.peek();
// // push Holder after peek.
// self.frame.path.push(Holder::default());
// let mut holder = self.frame.path.last_mut().unwrap();
// holder.parent = Some(value);
// holder.iterator = Some(Box::new(std::iter::empty()));
// match peeked {
// None if skippable => {
// holder.value = HolderState::ValueSubtree(dummy, value);
// Ok(true)
// },
// Some(&Ok(_)) | None => {
// drop(peeked);
// holder.value = HolderState::ValueSubtree(subtree, value);
// Ok(true)
// },
// Some(&Err(ref e)) => {
// Err(e.clone())
// },
// }
// },
// _ => unreachable!("on_not_in_key")
// }
// }
//
// fn collect_results(&mut self) -> Matches<'a, 'b, T> {
// let mut res: Matches<'a, 'b, T> = Default::default();
// for holder in &mut self.frame.path {
// // make sure it's not empty.
// assert!(holder.value.has_value());
// // handle subtrees.
// if let Some(matcher) = holder.value.subtree() {
// if let Some(matches) = matcher.next() {
// // NOTE: we have checked these already.
// // (and if we haven't, that's a bug.)
// res.extend(matches.unwrap());
// }
// }
// // handle pairs.
// if let Some(pair) = holder.value.pair() {
// if let Some(name) = holder.name {
// res.insert(name, pair);
// }
// }
// }
// res
// }
//
// fn on_end(&mut self) -> (bool, Matches<'a, 'b, T>) {
// match self.frame.op() {
// PatternElement::End => {
// assert!(!self.frame.path.last().expect("path").value.is_empty());
// let res = self.collect_results();
// if !self.frame.prev() {
// // NOTE: frame.prev() must always be called, even if this
// // gets replaced with debug_assert!() in the future.
// assert!(false, "frame.prev()");
// }
// (true, res)
// }
// PatternElement::ApplyPredicate {..} => {
// assert!(!self.frame.in_key);
// let res = self.collect_results();
// self.frame.path.clear();
// (false, res)
// }
// _ => unreachable!("on_end")
// }
// }
//}
//
//impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> {
// type Item = Result<BTreeMap<&'a str, KVPair<'b, T>>, MatchError>;
//
// fn next(&mut self) -> Option<Self::Item> {
// if self.frame.ops.is_empty() {
// if !self.frame.path.is_empty() {
// self.frame.path.clear();
// return Some(Ok(Default::default()));
// }
// }
// while !self.frame.path.is_empty() {
// if !self.frame.next() {
// let (in_key, res) = self.on_end();
// self.frame.in_key = in_key;
// return Some(Ok(res));
// } else {
// let in_key = if self.frame.in_key {
// self.on_in_key()
// } else {
// self.on_not_in_key()
// };
// match in_key {
// Ok(in_key) => self.frame.in_key = in_key,
// Err(e) => {
// self.frame.path.clear();
// return Some(Err(e))
// },
// }
// }
// }
// None
// }
//}