// Copyright (C) 2021-2022 Soni L.
// SPDX-License-Identifier: MIT OR Apache-2.0
//! The Datafu Virtual Machine.
//!
//! This is the stuff that actually matches the pattern.
use std::borrow::Cow;
use std::cell::Cell;
use std::cell::RefCell;
use std::collections::BTreeMap;
use std::collections::VecDeque;
use std::marker::PhantomData;
use indexmap::IndexMap;
use indexmap::IndexSet;
use regex::Regex;
use serde::Serialize;
use crate::Predicate;
//use crate::errors::MatchError;
mod de;
pub(crate) use de::Unpacker;
pub(crate) use de::Packer;
/// Max depth for VM/serde recursion.
pub(crate) const MAX_CALLS: usize = 250;
//type Matches<'a, 'b, T> = BTreeMap<&'a str, KVPair<'b, T>>;
// maybe we should use a builder for this?
/// The constant pool for a pattern.
pub(crate) struct PatternConstants<O: Serialize> {
/// The protos ("functions") in a pattern.
///
/// The last proto is implicitly the main function/entry point.
pub(crate) protos: Vec<Vec<PatternElement>>,
// Note that we can borrow these when creating the output map.
// https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f
pub(crate) strings: IndexSet<String>,
pub(crate) regices: Vec<Regex>,
pub(crate) predicates: Vec<Box<Predicate>>,
pub(crate) defs: Vec<O>,
}
impl<O: Serialize> Default for PatternConstants<O> {
fn default() -> Self {
Self {
protos: Default::default(),
strings: Default::default(),
regices: Default::default(),
predicates: Default::default(),
defs: Default::default(),
}
}
}
impl<O: Serialize + std::fmt::Debug> std::fmt::Debug for PatternConstants<O> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct(
"PatternConstants"
).field(
"protos", &self.protos,
).field(
"strings", &self.strings,
).field(
"regices", &self.regices,
).field(
"predicates",
&format_args!("({} predicates)", self.predicates.len()),
).field(
"defs",
&self.defs,
).finish()
}
}
/// A datafu pattern element.
#[derive(Copy, Clone, Debug)]
pub(crate) enum PatternElement {
/// A value is the core capturing element.
Value {
/// The index of the (string) name to apply to this value.
name: Option<usize>,
/// The expected value of this entry.
value: Option<Value>,
},
/// A tag is the core iterative element. It is always followed by a value.
///
/// This one is empty.
EmptyTag,
/// A tag is the core iterative element. It is always followed by a value.
Tag {
/// The index of the (proto) key to match against.
key_subtree: usize,
/// Whether to allow this tree subtree to match nothing.
///
/// By default, a datafu pattern only matches a tree if every branch of
/// the tree matches something. This enables opting out of that.
// TODO this isn't currently implemented.
optional: bool,
},
/// Marks the end of pattern iteration and the start of subtrees (if any).
SubtreeMarker,
/// A value subtree is a subtree for values.
///
/// It is applied *after* tags, and thus any value subtrees come last in
/// a pattern's elements.
ValueSubtree {
/// The proto index of the subtree.
index: usize,
/// Whether to allow this value subtree to match nothing.
///
/// By default, a datafu pattern only matches a tree if every branch of
/// the tree matches something. This enables opting out of that.
optional: bool,
},
}
/// A value matcher.
#[derive(Copy, Clone, Debug)]
pub(crate) enum Value {
/// The value must match the specified string.
String {
/// The index of the string.
index: usize,
/// Whether to skip non-matching values, instead of erroring.
skippable: bool,
},
/// The value must match the specified regex.
Regex {
/// The index of the regex.
index: usize,
/// Whether to skip non-matching values, instead of erroring.
skippable: bool,
},
// /// The value must match the specified integer.
// Integer {
// /// The integer.
// value: usize,
// /// Whether to skip non-matching values, instead of erroring.
// skippable: bool,
// },
// /// The value must match the specified integer range.
// Range {
// /// The range.
// value: Range<usize>,
// /// Whether to skip non-matching values, instead of erroring.
// skippable: bool,
// },
// /// The value must match the specified predicate.
// Predicate {
// /// The index of the predicate.
// index: usize,
// /// Whether to skip non-matching values, instead of erroring.
// skippable: bool,
// },
// /// The value must match the specified parameter.
// Paameter {
// /// The index of the parameter.
// index: usize,
// /// Whether to skip non-matching values, instead of erroring.
// skippable: bool,
// },
/// The value must have the specified type.
Type {
/// The expected type.
ty: Type,
/// Whether to skip non-matching values, instead of erroring.
skippable: bool,
},
}
/// A pattern token.
// TODO docs
#[derive(Copy, Clone, Debug)]
pub(crate) enum PatternToken {
/// Start of a tag.
Arrow,
Identifier(usize),
String(usize, bool),
Regex(usize, bool),
Parameter(usize, bool),
KeySubtree(usize, bool),
ValueSubtree(usize, bool),
/// Represents a predicate which must be applied.
///
/// These are custom, arbitrary predicates, powered by serde. They're
/// represented by `:$foo` in a pattern.
ApplyPredicate(
/// The predicate index (in `PatternConstants.predicates`).
usize,
/// Whether to skip non-matching values, instead of erroring.
bool,
),
/// Represents a type expectation.
///
/// These are similar to predicates. They're represented by `:foo`, but are
/// built-in and provide functionality not supported by predicates.
///
/// Specifically, predicates cannot ask serde for a map or a list directly.
/// Instead, they'd be required to parse a whole map/list/etc, which could
/// cause issues which datafu is designed to avoid. (Datafu is designed to
/// resist malicious input more so than arbitrary serde deserializers.)
Type(
/// The expected type.
Type,
/// Whether to skip non-matching values, instead of erroring.
bool,
),
/// End of a tag.
End,
}
/// The types datafu and serde currently support.
///
/// These are used as expectations for serde (e.g.
/// `Deserializer::deserialize_string`).
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum Type {
Any,
IgnoredAny,
Bool,
I8,
I16,
I32,
I64,
I128,
U8,
U16,
U32,
U64,
U128,
F32,
F64,
Char,
Str,
String,
Bytes,
ByteBuf,
Option,
Unit,
Seq,
Tuple(usize),
Map,
UnitStruct(&'static str),
NewtypeStruct(&'static str),
TupleStruct {
name: &'static str,
len: usize,
},
Identifier,
Struct {
name: &'static str,
fields: &'static [&'static str],
},
Enum {
name: &'static str,
variants: &'static [&'static str],
},
}
/// The types which can be deserialized by serde.
///
/// We guess this is basically the same thing as a serde_value?
#[derive(Clone, Debug, PartialEq)]
pub(crate) enum SerdeObject<'de> {
Bool(bool),
I8(i8),
I16(i16),
I32(i32),
I64(i64),
I128(i128),
U8(u8),
U16(u16),
U32(u32),
U64(u64),
U128(u128),
F32(f32),
F64(f64),
Char(char),
Str(Cow<'de, str>),
Bytes(Cow<'de, [u8]>),
Some(Box<SerdeObject<'de>>),
None,
Unit,
Seq(Vec<SerdeObject<'de>>),
// NOTE: support for multimaps!
Map(Vec<(SerdeObject<'de>, SerdeObject<'de>)>),
NewtypeStruct(Box<SerdeObject<'de>>),
Enum {
variant: Box<SerdeObject<'de>>,
data: Box<SerdeObject<'de>>,
},
}
impl<'de> SerdeObject<'de> {
///// Checks the type of this object.
//fn check<E: serde::de::Error>(&self, ty: Option<Type>) -> Result<(), E> {
// let ty = match ty {
// None => return Ok(()),
// Some(ty) => ty,
// };
// match (ty, self) {
// | (Type::Any, v)
// | (Type::IgnoredAny, v)
// => Ok(()),
// | (Type::Bool, v @ SerdeObject::Bool(_))
// | (Type::I8, v @ SerdeObject::I8(_))
// | (Type::I16, v @ SerdeObject::I16(_))
// | (Type::I32, v @ SerdeObject::I32(_))
// | (Type::I64, v @ SerdeObject::I64(_))
// | (Type::I128, v @ SerdeObject::I128(_))
// | (Type::U8, v @ SerdeObject::U8(_))
// | (Type::U16, v @ SerdeObject::U16(_))
// | (Type::U32, v @ SerdeObject::U32(_))
// | (Type::U64, v @ SerdeObject::U64(_))
// | (Type::U128, v @ SerdeObject::U128(_))
// | (Type::F32, v @ SerdeObject::F32(_))
// | (Type::F64, v @ SerdeObject::F64(_))
// | (Type::Char, v @ SerdeObject::Char(_))
// | (Type::Str, v @ SerdeObject::Str(_))
// | (Type::String, v @ SerdeObject::Str(_))
// | (Type::Bytes, v @ SerdeObject::Bytes(_))
// | (Type::ByteBuf, v @ SerdeObject::Bytes(_))
// | (Type::Option, v @ SerdeObject::None)
// | (Type::Option, v @ SerdeObject::Some(_))
// | (Type::Unit, v @ SerdeObject::Unit)
// | (Type::Seq, v @ SerdeObject::Seq(_))
// | (Type::Map, v @ SerdeObject::Map(_))
// => Ok(()),
// _ => todo!(),
// }
//}
}
impl<'de, E> serde::de::IntoDeserializer<'de, E> for SerdeObject<'de>
where
E: serde::de::Error,
{
type Deserializer = self::de::SerdeObjectDeserializer<'de, E>;
fn into_deserializer(self) -> Self::Deserializer {
Self::Deserializer {
obj: self,
_e: PhantomData,
}
}
}
/// Packed serde objects and datafu internal representation.
///
/// This is an iterative store of key-value pairs.
///
/// It's effectively a tree node.
#[derive(Clone, Debug, Default)]
pub struct Pack<'pat, 'de> {
subpacks: VecDeque<(IndexMap<&'pat str, SerdeObject<'de>>, Pack<'pat, 'de>)>,
}
impl<'pat, 'de> Pack<'pat, 'de> {
/// Adds the elements of the `other` pack to the matching iterarions of the
/// current pack. If either pack is empty, pick the non-empty pack.
///
/// The current pack will have the same number of iterations, but will
/// contain captures from both packs. In case of captures of the same name,
/// `other` will override `self`.
///
/// # Panics
///
/// Panics if the packs have different iteration lengths.
fn zip(&mut self, mut other: Self) {
match (self.subpacks.len(), other.subpacks.len()) {
(0, _) => {
*self = other;
},
(_, 0) => {},
(a, b) if a == b => {
for (l, r) in self.subpacks.iter_mut().zip(other.subpacks) {
// note that we can't actually recurse deeper than the VM
// actually does itself, so we don't need to worry about
// blowing the stack.
l.0.extend(r.0);
l.1.zip(r.1);
}
},
_ => unreachable!("zip unbalanced iterations"),
}
}
/// Adds the elements of the `other` pack to all iterations captured by
/// this pack, such as to form a cartesian product. If either pack is
/// empty, pick the non-empty pack. If both packs have a length of 1, merge
/// their captures.
///
/// The current pack will contain captures from both packs. In case of
/// captures of the same name, `other` will override `self`.
fn cartesian_product(&mut self, mut other: Self) {
match (self.subpacks.len(), other.subpacks.len()) {
(_, 0) => {
return;
},
(0, _) => {
*self = other;
return;
},
(1, 1) => {
let (robjects, rpack) = other.subpacks.pop_back().unwrap();
let (ref mut lobjects, ref mut lpack) = self.subpacks[0];
lobjects.extend(robjects);
lpack.cartesian_product(rpack);
return;
},
(1, _) => {
self.subpacks[0].1.cartesian_product(other);
return;
},
(_, 1) => {
// FIXME: need to be careful with this one.
// we want `other` to override entries from `self`, so we need
// to scan `self` for captures of the same name as those in
// `other`, and remove those captures. only then can we swap
// `self` and `other` and merge them.
// for now we can just do the inefficient thing tho.
},
_ => {},
}
// FIXME instead of doing this, perhaps we should find the smaller one,
// and put clones of the larger one into it?
self.subpacks.iter_mut().for_each(|&mut (_, ref mut lpack)| {
lpack.cartesian_product(other.clone())
});
}
/// Returns the serde object with the given name at the given iteration of
/// this pack.
#[cfg(test)]
fn get_object_at(
&self,
iter: usize,
name: &str,
) -> Option<&SerdeObject<'de>> {
self.subpacks.get(iter).map(|x| &x.0).and_then(|x| x.get(name))
}
/// Returns the subpack related to the given name at the given iteration of
/// this pack.
#[cfg(test)]
fn get_subpack_at(
&self,
iter: usize,
name: &str,
) -> Option<&Pack<'pat, 'de>> {
let _ = name;
self.subpacks.get(iter).map(|x| &x.1)
}
}
/// The Datafu interpreter, sorta.
#[derive(Debug)]
pub(crate) struct Interpreter<'pat, 'state, O: Serialize> {
/// The pattern currently being processed.
pat: &'pat PatternConstants<O>,
/// The error override (if any).
error: &'state mut Option<crate::errors::MatchError>,
/// The current interpreter frames.
frames: &'state mut Vec<Frame<'pat>>,
}
#[derive(Debug)]
pub(crate) struct Frame<'pat> {
/// The instructions/function currently being processed.
ops: &'pat [PatternElement],
/// The instruction index being processed.
iar: Option<usize>,
/// How many steps this frame has not actually advanced for.
///
/// This is used at end of frame and on match failure.
overstep: usize,
/// Whether this frame matches the data so far.
matches: bool,
/// Whether this frame must not be allowed to match in the key step.
poison: bool,
}
impl<'pat, 'state, O: Serialize> Interpreter<'pat, 'state, O> {
pub(crate) fn new(
pat: &'pat PatternConstants<O>,
error: &'state mut Option<crate::errors::MatchError>,
frames: &'state mut Vec<Frame<'pat>>,
) -> Self {
debug_assert!(frames.is_empty());
frames.push(Frame {
ops: &pat.protos.last().unwrap(),
iar: None,
overstep: 0,
matches: true,
poison: false,
});
Self {
pat: pat,
error: error,
frames: frames,
}
}
}
impl<'pat> Frame<'pat> {
/// Gets the type currently associated with this frame.
///
/// Returns the type and whether it is required to match.
fn get_type(
&self,
) -> Option<(Type, bool)> {
match self.op() {
PatternElement::Value { value: Some(value), .. } => {
match value {
| Value::String { skippable, .. }
| Value::Regex { skippable, .. }
=> {
Some((Type::Str, !skippable))
},
Value::Type { ty, skippable } => {
Some((ty, !skippable))
},
}
},
| PatternElement::EmptyTag
| PatternElement::Tag { .. }
=> panic!("attempt to get type of tag"),
_ => None,
}
}
/// Gets the name currently associated with this frame.
fn get_name<O: Serialize>(
&self,
pat: &'pat PatternConstants<O>,
) -> Option<&'pat str> {
let strings = &pat.strings;
match self.op() {
PatternElement::Value { name: Some(name), .. } => {
Some(&*strings[name])
},
| PatternElement::EmptyTag
| PatternElement::Tag { .. }
=> panic!("attempt to get name of tag"),
_ => None,
}
}
/// Advances the instruction address register.
///
/// # Returns
///
/// `true` if successful, `false` otherwise.
fn next(&mut self) -> bool {
let new = self.iar.map_or(0, |v| v + 1);
new < self.ops.len() && {
self.iar = Some(new);
true
}
}
/// Returns the current instruction.
///
/// # Panics
///
/// Panics if called on a non-matching frame or if iteration hasn't begun.
fn op(&self) -> PatternElement {
assert!(self.active(), "op() called on inactive frame");
self.raw_op()
}
/// Counts the number of *active* subtrees, if any, and whether any
/// subtrees have been unwound.
///
/// # Panics
///
/// Panics if iteration hasn't begun.
fn num_subtrees(&self) -> Option<(usize, bool)> {
let iar = self.iar.unwrap();
// check if there are any subtrees
matches!(
self.ops[iar],
| PatternElement::ValueSubtree { .. }
| PatternElement::SubtreeMarker
).then(|| {
// count the number of subtrees
(
self.ops[0..=iar].iter().rev().take_while(|x| {
matches!(x, PatternElement::ValueSubtree { .. })
}).count(),
self.ops.len() - 1 != iar,
)
})
}
/// Returns whether this key has a subtree, and if so, its index and
/// whether it is optional, as an `(index, optional)` pair.
///
/// # Panics
///
/// Panics if iteration hasn't begun, or this isn't a key.
fn key_subtree(&self) -> Option<(usize, bool)> {
match self.op() {
PatternElement::Tag { key_subtree, optional } => {
Some((key_subtree, optional))
},
PatternElement::EmptyTag => None,
_ => unreachable!(),
}
}
/// Returns whether this frame is in a value operation.
///
/// # Panics
///
/// Panics if the frame isn't active or iteraction hasn't begun.
#[inline]
fn is_value(&self) -> bool {
self.active() && matches!(
self.raw_op(),
PatternElement::Value { .. },
)
}
/// Returns this value subtree, as an `(index, optional)` pair.
///
/// # Panics
///
/// Panics if iteration hasn't begun, or this isn't a value subtree.
fn value_subtree(&self) -> (usize, bool) {
if let PatternElement::ValueSubtree {
index,
optional,
} = self.raw_op() {
(index, optional)
} else {
unreachable!()
}
}
/// Returns the raw instruction.
///
/// # Panics
///
/// Panics if iteration hasn't begun.
fn raw_op(&self) -> PatternElement {
self.ops[self.iar.expect("ops[iar]")]
}
/// Returns whether this frame is active (not overstepped).
#[inline]
fn active(&self) -> bool {
self.overstep == 0
}
/// Rewinds the instruction address register.
///
/// # Returns
///
/// `true` if successful, `false` otherwise.
fn prev(&mut self) -> bool {
let new = self.iar.expect("iar").checked_sub(1);
new.is_some() && {
self.iar = new;
true
}
}
}
///// Stores a single match.
/////
///// See also Holder.
//enum HolderState<'a, 'b, T: PatternTypes> {
// /// Empty holder, for a key-value pair.
// EmptyKey,
// /// Empty holder, for a Matcher and a key-value pair.
// EmptyKeySubtree,
// // /// Empty holder, for a Matcher and a value.
// // EmptyValueSubtree,
// /// Occupied holder, for a key-value pair..
// Key(KVPair<'b, T>),
// /// Occupied holder, for a Matcher and a key-value pair.
// KeySubtree(Peekable<Matcher<'a, 'b, T>>, KVPair<'b, T>),
// /// Occupied holder, for a Matcher and a value. The empty variant is
// /// omitted as it would never be used otherwise.
// ValueSubtree(Peekable<Matcher<'a, 'b, T>>, RefOwn<'b, T::Ref, T::Own>),
// /// Occupied holder, for a value. The empty variant is omitted as it would
// /// never be used otherwise.
// Value(RefOwn<'b, T::Ref, T::Own>),
//}
//
///// Helper enum for HolderState.
//#[derive(Copy, Clone, Debug, Eq, PartialEq)]
//enum HolderKind {
// Key,
// KeySubtree,
// ValueSubtree,
// Value
//}
//
////impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> {
//// fn clone(&self) -> Self {
//// match self {
//// HolderState::EmptyKey => HolderState::EmptyKey,
//// HolderState::EmptySubtree => HolderState::EmptySubtree,
//// HolderState::Key(v) => HolderState::Key(*v),
//// HolderState::KeySubtree(m, v) => HolderState::KeySubtree(m.clone(), *v),
//// HolderState::ValueSubtree(m, v) => HolderState::ValueSubtree(m.clone(), *v),
//// HolderState::Value(v) => HolderState::Value(*v),
//// }
//// }
////}
//
//impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> {
// #[rustfmt::skip]
// fn is_empty(&self) -> bool {
// match self {
// | HolderState::EmptyKey
// | HolderState::EmptyKeySubtree
// //| HolderState::EmptyValueSubtree
// => true, _ => false
// }
// }
//
// fn has_value(&self) -> bool {
// !self.is_empty()
// }
//
// fn kind(&self) -> HolderKind {
// match self {
// | HolderState::EmptyKey
// | HolderState::Key(_)
// => HolderKind::Key,
// | HolderState::EmptyKeySubtree
// | HolderState::KeySubtree(_, _)
// => HolderKind::KeySubtree,
// //| HolderState::EmptyValueSubtree
// | HolderState::ValueSubtree(_, _)
// => HolderKind::ValueSubtree,
// | HolderState::Value(_)
// => HolderKind::Value,
// }
// }
//
// fn value(&self) -> Option<RefOwn<'b, T::Ref, T::Own>> {
// match *self {
// HolderState::Key((_, value)) => Some(value),
// HolderState::KeySubtree(_, (_, value)) => Some(value),
// HolderState::ValueSubtree(_, value) => Some(value),
// HolderState::Value(value) => Some(value),
// _ => None
// }
// }
//
// fn key(&self) -> Option<RefOwn<'b, T::Ref, T::Own>> {
// match *self {
// HolderState::Key((key, _)) => Some(key),
// HolderState::KeySubtree(_, (key, _)) => Some(key),
// _ => None
// }
// }
//
// fn pair(&self) -> Option<KVPair<'b, T>> {
// match *self {
// HolderState::Key(pair) => Some(pair),
// HolderState::KeySubtree(_, pair) => Some(pair),
// _ => None
// }
// }
//
// fn subtree(&mut self) -> Option<&mut Peekable<Matcher<'a, 'b, T>>> {
// match *self {
// HolderState::KeySubtree(ref mut subtree, _) => Some(subtree),
// HolderState::ValueSubtree(ref mut subtree, _) => Some(subtree),
// _ => None
// }
// }
//
// fn clear(&mut self) {
// *self = match self.kind() {
// HolderKind::Key => HolderState::EmptyKey,
// HolderKind::KeySubtree => HolderState::EmptyKeySubtree,
// HolderKind::ValueSubtree => unreachable!(), //HolderState::EmptyValueSubtree,
// HolderKind::Value => unreachable!(),
// };
// assert!(self.is_empty());
// }
//}
//
///// Stores a single match and associated metadata.
/////
///// A single match is generally a key-value pair, but may be a collection of
///// named pairs in the case of subtree matches, or just a value for the initial
///// holder.
//struct Holder<'a, 'b, T: PatternTypes> {
// name: Option<&'a str>,
// value: HolderState<'a, 'b, T>,
// parent: Option<RefOwn<'b, T::Ref, T::Own>>,
// iterator: Option<Box<dyn Iterator<Item=KVPair<'b, T>> + 'b>>,
// filters: Vec<Box<dyn (for<'c> Fn(&'c mut HolderState<'a, 'b, T>) -> Result<(), MatchError>) + 'a>>,
//}
//
//impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> {
// fn next(&mut self) -> Result<bool, MatchError> {
// self.ensure_iterator()?;
// if let Self {
// value: ref mut v,
// iterator: Some(ref mut it),
// ref filters,
// ..
// } = self {
// // check if we're in a subtree and (not) done.
// if let Some(matcher) = v.subtree() {
// if let Some(res) = matcher.peek() {
// // report any errors
// return res.as_ref().map(|_| true).map_err(|e| e.clone());
// }
// }
// let kind = v.kind();
// let mut next_v;
// loop {
// next_v = match it.next() {
// Some(pair) => HolderState::Key(pair),
// None => return Ok(false)
// };
// for filter in filters {
// filter(&mut next_v)?;
// if next_v.is_empty() {
// break;
// }
// }
// if next_v.has_value() {
// break;
// }
// }
// assert!(next_v.has_value());
// assert_eq!(next_v.kind(), kind);
// *v = next_v;
// Ok(true)
// } else {
// unreachable!()
// }
// }
//
// /// Ensure `self.iterator.is_some()`, creating an iterator if necessary.
// fn ensure_iterator(&mut self) -> Result<(), MatchError> {
// if self.iterator.is_none() {
// let iter = T::pairs(self.parent.unwrap());
// if iter.is_none() {
// return Err(MatchError::UnsupportedOperation);
// }
// self.iterator = iter;
// }
// assert!(self.iterator.is_some());
// Ok(())
// }
//}
//
//impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> {
// fn default() -> Self {
// Self {
// name: Default::default(),
// value: HolderState::EmptyKey,
// parent: Default::default(),
// iterator: Default::default(),
// filters: Default::default(),
// }
// }
//}
//
//pub struct Matcher<'a, 'b, T: PatternTypes> {
// defs: &'a PatternConstants<T>,
// frame: Frame<'a, 'b, T>,
//}
//
//// TODO:
////
//// [x] Arrow
//// [x] StringKey
//// [x] RegexKey
//// [x] KeySubtree
//// [x] ValueSubtree
//// [x] Ident
//// [x] Param (untested)
//// [x] ApplyPredicate
//// [x] End
//
///// Helper for `PatternElement::StringKey`.
//fn on_string_key<'a, 'b, T: PatternTypes>(
// matcher: &mut Matcher<'a, 'b, T>,
// id: usize,
// skippable: bool,
//) -> Result<bool, MatchError> {
// let path = matcher.frame.path.last_mut().unwrap();
// assert!(path.iterator.is_none());
// let key = &matcher.defs.strings[id];
// let iter = T::get(path.parent.unwrap(), RefOwn::Str(key));
// match iter {
// Some(None) if !skippable => Err(MatchError::ValidationError),
// Some(opt) => {
// path.iterator = Some(Box::new(opt.into_iter()));
// Ok(true)
// }
// None => Err(MatchError::UnsupportedOperation),
// }
//}
//
///// Helper for `PatternElement::ParameterKey`.
//fn on_parameter_key<'a, 'b, T: PatternTypes>(
// matcher: &mut Matcher<'a, 'b, T>,
// id: usize,
// skippable: bool,
//) -> Result<bool, MatchError> {
// let path = matcher.frame.path.last_mut().unwrap();
// assert!(path.iterator.is_none());
// let key = matcher.defs.defs[id];
// let iter = T::get(path.parent.unwrap(), RefOwn::Own(key));
// match iter {
// Some(None) if !skippable => Err(MatchError::ValidationError),
// Some(opt) => {
// path.iterator = Some(Box::new(opt.into_iter()));
// Ok(true)
// }
// None => Err(MatchError::UnsupportedOperation),
// }
//}
//
///// Helper for `PatternElement::RegexKey`.
//fn on_regex_key<'a, 'b, T: PatternTypes>(
// matcher: &mut Matcher<'a, 'b, T>,
// id: usize,
// skippable: bool,
//) -> Result<bool, MatchError> {
// matcher.frame.path.last_mut().unwrap().ensure_iterator()?;
// let re = &matcher.defs.regices[id];
// let path = matcher.frame.path.last_mut().unwrap();
// path.filters.push(Box::new(move |value| {
// let s = T::as_str(value.key().unwrap());
// match (s.map_or(false, |s| re.is_match(s)), skippable) {
// (true, _) => Ok(()),
// (false, true) => {
// value.clear();
// Ok(())
// },
// (false, false) => Err(MatchError::ValidationError),
// }
// }));
// Ok(true)
//}
//
///// Helper for `PatternElement::KeySubtree`.
//fn on_key_subtree<'a, 'b, T: PatternTypes>(
// matcher: &mut Matcher<'a, 'b, T>,
// id: usize,
// skippable: bool,
//) -> Result<bool, MatchError> {
// let _ = skippable; // FIXME what should a skippable KeySubtree even do?!
// matcher.frame.path.last_mut().unwrap().ensure_iterator()?;
// let defs = matcher.defs;
// let rlimit: usize = matcher.frame.depth;
// let path = matcher.frame.path.last_mut().unwrap();
// assert!(path.value.is_empty());
// assert_eq!(path.value.kind(), HolderKind::Key);
// path.value = HolderState::EmptyKeySubtree;
// path.filters.push(Box::new(move |value| {
// let key = value.key().unwrap();
// let mut subtree = Matcher::new(key, defs, id, rlimit)?.peekable();
// match subtree.peek() {
// Some(&Ok(_)) => {
// *value = HolderState::KeySubtree(subtree, value.pair().unwrap());
// Ok(())
// },
// Some(&Err(ref e)) => {
// Err(e.clone())
// },
// None => {
// value.clear();
// Ok(())
// }
// }
// }));
// Ok(true)
//}
//
//const DUMMY_OPS: &'static [PatternElement] = &[];
//
//impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> {
// pub(crate) fn new(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants<T>, proto: usize, rlimit: usize) -> Result<Self, MatchError> {
// let ops: &[_] = &defs.protos[proto];
// Self::with_ops(obj, defs, ops, rlimit)
// }
//
// /// Constructs a Matcher that yields a single dummy result.
// fn with_ops(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants<T>, ops: &'a [PatternElement], rlimit: usize) -> Result<Self, MatchError> {
// let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?;
// Ok(Self {
// defs: defs,
// frame: Frame {
// //obj: obj,
// ops: ops,
// iar: None,
// depth: depth,
// path: {
// let mut holder = Holder::default();
// holder.value = HolderState::Value(obj);
// holder.iterator = Some(Box::new(std::iter::empty()));
// vec![holder]
// },
// in_key: false,
// },
// })
// }
//
// fn on_in_key(&mut self) -> Result<bool, MatchError> {
// match self.frame.op() {
// PatternElement::End => {
// let path = self.frame.path.last_mut().unwrap();
// if path.next()? {
// Ok(false)
// } else {
// drop(path);
// self.frame.path.pop().unwrap();
// // stop at previous End, or start of frame
// while self.frame.prev() {
// if matches!(self.frame.op(), PatternElement::End) {
// break;
// }
// }
// // is start of frame?
// if !self.frame.prev() {
// self.frame.path.clear();
// }
// Ok(true)
// }
// },
// PatternElement::ApplyPredicate(id, skippable) => {
// // failing on T::get() is already handled, but we may need a
// // T::pairs(). construct it here.
// self.frame.path.last_mut().unwrap().ensure_iterator()?;
// let pred = &self.defs.predicates[id];
// let path = self.frame.path.last_mut().unwrap();
// path.filters.push(Box::new(move |value| {
// match (pred(value.value().unwrap()), skippable) {
// (true, _) => Ok(()),
// (false, true) => {
// value.clear();
// Ok(())
// },
// (false, false) => Err(MatchError::ValidationError),
// }
// }));
// Ok(true)
// },
// PatternElement::StringKey(id, skippable) => {
// on_string_key(self, id, skippable)
// },
// PatternElement::ParameterKey(id, skippable) => {
// on_parameter_key(self, id, skippable)
// },
// PatternElement::RegexKey(id, skippable) => {
// on_regex_key(self, id, skippable)
// },
// PatternElement::KeySubtree(id, skippable) => {
// on_key_subtree(self, id, skippable)
// },
// _ => unreachable!("on_in_key")
// }
// }
//
// fn on_not_in_key(&mut self) -> Result<bool, MatchError> {
// match self.frame.op() {
// PatternElement::Arrow => {
// // this *should* always pass.
// assert!(self.frame.path.last().unwrap().iterator.is_some());
// let mut holder = Holder::default();
// holder.parent = self.frame.path.last().unwrap().value.value();
// assert!(holder.parent.is_some());
// self.frame.path.push(holder);
// Ok(false)
// },
// PatternElement::Identifier(id) => {
// let name = self.defs.strings.get(id).map(|s| &**s);
// let path = self.frame.path.last_mut().unwrap();
// path.name = name;
// assert!(path.iterator.is_none());
// // we don't actually create the iterator here,
// // as we may still wanna use T::get() instead.
// Ok(true)
// },
// PatternElement::ApplyPredicate(id, skippable) => {
// assert!(self.frame.path.len() == 1);
// let pred = &self.defs.predicates[id];
// let value = self.frame.path.last().unwrap().value.value();
// match (pred(value.unwrap()), skippable) {
// (true, _) => Ok(false),
// (false, true) => {
// self.frame.path.clear();
// // any Ok(_) will do
// Ok(false)
// },
// (false, false) => Err(MatchError::ValidationError),
// }
// },
// PatternElement::StringKey(id, skippable) => {
// on_string_key(self, id, skippable)
// },
// PatternElement::ParameterKey(id, skippable) => {
// on_parameter_key(self, id, skippable)
// },
// PatternElement::RegexKey(id, skippable) => {
// on_regex_key(self, id, skippable)
// },
// PatternElement::KeySubtree(id, skippable) => {
// on_key_subtree(self, id, skippable)
// },
// PatternElement::ValueSubtree(id, skippable) => {
// let value = self.frame.path.last().unwrap().value.value().unwrap();
// let mut subtree = Matcher::new(
// value,
// self.defs,
// id,
// self.frame.depth
// )?.peekable();
// let mut dummy = Matcher::with_ops(
// value,
// self.defs,
// DUMMY_OPS,
// self.frame.depth
// )?.peekable();
// // may panic.
// let peeked = subtree.peek();
// // shouldn't panic.
// let _ = dummy.peek();
// // push Holder after peek.
// self.frame.path.push(Holder::default());
// let mut holder = self.frame.path.last_mut().unwrap();
// holder.parent = Some(value);
// holder.iterator = Some(Box::new(std::iter::empty()));
// match peeked {
// None if skippable => {
// holder.value = HolderState::ValueSubtree(dummy, value);
// Ok(true)
// },
// Some(&Ok(_)) | None => {
// drop(peeked);
// holder.value = HolderState::ValueSubtree(subtree, value);
// Ok(true)
// },
// Some(&Err(ref e)) => {
// Err(e.clone())
// },
// }
// },
// _ => unreachable!("on_not_in_key")
// }
// }
//
// fn collect_results(&mut self) -> Matches<'a, 'b, T> {
// let mut res: Matches<'a, 'b, T> = Default::default();
// for holder in &mut self.frame.path {
// // make sure it's not empty.
// assert!(holder.value.has_value());
// // handle subtrees.
// if let Some(matcher) = holder.value.subtree() {
// if let Some(matches) = matcher.next() {
// // NOTE: we have checked these already.
// // (and if we haven't, that's a bug.)
// res.extend(matches.unwrap());
// }
// }
// // handle pairs.
// if let Some(pair) = holder.value.pair() {
// if let Some(name) = holder.name {
// res.insert(name, pair);
// }
// }
// }
// res
// }
//
// fn on_end(&mut self) -> (bool, Matches<'a, 'b, T>) {
// match self.frame.op() {
// PatternElement::End => {
// assert!(!self.frame.path.last().expect("path").value.is_empty());
// let res = self.collect_results();
// if !self.frame.prev() {
// // NOTE: frame.prev() must always be called, even if this
// // gets replaced with debug_assert!() in the future.
// assert!(false, "frame.prev()");
// }
// (true, res)
// }
// PatternElement::ApplyPredicate {..} => {
// assert!(!self.frame.in_key);
// let res = self.collect_results();
// self.frame.path.clear();
// (false, res)
// }
// _ => unreachable!("on_end")
// }
// }
//}
//
//impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> {
// type Item = Result<BTreeMap<&'a str, KVPair<'b, T>>, MatchError>;
//
// fn next(&mut self) -> Option<Self::Item> {
// if self.frame.ops.is_empty() {
// if !self.frame.path.is_empty() {
// self.frame.path.clear();
// return Some(Ok(Default::default()));
// }
// }
// while !self.frame.path.is_empty() {
// if !self.frame.next() {
// let (in_key, res) = self.on_end();
// self.frame.in_key = in_key;
// return Some(Ok(res));
// } else {
// let in_key = if self.frame.in_key {
// self.on_in_key()
// } else {
// self.on_not_in_key()
// };
// match in_key {
// Ok(in_key) => self.frame.in_key = in_key,
// Err(e) => {
// self.frame.path.clear();
// return Some(Err(e))
// },
// }
// }
// }
// None
// }
//}