From a8778ff35bde88bb63d9fec769edf66e68d7969e Mon Sep 17 00:00:00 2001 From: SoniEx2 Date: Sat, 30 Jul 2022 11:22:23 -0300 Subject: Initial work on 0.1.0 Missing the VM. --- src/vm.rs | 1248 ++++++++++++++++++++++++++++++------------------------------- 1 file changed, 614 insertions(+), 634 deletions(-) (limited to 'src/vm.rs') diff --git a/src/vm.rs b/src/vm.rs index dd48752..6bcbf70 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -1,53 +1,33 @@ -/* - * This file is part of Datafu - * Copyright (C) 2021 Soni L. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -use std::collections::BTreeMap; -use std::iter::Peekable; +// Copyright (C) 2021-2022 Soni L. +// SPDX-License-Identifier: MIT OR Apache-2.0 use regex::Regex; +use serde::Serialize; -use crate::KVPair; -use crate::RefOwn; -use crate::PatternTypes; use crate::Predicate; -use crate::errors::MatchError; +//use crate::errors::MatchError; pub(crate) const MAX_CALLS: usize = 250; -type Matches<'a, 'b, T> = BTreeMap<&'a str, KVPair<'b, T>>; +//type Matches<'a, 'b, T> = BTreeMap<&'a str, KVPair<'b, T>>; // TODO: use a builder for this? /// The constant pool for a pattern. -pub(crate) struct PatternConstants { +pub(crate) struct PatternConstants { // last proto is implicitly the whole pattern. pub(crate) protos: Vec>, // Note that we can borrow these when creating the output map. // https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f pub(crate) strings: Vec, pub(crate) regices: Vec, - pub(crate) predicates: Vec>>, + pub(crate) predicates: Vec>, // NOTE these are part of the constant pool and so have lifetime analogous // to 'a (consistently used to indicate constant pool lifetime) when used // elsewhere. In particular, they can't be yielded by the iterator. - pub(crate) defs: Vec, + pub(crate) defs: Vec, } -impl Default for PatternConstants { +impl Default for PatternConstants { fn default() -> Self { Self { protos: Default::default(), @@ -76,612 +56,612 @@ pub(crate) enum PatternElement { End } -struct Frame<'a, 'b, T: PatternTypes> { - //obj: RefOwn<'b, T::Ref, T::Own>, - ops: &'a [PatternElement], - iar: Option, - depth: usize, - path: Vec>, - in_key: bool, -} - -impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> { - /// Advances the instruction address register. - /// - /// # Returns - /// - /// `true` if successful, `false` otherwise. - fn next(&mut self) -> bool { - let new = self.iar.map_or(0, |v| v + 1); - new < self.ops.len() && { - self.iar = Some(new); - true - } - } - - /// Returns the current instruction. - fn op(&self) -> PatternElement { - self.ops[self.iar.expect("ops[iar]")] - } - - /// Rewinds the instruction address register. - /// - /// # Returns - /// - /// `true` if successful, `false` otherwise. - fn prev(&mut self) -> bool { - let new = self.iar.expect("iar").checked_sub(1); - new.is_some() && { - self.iar = new; - true - } - } -} - -/// Stores a single match. -/// -/// See also Holder. -enum HolderState<'a, 'b, T: PatternTypes> { - /// Empty holder, for a key-value pair. - EmptyKey, - /// Empty holder, for a Matcher and a key-value pair. - EmptyKeySubtree, - // /// Empty holder, for a Matcher and a value. - // EmptyValueSubtree, - /// Occupied holder, for a key-value pair.. - Key(KVPair<'b, T>), - /// Occupied holder, for a Matcher and a key-value pair. - KeySubtree(Peekable>, KVPair<'b, T>), - /// Occupied holder, for a Matcher and a value. The empty variant is - /// omitted as it would never be used otherwise. - ValueSubtree(Peekable>, RefOwn<'b, T::Ref, T::Own>), - /// Occupied holder, for a value. The empty variant is omitted as it would - /// never be used otherwise. - Value(RefOwn<'b, T::Ref, T::Own>), -} - -/// Helper enum for HolderState. -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -enum HolderKind { - Key, - KeySubtree, - ValueSubtree, - Value -} - -//impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> { -// fn clone(&self) -> Self { +//struct Frame<'a, 'b, T: PatternTypes> { +// //obj: RefOwn<'b, T::Ref, T::Own>, +// ops: &'a [PatternElement], +// iar: Option, +// depth: usize, +// path: Vec>, +// in_key: bool, +//} +// +//impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> { +// /// Advances the instruction address register. +// /// +// /// # Returns +// /// +// /// `true` if successful, `false` otherwise. +// fn next(&mut self) -> bool { +// let new = self.iar.map_or(0, |v| v + 1); +// new < self.ops.len() && { +// self.iar = Some(new); +// true +// } +// } +// +// /// Returns the current instruction. +// fn op(&self) -> PatternElement { +// self.ops[self.iar.expect("ops[iar]")] +// } +// +// /// Rewinds the instruction address register. +// /// +// /// # Returns +// /// +// /// `true` if successful, `false` otherwise. +// fn prev(&mut self) -> bool { +// let new = self.iar.expect("iar").checked_sub(1); +// new.is_some() && { +// self.iar = new; +// true +// } +// } +//} +// +///// Stores a single match. +///// +///// See also Holder. +//enum HolderState<'a, 'b, T: PatternTypes> { +// /// Empty holder, for a key-value pair. +// EmptyKey, +// /// Empty holder, for a Matcher and a key-value pair. +// EmptyKeySubtree, +// // /// Empty holder, for a Matcher and a value. +// // EmptyValueSubtree, +// /// Occupied holder, for a key-value pair.. +// Key(KVPair<'b, T>), +// /// Occupied holder, for a Matcher and a key-value pair. +// KeySubtree(Peekable>, KVPair<'b, T>), +// /// Occupied holder, for a Matcher and a value. The empty variant is +// /// omitted as it would never be used otherwise. +// ValueSubtree(Peekable>, RefOwn<'b, T::Ref, T::Own>), +// /// Occupied holder, for a value. The empty variant is omitted as it would +// /// never be used otherwise. +// Value(RefOwn<'b, T::Ref, T::Own>), +//} +// +///// Helper enum for HolderState. +//#[derive(Copy, Clone, Debug, Eq, PartialEq)] +//enum HolderKind { +// Key, +// KeySubtree, +// ValueSubtree, +// Value +//} +// +////impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> { +//// fn clone(&self) -> Self { +//// match self { +//// HolderState::EmptyKey => HolderState::EmptyKey, +//// HolderState::EmptySubtree => HolderState::EmptySubtree, +//// HolderState::Key(v) => HolderState::Key(*v), +//// HolderState::KeySubtree(m, v) => HolderState::KeySubtree(m.clone(), *v), +//// HolderState::ValueSubtree(m, v) => HolderState::ValueSubtree(m.clone(), *v), +//// HolderState::Value(v) => HolderState::Value(*v), +//// } +//// } +////} +// +//impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> { +// #[rustfmt::skip] +// fn is_empty(&self) -> bool { // match self { -// HolderState::EmptyKey => HolderState::EmptyKey, -// HolderState::EmptySubtree => HolderState::EmptySubtree, -// HolderState::Key(v) => HolderState::Key(*v), -// HolderState::KeySubtree(m, v) => HolderState::KeySubtree(m.clone(), *v), -// HolderState::ValueSubtree(m, v) => HolderState::ValueSubtree(m.clone(), *v), -// HolderState::Value(v) => HolderState::Value(*v), +// | HolderState::EmptyKey +// | HolderState::EmptyKeySubtree +// //| HolderState::EmptyValueSubtree +// => true, _ => false +// } +// } +// +// fn has_value(&self) -> bool { +// !self.is_empty() +// } +// +// fn kind(&self) -> HolderKind { +// match self { +// | HolderState::EmptyKey +// | HolderState::Key(_) +// => HolderKind::Key, +// | HolderState::EmptyKeySubtree +// | HolderState::KeySubtree(_, _) +// => HolderKind::KeySubtree, +// //| HolderState::EmptyValueSubtree +// | HolderState::ValueSubtree(_, _) +// => HolderKind::ValueSubtree, +// | HolderState::Value(_) +// => HolderKind::Value, +// } +// } +// +// fn value(&self) -> Option> { +// match *self { +// HolderState::Key((_, value)) => Some(value), +// HolderState::KeySubtree(_, (_, value)) => Some(value), +// HolderState::ValueSubtree(_, value) => Some(value), +// HolderState::Value(value) => Some(value), +// _ => None +// } +// } +// +// fn key(&self) -> Option> { +// match *self { +// HolderState::Key((key, _)) => Some(key), +// HolderState::KeySubtree(_, (key, _)) => Some(key), +// _ => None +// } +// } +// +// fn pair(&self) -> Option> { +// match *self { +// HolderState::Key(pair) => Some(pair), +// HolderState::KeySubtree(_, pair) => Some(pair), +// _ => None // } // } +// +// fn subtree(&mut self) -> Option<&mut Peekable>> { +// match *self { +// HolderState::KeySubtree(ref mut subtree, _) => Some(subtree), +// HolderState::ValueSubtree(ref mut subtree, _) => Some(subtree), +// _ => None +// } +// } +// +// fn clear(&mut self) { +// *self = match self.kind() { +// HolderKind::Key => HolderState::EmptyKey, +// HolderKind::KeySubtree => HolderState::EmptyKeySubtree, +// HolderKind::ValueSubtree => unreachable!(), //HolderState::EmptyValueSubtree, +// HolderKind::Value => unreachable!(), +// }; +// assert!(self.is_empty()); +// } +//} +// +///// Stores a single match and associated metadata. +///// +///// A single match is generally a key-value pair, but may be a collection of +///// named pairs in the case of subtree matches, or just a value for the initial +///// holder. +//struct Holder<'a, 'b, T: PatternTypes> { +// name: Option<&'a str>, +// value: HolderState<'a, 'b, T>, +// parent: Option>, +// iterator: Option> + 'b>>, +// filters: Vec Fn(&'c mut HolderState<'a, 'b, T>) -> Result<(), MatchError>) + 'a>>, +//} +// +//impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> { +// fn next(&mut self) -> Result { +// self.ensure_iterator()?; +// if let Self { +// value: ref mut v, +// iterator: Some(ref mut it), +// ref filters, +// .. +// } = self { +// // check if we're in a subtree and (not) done. +// if let Some(matcher) = v.subtree() { +// if let Some(res) = matcher.peek() { +// // report any errors +// return res.as_ref().map(|_| true).map_err(|e| e.clone()); +// } +// } +// let kind = v.kind(); +// let mut next_v; +// loop { +// next_v = match it.next() { +// Some(pair) => HolderState::Key(pair), +// None => return Ok(false) +// }; +// for filter in filters { +// filter(&mut next_v)?; +// if next_v.is_empty() { +// break; +// } +// } +// if next_v.has_value() { +// break; +// } +// } +// assert!(next_v.has_value()); +// assert_eq!(next_v.kind(), kind); +// *v = next_v; +// Ok(true) +// } else { +// unreachable!() +// } +// } +// +// /// Ensure `self.iterator.is_some()`, creating an iterator if necessary. +// fn ensure_iterator(&mut self) -> Result<(), MatchError> { +// if self.iterator.is_none() { +// let iter = T::pairs(self.parent.unwrap()); +// if iter.is_none() { +// return Err(MatchError::UnsupportedOperation); +// } +// self.iterator = iter; +// } +// assert!(self.iterator.is_some()); +// Ok(()) +// } +//} +// +//impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> { +// fn default() -> Self { +// Self { +// name: Default::default(), +// value: HolderState::EmptyKey, +// parent: Default::default(), +// iterator: Default::default(), +// filters: Default::default(), +// } +// } +//} +// +//pub struct Matcher<'a, 'b, T: PatternTypes> { +// defs: &'a PatternConstants, +// frame: Frame<'a, 'b, T>, +//} +// +//// TODO: +//// +//// [x] Arrow +//// [x] StringKey +//// [x] RegexKey +//// [x] KeySubtree +//// [x] ValueSubtree +//// [x] Ident +//// [x] Param (untested) +//// [x] ApplyPredicate +//// [x] End +// +///// Helper for `PatternElement::StringKey`. +//fn on_string_key<'a, 'b, T: PatternTypes>( +// matcher: &mut Matcher<'a, 'b, T>, +// id: usize, +// skippable: bool, +//) -> Result { +// let path = matcher.frame.path.last_mut().unwrap(); +// assert!(path.iterator.is_none()); +// let key = &matcher.defs.strings[id]; +// let iter = T::get(path.parent.unwrap(), RefOwn::Str(key)); +// match iter { +// Some(None) if !skippable => Err(MatchError::ValidationError), +// Some(opt) => { +// path.iterator = Some(Box::new(opt.into_iter())); +// Ok(true) +// } +// None => Err(MatchError::UnsupportedOperation), +// } +//} +// +///// Helper for `PatternElement::ParameterKey`. +//fn on_parameter_key<'a, 'b, T: PatternTypes>( +// matcher: &mut Matcher<'a, 'b, T>, +// id: usize, +// skippable: bool, +//) -> Result { +// let path = matcher.frame.path.last_mut().unwrap(); +// assert!(path.iterator.is_none()); +// let key = matcher.defs.defs[id]; +// let iter = T::get(path.parent.unwrap(), RefOwn::Own(key)); +// match iter { +// Some(None) if !skippable => Err(MatchError::ValidationError), +// Some(opt) => { +// path.iterator = Some(Box::new(opt.into_iter())); +// Ok(true) +// } +// None => Err(MatchError::UnsupportedOperation), +// } +//} +// +///// Helper for `PatternElement::RegexKey`. +//fn on_regex_key<'a, 'b, T: PatternTypes>( +// matcher: &mut Matcher<'a, 'b, T>, +// id: usize, +// skippable: bool, +//) -> Result { +// matcher.frame.path.last_mut().unwrap().ensure_iterator()?; +// let re = &matcher.defs.regices[id]; +// let path = matcher.frame.path.last_mut().unwrap(); +// path.filters.push(Box::new(move |value| { +// let s = T::as_str(value.key().unwrap()); +// match (s.map_or(false, |s| re.is_match(s)), skippable) { +// (true, _) => Ok(()), +// (false, true) => { +// value.clear(); +// Ok(()) +// }, +// (false, false) => Err(MatchError::ValidationError), +// } +// })); +// Ok(true) +//} +// +///// Helper for `PatternElement::KeySubtree`. +//fn on_key_subtree<'a, 'b, T: PatternTypes>( +// matcher: &mut Matcher<'a, 'b, T>, +// id: usize, +// skippable: bool, +//) -> Result { +// let _ = skippable; // FIXME what should a skippable KeySubtree even do?! +// matcher.frame.path.last_mut().unwrap().ensure_iterator()?; +// let defs = matcher.defs; +// let rlimit: usize = matcher.frame.depth; +// let path = matcher.frame.path.last_mut().unwrap(); +// assert!(path.value.is_empty()); +// assert_eq!(path.value.kind(), HolderKind::Key); +// path.value = HolderState::EmptyKeySubtree; +// path.filters.push(Box::new(move |value| { +// let key = value.key().unwrap(); +// let mut subtree = Matcher::new(key, defs, id, rlimit)?.peekable(); +// match subtree.peek() { +// Some(&Ok(_)) => { +// *value = HolderState::KeySubtree(subtree, value.pair().unwrap()); +// Ok(()) +// }, +// Some(&Err(ref e)) => { +// Err(e.clone()) +// }, +// None => { +// value.clear(); +// Ok(()) +// } +// } +// })); +// Ok(true) +//} +// +//const DUMMY_OPS: &'static [PatternElement] = &[]; +// +//impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { +// pub(crate) fn new(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants, proto: usize, rlimit: usize) -> Result { +// let ops: &[_] = &defs.protos[proto]; +// Self::with_ops(obj, defs, ops, rlimit) +// } +// +// /// Constructs a Matcher that yields a single dummy result. +// fn with_ops(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants, ops: &'a [PatternElement], rlimit: usize) -> Result { +// let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?; +// Ok(Self { +// defs: defs, +// frame: Frame { +// //obj: obj, +// ops: ops, +// iar: None, +// depth: depth, +// path: { +// let mut holder = Holder::default(); +// holder.value = HolderState::Value(obj); +// holder.iterator = Some(Box::new(std::iter::empty())); +// vec![holder] +// }, +// in_key: false, +// }, +// }) +// } +// +// fn on_in_key(&mut self) -> Result { +// match self.frame.op() { +// PatternElement::End => { +// let path = self.frame.path.last_mut().unwrap(); +// if path.next()? { +// Ok(false) +// } else { +// drop(path); +// self.frame.path.pop().unwrap(); +// // stop at previous End, or start of frame +// while self.frame.prev() { +// if matches!(self.frame.op(), PatternElement::End) { +// break; +// } +// } +// // is start of frame? +// if !self.frame.prev() { +// self.frame.path.clear(); +// } +// Ok(true) +// } +// }, +// PatternElement::ApplyPredicate(id, skippable) => { +// // failing on T::get() is already handled, but we may need a +// // T::pairs(). construct it here. +// self.frame.path.last_mut().unwrap().ensure_iterator()?; +// let pred = &self.defs.predicates[id]; +// let path = self.frame.path.last_mut().unwrap(); +// path.filters.push(Box::new(move |value| { +// match (pred(value.value().unwrap()), skippable) { +// (true, _) => Ok(()), +// (false, true) => { +// value.clear(); +// Ok(()) +// }, +// (false, false) => Err(MatchError::ValidationError), +// } +// })); +// Ok(true) +// }, +// PatternElement::StringKey(id, skippable) => { +// on_string_key(self, id, skippable) +// }, +// PatternElement::ParameterKey(id, skippable) => { +// on_parameter_key(self, id, skippable) +// }, +// PatternElement::RegexKey(id, skippable) => { +// on_regex_key(self, id, skippable) +// }, +// PatternElement::KeySubtree(id, skippable) => { +// on_key_subtree(self, id, skippable) +// }, +// _ => unreachable!("on_in_key") +// } +// } +// +// fn on_not_in_key(&mut self) -> Result { +// match self.frame.op() { +// PatternElement::Arrow => { +// // this *should* always pass. +// assert!(self.frame.path.last().unwrap().iterator.is_some()); +// let mut holder = Holder::default(); +// holder.parent = self.frame.path.last().unwrap().value.value(); +// assert!(holder.parent.is_some()); +// self.frame.path.push(holder); +// Ok(false) +// }, +// PatternElement::Identifier(id) => { +// let name = self.defs.strings.get(id).map(|s| &**s); +// let path = self.frame.path.last_mut().unwrap(); +// path.name = name; +// assert!(path.iterator.is_none()); +// // we don't actually create the iterator here, +// // as we may still wanna use T::get() instead. +// Ok(true) +// }, +// PatternElement::ApplyPredicate(id, skippable) => { +// assert!(self.frame.path.len() == 1); +// let pred = &self.defs.predicates[id]; +// let value = self.frame.path.last().unwrap().value.value(); +// match (pred(value.unwrap()), skippable) { +// (true, _) => Ok(false), +// (false, true) => { +// self.frame.path.clear(); +// // any Ok(_) will do +// Ok(false) +// }, +// (false, false) => Err(MatchError::ValidationError), +// } +// }, +// PatternElement::StringKey(id, skippable) => { +// on_string_key(self, id, skippable) +// }, +// PatternElement::ParameterKey(id, skippable) => { +// on_parameter_key(self, id, skippable) +// }, +// PatternElement::RegexKey(id, skippable) => { +// on_regex_key(self, id, skippable) +// }, +// PatternElement::KeySubtree(id, skippable) => { +// on_key_subtree(self, id, skippable) +// }, +// PatternElement::ValueSubtree(id, skippable) => { +// let value = self.frame.path.last().unwrap().value.value().unwrap(); +// let mut subtree = Matcher::new( +// value, +// self.defs, +// id, +// self.frame.depth +// )?.peekable(); +// let mut dummy = Matcher::with_ops( +// value, +// self.defs, +// DUMMY_OPS, +// self.frame.depth +// )?.peekable(); +// // may panic. +// let peeked = subtree.peek(); +// // shouldn't panic. +// let _ = dummy.peek(); +// // push Holder after peek. +// self.frame.path.push(Holder::default()); +// let mut holder = self.frame.path.last_mut().unwrap(); +// holder.parent = Some(value); +// holder.iterator = Some(Box::new(std::iter::empty())); +// match peeked { +// None if skippable => { +// holder.value = HolderState::ValueSubtree(dummy, value); +// Ok(true) +// }, +// Some(&Ok(_)) | None => { +// drop(peeked); +// holder.value = HolderState::ValueSubtree(subtree, value); +// Ok(true) +// }, +// Some(&Err(ref e)) => { +// Err(e.clone()) +// }, +// } +// }, +// _ => unreachable!("on_not_in_key") +// } +// } +// +// fn collect_results(&mut self) -> Matches<'a, 'b, T> { +// let mut res: Matches<'a, 'b, T> = Default::default(); +// for holder in &mut self.frame.path { +// // make sure it's not empty. +// assert!(holder.value.has_value()); +// // handle subtrees. +// if let Some(matcher) = holder.value.subtree() { +// if let Some(matches) = matcher.next() { +// // NOTE: we have checked these already. +// // (and if we haven't, that's a bug.) +// res.extend(matches.unwrap()); +// } +// } +// // handle pairs. +// if let Some(pair) = holder.value.pair() { +// if let Some(name) = holder.name { +// res.insert(name, pair); +// } +// } +// } +// res +// } +// +// fn on_end(&mut self) -> (bool, Matches<'a, 'b, T>) { +// match self.frame.op() { +// PatternElement::End => { +// assert!(!self.frame.path.last().expect("path").value.is_empty()); +// let res = self.collect_results(); +// if !self.frame.prev() { +// // NOTE: frame.prev() must always be called, even if this +// // gets replaced with debug_assert!() in the future. +// assert!(false, "frame.prev()"); +// } +// (true, res) +// } +// PatternElement::ApplyPredicate {..} => { +// assert!(!self.frame.in_key); +// let res = self.collect_results(); +// self.frame.path.clear(); +// (false, res) +// } +// _ => unreachable!("on_end") +// } +// } +//} +// +//impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> { +// type Item = Result>, MatchError>; +// +// fn next(&mut self) -> Option { +// if self.frame.ops.is_empty() { +// if !self.frame.path.is_empty() { +// self.frame.path.clear(); +// return Some(Ok(Default::default())); +// } +// } +// while !self.frame.path.is_empty() { +// if !self.frame.next() { +// let (in_key, res) = self.on_end(); +// self.frame.in_key = in_key; +// return Some(Ok(res)); +// } else { +// let in_key = if self.frame.in_key { +// self.on_in_key() +// } else { +// self.on_not_in_key() +// }; +// match in_key { +// Ok(in_key) => self.frame.in_key = in_key, +// Err(e) => { +// self.frame.path.clear(); +// return Some(Err(e)) +// }, +// } +// } +// } +// None +// } //} - -impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> { - #[rustfmt::skip] - fn is_empty(&self) -> bool { - match self { - | HolderState::EmptyKey - | HolderState::EmptyKeySubtree - //| HolderState::EmptyValueSubtree - => true, _ => false - } - } - - fn has_value(&self) -> bool { - !self.is_empty() - } - - fn kind(&self) -> HolderKind { - match self { - | HolderState::EmptyKey - | HolderState::Key(_) - => HolderKind::Key, - | HolderState::EmptyKeySubtree - | HolderState::KeySubtree(_, _) - => HolderKind::KeySubtree, - //| HolderState::EmptyValueSubtree - | HolderState::ValueSubtree(_, _) - => HolderKind::ValueSubtree, - | HolderState::Value(_) - => HolderKind::Value, - } - } - - fn value(&self) -> Option> { - match *self { - HolderState::Key((_, value)) => Some(value), - HolderState::KeySubtree(_, (_, value)) => Some(value), - HolderState::ValueSubtree(_, value) => Some(value), - HolderState::Value(value) => Some(value), - _ => None - } - } - - fn key(&self) -> Option> { - match *self { - HolderState::Key((key, _)) => Some(key), - HolderState::KeySubtree(_, (key, _)) => Some(key), - _ => None - } - } - - fn pair(&self) -> Option> { - match *self { - HolderState::Key(pair) => Some(pair), - HolderState::KeySubtree(_, pair) => Some(pair), - _ => None - } - } - - fn subtree(&mut self) -> Option<&mut Peekable>> { - match *self { - HolderState::KeySubtree(ref mut subtree, _) => Some(subtree), - HolderState::ValueSubtree(ref mut subtree, _) => Some(subtree), - _ => None - } - } - - fn clear(&mut self) { - *self = match self.kind() { - HolderKind::Key => HolderState::EmptyKey, - HolderKind::KeySubtree => HolderState::EmptyKeySubtree, - HolderKind::ValueSubtree => unreachable!(), //HolderState::EmptyValueSubtree, - HolderKind::Value => unreachable!(), - }; - assert!(self.is_empty()); - } -} - -/// Stores a single match and associated metadata. -/// -/// A single match is generally a key-value pair, but may be a collection of -/// named pairs in the case of subtree matches, or just a value for the initial -/// holder. -struct Holder<'a, 'b, T: PatternTypes> { - name: Option<&'a str>, - value: HolderState<'a, 'b, T>, - parent: Option>, - iterator: Option> + 'b>>, - filters: Vec Fn(&'c mut HolderState<'a, 'b, T>) -> Result<(), MatchError>) + 'a>>, -} - -impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> { - fn next(&mut self) -> Result { - self.ensure_iterator()?; - if let Self { - value: ref mut v, - iterator: Some(ref mut it), - ref filters, - .. - } = self { - // check if we're in a subtree and (not) done. - if let Some(matcher) = v.subtree() { - if let Some(res) = matcher.peek() { - // report any errors - return res.as_ref().map(|_| true).map_err(|e| e.clone()); - } - } - let kind = v.kind(); - let mut next_v; - loop { - next_v = match it.next() { - Some(pair) => HolderState::Key(pair), - None => return Ok(false) - }; - for filter in filters { - filter(&mut next_v)?; - if next_v.is_empty() { - break; - } - } - if next_v.has_value() { - break; - } - } - assert!(next_v.has_value()); - assert_eq!(next_v.kind(), kind); - *v = next_v; - Ok(true) - } else { - unreachable!() - } - } - - /// Ensure `self.iterator.is_some()`, creating an iterator if necessary. - fn ensure_iterator(&mut self) -> Result<(), MatchError> { - if self.iterator.is_none() { - let iter = T::pairs(self.parent.unwrap()); - if iter.is_none() { - return Err(MatchError::UnsupportedOperation); - } - self.iterator = iter; - } - assert!(self.iterator.is_some()); - Ok(()) - } -} - -impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> { - fn default() -> Self { - Self { - name: Default::default(), - value: HolderState::EmptyKey, - parent: Default::default(), - iterator: Default::default(), - filters: Default::default(), - } - } -} - -pub struct Matcher<'a, 'b, T: PatternTypes> { - defs: &'a PatternConstants, - frame: Frame<'a, 'b, T>, -} - -// TODO: -// -// [x] Arrow -// [x] StringKey -// [x] RegexKey -// [x] KeySubtree -// [x] ValueSubtree -// [x] Ident -// [x] Param (untested) -// [x] ApplyPredicate -// [x] End - -/// Helper for `PatternElement::StringKey`. -fn on_string_key<'a, 'b, T: PatternTypes>( - matcher: &mut Matcher<'a, 'b, T>, - id: usize, - skippable: bool, -) -> Result { - let path = matcher.frame.path.last_mut().unwrap(); - assert!(path.iterator.is_none()); - let key = &matcher.defs.strings[id]; - let iter = T::get(path.parent.unwrap(), RefOwn::Str(key)); - match iter { - Some(None) if !skippable => Err(MatchError::ValidationError), - Some(opt) => { - path.iterator = Some(Box::new(opt.into_iter())); - Ok(true) - } - None => Err(MatchError::UnsupportedOperation), - } -} - -/// Helper for `PatternElement::ParameterKey`. -fn on_parameter_key<'a, 'b, T: PatternTypes>( - matcher: &mut Matcher<'a, 'b, T>, - id: usize, - skippable: bool, -) -> Result { - let path = matcher.frame.path.last_mut().unwrap(); - assert!(path.iterator.is_none()); - let key = matcher.defs.defs[id]; - let iter = T::get(path.parent.unwrap(), RefOwn::Own(key)); - match iter { - Some(None) if !skippable => Err(MatchError::ValidationError), - Some(opt) => { - path.iterator = Some(Box::new(opt.into_iter())); - Ok(true) - } - None => Err(MatchError::UnsupportedOperation), - } -} - -/// Helper for `PatternElement::RegexKey`. -fn on_regex_key<'a, 'b, T: PatternTypes>( - matcher: &mut Matcher<'a, 'b, T>, - id: usize, - skippable: bool, -) -> Result { - matcher.frame.path.last_mut().unwrap().ensure_iterator()?; - let re = &matcher.defs.regices[id]; - let path = matcher.frame.path.last_mut().unwrap(); - path.filters.push(Box::new(move |value| { - let s = T::as_str(value.key().unwrap()); - match (s.map_or(false, |s| re.is_match(s)), skippable) { - (true, _) => Ok(()), - (false, true) => { - value.clear(); - Ok(()) - }, - (false, false) => Err(MatchError::ValidationError), - } - })); - Ok(true) -} - -/// Helper for `PatternElement::KeySubtree`. -fn on_key_subtree<'a, 'b, T: PatternTypes>( - matcher: &mut Matcher<'a, 'b, T>, - id: usize, - skippable: bool, -) -> Result { - let _ = skippable; // FIXME what should a skippable KeySubtree even do?! - matcher.frame.path.last_mut().unwrap().ensure_iterator()?; - let defs = matcher.defs; - let rlimit: usize = matcher.frame.depth; - let path = matcher.frame.path.last_mut().unwrap(); - assert!(path.value.is_empty()); - assert_eq!(path.value.kind(), HolderKind::Key); - path.value = HolderState::EmptyKeySubtree; - path.filters.push(Box::new(move |value| { - let key = value.key().unwrap(); - let mut subtree = Matcher::new(key, defs, id, rlimit)?.peekable(); - match subtree.peek() { - Some(&Ok(_)) => { - *value = HolderState::KeySubtree(subtree, value.pair().unwrap()); - Ok(()) - }, - Some(&Err(ref e)) => { - Err(e.clone()) - }, - None => { - value.clear(); - Ok(()) - } - } - })); - Ok(true) -} - -const DUMMY_OPS: &'static [PatternElement] = &[]; - -impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { - pub(crate) fn new(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants, proto: usize, rlimit: usize) -> Result { - let ops: &[_] = &defs.protos[proto]; - Self::with_ops(obj, defs, ops, rlimit) - } - - /// Constructs a Matcher that yields a single dummy result. - fn with_ops(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants, ops: &'a [PatternElement], rlimit: usize) -> Result { - let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?; - Ok(Self { - defs: defs, - frame: Frame { - //obj: obj, - ops: ops, - iar: None, - depth: depth, - path: { - let mut holder = Holder::default(); - holder.value = HolderState::Value(obj); - holder.iterator = Some(Box::new(std::iter::empty())); - vec![holder] - }, - in_key: false, - }, - }) - } - - fn on_in_key(&mut self) -> Result { - match self.frame.op() { - PatternElement::End => { - let path = self.frame.path.last_mut().unwrap(); - if path.next()? { - Ok(false) - } else { - drop(path); - self.frame.path.pop().unwrap(); - // stop at previous End, or start of frame - while self.frame.prev() { - if matches!(self.frame.op(), PatternElement::End) { - break; - } - } - // is start of frame? - if !self.frame.prev() { - self.frame.path.clear(); - } - Ok(true) - } - }, - PatternElement::ApplyPredicate(id, skippable) => { - // failing on T::get() is already handled, but we may need a - // T::pairs(). construct it here. - self.frame.path.last_mut().unwrap().ensure_iterator()?; - let pred = &self.defs.predicates[id]; - let path = self.frame.path.last_mut().unwrap(); - path.filters.push(Box::new(move |value| { - match (pred(value.value().unwrap()), skippable) { - (true, _) => Ok(()), - (false, true) => { - value.clear(); - Ok(()) - }, - (false, false) => Err(MatchError::ValidationError), - } - })); - Ok(true) - }, - PatternElement::StringKey(id, skippable) => { - on_string_key(self, id, skippable) - }, - PatternElement::ParameterKey(id, skippable) => { - on_parameter_key(self, id, skippable) - }, - PatternElement::RegexKey(id, skippable) => { - on_regex_key(self, id, skippable) - }, - PatternElement::KeySubtree(id, skippable) => { - on_key_subtree(self, id, skippable) - }, - _ => unreachable!("on_in_key") - } - } - - fn on_not_in_key(&mut self) -> Result { - match self.frame.op() { - PatternElement::Arrow => { - // this *should* always pass. - assert!(self.frame.path.last().unwrap().iterator.is_some()); - let mut holder = Holder::default(); - holder.parent = self.frame.path.last().unwrap().value.value(); - assert!(holder.parent.is_some()); - self.frame.path.push(holder); - Ok(false) - }, - PatternElement::Identifier(id) => { - let name = self.defs.strings.get(id).map(|s| &**s); - let path = self.frame.path.last_mut().unwrap(); - path.name = name; - assert!(path.iterator.is_none()); - // we don't actually create the iterator here, - // as we may still wanna use T::get() instead. - Ok(true) - }, - PatternElement::ApplyPredicate(id, skippable) => { - assert!(self.frame.path.len() == 1); - let pred = &self.defs.predicates[id]; - let value = self.frame.path.last().unwrap().value.value(); - match (pred(value.unwrap()), skippable) { - (true, _) => Ok(false), - (false, true) => { - self.frame.path.clear(); - // any Ok(_) will do - Ok(false) - }, - (false, false) => Err(MatchError::ValidationError), - } - }, - PatternElement::StringKey(id, skippable) => { - on_string_key(self, id, skippable) - }, - PatternElement::ParameterKey(id, skippable) => { - on_parameter_key(self, id, skippable) - }, - PatternElement::RegexKey(id, skippable) => { - on_regex_key(self, id, skippable) - }, - PatternElement::KeySubtree(id, skippable) => { - on_key_subtree(self, id, skippable) - }, - PatternElement::ValueSubtree(id, skippable) => { - let value = self.frame.path.last().unwrap().value.value().unwrap(); - let mut subtree = Matcher::new( - value, - self.defs, - id, - self.frame.depth - )?.peekable(); - let mut dummy = Matcher::with_ops( - value, - self.defs, - DUMMY_OPS, - self.frame.depth - )?.peekable(); - // may panic. - let peeked = subtree.peek(); - // shouldn't panic. - let _ = dummy.peek(); - // push Holder after peek. - self.frame.path.push(Holder::default()); - let mut holder = self.frame.path.last_mut().unwrap(); - holder.parent = Some(value); - holder.iterator = Some(Box::new(std::iter::empty())); - match peeked { - None if skippable => { - holder.value = HolderState::ValueSubtree(dummy, value); - Ok(true) - }, - Some(&Ok(_)) | None => { - drop(peeked); - holder.value = HolderState::ValueSubtree(subtree, value); - Ok(true) - }, - Some(&Err(ref e)) => { - Err(e.clone()) - }, - } - }, - _ => unreachable!("on_not_in_key") - } - } - - fn collect_results(&mut self) -> Matches<'a, 'b, T> { - let mut res: Matches<'a, 'b, T> = Default::default(); - for holder in &mut self.frame.path { - // make sure it's not empty. - assert!(holder.value.has_value()); - // handle subtrees. - if let Some(matcher) = holder.value.subtree() { - if let Some(matches) = matcher.next() { - // NOTE: we have checked these already. - // (and if we haven't, that's a bug.) - res.extend(matches.unwrap()); - } - } - // handle pairs. - if let Some(pair) = holder.value.pair() { - if let Some(name) = holder.name { - res.insert(name, pair); - } - } - } - res - } - - fn on_end(&mut self) -> (bool, Matches<'a, 'b, T>) { - match self.frame.op() { - PatternElement::End => { - assert!(!self.frame.path.last().expect("path").value.is_empty()); - let res = self.collect_results(); - if !self.frame.prev() { - // NOTE: frame.prev() must always be called, even if this - // gets replaced with debug_assert!() in the future. - assert!(false, "frame.prev()"); - } - (true, res) - } - PatternElement::ApplyPredicate {..} => { - assert!(!self.frame.in_key); - let res = self.collect_results(); - self.frame.path.clear(); - (false, res) - } - _ => unreachable!("on_end") - } - } -} - -impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> { - type Item = Result>, MatchError>; - - fn next(&mut self) -> Option { - if self.frame.ops.is_empty() { - if !self.frame.path.is_empty() { - self.frame.path.clear(); - return Some(Ok(Default::default())); - } - } - while !self.frame.path.is_empty() { - if !self.frame.next() { - let (in_key, res) = self.on_end(); - self.frame.in_key = in_key; - return Some(Ok(res)); - } else { - let in_key = if self.frame.in_key { - self.on_in_key() - } else { - self.on_not_in_key() - }; - match in_key { - Ok(in_key) => self.frame.in_key = in_key, - Err(e) => { - self.frame.path.clear(); - return Some(Err(e)) - }, - } - } - } - None - } -} -- cgit 1.4.1