From 81a1d3ca72d9f28605bd22687ab20cb61a4ceb1b Mon Sep 17 00:00:00 2001 From: SoniEx2 Date: Sat, 13 Feb 2021 00:33:52 -0300 Subject: Complete the VM, for the most part Some things are still untested. Needs more tests. --- src/lib.rs | 5 +- src/parser.rs | 15 ++- src/vm.rs | 397 ++++++++++++++++++++++++++++++++++++++++------------------ 3 files changed, 290 insertions(+), 127 deletions(-) (limited to 'src') diff --git a/src/lib.rs b/src/lib.rs index cbd32b2..099b268 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,7 +18,10 @@ #![warn(rust_2018_idioms)] #![cfg_attr(not(feature = "stable"), feature(label_break_value))] -extern crate boolinator; +//! Datafu is a more-or-less simple query language of sorts. It was primarily +//! designed for dealing with object trees parsed from configuration files, +//! but can also be used with JSON APIs and whatnot. + extern crate regex; #[cfg(test)] diff --git a/src/parser.rs b/src/parser.rs index eb54c86..8f8c200 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -20,7 +20,6 @@ use std::borrow::Borrow; use std::collections::BTreeMap; use std::mem::ManuallyDrop; -use boolinator::Boolinator; use regex::Regex; use crate::PatternTypes; @@ -88,7 +87,7 @@ impl<'r, 's, P: Borrow + Ord, O: Borrow + Ord, T: PatternTypes> Subtre } } - fn commit(mut self) -> usize { + fn commit(self) -> usize { let mut self_ = ManuallyDrop::new(self); let proto = self_.root.consts.protos.pop().unwrap(); let id = self_.root.closed_subtrees.next().unwrap(); @@ -132,7 +131,7 @@ impl<'r, 's, P: Borrow + Ord, O: Borrow + Ord, T: PatternTypes> TagHel } } - fn commit(mut self) { + fn commit(self) { std::mem::forget(self) } } @@ -439,7 +438,7 @@ impl<'s, P: Borrow + Ord, O: Borrow + Ord, T: PatternTypes> Parser<'s, Ok, )?; let proto = self.consts.protos.last_mut().expect("protos"); - proto.push(PatternElement::ApplyPredicate(id, skippable, std::marker::PhantomData)); + proto.push(PatternElement::ApplyPredicate(id, skippable)); self.sp(&mut cursor); *s = cursor; true @@ -582,8 +581,9 @@ pub(crate) fn parse<'s, P, O, T>( }; let mut parsed = input; - parser.pattern(&mut parsed)?.expect(""); + let matched = parser.pattern(&mut parsed)?; + assert!(matched); assert_eq!(parsed, ""); assert_eq!(parser.closed_subtrees.next().unwrap(), parser.consts.protos.len()); @@ -607,6 +607,7 @@ mod tests { fn pairs<'b>( item: RefOwn<'b, Self::Ref, Self::Own> ) -> Option> + 'b>> { + let _ = item; None } @@ -614,6 +615,8 @@ mod tests { item: RefOwn<'b, Self::Ref, Self::Own>, key: RefOwn<'a, Self::Ref, Self::Own> ) -> Option>> { + let _ = item; + let _ = key; None } @@ -621,6 +624,8 @@ mod tests { left: RefOwn<'_, Self::Ref, Self::Own>, right: RefOwn<'_, Self::Ref, Self::Own> ) -> bool { + let _ = left; + let _ = right; false } diff --git a/src/vm.rs b/src/vm.rs index 62fb074..dd48752 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -17,7 +17,7 @@ */ use std::collections::BTreeMap; -use std::marker::PhantomData; +use std::iter::Peekable; use regex::Regex; @@ -35,7 +35,7 @@ type Matches<'a, 'b, T> = BTreeMap<&'a str, KVPair<'b, T>>; /// The constant pool for a pattern. pub(crate) struct PatternConstants { // last proto is implicitly the whole pattern. - pub(crate) protos: Vec>>, + pub(crate) protos: Vec>, // Note that we can borrow these when creating the output map. // https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f pub(crate) strings: Vec, @@ -60,7 +60,8 @@ impl Default for PatternConstants { } /// A pattern element. -pub(crate) enum PatternElement { +#[derive(Copy, Clone)] +pub(crate) enum PatternElement { Arrow, Identifier(usize), @@ -70,20 +71,14 @@ pub(crate) enum PatternElement { ParameterKey(usize, bool), KeySubtree(usize, bool), ValueSubtree(usize, bool), - ApplyPredicate(usize, bool, PhantomData) -> &Predicate>), + ApplyPredicate(usize, bool), End } -impl Copy for PatternElement {} - -impl Clone for PatternElement { - fn clone(&self) -> Self { *self } -} - struct Frame<'a, 'b, T: PatternTypes> { - obj: RefOwn<'b, T::Ref, T::Own>, - ops: &'a Vec>, + //obj: RefOwn<'b, T::Ref, T::Own>, + ops: &'a [PatternElement], iar: Option, depth: usize, path: Vec>, @@ -105,7 +100,7 @@ impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> { } /// Returns the current instruction. - fn op(&self) -> PatternElement { + fn op(&self) -> PatternElement { self.ops[self.iar.expect("ops[iar]")] } @@ -127,47 +122,82 @@ impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> { /// /// See also Holder. enum HolderState<'a, 'b, T: PatternTypes> { - /// Empty holder, for KV pair. + /// Empty holder, for a key-value pair. EmptyKey, - /// Empty holder, for subtree. - EmptySubtree, - /// Non-empty KV pair. + /// Empty holder, for a Matcher and a key-value pair. + EmptyKeySubtree, + // /// Empty holder, for a Matcher and a value. + // EmptyValueSubtree, + /// Occupied holder, for a key-value pair.. Key(KVPair<'b, T>), - /// Non-empty subtree. - Subtree(Matches<'a, 'b, T>, RefOwn<'b, T::Ref, T::Own>), - /// Preloaded value. Usually the first holder in a frame. + /// Occupied holder, for a Matcher and a key-value pair. + KeySubtree(Peekable>, KVPair<'b, T>), + /// Occupied holder, for a Matcher and a value. The empty variant is + /// omitted as it would never be used otherwise. + ValueSubtree(Peekable>, RefOwn<'b, T::Ref, T::Own>), + /// Occupied holder, for a value. The empty variant is omitted as it would + /// never be used otherwise. Value(RefOwn<'b, T::Ref, T::Own>), } -impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> { - fn clone(&self) -> Self { - match self { - HolderState::EmptyKey => HolderState::EmptyKey, - HolderState::EmptySubtree => HolderState::EmptySubtree, - HolderState::Key(v) => HolderState::Key(*v), - HolderState::Subtree(m, v) => HolderState::Subtree(m.clone(), *v), - HolderState::Value(v) => HolderState::Value(*v), - } - } +/// Helper enum for HolderState. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +enum HolderKind { + Key, + KeySubtree, + ValueSubtree, + Value } +//impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> { +// fn clone(&self) -> Self { +// match self { +// HolderState::EmptyKey => HolderState::EmptyKey, +// HolderState::EmptySubtree => HolderState::EmptySubtree, +// HolderState::Key(v) => HolderState::Key(*v), +// HolderState::KeySubtree(m, v) => HolderState::KeySubtree(m.clone(), *v), +// HolderState::ValueSubtree(m, v) => HolderState::ValueSubtree(m.clone(), *v), +// HolderState::Value(v) => HolderState::Value(*v), +// } +// } +//} + impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> { + #[rustfmt::skip] fn is_empty(&self) -> bool { - matches!(self, HolderState::EmptyKey | HolderState::EmptySubtree) + match self { + | HolderState::EmptyKey + | HolderState::EmptyKeySubtree + //| HolderState::EmptyValueSubtree + => true, _ => false + } } fn has_value(&self) -> bool { !self.is_empty() } - fn is_subtree(&self) -> bool { - matches!(self, HolderState::EmptySubtree | HolderState::Subtree {..}) + fn kind(&self) -> HolderKind { + match self { + | HolderState::EmptyKey + | HolderState::Key(_) + => HolderKind::Key, + | HolderState::EmptyKeySubtree + | HolderState::KeySubtree(_, _) + => HolderKind::KeySubtree, + //| HolderState::EmptyValueSubtree + | HolderState::ValueSubtree(_, _) + => HolderKind::ValueSubtree, + | HolderState::Value(_) + => HolderKind::Value, + } } fn value(&self) -> Option> { match *self { HolderState::Key((_, value)) => Some(value), - HolderState::Subtree(_, value) => Some(value), + HolderState::KeySubtree(_, (_, value)) => Some(value), + HolderState::ValueSubtree(_, value) => Some(value), HolderState::Value(value) => Some(value), _ => None } @@ -176,16 +206,33 @@ impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> { fn key(&self) -> Option> { match *self { HolderState::Key((key, _)) => Some(key), + HolderState::KeySubtree(_, (key, _)) => Some(key), _ => None } } - fn clear(&mut self) { + fn pair(&self) -> Option> { + match *self { + HolderState::Key(pair) => Some(pair), + HolderState::KeySubtree(_, pair) => Some(pair), + _ => None + } + } + + fn subtree(&mut self) -> Option<&mut Peekable>> { match *self { - HolderState::Key(_) => *self = HolderState::EmptyKey, - HolderState::Subtree(_, _) => *self = HolderState::EmptySubtree, - HolderState::Value(_) => unreachable!(), - _ => {}, + HolderState::KeySubtree(ref mut subtree, _) => Some(subtree), + HolderState::ValueSubtree(ref mut subtree, _) => Some(subtree), + _ => None + } + } + + fn clear(&mut self) { + *self = match self.kind() { + HolderKind::Key => HolderState::EmptyKey, + HolderKind::KeySubtree => HolderState::EmptyKeySubtree, + HolderKind::ValueSubtree => unreachable!(), //HolderState::EmptyValueSubtree, + HolderKind::Value => unreachable!(), }; assert!(self.is_empty()); } @@ -207,36 +254,42 @@ struct Holder<'a, 'b, T: PatternTypes> { impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> { fn next(&mut self) -> Result { self.ensure_iterator()?; - match self { - Self { - value: ref mut v, - iterator: Some(ref mut it), - ref filters, - .. - } => { - let is_subtree = v.is_subtree(); - let mut next_v; - loop { - next_v = match it.next() { - Some(pair) => HolderState::Key(pair), - None => return Ok(false) - }; - for filter in filters { - filter(&mut next_v)?; - if next_v.is_empty() { - break; - } - } - if next_v.has_value() { + if let Self { + value: ref mut v, + iterator: Some(ref mut it), + ref filters, + .. + } = self { + // check if we're in a subtree and (not) done. + if let Some(matcher) = v.subtree() { + if let Some(res) = matcher.peek() { + // report any errors + return res.as_ref().map(|_| true).map_err(|e| e.clone()); + } + } + let kind = v.kind(); + let mut next_v; + loop { + next_v = match it.next() { + Some(pair) => HolderState::Key(pair), + None => return Ok(false) + }; + for filter in filters { + filter(&mut next_v)?; + if next_v.is_empty() { break; } } - assert!(next_v.has_value()); - assert!(next_v.is_subtree() == is_subtree); - *v = next_v; - Ok(true) - }, - _ => unreachable!() + if next_v.has_value() { + break; + } + } + assert!(next_v.has_value()); + assert_eq!(next_v.kind(), kind); + *v = next_v; + Ok(true) + } else { + unreachable!() } } @@ -276,10 +329,10 @@ pub struct Matcher<'a, 'b, T: PatternTypes> { // [x] Arrow // [x] StringKey // [x] RegexKey -// [ ] KeySubtree -// [ ] ValueSubtree +// [x] KeySubtree +// [x] ValueSubtree // [x] Ident -// [ ] Param +// [x] Param (untested) // [x] ApplyPredicate // [x] End @@ -294,11 +347,32 @@ fn on_string_key<'a, 'b, T: PatternTypes>( let key = &matcher.defs.strings[id]; let iter = T::get(path.parent.unwrap(), RefOwn::Str(key)); match iter { + Some(None) if !skippable => Err(MatchError::ValidationError), + Some(opt) => { + path.iterator = Some(Box::new(opt.into_iter())); + Ok(true) + } None => Err(MatchError::UnsupportedOperation), + } +} + +/// Helper for `PatternElement::ParameterKey`. +fn on_parameter_key<'a, 'b, T: PatternTypes>( + matcher: &mut Matcher<'a, 'b, T>, + id: usize, + skippable: bool, +) -> Result { + let path = matcher.frame.path.last_mut().unwrap(); + assert!(path.iterator.is_none()); + let key = matcher.defs.defs[id]; + let iter = T::get(path.parent.unwrap(), RefOwn::Own(key)); + match iter { + Some(None) if !skippable => Err(MatchError::ValidationError), Some(opt) => { path.iterator = Some(Box::new(opt.into_iter())); Ok(true) } + None => Err(MatchError::UnsupportedOperation), } } @@ -325,21 +399,59 @@ fn on_regex_key<'a, 'b, T: PatternTypes>( Ok(true) } +/// Helper for `PatternElement::KeySubtree`. +fn on_key_subtree<'a, 'b, T: PatternTypes>( + matcher: &mut Matcher<'a, 'b, T>, + id: usize, + skippable: bool, +) -> Result { + let _ = skippable; // FIXME what should a skippable KeySubtree even do?! + matcher.frame.path.last_mut().unwrap().ensure_iterator()?; + let defs = matcher.defs; + let rlimit: usize = matcher.frame.depth; + let path = matcher.frame.path.last_mut().unwrap(); + assert!(path.value.is_empty()); + assert_eq!(path.value.kind(), HolderKind::Key); + path.value = HolderState::EmptyKeySubtree; + path.filters.push(Box::new(move |value| { + let key = value.key().unwrap(); + let mut subtree = Matcher::new(key, defs, id, rlimit)?.peekable(); + match subtree.peek() { + Some(&Ok(_)) => { + *value = HolderState::KeySubtree(subtree, value.pair().unwrap()); + Ok(()) + }, + Some(&Err(ref e)) => { + Err(e.clone()) + }, + None => { + value.clear(); + Ok(()) + } + } + })); + Ok(true) +} + +const DUMMY_OPS: &'static [PatternElement] = &[]; + impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { pub(crate) fn new(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants, proto: usize, rlimit: usize) -> Result { + let ops: &[_] = &defs.protos[proto]; + Self::with_ops(obj, defs, ops, rlimit) + } + + /// Constructs a Matcher that yields a single dummy result. + fn with_ops(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants, ops: &'a [PatternElement], rlimit: usize) -> Result { let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?; - let ops: &Vec<_> = &defs.protos[proto]; Ok(Self { defs: defs, frame: Frame { - obj: obj, + //obj: obj, ops: ops, iar: None, depth: depth, - path: if ops.is_empty() { - // TODO decide on empty matcher behaviour - todo!() - } else { + path: { let mut holder = Holder::default(); holder.value = HolderState::Value(obj); holder.iterator = Some(Box::new(std::iter::empty())); @@ -372,7 +484,7 @@ impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { Ok(true) } }, - PatternElement::ApplyPredicate(id, skippable, _) => { + PatternElement::ApplyPredicate(id, skippable) => { // failing on T::get() is already handled, but we may need a // T::pairs(). construct it here. self.frame.path.last_mut().unwrap().ensure_iterator()?; @@ -393,9 +505,15 @@ impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { PatternElement::StringKey(id, skippable) => { on_string_key(self, id, skippable) }, + PatternElement::ParameterKey(id, skippable) => { + on_parameter_key(self, id, skippable) + }, PatternElement::RegexKey(id, skippable) => { on_regex_key(self, id, skippable) }, + PatternElement::KeySubtree(id, skippable) => { + on_key_subtree(self, id, skippable) + }, _ => unreachable!("on_in_key") } } @@ -420,7 +538,7 @@ impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { // as we may still wanna use T::get() instead. Ok(true) }, - PatternElement::ApplyPredicate(id, skippable, _) => { + PatternElement::ApplyPredicate(id, skippable) => { assert!(self.frame.path.len() == 1); let pred = &self.defs.predicates[id]; let value = self.frame.path.last().unwrap().value.value(); @@ -437,60 +555,97 @@ impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { PatternElement::StringKey(id, skippable) => { on_string_key(self, id, skippable) }, + PatternElement::ParameterKey(id, skippable) => { + on_parameter_key(self, id, skippable) + }, PatternElement::RegexKey(id, skippable) => { on_regex_key(self, id, skippable) }, + PatternElement::KeySubtree(id, skippable) => { + on_key_subtree(self, id, skippable) + }, + PatternElement::ValueSubtree(id, skippable) => { + let value = self.frame.path.last().unwrap().value.value().unwrap(); + let mut subtree = Matcher::new( + value, + self.defs, + id, + self.frame.depth + )?.peekable(); + let mut dummy = Matcher::with_ops( + value, + self.defs, + DUMMY_OPS, + self.frame.depth + )?.peekable(); + // may panic. + let peeked = subtree.peek(); + // shouldn't panic. + let _ = dummy.peek(); + // push Holder after peek. + self.frame.path.push(Holder::default()); + let mut holder = self.frame.path.last_mut().unwrap(); + holder.parent = Some(value); + holder.iterator = Some(Box::new(std::iter::empty())); + match peeked { + None if skippable => { + holder.value = HolderState::ValueSubtree(dummy, value); + Ok(true) + }, + Some(&Ok(_)) | None => { + drop(peeked); + holder.value = HolderState::ValueSubtree(subtree, value); + Ok(true) + }, + Some(&Err(ref e)) => { + Err(e.clone()) + }, + } + }, _ => unreachable!("on_not_in_key") } } - fn on_end(&mut self) -> Result<(bool, Matches<'a, 'b, T>), MatchError> { + fn collect_results(&mut self) -> Matches<'a, 'b, T> { + let mut res: Matches<'a, 'b, T> = Default::default(); + for holder in &mut self.frame.path { + // make sure it's not empty. + assert!(holder.value.has_value()); + // handle subtrees. + if let Some(matcher) = holder.value.subtree() { + if let Some(matches) = matcher.next() { + // NOTE: we have checked these already. + // (and if we haven't, that's a bug.) + res.extend(matches.unwrap()); + } + } + // handle pairs. + if let Some(pair) = holder.value.pair() { + if let Some(name) = holder.name { + res.insert(name, pair); + } + } + } + res + } + + fn on_end(&mut self) -> (bool, Matches<'a, 'b, T>) { match self.frame.op() { PatternElement::End => { assert!(!self.frame.path.last().expect("path").value.is_empty()); - // TODO this for loop is duplicated with ApplyPredicate - // TODO figure out how to deduplicate them - let mut res: Matches<'a, 'b, T> = Default::default(); - for holder in &self.frame.path { - match holder.value { - HolderState::Subtree(ref matches, _) => { - res.extend(matches); - }, - HolderState::Key(pair) => { - if let Some(name) = holder.name { - res.insert(name, pair); - } - }, - HolderState::Value(_) => (), - _ => unreachable!("on_end (End)"), - } - } + let res = self.collect_results(); if !self.frame.prev() { - // frame.prev() should always be called, even if this gets - // replaced with debug_assert!(). + // NOTE: frame.prev() must always be called, even if this + // gets replaced with debug_assert!() in the future. assert!(false, "frame.prev()"); } - Ok((true, res)) + (true, res) } PatternElement::ApplyPredicate {..} => { assert!(!self.frame.in_key); - let mut res: Matches<'a, 'b, T> = Default::default(); - for holder in &self.frame.path { - match holder.value { - HolderState::Subtree(ref matches, _) => { - res.extend(matches); - }, - HolderState::Key(pair) => { - if let Some(name) = holder.name { - res.insert(name, pair); - } - }, - HolderState::Value(_) => (), - _ => unreachable!("on_end (ApplyPredicate)"), - } - } + let res = self.collect_results(); self.frame.path.clear(); - Ok((false, res)) + (false, res) } _ => unreachable!("on_end") } @@ -501,15 +656,15 @@ impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> { type Item = Result>, MatchError>; fn next(&mut self) -> Option { + if self.frame.ops.is_empty() { + if !self.frame.path.is_empty() { + self.frame.path.clear(); + return Some(Ok(Default::default())); + } + } while !self.frame.path.is_empty() { if !self.frame.next() { - let (in_key, res) = match self.on_end() { - Ok(v) => v, - Err(e) => { - self.frame.path.clear(); - return Some(Err(e)) - }, - }; + let (in_key, res) = self.on_end(); self.frame.in_key = in_key; return Some(Ok(res)); } else { -- cgit 1.4.1