/* * This file is part of Datafu * Copyright (C) 2021 Soni L. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ use std::collections::BTreeMap; use std::iter::Peekable; use regex::Regex; use crate::KVPair; use crate::RefOwn; use crate::PatternTypes; use crate::Predicate; use crate::errors::MatchError; pub(crate) const MAX_CALLS: usize = 250; type Matches<'a, 'b, T> = BTreeMap<&'a str, KVPair<'b, T>>; // TODO: use a builder for this? /// The constant pool for a pattern. pub(crate) struct PatternConstants { // last proto is implicitly the whole pattern. pub(crate) protos: Vec>, // Note that we can borrow these when creating the output map. // https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f pub(crate) strings: Vec, pub(crate) regices: Vec, pub(crate) predicates: Vec>>, // NOTE these are part of the constant pool and so have lifetime analogous // to 'a (consistently used to indicate constant pool lifetime) when used // elsewhere. In particular, they can't be yielded by the iterator. pub(crate) defs: Vec, } impl Default for PatternConstants { fn default() -> Self { Self { protos: Default::default(), strings: Default::default(), regices: Default::default(), predicates: Default::default(), defs: Default::default(), } } } /// A pattern element. #[derive(Copy, Clone)] pub(crate) enum PatternElement { Arrow, Identifier(usize), StringKey(usize, bool), RegexKey(usize, bool), ParameterKey(usize, bool), KeySubtree(usize, bool), ValueSubtree(usize, bool), ApplyPredicate(usize, bool), End } struct Frame<'a, 'b, T: PatternTypes> { //obj: RefOwn<'b, T::Ref, T::Own>, ops: &'a [PatternElement], iar: Option, depth: usize, path: Vec>, in_key: bool, } impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> { /// Advances the instruction address register. /// /// # Returns /// /// `true` if successful, `false` otherwise. fn next(&mut self) -> bool { let new = self.iar.map_or(0, |v| v + 1); new < self.ops.len() && { self.iar = Some(new); true } } /// Returns the current instruction. fn op(&self) -> PatternElement { self.ops[self.iar.expect("ops[iar]")] } /// Rewinds the instruction address register. /// /// # Returns /// /// `true` if successful, `false` otherwise. fn prev(&mut self) -> bool { let new = self.iar.expect("iar").checked_sub(1); new.is_some() && { self.iar = new; true } } } /// Stores a single match. /// /// See also Holder. enum HolderState<'a, 'b, T: PatternTypes> { /// Empty holder, for a key-value pair. EmptyKey, /// Empty holder, for a Matcher and a key-value pair. EmptyKeySubtree, // /// Empty holder, for a Matcher and a value. // EmptyValueSubtree, /// Occupied holder, for a key-value pair.. Key(KVPair<'b, T>), /// Occupied holder, for a Matcher and a key-value pair. KeySubtree(Peekable>, KVPair<'b, T>), /// Occupied holder, for a Matcher and a value. The empty variant is /// omitted as it would never be used otherwise. ValueSubtree(Peekable>, RefOwn<'b, T::Ref, T::Own>), /// Occupied holder, for a value. The empty variant is omitted as it would /// never be used otherwise. Value(RefOwn<'b, T::Ref, T::Own>), } /// Helper enum for HolderState. #[derive(Copy, Clone, Debug, Eq, PartialEq)] enum HolderKind { Key, KeySubtree, ValueSubtree, Value } //impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> { // fn clone(&self) -> Self { // match self { // HolderState::EmptyKey => HolderState::EmptyKey, // HolderState::EmptySubtree => HolderState::EmptySubtree, // HolderState::Key(v) => HolderState::Key(*v), // HolderState::KeySubtree(m, v) => HolderState::KeySubtree(m.clone(), *v), // HolderState::ValueSubtree(m, v) => HolderState::ValueSubtree(m.clone(), *v), // HolderState::Value(v) => HolderState::Value(*v), // } // } //} impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> { #[rustfmt::skip] fn is_empty(&self) -> bool { match self { | HolderState::EmptyKey | HolderState::EmptyKeySubtree //| HolderState::EmptyValueSubtree => true, _ => false } } fn has_value(&self) -> bool { !self.is_empty() } fn kind(&self) -> HolderKind { match self { | HolderState::EmptyKey | HolderState::Key(_) => HolderKind::Key, | HolderState::EmptyKeySubtree | HolderState::KeySubtree(_, _) => HolderKind::KeySubtree, //| HolderState::EmptyValueSubtree | HolderState::ValueSubtree(_, _) => HolderKind::ValueSubtree, | HolderState::Value(_) => HolderKind::Value, } } fn value(&self) -> Option> { match *self { HolderState::Key((_, value)) => Some(value), HolderState::KeySubtree(_, (_, value)) => Some(value), HolderState::ValueSubtree(_, value) => Some(value), HolderState::Value(value) => Some(value), _ => None } } fn key(&self) -> Option> { match *self { HolderState::Key((key, _)) => Some(key), HolderState::KeySubtree(_, (key, _)) => Some(key), _ => None } } fn pair(&self) -> Option> { match *self { HolderState::Key(pair) => Some(pair), HolderState::KeySubtree(_, pair) => Some(pair), _ => None } } fn subtree(&mut self) -> Option<&mut Peekable>> { match *self { HolderState::KeySubtree(ref mut subtree, _) => Some(subtree), HolderState::ValueSubtree(ref mut subtree, _) => Some(subtree), _ => None } } fn clear(&mut self) { *self = match self.kind() { HolderKind::Key => HolderState::EmptyKey, HolderKind::KeySubtree => HolderState::EmptyKeySubtree, HolderKind::ValueSubtree => unreachable!(), //HolderState::EmptyValueSubtree, HolderKind::Value => unreachable!(), }; assert!(self.is_empty()); } } /// Stores a single match and associated metadata. /// /// A single match is generally a key-value pair, but may be a collection of /// named pairs in the case of subtree matches, or just a value for the initial /// holder. struct Holder<'a, 'b, T: PatternTypes> { name: Option<&'a str>, value: HolderState<'a, 'b, T>, parent: Option>, iterator: Option> + 'b>>, filters: Vec Fn(&'c mut HolderState<'a, 'b, T>) -> Result<(), MatchError>) + 'a>>, } impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> { fn next(&mut self) -> Result { self.ensure_iterator()?; if let Self { value: ref mut v, iterator: Some(ref mut it), ref filters, .. } = self { // check if we're in a subtree and (not) done. if let Some(matcher) = v.subtree() { if let Some(res) = matcher.peek() { // report any errors return res.as_ref().map(|_| true).map_err(|e| e.clone()); } } let kind = v.kind(); let mut next_v; loop { next_v = match it.next() { Some(pair) => HolderState::Key(pair), None => return Ok(false) }; for filter in filters { filter(&mut next_v)?; if next_v.is_empty() { break; } } if next_v.has_value() { break; } } assert!(next_v.has_value()); assert_eq!(next_v.kind(), kind); *v = next_v; Ok(true) } else { unreachable!() } } /// Ensure `self.iterator.is_some()`, creating an iterator if necessary. fn ensure_iterator(&mut self) -> Result<(), MatchError> { if self.iterator.is_none() { let iter = T::pairs(self.parent.unwrap()); if iter.is_none() { return Err(MatchError::UnsupportedOperation); } self.iterator = iter; } assert!(self.iterator.is_some()); Ok(()) } } impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> { fn default() -> Self { Self { name: Default::default(), value: HolderState::EmptyKey, parent: Default::default(), iterator: Default::default(), filters: Default::default(), } } } pub struct Matcher<'a, 'b, T: PatternTypes> { defs: &'a PatternConstants, frame: Frame<'a, 'b, T>, } // TODO: // // [x] Arrow // [x] StringKey // [x] RegexKey // [x] KeySubtree // [x] ValueSubtree // [x] Ident // [x] Param (untested) // [x] ApplyPredicate // [x] End /// Helper for `PatternElement::StringKey`. fn on_string_key<'a, 'b, T: PatternTypes>( matcher: &mut Matcher<'a, 'b, T>, id: usize, skippable: bool, ) -> Result { let path = matcher.frame.path.last_mut().unwrap(); assert!(path.iterator.is_none()); let key = &matcher.defs.strings[id]; let iter = T::get(path.parent.unwrap(), RefOwn::Str(key)); match iter { Some(None) if !skippable => Err(MatchError::ValidationError), Some(opt) => { path.iterator = Some(Box::new(opt.into_iter())); Ok(true) } None => Err(MatchError::UnsupportedOperation), } } /// Helper for `PatternElement::ParameterKey`. fn on_parameter_key<'a, 'b, T: PatternTypes>( matcher: &mut Matcher<'a, 'b, T>, id: usize, skippable: bool, ) -> Result { let path = matcher.frame.path.last_mut().unwrap(); assert!(path.iterator.is_none()); let key = matcher.defs.defs[id]; let iter = T::get(path.parent.unwrap(), RefOwn::Own(key)); match iter { Some(None) if !skippable => Err(MatchError::ValidationError), Some(opt) => { path.iterator = Some(Box::new(opt.into_iter())); Ok(true) } None => Err(MatchError::UnsupportedOperation), } } /// Helper for `PatternElement::RegexKey`. fn on_regex_key<'a, 'b, T: PatternTypes>( matcher: &mut Matcher<'a, 'b, T>, id: usize, skippable: bool, ) -> Result { matcher.frame.path.last_mut().unwrap().ensure_iterator()?; let re = &matcher.defs.regices[id]; let path = matcher.frame.path.last_mut().unwrap(); path.filters.push(Box::new(move |value| { let s = T::as_str(value.key().unwrap()); match (s.map_or(false, |s| re.is_match(s)), skippable) { (true, _) => Ok(()), (false, true) => { value.clear(); Ok(()) }, (false, false) => Err(MatchError::ValidationError), } })); Ok(true) } /// Helper for `PatternElement::KeySubtree`. fn on_key_subtree<'a, 'b, T: PatternTypes>( matcher: &mut Matcher<'a, 'b, T>, id: usize, skippable: bool, ) -> Result { let _ = skippable; // FIXME what should a skippable KeySubtree even do?! matcher.frame.path.last_mut().unwrap().ensure_iterator()?; let defs = matcher.defs; let rlimit: usize = matcher.frame.depth; let path = matcher.frame.path.last_mut().unwrap(); assert!(path.value.is_empty()); assert_eq!(path.value.kind(), HolderKind::Key); path.value = HolderState::EmptyKeySubtree; path.filters.push(Box::new(move |value| { let key = value.key().unwrap(); let mut subtree = Matcher::new(key, defs, id, rlimit)?.peekable(); match subtree.peek() { Some(&Ok(_)) => { *value = HolderState::KeySubtree(subtree, value.pair().unwrap()); Ok(()) }, Some(&Err(ref e)) => { Err(e.clone()) }, None => { value.clear(); Ok(()) } } })); Ok(true) } const DUMMY_OPS: &'static [PatternElement] = &[]; impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { pub(crate) fn new(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants, proto: usize, rlimit: usize) -> Result { let ops: &[_] = &defs.protos[proto]; Self::with_ops(obj, defs, ops, rlimit) } /// Constructs a Matcher that yields a single dummy result. fn with_ops(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants, ops: &'a [PatternElement], rlimit: usize) -> Result { let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?; Ok(Self { defs: defs, frame: Frame { //obj: obj, ops: ops, iar: None, depth: depth, path: { let mut holder = Holder::default(); holder.value = HolderState::Value(obj); holder.iterator = Some(Box::new(std::iter::empty())); vec![holder] }, in_key: false, }, }) } fn on_in_key(&mut self) -> Result { match self.frame.op() { PatternElement::End => { let path = self.frame.path.last_mut().unwrap(); if path.next()? { Ok(false) } else { drop(path); self.frame.path.pop().unwrap(); // stop at previous End, or start of frame while self.frame.prev() { if matches!(self.frame.op(), PatternElement::End) { break; } } // is start of frame? if !self.frame.prev() { self.frame.path.clear(); } Ok(true) } }, PatternElement::ApplyPredicate(id, skippable) => { // failing on T::get() is already handled, but we may need a // T::pairs(). construct it here. self.frame.path.last_mut().unwrap().ensure_iterator()?; let pred = &self.defs.predicates[id]; let path = self.frame.path.last_mut().unwrap(); path.filters.push(Box::new(move |value| { match (pred(value.value().unwrap()), skippable) { (true, _) => Ok(()), (false, true) => { value.clear(); Ok(()) }, (false, false) => Err(MatchError::ValidationError), } })); Ok(true) }, PatternElement::StringKey(id, skippable) => { on_string_key(self, id, skippable) }, PatternElement::ParameterKey(id, skippable) => { on_parameter_key(self, id, skippable) }, PatternElement::RegexKey(id, skippable) => { on_regex_key(self, id, skippable) }, PatternElement::KeySubtree(id, skippable) => { on_key_subtree(self, id, skippable) }, _ => unreachable!("on_in_key") } } fn on_not_in_key(&mut self) -> Result { match self.frame.op() { PatternElement::Arrow => { // this *should* always pass. assert!(self.frame.path.last().unwrap().iterator.is_some()); let mut holder = Holder::default(); holder.parent = self.frame.path.last().unwrap().value.value(); assert!(holder.parent.is_some()); self.frame.path.push(holder); Ok(false) }, PatternElement::Identifier(id) => { let name = self.defs.strings.get(id).map(|s| &**s); let path = self.frame.path.last_mut().unwrap(); path.name = name; assert!(path.iterator.is_none()); // we don't actually create the iterator here, // as we may still wanna use T::get() instead. Ok(true) }, PatternElement::ApplyPredicate(id, skippable) => { assert!(self.frame.path.len() == 1); let pred = &self.defs.predicates[id]; let value = self.frame.path.last().unwrap().value.value(); match (pred(value.unwrap()), skippable) { (true, _) => Ok(false), (false, true) => { self.frame.path.clear(); // any Ok(_) will do Ok(false) }, (false, false) => Err(MatchError::ValidationError), } }, PatternElement::StringKey(id, skippable) => { on_string_key(self, id, skippable) }, PatternElement::ParameterKey(id, skippable) => { on_parameter_key(self, id, skippable) }, PatternElement::RegexKey(id, skippable) => { on_regex_key(self, id, skippable) }, PatternElement::KeySubtree(id, skippable) => { on_key_subtree(self, id, skippable) }, PatternElement::ValueSubtree(id, skippable) => { let value = self.frame.path.last().unwrap().value.value().unwrap(); let mut subtree = Matcher::new( value, self.defs, id, self.frame.depth )?.peekable(); let mut dummy = Matcher::with_ops( value, self.defs, DUMMY_OPS, self.frame.depth )?.peekable(); // may panic. let peeked = subtree.peek(); // shouldn't panic. let _ = dummy.peek(); // push Holder after peek. self.frame.path.push(Holder::default()); let mut holder = self.frame.path.last_mut().unwrap(); holder.parent = Some(value); holder.iterator = Some(Box::new(std::iter::empty())); match peeked { None if skippable => { holder.value = HolderState::ValueSubtree(dummy, value); Ok(true) }, Some(&Ok(_)) | None => { drop(peeked); holder.value = HolderState::ValueSubtree(subtree, value); Ok(true) }, Some(&Err(ref e)) => { Err(e.clone()) }, } }, _ => unreachable!("on_not_in_key") } } fn collect_results(&mut self) -> Matches<'a, 'b, T> { let mut res: Matches<'a, 'b, T> = Default::default(); for holder in &mut self.frame.path { // make sure it's not empty. assert!(holder.value.has_value()); // handle subtrees. if let Some(matcher) = holder.value.subtree() { if let Some(matches) = matcher.next() { // NOTE: we have checked these already. // (and if we haven't, that's a bug.) res.extend(matches.unwrap()); } } // handle pairs. if let Some(pair) = holder.value.pair() { if let Some(name) = holder.name { res.insert(name, pair); } } } res } fn on_end(&mut self) -> (bool, Matches<'a, 'b, T>) { match self.frame.op() { PatternElement::End => { assert!(!self.frame.path.last().expect("path").value.is_empty()); let res = self.collect_results(); if !self.frame.prev() { // NOTE: frame.prev() must always be called, even if this // gets replaced with debug_assert!() in the future. assert!(false, "frame.prev()"); } (true, res) } PatternElement::ApplyPredicate {..} => { assert!(!self.frame.in_key); let res = self.collect_results(); self.frame.path.clear(); (false, res) } _ => unreachable!("on_end") } } } impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> { type Item = Result>, MatchError>; fn next(&mut self) -> Option { if self.frame.ops.is_empty() { if !self.frame.path.is_empty() { self.frame.path.clear(); return Some(Ok(Default::default())); } } while !self.frame.path.is_empty() { if !self.frame.next() { let (in_key, res) = self.on_end(); self.frame.in_key = in_key; return Some(Ok(res)); } else { let in_key = if self.frame.in_key { self.on_in_key() } else { self.on_not_in_key() }; match in_key { Ok(in_key) => self.frame.in_key = in_key, Err(e) => { self.frame.path.clear(); return Some(Err(e)) }, } } } None } }