/* * This file is part of Datafu * Copyright (C) 2021 Soni L. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ use std::collections::BTreeMap; use std::marker::PhantomData; use regex::Regex; use crate::KVPair; use crate::RefOwn; use crate::PatternTypes; use crate::Predicate; use crate::errors::MatchError; pub(crate) const MAX_CALLS: usize = 250; type Matches<'a, 'b, T> = BTreeMap<&'a str, KVPair<'b, T>>; // TODO: use a builder for this? /// The constant pool for a pattern. pub(crate) struct PatternConstants { // last proto is implicitly the whole pattern. pub(crate) protos: Vec>>, // Note that we can borrow these when creating the output map. // https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f pub(crate) strings: Vec, pub(crate) regices: Vec, pub(crate) predicates: Vec>>, // NOTE these are part of the constant pool and so have lifetime analogous // to 'a (consistently used to indicate constant pool lifetime) when used // elsewhere. In particular, they can't be yielded by the iterator. pub(crate) defs: Vec, } impl Default for PatternConstants { fn default() -> Self { Self { protos: Default::default(), strings: Default::default(), regices: Default::default(), predicates: Default::default(), defs: Default::default(), } } } /// A pattern element. pub(crate) enum PatternElement { Arrow, Identifier(usize), StringKey(usize, bool), RegexKey(usize, bool), ParameterKey(usize, bool), KeySubtree(usize, bool), ValueSubtree(usize, bool), ApplyPredicate(usize, bool, PhantomData) -> &Predicate>), End } impl Copy for PatternElement {} impl Clone for PatternElement { fn clone(&self) -> Self { *self } } struct Frame<'a, 'b, T: PatternTypes> { obj: RefOwn<'b, T::Ref, T::Own>, ops: &'a Vec>, iar: Option, depth: usize, path: Vec>, in_key: bool, } impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> { /// Advances the instruction address register. /// /// # Returns /// /// `true` if successful, `false` otherwise. fn next(&mut self) -> bool { let new = self.iar.map_or(0, |v| v + 1); new < self.ops.len() && { self.iar = Some(new); true } } /// Returns the current instruction. fn op(&self) -> PatternElement { self.ops[self.iar.expect("ops[iar]")] } /// Rewinds the instruction address register. /// /// # Returns /// /// `true` if successful, `false` otherwise. fn prev(&mut self) -> bool { let new = self.iar.expect("iar").checked_sub(1); new.is_some() && { self.iar = new; true } } } /// Stores a single match. /// /// See also Holder. enum HolderState<'a, 'b, T: PatternTypes> { /// Empty holder, for KV pair. EmptyKey, /// Empty holder, for subtree. EmptySubtree, /// Non-empty KV pair. Key(KVPair<'b, T>), /// Non-empty subtree. Subtree(Matches<'a, 'b, T>, RefOwn<'b, T::Ref, T::Own>), /// Preloaded value. Usually the first holder in a frame. Value(RefOwn<'b, T::Ref, T::Own>), } impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> { fn clone(&self) -> Self { match self { HolderState::EmptyKey => HolderState::EmptyKey, HolderState::EmptySubtree => HolderState::EmptySubtree, HolderState::Key(v) => HolderState::Key(*v), HolderState::Subtree(m, v) => HolderState::Subtree(m.clone(), *v), HolderState::Value(v) => HolderState::Value(*v), } } } impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> { fn is_empty(&self) -> bool { matches!(self, HolderState::EmptyKey | HolderState::EmptySubtree) } fn is_subtree(&self) -> bool { matches!(self, HolderState::EmptySubtree | HolderState::Subtree {..}) } fn value(&self) -> Option> { match *self { HolderState::Key((_, value)) => Some(value), HolderState::Subtree(_, value) => Some(value), HolderState::Value(value) => Some(value), _ => None } } } /// Stores a single match and associated metadata. /// /// A single match is generally a key-value pair, but may be a collection of /// named pairs in the case of subtree matches. struct Holder<'a, 'b, T: PatternTypes> { name: Option<&'a str>, value: HolderState<'a, 'b, T>, parent: Option>, iterator: Option> + 'b>>, filters: Vec Fn(&'c mut HolderState<'a, 'b, T>) + 'a>>, } impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> { fn next(&mut self) -> Option> { // FIXME what even is the point of this? if let Self { value: ref mut v, iterator: Some(ref mut it), .. } = self { let is_subtree = v.is_subtree(); *v = match it.next() { Some(pair) => HolderState::Key(pair), None => return None }; // just try to make sure the type doesn't change. // (and that if we get to this point the result isn't empty.) assert!(!v.is_empty() && v.is_subtree() == is_subtree); } Some(Ok(())) } } impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> { fn default() -> Self { Self { name: Default::default(), value: HolderState::EmptyKey, parent: Default::default(), iterator: Default::default(), filters: Default::default(), } } } pub struct Matcher<'a, 'b, T: PatternTypes> { defs: &'a PatternConstants, frame: Frame<'a, 'b, T>, } impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { pub(crate) fn new(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants, proto: usize, rlimit: usize) -> Result { let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?; let ops: &Vec<_> = &defs.protos[proto]; Ok(Self { defs: defs, frame: Frame { obj: obj, ops: ops, iar: None, depth: depth, path: if ops.is_empty() { // TODO decide on empty matcher behaviour todo!() } else { let mut holder = Holder::default(); holder.value = HolderState::Value(obj); vec![holder] }, in_key: false, }, }) } fn on_in_key(&mut self) -> Result { match self.frame.op() { PatternElement::End => { todo!() } _ => unreachable!("on_in_key") } } fn on_not_in_key(&mut self) -> Result { match self.frame.op() { PatternElement::Arrow => { assert!(!self.frame.path.last().expect("path").value.is_empty()); let mut holder = Holder::default(); holder.parent = self.frame.path.last().expect("path").value.value(); self.frame.path.push(holder); Ok(false) }, PatternElement::Identifier(id) => { let name = self.defs.strings.get(id).map(|s| &**s); self.frame.path.last_mut().expect("path").name = name; todo!() //Ok(true) }, _ => unreachable!("on_not_in_key") } } fn on_end(&mut self) -> Result<(bool, Matches<'a, 'b, T>), MatchError> { match self.frame.op() { PatternElement::End => { assert!(!self.frame.path.last().expect("path").value.is_empty()); // TODO this for loop is duplicated with ApplyPredicate // TODO figure out how to deduplicate them let mut res: Matches<'a, 'b, T> = Default::default(); for holder in &self.frame.path { match holder.value { HolderState::Subtree(ref matches, _) => { res.extend(matches); }, HolderState::Key(pair) => { if let Some(name) = holder.name { res.insert(name, pair); } }, HolderState::Value(_) => (), _ => unreachable!("on_end (End)"), } } if !self.frame.prev() { // frame.prev() should always be called, even if this gets // replaced with debug_assert!(). assert!(false, "frame.prev()"); } Ok((true, res)) } PatternElement::ApplyPredicate {..} => { assert!(!self.frame.in_key); let mut res: Matches<'a, 'b, T> = Default::default(); for holder in &self.frame.path { match holder.value { HolderState::Subtree(ref matches, _) => { res.extend(matches); }, HolderState::Key(pair) => { if let Some(name) = holder.name { res.insert(name, pair); } }, HolderState::Value(_) => (), _ => unreachable!("on_end (ApplyPredicate)"), } } self.frame.path.clear(); Ok((false, res)) } _ => unreachable!("on_end") } } } impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> { type Item = Result>, MatchError>; fn next(&mut self) -> Option { while !self.frame.path.is_empty() { if !self.frame.next() { let (in_key, res) = match self.on_end() { Ok(v) => v, Err(e) => { self.frame.path.clear(); return Some(Err(e)) }, }; self.frame.in_key = in_key; return Some(Ok(res)); } else { let in_key = if self.frame.in_key { self.on_in_key() } else { self.on_not_in_key() }; match in_key { Ok(in_key) => self.frame.in_key = in_key, Err(e) => { self.frame.path.clear(); return Some(Err(e)) }, } } } None } }