/*
* This file is part of Datafu
* Copyright (C) 2021 Soni L.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
use crate::KVPair;
use crate::RefOwn;
use crate::PatternTypes;
use crate::Predicate;
use crate::errors::MatchError;
use std::collections::BTreeMap;
use std::marker::PhantomData;
pub(crate) const MAX_CALLS: usize = 250;
type Matches<'a, 'b, T> = BTreeMap<&'a str, KVPair<'b, T>>;
// TODO: use a builder for this?
/// The constant pool for a pattern.
pub(crate) struct PatternConstants<T: PatternTypes> {
// last proto is implicitly the whole pattern.
pub(crate) protos: Vec<Vec<PatternElement<T>>>,
// Note that we can borrow these when creating the output map.
// https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f
pub(crate) strings: Vec<String>,
pub(crate) regices: Vec<()/* TODO */>,
pub(crate) predicates: Vec<Box<Predicate<T>>>,
// NOTE these are part of the constant pool and so have lifetime analogous
// to 'a (consistently used to indicate constant pool lifetime) when used
// elsewhere. In particular, they can't be yielded by the iterator.
pub(crate) defs: Vec<T::Own>,
}
/// A pattern element.
pub(crate) enum PatternElement<T: PatternTypes> {
Arrow,
Identifier(usize),
StringKey(usize, bool),
RegexKey(usize, bool),
ParameterKey(usize, bool),
KeySubtree(usize, bool),
ValueSubtree(usize, bool),
ApplyPredicate(usize, bool, PhantomData<fn(&PatternConstants<T>) -> &Predicate<T>>),
End
}
impl<T: PatternTypes> Copy for PatternElement<T> {}
impl<T: PatternTypes> Clone for PatternElement<T> {
fn clone(&self) -> Self { *self }
}
struct Frame<'a, 'b, T: PatternTypes> {
obj: RefOwn<'b, T::Ref, T::Own>,
ops: &'a Vec<PatternElement<T>>,
iar: Option<usize>,
depth: usize,
path: Vec<Holder<'a, 'b, T>>,
in_key: bool,
}
impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> {
/// Advances the instruction address register.
///
/// # Returns
///
/// `true` if successful, `false` otherwise.
fn next(&mut self) -> bool {
let new = self.iar.map_or(0, |v| v + 1);
new < self.ops.len() && {
self.iar = Some(new);
true
}
}
/// Returns the current instruction.
fn op(&self) -> PatternElement<T> {
self.ops[self.iar.expect("ops[iar]")]
}
/// Rewinds the instruction address register.
///
/// # Returns
///
/// `true` if successful, `false` otherwise.
fn prev(&mut self) -> bool {
let new = self.iar.expect("iar").checked_sub(1);
new.is_some() && {
self.iar = new;
true
}
}
}
/// Stores a single match.
///
/// See also Holder.
enum HolderState<'a, 'b, T: PatternTypes> {
EmptyKey,
EmptySubtree,
Key(KVPair<'b, T>),
Subtree(Matches<'a, 'b, T>, RefOwn<'b, T::Ref, T::Own>),
}
impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> {
fn clone(&self) -> Self {
match self {
HolderState::EmptyKey => HolderState::EmptyKey,
HolderState::EmptySubtree => HolderState::EmptySubtree,
HolderState::Key(v) => HolderState::Key(*v),
HolderState::Subtree(m, v) => HolderState::Subtree(m.clone(), *v),
}
}
}
impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> {
fn is_empty(&self) -> bool {
matches!(self, HolderState::EmptyKey | HolderState::EmptySubtree)
}
fn is_subtree(&self) -> bool {
matches!(self, HolderState::EmptySubtree | HolderState::Subtree {..})
}
fn value(&self) -> Option<RefOwn<'b, T::Ref, T::Own>> {
match *self {
HolderState::Key((_, value)) => Some(value),
HolderState::Subtree(_, value) => Some(value),
_ => None
}
}
}
/// Stores a single match and associated metadata.
///
/// A single match is generally a key-value pair, but may be a collection of
/// named pairs in the case of subtree matches.
struct Holder<'a, 'b, T: PatternTypes> {
name: Option<&'a str>,
value: HolderState<'a, 'b, T>,
parent: Option<RefOwn<'b, T::Ref, T::Own>>,
iterator: Box<dyn Iterator<Item=KVPair<'b, T>> + 'b>,
filters: Vec<Box<dyn for<'c> Fn(&'c mut HolderState<'a, 'b, T>) + 'a>>,
}
impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> {
fn next(&mut self) -> Option<Result<(), MatchError>> {
if let Self { value: ref mut v, iterator: ref mut it, .. } = self {
let is_subtree = v.is_subtree();
*v = match it.next() {
Some(pair) => HolderState::Key(pair),
None => return None
};
// just try to make sure the type doesn't change.
// (and that if we get to this point the result isn't empty.)
assert!(!v.is_empty() && v.is_subtree() == is_subtree);
}
Some(Ok(()))
}
}
impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> {
fn default() -> Self {
Self {
name: Default::default(),
value: HolderState::EmptyKey,
parent: Default::default(),
// TODO https://github.com/rust-lang/rfcs/issues/3059
iterator: Box::new(std::iter::empty()),
filters: Default::default(),
}
}
}
pub struct Matcher<'a, 'b, T: PatternTypes> {
defs: &'a PatternConstants<T>,
frame: Frame<'a, 'b, T>,
}
impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> {
pub(crate) fn new(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants<T>, proto: usize, rlimit: usize) -> Result<Self, MatchError> {
let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?;
let ops: &Vec<_> = &defs.protos[proto];
Ok(Self {
defs: defs,
frame: Frame {
obj: obj,
ops: ops,
iar: None,
depth: depth,
path: if ops.is_empty() {
Default::default()
} else {
vec![Holder::default()]
},
in_key: false,
},
})
}
fn on_in_key(&mut self) -> Result<bool, MatchError> {
match self.frame.op() {
PatternElement::End => {
unimplemented!()
}
_ => unreachable!("on_in_key")
}
}
fn on_not_in_key(&mut self) -> Result<bool, MatchError> {
match self.frame.op() {
_ => unreachable!("on_not_in_key")
}
}
fn on_end(&mut self) -> Result<(bool, Matches<'a, 'b, T>), MatchError> {
match self.frame.op() {
PatternElement::End => {
assert!(!self.frame.path.last().expect("path").value.is_empty());
// TODO this for loop is duplicated with ApplyPredicate
// TODO figure out how to deduplicate them
let mut res: Matches<'a, 'b, T> = Default::default();
for holder in &self.frame.path {
match holder.value {
HolderState::Subtree(ref matches, _) => {
res.extend(matches);
},
HolderState::Key(pair) => {
if let Some(name) = holder.name {
res.insert(name, pair);
}
},
_ => unreachable!("on_end (End)"),
}
}
if !self.frame.prev() {
// frame.prev() should always be called, even if this gets
// replaced with debug_assert!().
assert!(false, "frame.prev()");
}
Ok((true, res))
},
PatternElement::ApplyPredicate {..} => {
assert!(!self.frame.in_key);
let mut res: Matches<'a, 'b, T> = Default::default();
for holder in &self.frame.path {
match holder.value {
HolderState::Subtree(ref matches, _) => {
res.extend(matches);
},
HolderState::Key(pair) => {
if let Some(name) = holder.name {
res.insert(name, pair);
}
},
_ => unreachable!("on_end (ApplyPredicate)"),
}
}
self.frame.path.clear();
Ok((false, res))
}
_ => unreachable!("on_end")
}
}
}
impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> {
type Item = Result<BTreeMap<&'a str, KVPair<'b, T>>, MatchError>;
fn next(&mut self) -> Option<Self::Item> {
while !self.frame.path.is_empty() {
if !self.frame.next() {
let (in_key, res) = match self.on_end() {
Ok(v) => v,
Err(e) => {
self.frame.path.clear();
return Some(Err(e))
},
};
self.frame.in_key = in_key;
return Some(Ok(res));
} else {
let in_key = if self.frame.in_key {
self.on_in_key()
} else {
self.on_not_in_key()
};
match in_key {
Ok(in_key) => self.frame.in_key = in_key,
Err(e) => {
self.frame.path.clear();
return Some(Err(e))
},
}
}
}
None
}
}