/*
* This file is part of Datafu
* Copyright (C) 2021 Soni L.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see .
*/
use std::collections::BTreeMap;
use std::marker::PhantomData;
use regex::Regex;
use crate::KVPair;
use crate::RefOwn;
use crate::PatternTypes;
use crate::Predicate;
use crate::errors::MatchError;
pub(crate) const MAX_CALLS: usize = 250;
type Matches<'a, 'b, T> = BTreeMap<&'a str, KVPair<'b, T>>;
// TODO: use a builder for this?
/// The constant pool for a pattern.
pub(crate) struct PatternConstants {
// last proto is implicitly the whole pattern.
pub(crate) protos: Vec>>,
// Note that we can borrow these when creating the output map.
// https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f
pub(crate) strings: Vec,
pub(crate) regices: Vec,
pub(crate) predicates: Vec>>,
// NOTE these are part of the constant pool and so have lifetime analogous
// to 'a (consistently used to indicate constant pool lifetime) when used
// elsewhere. In particular, they can't be yielded by the iterator.
pub(crate) defs: Vec,
}
impl Default for PatternConstants {
fn default() -> Self {
Self {
protos: Default::default(),
strings: Default::default(),
regices: Default::default(),
predicates: Default::default(),
defs: Default::default(),
}
}
}
/// A pattern element.
pub(crate) enum PatternElement {
Arrow,
Identifier(usize),
StringKey(usize, bool),
RegexKey(usize, bool),
ParameterKey(usize, bool),
KeySubtree(usize, bool),
ValueSubtree(usize, bool),
ApplyPredicate(usize, bool, PhantomData) -> &Predicate>),
End
}
impl Copy for PatternElement {}
impl Clone for PatternElement {
fn clone(&self) -> Self { *self }
}
struct Frame<'a, 'b, T: PatternTypes> {
obj: RefOwn<'b, T::Ref, T::Own>,
ops: &'a Vec>,
iar: Option,
depth: usize,
path: Vec>,
in_key: bool,
}
impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> {
/// Advances the instruction address register.
///
/// # Returns
///
/// `true` if successful, `false` otherwise.
fn next(&mut self) -> bool {
let new = self.iar.map_or(0, |v| v + 1);
new < self.ops.len() && {
self.iar = Some(new);
true
}
}
/// Returns the current instruction.
fn op(&self) -> PatternElement {
self.ops[self.iar.expect("ops[iar]")]
}
/// Rewinds the instruction address register.
///
/// # Returns
///
/// `true` if successful, `false` otherwise.
fn prev(&mut self) -> bool {
let new = self.iar.expect("iar").checked_sub(1);
new.is_some() && {
self.iar = new;
true
}
}
}
/// Stores a single match.
///
/// See also Holder.
enum HolderState<'a, 'b, T: PatternTypes> {
/// Empty holder, for KV pair.
EmptyKey,
/// Empty holder, for subtree.
EmptySubtree,
/// Non-empty KV pair.
Key(KVPair<'b, T>),
/// Non-empty subtree.
Subtree(Matches<'a, 'b, T>, RefOwn<'b, T::Ref, T::Own>),
/// Preloaded value. Usually the first holder in a frame.
Value(RefOwn<'b, T::Ref, T::Own>),
}
impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> {
fn clone(&self) -> Self {
match self {
HolderState::EmptyKey => HolderState::EmptyKey,
HolderState::EmptySubtree => HolderState::EmptySubtree,
HolderState::Key(v) => HolderState::Key(*v),
HolderState::Subtree(m, v) => HolderState::Subtree(m.clone(), *v),
HolderState::Value(v) => HolderState::Value(*v),
}
}
}
impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> {
fn is_empty(&self) -> bool {
matches!(self, HolderState::EmptyKey | HolderState::EmptySubtree)
}
fn has_value(&self) -> bool {
!self.is_empty()
}
fn is_subtree(&self) -> bool {
matches!(self, HolderState::EmptySubtree | HolderState::Subtree {..})
}
fn value(&self) -> Option> {
match *self {
HolderState::Key((_, value)) => Some(value),
HolderState::Subtree(_, value) => Some(value),
HolderState::Value(value) => Some(value),
_ => None
}
}
fn key(&self) -> Option> {
match *self {
HolderState::Key((key, _)) => Some(key),
_ => None
}
}
fn clear(&mut self) {
match *self {
HolderState::Key(_) => *self = HolderState::EmptyKey,
HolderState::Subtree(_, _) => *self = HolderState::EmptySubtree,
HolderState::Value(_) => unreachable!(),
_ => {},
};
assert!(self.is_empty());
}
}
/// Stores a single match and associated metadata.
///
/// A single match is generally a key-value pair, but may be a collection of
/// named pairs in the case of subtree matches, or just a value for the initial
/// holder.
struct Holder<'a, 'b, T: PatternTypes> {
name: Option<&'a str>,
value: HolderState<'a, 'b, T>,
parent: Option>,
iterator: Option> + 'b>>,
filters: Vec Fn(&'c mut HolderState<'a, 'b, T>) -> Result<(), MatchError>) + 'a>>,
}
impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> {
fn next(&mut self) -> Result {
self.ensure_iterator()?;
match self {
Self {
value: ref mut v,
iterator: Some(ref mut it),
ref filters,
..
} => {
let is_subtree = v.is_subtree();
let mut next_v;
loop {
next_v = match it.next() {
Some(pair) => HolderState::Key(pair),
None => return Ok(false)
};
for filter in filters {
filter(&mut next_v)?;
if next_v.is_empty() {
break;
}
}
if next_v.has_value() {
break;
}
}
assert!(next_v.has_value());
assert!(next_v.is_subtree() == is_subtree);
*v = next_v;
Ok(true)
},
_ => unreachable!()
}
}
/// Ensure `self.iterator.is_some()`, creating an iterator if necessary.
fn ensure_iterator(&mut self) -> Result<(), MatchError> {
if self.iterator.is_none() {
let iter = T::pairs(self.parent.unwrap());
if iter.is_none() {
return Err(MatchError::UnsupportedOperation);
}
self.iterator = iter;
}
assert!(self.iterator.is_some());
Ok(())
}
}
impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> {
fn default() -> Self {
Self {
name: Default::default(),
value: HolderState::EmptyKey,
parent: Default::default(),
iterator: Default::default(),
filters: Default::default(),
}
}
}
pub struct Matcher<'a, 'b, T: PatternTypes> {
defs: &'a PatternConstants,
frame: Frame<'a, 'b, T>,
}
// TODO:
//
// [x] Arrow
// [x] StringKey
// [x] RegexKey
// [ ] KeySubtree
// [ ] ValueSubtree
// [x] Ident
// [ ] Param
// [x] ApplyPredicate
// [x] End
/// Helper for `PatternElement::StringKey`.
fn on_string_key<'a, 'b, T: PatternTypes>(
matcher: &mut Matcher<'a, 'b, T>,
id: usize,
skippable: bool,
) -> Result {
let path = matcher.frame.path.last_mut().unwrap();
assert!(path.iterator.is_none());
let key = &matcher.defs.strings[id];
let iter = T::get(path.parent.unwrap(), RefOwn::Str(key));
match iter {
None => Err(MatchError::UnsupportedOperation),
Some(opt) => {
path.iterator = Some(Box::new(opt.into_iter()));
Ok(true)
}
}
}
/// Helper for `PatternElement::RegexKey`.
fn on_regex_key<'a, 'b, T: PatternTypes>(
matcher: &mut Matcher<'a, 'b, T>,
id: usize,
skippable: bool,
) -> Result {
matcher.frame.path.last_mut().unwrap().ensure_iterator()?;
let re = &matcher.defs.regices[id];
let path = matcher.frame.path.last_mut().unwrap();
path.filters.push(Box::new(move |value| {
let s = T::as_str(value.key().unwrap());
match (s.map_or(false, |s| re.is_match(s)), skippable) {
(true, _) => Ok(()),
(false, true) => {
value.clear();
Ok(())
},
(false, false) => Err(MatchError::ValidationError),
}
}));
Ok(true)
}
impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> {
pub(crate) fn new(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants, proto: usize, rlimit: usize) -> Result {
let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?;
let ops: &Vec<_> = &defs.protos[proto];
Ok(Self {
defs: defs,
frame: Frame {
obj: obj,
ops: ops,
iar: None,
depth: depth,
path: if ops.is_empty() {
// TODO decide on empty matcher behaviour
todo!()
} else {
let mut holder = Holder::default();
holder.value = HolderState::Value(obj);
holder.iterator = Some(Box::new(std::iter::empty()));
vec![holder]
},
in_key: false,
},
})
}
fn on_in_key(&mut self) -> Result {
match self.frame.op() {
PatternElement::End => {
let path = self.frame.path.last_mut().unwrap();
if path.next()? {
Ok(false)
} else {
drop(path);
self.frame.path.pop().unwrap();
// stop at previous End, or start of frame
while self.frame.prev() {
if matches!(self.frame.op(), PatternElement::End) {
break;
}
}
// is start of frame?
if !self.frame.prev() {
self.frame.path.clear();
}
Ok(true)
}
},
PatternElement::ApplyPredicate(id, skippable, _) => {
// failing on T::get() is already handled, but we may need a
// T::pairs(). construct it here.
self.frame.path.last_mut().unwrap().ensure_iterator()?;
let pred = &self.defs.predicates[id];
let path = self.frame.path.last_mut().unwrap();
path.filters.push(Box::new(move |value| {
match (pred(value.value().unwrap()), skippable) {
(true, _) => Ok(()),
(false, true) => {
value.clear();
Ok(())
},
(false, false) => Err(MatchError::ValidationError),
}
}));
Ok(true)
},
PatternElement::StringKey(id, skippable) => {
on_string_key(self, id, skippable)
},
PatternElement::RegexKey(id, skippable) => {
on_regex_key(self, id, skippable)
},
_ => unreachable!("on_in_key")
}
}
fn on_not_in_key(&mut self) -> Result {
match self.frame.op() {
PatternElement::Arrow => {
// this *should* always pass.
assert!(self.frame.path.last().unwrap().iterator.is_some());
let mut holder = Holder::default();
holder.parent = self.frame.path.last().unwrap().value.value();
assert!(holder.parent.is_some());
self.frame.path.push(holder);
Ok(false)
},
PatternElement::Identifier(id) => {
let name = self.defs.strings.get(id).map(|s| &**s);
let path = self.frame.path.last_mut().unwrap();
path.name = name;
assert!(path.iterator.is_none());
// we don't actually create the iterator here,
// as we may still wanna use T::get() instead.
Ok(true)
},
PatternElement::ApplyPredicate(id, skippable, _) => {
assert!(self.frame.path.len() == 1);
let pred = &self.defs.predicates[id];
let value = self.frame.path.last().unwrap().value.value();
match (pred(value.unwrap()), skippable) {
(true, _) => Ok(false),
(false, true) => {
self.frame.path.clear();
// any Ok(_) will do
Ok(false)
},
(false, false) => Err(MatchError::ValidationError),
}
},
PatternElement::StringKey(id, skippable) => {
on_string_key(self, id, skippable)
},
PatternElement::RegexKey(id, skippable) => {
on_regex_key(self, id, skippable)
},
_ => unreachable!("on_not_in_key")
}
}
fn on_end(&mut self) -> Result<(bool, Matches<'a, 'b, T>), MatchError> {
match self.frame.op() {
PatternElement::End => {
assert!(!self.frame.path.last().expect("path").value.is_empty());
// TODO this for loop is duplicated with ApplyPredicate
// TODO figure out how to deduplicate them
let mut res: Matches<'a, 'b, T> = Default::default();
for holder in &self.frame.path {
match holder.value {
HolderState::Subtree(ref matches, _) => {
res.extend(matches);
},
HolderState::Key(pair) => {
if let Some(name) = holder.name {
res.insert(name, pair);
}
},
HolderState::Value(_) => (),
_ => unreachable!("on_end (End)"),
}
}
if !self.frame.prev() {
// frame.prev() should always be called, even if this gets
// replaced with debug_assert!().
assert!(false, "frame.prev()");
}
Ok((true, res))
}
PatternElement::ApplyPredicate {..} => {
assert!(!self.frame.in_key);
let mut res: Matches<'a, 'b, T> = Default::default();
for holder in &self.frame.path {
match holder.value {
HolderState::Subtree(ref matches, _) => {
res.extend(matches);
},
HolderState::Key(pair) => {
if let Some(name) = holder.name {
res.insert(name, pair);
}
},
HolderState::Value(_) => (),
_ => unreachable!("on_end (ApplyPredicate)"),
}
}
self.frame.path.clear();
Ok((false, res))
}
_ => unreachable!("on_end")
}
}
}
impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> {
type Item = Result>, MatchError>;
fn next(&mut self) -> Option {
while !self.frame.path.is_empty() {
if !self.frame.next() {
let (in_key, res) = match self.on_end() {
Ok(v) => v,
Err(e) => {
self.frame.path.clear();
return Some(Err(e))
},
};
self.frame.in_key = in_key;
return Some(Ok(res));
} else {
let in_key = if self.frame.in_key {
self.on_in_key()
} else {
self.on_not_in_key()
};
match in_key {
Ok(in_key) => self.frame.in_key = in_key,
Err(e) => {
self.frame.path.clear();
return Some(Err(e))
},
}
}
}
None
}
}