/*
* This file is part of Datafu
* Copyright (C) 2021 Soni L.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see .
*/
use std::collections::BTreeMap;
use std::iter::Peekable;
use regex::Regex;
use crate::KVPair;
use crate::RefOwn;
use crate::PatternTypes;
use crate::Predicate;
use crate::errors::MatchError;
pub(crate) const MAX_CALLS: usize = 250;
type Matches<'a, 'b, T> = BTreeMap<&'a str, KVPair<'b, T>>;
// TODO: use a builder for this?
/// The constant pool for a pattern.
pub(crate) struct PatternConstants {
// last proto is implicitly the whole pattern.
pub(crate) protos: Vec>,
// Note that we can borrow these when creating the output map.
// https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f
pub(crate) strings: Vec,
pub(crate) regices: Vec,
pub(crate) predicates: Vec>>,
// NOTE these are part of the constant pool and so have lifetime analogous
// to 'a (consistently used to indicate constant pool lifetime) when used
// elsewhere. In particular, they can't be yielded by the iterator.
pub(crate) defs: Vec,
}
impl Default for PatternConstants {
fn default() -> Self {
Self {
protos: Default::default(),
strings: Default::default(),
regices: Default::default(),
predicates: Default::default(),
defs: Default::default(),
}
}
}
/// A pattern element.
#[derive(Copy, Clone)]
pub(crate) enum PatternElement {
Arrow,
Identifier(usize),
StringKey(usize, bool),
RegexKey(usize, bool),
ParameterKey(usize, bool),
KeySubtree(usize, bool),
ValueSubtree(usize, bool),
ApplyPredicate(usize, bool),
End
}
struct Frame<'a, 'b, T: PatternTypes> {
//obj: RefOwn<'b, T::Ref, T::Own>,
ops: &'a [PatternElement],
iar: Option,
depth: usize,
path: Vec>,
in_key: bool,
}
impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> {
/// Advances the instruction address register.
///
/// # Returns
///
/// `true` if successful, `false` otherwise.
fn next(&mut self) -> bool {
let new = self.iar.map_or(0, |v| v + 1);
new < self.ops.len() && {
self.iar = Some(new);
true
}
}
/// Returns the current instruction.
fn op(&self) -> PatternElement {
self.ops[self.iar.expect("ops[iar]")]
}
/// Rewinds the instruction address register.
///
/// # Returns
///
/// `true` if successful, `false` otherwise.
fn prev(&mut self) -> bool {
let new = self.iar.expect("iar").checked_sub(1);
new.is_some() && {
self.iar = new;
true
}
}
}
/// Stores a single match.
///
/// See also Holder.
enum HolderState<'a, 'b, T: PatternTypes> {
/// Empty holder, for a key-value pair.
EmptyKey,
/// Empty holder, for a Matcher and a key-value pair.
EmptyKeySubtree,
// /// Empty holder, for a Matcher and a value.
// EmptyValueSubtree,
/// Occupied holder, for a key-value pair..
Key(KVPair<'b, T>),
/// Occupied holder, for a Matcher and a key-value pair.
KeySubtree(Peekable>, KVPair<'b, T>),
/// Occupied holder, for a Matcher and a value. The empty variant is
/// omitted as it would never be used otherwise.
ValueSubtree(Peekable>, RefOwn<'b, T::Ref, T::Own>),
/// Occupied holder, for a value. The empty variant is omitted as it would
/// never be used otherwise.
Value(RefOwn<'b, T::Ref, T::Own>),
}
/// Helper enum for HolderState.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum HolderKind {
Key,
KeySubtree,
ValueSubtree,
Value
}
//impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> {
// fn clone(&self) -> Self {
// match self {
// HolderState::EmptyKey => HolderState::EmptyKey,
// HolderState::EmptySubtree => HolderState::EmptySubtree,
// HolderState::Key(v) => HolderState::Key(*v),
// HolderState::KeySubtree(m, v) => HolderState::KeySubtree(m.clone(), *v),
// HolderState::ValueSubtree(m, v) => HolderState::ValueSubtree(m.clone(), *v),
// HolderState::Value(v) => HolderState::Value(*v),
// }
// }
//}
impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> {
#[rustfmt::skip]
fn is_empty(&self) -> bool {
match self {
| HolderState::EmptyKey
| HolderState::EmptyKeySubtree
//| HolderState::EmptyValueSubtree
=> true, _ => false
}
}
fn has_value(&self) -> bool {
!self.is_empty()
}
fn kind(&self) -> HolderKind {
match self {
| HolderState::EmptyKey
| HolderState::Key(_)
=> HolderKind::Key,
| HolderState::EmptyKeySubtree
| HolderState::KeySubtree(_, _)
=> HolderKind::KeySubtree,
//| HolderState::EmptyValueSubtree
| HolderState::ValueSubtree(_, _)
=> HolderKind::ValueSubtree,
| HolderState::Value(_)
=> HolderKind::Value,
}
}
fn value(&self) -> Option> {
match *self {
HolderState::Key((_, value)) => Some(value),
HolderState::KeySubtree(_, (_, value)) => Some(value),
HolderState::ValueSubtree(_, value) => Some(value),
HolderState::Value(value) => Some(value),
_ => None
}
}
fn key(&self) -> Option> {
match *self {
HolderState::Key((key, _)) => Some(key),
HolderState::KeySubtree(_, (key, _)) => Some(key),
_ => None
}
}
fn pair(&self) -> Option> {
match *self {
HolderState::Key(pair) => Some(pair),
HolderState::KeySubtree(_, pair) => Some(pair),
_ => None
}
}
fn subtree(&mut self) -> Option<&mut Peekable>> {
match *self {
HolderState::KeySubtree(ref mut subtree, _) => Some(subtree),
HolderState::ValueSubtree(ref mut subtree, _) => Some(subtree),
_ => None
}
}
fn clear(&mut self) {
*self = match self.kind() {
HolderKind::Key => HolderState::EmptyKey,
HolderKind::KeySubtree => HolderState::EmptyKeySubtree,
HolderKind::ValueSubtree => unreachable!(), //HolderState::EmptyValueSubtree,
HolderKind::Value => unreachable!(),
};
assert!(self.is_empty());
}
}
/// Stores a single match and associated metadata.
///
/// A single match is generally a key-value pair, but may be a collection of
/// named pairs in the case of subtree matches, or just a value for the initial
/// holder.
struct Holder<'a, 'b, T: PatternTypes> {
name: Option<&'a str>,
value: HolderState<'a, 'b, T>,
parent: Option>,
iterator: Option> + 'b>>,
filters: Vec Fn(&'c mut HolderState<'a, 'b, T>) -> Result<(), MatchError>) + 'a>>,
}
impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> {
fn next(&mut self) -> Result {
self.ensure_iterator()?;
if let Self {
value: ref mut v,
iterator: Some(ref mut it),
ref filters,
..
} = self {
// check if we're in a subtree and (not) done.
if let Some(matcher) = v.subtree() {
if let Some(res) = matcher.peek() {
// report any errors
return res.as_ref().map(|_| true).map_err(|e| e.clone());
}
}
let kind = v.kind();
let mut next_v;
loop {
next_v = match it.next() {
Some(pair) => HolderState::Key(pair),
None => return Ok(false)
};
for filter in filters {
filter(&mut next_v)?;
if next_v.is_empty() {
break;
}
}
if next_v.has_value() {
break;
}
}
assert!(next_v.has_value());
assert_eq!(next_v.kind(), kind);
*v = next_v;
Ok(true)
} else {
unreachable!()
}
}
/// Ensure `self.iterator.is_some()`, creating an iterator if necessary.
fn ensure_iterator(&mut self) -> Result<(), MatchError> {
if self.iterator.is_none() {
let iter = T::pairs(self.parent.unwrap());
if iter.is_none() {
return Err(MatchError::UnsupportedOperation);
}
self.iterator = iter;
}
assert!(self.iterator.is_some());
Ok(())
}
}
impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> {
fn default() -> Self {
Self {
name: Default::default(),
value: HolderState::EmptyKey,
parent: Default::default(),
iterator: Default::default(),
filters: Default::default(),
}
}
}
pub struct Matcher<'a, 'b, T: PatternTypes> {
defs: &'a PatternConstants,
frame: Frame<'a, 'b, T>,
}
// TODO:
//
// [x] Arrow
// [x] StringKey
// [x] RegexKey
// [x] KeySubtree
// [x] ValueSubtree
// [x] Ident
// [x] Param (untested)
// [x] ApplyPredicate
// [x] End
/// Helper for `PatternElement::StringKey`.
fn on_string_key<'a, 'b, T: PatternTypes>(
matcher: &mut Matcher<'a, 'b, T>,
id: usize,
skippable: bool,
) -> Result {
let path = matcher.frame.path.last_mut().unwrap();
assert!(path.iterator.is_none());
let key = &matcher.defs.strings[id];
let iter = T::get(path.parent.unwrap(), RefOwn::Str(key));
match iter {
Some(None) if !skippable => Err(MatchError::ValidationError),
Some(opt) => {
path.iterator = Some(Box::new(opt.into_iter()));
Ok(true)
}
None => Err(MatchError::UnsupportedOperation),
}
}
/// Helper for `PatternElement::ParameterKey`.
fn on_parameter_key<'a, 'b, T: PatternTypes>(
matcher: &mut Matcher<'a, 'b, T>,
id: usize,
skippable: bool,
) -> Result {
let path = matcher.frame.path.last_mut().unwrap();
assert!(path.iterator.is_none());
let key = matcher.defs.defs[id];
let iter = T::get(path.parent.unwrap(), RefOwn::Own(key));
match iter {
Some(None) if !skippable => Err(MatchError::ValidationError),
Some(opt) => {
path.iterator = Some(Box::new(opt.into_iter()));
Ok(true)
}
None => Err(MatchError::UnsupportedOperation),
}
}
/// Helper for `PatternElement::RegexKey`.
fn on_regex_key<'a, 'b, T: PatternTypes>(
matcher: &mut Matcher<'a, 'b, T>,
id: usize,
skippable: bool,
) -> Result {
matcher.frame.path.last_mut().unwrap().ensure_iterator()?;
let re = &matcher.defs.regices[id];
let path = matcher.frame.path.last_mut().unwrap();
path.filters.push(Box::new(move |value| {
let s = T::as_str(value.key().unwrap());
match (s.map_or(false, |s| re.is_match(s)), skippable) {
(true, _) => Ok(()),
(false, true) => {
value.clear();
Ok(())
},
(false, false) => Err(MatchError::ValidationError),
}
}));
Ok(true)
}
/// Helper for `PatternElement::KeySubtree`.
fn on_key_subtree<'a, 'b, T: PatternTypes>(
matcher: &mut Matcher<'a, 'b, T>,
id: usize,
skippable: bool,
) -> Result {
let _ = skippable; // FIXME what should a skippable KeySubtree even do?!
matcher.frame.path.last_mut().unwrap().ensure_iterator()?;
let defs = matcher.defs;
let rlimit: usize = matcher.frame.depth;
let path = matcher.frame.path.last_mut().unwrap();
assert!(path.value.is_empty());
assert_eq!(path.value.kind(), HolderKind::Key);
path.value = HolderState::EmptyKeySubtree;
path.filters.push(Box::new(move |value| {
let key = value.key().unwrap();
let mut subtree = Matcher::new(key, defs, id, rlimit)?.peekable();
match subtree.peek() {
Some(&Ok(_)) => {
*value = HolderState::KeySubtree(subtree, value.pair().unwrap());
Ok(())
},
Some(&Err(ref e)) => {
Err(e.clone())
},
None => {
value.clear();
Ok(())
}
}
}));
Ok(true)
}
const DUMMY_OPS: &'static [PatternElement] = &[];
impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> {
pub(crate) fn new(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants, proto: usize, rlimit: usize) -> Result {
let ops: &[_] = &defs.protos[proto];
Self::with_ops(obj, defs, ops, rlimit)
}
/// Constructs a Matcher that yields a single dummy result.
fn with_ops(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants, ops: &'a [PatternElement], rlimit: usize) -> Result {
let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?;
Ok(Self {
defs: defs,
frame: Frame {
//obj: obj,
ops: ops,
iar: None,
depth: depth,
path: {
let mut holder = Holder::default();
holder.value = HolderState::Value(obj);
holder.iterator = Some(Box::new(std::iter::empty()));
vec![holder]
},
in_key: false,
},
})
}
fn on_in_key(&mut self) -> Result {
match self.frame.op() {
PatternElement::End => {
let path = self.frame.path.last_mut().unwrap();
if path.next()? {
Ok(false)
} else {
drop(path);
self.frame.path.pop().unwrap();
// stop at previous End, or start of frame
while self.frame.prev() {
if matches!(self.frame.op(), PatternElement::End) {
break;
}
}
// is start of frame?
if !self.frame.prev() {
self.frame.path.clear();
}
Ok(true)
}
},
PatternElement::ApplyPredicate(id, skippable) => {
// failing on T::get() is already handled, but we may need a
// T::pairs(). construct it here.
self.frame.path.last_mut().unwrap().ensure_iterator()?;
let pred = &self.defs.predicates[id];
let path = self.frame.path.last_mut().unwrap();
path.filters.push(Box::new(move |value| {
match (pred(value.value().unwrap()), skippable) {
(true, _) => Ok(()),
(false, true) => {
value.clear();
Ok(())
},
(false, false) => Err(MatchError::ValidationError),
}
}));
Ok(true)
},
PatternElement::StringKey(id, skippable) => {
on_string_key(self, id, skippable)
},
PatternElement::ParameterKey(id, skippable) => {
on_parameter_key(self, id, skippable)
},
PatternElement::RegexKey(id, skippable) => {
on_regex_key(self, id, skippable)
},
PatternElement::KeySubtree(id, skippable) => {
on_key_subtree(self, id, skippable)
},
_ => unreachable!("on_in_key")
}
}
fn on_not_in_key(&mut self) -> Result {
match self.frame.op() {
PatternElement::Arrow => {
// this *should* always pass.
assert!(self.frame.path.last().unwrap().iterator.is_some());
let mut holder = Holder::default();
holder.parent = self.frame.path.last().unwrap().value.value();
assert!(holder.parent.is_some());
self.frame.path.push(holder);
Ok(false)
},
PatternElement::Identifier(id) => {
let name = self.defs.strings.get(id).map(|s| &**s);
let path = self.frame.path.last_mut().unwrap();
path.name = name;
assert!(path.iterator.is_none());
// we don't actually create the iterator here,
// as we may still wanna use T::get() instead.
Ok(true)
},
PatternElement::ApplyPredicate(id, skippable) => {
assert!(self.frame.path.len() == 1);
let pred = &self.defs.predicates[id];
let value = self.frame.path.last().unwrap().value.value();
match (pred(value.unwrap()), skippable) {
(true, _) => Ok(false),
(false, true) => {
self.frame.path.clear();
// any Ok(_) will do
Ok(false)
},
(false, false) => Err(MatchError::ValidationError),
}
},
PatternElement::StringKey(id, skippable) => {
on_string_key(self, id, skippable)
},
PatternElement::ParameterKey(id, skippable) => {
on_parameter_key(self, id, skippable)
},
PatternElement::RegexKey(id, skippable) => {
on_regex_key(self, id, skippable)
},
PatternElement::KeySubtree(id, skippable) => {
on_key_subtree(self, id, skippable)
},
PatternElement::ValueSubtree(id, skippable) => {
let value = self.frame.path.last().unwrap().value.value().unwrap();
let mut subtree = Matcher::new(
value,
self.defs,
id,
self.frame.depth
)?.peekable();
let mut dummy = Matcher::with_ops(
value,
self.defs,
DUMMY_OPS,
self.frame.depth
)?.peekable();
// may panic.
let peeked = subtree.peek();
// shouldn't panic.
let _ = dummy.peek();
// push Holder after peek.
self.frame.path.push(Holder::default());
let mut holder = self.frame.path.last_mut().unwrap();
holder.parent = Some(value);
holder.iterator = Some(Box::new(std::iter::empty()));
match peeked {
None if skippable => {
holder.value = HolderState::ValueSubtree(dummy, value);
Ok(true)
},
Some(&Ok(_)) | None => {
drop(peeked);
holder.value = HolderState::ValueSubtree(subtree, value);
Ok(true)
},
Some(&Err(ref e)) => {
Err(e.clone())
},
}
},
_ => unreachable!("on_not_in_key")
}
}
fn collect_results(&mut self) -> Matches<'a, 'b, T> {
let mut res: Matches<'a, 'b, T> = Default::default();
for holder in &mut self.frame.path {
// make sure it's not empty.
assert!(holder.value.has_value());
// handle subtrees.
if let Some(matcher) = holder.value.subtree() {
if let Some(matches) = matcher.next() {
// NOTE: we have checked these already.
// (and if we haven't, that's a bug.)
res.extend(matches.unwrap());
}
}
// handle pairs.
if let Some(pair) = holder.value.pair() {
if let Some(name) = holder.name {
res.insert(name, pair);
}
}
}
res
}
fn on_end(&mut self) -> (bool, Matches<'a, 'b, T>) {
match self.frame.op() {
PatternElement::End => {
assert!(!self.frame.path.last().expect("path").value.is_empty());
let res = self.collect_results();
if !self.frame.prev() {
// NOTE: frame.prev() must always be called, even if this
// gets replaced with debug_assert!() in the future.
assert!(false, "frame.prev()");
}
(true, res)
}
PatternElement::ApplyPredicate {..} => {
assert!(!self.frame.in_key);
let res = self.collect_results();
self.frame.path.clear();
(false, res)
}
_ => unreachable!("on_end")
}
}
}
impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> {
type Item = Result>, MatchError>;
fn next(&mut self) -> Option {
if self.frame.ops.is_empty() {
if !self.frame.path.is_empty() {
self.frame.path.clear();
return Some(Ok(Default::default()));
}
}
while !self.frame.path.is_empty() {
if !self.frame.next() {
let (in_key, res) = self.on_end();
self.frame.in_key = in_key;
return Some(Ok(res));
} else {
let in_key = if self.frame.in_key {
self.on_in_key()
} else {
self.on_not_in_key()
};
match in_key {
Ok(in_key) => self.frame.in_key = in_key,
Err(e) => {
self.frame.path.clear();
return Some(Err(e))
},
}
}
}
None
}
}