From bcdba3431c72cd0804d9a95972a907b828fb5fad Mon Sep 17 00:00:00 2001 From: SoniEx2 Date: Tue, 12 Jan 2021 22:15:26 -0300 Subject: Prepare VM design --- src/errors.rs | 8 ++ src/lib.rs | 16 ++++ src/parser.rs | 10 ++ src/pattern.rs | 22 +++++ src/vm.rs | 292 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 348 insertions(+) create mode 100644 src/errors.rs create mode 100644 src/lib.rs create mode 100644 src/parser.rs create mode 100644 src/pattern.rs create mode 100644 src/vm.rs (limited to 'src') diff --git a/src/errors.rs b/src/errors.rs new file mode 100644 index 0000000..4fc2144 --- /dev/null +++ b/src/errors.rs @@ -0,0 +1,8 @@ +pub struct PatternError; + +#[derive(Clone)] +pub enum MatchError { + StackOverflow, + ValidationError, + Other +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..9e84de1 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,16 @@ +pub mod errors; +mod parser; +mod pattern; +mod vm; + +pub use pattern::Pattern; + +// TODO +pub trait PatternTypes { + /// The value type. + type Value; + type Iter; +} + +// TODO +pub type Predicate = dyn (Fn(&::Value) -> bool) + Send + Sync; diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..0a7d858 --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,10 @@ +// TODO + +use crate::PatternTypes; +use crate::errors::PatternError; +use crate::vm::PatternConstants; +//use crate::vm::PatternElement; + +pub(crate) fn parse(s: &str) -> Result, PatternError> { + unimplemented!() +} diff --git a/src/pattern.rs b/src/pattern.rs new file mode 100644 index 0000000..8c57f00 --- /dev/null +++ b/src/pattern.rs @@ -0,0 +1,22 @@ +use crate::PatternTypes; +use crate::errors::PatternError; +use crate::parser::parse; +use crate::vm::Matcher; +use crate::vm::PatternConstants; +use crate::vm::MAX_CALLS; + +pub struct Pattern { + constants: PatternConstants, +} + +impl Pattern { + pub fn compile(s: &str) -> Result { + Ok(Self { + constants: parse(s)? + }) + } + + pub fn attempt_match<'a, 'b>(&'a self, value: &'b T::Value) -> Matcher<'a, 'b, T> { + Matcher::new(value, &self.constants, self.constants.protos.len() - 1, MAX_CALLS).ok().expect("datafu internal error: MAX_CALLS must not be 0") + } +} diff --git a/src/vm.rs b/src/vm.rs new file mode 100644 index 0000000..3131571 --- /dev/null +++ b/src/vm.rs @@ -0,0 +1,292 @@ +use crate::PatternTypes; +use crate::Predicate; +use crate::errors::MatchError; + +use std::collections::BTreeMap; +use std::marker::PhantomData; + +pub(crate) const MAX_CALLS: usize = 250; + +type Matches<'a, 'b, T> = BTreeMap<&'a str, (&'b ::Value, &'b ::Value)>; + +// TODO: use a builder for this? +/// The constant pool for a pattern. +pub(crate) struct PatternConstants { + // last proto is implicitly the whole pattern. + pub(crate) protos: Vec>>, + // Note that we can borrow these when creating the output map. + // https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f + pub(crate) strings: Vec, + pub(crate) regices: Vec<()/* TODO */>, + pub(crate) predicates: Vec>>, +} + +/// A pattern element. +pub(crate) enum PatternElement { + Arrow, + + Identifier(usize), + + StringKey(usize, bool), + RegexKey(usize, bool), + ParameterKey(usize, bool), + KeySubtree(usize, bool), + ValueSubtree(usize, bool), + ApplyPredicate(usize, bool, PhantomData) -> &Predicate>), + + End +} + +impl Copy for PatternElement {} + +impl Clone for PatternElement { + fn clone(&self) -> Self { *self } +} + +struct Frame<'a, 'b, T: PatternTypes> { + obj: &'b T::Value, + ops: &'a Vec>, + iar: Option, + depth: usize, + path: Vec>, + in_key: bool, +} + +impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> { + /// Advances the instruction address register. + /// + /// # Returns + /// + /// `true` if successful, `false` otherwise. + fn next(&mut self) -> bool { + let new = self.iar.map_or(0, |v| v + 1); + new < self.ops.len() && { + self.iar = Some(new); + true + } + } + + /// Returns the current instruction. + fn op(&self) -> PatternElement { + self.ops[self.iar.expect("ops[iar]")] + } + + /// Rewinds the instruction address register. + /// + /// # Returns + /// + /// `true` if successful, `false` otherwise. + fn prev(&mut self) -> bool { + let new = self.iar.expect("iar").checked_sub(1); + new.is_some() && { + self.iar = new; + true + } + } +} + +/// Stores a single match. +/// +/// See also Holder. +enum HolderState<'a, 'b, T: PatternTypes> { + EmptyKey, + EmptySubtree, + Key((&'b T::Value, &'b T::Value)), + Subtree(Matches<'a, 'b, T>, &'b T::Value), +} + +impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> { + fn clone(&self) -> Self { + match self { + HolderState::EmptyKey => HolderState::EmptyKey, + HolderState::EmptySubtree => HolderState::EmptySubtree, + HolderState::Key(v) => HolderState::Key(*v), + HolderState::Subtree(m, v) => HolderState::Subtree(m.clone(), *v), + } + } +} + +impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> { + fn is_empty(&self) -> bool { + matches!(self, HolderState::EmptyKey | HolderState::EmptySubtree) + } + + fn is_subtree(&self) -> bool { + matches!(self, HolderState::EmptySubtree | HolderState::Subtree {..}) + } + + fn value(&self) -> Option<&'b T::Value> { + match self { + HolderState::Key((_, value)) => Some(value), + HolderState::Subtree(_, value) => Some(value), + _ => None + } + } +} + +/// Stores a single match and associated metadata. +/// +/// A single match is generally a key-value pair, but may be a collection of +/// named pairs in the case of subtree matches. +struct Holder<'a, 'b, T: PatternTypes> { + name: Option<&'a str>, + value: HolderState<'a, 'b, T>, + parent: Option<&'b T::Value>, + //iterator: Box, MatchError>> + Capture<'a> + Capture<'b>>, + iterator: Box Option, MatchError>>>, + //iterator: T::Iter, +} + +impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> { + fn next(&mut self) -> Option> { + if let Self { value: ref mut v, iterator: ref mut it, .. } = self { + let is_subtree = v.is_subtree(); + *v = match it() { + Some(Ok(pair)) => pair, + Some(Err(e)) => return Some(Err(e)), + None => return None + }; + // just try to make sure the type doesn't change. + // (and that if we get to this point the result isn't empty.) + assert!(!v.is_empty() && v.is_subtree() == is_subtree); + } + Some(Ok(())) + } +} + +impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> { + fn default() -> Self { + Self { + name: Default::default(), + value: HolderState::EmptyKey, + parent: Default::default(), + iterator: Box::new(|| None), + } + } +} + +pub struct Matcher<'a, 'b, T: PatternTypes> { + defs: &'a PatternConstants, + frame: Frame<'a, 'b, T>, +} + +impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { + pub(crate) fn new(obj: &'b T::Value, defs: &'a PatternConstants, proto: usize, rlimit: usize) -> Result { + let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?; + let ops: &Vec<_> = &defs.protos[proto]; + Ok(Self { + defs: defs, + frame: Frame { + obj: obj, + ops: ops, + iar: None, + depth: depth, + path: if ops.is_empty() { + Default::default() + } else { + vec![Holder::default()] + }, + in_key: false, + }, + }) + } + + fn on_in_key(&mut self) -> Result { + match self.frame.op() { + PatternElement::End => { + unimplemented!() + } + _ => unreachable!("on_in_key") + } + } + + fn on_not_in_key(&mut self) -> Result { + match self.frame.op() { + _ => unreachable!("on_not_in_key") + } + } + + fn on_end(&mut self) -> Result<(bool, Matches<'a, 'b, T>), MatchError> { + match self.frame.op() { + PatternElement::End => { + assert!(!self.frame.path.last().expect("path").value.is_empty()); + // TODO this for loop is duplicated with ApplyPredicate + // TODO figure out how to deduplicate them + let mut res: Matches<'a, 'b, T> = Default::default(); + for holder in &self.frame.path { + match holder.value { + HolderState::Subtree(ref matches, _) => { + res.extend(matches); + }, + HolderState::Key(pair) => { + if let Some(name) = holder.name { + res.insert(name, pair); + } + }, + _ => unreachable!("on_end (End)"), + } + } + if !self.frame.prev() { + // frame.prev() should always be called, even if this gets + // replaced with debug_assert!(). + assert!(false, "frame.prev()"); + } + Ok((true, res)) + }, + PatternElement::ApplyPredicate {..} => { + assert!(!self.frame.in_key); + let mut res: Matches<'a, 'b, T> = Default::default(); + for holder in &self.frame.path { + match holder.value { + HolderState::Subtree(ref matches, _) => { + res.extend(matches); + }, + HolderState::Key(pair) => { + if let Some(name) = holder.name { + res.insert(name, pair); + } + }, + _ => unreachable!("on_end (ApplyPredicate)"), + } + } + self.frame.path.clear(); + Ok((false, res)) + } + _ => unreachable!("on_end") + } + } +} + +impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> { + type Item = Result, MatchError>; + + fn next(&mut self) -> Option { + while !self.frame.path.is_empty() { + if !self.frame.next() { + let (in_key, res) = match self.on_end() { + Ok(v) => v, + Err(e) => { + self.frame.path.clear(); + return Some(Err(e)) + }, + }; + self.frame.in_key = in_key; + return Some(Ok(res)); + } else { + let in_key = if self.frame.in_key { + self.on_in_key() + } else { + self.on_not_in_key() + }; + match in_key { + Ok(in_key) => self.frame.in_key = in_key, + Err(e) => { + self.frame.path.clear(); + return Some(Err(e)) + }, + } + } + } + None + } +} -- cgit 1.4.1