diff options
author | SoniEx2 <endermoneymod@gmail.com> | 2021-01-12 22:15:26 -0300 |
---|---|---|
committer | SoniEx2 <endermoneymod@gmail.com> | 2021-01-12 22:15:26 -0300 |
commit | bcdba3431c72cd0804d9a95972a907b828fb5fad (patch) | |
tree | 210f263e64f5e90726fcf92ec60eb088711787d4 /src |
Prepare VM design
Diffstat (limited to 'src')
-rw-r--r-- | src/errors.rs | 8 | ||||
-rw-r--r-- | src/lib.rs | 16 | ||||
-rw-r--r-- | src/parser.rs | 10 | ||||
-rw-r--r-- | src/pattern.rs | 22 | ||||
-rw-r--r-- | src/vm.rs | 292 |
5 files changed, 348 insertions, 0 deletions
diff --git a/src/errors.rs b/src/errors.rs new file mode 100644 index 0000000..4fc2144 --- /dev/null +++ b/src/errors.rs @@ -0,0 +1,8 @@ +pub struct PatternError; + +#[derive(Clone)] +pub enum MatchError { + StackOverflow, + ValidationError, + Other +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..9e84de1 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,16 @@ +pub mod errors; +mod parser; +mod pattern; +mod vm; + +pub use pattern::Pattern; + +// TODO +pub trait PatternTypes { + /// The value type. + type Value; + type Iter; +} + +// TODO +pub type Predicate<T> = dyn (Fn(&<T as PatternTypes>::Value) -> bool) + Send + Sync; diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..0a7d858 --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,10 @@ +// TODO + +use crate::PatternTypes; +use crate::errors::PatternError; +use crate::vm::PatternConstants; +//use crate::vm::PatternElement; + +pub(crate) fn parse<T: PatternTypes>(s: &str) -> Result<PatternConstants<T>, PatternError> { + unimplemented!() +} diff --git a/src/pattern.rs b/src/pattern.rs new file mode 100644 index 0000000..8c57f00 --- /dev/null +++ b/src/pattern.rs @@ -0,0 +1,22 @@ +use crate::PatternTypes; +use crate::errors::PatternError; +use crate::parser::parse; +use crate::vm::Matcher; +use crate::vm::PatternConstants; +use crate::vm::MAX_CALLS; + +pub struct Pattern<T: PatternTypes> { + constants: PatternConstants<T>, +} + +impl<T: PatternTypes> Pattern<T> { + pub fn compile(s: &str) -> Result<Self, PatternError> { + Ok(Self { + constants: parse(s)? + }) + } + + pub fn attempt_match<'a, 'b>(&'a self, value: &'b T::Value) -> Matcher<'a, 'b, T> { + Matcher::new(value, &self.constants, self.constants.protos.len() - 1, MAX_CALLS).ok().expect("datafu internal error: MAX_CALLS must not be 0") + } +} diff --git a/src/vm.rs b/src/vm.rs new file mode 100644 index 0000000..3131571 --- /dev/null +++ b/src/vm.rs @@ -0,0 +1,292 @@ +use crate::PatternTypes; +use crate::Predicate; +use crate::errors::MatchError; + +use std::collections::BTreeMap; +use std::marker::PhantomData; + +pub(crate) const MAX_CALLS: usize = 250; + +type Matches<'a, 'b, T> = BTreeMap<&'a str, (&'b <T as PatternTypes>::Value, &'b <T as PatternTypes>::Value)>; + +// TODO: use a builder for this? +/// The constant pool for a pattern. +pub(crate) struct PatternConstants<T: PatternTypes> { + // last proto is implicitly the whole pattern. + pub(crate) protos: Vec<Vec<PatternElement<T>>>, + // Note that we can borrow these when creating the output map. + // https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f + pub(crate) strings: Vec<String>, + pub(crate) regices: Vec<()/* TODO */>, + pub(crate) predicates: Vec<Box<Predicate<T>>>, +} + +/// A pattern element. +pub(crate) enum PatternElement<T: PatternTypes> { + Arrow, + + Identifier(usize), + + StringKey(usize, bool), + RegexKey(usize, bool), + ParameterKey(usize, bool), + KeySubtree(usize, bool), + ValueSubtree(usize, bool), + ApplyPredicate(usize, bool, PhantomData<fn(&PatternConstants<T>) -> &Predicate<T>>), + + End +} + +impl<T: PatternTypes> Copy for PatternElement<T> {} + +impl<T: PatternTypes> Clone for PatternElement<T> { + fn clone(&self) -> Self { *self } +} + +struct Frame<'a, 'b, T: PatternTypes> { + obj: &'b T::Value, + ops: &'a Vec<PatternElement<T>>, + iar: Option<usize>, + depth: usize, + path: Vec<Holder<'a, 'b, T>>, + in_key: bool, +} + +impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> { + /// Advances the instruction address register. + /// + /// # Returns + /// + /// `true` if successful, `false` otherwise. + fn next(&mut self) -> bool { + let new = self.iar.map_or(0, |v| v + 1); + new < self.ops.len() && { + self.iar = Some(new); + true + } + } + + /// Returns the current instruction. + fn op(&self) -> PatternElement<T> { + self.ops[self.iar.expect("ops[iar]")] + } + + /// Rewinds the instruction address register. + /// + /// # Returns + /// + /// `true` if successful, `false` otherwise. + fn prev(&mut self) -> bool { + let new = self.iar.expect("iar").checked_sub(1); + new.is_some() && { + self.iar = new; + true + } + } +} + +/// Stores a single match. +/// +/// See also Holder. +enum HolderState<'a, 'b, T: PatternTypes> { + EmptyKey, + EmptySubtree, + Key((&'b T::Value, &'b T::Value)), + Subtree(Matches<'a, 'b, T>, &'b T::Value), +} + +impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> { + fn clone(&self) -> Self { + match self { + HolderState::EmptyKey => HolderState::EmptyKey, + HolderState::EmptySubtree => HolderState::EmptySubtree, + HolderState::Key(v) => HolderState::Key(*v), + HolderState::Subtree(m, v) => HolderState::Subtree(m.clone(), *v), + } + } +} + +impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> { + fn is_empty(&self) -> bool { + matches!(self, HolderState::EmptyKey | HolderState::EmptySubtree) + } + + fn is_subtree(&self) -> bool { + matches!(self, HolderState::EmptySubtree | HolderState::Subtree {..}) + } + + fn value(&self) -> Option<&'b T::Value> { + match self { + HolderState::Key((_, value)) => Some(value), + HolderState::Subtree(_, value) => Some(value), + _ => None + } + } +} + +/// Stores a single match and associated metadata. +/// +/// A single match is generally a key-value pair, but may be a collection of +/// named pairs in the case of subtree matches. +struct Holder<'a, 'b, T: PatternTypes> { + name: Option<&'a str>, + value: HolderState<'a, 'b, T>, + parent: Option<&'b T::Value>, + //iterator: Box<dyn Iterator<Item=Result<HolderState<'a, 'b, T>, MatchError>> + Capture<'a> + Capture<'b>>, + iterator: Box<dyn FnMut() -> Option<Result<HolderState<'a, 'b, T>, MatchError>>>, + //iterator: T::Iter, +} + +impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> { + fn next(&mut self) -> Option<Result<(), MatchError>> { + if let Self { value: ref mut v, iterator: ref mut it, .. } = self { + let is_subtree = v.is_subtree(); + *v = match it() { + Some(Ok(pair)) => pair, + Some(Err(e)) => return Some(Err(e)), + None => return None + }; + // just try to make sure the type doesn't change. + // (and that if we get to this point the result isn't empty.) + assert!(!v.is_empty() && v.is_subtree() == is_subtree); + } + Some(Ok(())) + } +} + +impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> { + fn default() -> Self { + Self { + name: Default::default(), + value: HolderState::EmptyKey, + parent: Default::default(), + iterator: Box::new(|| None), + } + } +} + +pub struct Matcher<'a, 'b, T: PatternTypes> { + defs: &'a PatternConstants<T>, + frame: Frame<'a, 'b, T>, +} + +impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { + pub(crate) fn new(obj: &'b T::Value, defs: &'a PatternConstants<T>, proto: usize, rlimit: usize) -> Result<Self, MatchError> { + let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?; + let ops: &Vec<_> = &defs.protos[proto]; + Ok(Self { + defs: defs, + frame: Frame { + obj: obj, + ops: ops, + iar: None, + depth: depth, + path: if ops.is_empty() { + Default::default() + } else { + vec![Holder::default()] + }, + in_key: false, + }, + }) + } + + fn on_in_key(&mut self) -> Result<bool, MatchError> { + match self.frame.op() { + PatternElement::End => { + unimplemented!() + } + _ => unreachable!("on_in_key") + } + } + + fn on_not_in_key(&mut self) -> Result<bool, MatchError> { + match self.frame.op() { + _ => unreachable!("on_not_in_key") + } + } + + fn on_end(&mut self) -> Result<(bool, Matches<'a, 'b, T>), MatchError> { + match self.frame.op() { + PatternElement::End => { + assert!(!self.frame.path.last().expect("path").value.is_empty()); + // TODO this for loop is duplicated with ApplyPredicate + // TODO figure out how to deduplicate them + let mut res: Matches<'a, 'b, T> = Default::default(); + for holder in &self.frame.path { + match holder.value { + HolderState::Subtree(ref matches, _) => { + res.extend(matches); + }, + HolderState::Key(pair) => { + if let Some(name) = holder.name { + res.insert(name, pair); + } + }, + _ => unreachable!("on_end (End)"), + } + } + if !self.frame.prev() { + // frame.prev() should always be called, even if this gets + // replaced with debug_assert!(). + assert!(false, "frame.prev()"); + } + Ok((true, res)) + }, + PatternElement::ApplyPredicate {..} => { + assert!(!self.frame.in_key); + let mut res: Matches<'a, 'b, T> = Default::default(); + for holder in &self.frame.path { + match holder.value { + HolderState::Subtree(ref matches, _) => { + res.extend(matches); + }, + HolderState::Key(pair) => { + if let Some(name) = holder.name { + res.insert(name, pair); + } + }, + _ => unreachable!("on_end (ApplyPredicate)"), + } + } + self.frame.path.clear(); + Ok((false, res)) + } + _ => unreachable!("on_end") + } + } +} + +impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> { + type Item = Result<BTreeMap<&'a str, (&'b T::Value, &'b T::Value)>, MatchError>; + + fn next(&mut self) -> Option<Self::Item> { + while !self.frame.path.is_empty() { + if !self.frame.next() { + let (in_key, res) = match self.on_end() { + Ok(v) => v, + Err(e) => { + self.frame.path.clear(); + return Some(Err(e)) + }, + }; + self.frame.in_key = in_key; + return Some(Ok(res)); + } else { + let in_key = if self.frame.in_key { + self.on_in_key() + } else { + self.on_not_in_key() + }; + match in_key { + Ok(in_key) => self.frame.in_key = in_key, + Err(e) => { + self.frame.path.clear(); + return Some(Err(e)) + }, + } + } + } + None + } +} |