summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--.gitignore2
-rw-r--r--Cargo.toml9
-rw-r--r--src/errors.rs8
-rw-r--r--src/lib.rs16
-rw-r--r--src/parser.rs10
-rw-r--r--src/pattern.rs22
-rw-r--r--src/vm.rs292
7 files changed, 359 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..96ef6c0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/target
+Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..a6ddb2b
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "datafu"
+version = "0.1.0"
+authors = ["SoniEx2 <endermoneymod@gmail.com>"]
+edition = "2015"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
diff --git a/src/errors.rs b/src/errors.rs
new file mode 100644
index 0000000..4fc2144
--- /dev/null
+++ b/src/errors.rs
@@ -0,0 +1,8 @@
+pub struct PatternError;
+
+#[derive(Clone)]
+pub enum MatchError {
+    StackOverflow,
+    ValidationError,
+    Other
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..9e84de1
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,16 @@
+pub mod errors;
+mod parser;
+mod pattern;
+mod vm;
+
+pub use pattern::Pattern;
+
+// TODO
+pub trait PatternTypes {
+    /// The value type.
+    type Value;
+    type Iter;
+}
+
+// TODO
+pub type Predicate<T> = dyn (Fn(&<T as PatternTypes>::Value) -> bool) + Send + Sync;
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..0a7d858
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,10 @@
+// TODO
+
+use crate::PatternTypes;
+use crate::errors::PatternError;
+use crate::vm::PatternConstants;
+//use crate::vm::PatternElement;
+
+pub(crate) fn parse<T: PatternTypes>(s: &str) -> Result<PatternConstants<T>, PatternError> {
+    unimplemented!()
+}
diff --git a/src/pattern.rs b/src/pattern.rs
new file mode 100644
index 0000000..8c57f00
--- /dev/null
+++ b/src/pattern.rs
@@ -0,0 +1,22 @@
+use crate::PatternTypes;
+use crate::errors::PatternError;
+use crate::parser::parse;
+use crate::vm::Matcher;
+use crate::vm::PatternConstants;
+use crate::vm::MAX_CALLS;
+
+pub struct Pattern<T: PatternTypes> {
+    constants: PatternConstants<T>,
+}
+
+impl<T: PatternTypes> Pattern<T> {
+    pub fn compile(s: &str) -> Result<Self, PatternError> {
+        Ok(Self {
+            constants: parse(s)?
+        })
+    }
+
+    pub fn attempt_match<'a, 'b>(&'a self, value: &'b T::Value) -> Matcher<'a, 'b, T> {
+        Matcher::new(value, &self.constants, self.constants.protos.len() - 1, MAX_CALLS).ok().expect("datafu internal error: MAX_CALLS must not be 0")
+    }
+}
diff --git a/src/vm.rs b/src/vm.rs
new file mode 100644
index 0000000..3131571
--- /dev/null
+++ b/src/vm.rs
@@ -0,0 +1,292 @@
+use crate::PatternTypes;
+use crate::Predicate;
+use crate::errors::MatchError;
+
+use std::collections::BTreeMap;
+use std::marker::PhantomData;
+
+pub(crate) const MAX_CALLS: usize = 250;
+
+type Matches<'a, 'b, T> = BTreeMap<&'a str, (&'b <T as PatternTypes>::Value, &'b <T as PatternTypes>::Value)>;
+
+// TODO: use a builder for this?
+/// The constant pool for a pattern.
+pub(crate) struct PatternConstants<T: PatternTypes> {
+    // last proto is implicitly the whole pattern.
+    pub(crate) protos: Vec<Vec<PatternElement<T>>>,
+    // Note that we can borrow these when creating the output map.
+    // https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f
+    pub(crate) strings: Vec<String>,
+    pub(crate) regices: Vec<()/* TODO */>,
+    pub(crate) predicates: Vec<Box<Predicate<T>>>,
+}
+
+/// A pattern element.
+pub(crate) enum PatternElement<T: PatternTypes> {
+    Arrow,
+
+    Identifier(usize),
+
+    StringKey(usize, bool),
+    RegexKey(usize, bool),
+    ParameterKey(usize, bool),
+    KeySubtree(usize, bool),
+    ValueSubtree(usize, bool),
+    ApplyPredicate(usize, bool, PhantomData<fn(&PatternConstants<T>) -> &Predicate<T>>),
+
+    End
+}
+
+impl<T: PatternTypes> Copy for PatternElement<T> {}
+
+impl<T: PatternTypes> Clone for PatternElement<T> {
+    fn clone(&self) -> Self { *self }
+}
+
+struct Frame<'a, 'b, T: PatternTypes> {
+    obj: &'b T::Value,
+    ops: &'a Vec<PatternElement<T>>,
+    iar: Option<usize>,
+    depth: usize,
+    path: Vec<Holder<'a, 'b, T>>,
+    in_key: bool,
+}
+
+impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> {
+    /// Advances the instruction address register.
+    ///
+    /// # Returns
+    ///
+    /// `true` if successful, `false` otherwise.
+    fn next(&mut self) -> bool {
+        let new = self.iar.map_or(0, |v| v + 1);
+        new < self.ops.len() && {
+            self.iar = Some(new);
+            true
+        }
+    }
+
+    /// Returns the current instruction.
+    fn op(&self) -> PatternElement<T> {
+        self.ops[self.iar.expect("ops[iar]")]
+    }
+
+    /// Rewinds the instruction address register.
+    ///
+    /// # Returns
+    ///
+    /// `true` if successful, `false` otherwise.
+    fn prev(&mut self) -> bool {
+        let new = self.iar.expect("iar").checked_sub(1);
+        new.is_some() && {
+            self.iar = new;
+            true
+        }
+    }
+}
+
+/// Stores a single match.
+///
+/// See also Holder.
+enum HolderState<'a, 'b, T: PatternTypes> {
+    EmptyKey,
+    EmptySubtree,
+    Key((&'b T::Value, &'b T::Value)),
+    Subtree(Matches<'a, 'b, T>, &'b T::Value),
+}
+
+impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> {
+    fn clone(&self) -> Self {
+        match self {
+            HolderState::EmptyKey => HolderState::EmptyKey,
+            HolderState::EmptySubtree => HolderState::EmptySubtree,
+            HolderState::Key(v) => HolderState::Key(*v),
+            HolderState::Subtree(m, v) => HolderState::Subtree(m.clone(), *v),
+        }
+    }
+}
+
+impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> {
+    fn is_empty(&self) -> bool {
+        matches!(self, HolderState::EmptyKey | HolderState::EmptySubtree)
+    }
+
+    fn is_subtree(&self) -> bool {
+        matches!(self, HolderState::EmptySubtree | HolderState::Subtree {..})
+    }
+
+    fn value(&self) -> Option<&'b T::Value> {
+        match self {
+            HolderState::Key((_, value)) => Some(value),
+            HolderState::Subtree(_, value) => Some(value),
+            _ => None
+        }
+    }
+}
+
+/// Stores a single match and associated metadata.
+///
+/// A single match is generally a key-value pair, but may be a collection of
+/// named pairs in the case of subtree matches.
+struct Holder<'a, 'b, T: PatternTypes> {
+     name: Option<&'a str>,
+     value: HolderState<'a, 'b, T>,
+     parent: Option<&'b T::Value>,
+     //iterator: Box<dyn Iterator<Item=Result<HolderState<'a, 'b, T>, MatchError>> + Capture<'a> + Capture<'b>>,
+     iterator: Box<dyn FnMut() -> Option<Result<HolderState<'a, 'b, T>, MatchError>>>,
+     //iterator: T::Iter,
+}
+
+impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> {
+    fn next(&mut self) -> Option<Result<(), MatchError>> {
+        if let Self { value: ref mut v, iterator: ref mut it, .. } = self {
+            let is_subtree = v.is_subtree();
+            *v = match it() {
+                Some(Ok(pair)) => pair,
+                Some(Err(e)) => return Some(Err(e)),
+                None => return None
+            };
+            // just try to make sure the type doesn't change.
+            // (and that if we get to this point the result isn't empty.)
+            assert!(!v.is_empty() && v.is_subtree() == is_subtree);
+        }
+        Some(Ok(()))
+    }
+}
+
+impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> {
+    fn default() -> Self {
+        Self {
+            name: Default::default(),
+            value: HolderState::EmptyKey,
+            parent: Default::default(),
+            iterator: Box::new(|| None),
+        }
+    }
+}
+
+pub struct Matcher<'a, 'b, T: PatternTypes> {
+    defs: &'a PatternConstants<T>,
+    frame: Frame<'a, 'b, T>,
+}
+
+impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> {
+    pub(crate) fn new(obj: &'b T::Value, defs: &'a PatternConstants<T>, proto: usize, rlimit: usize) -> Result<Self, MatchError> {
+        let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?;
+        let ops: &Vec<_> = &defs.protos[proto];
+        Ok(Self {
+            defs: defs,
+            frame: Frame {
+                obj: obj,
+                ops: ops,
+                iar: None,
+                depth: depth,
+                path: if ops.is_empty() {
+                    Default::default()
+                } else {
+                    vec![Holder::default()]
+                },
+                in_key: false,
+            },
+        })
+    }
+
+    fn on_in_key(&mut self) -> Result<bool, MatchError> {
+        match self.frame.op() {
+            PatternElement::End => {
+                unimplemented!()
+            }
+            _ => unreachable!("on_in_key")
+        }
+    }
+
+    fn on_not_in_key(&mut self) -> Result<bool, MatchError> {
+        match self.frame.op() {
+            _ => unreachable!("on_not_in_key")
+        }
+    }
+
+    fn on_end(&mut self) -> Result<(bool, Matches<'a, 'b, T>), MatchError> {
+        match self.frame.op() {
+            PatternElement::End => {
+                assert!(!self.frame.path.last().expect("path").value.is_empty());
+                // TODO this for loop is duplicated with ApplyPredicate
+                // TODO figure out how to deduplicate them
+                let mut res: Matches<'a, 'b, T> = Default::default();
+                for holder in &self.frame.path {
+                    match holder.value {
+                        HolderState::Subtree(ref matches, _) => {
+                            res.extend(matches);
+                        },
+                        HolderState::Key(pair) => {
+                            if let Some(name) = holder.name {
+                                res.insert(name, pair);
+                            }
+                        },
+                        _ => unreachable!("on_end (End)"),
+                    }
+                }
+                if !self.frame.prev() {
+                    // frame.prev() should always be called, even if this gets
+                    // replaced with debug_assert!().
+                    assert!(false, "frame.prev()");
+                }
+                Ok((true, res))
+            },
+            PatternElement::ApplyPredicate {..} => {
+                assert!(!self.frame.in_key);
+                let mut res: Matches<'a, 'b, T> = Default::default();
+                for holder in &self.frame.path {
+                    match holder.value {
+                        HolderState::Subtree(ref matches, _) => {
+                            res.extend(matches);
+                        },
+                        HolderState::Key(pair) => {
+                            if let Some(name) = holder.name {
+                                res.insert(name, pair);
+                            }
+                        },
+                        _ => unreachable!("on_end (ApplyPredicate)"),
+                    }
+                }
+                self.frame.path.clear();
+                Ok((false, res))
+            }
+            _ => unreachable!("on_end")
+        }
+    }
+}
+
+impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> {
+    type Item = Result<BTreeMap<&'a str, (&'b T::Value, &'b T::Value)>, MatchError>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        while !self.frame.path.is_empty() {
+            if !self.frame.next() {
+                let (in_key, res) = match self.on_end() {
+                    Ok(v) => v,
+                    Err(e) => {
+                        self.frame.path.clear();
+                        return Some(Err(e))
+                    },
+                };
+                self.frame.in_key = in_key;
+                return Some(Ok(res));
+            } else {
+                let in_key = if self.frame.in_key {
+                    self.on_in_key()
+                } else {
+                    self.on_not_in_key()
+                };
+                match in_key {
+                    Ok(in_key) => self.frame.in_key = in_key,
+                    Err(e) => {
+                        self.frame.path.clear();
+                        return Some(Err(e))
+                    },
+                }
+            }
+        }
+        None
+    }
+}