summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: SoniEx2 <endermoneymod@gmail.com>, 2021-02-11 00:52:25 -0300
committer: SoniEx2 <endermoneymod@gmail.com>, 2021-02-11 00:52:25 -0300
commit: abaee464936a821568c3fdbd52b9aadd3adb6d0f (patch)
tree: 8a8b5c271f0b8b67eeb77e94a68ecf73ffc4e1f9
parent: 69652efe8ad9738a94fef571c8b81e342f96e7b4 (diff)
Partially implement VM
The following are now implemented:

- [x] Arrow
- [x] StringKey
- [x] RegexKey
- [ ] KeySubtree
- [ ] ValueSubtree
- [x] Ident
- [ ] Param
- [x] ApplyPredicate
- [x] End
-rw-r--r--  Cargo.toml            4
-rw-r--r--  README.md            25
-rw-r--r--  src/lib.rs            5
-rw-r--r--  src/parser.rs         9
-rw-r--r--  src/vm.rs           218
-rw-r--r--  tests/basic_match.rs 74
-rw-r--r--  tests/common/mod.rs  10
7 files changed, 314 insertions, 31 deletions
diff --git a/Cargo.toml b/Cargo.toml
index dbdb60a..b63dce6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,11 +1,13 @@
 [package]
 name = "datafu"
-version = "0.0.2"
+version = "0.0.3"
 authors = ["SoniEx2 <endermoneymod@gmail.com>"]
 license = "AGPL-3.0-or-later"
 description = "A Rust library for extracting data from config objects and other arbitrary object graphs."
 edition = "2015"
 repository = "https://soniex2.autistic.space/git-repos/dfu.git"
+readme = "README.md"
+homepage = "https://soniex2.github.io/ganarchy/project/c0b4a8a326a320ac33c5d9d6bac2f7ea7eb703ce/"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0c20949
--- /dev/null
+++ b/README.md
@@ -0,0 +1,25 @@
+Datafu
+======
+
+A Rust library for extracting data from config objects and other arbitrary object graphs.
+
+License
+-------
+
+```text
+Datafu - Rust library for extracting data from object graphs.
+Copyright (C) 2021  Soni L.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <https://www.gnu.org/licenses/>.
+```
diff --git a/src/lib.rs b/src/lib.rs
index 2407115..cbd32b2 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -127,6 +127,11 @@ pub trait PatternTypes {
         left: RefOwn<'_, Self::Ref, Self::Own>,
         right: RefOwn<'_, Self::Ref, Self::Own>
     ) -> bool;
+
+    /// Returns the value as an &str.
+    fn as_str<'b>(
+        value: RefOwn<'b, Self::Ref, Self::Own>
+    ) -> Option<&'b str>;
 }
 
 // TODO
diff --git a/src/parser.rs b/src/parser.rs
index 00fbcb1..eb54c86 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -623,6 +623,15 @@ mod tests {
         ) -> bool {
             false
         }
+
+        fn as_str<'b>(
+            item: RefOwn<'b, Self::Ref, Self::Own>
+        ) -> Option<&'b str> {
+            match item {
+                RefOwn::Str(key) => Some(key),
+                _ => None,
+            }
+        }
     }
 
     #[test]
diff --git a/src/vm.rs b/src/vm.rs
index a02010f..62fb074 100644
--- a/src/vm.rs
+++ b/src/vm.rs
@@ -156,6 +156,10 @@ impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> {
         matches!(self, HolderState::EmptyKey | HolderState::EmptySubtree)
     }
 
+    fn has_value(&self) -> bool {
+        !self.is_empty()
+    }
+
     fn is_subtree(&self) -> bool {
         matches!(self, HolderState::EmptySubtree | HolderState::Subtree {..})
     }
@@ -168,34 +172,85 @@ impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> {
             _ => None
         }
     }
+
+    fn key(&self) -> Option<RefOwn<'b, T::Ref, T::Own>> {
+        match *self {
+            HolderState::Key((key, _)) => Some(key),
+            _ => None
+        }
+    }
+
+    fn clear(&mut self) {
+        match *self {
+            HolderState::Key(_) => *self = HolderState::EmptyKey,
+            HolderState::Subtree(_, _) => *self = HolderState::EmptySubtree,
+            HolderState::Value(_) => unreachable!(),
+            _ => {},
+        };
+        assert!(self.is_empty());
+    }
 }
 
 /// Stores a single match and associated metadata.
 ///
 /// A single match is generally a key-value pair, but may be a collection of
-/// named pairs in the case of subtree matches.
+/// named pairs in the case of subtree matches, or just a value for the initial
+/// holder.
 struct Holder<'a, 'b, T: PatternTypes> {
      name: Option<&'a str>,
      value: HolderState<'a, 'b, T>,
      parent: Option<RefOwn<'b, T::Ref, T::Own>>,
      iterator: Option<Box<dyn Iterator<Item=KVPair<'b, T>> + 'b>>,
-     filters: Vec<Box<dyn for<'c> Fn(&'c mut HolderState<'a, 'b, T>) + 'a>>,
+     filters: Vec<Box<dyn (for<'c> Fn(&'c mut HolderState<'a, 'b, T>) -> Result<(), MatchError>) + 'a>>,
 }
 
 impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> {
-    fn next(&mut self) -> Option<Result<(), MatchError>> {
-        // FIXME what even is the point of this?
-        if let Self { value: ref mut v, iterator: Some(ref mut it), .. } = self {
-            let is_subtree = v.is_subtree();
-            *v = match it.next() {
-                Some(pair) => HolderState::Key(pair),
-                None => return None
-            };
-            // just try to make sure the type doesn't change.
-            // (and that if we get to this point the result isn't empty.)
-            assert!(!v.is_empty() && v.is_subtree() == is_subtree);
+    fn next(&mut self) -> Result<bool, MatchError> {
+        self.ensure_iterator()?;
+        match self {
+            Self {
+                value: ref mut v,
+                iterator: Some(ref mut it),
+                ref filters,
+                ..
+            } => {
+                let is_subtree = v.is_subtree();
+                let mut next_v;
+                loop {
+                    next_v = match it.next() {
+                        Some(pair) => HolderState::Key(pair),
+                        None => return Ok(false)
+                    };
+                    for filter in filters {
+                        filter(&mut next_v)?;
+                        if next_v.is_empty() {
+                            break;
+                        }
+                    }
+                    if next_v.has_value() {
+                        break;
+                    }
+                }
+                assert!(next_v.has_value());
+                assert!(next_v.is_subtree() == is_subtree);
+                *v = next_v;
+                Ok(true)
+            },
+            _ => unreachable!()
         }
-        Some(Ok(()))
+    }
+
+    /// Ensure `self.iterator.is_some()`, creating an iterator if necessary.
+    fn ensure_iterator(&mut self) -> Result<(), MatchError> {
+        if self.iterator.is_none() {
+            let iter = T::pairs(self.parent.unwrap());
+            if iter.is_none() {
+                return Err(MatchError::UnsupportedOperation);
+            }
+            self.iterator = iter;
+        }
+        assert!(self.iterator.is_some());
+        Ok(())
     }
 }
 
@@ -216,6 +271,60 @@ pub struct Matcher<'a, 'b, T: PatternTypes> {
     frame: Frame<'a, 'b, T>,
 }
 
+// TODO:
+//
+// [x] Arrow
+// [x] StringKey
+// [x] RegexKey
+// [ ] KeySubtree
+// [ ] ValueSubtree
+// [x] Ident
+// [ ] Param
+// [x] ApplyPredicate
+// [x] End
+
+/// Helper for `PatternElement::StringKey`.
+fn on_string_key<'a, 'b, T: PatternTypes>(
+    matcher: &mut Matcher<'a, 'b, T>,
+    id: usize,
+    skippable: bool,
+) -> Result<bool, MatchError> {
+    let path = matcher.frame.path.last_mut().unwrap();
+    assert!(path.iterator.is_none());
+    let key = &matcher.defs.strings[id];
+    let iter = T::get(path.parent.unwrap(), RefOwn::Str(key));
+    match iter {
+        None => Err(MatchError::UnsupportedOperation),
+        Some(opt) => {
+            path.iterator = Some(Box::new(opt.into_iter()));
+            Ok(true)
+        }
+    }
+}
+
+/// Helper for `PatternElement::RegexKey`.
+fn on_regex_key<'a, 'b, T: PatternTypes>(
+    matcher: &mut Matcher<'a, 'b, T>,
+    id: usize,
+    skippable: bool,
+) -> Result<bool, MatchError> {
+    matcher.frame.path.last_mut().unwrap().ensure_iterator()?;
+    let re = &matcher.defs.regices[id];
+    let path = matcher.frame.path.last_mut().unwrap();
+    path.filters.push(Box::new(move |value| {
+        let s = T::as_str(value.key().unwrap());
+        match (s.map_or(false, |s| re.is_match(s)), skippable) {
+            (true, _) => Ok(()),
+            (false, true) => {
+                value.clear();
+                Ok(())
+            },
+            (false, false) => Err(MatchError::ValidationError),
+        }
+    }));
+    Ok(true)
+}
+
 impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> {
     pub(crate) fn new(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants<T>, proto: usize, rlimit: usize) -> Result<Self, MatchError> {
         let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?;
@@ -233,6 +342,7 @@ impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> {
                 } else {
                     let mut holder = Holder::default();
                     holder.value = HolderState::Value(obj);
+                    holder.iterator = Some(Box::new(std::iter::empty()));
                     vec![holder]
                 },
                 in_key: false,
@@ -243,8 +353,49 @@ impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> {
     fn on_in_key(&mut self) -> Result<bool, MatchError> {
         match self.frame.op() {
             PatternElement::End => {
-                todo!()
-            }
+                let path = self.frame.path.last_mut().unwrap();
+                if path.next()? {
+                    Ok(false)
+                } else {
+                    drop(path);
+                    self.frame.path.pop().unwrap();
+                    // stop at previous End, or start of frame
+                    while self.frame.prev() {
+                        if matches!(self.frame.op(), PatternElement::End) {
+                            break;
+                        }
+                    }
+                    // is start of frame?
+                    if !self.frame.prev() {
+                        self.frame.path.clear();
+                    }
+                    Ok(true)
+                }
+            },
+            PatternElement::ApplyPredicate(id, skippable, _) => {
+                // failing on T::get() is already handled, but we may need a
+                // T::pairs(). construct it here.
+                self.frame.path.last_mut().unwrap().ensure_iterator()?;
+                let pred = &self.defs.predicates[id];
+                let path = self.frame.path.last_mut().unwrap();
+                path.filters.push(Box::new(move |value| {
+                    match (pred(value.value().unwrap()), skippable) {
+                        (true, _) => Ok(()),
+                        (false, true) => {
+                            value.clear();
+                            Ok(())
+                        },
+                        (false, false) => Err(MatchError::ValidationError),
+                    }
+                }));
+                Ok(true)
+            },
+            PatternElement::StringKey(id, skippable) => {
+                on_string_key(self, id, skippable)
+            },
+            PatternElement::RegexKey(id, skippable) => {
+                on_regex_key(self, id, skippable)
+            },
             _ => unreachable!("on_in_key")
         }
     }
@@ -252,17 +403,42 @@ impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> {
     fn on_not_in_key(&mut self) -> Result<bool, MatchError> {
         match self.frame.op() {
             PatternElement::Arrow => {
-                assert!(!self.frame.path.last().expect("path").value.is_empty());
+                // this *should* always pass.
+                assert!(self.frame.path.last().unwrap().iterator.is_some());
                 let mut holder = Holder::default();
-                holder.parent = self.frame.path.last().expect("path").value.value();
+                holder.parent = self.frame.path.last().unwrap().value.value();
+                assert!(holder.parent.is_some());
                 self.frame.path.push(holder);
                 Ok(false)
             },
             PatternElement::Identifier(id) => {
                 let name = self.defs.strings.get(id).map(|s| &**s);
-                self.frame.path.last_mut().expect("path").name = name;
-                todo!()
-                //Ok(true)
+                let path = self.frame.path.last_mut().unwrap();
+                path.name = name;
+                assert!(path.iterator.is_none());
+                // we don't actually create the iterator here,
+                // as we may still wanna use T::get() instead.
+                Ok(true)
+            },
+            PatternElement::ApplyPredicate(id, skippable, _) => {
+                assert!(self.frame.path.len() == 1);
+                let pred = &self.defs.predicates[id];
+                let value = self.frame.path.last().unwrap().value.value();
+                match (pred(value.unwrap()), skippable) {
+                    (true, _) => Ok(false),
+                    (false, true) => {
+                        self.frame.path.clear();
+                        // any Ok(_) will do
+                        Ok(false)
+                    },
+                    (false, false) => Err(MatchError::ValidationError),
+                }
+            },
+            PatternElement::StringKey(id, skippable) => {
+                on_string_key(self, id, skippable)
+            },
+            PatternElement::RegexKey(id, skippable) => {
+                on_regex_key(self, id, skippable)
             },
             _ => unreachable!("on_not_in_key")
         }
diff --git a/tests/basic_match.rs b/tests/basic_match.rs
index d6f4da3..0b5b546 100644
--- a/tests/basic_match.rs
+++ b/tests/basic_match.rs
@@ -25,7 +25,7 @@ use common::Value;
 use datafu::RefOwn;
 
 #[test]
-fn test_basic_example() {
+fn test_basic() {
     let tree = Value::M(vec![
         ("foo".into(), Value::U(1)),
         ("bar".into(), Value::M(vec![
@@ -39,6 +39,25 @@ fn test_basic_example() {
     assert_eq!(m["X"].0, RefOwn::Ref(&Value::from("bar")));
     assert_eq!(m["Y"].0, RefOwn::Ref(&Value::from("baz")));
     assert_eq!(m["Y"].1, RefOwn::Ref(&Value::U(2)));
+    assert!(matcher.next().is_none());
+}
+
+#[test]
+fn test_str() {
+    let tree = Value::M(vec![
+        ("foo".into(), Value::U(1)),
+        ("bar".into(), Value::M(vec![
+            ("baz".into(), Value::U(2)),
+        ].into_iter().collect())),
+    ].into_iter().collect());
+    let preds = vec![("dict", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::M(_)))) as Box<datafu::Predicate<Value>>)].into_iter().collect();
+    let pat = datafu::Pattern::<Value>::compile::<&str, &str>("->X'bar'->Y", Some(preds), None).unwrap();
+    let mut matcher = pat.attempt_match(&tree);
+    let m = matcher.next().unwrap().unwrap();
+    assert_eq!(m["X"].0, RefOwn::Ref(&Value::from("bar")));
+    assert_eq!(m["Y"].0, RefOwn::Ref(&Value::from("baz")));
+    assert_eq!(m["Y"].1, RefOwn::Ref(&Value::U(2)));
+    assert!(matcher.next().is_none());
 }
 
 #[test]
@@ -62,6 +81,7 @@ fn test_basic_2() {
     assert_eq!(m["U"].0, RefOwn::Ref(&Value::from("https://soniex2.autistic.space/git-repos/ganarchy.git")));
     assert_eq!(m["B"].0, RefOwn::Ref(&Value::from("HEAD")));
     assert_eq!(m["B"].1, RefOwn::Ref(&Value::M(vec![(Value::from("active"), Value::B(true))].into_iter().collect())));
+    assert!(matcher.next().is_none());
 }
 
 #[test]
@@ -78,14 +98,50 @@ fn test_spaces() {
         ].into_iter().collect())),
     ].into_iter().collect());
     let preds = vec![("dict", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::M(_)))) as Box<datafu::Predicate<Value>>)].into_iter().collect();
-    let pat = datafu::Pattern::<Value>::compile::<_, &str>("-> 'projects'?
-                                                               -> commit /[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/? :?$dict
-                                                                  -> url :?$dict
-                                                                     -> branch :?$dict", Some(preds), None).unwrap();
+    let pat = datafu::Pattern::<Value>::compile::<_, &str>("
+        -> 'projects'?
+           -> commit /[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/? :?$dict
+              -> url :?$dict
+                 -> branch :?$dict",
+        Some(preds),
+        None,
+    ).unwrap();
     let mut matcher = pat.attempt_match(&tree);
     let m = matcher.next().unwrap().unwrap();
-    assert_eq!(m["P"].0, RefOwn::Ref(&Value::from("385e734a52e13949a7a5c71827f6de920dbfea43")));
-    assert_eq!(m["U"].0, RefOwn::Ref(&Value::from("https://soniex2.autistic.space/git-repos/ganarchy.git")));
-    assert_eq!(m["B"].0, RefOwn::Ref(&Value::from("HEAD")));
-    assert_eq!(m["B"].1, RefOwn::Ref(&Value::M(vec![(Value::from("active"), Value::B(true))].into_iter().collect())));
+    assert_eq!(m["commit"].0, RefOwn::Ref(&Value::from("385e734a52e13949a7a5c71827f6de920dbfea43")));
+    assert_eq!(m["url"].0, RefOwn::Ref(&Value::from("https://soniex2.autistic.space/git-repos/ganarchy.git")));
+    assert_eq!(m["branch"].0, RefOwn::Ref(&Value::from("HEAD")));
+    assert_eq!(m["branch"].1, RefOwn::Ref(&Value::M(vec![(Value::from("active"), Value::B(true))].into_iter().collect())));
+    assert!(matcher.next().is_none());
 }
+
+//#[test]
+//fn test_harder() {
+//    let tree = Value::M(vec![
+//        ("projects".into(), Value::M(vec![
+//            ("385e734a52e13949a7a5c71827f6de920dbfea43".into(), Value::M(vec![
+//                ("https://soniex2.autistic.space/git-repos/ganarchy.git".into(), Value::M(vec![
+//                    ("HEAD".into(), Value::M(vec![
+//                        ("active".into(), Value::B(true)),
+//                    ].into_iter().collect())),
+//                ].into_iter().collect())),
+//            ].into_iter().collect())),
+//        ].into_iter().collect())),
+//    ].into_iter().collect());
+//    let preds = vec![("dict", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::M(_)))) as Box<datafu::Predicate<Value>>)].into_iter().collect();
+//    let pat = datafu::Pattern::<Value>::compile::<_, &str>("
+//        -> 'projects'?
+//           -> commit /[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/? :?$dict
+//              -> url :?$dict
+//                 -> branch :?$dict",
+//        Some(preds),
+//        None,
+//    ).unwrap();
+//    let mut matcher = pat.attempt_match(&tree);
+//    let m = matcher.next().unwrap().unwrap();
+//    assert_eq!(m["commit"].0, RefOwn::Ref(&Value::from("385e734a52e13949a7a5c71827f6de920dbfea43")));
+//    assert_eq!(m["url"].0, RefOwn::Ref(&Value::from("https://soniex2.autistic.space/git-repos/ganarchy.git")));
+//    assert_eq!(m["branch"].0, RefOwn::Ref(&Value::from("HEAD")));
+//    assert_eq!(m["branch"].1, RefOwn::Ref(&Value::M(vec![(Value::from("active"), Value::B(true))].into_iter().collect())));
+//    assert!(matcher.next().is_none());
+//}
diff --git a/tests/common/mod.rs b/tests/common/mod.rs
index 9680504..d0b9e89 100644
--- a/tests/common/mod.rs
+++ b/tests/common/mod.rs
@@ -176,4 +176,14 @@ impl PatternTypes for Value {
     ) -> bool {
         left == right
     }
+
+    fn as_str<'b>(
+        item: RefOwn<'b, Self, Dummy>
+    ) -> Option<&'b str> {
+        match item {
+            RefOwn::Str(key) => Some(key),
+            RefOwn::Ref(Value::S(key)) => Some(key),
+            _ => None,
+        }
+    }
 }