summary refs log tree commit diff stats
diff options
context:
space:
mode:
author SoniEx2 <endermoneymod@gmail.com> 2022-11-13 12:30:11 -0300
committer SoniEx2 <endermoneymod@gmail.com> 2022-11-13 12:30:11 -0300
commit 72397506c3529f4878b5a1cf8205599764ac4088 (patch)
tree f8fe6ece751cff882a3e6f8331206726d3736eea
parent 44a6ee680308a3d756ef7c17db2f64518bb2b493 (diff)
Finish most "core" VM functionality
Still need to do lists
-rw-r--r-- src/lib.rs 26
-rw-r--r-- src/parser.rs 19
-rw-r--r-- src/vm/de.rs 67
-rw-r--r-- src/vm/mod.rs 61
4 files changed, 117 insertions, 56 deletions
diff --git a/src/lib.rs b/src/lib.rs
index bda97f4..6d8a7e0 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -14,6 +14,8 @@
 //!
 //! ## Syntax Elements of Datafu Expressions
 //!
+//! FIXME still need to update these...
+//!
 //! An arrow is `->` and indicates indexing/iteration. Whether indexing or
 //! iteration is used is defined by the elements that follow, with iteration
 //! being used by default.
@@ -89,7 +91,7 @@
 //! arg ::= parameter | literal | regex | keymatch
 //!
 //! arrow ::= '->'
-//! keymatch ::= '[' [name] expression ']'
+//! keymatch ::= '[' [name] expression ']' ['?']
 //! subvalue ::= '(' expression ')' ['?']
 //! ```
 //!
@@ -98,7 +100,27 @@
 //!
 //! # Examples
 //!
-//! <!-- TODO -->
+//! The Datafu pattern
+//!
+//! ```datafu
+//! :map
+//! ->['a'?]:map
+//!   ->[b:?str]:?map
+//!     (->['x'?]x:?bool)
+//!     (->['y'?]y:?bool)?
+//! ```
+//!
+//! When matched against the JSON
+//!
+//! ```json
+//! {"a": {"1": {"y": true}, "2": {"x": true, "y": true}}}
+//! ```
+//!
+//! Produces the results for the sub-JSON
+//!
+//! ```json
+//! {"a": {"2": {"x": true, "y": true}}}
+//! ```
 
 pub mod errors;
 //pub mod type_tree;
diff --git a/src/parser.rs b/src/parser.rs
index 0698b6b..a11b68c 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -218,12 +218,13 @@ impl_trait! {
             match self_.root.tokens.drain(self_.len..).as_slice() {
                 &[
                     PatternToken::Arrow,
-                    PatternToken::KeySubtree(index),
+                    PatternToken::KeySubtree(index, optional),
                     ref name_value @ ..,
                     PatternToken::End,
                 ] => {
                     let tag = PatternElement::Tag {
-                        key_subtree: Some(index),
+                        key_subtree: index,
+                        optional,
                     };
                     self_.root.consts.protos.last_mut().unwrap().push(tag);
                     let value = collect_name_and_value(name_value);
@@ -234,9 +235,7 @@ impl_trait! {
                     ref name_value @ ..,
                     PatternToken::End,
                 ] => {
-                    let tag = PatternElement::Tag {
-                        key_subtree: None,
-                    };
+                    let tag = PatternElement::EmptyTag;
                     self_.root.consts.protos.last_mut().unwrap().push(tag);
                     let value = collect_name_and_value(name_value);
                     self_.root.consts.protos.last_mut().unwrap().push(value);
@@ -627,7 +626,7 @@ where
         }))
     }
 
-    /// key_subtree <- sp '[' sp ( matcher / name sp matcher? ) sp subtree sp ( ']' / unexpected_token / unexpected_end ) sp
+    /// key_subtree <- sp '[' sp ( matcher / name sp matcher? ) sp subtree sp ( ']' / unexpected_token / unexpected_end ) ( '?'? -> MarkSkippable ) sp
     // ( '?'? -> MarkSkippable )
     fn key_subtree(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
         let mut cursor = *s;
@@ -657,16 +656,16 @@ where
                 ||
                 subtree.unexpected_end(&mut cursor)?
             );
+            let optional = strip_prefix(&mut cursor, "?");
             subtree.sp(&mut cursor);
-            //let skippable = strip_prefix(&mut cursor, "?");
             *s = cursor;
             let id = subtree.commit();
-            self.tokens.push(PatternToken::KeySubtree(id));
+            self.tokens.push(PatternToken::KeySubtree(id, optional));
             true
         }))
     }
 
-    /// value_subtree <- sp '(' sp subtree sp ( ')' / unexpected_token / unexpected_end ) sp ( '?'? -> MarkSkippable )
+    /// value_subtree <- sp '(' sp subtree sp ( ')' / unexpected_token / unexpected_end ) ( '?'? -> MarkSkippable ) sp
     fn value_subtree(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
         let mut cursor = *s;
         Ok(lblock!('matches: {
@@ -683,8 +682,8 @@ where
                 ||
                 subtree.unexpected_end(&mut cursor)?
             );
-            subtree.sp(&mut cursor);
             let optional = strip_prefix(&mut cursor, "?");
+            subtree.sp(&mut cursor);
             *s = cursor;
             if !subtree.is_empty() {
                 let id = subtree.commit();
diff --git a/src/vm/de.rs b/src/vm/de.rs
index 6cb8e94..493b658 100644
--- a/src/vm/de.rs
+++ b/src/vm/de.rs
@@ -167,6 +167,7 @@ impl<'pat, 'state, 'de, O: Serialize> Packer<'pat, 'state, O> {
                                 iar: None,
                                 overstep: 0,
                                 matches: true,
+                                poison: false,
                             };
                             // we want the "newest" frame last, so it is
                             // easier to unwind back.
@@ -199,7 +200,7 @@ impl<'pat, 'state, 'de, O: Serialize> Packer<'pat, 'state, O> {
             // iterate backwards
             let index = orig_len - index - 1;
             let frame = &mut self.interp.frames[index];
-            let has_pack = frame.matches;
+            let mut has_pack = frame.matches;
             if frame.overstep > 0 {
                 // handle overstep
                 frame.overstep -= 1;
@@ -207,6 +208,16 @@ impl<'pat, 'state, 'de, O: Serialize> Packer<'pat, 'state, O> {
                 if has_pack {
                     pack_index -= 1;
                 }
+                if frame.poison {
+                    if has_pack {
+                        packs.remove(pack_index);
+                    }
+                    frame.matches = false;
+                    has_pack = false;
+                    if frame.is_value() {
+                        frame.poison = false;
+                    }
+                }
                 // unwind frame
                 if frame.prev() {
                     // successfully unwound. do nothing.
@@ -250,13 +261,7 @@ impl<'pat, 'state, 'de, O: Serialize> Packer<'pat, 'state, O> {
                             }
                         } else {
                             if !optional {
-                                // FIXME we actually want to skip it entirely
-                                // but that currently causes wrong results
-                                // so instead we just error...
-                                self.interp.error.insert({
-                                    MatchError::ValidationError
-                                });
-                                return Err(E::custom("subtree failed"));
+                                target_frame.poison = true;
                             }
                         }
                         if let Some((0, _)) = target_frame.num_subtrees() {
@@ -746,12 +751,13 @@ where
         let mut subframes = Vec::new();
         let mut output_matches = Vec::new();
         self.frames().iter_active().for_each(|frame| {
-            if let Some(key_subtree) = frame.key_subtree() {
+            if let Some((key_subtree, _)) = frame.key_subtree() {
                 subframes.push(Frame {
                     ops: &pat.protos[key_subtree],
                     iar: None,
                     overstep: 0,
                     matches: true,
+                    poison: false,
                 });
             }
             output_matches.push(false);
@@ -859,6 +865,8 @@ where
             }
         }
         for (f, m) in self.frames_mut().iter_active_mut().zip(output_matches) {
+            // FIXME inspect frame.key_subtree() for optional?
+            // what is this even supposed to do again?
             f.matches = m;
         }
         let obj = SerdeObject::Map(obj_inner);
@@ -1164,12 +1172,14 @@ mod tests {
             iar: None,
             matches: true,
             overstep: 0,
+            poison: false,
         });
         frames.push(Frame {
             ops: &consts.protos[1],
             iar: None,
             matches: true,
             overstep: 0,
+            poison: false,
         });
         let interp = Interpreter {
             pat: &consts,
@@ -1209,7 +1219,8 @@ mod tests {
                 }),
             },
             PatternElement::Tag {
-                key_subtree: Some(0),
+                key_subtree: 0,
+                optional: true,
             },
             PatternElement::Value {
                 name: Some(1),
@@ -1405,7 +1416,6 @@ mod tests {
             None
         ).unwrap();
         let data = r#"{"a": {"1": {"y": true}, "2": {"x": true, "y": true}}}"#;
-        //let data = r#"{"a": {"2": {"x": true, "y": true}}}"#;
         let mut der = JsonDeserializer::from_str(data);
         let mut err = Default::default();
         let mut frames = Default::default();
@@ -1419,18 +1429,17 @@ mod tests {
             interp,
             MAX_CALLS,
         ).deserialize(&mut der);
-        // FIXME it's supposed to skip "1" altogether but it currently errors.
-        assert!(result.is_err());
-        //let (mut packs, obj) = result.unwrap();
-        //assert!(obj.is_none());
-        //assert_eq!(packs.len(), 1);
-        //let pack = &packs[0];
-        //assert_eq!(pack.subpacks.len(), 1);
-        //let b = &pack.subpacks[0]["b"];
-        //assert_eq!(b.1, SerdeObject::Str(From::from("2")));
-        //assert_eq!(b.0.subpacks.len(), 1);
-        //assert_eq!(b.0.subpacks[0]["x"].1, SerdeObject::Bool(true));
-        //assert_eq!(b.0.subpacks[0]["y"].1, SerdeObject::Bool(true));
+        let (mut packs, obj) = result.unwrap();
+        assert!(obj.is_none());
+        assert_eq!(packs.len(), 1);
+        let pack = &packs[0];
+        dbg!(pack);
+        assert_eq!(pack.subpacks.len(), 1);
+        let b = &pack.subpacks[0]["b"];
+        assert_eq!(b.1, SerdeObject::Str(From::from("2")));
+        assert_eq!(b.0.subpacks.len(), 1);
+        assert_eq!(b.0.subpacks[0]["x"].1, SerdeObject::Bool(true));
+        assert_eq!(b.0.subpacks[0]["y"].1, SerdeObject::Bool(true));
     }
 
     #[test]
@@ -1439,12 +1448,12 @@ mod tests {
         let consts = crate::parser::parse::<&'static str, &'static str, ()>(
             "
             :map
-            ->['projects'?]:map
-              ->[commit:?str]:?map
-                ->[url:?str]:?map
-                  ->[branch:?str]:?map
-                    (->['active'?]active:?bool)?
-                    (->['federate'?]federate:?bool)?
+            ->['projects'?]?:map
+              ->[commit:?str]?:?map
+                ->[url:?str]?:?map
+                  ->[branch:?str]?:?map
+                    (->['active'?]?active:?bool)
+                    (->['federate'?]?federate:?bool)?
             ",
             None,
             None
diff --git a/src/vm/mod.rs b/src/vm/mod.rs
index 9f76ec5..81131c0 100644
--- a/src/vm/mod.rs
+++ b/src/vm/mod.rs
@@ -75,8 +75,7 @@ impl<O: Serialize> std::fmt::Debug for PatternConstants<O> {
     }
 }
 
-/// A pattern element.
-// FIXME: docs
+/// A datafu pattern element.
 #[derive(Copy, Clone, Debug)]
 pub(crate) enum PatternElement {
     /// A value is the core capturing element.
@@ -87,9 +86,19 @@ pub(crate) enum PatternElement {
         value: Option<Value>,
     },
     /// A tag is the core iterative element. It is always followed by a value.
+    ///
+    /// This one is empty.
+    EmptyTag,
+    /// A tag is the core iterative element. It is always followed by a value.
     Tag {
         /// The index of the (proto) key to match against.
-        key_subtree: Option<usize>,
+        key_subtree: usize,
+        /// Whether to allow this subtree to match nothing.
+        ///
+        /// By default, a datafu pattern only matches a tree if every branch of
+        /// the tree matches something. This enables opting out of that.
+        // TODO this isn't currently implemented.
+        optional: bool,
     },
     /// Marks the end of pattern iteration and the start of subtrees (if any).
     SubtreeMarker,
@@ -163,6 +172,7 @@ pub(crate) enum Value {
 }
 
 /// A pattern token.
+// TODO docs
 #[derive(Copy, Clone, Debug)]
 pub(crate) enum PatternToken {
     /// Start of a tag.
@@ -173,7 +183,7 @@ pub(crate) enum PatternToken {
     String(usize, bool),
     Regex(usize, bool),
     Parameter(usize, bool),
-    KeySubtree(usize),
+    KeySubtree(usize, bool),
     ValueSubtree(usize, bool),
 
     /// Represents a predicate which must be applied.
@@ -409,6 +419,8 @@ pub(crate) struct Frame<'pat> {
     overstep: usize,
     /// Whether this frame matches the data so far.
     matches: bool,
+    /// Whether this frame must not be allowed to match in the key step.
+    poison: bool,
 }
 
 impl<'pat, 'state, O: Serialize> Interpreter<'pat, 'state, O> {
@@ -416,7 +428,6 @@ impl<'pat, 'state, O: Serialize> Interpreter<'pat, 'state, O> {
         pat: &'pat PatternConstants<O>,
         error: &'state mut Option<crate::errors::MatchError>,
         frames: &'state mut Vec<Frame<'pat>>,
-        //output: &'state mut Pack<'pat, 'de>,
     ) -> Self {
         debug_assert!(frames.is_empty());
         frames.push(Frame {
@@ -424,13 +435,12 @@ impl<'pat, 'state, O: Serialize> Interpreter<'pat, 'state, O> {
             iar: None,
             overstep: 0,
             matches: true,
-            //path: Default::default(),
+            poison: false,
         });
         Self {
             pat: pat,
             error: error,
             frames: frames,
-            //output: Cell::from_mut(output),
         }
     }
 }
@@ -455,7 +465,9 @@ impl<'pat> Frame<'pat> {
                     },
                 }
             },
-            PatternElement::Tag { .. } => panic!("attempt to get type of tag"),
+            | PatternElement::EmptyTag
+            | PatternElement::Tag { .. }
+            => panic!("attempt to get type of tag"),
             _ => None,
         }
     }
@@ -470,7 +482,9 @@ impl<'pat> Frame<'pat> {
             PatternElement::Value { name: Some(name), .. } => {
                 Some(&*strings[name])
             },
-            PatternElement::Tag { .. } => panic!("attempt to get name of tag"),
+            | PatternElement::EmptyTag
+            | PatternElement::Tag { .. }
+            => panic!("attempt to get name of tag"),
             _ => None,
         }
     }
@@ -522,19 +536,35 @@ impl<'pat> Frame<'pat> {
         })
     }
 
-    /// Returns whether this key has a subtree.
+    /// Returns whether this key has a subtree, and if so, its index and
+    /// whether it is optional, as an `(index, optional)` pair.
     ///
     /// # Panics
     ///
     /// Panics if iteration hasn't begun, or this isn't a key.
-    fn key_subtree(&self) -> Option<usize> {
-        if let PatternElement::Tag { key_subtree } = self.op() {
-            key_subtree
-        } else {
-            unreachable!()
+    fn key_subtree(&self) -> Option<(usize, bool)> {
+        match self.op() {
+            PatternElement::Tag { key_subtree, optional } => {
+                Some((key_subtree, optional))
+            },
+            PatternElement::EmptyTag => None,
+            _ => unreachable!(),
         }
     }
 
+    /// Returns whether this frame is in a value operation.
+    ///
+    /// # Panics
+    ///
+    /// Panics if iteration hasn't begun. Returns `false` for an inactive frame.
+    #[inline]
+    fn is_value(&self) -> bool {
+        self.active() && matches!(
+            self.raw_op(),
+            PatternElement::Value { .. },
+        )
+    }
+
     /// Returns this value subtree, as an `(index, optional)` pair.
     ///
     /// # Panics
@@ -561,6 +591,7 @@ impl<'pat> Frame<'pat> {
     }
 
     /// Returns whether this frame is active (not overstepped).
+    #[inline]
     fn active(&self) -> bool {
         self.overstep == 0
     }