From 72397506c3529f4878b5a1cf8205599764ac4088 Mon Sep 17 00:00:00 2001 From: SoniEx2 Date: Sun, 13 Nov 2022 12:30:11 -0300 Subject: Finish most "core" VM functionality Still need to do lists --- src/lib.rs | 26 +++++++++++++++++++++-- src/parser.rs | 19 ++++++++--------- src/vm/de.rs | 67 +++++++++++++++++++++++++++++++++-------------------------- src/vm/mod.rs | 61 ++++++++++++++++++++++++++++++++++++++++------------- 4 files changed, 117 insertions(+), 56 deletions(-) (limited to 'src') diff --git a/src/lib.rs b/src/lib.rs index bda97f4..6d8a7e0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,6 +14,8 @@ //! //! ## Syntax Elements of Datafu Expressions //! +//! FIXME still need to update these... +//! //! An arrow is `->` and indicates indexing/iteration. Whether indexing or //! iteration is used is defined by the elements that follow, with iteration //! being used by default. @@ -89,7 +91,7 @@ //! arg ::= parameter | literal | regex | keymatch //! //! arrow ::= '->' -//! keymatch ::= '[' [name] expression ']' +//! keymatch ::= '[' [name] expression ']' ['?'] //! subvalue ::= '(' expression ')' ['?'] //! ``` //! @@ -98,7 +100,27 @@ //! //! # Examples //! -//! +//! The Datafu pattern +//! +//! ```datafu +//! :map +//! ->['a'?]:map +//! ->[b:?str]:?map +//! (->['x'?]x:?bool) +//! (->['y'?]y:?bool)? +//! ``` +//! +//! When matched against the JSON +//! +//! ```json +//! {"a": {"1": {"y": true}, "2": {"x": true, "y": true}}} +//! ``` +//! +//! Produces the results for the sub-JSON +//! +//! ```json +//! {"a": {"2": {"x": true, "y": true}}} +//! ``` pub mod errors; //pub mod type_tree; diff --git a/src/parser.rs b/src/parser.rs index 0698b6b..a11b68c 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -218,12 +218,13 @@ impl_trait! 
{ match self_.root.tokens.drain(self_.len..).as_slice() { &[ PatternToken::Arrow, - PatternToken::KeySubtree(index), + PatternToken::KeySubtree(index, optional), ref name_value @ .., PatternToken::End, ] => { let tag = PatternElement::Tag { - key_subtree: Some(index), + key_subtree: index, + optional, }; self_.root.consts.protos.last_mut().unwrap().push(tag); let value = collect_name_and_value(name_value); @@ -234,9 +235,7 @@ impl_trait! { ref name_value @ .., PatternToken::End, ] => { - let tag = PatternElement::Tag { - key_subtree: None, - }; + let tag = PatternElement::EmptyTag; self_.root.consts.protos.last_mut().unwrap().push(tag); let value = collect_name_and_value(name_value); self_.root.consts.protos.last_mut().unwrap().push(value); @@ -627,7 +626,7 @@ where })) } - /// key_subtree <- sp '[' sp ( matcher / name sp matcher? ) sp subtree sp ( ']' / unexpected_token / unexpected_end ) sp + /// key_subtree <- sp '[' sp ( matcher / name sp matcher? ) sp subtree sp ( ']' / unexpected_token / unexpected_end ) ( '?'? -> MarkSkippable ) sp // ( '?'? -> MarkSkippable ) fn key_subtree(&mut self, s: &mut &'s str) -> Result> { let mut cursor = *s; @@ -657,16 +656,16 @@ where || subtree.unexpected_end(&mut cursor)? ); + let optional = strip_prefix(&mut cursor, "?"); subtree.sp(&mut cursor); - //let skippable = strip_prefix(&mut cursor, "?"); *s = cursor; let id = subtree.commit(); - self.tokens.push(PatternToken::KeySubtree(id)); + self.tokens.push(PatternToken::KeySubtree(id, optional)); true })) } - /// value_subtree <- sp '(' sp subtree sp ( ')' / unexpected_token / unexpected_end ) sp ( '?'? -> MarkSkippable ) + /// value_subtree <- sp '(' sp subtree sp ( ')' / unexpected_token / unexpected_end ) ( '?'? -> MarkSkippable ) sp fn value_subtree(&mut self, s: &mut &'s str) -> Result> { let mut cursor = *s; Ok(lblock!('matches: { @@ -683,8 +682,8 @@ where || subtree.unexpected_end(&mut cursor)? 
); - subtree.sp(&mut cursor); let optional = strip_prefix(&mut cursor, "?"); + subtree.sp(&mut cursor); *s = cursor; if !subtree.is_empty() { let id = subtree.commit(); diff --git a/src/vm/de.rs b/src/vm/de.rs index 6cb8e94..493b658 100644 --- a/src/vm/de.rs +++ b/src/vm/de.rs @@ -167,6 +167,7 @@ impl<'pat, 'state, 'de, O: Serialize> Packer<'pat, 'state, O> { iar: None, overstep: 0, matches: true, + poison: false, }; // we want the "newest" frame last, so it is // easier to unwind back. @@ -199,7 +200,7 @@ impl<'pat, 'state, 'de, O: Serialize> Packer<'pat, 'state, O> { // iterate backwards let index = orig_len - index - 1; let frame = &mut self.interp.frames[index]; - let has_pack = frame.matches; + let mut has_pack = frame.matches; if frame.overstep > 0 { // handle overstep frame.overstep -= 1; @@ -207,6 +208,16 @@ impl<'pat, 'state, 'de, O: Serialize> Packer<'pat, 'state, O> { if has_pack { pack_index -= 1; } + if frame.poison { + if has_pack { + packs.remove(pack_index); + } + frame.matches = false; + has_pack = false; + if frame.is_value() { + frame.poison = false; + } + } // unwind frame if frame.prev() { // successfully unwound. do nothing. @@ -250,13 +261,7 @@ impl<'pat, 'state, 'de, O: Serialize> Packer<'pat, 'state, O> { } } else { if !optional { - // FIXME we actually want to skip it entirely - // but that currently causes wrong results - // so instead we just error... 
- self.interp.error.insert({ - MatchError::ValidationError - }); - return Err(E::custom("subtree failed")); + target_frame.poison = true; } } if let Some((0, _)) = target_frame.num_subtrees() { @@ -746,12 +751,13 @@ where let mut subframes = Vec::new(); let mut output_matches = Vec::new(); self.frames().iter_active().for_each(|frame| { - if let Some(key_subtree) = frame.key_subtree() { + if let Some((key_subtree, _)) = frame.key_subtree() { subframes.push(Frame { ops: &pat.protos[key_subtree], iar: None, overstep: 0, matches: true, + poison: false, }); } output_matches.push(false); @@ -859,6 +865,8 @@ where } } for (f, m) in self.frames_mut().iter_active_mut().zip(output_matches) { + // FIXME inspect frame.key_subtree() for optional? + // what is this even supposed to do again? f.matches = m; } let obj = SerdeObject::Map(obj_inner); @@ -1164,12 +1172,14 @@ mod tests { iar: None, matches: true, overstep: 0, + poison: false, }); frames.push(Frame { ops: &consts.protos[1], iar: None, matches: true, overstep: 0, + poison: false, }); let interp = Interpreter { pat: &consts, @@ -1209,7 +1219,8 @@ mod tests { }), }, PatternElement::Tag { - key_subtree: Some(0), + key_subtree: 0, + optional: true, }, PatternElement::Value { name: Some(1), @@ -1405,7 +1416,6 @@ mod tests { None ).unwrap(); let data = r#"{"a": {"1": {"y": true}, "2": {"x": true, "y": true}}}"#; - //let data = r#"{"a": {"2": {"x": true, "y": true}}}"#; let mut der = JsonDeserializer::from_str(data); let mut err = Default::default(); let mut frames = Default::default(); @@ -1419,18 +1429,17 @@ mod tests { interp, MAX_CALLS, ).deserialize(&mut der); - // FIXME it's supposed to skip "1" altogether but it currently errors. 
- assert!(result.is_err()); - //let (mut packs, obj) = result.unwrap(); - //assert!(obj.is_none()); - //assert_eq!(packs.len(), 1); - //let pack = &packs[0]; - //assert_eq!(pack.subpacks.len(), 1); - //let b = &pack.subpacks[0]["b"]; - //assert_eq!(b.1, SerdeObject::Str(From::from("2"))); - //assert_eq!(b.0.subpacks.len(), 1); - //assert_eq!(b.0.subpacks[0]["x"].1, SerdeObject::Bool(true)); - //assert_eq!(b.0.subpacks[0]["y"].1, SerdeObject::Bool(true)); + let (mut packs, obj) = result.unwrap(); + assert!(obj.is_none()); + assert_eq!(packs.len(), 1); + let pack = &packs[0]; + dbg!(pack); + assert_eq!(pack.subpacks.len(), 1); + let b = &pack.subpacks[0]["b"]; + assert_eq!(b.1, SerdeObject::Str(From::from("2"))); + assert_eq!(b.0.subpacks.len(), 1); + assert_eq!(b.0.subpacks[0]["x"].1, SerdeObject::Bool(true)); + assert_eq!(b.0.subpacks[0]["y"].1, SerdeObject::Bool(true)); } #[test] @@ -1439,12 +1448,12 @@ mod tests { let consts = crate::parser::parse::<&'static str, &'static str, ()>( " :map - ->['projects'?]:map - ->[commit:?str]:?map - ->[url:?str]:?map - ->[branch:?str]:?map - (->['active'?]active:?bool)? - (->['federate'?]federate:?bool)? + ->['projects'?]?:map + ->[commit:?str]?:?map + ->[url:?str]?:?map + ->[branch:?str]?:?map + (->['active'?]?active:?bool) + (->['federate'?]?federate:?bool)? ", None, None diff --git a/src/vm/mod.rs b/src/vm/mod.rs index 9f76ec5..81131c0 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -75,8 +75,7 @@ impl std::fmt::Debug for PatternConstants { } } -/// A pattern element. -// FIXME: docs +/// A datafu pattern element. #[derive(Copy, Clone, Debug)] pub(crate) enum PatternElement { /// A value is the core capturing element. @@ -87,9 +86,19 @@ pub(crate) enum PatternElement { value: Option, }, /// A tag is the core iterative element. It is always followed by a value. + /// + /// This one is empty. + EmptyTag, + /// A tag is the core iterative element. It is always followed by a value. 
Tag { /// The index of the (proto) key to match against. - key_subtree: Option, + key_subtree: usize, + /// Whether to allow this subtree to match nothing. + /// + /// By default, a datafu pattern only matches a tree if every branch of + /// the tree matches something. This enables opting out of that. + // TODO this isn't currently implemented. + optional: bool, }, /// Marks the end of pattern iteration and the start of subtrees (if any). SubtreeMarker, @@ -163,6 +172,7 @@ pub(crate) enum Value { } /// A pattern token. +// TODO docs #[derive(Copy, Clone, Debug)] pub(crate) enum PatternToken { /// Start of a tag. @@ -173,7 +183,7 @@ pub(crate) enum PatternToken { String(usize, bool), Regex(usize, bool), Parameter(usize, bool), - KeySubtree(usize), + KeySubtree(usize, bool), ValueSubtree(usize, bool), /// Represents a predicate which must be applied. @@ -409,6 +419,8 @@ pub(crate) struct Frame<'pat> { overstep: usize, /// Whether this frame matches the data so far. matches: bool, + /// Whether this frame must not be allowed to match in the key step. + poison: bool, } impl<'pat, 'state, O: Serialize> Interpreter<'pat, 'state, O> { @@ -416,7 +428,6 @@ impl<'pat, 'state, O: Serialize> Interpreter<'pat, 'state, O> { pat: &'pat PatternConstants, error: &'state mut Option, frames: &'state mut Vec>, - //output: &'state mut Pack<'pat, 'de>, ) -> Self { debug_assert!(frames.is_empty()); frames.push(Frame { @@ -424,13 +435,12 @@ impl<'pat, 'state, O: Serialize> Interpreter<'pat, 'state, O> { iar: None, overstep: 0, matches: true, - //path: Default::default(), + poison: false, }); Self { pat: pat, error: error, frames: frames, - //output: Cell::from_mut(output), } } } @@ -455,7 +465,9 @@ impl<'pat> Frame<'pat> { }, } }, - PatternElement::Tag { .. } => panic!("attempt to get type of tag"), + | PatternElement::EmptyTag + | PatternElement::Tag { .. 
} => panic!("attempt to get type of tag"), _ => None, } } @@ -470,7 +482,9 @@ impl<'pat> Frame<'pat> { PatternElement::Value { name: Some(name), .. } => { Some(&*strings[name]) }, - PatternElement::Tag { .. } => panic!("attempt to get name of tag"), + | PatternElement::EmptyTag + | PatternElement::Tag { .. } + => panic!("attempt to get name of tag"), _ => None, } } @@ -522,19 +536,35 @@ impl<'pat> Frame<'pat> { }) } - /// Returns whether this key has a subtree. + /// Returns whether this key has a subtree, and if so, its index and + /// whether it is optional, as an `(index, optional)` pair. /// /// # Panics /// /// Panics if iteration hasn't begun, or this isn't a key. - fn key_subtree(&self) -> Option { - if let PatternElement::Tag { key_subtree } = self.op() { - key_subtree - } else { - unreachable!() + fn key_subtree(&self) -> Option<(usize, bool)> { + match self.op() { + PatternElement::Tag { key_subtree, optional } => { + Some((key_subtree, optional)) + }, + PatternElement::EmptyTag => None, + _ => unreachable!(), } } + /// Returns whether this frame is in a value operation. + /// + /// # Panics + /// + /// Panics if the frame isn't active or iteration hasn't begun. + #[inline] + fn is_value(&self) -> bool { + self.active() && matches!( + self.raw_op(), + PatternElement::Value { .. }, + ) + } + /// Returns this value subtree, as an `(index, optional)` pair. /// /// # Panics @@ -561,6 +591,7 @@ impl<'pat> Frame<'pat> { } /// Returns whether this frame is active (not overstepped). + #[inline] fn active(&self) -> bool { self.overstep == 0 } -- cgit 1.4.1