From 44a6ee680308a3d756ef7c17db2f64518bb2b493 Mon Sep 17 00:00:00 2001 From: SoniEx2 Date: Sat, 12 Nov 2022 20:28:57 -0300 Subject: Make Packer::visit_map mostly usable --- src/vm/de.rs | 163 +++++++++++++++++++++++++++++++++++++++++++--------------- src/vm/mod.rs | 24 ++++++--- 2 files changed, 138 insertions(+), 49 deletions(-) (limited to 'src') diff --git a/src/vm/de.rs b/src/vm/de.rs index 734101d..6cb8e94 100644 --- a/src/vm/de.rs +++ b/src/vm/de.rs @@ -246,10 +246,13 @@ impl<'pat, 'state, 'de, O: Serialize> Packer<'pat, 'state, O> { target_frame.matches = true; pack_index += 1; } else { - packs[target_pack].merge_from(pack); + packs[target_pack].merge_breadth(pack); } } else { if !optional { + // FIXME we actually want to skip it entirely + // but that currently causes wrong results + // so instead we just error... self.interp.error.insert({ MatchError::ValidationError }); @@ -741,6 +744,7 @@ where if let Err(e) = self.step_in() { return Err(e); } self.collecting = collecting; let mut subframes = Vec::new(); + let mut output_matches = Vec::new(); self.frames().iter_active().for_each(|frame| { if let Some(key_subtree) = frame.key_subtree() { subframes.push(Frame { @@ -750,6 +754,7 @@ where matches: true, }); } + output_matches.push(false); }); let mut obj_inner = Vec::new(); let mut output_packs = Vec::new(); @@ -787,7 +792,7 @@ where ); } let mut key_packs_per_frame = packed_key.0.into_iter(); - let mut value_packs_per_frame = packed_value.0; + let mut value_packs_per_frame = packed_value.0.into_iter(); // whatever is active in self.frames(), if matches, has a pack // whatever is in subframes, if matches, has a pack // count(active self.frames() with subtree which match) is always @@ -802,9 +807,11 @@ where // value_packs_per_frame ~ self // keys come first tho (key.merge_from(value)) let mut iter_subframes = subframes.iter_mut(); - // related to value_packs_per_frame + // related to output_packs let mut pack_index = 0; - for frame in self.frames().iter_active() { + for (frame, out_matches) in self.frames().iter_active().zip({ + &mut output_matches + }) { // check if this frame has an associated subframe let subframe = if frame.key_subtree().is_some() { // if there are more frames with associated subframes @@ -813,15 +820,17 @@ where } else { None }; + let mut new_pack = None; if frame.matches && subframe.is_some() { // this already implies subframe.matches - let key_pack = key_packs_per_frame.next().unwrap(); - let value_pack = &mut value_packs_per_frame[pack_index]; - key_pack.merge_into(value_pack); - pack_index += 1; + let mut key_pack = key_packs_per_frame.next().unwrap(); + let value_pack = value_packs_per_frame.next().unwrap(); + key_pack.merge_depth(value_pack); + new_pack = Some(key_pack); } else if frame.matches { // value matches but there's no subframe, carry on - pack_index += 1; + let value_pack = value_packs_per_frame.next().unwrap(); + new_pack = Some(value_pack); } else if !frame.matches && subframe.is_some() { // frame didn't match but there was a subframe let subframe = subframe.unwrap(); @@ -836,17 +845,22 @@ where } else { // no relevant packs } - } - if output_packs.is_empty() { - output_packs = value_packs_per_frame; - } else { - for (left, right) in output_packs.iter_mut().zip( - value_packs_per_frame, - ) { - left.subpacks.extend(right.subpacks) + if let Some(new_pack) = new_pack { + if !*out_matches { + *out_matches = true; + output_packs.insert(pack_index, Pack::default()); + } + let output_pack = &mut output_packs[pack_index]; + output_pack.subpacks.extend(new_pack.subpacks); + } + if *out_matches { + pack_index += 1; } } } + for (f, m) in self.frames_mut().iter_active_mut().zip(output_matches) { + f.matches = m; + } let obj = SerdeObject::Map(obj_inner); let mut final_packs = self.step_out(output_packs)?; let mut iter_final_packs = 0..; @@ -1217,7 +1231,7 @@ mod tests { SerdeObject::Str("hello".into()), ); assert_eq!( - packs[0].subpacks[0]["value"].1, + packs[0].subpacks[0]["key"].0.subpacks[0]["value"].1, SerdeObject::U64(0), ); assert_eq!( @@ -1225,7 +1239,7 @@ mod tests { SerdeObject::Str("world".into()), ); assert_eq!( - packs[0].subpacks[1]["value"].1, + packs[0].subpacks[1]["key"].0.subpacks[0]["value"].1, SerdeObject::U64(1), ); } @@ -1371,35 +1385,28 @@ mod tests { assert!(obj.is_none()); assert_eq!(packs.len(), 1); let pack = packs.pop().unwrap(); - assert_eq!(pack.subpacks.len(), 2); + assert_eq!(pack.subpacks.len(), 1); assert_eq!(pack.subpacks[0]["name"].1, SerdeObject::Str(From::from("a"))); - assert_eq!(pack.subpacks[1]["value"].1, SerdeObject::U32(1)); + assert_eq!(pack.subpacks[0]["value"].1, SerdeObject::U32(1)); } #[test] - fn test_parser_subtrees_strict() { - // use a parsed pattern with subtrees to test Packer - // also test a non-self-describing format (postcard) - // also require at least one subtree to match on every iteration. - // also this checks for validation error + fn test_subtree_missing() { + // use a parsed pattern that might actually be used in the real world. let consts = crate::parser::parse::<&'static str, &'static str, ()>( - ":map((->['name'?]name)?(->['value'?]value)?)(->[:str]:u32)", - None, + " + :map + ->['a'?]:map + ->[b:?str]:?map + (->['x'?]x:?bool) + (->['y'?]y:?bool)? + ", None, + None ).unwrap(); - let data = &[ - 0x03, // map length (3) - 0x04, // string length (4) - 0x6E, 0x61, 0x6D, 0x65, // b'name' - 0x01, // 1 - 0x05, // string length (5) - 0x76, 0x61, 0x6C, 0x75, 0x65, // b'value' - 0x01, // 1 - 0x05, // string length (5) - 0x76, 0x65, 0x6C, 0x75, 0x65, // b'velue' - 0x01, // 1 - ]; - let mut der = PostcardDeserializer::from_bytes(data); + let data = r#"{"a": {"1": {"y": true}, "2": {"x": true, "y": true}}}"#; + //let data = r#"{"a": {"2": {"x": true, "y": true}}}"#; + let mut der = JsonDeserializer::from_str(data); let mut err = Default::default(); let mut frames = Default::default(); let interp = Interpreter::new( @@ -1412,8 +1419,80 @@ mod tests { interp, MAX_CALLS, ).deserialize(&mut der); - assert!(matches!(err, Some(MatchError::ValidationError))); + // FIXME it's supposed to skip "1" altogether but it currently errors. assert!(result.is_err()); + //let (mut packs, obj) = result.unwrap(); + //assert!(obj.is_none()); + //assert_eq!(packs.len(), 1); + //let pack = &packs[0]; + //assert_eq!(pack.subpacks.len(), 1); + //let b = &pack.subpacks[0]["b"]; + //assert_eq!(b.1, SerdeObject::Str(From::from("2"))); + //assert_eq!(b.0.subpacks.len(), 1); + //assert_eq!(b.0.subpacks[0]["x"].1, SerdeObject::Bool(true)); + //assert_eq!(b.0.subpacks[0]["y"].1, SerdeObject::Bool(true)); + } + + #[test] + fn test_realish_use_case() { + // use a parsed pattern that might actually be used in the real world. + let consts = crate::parser::parse::<&'static str, &'static str, ()>( + " + :map + ->['projects'?]:map + ->[commit:?str]:?map + ->[url:?str]:?map + ->[branch:?str]:?map + (->['active'?]active:?bool)? + (->['federate'?]federate:?bool)? + ", + None, + None + ).unwrap(); + let data = r#" + {"base_url": "https://ganarchy.autistic.space", "repo_list_srcs": {"https://ganarchy.autistic.space/index.toml": {"active": false}}, "projects": {"a8fb5087f79eafe312db270082c052c427b208c2": {"https://soniex2.autistic.space/git-repos/mmorfc.git": {"HEAD": {"active": true, "pinned": true}}}, "2d0b363fe3179087de59d9ef4a2d14af21d89071": {"https://soniex2.autistic.space/git-repos/chewstuff.git": {"HEAD": {"active": true, "pinned": true}}}}} + "#; + let mut der = JsonDeserializer::from_str(data); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new( + &consts, + &mut err, + &mut frames, + //&mut output, + ); + let result = Packer::new( + interp, + MAX_CALLS, + ).deserialize(&mut der); + let (mut packs, obj) = result.unwrap(); + assert!(obj.is_none()); + assert_eq!(packs.len(), 1); + let pack = &packs[0]; + assert_eq!(pack.subpacks.len(), 2); + + let commit = &pack.subpacks[0]["commit"]; + assert_eq!(commit.1, SerdeObject::Str(From::from("a8fb5087f79eafe312db270082c052c427b208c2"))); + assert_eq!(commit.0.subpacks.len(), 1); + let url = &commit.0.subpacks[0]["url"]; + assert_eq!(url.1, SerdeObject::Str(From::from("https://soniex2.autistic.space/git-repos/mmorfc.git"))); + assert_eq!(url.0.subpacks.len(), 1); + let branch = &url.0.subpacks[0]["branch"]; + assert_eq!(branch.1, SerdeObject::Str(From::from("HEAD"))); + assert_eq!(branch.0.subpacks.len(), 1); + let active = &branch.0.subpacks[0]["active"]; + assert_eq!(active.1, SerdeObject::Bool(true)); + + let commit = &pack.subpacks[1]["commit"]; + assert_eq!(commit.1, SerdeObject::Str(From::from("2d0b363fe3179087de59d9ef4a2d14af21d89071"))); + assert_eq!(commit.0.subpacks.len(), 1); + let url = &commit.0.subpacks[0]["url"]; + assert_eq!(url.1, SerdeObject::Str(From::from("https://soniex2.autistic.space/git-repos/chewstuff.git"))); + assert_eq!(url.0.subpacks.len(), 1); + let branch = &url.0.subpacks[0]["branch"]; + assert_eq!(branch.1, SerdeObject::Str(From::from("HEAD"))); + assert_eq!(branch.0.subpacks.len(), 1); + assert_eq!(active.1, SerdeObject::Bool(true)); } } diff --git a/src/vm/mod.rs b/src/vm/mod.rs index 2e9d796..9f76ec5 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -353,8 +353,9 @@ pub struct Pack<'pat, 'de> { } impl<'pat, 'de> Pack<'pat, 'de> { - /// Merges two packs, with elements from `other` coming after `self`. - fn merge_from(&mut self, mut other: Self) { + /// Merges two packs, with elements from `other` coming after `self`, as if + /// parts of the same iteration. + fn merge_breadth(&mut self, mut other: Self) { match (self.subpacks.len(), other.subpacks.len()) { (0, _) => { *self = other; @@ -365,14 +366,23 @@ impl<'pat, 'de> Pack<'pat, 'de> { l.extend(r) } }, - _ => unreachable!("merge_from unbalanced iterations"), + _ => unreachable!("merge_breadth unbalanced iterations"), } } - /// Same as `merge_from` but borrows `other` instead of `self`. - fn merge_into(mut self, other: &mut Self) { - std::mem::swap(&mut self, other); - other.merge_from(self); + /// Merges two packs, with elements from `other` coming inside the last + /// element of `self` recursively, if any. + fn merge_depth(&mut self, mut other: Self) { + // note that we can't actually recurse deeper than the VM + // actually does itself, so we don't need to worry about + // blowing the stack. + if let Some(into) = self.subpacks.iter_mut().rev().filter(|map| { + !map.is_empty() + }).next() { + into.last_mut().unwrap().1.0.merge_depth(other); + } else { + *self = other; + } } } -- cgit 1.4.1