From 477a1fd5ea7603dd261bcfef9cd443f98310e5db Mon Sep 17 00:00:00 2001 From: SoniEx2 Date: Sun, 30 Oct 2022 20:28:37 -0300 Subject: Finish most of Packer and Parser This is still broken even tho it passes the tests --- src/vm/de.rs | 385 ++++++++++++++++++++++++++++++++++++++++++++++++++++------ src/vm/mod.rs | 17 ++- 2 files changed, 361 insertions(+), 41 deletions(-) (limited to 'src/vm') diff --git a/src/vm/de.rs b/src/vm/de.rs index e26ec5e..dc3e9b3 100644 --- a/src/vm/de.rs +++ b/src/vm/de.rs @@ -134,6 +134,8 @@ impl<'pat, 'state, 'de, O: Serialize> Packer<'pat, 'state, O> { if frame.overstep > 0 || !frame.matches { // overstepped and non-matching frames frame.overstep += 1; + // FIXME check if this is correct (it probably isn't) + frame.matches = false; } else { if !frame.next() { // empty/end-of frames @@ -153,7 +155,7 @@ impl<'pat, 'state, 'de, O: Serialize> Packer<'pat, 'state, O> { frame.overstep = 1; let mut at = index + 1; while self.interp.frames[index].next() { - let op = self.interp.frames[index].op(); + let op = self.interp.frames[index].raw_op(); if let PatternElement::ValueSubtree { index: subtree, .. } = op { @@ -187,12 +189,14 @@ impl<'pat, 'state, 'de, O: Serialize> Packer<'pat, 'state, O> { self.call_limit += 1; let mut index_iter = 0..; let mut pack_index = packs.len(); + let orig_len = self.interp.frames.len(); while let Some(index) = index_iter.next().filter(|&i| { - i < self.interp.frames.len() + i < orig_len }) { // iterate backwards - let index = self.interp.frames.len() - index - 1; + let index = orig_len - index - 1; let frame = &mut self.interp.frames[index]; + dbg!(index); let has_pack = frame.matches; if frame.overstep > 0 { // handle overstep @@ -212,44 +216,44 @@ impl<'pat, 'state, 'de, O: Serialize> Packer<'pat, 'state, O> { let mut target_unwound = false; while count > 0 && target > 0 { target -= 1; + if self.interp.frames[target].matches { + debug_assert!(target_pack > 0); + target_pack -= 1; + } match self.interp.frames[target].num_subtrees() { - Some((num, _)) if num < count => { - if has_pack { - debug_assert!(target_pack > 0); - target_pack -= 1; - } + Some((num, unwound)) if num < count => { count -= num; }, Some((num, unwound)) => { - if has_pack { - debug_assert!(target_pack > 0); - target_pack -= 1; - } target_unwound = unwound; count = 0; }, None => { - if has_pack { - if self.interp.frames[target].matches { - debug_assert!(target_pack > 0); - target_pack -= 1; - } - } count += 1; }, } } if count == 0 { - let frame = self.interp.frames.remove(target); - // TODO what to do with `frame`? + let frame = self.interp.frames.remove(index); + let target_frame = &mut self.interp.frames[target]; + // FIXME check frame.matches vs frames[target].op() + // FIXME actually test that this is correct + let op = target_frame.raw_op(); + target_frame.prev().then(|| ()).unwrap(); + if !target_unwound { + packs.insert(target_pack, Default::default()); + pack_index += 1; + // FIXME this is VERY wrong + target_frame.matches = true; + } if has_pack { // has parent frame let pack = packs.remove(pack_index); - if !target_unwound { - packs.insert(target_pack, pack); - } else { - packs[target_pack].merge_from(pack); - } + packs[target_pack].merge_from(pack); + } + if let Some((0, _)) = target_frame.num_subtrees() { + //target_frame.prev().then(|| ()).unwrap(); + target_frame.overstep = 0; } } } @@ -278,11 +282,13 @@ where Type::IgnoredAny, |target_type, frame| { Ok(match (target_type, frame.get_type()) { + // FIXME handle None correctly // required type binds stronger than any/ignored_any (Type::IgnoredAny, Some((ty, true))) => ty, (Type::Any, Some((ty, true))) => ty, - (ty, Some((Type::IgnoredAny, true))) => ty, - (ty, Some((Type::Any, true))) => ty, + // and also stronger than optional any/ignored_any + (ty, Some((Type::IgnoredAny, _))) => ty, + (ty, Some((Type::Any, _))) => ty, // prefer owned if any branch prefers owned (Type::String, Some((Type::Str, true))) => { Type::String @@ -306,7 +312,7 @@ where // types which are not the same are an error because we // only request a specific type if it's actually required (left, Some((right, _))) => { - return Err(todo!()); + return Err(MatchError::Unsatisfiable); }, _ => Type::Any, }) @@ -467,14 +473,167 @@ where where E: serde::de::Error, { - // no real option but to clone. - vs!(self (Cow::Owned(v.to_owned())) Str (Type::String | Type::Str)) + let pat = self.interp.pat; + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Str(Cow::Owned(v.into()))); + } + let mut packs = Vec::new(); + let result = { + self.frames_mut().iter_active_mut().try_for_each(|frame| { + let ty = frame.get_type(); + match ty { + | Some((Type::String, _)) + | Some((Type::Str, _)) + | Some((Type::Any, _)) + | Some((Type::IgnoredAny, _)) + | None + => {}, + Some((_, false)) => { + frame.matches = false; + return Ok(()); + }, + Some((_, true)) => { + return Err(MatchError::ValidationError) + }, + } + match frame.op() { + PatternElement::Value { value: Some(value), .. } => { + match value { + | Value::String { index, skippable } + if pat.strings[index] != v => { + if skippable { + frame.matches = false; + return Ok(()); + } else { + return Err(MatchError::ValidationError); + } + }, + | Value::Regex { index, skippable } + if !pat.regices[index].is_match(v) => { + if skippable { + frame.matches = false; + return Ok(()); + } else { + return Err(MatchError::ValidationError); + } + }, + | Value::Type { .. } + | Value::Regex { .. } + | Value::String { .. } + => {}, // ok + } + }, + PatternElement::Value { value: None, .. } => {}, + _ => unreachable!(), + } + let mut pack = Pack::default(); + if let Some(name) = frame.get_name(pat) { + let mut map = IndexMap::new(); + map.insert( + name, + ( + Pack::default(), + SerdeObject::Str(Cow::Owned(v.into())), + ), + ); + pack.subpacks.push(map); + } + packs.push(pack); + Ok(()) + }) + }; + match result { + Err(e) => { + self.interp.error.insert(e); + return Err(todo!()); + }, + _ => (), + } + Ok((packs, obj)) } fn visit_borrowed_str(self, v: &'de str) -> Result where E: serde::de::Error, { - vs!(self (Cow::Borrowed(v)) Str (Type::String | Type::Str)) + let pat = self.interp.pat; + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Str(Cow::Borrowed(v))); + } + let mut packs = Vec::new(); + let result = { + self.frames_mut().iter_active_mut().try_for_each(|frame| { + let ty = frame.get_type(); + match ty { + | Some((Type::String, _)) + | Some((Type::Str, _)) + | Some((Type::Any, _)) + | Some((Type::IgnoredAny, _)) + | None + => {}, + Some((_, false)) => { + frame.matches = false; + return Ok(()); + }, + Some((_, true)) => { + return Err(MatchError::ValidationError) + }, + } + match frame.op() { + PatternElement::Value { value: Some(value), .. } => { + match value { + | Value::String { index, skippable } + if pat.strings[index] != v => { + if skippable { + frame.matches = false; + return Ok(()); + } else { + return Err(MatchError::ValidationError); + } + }, + | Value::Regex { index, skippable } + if !pat.regices[index].is_match(v) => { + if skippable { + frame.matches = false; + return Ok(()); + } else { + return Err(MatchError::ValidationError); + } + }, + | Value::Type { .. } + | Value::Regex { .. } + | Value::String { .. } + => {}, // ok + } + }, + PatternElement::Value { value: None, .. } => {}, + _ => unreachable!(), + } + let mut pack = Pack::default(); + if let Some(name) = frame.get_name(pat) { + let mut map = IndexMap::new(); + map.insert( + name, + ( + Pack::default(), + SerdeObject::Str(Cow::Borrowed(v)), + ), + ); + pack.subpacks.push(map); + } + packs.push(pack); + Ok(()) + }) + }; + match result { + Err(e) => { + self.interp.error.insert(e); + return Err(todo!()); + }, + _ => (), + } + Ok((packs, obj)) } fn visit_string(self, v: String) -> Result where @@ -684,11 +843,11 @@ where } } let obj = SerdeObject::Map(obj_inner); + dbg!(&self.interp.frames); let mut final_packs = self.step_out(output_packs); - let mut iter_final_packs = final_packs.iter_mut(); - self.frames_mut().iter_active_mut().zip({ - final_packs.iter_mut() - }).for_each(|(frame, pack)| { + let mut iter_final_packs = 0..; + dbg!(&self.interp.frames); + self.frames_mut().iter_active_mut().for_each(|frame| { let ty = frame.get_type(); match ty { | Some((Type::Map, _)) @@ -696,10 +855,17 @@ where | Some((Type::IgnoredAny, _)) | None => { - frame.matches = true; + let matched = std::mem::replace(&mut frame.matches, true); + if !matched { + final_packs.insert( + iter_final_packs.start, + Pack::default(), + ); + } }, _ => return, } + let pack = &mut final_packs[iter_final_packs.next().unwrap()]; if let Some(name) = frame.get_name(pat) { // we can assume collecting == true let old_pack = std::mem::take(pack); @@ -860,6 +1026,7 @@ mod tests { use crate::vm::SerdeObject; use crate::vm::Frame; use serde_json::Deserializer as JsonDeserializer; + use postcard::Deserializer as PostcardDeserializer; use serde::de::DeserializeSeed as _; #[test] @@ -940,7 +1107,6 @@ mod tests { let mut frames = Default::default(); let interp = Interpreter::new(&consts, &mut err, &mut frames); let packed = Packer::new(interp, MAX_CALLS).deserialize(&mut der); - dbg!(&packed); // error produced by serde_json assert!(packed.is_err()); } @@ -1003,6 +1169,7 @@ mod tests { #[test] fn test_map() { + // test visit_map let mut consts = PatternConstants::<()>::default(); consts.strings.push("key".into()); consts.strings.push("value".into()); @@ -1055,5 +1222,149 @@ mod tests { SerdeObject::U64(1), ); } + + #[test] + fn test_parser_empty() { + // use a parsed empty pattern to test Packer + let consts = crate::parser::parse::<&'static str, &'static str, ()>( + "", + None, + None, + ).unwrap(); + let mut der = JsonDeserializer::from_str(r#"{"hello": 0, "world": 1}"#); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new( + &consts, + &mut err, + &mut frames, + //&mut output, + ); + let (mut packs, obj) = Packer::new( + interp, + MAX_CALLS, + ).deserialize(&mut der).unwrap(); + assert!(obj.is_none()); + assert!(packs.len() == 1); + let pack = packs.pop().unwrap(); + assert!(pack.subpacks.is_empty()); + } + + #[test] + fn test_parser_basic() { + // use a basic parsed pattern to test Packer + let consts = crate::parser::parse::<&'static str, &'static str, ()>( + ":map->[name:str]value:str", + None, + None, + ).unwrap(); + let data = &[ + 0x02, // map length (2) + 0x04, // string length (4) + 0x6E, 0x61, 0x6D, 0x65, // b'name' + 0x01, // string length (1) + 0x61, // b'a' + 0x05, // string length (5) + 0x76, 0x61, 0x6C, 0x75, 0x65, // b'value' + 0x01, // string length (1) + 0x62, // b'b' + ]; + let mut der = PostcardDeserializer::from_bytes(data); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new( + &consts, + &mut err, + &mut frames, + //&mut output, + ); + let result = Packer::new( + interp, + MAX_CALLS, + ).deserialize(&mut der); + let (mut packs, obj) = result.unwrap(); + assert!(obj.is_none()); + assert!(packs.len() == 1); + let pack = packs.pop().unwrap(); + assert!(pack.subpacks.len() == 2); + } + + #[test] + fn test_parser_basic_subtree() { + // use a basic parsed pattern with a subtree to test Packer + let consts = crate::parser::parse::<&'static str, &'static str, ()>( + ":map(->[name:str]value:str)", + None, + None, + ).unwrap(); + let data = &[ + 0x02, // map length (2) + 0x04, // string length (4) + 0x6E, 0x61, 0x6D, 0x65, // b'name' + 0x01, // string length (1) + 0x61, // b'a' + 0x05, // string length (5) + 0x76, 0x61, 0x6C, 0x75, 0x65, // b'value' + 0x01, // string length (1) + 0x62, // b'b' + ]; + let mut der = PostcardDeserializer::from_bytes(data); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new( + &consts, + &mut err, + &mut frames, + //&mut output, + ); + let result = Packer::new( + interp, + MAX_CALLS, + ).deserialize(&mut der); + let (mut packs, obj) = result.unwrap(); + assert!(obj.is_none()); + assert!(packs.len() == 1); + let pack = packs.pop().unwrap(); + assert!(pack.subpacks.len() == 2); + } + + #[test] + fn test_parser_subtrees() { + // use a parsed pattern with subtrees to test Packer + // also test a non-self-describing format (postcard) + let consts = crate::parser::parse::<&'static str, &'static str, ()>( + ":map(->['name'?]name:str)(->['value'?]value:u32)(->[:str]:?ignored_any)", + None, + None, + ).unwrap(); + let data = &[ + 0x02, // map length (2) + 0x04, // string length (4) + 0x6E, 0x61, 0x6D, 0x65, // b'name' + 0x01, // string length (1) + 0x61, // b'a' + 0x05, // string length (5) + 0x76, 0x61, 0x6C, 0x75, 0x65, // b'value' + 0x01, // 1 + ]; + let mut der = PostcardDeserializer::from_bytes(data); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new( + &consts, + &mut err, + &mut frames, + //&mut output, + ); + let result = Packer::new( + interp, + MAX_CALLS, + ).deserialize(&mut der); + let (mut packs, obj) = result.unwrap(); + assert!(obj.is_none()); + assert!(packs.len() == 1); + let pack = packs.pop().unwrap(); + assert!(pack.subpacks.len() == 2); + } } diff --git a/src/vm/mod.rs b/src/vm/mod.rs index 06f12e5..131e48a 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -355,8 +355,17 @@ pub struct Pack<'pat, 'de> { impl<'pat, 'de> Pack<'pat, 'de> { /// Merges two packs, with elements from `other` coming after `self`. fn merge_from(&mut self, mut other: Self) { - for (left, right) in self.subpacks.iter_mut().zip(other.subpacks) { - left.extend(right) + match (self.subpacks.len(), other.subpacks.len()) { + (0, _) => { + *self = other; + }, + (_, 0) => {}, + (a, b) if a == b => { + for (l, r) in self.subpacks.iter_mut().zip(other.subpacks) { + l.extend(r) + } + }, + _ => unreachable!("merge_from unbalanced iterations"), } } @@ -476,7 +485,7 @@ impl<'pat> Frame<'pat> { /// Panics if called on a non-matching frame or if iteration hasn't begun. fn op(&self) -> PatternElement { assert!(self.active(), "op() called on inactive frame"); - self.ops[self.iar.expect("ops[iar]")] + self.raw_op() } /// Counts the number of *active* subtrees, if any, and whether any @@ -486,7 +495,7 @@ impl<'pat> Frame<'pat> { /// /// Panics if iteration hasn't begun. fn num_subtrees(&self) -> Option<(usize, bool)> { - let iar = self.iar?; + let iar = self.iar.unwrap(); // check if there are any subtrees matches!( self.ops[iar], -- cgit 1.4.1