From 32d313b0cb962b42340073c39cc60b703a6d546f Mon Sep 17 00:00:00 2001 From: SoniEx2 Date: Tue, 25 Oct 2022 18:15:37 -0300 Subject: Finish most of map logic --- src/errors.rs | 1 + src/vm/de.rs | 303 ++++++++++++++++++++++++++++++++++++++++------------------ src/vm/mod.rs | 92 +++++++++--------- 3 files changed, 258 insertions(+), 138 deletions(-) diff --git a/src/errors.rs b/src/errors.rs index 877e5f5..25b28ec 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -45,6 +45,7 @@ pub enum PatternError<'a> { // /// These are errors that may be returned by the matcher when matching a // /// pattern. // #[derive(Clone, Debug)] +#[derive(Debug)] #[non_exhaustive] pub enum MatchError { /// Returned if the pattern nests too deeply. diff --git a/src/vm/de.rs b/src/vm/de.rs index 9583962..c906226 100644 --- a/src/vm/de.rs +++ b/src/vm/de.rs @@ -95,7 +95,13 @@ impl<'pat, 'state, 'de, O: Serialize> Packer<'pat, 'state, O> { } /// Steps the VM into the next operation. - fn step_in(&mut self) { + fn step_in(&mut self) -> Result<(), E> { + if self.call_limit > 0 { + self.call_limit -= 1; + } else { + self.interp.error.insert(crate::errors::MatchError::StackOverflow); + return Err(todo!()); + } // iterate up to the *live* length (i.e. the loop is allowed to modify // the length). // NOTE: we need to use while-let so as to not borrow anything in an @@ -148,6 +154,7 @@ impl<'pat, 'state, 'de, O: Serialize> Packer<'pat, 'state, O> { } } } + Ok(()) } /// Steps the VM back into the previous operation. @@ -155,6 +162,7 @@ impl<'pat, 'state, 'de, O: Serialize> Packer<'pat, 'state, O> { &mut self, mut packs: Vec>, ) -> Vec> { + self.call_limit += 1; let mut index_iter = 0..; while let Some(index) = index_iter.next().filter(|&i| { i < self.interp.frames.len() @@ -212,7 +220,7 @@ where where D: serde::Deserializer<'de> { - self.step_in(); + if let Err(e) = self.step_in() { return Err(e); } let pat = self.interp.pat; let target_type = self.frames().iter_active().fold( Type::IgnoredAny, @@ -300,32 +308,48 @@ where } /// visit method generator for simple values (primitives). +/// +/// can generate whole function or just the glue. macro_rules! vs { - ($visit:ident $obj:ident $t:ty) => { - fn $visit(mut self, v: $t) -> Result + (fn $visit:ident $obj:ident ($data_type:pat) $rust_type:ty) => { + fn $visit(mut self, v: $rust_type) -> Result where E: serde::de::Error, { - // FIXME filtering/errors - let pat = self.interp.pat; + vs!(self (v) $obj ($data_type)) + } + }; + ($this:ident $v:tt $obj:ident ($data_type:pat)) => { + { + let pat = $this.interp.pat; let mut obj = None; - if self.collecting { - obj = Some(SerdeObject::$obj(v)); + if $this.collecting { + obj = Some(SerdeObject::$obj$v); } let mut packs = Vec::new(); - self.frames_mut().iter_active_mut().try_for_each(|frame| { + $this.frames_mut().iter_active_mut().try_for_each(|frame| { + let ty = frame.get_type(pat); + match ty { + | Some(($data_type, _)) + | Some((Type::Any, _)) + | Some((Type::IgnoredAny, _)) + => {}, + Some((_, false)) => todo!(), + Some((_, true)) => return Err(todo!()), + None => unreachable!(), + } let mut pack = Pack::default(); - let mut map = IndexMap::new(); if let Some(name) = frame.get_name(pat) { - map.insert(name, (Pack::default(), SerdeObject::$obj(v))); + let mut map = IndexMap::new(); + map.insert(name, (Pack::default(), SerdeObject::$obj$v)); + pack.subpacks.push(map); } - pack.subpacks.push(map); packs.push(pack); Ok(()) })?; Ok((packs, obj)) } - } + }; } impl<'pat, 'state, 'de, O> serde::de::Visitor<'de> @@ -338,119 +362,77 @@ where write!(f, "unsure") } - vs!(visit_bool Bool bool); - vs!(visit_i8 I8 i8); - vs!(visit_i16 I16 i16); - vs!(visit_i32 I32 i32); - vs!(visit_i64 I64 i64); - vs!(visit_i128 I128 i128); - vs!(visit_u8 U8 u8); - vs!(visit_u16 U16 u16); - vs!(visit_u32 U32 u32); - vs!(visit_u64 U64 u64); - vs!(visit_u128 U128 u128); - vs!(visit_f32 F32 f32); - vs!(visit_f64 F64 f64); - vs!(visit_char Char char); + vs!(fn visit_bool Bool (Type::Bool) bool); + vs!(fn visit_i8 I8 (Type::I8) i8); + vs!(fn visit_i16 I16 (Type::I16) i16); + vs!(fn visit_i32 I32 (Type::I32) i32); + vs!(fn visit_i64 I64 (Type::I64) i64); + vs!(fn visit_i128 I128 (Type::I128) i128); + vs!(fn visit_u8 U8 (Type::U8) u8); + vs!(fn visit_u16 U16 (Type::U16) u16); + vs!(fn visit_u32 U32 (Type::U32) u32); + vs!(fn visit_u64 U64 (Type::U64) u64); + vs!(fn visit_u128 U128 (Type::U128) u128); + vs!(fn visit_f32 F32 (Type::F32) f32); + vs!(fn visit_f64 F64 (Type::F64) f64); + vs!(fn visit_char Char (Type::Char) char); fn visit_str(self, v: &str) -> Result where E: serde::de::Error, { - let mut obj = None; - if self.collecting { - obj = Some(SerdeObject::Str(Cow::Owned(String::from(v)))); - } - todo!() + // no real option but to clone. + vs!(self (Cow::Owned(v.to_owned())) Str (Type::String | Type::Str)) } fn visit_borrowed_str(self, v: &'de str) -> Result where E: serde::de::Error, { - let mut obj = None; - if self.collecting { - obj = Some(SerdeObject::Str(Cow::Borrowed(v))); - } - todo!() + vs!(self (Cow::Borrowed(v)) Str (Type::String | Type::Str)) } fn visit_string(self, v: String) -> Result where E: serde::de::Error, { - let mut obj = None; - if self.collecting { - obj = Some(SerdeObject::Str(Cow::Owned(v))); - } - todo!() + // TODO try to avoid cloning + self.visit_str(&*v) } fn visit_bytes(self, v: &[u8]) -> Result where E: serde::de::Error, { - let mut obj = None; - if self.collecting { - obj = Some(SerdeObject::Bytes(Cow::Owned(Vec::from(v)))); - } - todo!() + vs!(self (Cow::Owned(v.to_owned())) Bytes (Type::Bytes | Type::ByteBuf)) } fn visit_borrowed_bytes(self, v: &'de [u8]) -> Result where E: serde::de::Error, { - let mut obj = None; - if self.collecting { - obj = Some(SerdeObject::Bytes(Cow::Borrowed(v))); - } - todo!() + vs!(self (Cow::Borrowed(v)) Bytes (Type::Bytes | Type::ByteBuf)) } fn visit_byte_buf(self, v: Vec) -> Result where E: serde::de::Error, { - let mut obj = None; - if self.collecting { - obj = Some(SerdeObject::Bytes(Cow::Owned(v))); - } - todo!() + // TODO try to avoid cloning + self.visit_byte_buf(&*v) } fn visit_none(self) -> Result where E: serde::de::Error, { - let mut obj = None; - if self.collecting { - obj = Some(SerdeObject::None); - } - todo!() + vs!(self {} None (Type::Option)) } fn visit_some(self, deserializer: D) -> Result where D: serde::de::Deserializer<'de>, { - let mut obj = None; - if self.collecting { - obj = Some(SerdeObject::Some(todo!())); - } todo!() } fn visit_unit(self) -> Result where E: serde::de::Error, { - // FIXME subtrees - let mut obj = None; - let mut packs = Vec::new(); - let mut pack = Pack::default(); - if self.collecting { - obj = Some(SerdeObject::Unit); - } - let mut map = IndexMap::new(); - //for name in self.get_name() { - // map.insert(name, (Default::default(), SerdeObject::Unit)); - //} - pack.subpacks.push(map); - packs.push(pack); - Ok((packs, obj)) + vs!(self {} Unit (Type::Unit)) } fn visit_newtype_struct( self, @@ -459,10 +441,6 @@ where where D: serde::de::Deserializer<'de>, { - let mut obj = None; - if self.collecting { - obj = Some(SerdeObject::NewtypeStruct(todo!())); - } todo!() } fn visit_seq(self, seq: A) -> Result @@ -475,15 +453,84 @@ where } todo!() } - fn visit_map(self, map: A) -> Result + fn visit_map(self, mut map: A) -> Result where A: serde::de::MapAccess<'de>, { - let mut obj = None; - if self.collecting { - obj = Some(SerdeObject::Map(Vec::new())); + let old_collecting = self.collecting; + let pat = self.interp.pat; + let mut collecting = old_collecting; + self.frames_mut().iter_active_mut().try_for_each(|frame| { + let ty = frame.get_type(pat); + match ty { + | Some((Type::Map, _)) + | Some((Type::Any, _)) + | Some((Type::IgnoredAny, _)) + => {}, + Some((_, false)) => { + frame.matches = false; + todo!() + }, + Some((_, true)) => return Err(todo!()), + None => unreachable!(), + } + if frame.get_name(pat).is_some() { + collecting = true; + } + Ok(()) + })?; + if let Err(e) = self.step_in() { return Err(e); } + self.collecting = collecting; + let mut subframes = Vec::new(); + self.frames().iter_active().for_each(|frame| { + if let PatternElement::Tag { key_subtree } = frame.op() { + if let Some(key_subtree) = key_subtree { + subframes.push(Frame { + ops: &pat.protos[key_subtree], + iar: None, + overstep: 0, + matches: true, + }); + } + } else { + unreachable!() + } + }); + let mut obj_inner = Vec::new(); + while let Some(packed_key) = { + let subinterp = Interpreter { + pat: pat, + frames: &mut subframes, + error: self.interp.error, + }; + let mut subpacker = Packer { + interp: subinterp, + collecting: self.collecting, + call_limit: self.call_limit, + }; + map.next_key_seed(&mut subpacker)? + } { + self.frames_mut().iter_active_mut().filter(|frame| { + if let PatternElement::Tag { key_subtree } = frame.op() { + key_subtree.is_some() + } else { + unreachable!() + } + }).zip(&mut subframes).for_each(|(frame, subframe)| { + frame.matches = subframe.matches; + // reset subframe for next iteration + subframe.matches = true; + subframe.iar = None; + }); + let packed_value = map.next_value_seed(&mut *self)?; + if self.collecting { + obj_inner.push( + (packed_key.1.unwrap(), packed_value.1.unwrap()), + ); + } + todo!("merge kv"); } - todo!() + todo!(); } fn visit_enum(self, data: A) -> Result where @@ -676,8 +723,8 @@ mod tests { name_and_value: These::Both(0, Value::Type { ty: Type::U64, skippable: false, - }) - } + }), + }, ]); let mut der = JsonDeserializer::from_str("3"); let mut err = Default::default(); @@ -688,5 +735,79 @@ mod tests { assert!(obj.is_none()); assert_eq!(packs[0].subpacks[0]["hello"].1, SerdeObject::U64(3)); } + + #[test] + fn test_simple_error() { + let mut consts = PatternConstants::<()>::default(); + consts.strings.push("hello".into()); + consts.protos.push(vec![ + PatternElement::Value { + name_and_value: These::Both(0, Value::Type { + ty: Type::U64, + skippable: false, + }), + }, + ]); + let mut der = JsonDeserializer::from_str("\"hello\""); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new(&consts, &mut err, &mut frames); + let packed = Packer::new(interp, MAX_CALLS).deserialize(&mut der); + dbg!(&packed); + // error produced by serde_json + assert!(packed.is_err()); + } + + #[test] + fn test_map() { + let mut consts = PatternConstants::<()>::default(); + consts.strings.push("key".into()); + consts.strings.push("value".into()); + consts.protos.push(vec![ + PatternElement::Value { + name_and_value: These::This(0), + }, + ]); + consts.protos.push(vec![ + PatternElement::Value { + name_and_value: These::That(Value::Type { + ty: Type::Map, + skippable: false, + }), + }, + PatternElement::Tag { + key_subtree: Some(0), + }, + PatternElement::Value { + name_and_value: These::Both(1, Value::Type { + ty: Type::U64, + skippable: false, + }), + }, + ]); + let mut der = JsonDeserializer::from_str(r#"{"hello": 0, "world": 1}"#); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new(&consts, &mut err, &mut frames); + let packed = Packer::new(interp, MAX_CALLS).deserialize(&mut der); + let (packs, obj) = packed.unwrap(); + assert!(obj.is_none()); + assert_eq!( + packs[0].subpacks[0]["key"].1, + SerdeObject::Str("hello".into()), + ); + assert_eq!( + packs[0].subpacks[0]["value"].1, + SerdeObject::U64(0), + ); + assert_eq!( + packs[0].subpacks[1]["key"].1, + SerdeObject::Str("world".into()), + ); + assert_eq!( + packs[0].subpacks[1]["value"].1, + SerdeObject::U64(1), + ); + } } diff --git a/src/vm/mod.rs b/src/vm/mod.rs index dca15a9..afb98cc 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -77,23 +77,19 @@ impl std::fmt::Debug for PatternConstants { } /// A pattern element. +// FIXME: docs #[derive(Copy, Clone, Debug)] pub(crate) enum PatternElement { - /// A value is the entry point to a pattern. If present, it's the first - /// element. + /// A value is the core capturing element. Value { /// The index of the (string) name to apply to this value and/or the /// expected value of this entry. name_and_value: These, }, - /// A tag is the core iterative element, and is repeated up to the desired - /// depth of iteration. + /// A tag is the core iterative element. It is always followed by a value. Tag { /// The index of the (proto) key to match against. key_subtree: Option, - /// The index of the (string) name to apply to this value and/or the - /// expected value of this entry. - name_and_value: These, }, /// Marks the end of pattern iteration and the start of subtrees (if any). SubtreeMarker, @@ -294,43 +290,43 @@ pub(crate) enum SerdeObject<'de> { } impl<'de> SerdeObject<'de> { - /// Checks the type of this object. - fn check(&self, ty: Option) -> Result<(), E> { - let ty = match ty { - None => return Ok(()), - Some(ty) => ty, - }; - match (ty, self) { - | (Type::Any, v) - | (Type::IgnoredAny, v) - => Ok(()), - | (Type::Bool, v @ SerdeObject::Bool(_)) - | (Type::I8, v @ SerdeObject::I8(_)) - | (Type::I16, v @ SerdeObject::I16(_)) - | (Type::I32, v @ SerdeObject::I32(_)) - | (Type::I64, v @ SerdeObject::I64(_)) - | (Type::I128, v @ SerdeObject::I128(_)) - | (Type::U8, v @ SerdeObject::U8(_)) - | (Type::U16, v @ SerdeObject::U16(_)) - | (Type::U32, v @ SerdeObject::U32(_)) - | (Type::U64, v @ SerdeObject::U64(_)) - | (Type::U128, v @ SerdeObject::U128(_)) - | (Type::F32, v @ SerdeObject::F32(_)) - | (Type::F64, v @ SerdeObject::F64(_)) - | (Type::Char, v @ SerdeObject::Char(_)) - | (Type::Str, v @ SerdeObject::Str(_)) - | (Type::String, v @ SerdeObject::Str(_)) - | (Type::Bytes, v @ SerdeObject::Bytes(_)) - | (Type::ByteBuf, v @ SerdeObject::Bytes(_)) - | (Type::Option, v @ SerdeObject::None) - | (Type::Option, v @ SerdeObject::Some(_)) - | (Type::Unit, v @ SerdeObject::Unit) - | (Type::Seq, v @ SerdeObject::Seq(_)) - | (Type::Map, v @ SerdeObject::Map(_)) - => Ok(()), - _ => todo!(), - } - } + ///// Checks the type of this object. + //fn check(&self, ty: Option) -> Result<(), E> { + // let ty = match ty { + // None => return Ok(()), + // Some(ty) => ty, + // }; + // match (ty, self) { + // | (Type::Any, v) + // | (Type::IgnoredAny, v) + // => Ok(()), + // | (Type::Bool, v @ SerdeObject::Bool(_)) + // | (Type::I8, v @ SerdeObject::I8(_)) + // | (Type::I16, v @ SerdeObject::I16(_)) + // | (Type::I32, v @ SerdeObject::I32(_)) + // | (Type::I64, v @ SerdeObject::I64(_)) + // | (Type::I128, v @ SerdeObject::I128(_)) + // | (Type::U8, v @ SerdeObject::U8(_)) + // | (Type::U16, v @ SerdeObject::U16(_)) + // | (Type::U32, v @ SerdeObject::U32(_)) + // | (Type::U64, v @ SerdeObject::U64(_)) + // | (Type::U128, v @ SerdeObject::U128(_)) + // | (Type::F32, v @ SerdeObject::F32(_)) + // | (Type::F64, v @ SerdeObject::F64(_)) + // | (Type::Char, v @ SerdeObject::Char(_)) + // | (Type::Str, v @ SerdeObject::Str(_)) + // | (Type::String, v @ SerdeObject::Str(_)) + // | (Type::Bytes, v @ SerdeObject::Bytes(_)) + // | (Type::ByteBuf, v @ SerdeObject::Bytes(_)) + // | (Type::Option, v @ SerdeObject::None) + // | (Type::Option, v @ SerdeObject::Some(_)) + // | (Type::Unit, v @ SerdeObject::Unit) + // | (Type::Seq, v @ SerdeObject::Seq(_)) + // | (Type::Map, v @ SerdeObject::Map(_)) + // => Ok(()), + // _ => todo!(), + // } + //} } impl<'de, E> serde::de::IntoDeserializer<'de, E> for SerdeObject<'de> @@ -354,6 +350,7 @@ pub struct Pack<'pat, 'de> { } /// The Datafu interpreter, sorta. +#[derive(Debug)] pub(crate) struct Interpreter<'pat, 'state, O: Serialize> { /// The pattern currently being processed. pat: &'pat PatternConstants, @@ -363,6 +360,7 @@ pub(crate) struct Interpreter<'pat, 'state, O: Serialize> { frames: &'state mut Vec>, } +#[derive(Debug)] pub(crate) struct Frame<'pat> { /// The instructions/function currently being processed. ops: &'pat [PatternElement], @@ -385,7 +383,7 @@ impl<'pat, 'state, O: Serialize> Interpreter<'pat, 'state, O> { ) -> Self { debug_assert!(frames.is_empty()); frames.push(Frame { - ops: &pat.protos[0], + ops: &pat.protos.last().unwrap(), iar: None, overstep: 0, matches: true, @@ -410,7 +408,6 @@ impl<'pat> Frame<'pat> { ) -> Option<(Type, bool)> { match self.op() { | PatternElement::Value { name_and_value, .. } - | PatternElement::Tag { name_and_value, .. } if name_and_value.is_there() => { match name_and_value.there() { @@ -425,6 +422,7 @@ impl<'pat> Frame<'pat> { None => todo!(), } }, + PatternElement::Tag { .. } => panic!("attempt to get type of tag"), _ => None, } } @@ -437,11 +435,11 @@ impl<'pat> Frame<'pat> { let strings = &pat.strings; match self.op() { | PatternElement::Value { name_and_value, .. } - | PatternElement::Tag { name_and_value, .. } if name_and_value.is_here() => { Some(&*strings[name_and_value.here().unwrap()]) }, + PatternElement::Tag { .. } => panic!("attempt to get name of tag"), _ => None, } } -- cgit 1.4.1