From d849f5e301fa47cfd87df1e7f1ad0346ddf387f1 Mon Sep 17 00:00:00 2001 From: SoniEx2 Date: Sat, 8 Apr 2023 18:52:00 -0300 Subject: Initial success --- ROADMAP.md | 10 + src/errors.rs | 34 + src/graph.rs | 30 + src/lib.rs | 1 + src/parser.rs | 4 +- src/pattern.rs | 12 +- src/vm/de.rs | 1766 --------------------------------------- src/vm/de/mod.rs | 2175 +++++++++++++++++++++++++++++++++++++++++++++++++ src/vm/de/unpacker.rs | 518 ++++++++++++ src/vm/mod.rs | 107 ++- tests/basic_match.rs | 37 +- 11 files changed, 2899 insertions(+), 1795 deletions(-) create mode 100644 ROADMAP.md create mode 100644 src/graph.rs delete mode 100644 src/vm/de.rs create mode 100644 src/vm/de/mod.rs create mode 100644 src/vm/de/unpacker.rs diff --git a/ROADMAP.md b/ROADMAP.md new file mode 100644 index 0000000..96f267b --- /dev/null +++ b/ROADMAP.md @@ -0,0 +1,10 @@ +# Roadmap + +This is basically a big ol todo list. Unlike real roadmaps, we don't have dates +here. + +# Necessary + +These are features which correspond to core language semantics: + +- Grouping. Currently we don't support grouping at all. diff --git a/src/errors.rs b/src/errors.rs index 914b70a..ea1e60d 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -56,3 +56,37 @@ pub enum MatchError { /// requirements. Unsatisfiable, } + +#[derive(Debug)] +#[non_exhaustive] +pub enum QueryError { + /// Returned if the deserialization recurses too deeply. + StackOverflow, + /// Returned if there's nothing to deserialize. + Empty, + /// The query is unsatisfiable. This happens if e.g. there are multiple + /// values in the query but only one value can fit into the request. + Unsatisfiable, + /// Wrapped Serde error. + Serde(serde::de::value::Error), +} + +impl std::fmt::Display for QueryError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::StackOverflow => write!(f, "stack overflow"), + Self::Empty => write!(f, "no results"), + Self::Unsatisfiable => write!(f, "unsatisfiable"), + Self::Serde(e) => e.fmt(f), + } + } +} + +impl std::error::Error for QueryError { +} + +impl serde::de::Error for QueryError { + fn custom(msg: T) -> Self where T: std::fmt::Display { + Self::Serde(serde::de::value::Error::custom(msg)) + } +} diff --git a/src/graph.rs b/src/graph.rs new file mode 100644 index 0000000..47d6523 --- /dev/null +++ b/src/graph.rs @@ -0,0 +1,30 @@ +// Copyright (C) 2022 Soni L. +// SPDX-License-Identifier: MIT OR Apache-2.0 + +//! The results produced by a matched pattern. + +use serde::de::Deserialize; + +use crate::vm::MAX_CALLS; +use crate::vm::Pack; +use crate::vm::Unpacker; +use crate::errors::QueryError; + +// TODO in the future, we may want to store &'pat IndexSet either here +// or in the Pack. +#[derive(Debug)] +pub struct Graph<'pat, 'de>(pub(crate) Option>); + +impl<'pat, 'de> Graph<'pat, 'de> { + /// Collect this graph into a given form. + pub fn collect>(self) -> Result { + let Self(inner) = self; + match inner { + None => Err(QueryError::Empty), + Some(pack) => { + let mut unpacker = Unpacker::new(pack, MAX_CALLS); + De::deserialize(unpacker) + }, + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 824d54e..897618b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -110,6 +110,7 @@ //! ``` pub mod errors; +mod graph; //pub mod type_tree; mod parser; mod pattern; diff --git a/src/parser.rs b/src/parser.rs index eb378ad..744ab0f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -354,7 +354,7 @@ where let self_ = &mut *self; let id = id.unwrap_or_else(move || { string.shrink_to_fit(); - self_.consts.strings.push(string); + self_.consts.strings.insert(string); self_.consts.strings.len() - 1 }); self.tokens.push(PatternToken::String(id, skippable)); @@ -499,7 +499,7 @@ where // no processing of `name` is required for this. let id = self.consts.strings.iter().position(|c| c == name); let id = id.unwrap_or_else(|| { - self.consts.strings.push(name.into()); + self.consts.strings.insert(name.into()); self.consts.strings.len() - 1 }); self.tokens.push(PatternToken::Identifier(id)); diff --git a/src/pattern.rs b/src/pattern.rs index 6286ff9..38cdcda 100644 --- a/src/pattern.rs +++ b/src/pattern.rs @@ -13,6 +13,7 @@ use serde::ser::Serialize; use crate::Predicate; use crate::errors::PatternError; +use crate::graph::Graph; use crate::parser::parse; use crate::vm; use crate::vm::PatternConstants; @@ -36,10 +37,12 @@ pub struct Pattern { impl Pattern { /// Matches the pattern against an input. - pub fn deserialize<'de, Der, De>(&self, der: Der) -> Result + pub fn deserialize<'de, Der>( + &self, + der: Der, + ) -> Result, Der::Error> where Der: Deserializer<'de>, - De: Deserialize<'de>, { let mut err = Default::default(); let mut frames = Default::default(); @@ -57,9 +60,8 @@ impl Pattern { // this should always be None debug_assert!(obj.is_none()); debug_assert!(packs.len() <= 1); - let pack = packs.pop().unwrap_or_else(Default::default); - let de = De::deserialize(vm::Unpacker::new(pack, MAX_CALLS)); - todo!() + let pack = packs.pop(); + Ok(Graph(pack)) } } diff --git a/src/vm/de.rs b/src/vm/de.rs deleted file mode 100644 index 8f2aa8d..0000000 --- a/src/vm/de.rs +++ /dev/null @@ -1,1766 +0,0 @@ -// Copyright (C) 2022 Soni L. -// SPDX-License-Identifier: MIT OR Apache-2.0 - -//! Deserialization-related parts of the VM. - -use std::borrow::Cow; -use std::marker::PhantomData; - -use indexmap::IndexMap; - -use serde::Serialize; -use serde::de::Error as _; -use serde::de::IntoDeserializer as _; - -use smallvec::SmallVec; - - -use super::Frame; -use super::Interpreter; -use super::Pack; -use super::PatternConstants; -use super::PatternElement; -use super::SerdeObject; -use super::Type; -use super::Value; -use crate::errors::MatchError; - -/// A `DeserializeSeed` for Datafu input. -/// -/// This converts from Serde to Datafu's internal representation (a "pack"). -pub(crate) struct Packer<'pat, 'state, O: Serialize> { - /// The global interpreter state. - interp: Interpreter<'pat, 'state, O>, - /// Current call limit. - call_limit: usize, - /// Whether we're collecting values. - collecting: bool, -} - -struct FramesMut<'packer, 'pat> { - frames: &'packer mut Vec>, -} - -struct Frames<'packer, 'pat> { - frames: &'packer Vec>, -} - -impl<'packer, 'pat> FramesMut<'packer, 'pat> { - fn iter_mut<'a>( - &'a mut self, - ) -> impl Iterator> + DoubleEndedIterator - where - 'packer: 'a, - { - self.frames.iter_mut() - } - - fn iter_active_mut<'a>( - &'a mut self, - ) -> impl Iterator> + DoubleEndedIterator - where - 'packer: 'a, - { - self.iter_mut().filter(|frame| { - frame.active() - }) - } -} - -impl<'packer, 'pat> Frames<'packer, 'pat> { - fn iter<'a>( - &'a self, - ) -> impl Iterator> + DoubleEndedIterator - where - 'packer: 'a, - { - self.frames.iter() - } - - fn iter_active<'a>( - &'a self, - ) -> impl Iterator> + DoubleEndedIterator - where - 'packer: 'a, - { - self.iter().filter(|frame| { - frame.active() - }) - } -} - -impl<'pat, 'state, 'de, O: Serialize> Packer<'pat, 'state, O> { - /// Creates a new Packer. - pub(crate) fn new( - interp: Interpreter<'pat, 'state, O>, - call_limit: usize, - ) -> Self { - Self { - interp: interp, - call_limit: call_limit, - collecting: false, - } - } - - fn frames_mut(&mut self) -> FramesMut<'_, 'pat> { - FramesMut { - frames: &mut *self.interp.frames, - } - } - - fn frames(&mut self) -> Frames<'_, 'pat> { - Frames { - frames: &*self.interp.frames, - } - } - - /// Steps the VM into the next operation. - fn step_in(&mut self) -> Result<(), E> { - if self.call_limit > 0 { - self.call_limit -= 1; - } else { - self.interp.error.insert(MatchError::StackOverflow); - return Err(E::custom("stack overflow")); - } - // iterate up to the *live* length (i.e. the loop is allowed to modify - // the length). - // NOTE: we need to use while-let so as to not borrow anything in an - // iterator. filtering happens on the *result* of the iterator. - let mut index_iter = 0..; - while let Some(index) = index_iter.next().filter(|&i| { - i < self.interp.frames.len() - }) { - let frame = &mut self.interp.frames[index]; - if frame.overstep > 0 || !frame.matches { - // overstepped and non-matching frames - frame.overstep += 1; - // FIXME check if this is correct (it probably isn't) - //frame.matches = false; - } else { - if !frame.next() { - // empty/end-of frames - // 1 layer of step-in. - // step-out will undo this. - // this is correct because this branch implies overstep = 0 - frame.overstep = 1; - // FIXME we still don't think this is correct - frame.matches = false; - } else if matches!( - frame.op(), - PatternElement::SubtreeMarker, - ) { - // subtrees! - // these are tricky, because the current frame can be moved - // in memory. so we have to use indexing every time. - // tho first we set it as overstep because it has special - // handling. - frame.overstep = 1; - frame.matches = false; - let mut at = index + 1; - while self.interp.frames[index].next() { - let op = self.interp.frames[index].raw_op(); - if let PatternElement::ValueSubtree { - index: subtree, .. - } = op { - let new_frame = Frame { - ops: &self.interp.pat.protos[subtree][..], - iar: None, - overstep: 0, - matches: true, - poison: false, - }; - debug_assert!(!new_frame.ops.is_empty()); - // we want the "newest" frame last, so it is - // easier to unwind back. - self.interp.frames.insert(at, new_frame); - at += 1; - } else { - unreachable!() - } - } - } - } - } - Ok(()) - } - - /// Steps the VM back into the previous operation. - fn step_out( - &mut self, - mut packs: Vec>, - ) -> Result>, E> { - // this code attempts to maintain the logical invariant of: - // self.frames().iter_active().count() == packs.len() - self.call_limit += 1; - let mut index_iter = 0..; - let mut pack_index = packs.len(); - let orig_len = self.interp.frames.len(); - while let Some(index) = index_iter.next().filter(|&i| { - i < orig_len - }) { - // iterate backwards - let index = orig_len - index - 1; - let frame = &mut self.interp.frames[index]; - let mut has_pack = frame.matches; - if frame.overstep > 0 { - // handle overstep - frame.overstep -= 1; - } else { - if has_pack { - pack_index -= 1; - } - if frame.poison { - // FIXME should frame.poison always remove the pack? - if frame.is_value() { - if has_pack { - packs.remove(pack_index); - } - frame.matches = false; - has_pack = false; - frame.poison = false; - } - } - // unwind frame - if frame.prev() { - // successfully unwound. do nothing. - } else { - // find parent frame. - let mut count = 1; - let mut target = index; - let mut target_pack = pack_index; - while count > 0 && target > 0 { - target -= 1; - if self.interp.frames[target].matches { - debug_assert!(target_pack > 0); - target_pack -= 1; - } - match self.interp.frames[target].num_subtrees() { - Some((num, _)) if num < count => { - count -= num; - }, - Some((num, _)) => { - count = 0; - }, - None => { - count += 1; - }, - } - } - if count == 0 { - // found target frame - let frame = self.interp.frames.remove(index); - let target_frame = &mut self.interp.frames[target]; - let (_, optional) = target_frame.value_subtree(); - target_frame.prev().then(|| ()).unwrap(); - if has_pack { - let pack = packs.remove(pack_index); - if !target_frame.matches { - packs.insert(target_pack, pack); - target_frame.matches = true; - pack_index += 1; - } else { - packs[target_pack].merge_breadth(pack); - } - } else { - //if frame.poison { - // target_frame.poison = true; - //} - if !optional { - self.interp.error.insert({ - MatchError::ValidationError - }); - return Err(E::custom("subtree failed")); - } - } - if let Some((0, _)) = target_frame.num_subtrees() { - target_frame.overstep = 0; - } - } - } - } - } - Ok(packs) - } -} - -impl<'pat, 'state, 'de, O> serde::de::DeserializeSeed<'de> -for &mut Packer<'pat, 'state, O> -where - O: Serialize, -{ - type Value = (Vec>, Option>); - fn deserialize( - mut self, - deserializer: D, - ) -> Result - where - D: serde::Deserializer<'de> - { - if let Err(e) = self.step_in() { return Err(e); } - let pat = self.interp.pat; - let target_type = self.frames().iter_active().try_fold( - Type::IgnoredAny, - |target_type, frame| { - Ok(match (target_type, frame.get_type()) { - // required type binds stronger than any/ignored_any - (Type::IgnoredAny, Some((ty, true))) => ty, - (Type::Any, Some((ty, true))) => ty, - // and also stronger than optional any/ignored_any - (ty, Some((Type::IgnoredAny, _))) => ty, - (ty, Some((Type::Any, _))) => ty, - // None effectively falls back into any - (Type::IgnoredAny, None) => Type::Any, - (ty, None) => ty, - // prefer owned if any branch prefers owned - (Type::String, Some((Type::Str, true))) => { - Type::String - }, - (Type::Str, Some((Type::String, true))) => { - Type::String - }, - (Type::Bytes, Some((Type::ByteBuf, true))) => { - Type::ByteBuf - }, - (Type::ByteBuf, Some((Type::Bytes, true))) => { - Type::ByteBuf - }, - // types which are the same are okay - (left, Some((right, _))) if left == right => { - left - }, - // optional type vs Any/IgnoredAny prefers Any - (Type::IgnoredAny, Some((_, false))) => Type::Any, - (Type::Any, Some((_, false))) => Type::Any, - // types which are not the same are an error because we - // only request a specific type if it's actually required - (left, Some((right, _))) => { - return Err(MatchError::Unsatisfiable); - }, - }) - }, - ); - let target_type = match target_type { - Ok(target_type) => target_type, - Err(e) => { - self.interp.error.insert(e); - return Err(D::Error::custom("type conflict")); - }, - }; - match target_type { - Type::Any => deserializer.deserialize_any(&mut *self), - Type::IgnoredAny => { - deserializer.deserialize_ignored_any(&mut *self) - }, - Type::Bool => deserializer.deserialize_bool(&mut *self), - Type::I8 => deserializer.deserialize_i8(&mut *self), - Type::I16 => deserializer.deserialize_i16(&mut *self), - Type::I32 => deserializer.deserialize_i32(&mut *self), - Type::I64 => deserializer.deserialize_i64(&mut *self), - Type::I128 => deserializer.deserialize_i128(&mut *self), - Type::U8 => deserializer.deserialize_u8(&mut *self), - Type::U16 => deserializer.deserialize_u16(&mut *self), - Type::U32 => deserializer.deserialize_u32(&mut *self), - Type::U64 => deserializer.deserialize_u64(&mut *self), - Type::U128 => deserializer.deserialize_u128(&mut *self), - Type::F32 => deserializer.deserialize_f32(&mut *self), - Type::F64 => deserializer.deserialize_f64(&mut *self), - Type::Char => deserializer.deserialize_char(&mut *self), - Type::Str if !self.collecting => { - deserializer.deserialize_str(&mut *self) - }, - Type::Str | Type::String => { - deserializer.deserialize_string(&mut *self) - }, - Type::Bytes if !self.collecting => { - deserializer.deserialize_bytes(&mut *self) - }, - Type::Bytes | Type::ByteBuf => { - deserializer.deserialize_byte_buf(&mut *self) - }, - Type::Option => deserializer.deserialize_option(&mut *self), - Type::Unit => deserializer.deserialize_unit(&mut *self), - Type::Seq => deserializer.deserialize_seq(&mut *self), - Type::Map => deserializer.deserialize_map(&mut *self), - Type::Identifier => { - deserializer.deserialize_identifier(&mut *self) - }, - Type::Tuple(len) => { - deserializer.deserialize_tuple(len, &mut *self) - }, - Type::UnitStruct(name) => { - deserializer.deserialize_unit_struct(name, &mut *self) - }, - Type::NewtypeStruct(name) => { - deserializer.deserialize_newtype_struct(name, &mut *self) - }, - Type::TupleStruct { name, len } => { - deserializer.deserialize_tuple_struct(name, len, &mut *self) - }, - Type::Struct { name, fields } => { - deserializer.deserialize_struct(name, fields, &mut *self) - }, - Type::Enum { name, variants } => { - deserializer.deserialize_enum(name, variants, &mut *self) - }, - }.and_then(|(packs, obj)| Ok((self.step_out(packs)?, obj))) - } -} - -/// visit method generator for simple values (primitives). -/// -/// can generate whole function or just the glue. -macro_rules! vs { - (fn $visit:ident $obj:ident ($data_type:pat) $rust_type:ty) => { - fn $visit(mut self, v: $rust_type) -> Result - where - E: serde::de::Error, - { - vs!(self (v) $obj ($data_type)) - } - }; - ($this:ident $v:tt $obj:ident ($data_type:pat)) => { - { - let pat = $this.interp.pat; - let mut obj = None; - if $this.collecting { - obj = Some(SerdeObject::$obj$v); - } - let mut packs = Vec::new(); - let result = { - $this.frames_mut().iter_active_mut().try_for_each(|frame| { - let ty = frame.get_type(); - match ty { - | Some(($data_type, _)) - | Some((Type::Any, _)) - | Some((Type::IgnoredAny, _)) - | None - => {}, - Some((_, false)) => { - frame.matches = false; - return Ok(()); - }, - Some((_, true)) => { - return Err(MatchError::ValidationError) - }, - } - let mut pack = Pack::default(); - if let Some(name) = frame.get_name(pat) { - let mut map = IndexMap::new(); - map.insert(name, (Pack::default(), SerdeObject::$obj$v)); - pack.subpacks.push(map); - } - packs.push(pack); - Ok(()) - }) - }; - match result { - Err(e) => { - $this.interp.error.insert(e); - return Err(serde::de::Error::custom("type mismatch")); - }, - _ => (), - } - Ok((packs, obj)) - } - }; -} - -impl<'pat, 'state, 'de, O> serde::de::Visitor<'de> -for &mut Packer<'pat, 'state, O> -where - O: Serialize, -{ - type Value = (Vec>, Option>); - fn expecting(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "unsure") - } - - vs!(fn visit_bool Bool (Type::Bool) bool); - vs!(fn visit_i8 I8 (Type::I8) i8); - vs!(fn visit_i16 I16 (Type::I16) i16); - vs!(fn visit_i32 I32 (Type::I32) i32); - vs!(fn visit_i64 I64 (Type::I64) i64); - vs!(fn visit_i128 I128 (Type::I128) i128); - vs!(fn visit_u8 U8 (Type::U8) u8); - vs!(fn visit_u16 U16 (Type::U16) u16); - vs!(fn visit_u32 U32 (Type::U32) u32); - vs!(fn visit_u64 U64 (Type::U64) u64); - vs!(fn visit_u128 U128 (Type::U128) u128); - vs!(fn visit_f32 F32 (Type::F32) f32); - vs!(fn visit_f64 F64 (Type::F64) f64); - vs!(fn visit_char Char (Type::Char) char); - - fn visit_str(self, v: &str) -> Result - where - E: serde::de::Error, - { - let pat = self.interp.pat; - let mut obj = None; - if self.collecting { - obj = Some(SerdeObject::Str(Cow::Owned(v.into()))); - } - let mut packs = Vec::new(); - let result = { - self.frames_mut().iter_active_mut().try_for_each(|frame| { - let ty = frame.get_type(); - match ty { - | Some((Type::String, _)) - | Some((Type::Str, _)) - | Some((Type::Any, _)) - | Some((Type::IgnoredAny, _)) - | None - => {}, - Some((_, false)) => { - frame.matches = false; - return Ok(()); - }, - Some((_, true)) => { - return Err(MatchError::ValidationError) - }, - } - match frame.op() { - PatternElement::Value { value: Some(value), .. } => { - match value { - | Value::String { index, skippable } - if pat.strings[index] != v => { - if skippable { - frame.matches = false; - return Ok(()); - } else { - return Err(MatchError::ValidationError); - } - }, - | Value::Regex { index, skippable } - if !pat.regices[index].is_match(v) => { - if skippable { - frame.matches = false; - return Ok(()); - } else { - return Err(MatchError::ValidationError); - } - }, - | Value::Type { .. } - | Value::Regex { .. } - | Value::String { .. } - => {}, // ok - } - }, - PatternElement::Value { value: None, .. } => {}, - _ => unreachable!(), - } - let mut pack = Pack::default(); - if let Some(name) = frame.get_name(pat) { - let mut map = IndexMap::new(); - map.insert( - name, - ( - Pack::default(), - SerdeObject::Str(Cow::Owned(v.into())), - ), - ); - pack.subpacks.push(map); - } - packs.push(pack); - Ok(()) - }) - }; - match result { - Err(e) => { - self.interp.error.insert(e); - return Err(E::custom("type/value mismatch")); - }, - _ => (), - } - Ok((packs, obj)) - } - fn visit_borrowed_str(self, v: &'de str) -> Result - where - E: serde::de::Error, - { - let pat = self.interp.pat; - let mut obj = None; - if self.collecting { - obj = Some(SerdeObject::Str(Cow::Borrowed(v))); - } - let mut packs = Vec::new(); - let result = { - self.frames_mut().iter_active_mut().try_for_each(|frame| { - let ty = frame.get_type(); - match ty { - | Some((Type::String, _)) - | Some((Type::Str, _)) - | Some((Type::Any, _)) - | Some((Type::IgnoredAny, _)) - | None - => {}, - Some((_, false)) => { - frame.matches = false; - return Ok(()); - }, - Some((_, true)) => { - return Err(MatchError::ValidationError) - }, - } - match frame.op() { - PatternElement::Value { value: Some(value), .. } => { - match value { - | Value::String { index, skippable } - if pat.strings[index] != v => { - if skippable { - frame.matches = false; - return Ok(()); - } else { - return Err(MatchError::ValidationError); - } - }, - | Value::Regex { index, skippable } - if !pat.regices[index].is_match(v) => { - if skippable { - frame.matches = false; - return Ok(()); - } else { - return Err(MatchError::ValidationError); - } - }, - | Value::Type { .. } - | Value::Regex { .. } - | Value::String { .. } - => {}, // ok - } - }, - PatternElement::Value { value: None, .. } => {}, - _ => unreachable!(), - } - let mut pack = Pack::default(); - if let Some(name) = frame.get_name(pat) { - let mut map = IndexMap::new(); - map.insert( - name, - ( - Pack::default(), - SerdeObject::Str(Cow::Borrowed(v)), - ), - ); - pack.subpacks.push(map); - } - packs.push(pack); - Ok(()) - }) - }; - match result { - Err(e) => { - self.interp.error.insert(e); - return Err(E::custom("type/value mismatch")); - }, - _ => (), - } - Ok((packs, obj)) - } - fn visit_string(self, v: String) -> Result - where - E: serde::de::Error, - { - // TODO try to avoid cloning - self.visit_str(&*v) - } - fn visit_bytes(self, v: &[u8]) -> Result - where - E: serde::de::Error, - { - vs!(self (Cow::Owned(v.to_owned())) Bytes (Type::Bytes | Type::ByteBuf)) - } - fn visit_borrowed_bytes(self, v: &'de [u8]) -> Result - where - E: serde::de::Error, - { - vs!(self (Cow::Borrowed(v)) Bytes (Type::Bytes | Type::ByteBuf)) - } - fn visit_byte_buf(self, v: Vec) -> Result - where - E: serde::de::Error, - { - // TODO try to avoid cloning - self.visit_bytes(&*v) - } - fn visit_none(self) -> Result - where - E: serde::de::Error, - { - vs!(self {} None (Type::Option)) - } - fn visit_some(self, deserializer: D) -> Result - where - D: serde::de::Deserializer<'de>, - { - todo!() - } - fn visit_unit(self) -> Result - where - E: serde::de::Error, - { - vs!(self {} Unit (Type::Unit)) - } - fn visit_newtype_struct( - self, - deserializer: D - ) -> Result - where - D: serde::de::Deserializer<'de>, - { - todo!() - } - fn visit_seq(self, seq: A) -> Result - where - A: serde::de::SeqAccess<'de>, - { - let mut obj = None; - if self.collecting { - obj = Some(SerdeObject::Seq(Vec::new())); - } - todo!() - } - fn visit_map(self, mut map: A) -> Result - where - A: serde::de::MapAccess<'de>, - { - let old_collecting = self.collecting; - let pat = self.interp.pat; - let mut collecting = old_collecting; - let typeck = self.frames_mut().iter_active_mut().try_for_each(|frame| { - let ty = frame.get_type(); - match ty { - | Some((Type::Map, _)) - | Some((Type::Any, _)) - | Some((Type::IgnoredAny, _)) - | None - => {}, - Some((_, false)) => { - frame.matches = false; - return Ok(()); - }, - Some((_, true)) => { - return Err(MatchError::ValidationError) - }, - } - if frame.get_name(pat).is_some() { - collecting = true; - } - Ok(()) - }); - match typeck { - Err(e) => { - self.interp.error.insert(e); - return Err(A::Error::custom("type mismatch")); - }, - _ => (), - } - if let Err(e) = self.step_in() { return Err(e); } - self.collecting = collecting; - let mut subframes = Vec::new(); - let mut output_matches = Vec::new(); - self.frames().iter_active().for_each(|frame| { - if let Some((key_subtree, _)) = frame.key_subtree() { - subframes.push(Frame { - ops: &pat.protos[key_subtree], - iar: None, - overstep: 0, - matches: true, - poison: false, - }); - } - output_matches.push(false); - }); - let mut obj_inner = Vec::new(); - let mut output_packs = Vec::new(); - while let Some(packed_key) = { - let subinterp = Interpreter { - pat: pat, - frames: &mut subframes, - error: self.interp.error, - }; - let mut subpacker = Packer { - interp: subinterp, - collecting: self.collecting, - call_limit: self.call_limit, - }; - map.next_key_seed(&mut subpacker)? - } { - self.frames_mut().iter_active_mut().filter(|frame| { - frame.key_subtree().is_some() - }).zip(&mut subframes).for_each(|(frame, subframe)| { - frame.matches = subframe.matches; - // reset subframe for next iteration - // NOTE wait to reset subframe.matches when merging packs!!! - subframe.iar = None; - }); - self.frames_mut().iter_active_mut().for_each(|frame| { - // mark every non-subtree key as matching. - if frame.key_subtree().is_none() { - frame.matches = true; - } - }); - let packed_value = map.next_value_seed(&mut *self)?; - if self.collecting { - obj_inner.push( - (packed_key.1.unwrap(), packed_value.1.unwrap()), - ); - } - let mut key_packs_per_frame = packed_key.0.into_iter(); - let mut value_packs_per_frame = packed_value.0.into_iter(); - // whatever is active in self.frames(), if matches, has a pack - // whatever is in subframes, if matches, has a pack - // count(active self.frames() with subtree which match) is always - // smaller than count(subframes which match) because the former - // gets updated by next_value_seed - // count(active self.frames() with subtree) == count(subframes) - // tl;dr: need to figure out which packs produced by subframes line - // up with which packs produced by self, discarding extra subframes - // (where the corresponding self frame doesn't match) and accepting - // extra packs produced by self. - // NOTE: key_packs_per_frame ~ subframes - // value_packs_per_frame ~ self - // keys come first tho (key.merge_from(value)) - let mut iter_subframes = subframes.iter_mut(); - // related to output_packs - let mut pack_index = 0; - for (frame, out_matches) in self.frames().iter_active().zip({ - &mut output_matches - }) { - // check if this frame has an associated subframe - let subframe = if frame.key_subtree().is_some() { - // if there are more frames with associated subframes - // than there are subframes, panic - Some(iter_subframes.next().unwrap()) - } else { - None - }; - let mut new_pack = None; - if frame.matches && subframe.is_some() { - // this already implies subframe.matches - let mut key_pack = key_packs_per_frame.next().unwrap(); - let value_pack = value_packs_per_frame.next().unwrap(); - key_pack.merge_depth(value_pack); - new_pack = Some(key_pack); - } else if frame.matches { - // value matches but there's no subframe, carry on - let value_pack = value_packs_per_frame.next().unwrap(); - new_pack = Some(value_pack); - } else if !frame.matches && subframe.is_some() { - // frame didn't match but there was a subframe - let subframe = subframe.unwrap(); - if subframe.matches { - // subframe matched, remove key pack - let _ = key_packs_per_frame.next().unwrap(); - } else { - // neither matched, no relevant packs - // do reset subframe for next_key_seed tho! - subframe.matches = true; - } - } else { - // no relevant packs - } - if let Some(new_pack) = new_pack { - if !*out_matches { - *out_matches = true; - output_packs.insert(pack_index, Pack::default()); - } - let output_pack = &mut output_packs[pack_index]; - output_pack.subpacks.extend(new_pack.subpacks); - } - if *out_matches { - pack_index += 1; - } - } - } - let mut poison = false; - for (f, m) in self.frames_mut().iter_active_mut().zip(output_matches) { - f.matches = m; - if !m { - if let Some((_, false)) = f.key_subtree() { - poison = true; - } - } - } - let obj = SerdeObject::Map(obj_inner); - let mut final_packs = self.step_out(output_packs)?; - let mut iter_final_packs = 0..; - self.frames_mut().iter_active_mut().for_each(|frame| { - let ty = frame.get_type(); - match ty { - | Some((Type::Map, _)) - | Some((Type::Any, _)) - | Some((Type::IgnoredAny, _)) - | None - => { - frame.poison = poison; - let matched = std::mem::replace(&mut frame.matches, true); - if !matched { - final_packs.insert( - iter_final_packs.start, - Pack::default(), - ); - } - }, - _ => return, - } - let pack = &mut final_packs[iter_final_packs.next().unwrap()]; - if let Some(name) = frame.get_name(pat) { - // we can assume collecting == true - let old_pack = std::mem::take(pack); - let mut map = IndexMap::new(); - map.insert(name, (old_pack, obj.clone())); - pack.subpacks.push(map); - } - }); - self.collecting = old_collecting; - Ok((final_packs, collecting.then(|| obj))) - } - fn visit_enum(self, data: A) -> Result - where - A: serde::de::EnumAccess<'de>, - { - let mut obj = None; - if self.collecting { - obj = Some(SerdeObject::Enum { - variant: todo!(), - data: todo!(), - }); - } - todo!() - } -} - -/// A `Deserializer` for Datafu output. -/// -/// This converts from Datafu's internal representation (a "pack") into the -/// desired output type. -pub struct Unpacker<'pat, 'de> { - pack: Pack<'pat, 'de>, - call_limit: usize, -} - -impl<'pat, 'de> Unpacker<'pat, 'de> { - /// Unpacks a Datafu "pack". - pub fn new(pack: Pack<'pat, 'de>, call_limit: usize) -> Self { - Self { - pack, call_limit, - } - } -} - -impl<'pat, 'de> serde::Deserializer<'de> for Unpacker<'pat, 'de> { - // TODO datafu errors - type Error = serde::de::value::Error; - fn deserialize_any(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_bool(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_i8(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_i16(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_i32(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_i64(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_u8(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_u16(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_u32(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_u64(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_f32(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_f64(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_char(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_str(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_string(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_bytes(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_byte_buf(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_option(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_unit(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_unit_struct(self, _: &'static str, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_newtype_struct(self, _: &'static str, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_seq(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_tuple(self, _: usize, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_tuple_struct(self, _: &'static str, _: usize, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_map(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_struct( - self, - _: &'static str, - fields: &'static [&'static str], - visitor: V, - ) -> Result - where - V: serde::de::Visitor<'de>, - { - todo!() - } - fn deserialize_enum(self, _: &'static str, _: &'static [&'static str], _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_identifier(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_ignored_any(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } -} - -/// Deserializes a SerdeObject -pub(crate) struct SerdeObjectDeserializer<'de, E> { - pub(crate) obj: SerdeObject<'de>, - pub(crate) value: Option>, - pub(crate) _e: PhantomData E>, -} - -impl<'de, E> serde::de::Deserializer<'de> for SerdeObjectDeserializer<'de, E> -where - E: serde::de::Error, -{ - type Error = E; - fn deserialize_any(self, v: V) -> Result - where - V: serde::de::Visitor<'de>, - { - match self.obj { - SerdeObject::Bool(x) => v.visit_bool(x), - SerdeObject::I8(x) => v.visit_i8(x), - SerdeObject::I16(x) => v.visit_i16(x), - SerdeObject::I32(x) => v.visit_i32(x), - SerdeObject::I64(x) => v.visit_i64(x), - SerdeObject::I128(x) => v.visit_i128(x), - SerdeObject::U8(x) => v.visit_u8(x), - SerdeObject::U16(x) => v.visit_u16(x), - SerdeObject::U32(x) => v.visit_u32(x), - SerdeObject::U64(x) => v.visit_u64(x), - SerdeObject::U128(x) => v.visit_u128(x), - SerdeObject::F32(x) => v.visit_f32(x), - SerdeObject::F64(x) => v.visit_f64(x), - SerdeObject::Char(x) => v.visit_char(x), - SerdeObject::Str(Cow::Owned(x)) => v.visit_string(x), - SerdeObject::Str(Cow::Borrowed(x)) => v.visit_borrowed_str(x), - SerdeObject::Bytes(Cow::Owned(x)) => v.visit_byte_buf(x), - SerdeObject::Bytes(Cow::Borrowed(x)) => v.visit_borrowed_bytes(x), - SerdeObject::Some(x) => v.visit_some(x.into_deserializer()), - SerdeObject::None => v.visit_none(), - SerdeObject::Unit => v.visit_unit(), - SerdeObject::Seq(x) => todo!(), - SerdeObject::Map(x) => todo!(), - SerdeObject::NewtypeStruct(x) => { - v.visit_newtype_struct(x.into_deserializer()) - }, - SerdeObject::Enum { variant, data } => todo!(), - } - } - fn deserialize_ignored_any(self, v: V) -> Result - where - V: serde::de::Visitor<'de>, - { - drop(self); - v.visit_unit() - } - serde::forward_to_deserialize_any! { - bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string - bytes byte_buf option unit unit_struct newtype_struct seq tuple - tuple_struct map struct enum identifier - } -} - -#[cfg(test)] -mod tests { - use super::Packer; - use super::super::PatternConstants; - - use crate::vm::MAX_CALLS; - use crate::vm::Interpreter; - use crate::vm::Type; - use crate::vm::Value; - use crate::vm::PatternElement; - use crate::vm::SerdeObject; - use crate::vm::Frame; - - use postcard::Deserializer as PostcardDeserializer; - use serde::de::DeserializeSeed as _; - use serde_json::Deserializer as JsonDeserializer; - - use crate::errors::MatchError; - - #[test] - #[should_panic] - fn test_broken() { - // broken pattern, should never be emitted by parser. make sure it's - // not accepted. - let consts = PatternConstants::<()>::default(); - let mut err = Default::default(); - let mut frames = Default::default(); - let interp = Interpreter::new(&consts, &mut err, &mut frames); - let _ = Packer::new(interp, MAX_CALLS); - } - - #[test] - fn test_empty_create() { - // test creating the parser with an empty pattern. - let mut consts = PatternConstants::<()>::default(); - consts.protos.push(Vec::new()); - let mut err = Default::default(); - let mut frames = Default::default(); - let interp = Interpreter::new(&consts, &mut err, &mut frames); - let _ = Packer::new(interp, MAX_CALLS); - } - - #[test] - fn test_empty_match() { - // test matching something with an empty pattern. - let mut consts = PatternConstants::<()>::default(); - consts.protos.push(Vec::new()); - let mut der = JsonDeserializer::from_str("{}"); - let mut err = Default::default(); - let mut frames = Default::default(); - let interp = Interpreter::new(&consts, &mut err, &mut frames); - let pack = Packer::new(interp, MAX_CALLS).deserialize(&mut der).unwrap(); - } - - #[test] - fn test_simple_match() { - // test matching a simple value - let mut consts = PatternConstants::<()>::default(); - consts.strings.push("hello".into()); - consts.protos.push(vec![ - PatternElement::Value { - name: Some(0), - value: Some(Value::Type { - ty: Type::U64, - skippable: false, - }), - }, - ]); - let mut der = JsonDeserializer::from_str("3"); - let mut err = Default::default(); - let mut frames = Default::default(); - let interp = Interpreter::new(&consts, &mut err, &mut frames); - let packed = Packer::new(interp, MAX_CALLS).deserialize(&mut der); - let (packs, obj) = packed.unwrap(); - assert!(obj.is_none()); - assert_eq!(packs[0].subpacks[0]["hello"].1, SerdeObject::U64(3)); - } - - #[test] - fn test_simple_error() { - // test a value that doesn't match (serde_json error) - let mut consts = PatternConstants::<()>::default(); - consts.strings.push("hello".into()); - consts.protos.push(vec![ - PatternElement::Value { - name: Some(0), - value: Some(Value::Type { - ty: Type::U64, - skippable: false, - }), - }, - ]); - let mut der = JsonDeserializer::from_str("\"hello\""); - let mut err = Default::default(); - let mut frames = Default::default(); - let interp = Interpreter::new(&consts, &mut err, &mut frames); - let packed = Packer::new(interp, MAX_CALLS).deserialize(&mut der); - // error produced by serde_json - assert!(packed.is_err()); - } - - #[test] - fn test_basic_multiframe() { - // test multiple frames (matching and non-matching) - let mut consts = PatternConstants::<()>::default(); - consts.strings.push("a".into()); - consts.strings.push("b".into()); - consts.protos.push(vec![ - PatternElement::Value { - name: Some(0), - value: Some(Value::Type { - ty: Type::U64, - skippable: true, - }), - }, - ]); - consts.protos.push(vec![ - PatternElement::Value { - name: Some(1), - value: Some(Value::Type { - ty: Type::Bool, - skippable: true, - }), - }, - ]); - let mut der = JsonDeserializer::from_str(r#"10"#); - let mut err = Default::default(); - let mut frames: Vec<_> = Default::default(); - frames.push(Frame { - ops: &consts.protos[0], - iar: None, - matches: true, - overstep: 0, - poison: false, - }); - frames.push(Frame { - ops: &consts.protos[1], - iar: None, - matches: true, - overstep: 0, - poison: false, - }); - let interp = Interpreter { - pat: &consts, - error: &mut err, - frames: &mut frames, - }; - let packed = Packer::new(interp, MAX_CALLS).deserialize(&mut der); - let (packs, obj) = packed.unwrap(); - assert!(obj.is_none()); - assert_eq!( - packs[0].subpacks[0]["a"].1, - SerdeObject::U64(10), - ); - assert_eq!(packs.len(), 1); - assert!(frames[0].matches); - assert!(!frames[1].matches); - } - - #[test] - fn test_map() { - // test visit_map - let mut consts = PatternConstants::<()>::default(); - consts.strings.push("key".into()); - consts.strings.push("value".into()); - consts.protos.push(vec![ - PatternElement::Value { - name: Some(0), - value: None, - }, - ]); - consts.protos.push(vec![ - PatternElement::Value { - name: None, - value: Some(Value::Type { - ty: Type::Map, - skippable: false, - }), - }, - PatternElement::Tag { - key_subtree: 0, - optional: true, - }, - PatternElement::Value { - name: Some(1), - value: Some(Value::Type { - ty: Type::U64, - skippable: false, - }), - }, - ]); - let mut der = JsonDeserializer::from_str(r#"{"hello": 0, "world": 1}"#); - let mut err = Default::default(); - let mut frames = Default::default(); - let interp = Interpreter::new(&consts, &mut err, &mut frames); - let packed = Packer::new(interp, MAX_CALLS).deserialize(&mut der); - let (packs, obj) = packed.unwrap(); - assert!(obj.is_none()); - assert_eq!( - packs[0].subpacks[0]["key"].1, - SerdeObject::Str("hello".into()), - ); - assert_eq!( - packs[0].subpacks[0]["key"].0.subpacks[0]["value"].1, - SerdeObject::U64(0), - ); - assert_eq!( - packs[0].subpacks[1]["key"].1, - SerdeObject::Str("world".into()), - ); - assert_eq!( - packs[0].subpacks[1]["key"].0.subpacks[0]["value"].1, - SerdeObject::U64(1), - ); - } - - #[test] - fn test_parser_empty() { - // use a parsed empty pattern to test Packer - let consts = crate::parser::parse::<&'static str, &'static str, ()>( - "", - None, - None, - ).unwrap(); - let mut der = JsonDeserializer::from_str(r#"{"hello": 0, "world": 1}"#); - let mut err = Default::default(); - let mut frames = Default::default(); - let interp = Interpreter::new( - &consts, - &mut err, - &mut frames, - //&mut output, - ); - let (mut packs, obj) = Packer::new( - interp, - MAX_CALLS, - ).deserialize(&mut der).unwrap(); - assert!(obj.is_none()); - assert_eq!(packs.len(), 1); - let pack = packs.pop().unwrap(); - assert!(pack.subpacks.is_empty()); - } - - #[test] - fn test_parser_basic() { - // use a basic parsed pattern to test Packer - let consts = crate::parser::parse::<&'static str, &'static str, ()>( - ":map->[name:str]value:str", - None, - None, - ).unwrap(); - let data = &[ - 0x02, // map length (2) - 0x04, // string length (4) - 0x6E, 0x61, 0x6D, 0x65, // b'name' - 0x01, // string length (1) - 0x61, // b'a' - 0x05, // string length (5) - 0x76, 0x61, 0x6C, 0x75, 0x65, // b'value' - 0x01, // string length (1) - 0x62, // b'b' - ]; - let mut der = PostcardDeserializer::from_bytes(data); - let mut err = Default::default(); - let mut frames = Default::default(); - let interp = Interpreter::new( - &consts, - &mut err, - &mut frames, - //&mut output, - ); - let result = Packer::new( - interp, - MAX_CALLS, - ).deserialize(&mut der); - let (mut packs, obj) = result.unwrap(); - assert!(obj.is_none()); - assert_eq!(packs.len(), 1); - let pack = packs.pop().unwrap(); - assert_eq!(pack.subpacks.len(), 2); - } - - #[test] - fn test_parser_basic_subtree() { - // use a basic parsed pattern with a subtree to test Packer - let consts = crate::parser::parse::<&'static str, &'static str, ()>( - ":map(->[name:str]value:str)", - None, - None, - ).unwrap(); - let data = &[ - 0x02, // map length (2) - 0x04, // string length (4) - 0x6E, 0x61, 0x6D, 0x65, // b'name' - 0x01, // string length (1) - 0x61, // b'a' - 0x05, // string length (5) - 0x76, 0x61, 0x6C, 0x75, 0x65, // b'value' - 0x01, // string length (1) - 0x62, // b'b' - ]; - let mut der = PostcardDeserializer::from_bytes(data); - let mut err = Default::default(); - let mut frames = Default::default(); - let interp = Interpreter::new( - &consts, - &mut err, - &mut frames, - //&mut output, - ); - let result = Packer::new( - interp, - MAX_CALLS, - ).deserialize(&mut der); - let (mut packs, obj) = result.unwrap(); - assert!(obj.is_none()); - assert_eq!(packs.len(), 1); - let pack = packs.pop().unwrap(); - assert_eq!(pack.subpacks.len(), 2); - } - - #[test] - fn test_empty_subtrees() { - // tests empty subtrees - let consts = crate::parser::parse::<&'static str, &'static str, ()>( - ":map()(())", - None, - None, - ).unwrap(); - dbg!(&consts); - let data = r#"{"hello": "world"}"#; - let mut der = JsonDeserializer::from_str(data); - let mut err = Default::default(); - let mut frames = Default::default(); - let interp = Interpreter::new( - &consts, - &mut err, - &mut frames, - //&mut output, - ); - let result = Packer::new( - interp, - MAX_CALLS, - ).deserialize(&mut der); - let (mut packs, obj) = result.unwrap(); - assert!(obj.is_none()); - assert_eq!(packs.len(), 1); - let pack = packs.pop().unwrap(); - assert_eq!(pack.subpacks.len(), 0); - } - - #[test] - fn test_parser_subtrees() { - // use a parsed pattern with subtrees to test Packer - // also test a non-self-describing format (postcard) - let consts = crate::parser::parse::<&'static str, &'static str, ()>( - ":map(->['name'?]name:str)?(->['value'?]value:u32)?(->[:str]:?ignored_any)", - None, - None, - ).unwrap(); - let data = &[ - 0x02, // map length (2) - 0x04, // string length (4) - 0x6E, 0x61, 0x6D, 0x65, // b'name' - 0x01, // string length (1) - 0x61, // b'a' - 0x05, // string length (5) - 0x76, 0x61, 0x6C, 0x75, 0x65, // b'value' - 0x01, // 1 - ]; - let mut der = PostcardDeserializer::from_bytes(data); - let mut err = Default::default(); - let mut frames = Default::default(); - let interp = Interpreter::new( - &consts, - &mut err, - &mut frames, - //&mut output, - ); - let result = Packer::new( - interp, - MAX_CALLS, - ).deserialize(&mut der); - let (mut packs, obj) = result.unwrap(); - assert!(obj.is_none()); - assert_eq!(packs.len(), 1); - let pack = packs.pop().unwrap(); - assert_eq!(pack.subpacks.len(), 1); - assert_eq!(pack.subpacks[0]["name"].1, SerdeObject::Str(From::from("a"))); - assert_eq!(pack.subpacks[0]["value"].1, SerdeObject::U32(1)); - } - - #[test] - fn test_parser_subtrees_validation() { - // checks that the Packer can validate multiple keys. - let consts = crate::parser::parse::<&'static str, &'static str, ()>( - ":map(->['name'?]name:str)(->['value'?]value:u32)(->[:str]:?ignored_any)", - None, - None, - ).unwrap(); - let data = &[ - 0x02, // map length (2) - 0x04, // string length (4) - 0x6E, 0x61, 0x6D, 0x65, // b'name' - 0x01, // string length (1) - 0x61, // b'a' - 0x05, // string length (5) - 0x76, 0x61, 0x6C, 0x75, 0x65, // b'value' - 0x01, // 1 - ]; - let mut der = PostcardDeserializer::from_bytes(data); - let mut err = Default::default(); - let mut frames = Default::default(); - let interp = Interpreter::new( - &consts, - &mut err, - &mut frames, - //&mut output, - ); - let result = Packer::new( - interp, - MAX_CALLS, - ).deserialize(&mut der); - let (mut packs, obj) = result.unwrap(); - assert!(obj.is_none()); - assert_eq!(packs.len(), 1); - let pack = packs.pop().unwrap(); - assert_eq!(pack.subpacks.len(), 1); - assert_eq!(pack.subpacks[0]["name"].1, SerdeObject::Str(From::from("a"))); - assert_eq!(pack.subpacks[0]["value"].1, SerdeObject::U32(1)); - } - - #[test] - fn test_one_or_more_subtrees_validation() { - // checks that the Packer supports OR-like validation semantics - let consts = crate::parser::parse::<&'static str, &'static str, ()>( - ":map((->['name'?]?name)?(->['value'?]?value)?)(->[:str]:u32)", - None, - None, - ).unwrap(); - let data = &[ - 0x01, // map length (1) - 0x04, // string length (4) - 0x6E, 0x61, 0x6D, 0x65, // b'name' - 0x01, // 1 - ]; - let mut der = PostcardDeserializer::from_bytes(data); - let mut err = Default::default(); - let mut frames = Default::default(); - let interp = Interpreter::new( - &consts, - &mut err, - &mut frames, - //&mut output, - ); - let result = Packer::new( - interp, - MAX_CALLS, - ).deserialize(&mut der); - let (mut packs, obj) = result.unwrap(); - assert!(obj.is_none()); - assert_eq!(packs.len(), 1); - let pack = packs.pop().unwrap(); - assert_eq!(pack.subpacks.len(), 1); - assert_eq!(pack.subpacks[0]["name"].1, SerdeObject::U32(1)); - } - - #[test] - fn test_one_or_more_subtrees_validation_failure() { - // checks that the Packer supports OR-like validation semantics - // (failure) - let consts = crate::parser::parse::<&'static str, &'static str, ()>( - ":map((->['name'?]?name)?(->['value'?]?value)?)(->[:str]:u32)", - None, - None, - ).unwrap(); - let data = &[ - 0x01, // map length (1) - 0x04, // string length (4) - 0x6E, 0x65, 0x6D, 0x65, // b'neme' - 0x01, // 1 - ]; - let mut der = PostcardDeserializer::from_bytes(data); - let mut err = Default::default(); - let mut frames = Default::default(); - let interp = Interpreter::new( - &consts, - &mut err, - &mut frames, - //&mut output, - ); - let result = Packer::new( - interp, - MAX_CALLS, - ).deserialize(&mut der); - assert!(matches!(err, Some(MatchError::ValidationError))); - assert!(result.is_err()); - } - - // FIXME this should test that the map doesn't contain other keys aside - // from the ones requested by the pattern, but this doesn't match existing - // datafu semantics at all. we need new syntax for it. - //#[test] - //fn test_parser_subtrees_no_additional_keys() { - // // use a parsed pattern with subtrees to test Packer - // // also test a non-self-describing format (postcard) - // // also require at least one subtree to match on every iteration. - // // also this checks for validation error - // let consts = crate::parser::parse::<&'static str, &'static str, ()>( - // ":map((->['name'?]name)?(->['value'?]value)?)(->[:str]:u32)", - // None, - // None, - // ).unwrap(); - // let data = &[ - // 0x03, // map length (3) - // 0x04, // string length (4) - // 0x6E, 0x61, 0x6D, 0x65, // b'name' - // 0x01, // 1 - // 0x05, // string length (5) - // 0x76, 0x61, 0x6C, 0x75, 0x65, // b'value' - // 0x01, // 1 - // 0x05, // string length (5) - // 0x76, 0x65, 0x6C, 0x75, 0x65, // b'velue' - // 0x01, // 1 - // ]; - // let mut der = PostcardDeserializer::from_bytes(data); - // let mut err = Default::default(); - // let mut frames = Default::default(); - // let interp = Interpreter::new( - // &consts, - // &mut err, - // &mut frames, - // //&mut output, - // ); - // let result = Packer::new( - // interp, - // MAX_CALLS, - // ).deserialize(&mut der); - // assert!(matches!(err, Some(MatchError::ValidationError))); - // assert!(result.is_err()); - //} - - #[test] - fn test_key_optionally_missing() { - // test a pattern with a missing key - let consts = crate::parser::parse::<&'static str, &'static str, ()>( - " - :map - ->['a'?]:map - ->[b:?str]:?map - (->['x'?]x:?bool)? - (->['y'?]?y:?bool)? - ", - None, - None - ).unwrap(); - let data = r#"{"a": {"1": {"y": true}, "2": {"x": true, "y": true}}}"#; - let mut der = JsonDeserializer::from_str(data); - let mut err = Default::default(); - let mut frames = Default::default(); - let interp = Interpreter::new( - &consts, - &mut err, - &mut frames, - //&mut output, - ); - let result = Packer::new( - interp, - MAX_CALLS, - ).deserialize(&mut der); - let (mut packs, obj) = result.unwrap(); - assert!(obj.is_none()); - assert_eq!(packs.len(), 1); - let pack = &packs[0]; - assert_eq!(pack.subpacks.len(), 1); - let b = &pack.subpacks[0]["b"]; - assert_eq!(b.1, SerdeObject::Str(From::from("2"))); - assert_eq!(b.0.subpacks.len(), 1); - assert_eq!(b.0.subpacks[0]["x"].1, SerdeObject::Bool(true)); - assert_eq!(b.0.subpacks[0]["y"].1, SerdeObject::Bool(true)); - } - - #[test] - fn test_map_without_values() { - // test that the map without values still collects the key - let consts = crate::parser::parse::<&'static str, &'static str, ()>( - " - :map - ->[a:?str]?:?map - ->[b:?str]?:?map - ", - None, - None - ).unwrap(); - let data = r#"{"a": {}}"#; - let mut der = JsonDeserializer::from_str(data); - let mut err = Default::default(); - let mut frames = Default::default(); - let interp = Interpreter::new( - &consts, - &mut err, - &mut frames, - //&mut output, - ); - let result = Packer::new( - interp, - MAX_CALLS, - ).deserialize(&mut der); - let (mut packs, obj) = result.unwrap(); - assert!(obj.is_none()); - assert_eq!(packs.len(), 1); - let pack = &packs[0]; - assert_eq!(pack.subpacks.len(), 1); - let a = &pack.subpacks[0]["a"]; - assert_eq!(a.1, SerdeObject::Str(From::from("a"))); - assert_eq!(a.0.subpacks.len(), 0); - } - - #[test] - fn test_guaranteed_at_least_once() { - // test that at least one match is required before collecting the key - let consts = crate::parser::parse::<&'static str, &'static str, ()>( - " - :map - ->[a:?str]:?map - ->[b:?str]:?map - ", - None, - None - ).unwrap(); - let data = r#"{"a": {}}"#; - let mut der = JsonDeserializer::from_str(data); - let mut err = Default::default(); - let mut frames = Default::default(); - let interp = Interpreter::new( - &consts, - &mut err, - &mut frames, - //&mut output, - ); - let result = Packer::new( - interp, - MAX_CALLS, - ).deserialize(&mut der); - let (mut packs, obj) = result.unwrap(); - assert!(obj.is_none()); - assert_eq!(packs.len(), 0); - } - - #[test] - fn test_realish_use_case() { - // use a parsed pattern that might actually be used in the real world. - let consts = crate::parser::parse::<&'static str, &'static str, ()>( - " - :map - ->['projects'?]:map - ->[commit:?str]:?map - ->[url:?str]:?map - ->[branch:?str]:?map - (->['active'?]active:?bool)? - (->['federate'?]?federate:?bool)? - ", - None, - None - ).unwrap(); - let data = r#" - {"base_url": "https://ganarchy.autistic.space", "repo_list_srcs": {"https://ganarchy.autistic.space/index.toml": {"active": false}}, "projects": {"a8fb5087f79eafe312db270082c052c427b208c2": {"https://soniex2.autistic.space/git-repos/mmorfc.git": {"HEAD": {"active": true, "pinned": true}}}, "2d0b363fe3179087de59d9ef4a2d14af21d89071": {"https://soniex2.autistic.space/git-repos/chewstuff.git": {"HEAD": {"active": true, "pinned": true}}}}} - "#; - let mut der = JsonDeserializer::from_str(data); - let mut err = Default::default(); - let mut frames = Default::default(); - let interp = Interpreter::new( - &consts, - &mut err, - &mut frames, - //&mut output, - ); - let result = Packer::new( - interp, - MAX_CALLS, - ).deserialize(&mut der); - let (mut packs, obj) = result.unwrap(); - assert!(obj.is_none()); - assert_eq!(packs.len(), 1); - let pack = &packs[0]; - assert_eq!(pack.subpacks.len(), 2); - - let commit = &pack.subpacks[0]["commit"]; - assert_eq!(commit.1, SerdeObject::Str(From::from("a8fb5087f79eafe312db270082c052c427b208c2"))); - assert_eq!(commit.0.subpacks.len(), 1); - let url = &commit.0.subpacks[0]["url"]; - assert_eq!(url.1, SerdeObject::Str(From::from("https://soniex2.autistic.space/git-repos/mmorfc.git"))); - assert_eq!(url.0.subpacks.len(), 1); - let branch = &url.0.subpacks[0]["branch"]; - assert_eq!(branch.1, SerdeObject::Str(From::from("HEAD"))); - assert_eq!(branch.0.subpacks.len(), 1); - let active = &branch.0.subpacks[0]["active"]; - assert_eq!(active.1, SerdeObject::Bool(true)); - - let commit = &pack.subpacks[1]["commit"]; - assert_eq!(commit.1, SerdeObject::Str(From::from("2d0b363fe3179087de59d9ef4a2d14af21d89071"))); - assert_eq!(commit.0.subpacks.len(), 1); - let url = &commit.0.subpacks[0]["url"]; - assert_eq!(url.1, SerdeObject::Str(From::from("https://soniex2.autistic.space/git-repos/chewstuff.git"))); - assert_eq!(url.0.subpacks.len(), 1); - let branch = &url.0.subpacks[0]["branch"]; - assert_eq!(branch.1, SerdeObject::Str(From::from("HEAD"))); - assert_eq!(branch.0.subpacks.len(), 1); - assert_eq!(active.1, SerdeObject::Bool(true)); - } -} - diff --git a/src/vm/de/mod.rs b/src/vm/de/mod.rs new file mode 100644 index 0000000..3cba18a --- /dev/null +++ b/src/vm/de/mod.rs @@ -0,0 +1,2175 @@ +// Copyright (C) 2022 Soni L. +// SPDX-License-Identifier: MIT OR Apache-2.0 + +//! Deserialization-related parts of the VM. + +use std::borrow::Cow; +use std::marker::PhantomData; + +use indexmap::IndexMap; + +use serde::Serialize; +use serde::de::Error as _; +use serde::de::IntoDeserializer as _; + +use smallvec::SmallVec; + +use super::Frame; +use super::Interpreter; +use super::Pack; +use super::PatternConstants; +use super::PatternElement; +use super::SerdeObject; +use super::Type; +use super::Value; +use crate::errors::MatchError; +use crate::errors::QueryError; + +mod unpacker; + +pub use unpacker::Unpacker; + +/// A `DeserializeSeed` for Datafu input. +/// +/// This converts from Serde to Datafu's internal representation (a "pack"). +pub(crate) struct Packer<'pat, 'state, O: Serialize> { + /// The global interpreter state. + interp: Interpreter<'pat, 'state, O>, + /// Current call limit. + call_limit: usize, + /// Whether we're collecting values. + collecting: bool, +} + +struct FramesMut<'packer, 'pat> { + frames: &'packer mut Vec>, +} + +struct Frames<'packer, 'pat> { + frames: &'packer Vec>, +} + +impl<'packer, 'pat> FramesMut<'packer, 'pat> { + fn iter_mut<'a>( + &'a mut self, + ) -> impl Iterator> + DoubleEndedIterator + where + 'packer: 'a, + { + self.frames.iter_mut() + } + + fn iter_active_mut<'a>( + &'a mut self, + ) -> impl Iterator> + DoubleEndedIterator + where + 'packer: 'a, + { + self.iter_mut().filter(|frame| { + frame.active() + }) + } +} + +impl<'packer, 'pat> Frames<'packer, 'pat> { + fn iter<'a>( + &'a self, + ) -> impl Iterator> + DoubleEndedIterator + where + 'packer: 'a, + { + self.frames.iter() + } + + fn iter_active<'a>( + &'a self, + ) -> impl Iterator> + DoubleEndedIterator + where + 'packer: 'a, + { + self.iter().filter(|frame| { + frame.active() + }) + } +} + +impl<'pat, 'state, 'de, O: Serialize> Packer<'pat, 'state, O> { + /// Creates a new Packer. + pub(crate) fn new( + interp: Interpreter<'pat, 'state, O>, + call_limit: usize, + ) -> Self { + Self { + interp: interp, + call_limit: call_limit, + collecting: false, + } + } + + fn frames_mut(&mut self) -> FramesMut<'_, 'pat> { + FramesMut { + frames: &mut *self.interp.frames, + } + } + + fn frames(&mut self) -> Frames<'_, 'pat> { + Frames { + frames: &*self.interp.frames, + } + } + + /// Steps the VM into the next operation. + fn step_in(&mut self) -> Result<(), E> { + if self.call_limit > 0 { + self.call_limit -= 1; + } else { + self.interp.error.insert(MatchError::StackOverflow); + return Err(E::custom("stack overflow")); + } + // iterate up to the *live* length (i.e. the loop is allowed to modify + // the length). + // NOTE: we need to use while-let so as to not borrow anything in an + // iterator. filtering happens on the *result* of the iterator. + let mut index_iter = 0..; + while let Some(index) = index_iter.next().filter(|&i| { + i < self.interp.frames.len() + }) { + let frame = &mut self.interp.frames[index]; + if frame.overstep > 0 || !frame.matches { + // overstepped and non-matching frames + frame.overstep += 1; + // FIXME check if this is correct (it probably isn't) + //frame.matches = false; + } else { + if !frame.next() { + // empty/end-of frames + // 1 layer of step-in. + // step-out will undo this. + // this is correct because this branch implies overstep = 0 + frame.overstep = 1; + // FIXME we still don't think this is correct + frame.matches = false; + } else if matches!( + frame.op(), + PatternElement::SubtreeMarker, + ) { + // subtrees! + // these are tricky, because the current frame can be moved + // in memory. so we have to use indexing every time. + // tho first we set it as overstep because it has special + // handling. + frame.overstep = 1; + frame.matches = false; + let mut at = index + 1; + while self.interp.frames[index].next() { + let op = self.interp.frames[index].raw_op(); + if let PatternElement::ValueSubtree { + index: subtree, .. + } = op { + let new_frame = Frame { + ops: &self.interp.pat.protos[subtree][..], + iar: None, + overstep: 0, + matches: true, + poison: false, + }; + debug_assert!(!new_frame.ops.is_empty()); + // we want the "newest" frame last, so it is + // easier to unwind back. + self.interp.frames.insert(at, new_frame); + at += 1; + } else { + unreachable!() + } + } + } + } + } + Ok(()) + } + + /// Steps the VM back into the previous operation. + fn step_out( + &mut self, + mut packs: Vec>, + ) -> Result>, E> { + // this code attempts to maintain the logical invariant of: + // self.frames().iter_active().count() == packs.len() + self.call_limit += 1; + let mut index_iter = 0..; + let mut pack_index = packs.len(); + let orig_len = self.interp.frames.len(); + while let Some(index) = index_iter.next().filter(|&i| { + i < orig_len + }) { + // iterate backwards + let index = orig_len - index - 1; + let frame = &mut self.interp.frames[index]; + let mut has_pack = frame.matches; + if frame.overstep > 0 { + // handle overstep + frame.overstep -= 1; + } else { + if has_pack { + pack_index -= 1; + } + if frame.poison { + // FIXME should frame.poison always remove the pack? + if frame.is_value() { + if has_pack { + packs.remove(pack_index); + } + frame.matches = false; + has_pack = false; + frame.poison = false; + } + } + // unwind frame + if frame.prev() { + // successfully unwound. do nothing. + } else { + // find parent frame. + let mut count = 1; + let mut target = index; + let mut target_pack = pack_index; + while count > 0 && target > 0 { + target -= 1; + if self.interp.frames[target].matches { + debug_assert!(target_pack > 0); + target_pack -= 1; + } + match self.interp.frames[target].num_subtrees() { + Some((num, _)) if num < count => { + count -= num; + }, + Some((num, _)) => { + count = 0; + }, + None => { + count += 1; + }, + } + } + if count == 0 { + // found target frame + let frame = self.interp.frames.remove(index); + let target_frame = &mut self.interp.frames[target]; + let (_, optional) = target_frame.value_subtree(); + target_frame.prev().then(|| ()).unwrap(); + if has_pack { + let pack = packs.remove(pack_index); + if !target_frame.matches { + packs.insert(target_pack, pack); + target_frame.matches = true; + pack_index += 1; + } else { + packs[target_pack].zip(pack); + } + } else { + //if frame.poison { + // target_frame.poison = true; + //} + if !optional { + self.interp.error.insert({ + MatchError::ValidationError + }); + return Err(E::custom("subtree failed")); + } + } + if let Some((0, _)) = target_frame.num_subtrees() { + target_frame.overstep = 0; + } + } + } + } + } + Ok(packs) + } +} + +impl<'pat, 'state, 'de, O> serde::de::DeserializeSeed<'de> +for &mut Packer<'pat, 'state, O> +where + O: Serialize, +{ + type Value = (Vec>, Option>); + fn deserialize( + mut self, + deserializer: D, + ) -> Result + where + D: serde::Deserializer<'de> + { + if let Err(e) = self.step_in() { return Err(e); } + let pat = self.interp.pat; + let target_type = self.frames().iter_active().try_fold( + Type::IgnoredAny, + |target_type, frame| { + Ok(match (target_type, frame.get_type()) { + // required type binds stronger than any/ignored_any + (Type::IgnoredAny, Some((ty, true))) => ty, + (Type::Any, Some((ty, true))) => ty, + // and also stronger than optional any/ignored_any + (ty, Some((Type::IgnoredAny, _))) => ty, + (ty, Some((Type::Any, _))) => ty, + // None effectively falls back into any + (Type::IgnoredAny, None) => Type::Any, + (ty, None) => ty, + // prefer owned if any branch prefers owned + (Type::String, Some((Type::Str, true))) => { + Type::String + }, + (Type::Str, Some((Type::String, true))) => { + Type::String + }, + (Type::Bytes, Some((Type::ByteBuf, true))) => { + Type::ByteBuf + }, + (Type::ByteBuf, Some((Type::Bytes, true))) => { + Type::ByteBuf + }, + // types which are the same are okay + (left, Some((right, _))) if left == right => { + left + }, + // optional type vs Any/IgnoredAny prefers Any + (Type::IgnoredAny, Some((_, false))) => Type::Any, + (Type::Any, Some((_, false))) => Type::Any, + // types which are not the same are an error because we + // only request a specific type if it's actually required + (left, Some((right, _))) => { + return Err(MatchError::Unsatisfiable); + }, + }) + }, + ); + let target_type = match target_type { + Ok(target_type) => target_type, + Err(e) => { + self.interp.error.insert(e); + return Err(D::Error::custom("type conflict")); + }, + }; + match target_type { + Type::Any => deserializer.deserialize_any(&mut *self), + Type::IgnoredAny => { + deserializer.deserialize_ignored_any(&mut *self) + }, + Type::Bool => deserializer.deserialize_bool(&mut *self), + Type::I8 => deserializer.deserialize_i8(&mut *self), + Type::I16 => deserializer.deserialize_i16(&mut *self), + Type::I32 => deserializer.deserialize_i32(&mut *self), + Type::I64 => deserializer.deserialize_i64(&mut *self), + Type::I128 => deserializer.deserialize_i128(&mut *self), + Type::U8 => deserializer.deserialize_u8(&mut *self), + Type::U16 => deserializer.deserialize_u16(&mut *self), + Type::U32 => deserializer.deserialize_u32(&mut *self), + Type::U64 => deserializer.deserialize_u64(&mut *self), + Type::U128 => deserializer.deserialize_u128(&mut *self), + Type::F32 => deserializer.deserialize_f32(&mut *self), + Type::F64 => deserializer.deserialize_f64(&mut *self), + Type::Char => deserializer.deserialize_char(&mut *self), + Type::Str if !self.collecting => { + deserializer.deserialize_str(&mut *self) + }, + Type::Str | Type::String => { + deserializer.deserialize_string(&mut *self) + }, + Type::Bytes if !self.collecting => { + deserializer.deserialize_bytes(&mut *self) + }, + Type::Bytes | Type::ByteBuf => { + deserializer.deserialize_byte_buf(&mut *self) + }, + Type::Option => deserializer.deserialize_option(&mut *self), + Type::Unit => deserializer.deserialize_unit(&mut *self), + Type::Seq => deserializer.deserialize_seq(&mut *self), + Type::Map => deserializer.deserialize_map(&mut *self), + Type::Identifier => { + deserializer.deserialize_identifier(&mut *self) + }, + Type::Tuple(len) => { + deserializer.deserialize_tuple(len, &mut *self) + }, + Type::UnitStruct(name) => { + deserializer.deserialize_unit_struct(name, &mut *self) + }, + Type::NewtypeStruct(name) => { + deserializer.deserialize_newtype_struct(name, &mut *self) + }, + Type::TupleStruct { name, len } => { + deserializer.deserialize_tuple_struct(name, len, &mut *self) + }, + Type::Struct { name, fields } => { + deserializer.deserialize_struct(name, fields, &mut *self) + }, + Type::Enum { name, variants } => { + deserializer.deserialize_enum(name, variants, &mut *self) + }, + }.and_then(|(packs, obj)| Ok((self.step_out(packs)?, obj))) + } +} + +/// visit method generator for simple values (primitives). +/// +/// can generate whole function or just the glue. +macro_rules! vs { + (fn $visit:ident $obj:ident ($data_type:pat) $rust_type:ty) => { + fn $visit(mut self, v: $rust_type) -> Result + where + E: serde::de::Error, + { + vs!(self (v) $obj ($data_type)) + } + }; + ($this:ident $v:tt $obj:ident ($data_type:pat)) => { + { + let pat = $this.interp.pat; + let mut obj = None; + if $this.collecting { + obj = Some(SerdeObject::$obj$v); + } + let mut packs = Vec::new(); + let result = { + $this.frames_mut().iter_active_mut().try_for_each(|frame| { + let ty = frame.get_type(); + match ty { + | Some(($data_type, _)) + | Some((Type::Any, _)) + | Some((Type::IgnoredAny, _)) + | None + => {}, + Some((_, false)) => { + frame.matches = false; + return Ok(()); + }, + Some((_, true)) => { + return Err(MatchError::ValidationError) + }, + } + let mut pack = Pack::default(); + if let Some(name) = frame.get_name(pat) { + let mut map = IndexMap::new(); + map.insert(name, SerdeObject::$obj$v); + pack.subpacks.push_back((map, Pack::default())); + } + packs.push(pack); + Ok(()) + }) + }; + match result { + Err(e) => { + $this.interp.error.insert(e); + return Err(serde::de::Error::custom("type mismatch")); + }, + _ => (), + } + Ok((packs, obj)) + } + }; +} + +impl<'pat, 'state, 'de, O> serde::de::Visitor<'de> +for &mut Packer<'pat, 'state, O> +where + O: Serialize, +{ + type Value = (Vec>, Option>); + fn expecting(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "unsure") + } + + vs!(fn visit_bool Bool (Type::Bool) bool); + vs!(fn visit_i8 I8 (Type::I8) i8); + vs!(fn visit_i16 I16 (Type::I16) i16); + vs!(fn visit_i32 I32 (Type::I32) i32); + vs!(fn visit_i64 I64 (Type::I64) i64); + vs!(fn visit_i128 I128 (Type::I128) i128); + vs!(fn visit_u8 U8 (Type::U8) u8); + vs!(fn visit_u16 U16 (Type::U16) u16); + vs!(fn visit_u32 U32 (Type::U32) u32); + vs!(fn visit_u64 U64 (Type::U64) u64); + vs!(fn visit_u128 U128 (Type::U128) u128); + vs!(fn visit_f32 F32 (Type::F32) f32); + vs!(fn visit_f64 F64 (Type::F64) f64); + vs!(fn visit_char Char (Type::Char) char); + + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, + { + let pat = self.interp.pat; + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Str(Cow::Owned(v.into()))); + } + let mut packs = Vec::new(); + let result = { + self.frames_mut().iter_active_mut().try_for_each(|frame| { + let ty = frame.get_type(); + match ty { + | Some((Type::String, _)) + | Some((Type::Str, _)) + | Some((Type::Any, _)) + | Some((Type::IgnoredAny, _)) + | None + => {}, + Some((_, false)) => { + frame.matches = false; + return Ok(()); + }, + Some((_, true)) => { + return Err(MatchError::ValidationError) + }, + } + match frame.op() { + PatternElement::Value { value: Some(value), .. } => { + match value { + | Value::String { index, skippable } + if pat.strings[index] != v => { + if skippable { + frame.matches = false; + return Ok(()); + } else { + return Err(MatchError::ValidationError); + } + }, + | Value::Regex { index, skippable } + if !pat.regices[index].is_match(v) => { + if skippable { + frame.matches = false; + return Ok(()); + } else { + return Err(MatchError::ValidationError); + } + }, + | Value::Type { .. } + | Value::Regex { .. } + | Value::String { .. } + => {}, // ok + } + }, + PatternElement::Value { value: None, .. } => {}, + _ => unreachable!(), + } + let mut pack = Pack::default(); + if let Some(name) = frame.get_name(pat) { + let mut map = IndexMap::new(); + map.insert(name, SerdeObject::Str(Cow::Owned(v.into()))); + pack.subpacks.push_back((map, Pack::default())); + } + packs.push(pack); + Ok(()) + }) + }; + match result { + Err(e) => { + self.interp.error.insert(e); + return Err(E::custom("type/value mismatch")); + }, + _ => (), + } + Ok((packs, obj)) + } + fn visit_borrowed_str(self, v: &'de str) -> Result + where + E: serde::de::Error, + { + let pat = self.interp.pat; + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Str(Cow::Borrowed(v))); + } + let mut packs = Vec::new(); + let result = { + self.frames_mut().iter_active_mut().try_for_each(|frame| { + let ty = frame.get_type(); + match ty { + | Some((Type::String, _)) + | Some((Type::Str, _)) + | Some((Type::Any, _)) + | Some((Type::IgnoredAny, _)) + | None + => {}, + Some((_, false)) => { + frame.matches = false; + return Ok(()); + }, + Some((_, true)) => { + return Err(MatchError::ValidationError) + }, + } + match frame.op() { + PatternElement::Value { value: Some(value), .. } => { + match value { + | Value::String { index, skippable } + if pat.strings[index] != v => { + if skippable { + frame.matches = false; + return Ok(()); + } else { + return Err(MatchError::ValidationError); + } + }, + | Value::Regex { index, skippable } + if !pat.regices[index].is_match(v) => { + if skippable { + frame.matches = false; + return Ok(()); + } else { + return Err(MatchError::ValidationError); + } + }, + | Value::Type { .. } + | Value::Regex { .. } + | Value::String { .. } + => {}, // ok + } + }, + PatternElement::Value { value: None, .. } => {}, + _ => unreachable!(), + } + let mut pack = Pack::default(); + if let Some(name) = frame.get_name(pat) { + let mut map = IndexMap::new(); + map.insert(name, SerdeObject::Str(Cow::Borrowed(v))); + pack.subpacks.push_back((map, Pack::default())); + } + packs.push(pack); + Ok(()) + }) + }; + match result { + Err(e) => { + self.interp.error.insert(e); + return Err(E::custom("type/value mismatch")); + }, + _ => (), + } + Ok((packs, obj)) + } + fn visit_string(self, v: String) -> Result + where + E: serde::de::Error, + { + // TODO try to avoid cloning + self.visit_str(&*v) + } + fn visit_bytes(self, v: &[u8]) -> Result + where + E: serde::de::Error, + { + vs!(self (Cow::Owned(v.to_owned())) Bytes (Type::Bytes | Type::ByteBuf)) + } + fn visit_borrowed_bytes(self, v: &'de [u8]) -> Result + where + E: serde::de::Error, + { + vs!(self (Cow::Borrowed(v)) Bytes (Type::Bytes | Type::ByteBuf)) + } + fn visit_byte_buf(self, v: Vec) -> Result + where + E: serde::de::Error, + { + // TODO try to avoid cloning + self.visit_bytes(&*v) + } + fn visit_none(self) -> Result + where + E: serde::de::Error, + { + vs!(self {} None (Type::Option)) + } + fn visit_some(self, deserializer: D) -> Result + where + D: serde::de::Deserializer<'de>, + { + todo!() + } + fn visit_unit(self) -> Result + where + E: serde::de::Error, + { + vs!(self {} Unit (Type::Unit)) + } + fn visit_newtype_struct( + self, + deserializer: D + ) -> Result + where + D: serde::de::Deserializer<'de>, + { + todo!() + } + fn visit_seq(self, mut seq: A) -> Result + where + A: serde::de::SeqAccess<'de>, + { + let old_collecting = self.collecting; + let pat = self.interp.pat; + let mut collecting = old_collecting; + let typeck = self.frames_mut().iter_active_mut().try_for_each(|frame| { + let ty = frame.get_type(); + match ty { + | Some((Type::Seq, _)) + | Some((Type::Any, _)) + | Some((Type::IgnoredAny, _)) + | None + => {}, + Some((_, false)) => { + frame.matches = false; + return Ok(()); + }, + Some((_, true)) => { + return Err(MatchError::ValidationError) + }, + } + if frame.get_name(pat).is_some() { + collecting = true; + } + Ok(()) + }); + match typeck { + Err(e) => { + self.interp.error.insert(e); + return Err(A::Error::custom("type mismatch")); + }, + _ => (), + } + if let Err(e) = self.step_in() { return Err(e); } + self.collecting = collecting; + let mut subframes = Vec::new(); + let mut output_matches = Vec::new(); + self.frames().iter_active().for_each(|frame| { + if let Some((key_subtree, _)) = frame.key_subtree() { + subframes.push(Frame { + ops: &pat.protos[key_subtree], + iar: None, + overstep: 0, + matches: true, + poison: false, + }); + } + output_matches.push(false); + }); + let mut obj_inner = Vec::new(); + let mut output_packs = Vec::new(); + let mut iter = 0..; + while let packed_key = { + let subinterp = Interpreter { + pat: pat, + frames: &mut subframes, + error: self.interp.error, + }; + let mut subpacker = Packer { + interp: subinterp, + collecting: self.collecting, + call_limit: self.call_limit, + }; + if subpacker.interp.frames.is_empty() { + // avoid overflow + serde::de::DeserializeSeed::deserialize(&mut subpacker, { + serde::de::value::U64Deserializer::new(0) + })? + } else { + serde::de::DeserializeSeed::deserialize(&mut subpacker, { + serde::de::value::U64Deserializer::new(iter.next().unwrap()) + })? + } + } { + self.frames_mut().iter_active_mut().filter(|frame| { + frame.key_subtree().is_some() + }).zip(&mut subframes).for_each(|(frame, subframe)| { + frame.matches = subframe.matches; + // reset subframe for next iteration + // NOTE wait to reset subframe.matches when merging packs!!! + subframe.iar = None; + }); + self.frames_mut().iter_active_mut().for_each(|frame| { + // mark every non-subtree key as matching. + if frame.key_subtree().is_none() { + frame.matches = true; + } + }); + let Some(packed_value) = seq.next_element_seed(&mut *self)? else { + break; + }; + if self.collecting { + obj_inner.push(packed_value.1.unwrap()); + } + let mut key_packs_per_frame = packed_key.0.into_iter(); + let mut value_packs_per_frame = packed_value.0.into_iter(); + // whatever is active in self.frames(), if matches, has a pack + // whatever is in subframes, if matches, has a pack + // count(active self.frames() with subtree which match) is always + // smaller than count(subframes which match) because the former + // gets updated by next_value_seed + // count(active self.frames() with subtree) == count(subframes) + // tl;dr: need to figure out which packs produced by subframes line + // up with which packs produced by self, discarding extra subframes + // (where the corresponding self frame doesn't match) and accepting + // extra packs produced by self. + // NOTE: key_packs_per_frame ~ subframes + // value_packs_per_frame ~ self + // keys come first tho (key.merge_from(value)) + let mut iter_subframes = subframes.iter_mut(); + // related to output_packs + let mut pack_index = 0; + for (frame, out_matches) in self.frames().iter_active().zip({ + &mut output_matches + }) { + // check if this frame has an associated subframe + let subframe = if frame.key_subtree().is_some() { + // if there are more frames with associated subframes + // than there are subframes, panic + Some(iter_subframes.next().unwrap()) + } else { + None + }; + let mut new_pack = None; + if frame.matches && subframe.is_some() { + // this already implies subframe.matches + let mut key_pack = key_packs_per_frame.next().unwrap(); + let value_pack = value_packs_per_frame.next().unwrap(); + key_pack.cartesian_product(value_pack); + new_pack = Some(key_pack); + } else if frame.matches { + // value matches but there's no subframe, carry on + let value_pack = value_packs_per_frame.next().unwrap(); + new_pack = Some(value_pack); + } else if !frame.matches && subframe.is_some() { + // frame didn't match but there was a subframe + let subframe = subframe.unwrap(); + if subframe.matches { + // subframe matched, remove key pack + let _ = key_packs_per_frame.next().unwrap(); + } else { + // neither matched, no relevant packs + // do reset subframe for next_key_seed tho! + subframe.matches = true; + } + } else { + // no relevant packs + } + if let Some(new_pack) = new_pack { + if !*out_matches { + *out_matches = true; + output_packs.insert(pack_index, Pack::default()); + } + let output_pack = &mut output_packs[pack_index]; + output_pack.subpacks.extend(new_pack.subpacks); + } + if *out_matches { + pack_index += 1; + } + } + } + let mut poison = false; + for (f, m) in self.frames_mut().iter_active_mut().zip(output_matches) { + f.matches = m; + if !m { + if let Some((_, false)) = f.key_subtree() { + poison = true; + } + } + } + let obj = SerdeObject::Seq(obj_inner); + let mut final_packs = self.step_out(output_packs)?; + let mut iter_final_packs = 0..; + self.frames_mut().iter_active_mut().for_each(|frame| { + let ty = frame.get_type(); + match ty { + | Some((Type::Seq, _)) + | Some((Type::Any, _)) + | Some((Type::IgnoredAny, _)) + | None + => { + frame.poison = poison; + let matched = std::mem::replace(&mut frame.matches, true); + if !matched { + final_packs.insert( + iter_final_packs.start, + Pack::default(), + ); + } + }, + _ => return, + } + let pack = &mut final_packs[iter_final_packs.next().unwrap()]; + if let Some(name) = frame.get_name(pat) { + // we can assume collecting == true + let old_pack = std::mem::take(pack); + let mut map = IndexMap::new(); + map.insert(name, obj.clone()); + pack.subpacks.push_back((map, old_pack)); + } + }); + self.collecting = old_collecting; + Ok((final_packs, collecting.then(|| obj))) + } + fn visit_map(self, mut map: A) -> Result + where + A: serde::de::MapAccess<'de>, + { + let old_collecting = self.collecting; + let pat = self.interp.pat; + let mut collecting = old_collecting; + let typeck = self.frames_mut().iter_active_mut().try_for_each(|frame| { + let ty = frame.get_type(); + match ty { + | Some((Type::Map, _)) + | Some((Type::Any, _)) + | Some((Type::IgnoredAny, _)) + | None + => {}, + Some((_, false)) => { + frame.matches = false; + return Ok(()); + }, + Some((_, true)) => { + return Err(MatchError::ValidationError) + }, + } + if frame.get_name(pat).is_some() { + collecting = true; + } + Ok(()) + }); + match typeck { + Err(e) => { + self.interp.error.insert(e); + return Err(A::Error::custom("type mismatch")); + }, + _ => (), + } + if let Err(e) = self.step_in() { return Err(e); } + self.collecting = collecting; + let mut subframes = Vec::new(); + let mut output_matches = Vec::new(); + self.frames().iter_active().for_each(|frame| { + if let Some((key_subtree, _)) = frame.key_subtree() { + subframes.push(Frame { + ops: &pat.protos[key_subtree], + iar: None, + overstep: 0, + matches: true, + poison: false, + }); + } + output_matches.push(false); + }); + let mut obj_inner = Vec::new(); + let mut output_packs = Vec::new(); + while let Some(packed_key) = { + let subinterp = Interpreter { + pat: pat, + frames: &mut subframes, + error: self.interp.error, + }; + let mut subpacker = Packer { + interp: subinterp, + collecting: self.collecting, + call_limit: self.call_limit, + }; + map.next_key_seed(&mut subpacker)? + } { + self.frames_mut().iter_active_mut().filter(|frame| { + frame.key_subtree().is_some() + }).zip(&mut subframes).for_each(|(frame, subframe)| { + frame.matches = subframe.matches; + // reset subframe for next iteration + // NOTE wait to reset subframe.matches when merging packs!!! + subframe.iar = None; + }); + self.frames_mut().iter_active_mut().for_each(|frame| { + // mark every non-subtree key as matching. + if frame.key_subtree().is_none() { + frame.matches = true; + } + }); + let packed_value = map.next_value_seed(&mut *self)?; + if self.collecting { + obj_inner.push( + (packed_key.1.unwrap(), packed_value.1.unwrap()), + ); + } + let mut key_packs_per_frame = packed_key.0.into_iter(); + let mut value_packs_per_frame = packed_value.0.into_iter(); + // whatever is active in self.frames(), if matches, has a pack + // whatever is in subframes, if matches, has a pack + // count(active self.frames() with subtree which match) is always + // smaller than count(subframes which match) because the former + // gets updated by next_value_seed + // count(active self.frames() with subtree) == count(subframes) + // tl;dr: need to figure out which packs produced by subframes line + // up with which packs produced by self, discarding extra subframes + // (where the corresponding self frame doesn't match) and accepting + // extra packs produced by self. + // NOTE: key_packs_per_frame ~ subframes + // value_packs_per_frame ~ self + // keys come first tho (key.merge_from(value)) + let mut iter_subframes = subframes.iter_mut(); + // related to output_packs + let mut pack_index = 0; + for (frame, out_matches) in self.frames().iter_active().zip({ + &mut output_matches + }) { + // check if this frame has an associated subframe + let subframe = if frame.key_subtree().is_some() { + // if there are more frames with associated subframes + // than there are subframes, panic + Some(iter_subframes.next().unwrap()) + } else { + None + }; + let mut new_pack = None; + if frame.matches && subframe.is_some() { + // this already implies subframe.matches + let mut key_pack = key_packs_per_frame.next().unwrap(); + let value_pack = value_packs_per_frame.next().unwrap(); + key_pack.cartesian_product(value_pack); + new_pack = Some(key_pack); + } else if frame.matches { + // value matches but there's no subframe, carry on + let value_pack = value_packs_per_frame.next().unwrap(); + new_pack = Some(value_pack); + } else if !frame.matches && subframe.is_some() { + // frame didn't match but there was a subframe + let subframe = subframe.unwrap(); + if subframe.matches { + // subframe matched, remove key pack + let _ = key_packs_per_frame.next().unwrap(); + } else { + // neither matched, no relevant packs + // do reset subframe for next_key_seed tho! + subframe.matches = true; + } + } else { + // no relevant packs + } + if let Some(new_pack) = new_pack { + if !*out_matches { + *out_matches = true; + output_packs.insert(pack_index, Pack::default()); + } + let output_pack = &mut output_packs[pack_index]; + output_pack.subpacks.extend(new_pack.subpacks); + } + if *out_matches { + pack_index += 1; + } + } + } + let mut poison = false; + for (f, m) in self.frames_mut().iter_active_mut().zip(output_matches) { + f.matches = m; + if !m { + if let Some((_, false)) = f.key_subtree() { + poison = true; + } + } + } + let obj = SerdeObject::Map(obj_inner); + let mut final_packs = self.step_out(output_packs)?; + let mut iter_final_packs = 0..; + self.frames_mut().iter_active_mut().for_each(|frame| { + let ty = frame.get_type(); + match ty { + | Some((Type::Map, _)) + | Some((Type::Any, _)) + | Some((Type::IgnoredAny, _)) + | None + => { + frame.poison = poison; + let matched = std::mem::replace(&mut frame.matches, true); + if !matched { + final_packs.insert( + iter_final_packs.start, + Pack::default(), + ); + } + }, + _ => return, + } + let pack = &mut final_packs[iter_final_packs.next().unwrap()]; + if let Some(name) = frame.get_name(pat) { + // we can assume collecting == true + let old_pack = std::mem::take(pack); + let mut map = IndexMap::new(); + map.insert(name, obj.clone()); + pack.subpacks.push_back((map, old_pack)); + } + }); + self.collecting = old_collecting; + Ok((final_packs, collecting.then(|| obj))) + } + fn visit_enum(self, data: A) -> Result + where + A: serde::de::EnumAccess<'de>, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Enum { + variant: todo!(), + data: todo!(), + }); + } + todo!() + } +} + +/// Deserializes a SerdeObject +pub(crate) struct SerdeObjectDeserializer<'de, E> { + pub(crate) obj: SerdeObject<'de>, + pub(crate) _e: PhantomData E>, +} + +/// Deserializes a SerdeObject::Seq +struct SerdeObjectSeq<'de, I: Iterator>, E> { + iter: I, + _e: PhantomData E>, +} + +impl<'de, E> serde::de::VariantAccess<'de> for SerdeObjectDeserializer<'de, E> +where + E: serde::de::Error, +{ + type Error = E; + + fn unit_variant(self) -> Result<(), E> { + todo!() + } + fn newtype_variant_seed(self, seed: T) -> Result + where + T: serde::de::DeserializeSeed<'de>, + { + todo!() + } + fn tuple_variant(self, len: usize, visitor: V) -> Result + where + V: serde::de::Visitor<'de>, + { + todo!() + } + fn struct_variant( + self, + fields: &'static [&'static str], + visitor: V, + ) -> Result + where + V: serde::de::Visitor<'de>, + { + todo!() + } +} + +impl<'de, E> serde::de::EnumAccess<'de> for SerdeObjectDeserializer<'de, E> +where + E: serde::de::Error, +{ + type Error = E; + type Variant = Self; + + fn variant_seed(self, seed: V) -> Result<(V::Value, Self), E> + where + V: serde::de::DeserializeSeed<'de>, + { + match self.obj { + SerdeObject::Enum { variant, data } => { + seed.deserialize(variant.into_deserializer()).map(|it| { + let data = Self { + obj: *data, + _e: PhantomData, + }; + (it, data) + }) + }, + _ => unreachable!(), + } + } +} + +impl<'de, E> serde::de::Deserializer<'de> for SerdeObjectDeserializer<'de, E> +where + E: serde::de::Error, +{ + type Error = E; + fn deserialize_any(self, v: V) -> Result + where + V: serde::de::Visitor<'de>, + { + match self.obj { + SerdeObject::Bool(x) => v.visit_bool(x), + SerdeObject::I8(x) => v.visit_i8(x), + SerdeObject::I16(x) => v.visit_i16(x), + SerdeObject::I32(x) => v.visit_i32(x), + SerdeObject::I64(x) => v.visit_i64(x), + SerdeObject::I128(x) => v.visit_i128(x), + SerdeObject::U8(x) => v.visit_u8(x), + SerdeObject::U16(x) => v.visit_u16(x), + SerdeObject::U32(x) => v.visit_u32(x), + SerdeObject::U64(x) => v.visit_u64(x), + SerdeObject::U128(x) => v.visit_u128(x), + SerdeObject::F32(x) => v.visit_f32(x), + SerdeObject::F64(x) => v.visit_f64(x), + SerdeObject::Char(x) => v.visit_char(x), + SerdeObject::Str(Cow::Owned(x)) => v.visit_string(x), + SerdeObject::Str(Cow::Borrowed(x)) => v.visit_borrowed_str(x), + SerdeObject::Bytes(Cow::Owned(x)) => v.visit_byte_buf(x), + SerdeObject::Bytes(Cow::Borrowed(x)) => v.visit_borrowed_bytes(x), + SerdeObject::Some(x) => v.visit_some(x.into_deserializer()), + SerdeObject::None => v.visit_none(), + SerdeObject::Unit => v.visit_unit(), + SerdeObject::Seq(x) => { + let mut x = serde::de::value::SeqDeserializer::new(x.into_iter()); + let r = v.visit_seq(&mut x); + x.end().and(r) + }, + SerdeObject::Map(x) => { + let mut x = serde::de::value::MapDeserializer::new(x.into_iter()); + let r = v.visit_map(&mut x); + x.end().and(r) + }, + SerdeObject::NewtypeStruct(x) => { + v.visit_newtype_struct(x.into_deserializer()) + }, + SerdeObject::Enum { .. } => v.visit_enum(self), + } + } + fn deserialize_ignored_any(self, v: V) -> Result + where + V: serde::de::Visitor<'de>, + { + drop(self); + v.visit_unit() + } + serde::forward_to_deserialize_any! { + bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string + bytes byte_buf option unit unit_struct newtype_struct seq tuple + tuple_struct map struct enum identifier + } +} + +#[cfg(test)] +mod tests { + use super::Packer; + use super::super::PatternConstants; + + use crate::vm::MAX_CALLS; + use crate::vm::Interpreter; + use crate::vm::Type; + use crate::vm::Value; + use crate::vm::PatternElement; + use crate::vm::SerdeObject; + use crate::vm::Frame; + + use postcard::Deserializer as PostcardDeserializer; + use serde::de::DeserializeSeed as _; + use serde_json::Deserializer as JsonDeserializer; + + use crate::errors::MatchError; + + #[test] + #[should_panic] + fn test_broken() { + // broken pattern, should never be emitted by parser. make sure it's + // not accepted. + let consts = PatternConstants::<()>::default(); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new(&consts, &mut err, &mut frames); + let _ = Packer::new(interp, MAX_CALLS); + } + + #[test] + fn test_empty_create() { + // test creating the parser with an empty pattern. + let mut consts = PatternConstants::<()>::default(); + consts.protos.push(Vec::new()); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new(&consts, &mut err, &mut frames); + let _ = Packer::new(interp, MAX_CALLS); + } + + #[test] + fn test_empty_match() { + // test matching something with an empty pattern. + let mut consts = PatternConstants::<()>::default(); + consts.protos.push(Vec::new()); + let mut der = JsonDeserializer::from_str("{}"); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new(&consts, &mut err, &mut frames); + let pack = Packer::new(interp, MAX_CALLS).deserialize(&mut der).unwrap(); + } + + #[test] + fn test_simple_match() { + // test matching a simple value + let mut consts = PatternConstants::<()>::default(); + consts.strings.insert("hello".into()); + consts.protos.push(vec![ + PatternElement::Value { + name: Some(0), + value: Some(Value::Type { + ty: Type::U64, + skippable: false, + }), + }, + ]); + let mut der = JsonDeserializer::from_str("3"); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new(&consts, &mut err, &mut frames); + let packed = Packer::new(interp, MAX_CALLS).deserialize(&mut der); + let (packs, obj) = packed.unwrap(); + assert!(obj.is_none()); + assert_eq!( + packs[0].get_object_at(0, "hello").unwrap(), + &SerdeObject::U64(3), + ); + } + + #[test] + fn test_simple_error() { + // test a value that doesn't match (serde_json error) + let mut consts = PatternConstants::<()>::default(); + consts.strings.insert("hello".into()); + consts.protos.push(vec![ + PatternElement::Value { + name: Some(0), + value: Some(Value::Type { + ty: Type::U64, + skippable: false, + }), + }, + ]); + let mut der = JsonDeserializer::from_str("\"hello\""); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new(&consts, &mut err, &mut frames); + let packed = Packer::new(interp, MAX_CALLS).deserialize(&mut der); + // error produced by serde_json + assert!(packed.is_err()); + } + + #[test] + fn test_basic_multiframe() { + // test multiple frames (matching and non-matching) + let mut consts = PatternConstants::<()>::default(); + consts.strings.insert("a".into()); + consts.strings.insert("b".into()); + consts.protos.push(vec![ + PatternElement::Value { + name: Some(0), + value: Some(Value::Type { + ty: Type::U64, + skippable: true, + }), + }, + ]); + consts.protos.push(vec![ + PatternElement::Value { + name: Some(1), + value: Some(Value::Type { + ty: Type::Bool, + skippable: true, + }), + }, + ]); + let mut der = JsonDeserializer::from_str(r#"10"#); + let mut err = Default::default(); + let mut frames: Vec<_> = Default::default(); + frames.push(Frame { + ops: &consts.protos[0], + iar: None, + matches: true, + overstep: 0, + poison: false, + }); + frames.push(Frame { + ops: &consts.protos[1], + iar: None, + matches: true, + overstep: 0, + poison: false, + }); + let interp = Interpreter { + pat: &consts, + error: &mut err, + frames: &mut frames, + }; + let packed = Packer::new(interp, MAX_CALLS).deserialize(&mut der); + let (packs, obj) = packed.unwrap(); + assert!(obj.is_none()); + assert_eq!( + packs[0].get_object_at(0, "a").unwrap(), + &SerdeObject::U64(10), + ); + assert_eq!(packs.len(), 1); + assert!(frames[0].matches); + assert!(!frames[1].matches); + } + + #[test] + fn test_map() { + // test visit_map + let mut consts = PatternConstants::<()>::default(); + consts.strings.insert("key".into()); + consts.strings.insert("value".into()); + consts.protos.push(vec![ + PatternElement::Value { + name: Some(0), + value: None, + }, + ]); + consts.protos.push(vec![ + PatternElement::Value { + name: None, + value: Some(Value::Type { + ty: Type::Map, + skippable: false, + }), + }, + PatternElement::Tag { + key_subtree: 0, + optional: true, + }, + PatternElement::Value { + name: Some(1), + value: Some(Value::Type { + ty: Type::U64, + skippable: false, + }), + }, + ]); + let mut der = JsonDeserializer::from_str(r#"{"hello": 0, "world": 1}"#); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new(&consts, &mut err, &mut frames); + let packed = Packer::new(interp, MAX_CALLS).deserialize(&mut der); + let (packs, obj) = packed.unwrap(); + assert!(obj.is_none()); + assert_eq!( + packs[0].get_object_at(0, "key").unwrap(), + &SerdeObject::Str("hello".into()), + ); + assert_eq!( + packs[0] + .get_object_at(0, "value").unwrap(), + &SerdeObject::U64(0), + ); + assert_eq!( + packs[0].get_object_at(1, "key").unwrap(), + &SerdeObject::Str("world".into()), + ); + assert_eq!( + packs[0] + .get_object_at(1, "value").unwrap(), + &SerdeObject::U64(1), + ); + } + + #[test] + fn test_parser_empty() { + // use a parsed empty pattern to test Packer + let consts = crate::parser::parse::<&'static str, &'static str, ()>( + "", + None, + None, + ).unwrap(); + let mut der = JsonDeserializer::from_str(r#"{"hello": 0, "world": 1}"#); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new( + &consts, + &mut err, + &mut frames, + //&mut output, + ); + let (mut packs, obj) = Packer::new( + interp, + MAX_CALLS, + ).deserialize(&mut der).unwrap(); + assert!(obj.is_none()); + assert_eq!(packs.len(), 1); + let pack = packs.pop().unwrap(); + assert!(pack.subpacks.is_empty()); + } + + #[test] + fn test_parser_basic() { + // use a basic parsed pattern to test Packer + let consts = crate::parser::parse::<&'static str, &'static str, ()>( + ":map->[name:str]value:str", + None, + None, + ).unwrap(); + let data = &[ + 0x02, // map length (2) + 0x04, // string length (4) + 0x6E, 0x61, 0x6D, 0x65, // b'name' + 0x01, // string length (1) + 0x61, // b'a' + 0x05, // string length (5) + 0x76, 0x61, 0x6C, 0x75, 0x65, // b'value' + 0x01, // string length (1) + 0x62, // b'b' + ]; + let mut der = PostcardDeserializer::from_bytes(data); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new( + &consts, + &mut err, + &mut frames, + //&mut output, + ); + let result = Packer::new( + interp, + MAX_CALLS, + ).deserialize(&mut der); + let (mut packs, obj) = result.unwrap(); + assert!(obj.is_none()); + assert_eq!(packs.len(), 1); + let pack = packs.pop().unwrap(); + assert_eq!(pack.subpacks.len(), 2); + } + + #[test] + fn test_parser_basic_subtree() { + // use a basic parsed pattern with a subtree to test Packer + let consts = crate::parser::parse::<&'static str, &'static str, ()>( + ":map(->[name:str]value:str)", + None, + None, + ).unwrap(); + let data = &[ + 0x02, // map length (2) + 0x04, // string length (4) + 0x6E, 0x61, 0x6D, 0x65, // b'name' + 0x01, // string length (1) + 0x61, // b'a' + 0x05, // string length (5) + 0x76, 0x61, 0x6C, 0x75, 0x65, // b'value' + 0x01, // string length (1) + 0x62, // b'b' + ]; + let mut der = PostcardDeserializer::from_bytes(data); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new( + &consts, + &mut err, + &mut frames, + //&mut output, + ); + let result = Packer::new( + interp, + MAX_CALLS, + ).deserialize(&mut der); + let (mut packs, obj) = result.unwrap(); + assert!(obj.is_none()); + assert_eq!(packs.len(), 1); + let pack = packs.pop().unwrap(); + assert_eq!(pack.subpacks.len(), 2); + } + + #[test] + fn test_empty_subtrees() { + // tests empty subtrees + let consts = crate::parser::parse::<&'static str, &'static str, ()>( + ":map()(())", + None, + None, + ).unwrap(); + let data = r#"{"hello": "world"}"#; + let mut der = JsonDeserializer::from_str(data); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new( + &consts, + &mut err, + &mut frames, + //&mut output, + ); + let result = Packer::new( + interp, + MAX_CALLS, + ).deserialize(&mut der); + let (mut packs, obj) = result.unwrap(); + assert!(obj.is_none()); + assert_eq!(packs.len(), 1); + let pack = packs.pop().unwrap(); + assert_eq!(pack.subpacks.len(), 0); + } + + #[test] + fn test_parser_subtrees() { + // use a parsed pattern with subtrees to test Packer + // also test a non-self-describing format (postcard) + let consts = crate::parser::parse::<&'static str, &'static str, ()>( + ":map(->['name'?]name:str)?(->['value'?]value:u32)?(->[:str]:?ignored_any)", + None, + None, + ).unwrap(); + let data = &[ + 0x02, // map length (2) + 0x04, // string length (4) + 0x6E, 0x61, 0x6D, 0x65, // b'name' + 0x01, // string length (1) + 0x61, // b'a' + 0x05, // string length (5) + 0x76, 0x61, 0x6C, 0x75, 0x65, // b'value' + 0x01, // 1 + ]; + let mut der = PostcardDeserializer::from_bytes(data); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new( + &consts, + &mut err, + &mut frames, + //&mut output, + ); + let result = Packer::new( + interp, + MAX_CALLS, + ).deserialize(&mut der); + let (mut packs, obj) = result.unwrap(); + assert!(obj.is_none()); + assert_eq!(packs.len(), 1); + let pack = packs.pop().unwrap(); + assert_eq!(pack.subpacks.len(), 1); + assert_eq!( + pack.get_object_at(0, "name").unwrap(), + &SerdeObject::Str(From::from("a")), + ); + assert_eq!( + pack.get_object_at(0, "value").unwrap(), + &SerdeObject::U32(1), + ); + } + + #[test] + fn test_parser_subtrees_validation() { + // checks that the Packer can validate multiple keys. + let consts = crate::parser::parse::<&'static str, &'static str, ()>( + ":map(->['name'?]name:str)(->['value'?]value:u32)(->[:str]:?ignored_any)", + None, + None, + ).unwrap(); + let data = &[ + 0x02, // map length (2) + 0x04, // string length (4) + 0x6E, 0x61, 0x6D, 0x65, // b'name' + 0x01, // string length (1) + 0x61, // b'a' + 0x05, // string length (5) + 0x76, 0x61, 0x6C, 0x75, 0x65, // b'value' + 0x01, // 1 + ]; + let mut der = PostcardDeserializer::from_bytes(data); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new( + &consts, + &mut err, + &mut frames, + //&mut output, + ); + let result = Packer::new( + interp, + MAX_CALLS, + ).deserialize(&mut der); + let (mut packs, obj) = result.unwrap(); + assert!(obj.is_none()); + assert_eq!(packs.len(), 1); + let pack = packs.pop().unwrap(); + assert_eq!(pack.subpacks.len(), 1); + assert_eq!( + pack.get_object_at(0, "name").unwrap(), + &SerdeObject::Str(From::from("a")), + ); + assert_eq!( + pack.get_object_at(0, "value").unwrap(), + &SerdeObject::U32(1), + ); + } + + #[test] + fn test_one_or_more_subtrees_validation() { + // checks that the Packer supports OR-like validation semantics + let consts = crate::parser::parse::<&'static str, &'static str, ()>( + ":map((->['name'?]?name)?(->['value'?]?value)?)(->[:str]:u32)", + None, + None, + ).unwrap(); + let data = &[ + 0x01, // map length (1) + 0x04, // string length (4) + 0x6E, 0x61, 0x6D, 0x65, // b'name' + 0x01, // 1 + ]; + let mut der = PostcardDeserializer::from_bytes(data); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new( + &consts, + &mut err, + &mut frames, + //&mut output, + ); + let result = Packer::new( + interp, + MAX_CALLS, + ).deserialize(&mut der); + let (mut packs, obj) = result.unwrap(); + assert!(obj.is_none()); + assert_eq!(packs.len(), 1); + let pack = packs.pop().unwrap(); + assert_eq!(pack.subpacks.len(), 1); + assert_eq!( + pack.get_object_at(0, "name").unwrap(), + &SerdeObject::U32(1), + ); + } + + #[test] + fn test_one_or_more_subtrees_validation_failure() { + // checks that the Packer supports OR-like validation semantics + // (failure) + let consts = crate::parser::parse::<&'static str, &'static str, ()>( + ":map((->['name'?]?name)?(->['value'?]?value)?)(->[:str]:u32)", + None, + None, + ).unwrap(); + let data = &[ + 0x01, // map length (1) + 0x04, // string length (4) + 0x6E, 0x65, 0x6D, 0x65, // b'neme' + 0x01, // 1 + ]; + let mut der = PostcardDeserializer::from_bytes(data); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new( + &consts, + &mut err, + &mut frames, + //&mut output, + ); + let result = Packer::new( + interp, + MAX_CALLS, + ).deserialize(&mut der); + assert!(matches!(err, Some(MatchError::ValidationError))); + assert!(result.is_err()); + } + + // FIXME this should test that the map doesn't contain other keys aside + // from the ones requested by the pattern, but this doesn't match existing + // datafu semantics at all. we need new syntax for it. + //#[test] + //fn test_parser_subtrees_no_additional_keys() { + // // use a parsed pattern with subtrees to test Packer + // // also test a non-self-describing format (postcard) + // // also require at least one subtree to match on every iteration. + // // also this checks for validation error + // let consts = crate::parser::parse::<&'static str, &'static str, ()>( + // ":map((->['name'?]name)?(->['value'?]value)?)(->[:str]:u32)", + // None, + // None, + // ).unwrap(); + // let data = &[ + // 0x03, // map length (3) + // 0x04, // string length (4) + // 0x6E, 0x61, 0x6D, 0x65, // b'name' + // 0x01, // 1 + // 0x05, // string length (5) + // 0x76, 0x61, 0x6C, 0x75, 0x65, // b'value' + // 0x01, // 1 + // 0x05, // string length (5) + // 0x76, 0x65, 0x6C, 0x75, 0x65, // b'velue' + // 0x01, // 1 + // ]; + // let mut der = PostcardDeserializer::from_bytes(data); + // let mut err = Default::default(); + // let mut frames = Default::default(); + // let interp = Interpreter::new( + // &consts, + // &mut err, + // &mut frames, + // //&mut output, + // ); + // let result = Packer::new( + // interp, + // MAX_CALLS, + // ).deserialize(&mut der); + // assert!(matches!(err, Some(MatchError::ValidationError))); + // assert!(result.is_err()); + //} + + #[test] + fn test_key_optionally_missing() { + // test a pattern with a missing key + let consts = crate::parser::parse::<&'static str, &'static str, ()>( + " + :map + ->['a'?]:map + ->[b:?str]:?map + (->['x'?]x:?bool)? + (->['y'?]?y:?bool)? + ", + None, + None + ).unwrap(); + let data = r#"{"a": {"1": {"y": true}, "2": {"x": true, "y": true}}}"#; + let mut der = JsonDeserializer::from_str(data); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new( + &consts, + &mut err, + &mut frames, + //&mut output, + ); + let result = Packer::new( + interp, + MAX_CALLS, + ).deserialize(&mut der); + let (mut packs, obj) = result.unwrap(); + assert!(obj.is_none()); + assert_eq!(packs.len(), 1); + let pack = &packs[0]; + assert_eq!(pack.subpacks.len(), 1); + assert_eq!( + pack.get_object_at(0, "b").unwrap(), + &SerdeObject::Str(From::from("2")), + ); + assert_eq!( + pack.get_object_at(0, "x").unwrap(), + &SerdeObject::Bool(true), + ); + assert_eq!( + pack.get_object_at(0, "y").unwrap(), + &SerdeObject::Bool(true), + ); + } + + #[test] + fn test_map_without_values() { + // test that the map without values still collects the key + let consts = crate::parser::parse::<&'static str, &'static str, ()>( + " + :map + ->[a:?str]?:?map + ->[b:?str]?:?map + ", + None, + None + ).unwrap(); + let data = r#"{"a": {}}"#; + let mut der = JsonDeserializer::from_str(data); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new( + &consts, + &mut err, + &mut frames, + //&mut output, + ); + let result = Packer::new( + interp, + MAX_CALLS, + ).deserialize(&mut der); + let (mut packs, obj) = result.unwrap(); + assert!(obj.is_none()); + assert_eq!(packs.len(), 1); + let pack = &packs[0]; + assert_eq!(pack.subpacks.len(), 1); + assert_eq!( + pack.get_object_at(0, "a").unwrap(), + &SerdeObject::Str(From::from("a")), + ); + assert_eq!(pack.get_subpack_at(0, "a").unwrap().subpacks.len(), 0); + } + + #[test] + fn test_guaranteed_at_least_once() { + // test that at least one match is required before collecting the key + let consts = crate::parser::parse::<&'static str, &'static str, ()>( + " + :map + ->[a:?str]:?map + ->[b:?str]:?map + ", + None, + None + ).unwrap(); + let data = r#"{"a": {}}"#; + let mut der = JsonDeserializer::from_str(data); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new( + &consts, + &mut err, + &mut frames, + //&mut output, + ); + let result = Packer::new( + interp, + MAX_CALLS, + ).deserialize(&mut der); + let (mut packs, obj) = result.unwrap(); + assert!(obj.is_none()); + assert_eq!(packs.len(), 0); + } + + #[test] + fn test_basic_seq() { + // test that sequences work + let consts = crate::parser::parse::<&'static str, &'static str, ()>( + " + :seq + ->[i:u64]v:u64 + ", + None, + None + ).unwrap(); + let data = r#"[10, 11, 12]"#; + let mut der = JsonDeserializer::from_str(data); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new( + &consts, + &mut err, + &mut frames, + //&mut output, + ); + let result = Packer::new( + interp, + MAX_CALLS, + ).deserialize(&mut der); + let (mut packs, obj) = result.unwrap(); + assert!(obj.is_none()); + assert_eq!(packs.len(), 1); + let pack = packs.pop().unwrap(); + assert_eq!(pack.subpacks.len(), 3); + assert_eq!( + pack.get_object_at(0, "i").unwrap(), + &SerdeObject::U64(0), + ); + assert_eq!( + pack.get_object_at(0, "v").unwrap(), + &SerdeObject::U64(10), + ); + assert_eq!( + pack.get_object_at(1, "i").unwrap(), + &SerdeObject::U64(1), + ); + assert_eq!( + pack.get_object_at(1, "v").unwrap(), + &SerdeObject::U64(11), + ); + assert_eq!( + pack.get_object_at(2, "i").unwrap(), + &SerdeObject::U64(2), + ); + assert_eq!( + pack.get_object_at(2, "v").unwrap(), + &SerdeObject::U64(12), + ); + } + + #[test] + fn test_list_key() { + let consts = crate::parser::parse::<&'static str, &'static str, ()>( + " + :map + ->[:seq->k:u64]:seq + ->v:str + ", + None, + None + ).unwrap(); + let data = &[ + 0x01, // map length (1) + 0x03, // list length (3) + 0x01, 0x02, 0x03, // [1, 2, 3] + 0x02, // list length (2) + 0x05, // string length (5) + b't', b'r', b'a', b'n', b's', + 0x06, // string length (6) + b'r', b'i', b'g', b'h', b't', b's' + ]; + let mut der = PostcardDeserializer::from_bytes(data); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new( + &consts, + &mut err, + &mut frames, + //&mut output, + ); + let result = Packer::new( + interp, + MAX_CALLS, + ).deserialize(&mut der); + let (mut packs, obj) = result.unwrap(); + assert!(obj.is_none()); + assert_eq!(packs.len(), 1); + let pack = packs.pop().unwrap(); + assert_eq!(pack.subpacks.len(), 3); + let subpack = pack.get_subpack_at(0, "k").unwrap(); + assert_eq!(subpack.subpacks.len(), 2); + assert_eq!( + pack.get_object_at(0, "k").unwrap(), + &SerdeObject::U64(1), + ); + assert_eq!( + subpack.get_object_at(0, "v").unwrap(), + &SerdeObject::Str(From::from("trans")), + ); + assert_eq!( + subpack.get_object_at(1, "v").unwrap(), + &SerdeObject::Str(From::from("rights")), + ); + let subpack = pack.get_subpack_at(1, "k").unwrap(); + assert_eq!(subpack.subpacks.len(), 2); + assert_eq!( + pack.get_object_at(1, "k").unwrap(), + &SerdeObject::U64(2), + ); + assert_eq!( + subpack.get_object_at(0, "v").unwrap(), + &SerdeObject::Str(From::from("trans")), + ); + assert_eq!( + subpack.get_object_at(1, "v").unwrap(), + &SerdeObject::Str(From::from("rights")), + ); + let subpack = pack.get_subpack_at(2, "k").unwrap(); + assert_eq!(subpack.subpacks.len(), 2); + assert_eq!( + pack.get_object_at(2, "k").unwrap(), + &SerdeObject::U64(3), + ); + assert_eq!( + subpack.get_object_at(0, "v").unwrap(), + &SerdeObject::Str(From::from("trans")), + ); + assert_eq!( + subpack.get_object_at(1, "v").unwrap(), + &SerdeObject::Str(From::from("rights")), + ); + } + + #[test] + fn test_realish_use_case() { + // use a parsed pattern that might actually be used in the real world. + let consts = crate::parser::parse::<&'static str, &'static str, ()>( + " + :map + ->['projects'?]:map + ->[commit:?str]:?map + ->[url:?str]:?map + ->[branch:?str]:?map + (->['active'?]active:?bool)? + (->['federate'?]?federate:?bool)? + ", + None, + None + ).unwrap(); + let data = r#" + {"base_url": "https://ganarchy.autistic.space", "repo_list_srcs": {"https://ganarchy.autistic.space/index.toml": {"active": false}}, "projects": {"385e734a52e13949a7a5c71827f6de920dbfea43": {"https://github.com/ganarchy/GAnarchy": {"HEAD": {"active": true}}, "https://soniex2.autistic.space/git-repos/ganarchy.git": {"HEAD": {"active": true, "pinned": true}}}, "a8fb5087f79eafe312db270082c052c427b208c2": {"https://soniex2.autistic.space/git-repos/mmorfc.git": {"HEAD": {"active": true, "pinned": true}}}, "2d0b363fe3179087de59d9ef4a2d14af21d89071": {"https://soniex2.autistic.space/git-repos/chewstuff.git": {"HEAD": {"active": true, "pinned": true}}}}} + "#; + let mut der = JsonDeserializer::from_str(data); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new( + &consts, + &mut err, + &mut frames, + //&mut output, + ); + let result = Packer::new( + interp, + MAX_CALLS, + ).deserialize(&mut der); + let (mut packs, obj) = result.unwrap(); + assert!(obj.is_none()); + assert_eq!(packs.len(), 1); + let pack = &packs[0]; + assert_eq!(pack.subpacks.len(), 3); + + let commit = pack.get_subpack_at(0, "commit").unwrap(); + assert_eq!( + pack.get_object_at(0, "commit").unwrap(), + &SerdeObject::Str(From::from("385e734a52e13949a7a5c71827f6de920dbfea43")), + ); + assert_eq!(commit.subpacks.len(), 2); + + assert_eq!( + commit.get_object_at(0, "url").unwrap(), + &SerdeObject::Str(From::from("https://github.com/ganarchy/GAnarchy")), + ); + assert_eq!( + commit.get_object_at(0, "branch").unwrap(), + &SerdeObject::Str(From::from("HEAD")), + ); + assert_eq!( + commit.get_object_at(0, "active").unwrap(), + &SerdeObject::Bool(true), + ); + + assert_eq!( + commit.get_object_at(1, "url").unwrap(), + &SerdeObject::Str(From::from("https://soniex2.autistic.space/git-repos/ganarchy.git")), + ); + assert_eq!( + commit.get_object_at(1, "branch").unwrap(), + &SerdeObject::Str(From::from("HEAD")), + ); + assert_eq!( + commit.get_object_at(1, "active").unwrap(), + &SerdeObject::Bool(true), + ); + + assert_eq!( + pack.get_object_at(1, "commit").unwrap(), + &SerdeObject::Str(From::from("a8fb5087f79eafe312db270082c052c427b208c2")), + ); + assert_eq!( + pack.get_object_at(1, "url").unwrap(), + &SerdeObject::Str(From::from("https://soniex2.autistic.space/git-repos/mmorfc.git")), + ); + assert_eq!( + pack.get_object_at(1, "branch").unwrap(), + &SerdeObject::Str(From::from("HEAD")), + ); + assert_eq!( + pack.get_object_at(1, "active").unwrap(), + &SerdeObject::Bool(true), + ); + + assert_eq!( + pack.get_object_at(2, "commit").unwrap(), + &SerdeObject::Str(From::from("2d0b363fe3179087de59d9ef4a2d14af21d89071")), + ); + assert_eq!( + pack.get_object_at(2, "url").unwrap(), + &SerdeObject::Str(From::from("https://soniex2.autistic.space/git-repos/chewstuff.git")), + ); + assert_eq!( + pack.get_object_at(2, "branch").unwrap(), + &SerdeObject::Str(From::from("HEAD")), + ); + assert_eq!( + pack.get_object_at(2, "active").unwrap(), + &SerdeObject::Bool(true), + ); + } +} + diff --git a/src/vm/de/unpacker.rs b/src/vm/de/unpacker.rs new file mode 100644 index 0000000..8b16aa3 --- /dev/null +++ b/src/vm/de/unpacker.rs @@ -0,0 +1,518 @@ +// Copyright (C) 2022 Soni L. +// SPDX-License-Identifier: MIT OR Apache-2.0 + +//! Unpacker-related parts of the VM. + +use std::collections::HashMap; +use std::collections::hash_map::IntoIter as HMIntoIter; + +use serde::de::IntoDeserializer; +use serde::de::value::StrDeserializer; + +use crate::errors::QueryError; +use crate::vm::Pack; + +/// A `Deserializer` for Datafu output. +/// +/// This converts from Datafu's internal representation (a "pack") into the +/// desired output type. +pub struct Unpacker<'pat, 'de> { + packs: Vec>, + call_limit: usize, +} + +/// Wrapper for an `&mut Unpacker<'pat, 'de>` with additional field +/// unpacking logic. +struct UnpackerFields<'a, 'pat, 'de> { + unpacker: &'a mut Unpacker<'pat, 'de>, + fields: HMIntoIter<&'static str, Option>>, + value: Option>, +} + +impl<'pat, 'de> Unpacker<'pat, 'de> { + /// Creates an Unpacker for unpacking a given Pack. + pub fn new(pack: Pack<'pat, 'de>, call_limit: usize) -> Self { + // invariant: no empty packs + let packs = if pack.subpacks.is_empty() { + vec![] + } else { + vec![pack] + }; + Self { + packs, call_limit, + } + } + + /// Takes the next subpack from the last pack in the pack stack. + fn take_next_pack(&mut self) -> Option> { + self.packs.last_mut().and_then(|x| { + x.subpacks.front_mut() + }).map(|&mut (_, ref mut pack)| { + std::mem::take(pack) + }).filter(|pack| !pack.subpacks.is_empty()) + } +} + +impl<'pat, 'de> serde::de::SeqAccess<'de> for Unpacker<'pat, 'de> { + type Error = QueryError; + + fn next_element_seed>( + &mut self, + seed: T, + ) -> Result, Self::Error> { + if self.packs.is_empty() { + return Ok(None) + } + seed.deserialize(self).map(Some) + } +} + +impl Drop for UnpackerFields<'_, '_, '_> { + fn drop(&mut self) { + let unpacker = &mut *self.unpacker; + while let Some(mut pack) = unpacker.packs.pop() { + pack.subpacks.pop_front(); + if !pack.subpacks.is_empty() { + unpacker.packs.push(pack); + break; + } + } + } +} + +impl<'pat, 'de> serde::de::MapAccess<'de> for UnpackerFields<'_, 'pat, 'de> { + type Error = QueryError; + fn next_key_seed>( + &mut self, + seed: T, + ) -> Result, Self::Error> { + while let Some((key, value)) = self.fields.next() { + if value.is_some() { + self.value = value; + return seed.deserialize(StrDeserializer::new(key)).map(Some) + } + } + Ok(None) + } + + fn next_value_seed>( + &mut self, + seed: T, + ) -> Result { + if let Some(value) = self.value.take() { + seed.deserialize(value.into_deserializer()) + } else { + panic!("broken visitor") + } + } +} + +impl<'pat, 'de> serde::Deserializer<'de> for &mut Unpacker<'pat, 'de> { + type Error = QueryError; + fn deserialize_any(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_bool(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_i8(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_i16(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_i32(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_i64(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_u8(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_u16(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_u32(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_u64(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_f32(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_f64(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_char(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_str(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_string(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_bytes(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_byte_buf(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_option>( + self, + visitor: V, + ) -> Result { + todo!() + } + fn deserialize_unit(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_unit_struct(self, _: &'static str, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_newtype_struct(self, _: &'static str, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_seq(self, visitor: V) -> Result where V: serde::de::Visitor<'de> { todo!(); } + fn deserialize_tuple(self, _: usize, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_tuple_struct(self, _: &'static str, _: usize, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_map(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_struct( + self, + name: &'static str, + fields: &'static [&'static str], + visitor: V, + ) -> Result + where + V: serde::de::Visitor<'de>, + { + let _ = name; + let mut field_map = fields.iter().copied().map(|n| { + (n, None::>) + }).collect::>>(); + // unroll packs + while let Some(pack) = self.take_next_pack() { + self.packs.push(pack); + } + // roll them back up + 'roll: while let Some(pack) = self.packs.pop() { + let (ref entries, _) = pack.subpacks[0]; + for key in entries.keys().copied() { + if field_map.contains_key(key) { + self.packs.push(pack); + break 'roll; + } + } + match self.packs.last_mut().map(|x| &mut x.subpacks[0]) { + Some((_, subpack)) => { + *subpack = pack; + }, + None => todo!(), + } + } + for pack in self.packs.iter() { + let (ref entries, _) = pack.subpacks[0]; + for (key, value) in entries.iter() { + if let Some(entry) = field_map.get_mut(*key) { + *entry = Some(value.clone()); + } + } + } + visitor.visit_map(UnpackerFields { + unpacker: self, + fields: field_map.into_iter(), + value: None, + }) + } + fn deserialize_enum(self, _: &'static str, _: &'static [&'static str], _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_identifier(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_ignored_any(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } +} + +impl<'pat, 'de> serde::Deserializer<'de> for Unpacker<'pat, 'de> { + type Error = QueryError; + fn deserialize_any(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_bool(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_i8(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_i16(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_i32(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_i64(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_u8(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_u16(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_u32(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_u64(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_f32(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_f64(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_char(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_str(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_string(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_bytes(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_byte_buf(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_option(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_unit(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_unit_struct(self, _: &'static str, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_newtype_struct(self, _: &'static str, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_seq(self, visitor: V) -> Result + where + V: serde::de::Visitor<'de>, + { + visitor.visit_seq(self) + } + fn deserialize_tuple(self, _: usize, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_tuple_struct(self, _: &'static str, _: usize, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_map(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_struct( + self, + name: &'static str, + fields: &'static [&'static str], + visitor: V, + ) -> Result + where + V: serde::de::Visitor<'de>, + { + todo!() + } + fn deserialize_enum(self, _: &'static str, _: &'static [&'static str], _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_identifier(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_ignored_any(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } +} + +//#[cfg(test)] +//mod tests { +// use serde::Deserialize; +// use crate::vm::MAX_CALLS; +// +// /// Mock struct for repo options. +// #[derive(Deserialize)] +// #[derive(Debug, PartialEq, Eq)] +// struct Options { +// active: bool, +// federate: Option, +// pinned: Option, +// } +// +// /// Mock struct for a project branch. +// #[derive(Deserialize)] +// #[derive(Debug, PartialEq, Eq)] +// struct ProjectBranch { +// /// The project commit. +// commit: String, +// /// The URL to the repo. +// url: String, +// /// The relevant branch. +// branch: String, +// /// Branch options. +// options: Options, +// } +// +// fn get_real_data() -> crate::vm::Pack<'static, 'static> { +// use indexmap::indexmap; +// use crate::vm::Pack; +// use crate::vm::SerdeObject; +// use crate::vm::SerdeObject::*; +// +// fn mkstr<'a>(s: &'a str) -> SerdeObject<'a> { +// SerdeObject::Str(s.into()) +// } +// +// Pack { +// subpacks: vec![ +// indexmap!{ +// "commit" => ( +// Pack { +// subpacks: vec![ +// indexmap!{ +// "url" => ( +// Pack { +// subpacks: vec![ +// indexmap!{ +// "branch" => ( +// Pack { +// subpacks: vec![ +// indexmap!{ +// "active" => ( +// Pack { +// subpacks: vec![].into(), +// }, +// Bool( +// true, +// ), +// ), +// }, +// ].into(), +// }, +// mkstr( +// "HEAD", +// ), +// ), +// }, +// ].into(), +// }, +// mkstr( +// "https://github.com/ganarchy/GAnarchy", +// ), +// ), +// }, +// indexmap!{ +// "url" => ( +// Pack { +// subpacks: vec![ +// indexmap!{ +// "branch" => ( +// Pack { +// subpacks: vec![ +// indexmap!{ +// "pinned" => ( +// Pack { +// subpacks: vec![].into(), +// }, +// Bool( +// true, +// ), +// ), +// "active" => ( +// Pack { +// subpacks: vec![].into(), +// }, +// Bool( +// true, +// ), +// ), +// }, +// ].into(), +// }, +// mkstr( +// "HEAD", +// ), +// ), +// }, +// ].into(), +// }, +// mkstr( +// "https://soniex2.autistic.space/git-repos/ganarchy.git", +// ), +// ), +// }, +// ].into(), +// }, +// mkstr( +// "385e734a52e13949a7a5c71827f6de920dbfea43", +// ), +// ), +// }, +// indexmap!{ +// "commit" => ( +// Pack { +// subpacks: vec![ +// indexmap!{ +// "url" => ( +// Pack { +// subpacks: vec![ +// indexmap!{ +// "branch" => ( +// Pack { +// subpacks: vec![ +// indexmap!{ +// "pinned" => ( +// Pack { +// subpacks: vec![].into(), +// }, +// Bool( +// true, +// ), +// ), +// "active" => ( +// Pack { +// subpacks: vec![].into(), +// }, +// Bool( +// true, +// ), +// ), +// }, +// ].into(), +// }, +// mkstr( +// "HEAD", +// ), +// ), +// }, +// ].into(), +// }, +// mkstr( +// "https://soniex2.autistic.space/git-repos/mmorfc.git", +// ), +// ), +// }, +// ].into(), +// }, +// mkstr( +// "a8fb5087f79eafe312db270082c052c427b208c2", +// ), +// ), +// }, +// indexmap!{ +// "commit" => ( +// Pack { +// subpacks: vec![ +// indexmap!{ +// "url" => ( +// Pack { +// subpacks: vec![ +// indexmap!{ +// "branch" => ( +// Pack { +// subpacks: vec![ +// indexmap!{ +// "pinned" => ( +// Pack { +// subpacks: vec![].into(), +// }, +// Bool( +// true, +// ), +// ), +// "active" => ( +// Pack { +// subpacks: vec![].into(), +// }, +// Bool( +// true, +// ), +// ), +// }, +// ].into(), +// }, +// mkstr( +// "HEAD", +// ), +// ), +// }, +// ].into(), +// }, +// mkstr( +// "https://soniex2.autistic.space/git-repos/chewstuff.git", +// ), +// ), +// }, +// ].into(), +// }, +// mkstr( +// "2d0b363fe3179087de59d9ef4a2d14af21d89071", +// ), +// ), +// }, +// ].into(), +// } +// } +// +// #[test] +// fn to_vec_of_struct() { +// let der = super::Unpacker::new(get_real_data(), MAX_CALLS); +// let res: Vec = Deserialize::deserialize(der).unwrap(); +// assert_eq!(res, [ +// ProjectBranch { +// commit: "385e734a52e13949a7a5c71827f6de920dbfea43".into(), +// url: "https://github.com/ganarchy/GAnarchy".into(), +// branch: "HEAD".into(), +// options: Options { +// active: true, +// federate: None, +// pinned: None, +// }, +// }, +// ProjectBranch { +// commit: "385e734a52e13949a7a5c71827f6de920dbfea43".into(), +// url: "https://soniex2.autistic.space/git-repos/ganarchy.git".into(), +// branch: "HEAD".into(), +// options: Options { +// active: true, +// federate: None, +// pinned: Some(true), +// }, +// }, +// ProjectBranch { +// commit: "a8fb5087f79eafe312db270082c052c427b208c2".into(), +// url: "https://soniex2.autistic.space/git-repos/mmorfc.git".into(), +// branch: "HEAD".into(), +// options: Options { +// active: true, +// federate: None, +// pinned: Some(true), +// }, +// }, +// ProjectBranch { +// commit: "2d0b363fe3179087de59d9ef4a2d14af21d89071".into(), +// url: "https://soniex2.autistic.space/git-repos/chewstuff.git".into(), +// branch: "HEAD".into(), +// options: Options { +// active: true, +// federate: None, +// pinned: Some(true), +// }, +// }, +// ]); +// } +//} diff --git a/src/vm/mod.rs b/src/vm/mod.rs index 21fec73..eb9a2a3 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -9,9 +9,11 @@ use std::borrow::Cow; use std::cell::Cell; use std::cell::RefCell; use std::collections::BTreeMap; +use std::collections::VecDeque; use std::marker::PhantomData; use indexmap::IndexMap; +use indexmap::IndexSet; use regex::Regex; use serde::Serialize; @@ -37,7 +39,7 @@ pub(crate) struct PatternConstants { pub(crate) protos: Vec>, // Note that we can borrow these when creating the output map. // https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f - pub(crate) strings: Vec, + pub(crate) strings: IndexSet, pub(crate) regices: Vec, pub(crate) predicates: Vec>, pub(crate) defs: Vec, @@ -348,7 +350,6 @@ where fn into_deserializer(self) -> Self::Deserializer { Self::Deserializer { obj: self, - value: None, _e: PhantomData, } } @@ -357,15 +358,25 @@ where /// Packed serde objects and datafu internal representation. /// /// This is an iterative store of key-value pairs. +/// +/// It's effectively a tree node. #[derive(Clone, Debug, Default)] pub struct Pack<'pat, 'de> { - subpacks: Vec, SerdeObject<'de>)>>, + subpacks: VecDeque<(IndexMap<&'pat str, SerdeObject<'de>>, Pack<'pat, 'de>)>, } impl<'pat, 'de> Pack<'pat, 'de> { - /// Merges two packs, with elements from `other` coming after `self`, as if - /// parts of the same iteration. - fn merge_breadth(&mut self, mut other: Self) { + /// Adds the elements of the `other` pack to the matching iterarions of the + /// current pack. If either pack is empty, pick the non-empty pack. + /// + /// The current pack will have the same number of iterations, but will + /// contain captures from both packs. In case of captures of the same name, + /// `other` will override `self`. + /// + /// # Panics + /// + /// Panics if the packs have different iteration lengths. + fn zip(&mut self, mut other: Self) { match (self.subpacks.len(), other.subpacks.len()) { (0, _) => { *self = other; @@ -373,26 +384,82 @@ impl<'pat, 'de> Pack<'pat, 'de> { (_, 0) => {}, (a, b) if a == b => { for (l, r) in self.subpacks.iter_mut().zip(other.subpacks) { - l.extend(r) + // note that we can't actually recurse deeper than the VM + // actually does itself, so we don't need to worry about + // blowing the stack. + l.0.extend(r.0); + l.1.zip(r.1); } }, - _ => unreachable!("merge_breadth unbalanced iterations"), + _ => unreachable!("zip unbalanced iterations"), } } - /// Merges two packs, with elements from `other` coming inside the last - /// element of `self` recursively, if any. - fn merge_depth(&mut self, mut other: Self) { - // note that we can't actually recurse deeper than the VM - // actually does itself, so we don't need to worry about - // blowing the stack. - if let Some(into) = self.subpacks.iter_mut().rev().filter(|map| { - !map.is_empty() - }).next() { - into.last_mut().unwrap().1.0.merge_depth(other); - } else { - *self = other; + /// Adds the elements of the `other` pack to all iterations captured by + /// this pack, such as to form a cartesian product. If either pack is + /// empty, pick the non-empty pack. If both packs have a length of 1, merge + /// their captures. + /// + /// The current pack will contain captures from both packs. In case of + /// captures of the same name, `other` will override `self`. + fn cartesian_product(&mut self, mut other: Self) { + match (self.subpacks.len(), other.subpacks.len()) { + (_, 0) => { + return; + }, + (0, _) => { + *self = other; + return; + }, + (1, 1) => { + let (robjects, rpack) = other.subpacks.pop_back().unwrap(); + let (ref mut lobjects, ref mut lpack) = self.subpacks[0]; + lobjects.extend(robjects); + lpack.cartesian_product(rpack); + return; + }, + (1, _) => { + self.subpacks[0].1.cartesian_product(other); + return; + }, + (_, 1) => { + // FIXME: need to be careful with this one. + // we want `other` to override entries from `self`, so we need + // to scan `self` for captures of the same name as those in + // `other`, and remove those captures. only then can we swap + // `self` and `other` and merge them. + // for now we can just do the inefficient thing tho. + }, + _ => {}, } + // FIXME instead of doing this, perhaps we should find the smaller one, + // and put clones of the larger one into it? + self.subpacks.iter_mut().for_each(|&mut (_, ref mut lpack)| { + lpack.cartesian_product(other.clone()) + }); + } + + /// Returns the serde object with the given name at the given iteration of + /// this pack. + #[cfg(test)] + fn get_object_at( + &self, + iter: usize, + name: &str, + ) -> Option<&SerdeObject<'de>> { + self.subpacks.get(iter).map(|x| &x.0).and_then(|x| x.get(name)) + } + + /// Returns the subpack related to the given name at the given iteration of + /// this pack. + #[cfg(test)] + fn get_subpack_at( + &self, + iter: usize, + name: &str, + ) -> Option<&Pack<'pat, 'de>> { + let _ = name; + self.subpacks.get(iter).map(|x| &x.1) } } diff --git a/tests/basic_match.rs b/tests/basic_match.rs index a1c62bb..b58457e 100644 --- a/tests/basic_match.rs +++ b/tests/basic_match.rs @@ -16,14 +16,47 @@ fn test_basic() { yk: String, y: usize, } - // should this error about needing multiple results/Vec requirement? - let matches: Values = pat.deserialize(&mut der).unwrap(); + let matches: Values = pat.deserialize(&mut der).unwrap().collect().unwrap(); assert_eq!(matches.x, "bar"); assert_eq!(matches.yk, "baz"); assert_eq!(matches.y, 2); assert!(der.end().is_ok()); } +#[test] +fn test_real_use_case() { + let pat = datafu::PatternBuilder::for_pattern( + " + :map + ->['projects'?]:map + ->[commit:?str]:?map + ->[url:?str]:?map + ->[branch:?str]:?map + (->['active'?]active:?bool)? + (->['federate'?]?federate:?bool)? + (->['pinned'?]?pinned:?bool)? + " + ).compile().unwrap(); + let data = r#" + {"base_url": "https://ganarchy.autistic.space", "repo_list_srcs": {"https://ganarchy.autistic.space/index.toml": {"active": false}}, "projects": {"385e734a52e13949a7a5c71827f6de920dbfea43": {"https://github.com/ganarchy/GAnarchy": {"HEAD": {"active": true}}, "https://soniex2.autistic.space/git-repos/ganarchy.git": {"HEAD": {"active": true, "pinned": true}}}, "a8fb5087f79eafe312db270082c052c427b208c2": {"https://soniex2.autistic.space/git-repos/mmorfc.git": {"HEAD": {"active": true, "pinned": true}}}, "2d0b363fe3179087de59d9ef4a2d14af21d89071": {"https://soniex2.autistic.space/git-repos/chewstuff.git": {"HEAD": {"active": true, "pinned": true}}}}} + "#; + let mut der = JsonDer::from_str(data); + #[derive(Deserialize)] + struct Values { + commit: String, + url: String, + branch: String, + active: bool, + #[serde(default)] + federate: bool, + #[serde(default)] + pinned: bool, + } + let graph = pat.deserialize(&mut der).unwrap(); + let matches: Vec = graph.collect().unwrap(); + assert!(der.end().is_ok()); +} + //#[test] //fn test_str() { // //let tree = Value::M(vec![ -- cgit 1.4.1