diff options
author | SoniEx2 <endermoneymod@gmail.com> | 2023-04-08 18:52:00 -0300 |
---|---|---|
committer | SoniEx2 <endermoneymod@gmail.com> | 2023-04-08 18:52:00 -0300 |
commit | d849f5e301fa47cfd87df1e7f1ad0346ddf387f1 (patch) | |
tree | a2480d05b753d94a6a8afee9832a902edf02d266 | |
parent | 6dd30531ac62f6a3a564b7341d43f6cd71b90794 (diff) |
Initial success
-rw-r--r-- | ROADMAP.md | 10 | ||||
-rw-r--r-- | src/errors.rs | 34 | ||||
-rw-r--r-- | src/graph.rs | 30 | ||||
-rw-r--r-- | src/lib.rs | 1 | ||||
-rw-r--r-- | src/parser.rs | 4 | ||||
-rw-r--r-- | src/pattern.rs | 12 | ||||
-rw-r--r-- | src/vm/de/mod.rs (renamed from src/vm/de.rs) | 687 | ||||
-rw-r--r-- | src/vm/de/unpacker.rs | 518 | ||||
-rw-r--r-- | src/vm/mod.rs | 107 | ||||
-rw-r--r-- | tests/basic_match.rs | 37 |
10 files changed, 1272 insertions, 168 deletions
diff --git a/ROADMAP.md b/ROADMAP.md new file mode 100644 index 0000000..96f267b --- /dev/null +++ b/ROADMAP.md @@ -0,0 +1,10 @@ +# Roadmap + +This is basically a big ol todo list. Unlike real roadmaps, we don't have dates +here. + +# Necessary + +These are features which correspond to core language semantics: + +- Grouping. Currently we don't support grouping at all. diff --git a/src/errors.rs b/src/errors.rs index 914b70a..ea1e60d 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -56,3 +56,37 @@ pub enum MatchError { /// requirements. Unsatisfiable, } + +#[derive(Debug)] +#[non_exhaustive] +pub enum QueryError { + /// Returned if the deserialization recurses too deeply. + StackOverflow, + /// Returned if there's nothing to deserialize. + Empty, + /// The query is unsatisfiable. This happens if e.g. there are multiple + /// values in the query but only one value can fit into the request. + Unsatisfiable, + /// Wrapped Serde error. + Serde(serde::de::value::Error), +} + +impl std::fmt::Display for QueryError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::StackOverflow => write!(f, "stack overflow"), + Self::Empty => write!(f, "no results"), + Self::Unsatisfiable => write!(f, "unsatisfiable"), + Self::Serde(e) => e.fmt(f), + } + } +} + +impl std::error::Error for QueryError { +} + +impl serde::de::Error for QueryError { + fn custom<T>(msg: T) -> Self where T: std::fmt::Display { + Self::Serde(serde::de::value::Error::custom(msg)) + } +} diff --git a/src/graph.rs b/src/graph.rs new file mode 100644 index 0000000..47d6523 --- /dev/null +++ b/src/graph.rs @@ -0,0 +1,30 @@ +// Copyright (C) 2022 Soni L. +// SPDX-License-Identifier: MIT OR Apache-2.0 + +//! The results produced by a matched pattern. + +use serde::de::Deserialize; + +use crate::vm::MAX_CALLS; +use crate::vm::Pack; +use crate::vm::Unpacker; +use crate::errors::QueryError; + +// TODO in the future, we may want to store &'pat IndexSet<String> either here +// or in the Pack. +#[derive(Debug)] +pub struct Graph<'pat, 'de>(pub(crate) Option<Pack<'pat, 'de>>); + +impl<'pat, 'de> Graph<'pat, 'de> { + /// Collect this graph into a given form. + pub fn collect<De: Deserialize<'de>>(self) -> Result<De, QueryError> { + let Self(inner) = self; + match inner { + None => Err(QueryError::Empty), + Some(pack) => { + let mut unpacker = Unpacker::new(pack, MAX_CALLS); + De::deserialize(unpacker) + }, + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 824d54e..897618b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -110,6 +110,7 @@ //! ``` pub mod errors; +mod graph; //pub mod type_tree; mod parser; mod pattern; diff --git a/src/parser.rs b/src/parser.rs index eb378ad..744ab0f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -354,7 +354,7 @@ where let self_ = &mut *self; let id = id.unwrap_or_else(move || { string.shrink_to_fit(); - self_.consts.strings.push(string); + self_.consts.strings.insert(string); self_.consts.strings.len() - 1 }); self.tokens.push(PatternToken::String(id, skippable)); @@ -499,7 +499,7 @@ where // no processing of `name` is required for this. let id = self.consts.strings.iter().position(|c| c == name); let id = id.unwrap_or_else(|| { - self.consts.strings.push(name.into()); + self.consts.strings.insert(name.into()); self.consts.strings.len() - 1 }); self.tokens.push(PatternToken::Identifier(id)); diff --git a/src/pattern.rs b/src/pattern.rs index 6286ff9..38cdcda 100644 --- a/src/pattern.rs +++ b/src/pattern.rs @@ -13,6 +13,7 @@ use serde::ser::Serialize; use crate::Predicate; use crate::errors::PatternError; +use crate::graph::Graph; use crate::parser::parse; use crate::vm; use crate::vm::PatternConstants; @@ -36,10 +37,12 @@ pub struct Pattern<O: Serialize> { impl<O: Serialize> Pattern<O> { /// Matches the pattern against an input. - pub fn deserialize<'de, Der, De>(&self, der: Der) -> Result<De, Der::Error> + pub fn deserialize<'de, Der>( + &self, + der: Der, + ) -> Result<Graph<'_, 'de>, Der::Error> where Der: Deserializer<'de>, - De: Deserialize<'de>, { let mut err = Default::default(); let mut frames = Default::default(); @@ -57,9 +60,8 @@ impl<O: Serialize> Pattern<O> { // this should always be None debug_assert!(obj.is_none()); debug_assert!(packs.len() <= 1); - let pack = packs.pop().unwrap_or_else(Default::default); - let de = De::deserialize(vm::Unpacker::new(pack, MAX_CALLS)); - todo!() + let pack = packs.pop(); + Ok(Graph(pack)) } } diff --git a/src/vm/de.rs b/src/vm/de/mod.rs index 8f2aa8d..3cba18a 100644 --- a/src/vm/de.rs +++ b/src/vm/de/mod.rs @@ -14,7 +14,6 @@ use serde::de::IntoDeserializer as _; use smallvec::SmallVec; - use super::Frame; use super::Interpreter; use super::Pack; @@ -24,6 +23,11 @@ use super::SerdeObject; use super::Type; use super::Value; use crate::errors::MatchError; +use crate::errors::QueryError; + +mod unpacker; + +pub use unpacker::Unpacker; /// A `DeserializeSeed` for Datafu input. /// @@ -259,7 +263,7 @@ impl<'pat, 'state, 'de, O: Serialize> Packer<'pat, 'state, O> { target_frame.matches = true; pack_index += 1; } else { - packs[target_pack].merge_breadth(pack); + packs[target_pack].zip(pack); } } else { //if frame.poison { @@ -446,8 +450,8 @@ macro_rules! vs { let mut pack = Pack::default(); if let Some(name) = frame.get_name(pat) { let mut map = IndexMap::new(); - map.insert(name, (Pack::default(), SerdeObject::$obj$v)); - pack.subpacks.push(map); + map.insert(name, SerdeObject::$obj$v); + pack.subpacks.push_back((map, Pack::default())); } packs.push(pack); Ok(()) @@ -551,14 +555,8 @@ where let mut pack = Pack::default(); if let Some(name) = frame.get_name(pat) { let mut map = IndexMap::new(); - map.insert( - name, - ( - Pack::default(), - SerdeObject::Str(Cow::Owned(v.into())), - ), - ); - pack.subpacks.push(map); + map.insert(name, SerdeObject::Str(Cow::Owned(v.into()))); + pack.subpacks.push_back((map, Pack::default())); } packs.push(pack); Ok(()) @@ -634,14 +632,8 @@ where let mut pack = Pack::default(); if let Some(name) = frame.get_name(pat) { let mut map = IndexMap::new(); - map.insert( - name, - ( - Pack::default(), - SerdeObject::Str(Cow::Borrowed(v)), - ), - ); - pack.subpacks.push(map); + map.insert(name, SerdeObject::Str(Cow::Borrowed(v))); + pack.subpacks.push_back((map, Pack::default())); } packs.push(pack); Ok(()) @@ -709,15 +701,211 @@ where { todo!() } - fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error> + fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error> where A: serde::de::SeqAccess<'de>, { - let mut obj = None; - if self.collecting { - obj = Some(SerdeObject::Seq(Vec::new())); + let old_collecting = self.collecting; + let pat = self.interp.pat; + let mut collecting = old_collecting; + let typeck = self.frames_mut().iter_active_mut().try_for_each(|frame| { + let ty = frame.get_type(); + match ty { + | Some((Type::Seq, _)) + | Some((Type::Any, _)) + | Some((Type::IgnoredAny, _)) + | None + => {}, + Some((_, false)) => { + frame.matches = false; + return Ok(()); + }, + Some((_, true)) => { + return Err(MatchError::ValidationError) + }, + } + if frame.get_name(pat).is_some() { + collecting = true; + } + Ok(()) + }); + match typeck { + Err(e) => { + self.interp.error.insert(e); + return Err(A::Error::custom("type mismatch")); + }, + _ => (), } - todo!() + if let Err(e) = self.step_in() { return Err(e); } + self.collecting = collecting; + let mut subframes = Vec::new(); + let mut output_matches = Vec::new(); + self.frames().iter_active().for_each(|frame| { + if let Some((key_subtree, _)) = frame.key_subtree() { + subframes.push(Frame { + ops: &pat.protos[key_subtree], + iar: None, + overstep: 0, + matches: true, + poison: false, + }); + } + output_matches.push(false); + }); + let mut obj_inner = Vec::new(); + let mut output_packs = Vec::new(); + let mut iter = 0..; + while let packed_key = { + let subinterp = Interpreter { + pat: pat, + frames: &mut subframes, + error: self.interp.error, + }; + let mut subpacker = Packer { + interp: subinterp, + collecting: self.collecting, + call_limit: self.call_limit, + }; + if subpacker.interp.frames.is_empty() { + // avoid overflow + serde::de::DeserializeSeed::deserialize(&mut subpacker, { + serde::de::value::U64Deserializer::new(0) + })? + } else { + serde::de::DeserializeSeed::deserialize(&mut subpacker, { + serde::de::value::U64Deserializer::new(iter.next().unwrap()) + })? + } + } { + self.frames_mut().iter_active_mut().filter(|frame| { + frame.key_subtree().is_some() + }).zip(&mut subframes).for_each(|(frame, subframe)| { + frame.matches = subframe.matches; + // reset subframe for next iteration + // NOTE wait to reset subframe.matches when merging packs!!! + subframe.iar = None; + }); + self.frames_mut().iter_active_mut().for_each(|frame| { + // mark every non-subtree key as matching. + if frame.key_subtree().is_none() { + frame.matches = true; + } + }); + let Some(packed_value) = seq.next_element_seed(&mut *self)? else { + break; + }; + if self.collecting { + obj_inner.push(packed_value.1.unwrap()); + } + let mut key_packs_per_frame = packed_key.0.into_iter(); + let mut value_packs_per_frame = packed_value.0.into_iter(); + // whatever is active in self.frames(), if matches, has a pack + // whatever is in subframes, if matches, has a pack + // count(active self.frames() with subtree which match) is always + // smaller than count(subframes which match) because the former + // gets updated by next_value_seed + // count(active self.frames() with subtree) == count(subframes) + // tl;dr: need to figure out which packs produced by subframes line + // up with which packs produced by self, discarding extra subframes + // (where the corresponding self frame doesn't match) and accepting + // extra packs produced by self. + // NOTE: key_packs_per_frame ~ subframes + // value_packs_per_frame ~ self + // keys come first tho (key.merge_from(value)) + let mut iter_subframes = subframes.iter_mut(); + // related to output_packs + let mut pack_index = 0; + for (frame, out_matches) in self.frames().iter_active().zip({ + &mut output_matches + }) { + // check if this frame has an associated subframe + let subframe = if frame.key_subtree().is_some() { + // if there are more frames with associated subframes + // than there are subframes, panic + Some(iter_subframes.next().unwrap()) + } else { + None + }; + let mut new_pack = None; + if frame.matches && subframe.is_some() { + // this already implies subframe.matches + let mut key_pack = key_packs_per_frame.next().unwrap(); + let value_pack = value_packs_per_frame.next().unwrap(); + key_pack.cartesian_product(value_pack); + new_pack = Some(key_pack); + } else if frame.matches { + // value matches but there's no subframe, carry on + let value_pack = value_packs_per_frame.next().unwrap(); + new_pack = Some(value_pack); + } else if !frame.matches && subframe.is_some() { + // frame didn't match but there was a subframe + let subframe = subframe.unwrap(); + if subframe.matches { + // subframe matched, remove key pack + let _ = key_packs_per_frame.next().unwrap(); + } else { + // neither matched, no relevant packs + // do reset subframe for next_key_seed tho! + subframe.matches = true; + } + } else { + // no relevant packs + } + if let Some(new_pack) = new_pack { + if !*out_matches { + *out_matches = true; + output_packs.insert(pack_index, Pack::default()); + } + let output_pack = &mut output_packs[pack_index]; + output_pack.subpacks.extend(new_pack.subpacks); + } + if *out_matches { + pack_index += 1; + } + } + } + let mut poison = false; + for (f, m) in self.frames_mut().iter_active_mut().zip(output_matches) { + f.matches = m; + if !m { + if let Some((_, false)) = f.key_subtree() { + poison = true; + } + } + } + let obj = SerdeObject::Seq(obj_inner); + let mut final_packs = self.step_out(output_packs)?; + let mut iter_final_packs = 0..; + self.frames_mut().iter_active_mut().for_each(|frame| { + let ty = frame.get_type(); + match ty { + | Some((Type::Seq, _)) + | Some((Type::Any, _)) + | Some((Type::IgnoredAny, _)) + | None + => { + frame.poison = poison; + let matched = std::mem::replace(&mut frame.matches, true); + if !matched { + final_packs.insert( + iter_final_packs.start, + Pack::default(), + ); + } + }, + _ => return, + } + let pack = &mut final_packs[iter_final_packs.next().unwrap()]; + if let Some(name) = frame.get_name(pat) { + // we can assume collecting == true + let old_pack = std::mem::take(pack); + let mut map = IndexMap::new(); + map.insert(name, obj.clone()); + pack.subpacks.push_back((map, old_pack)); + } + }); + self.collecting = old_collecting; + Ok((final_packs, collecting.then(|| obj))) } fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error> where @@ -839,7 +1027,7 @@ where // this already implies subframe.matches let mut key_pack = key_packs_per_frame.next().unwrap(); let value_pack = value_packs_per_frame.next().unwrap(); - key_pack.merge_depth(value_pack); + key_pack.cartesian_product(value_pack); new_pack = Some(key_pack); } else if frame.matches { // value matches but there's no subframe, carry on @@ -908,8 +1096,8 @@ where // we can assume collecting == true let old_pack = std::mem::take(pack); let mut map = IndexMap::new(); - map.insert(name, (old_pack, obj.clone())); - pack.subpacks.push(map); + map.insert(name, obj.clone()); + pack.subpacks.push_back((map, old_pack)); } }); self.collecting = old_collecting; @@ -930,73 +1118,75 @@ where } } -/// A `Deserializer` for Datafu output. -/// -/// This converts from Datafu's internal representation (a "pack") into the -/// desired output type. -pub struct Unpacker<'pat, 'de> { - pack: Pack<'pat, 'de>, - call_limit: usize, +/// Deserializes a SerdeObject +pub(crate) struct SerdeObjectDeserializer<'de, E> { + pub(crate) obj: SerdeObject<'de>, + pub(crate) _e: PhantomData<fn() -> E>, } -impl<'pat, 'de> Unpacker<'pat, 'de> { - /// Unpacks a Datafu "pack". - pub fn new(pack: Pack<'pat, 'de>, call_limit: usize) -> Self { - Self { - pack, call_limit, - } - } +/// Deserializes a SerdeObject::Seq +struct SerdeObjectSeq<'de, I: Iterator<Item=SerdeObject<'de>>, E> { + iter: I, + _e: PhantomData<fn() -> E>, } -impl<'pat, 'de> serde::Deserializer<'de> for Unpacker<'pat, 'de> { - // TODO datafu errors - type Error = serde::de::value::Error; - fn deserialize_any<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_bool<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_i8<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_i16<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_i32<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_i64<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_u8<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_u16<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_u32<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_u64<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_f32<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_f64<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_char<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_str<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_string<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_bytes<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_byte_buf<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_option<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_unit<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_unit_struct<V>(self, _: &'static str, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_newtype_struct<V>(self, _: &'static str, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_seq<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_tuple<V>(self, _: usize, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_tuple_struct<V>(self, _: &'static str, _: usize, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_map<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_struct<V>( +impl<'de, E> serde::de::VariantAccess<'de> for SerdeObjectDeserializer<'de, E> +where + E: serde::de::Error, +{ + type Error = E; + + fn unit_variant(self) -> Result<(), E> { + todo!() + } + fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value, E> + where + T: serde::de::DeserializeSeed<'de>, + { + todo!() + } + fn tuple_variant<V>(self, len: usize, visitor: V) -> Result<V::Value, E> + where + V: serde::de::Visitor<'de>, + { + todo!() + } + fn struct_variant<V>( self, - _: &'static str, fields: &'static [&'static str], visitor: V, - ) -> Result<V::Value, Self::Error> + ) -> Result<V::Value, E> where V: serde::de::Visitor<'de>, { todo!() } - fn deserialize_enum<V>(self, _: &'static str, _: &'static [&'static str], _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_identifier<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } - fn deserialize_ignored_any<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } } -/// Deserializes a SerdeObject -pub(crate) struct SerdeObjectDeserializer<'de, E> { - pub(crate) obj: SerdeObject<'de>, - pub(crate) value: Option<SerdeObject<'de>>, - pub(crate) _e: PhantomData<fn() -> E>, +impl<'de, E> serde::de::EnumAccess<'de> for SerdeObjectDeserializer<'de, E> +where + E: serde::de::Error, +{ + type Error = E; + type Variant = Self; + + fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self), E> + where + V: serde::de::DeserializeSeed<'de>, + { + match self.obj { + SerdeObject::Enum { variant, data } => { + seed.deserialize(variant.into_deserializer()).map(|it| { + let data = Self { + obj: *data, + _e: PhantomData, + }; + (it, data) + }) + }, + _ => unreachable!(), + } + } } impl<'de, E> serde::de::Deserializer<'de> for SerdeObjectDeserializer<'de, E> @@ -1030,12 +1220,20 @@ where SerdeObject::Some(x) => v.visit_some(x.into_deserializer()), SerdeObject::None => v.visit_none(), SerdeObject::Unit => v.visit_unit(), - SerdeObject::Seq(x) => todo!(), - SerdeObject::Map(x) => todo!(), + SerdeObject::Seq(x) => { + let mut x = serde::de::value::SeqDeserializer::new(x.into_iter()); + let r = v.visit_seq(&mut x); + x.end().and(r) + }, + SerdeObject::Map(x) => { + let mut x = serde::de::value::MapDeserializer::new(x.into_iter()); + let r = v.visit_map(&mut x); + x.end().and(r) + }, SerdeObject::NewtypeStruct(x) => { v.visit_newtype_struct(x.into_deserializer()) }, - SerdeObject::Enum { variant, data } => todo!(), + SerdeObject::Enum { .. } => v.visit_enum(self), } } fn deserialize_ignored_any<V>(self, v: V) -> Result<V::Value, Self::Error> @@ -1110,7 +1308,7 @@ mod tests { fn test_simple_match() { // test matching a simple value let mut consts = PatternConstants::<()>::default(); - consts.strings.push("hello".into()); + consts.strings.insert("hello".into()); consts.protos.push(vec![ PatternElement::Value { name: Some(0), @@ -1127,14 +1325,17 @@ mod tests { let packed = Packer::new(interp, MAX_CALLS).deserialize(&mut der); let (packs, obj) = packed.unwrap(); assert!(obj.is_none()); - assert_eq!(packs[0].subpacks[0]["hello"].1, SerdeObject::U64(3)); + assert_eq!( + packs[0].get_object_at(0, "hello").unwrap(), + &SerdeObject::U64(3), + ); } #[test] fn test_simple_error() { // test a value that doesn't match (serde_json error) let mut consts = PatternConstants::<()>::default(); - consts.strings.push("hello".into()); + consts.strings.insert("hello".into()); consts.protos.push(vec![ PatternElement::Value { name: Some(0), @@ -1157,8 +1358,8 @@ mod tests { fn test_basic_multiframe() { // test multiple frames (matching and non-matching) let mut consts = PatternConstants::<()>::default(); - consts.strings.push("a".into()); - consts.strings.push("b".into()); + consts.strings.insert("a".into()); + consts.strings.insert("b".into()); consts.protos.push(vec![ PatternElement::Value { name: Some(0), @@ -1203,8 +1404,8 @@ mod tests { let (packs, obj) = packed.unwrap(); assert!(obj.is_none()); assert_eq!( - packs[0].subpacks[0]["a"].1, - SerdeObject::U64(10), + packs[0].get_object_at(0, "a").unwrap(), + &SerdeObject::U64(10), ); assert_eq!(packs.len(), 1); assert!(frames[0].matches); @@ -1215,8 +1416,8 @@ mod tests { fn test_map() { // test visit_map let mut consts = PatternConstants::<()>::default(); - consts.strings.push("key".into()); - consts.strings.push("value".into()); + consts.strings.insert("key".into()); + consts.strings.insert("value".into()); consts.protos.push(vec![ PatternElement::Value { name: Some(0), @@ -1251,20 +1452,22 @@ mod tests { let (packs, obj) = packed.unwrap(); assert!(obj.is_none()); assert_eq!( - packs[0].subpacks[0]["key"].1, - SerdeObject::Str("hello".into()), + packs[0].get_object_at(0, "key").unwrap(), + &SerdeObject::Str("hello".into()), ); assert_eq!( - packs[0].subpacks[0]["key"].0.subpacks[0]["value"].1, - SerdeObject::U64(0), + packs[0] + .get_object_at(0, "value").unwrap(), + &SerdeObject::U64(0), ); assert_eq!( - packs[0].subpacks[1]["key"].1, - SerdeObject::Str("world".into()), + packs[0].get_object_at(1, "key").unwrap(), + &SerdeObject::Str("world".into()), ); assert_eq!( - packs[0].subpacks[1]["key"].0.subpacks[0]["value"].1, - SerdeObject::U64(1), + packs[0] + .get_object_at(1, "value").unwrap(), + &SerdeObject::U64(1), ); } @@ -1381,7 +1584,6 @@ mod tests { None, None, ).unwrap(); - dbg!(&consts); let data = r#"{"hello": "world"}"#; let mut der = JsonDeserializer::from_str(data); let mut err = Default::default(); @@ -1440,8 +1642,14 @@ mod tests { assert_eq!(packs.len(), 1); let pack = packs.pop().unwrap(); assert_eq!(pack.subpacks.len(), 1); - assert_eq!(pack.subpacks[0]["name"].1, SerdeObject::Str(From::from("a"))); - assert_eq!(pack.subpacks[0]["value"].1, SerdeObject::U32(1)); + assert_eq!( + pack.get_object_at(0, "name").unwrap(), + &SerdeObject::Str(From::from("a")), + ); + assert_eq!( + pack.get_object_at(0, "value").unwrap(), + &SerdeObject::U32(1), + ); } #[test] @@ -1480,8 +1688,14 @@ mod tests { assert_eq!(packs.len(), 1); let pack = packs.pop().unwrap(); assert_eq!(pack.subpacks.len(), 1); - assert_eq!(pack.subpacks[0]["name"].1, SerdeObject::Str(From::from("a"))); - assert_eq!(pack.subpacks[0]["value"].1, SerdeObject::U32(1)); + assert_eq!( + pack.get_object_at(0, "name").unwrap(), + &SerdeObject::Str(From::from("a")), + ); + assert_eq!( + pack.get_object_at(0, "value").unwrap(), + &SerdeObject::U32(1), + ); } #[test] @@ -1516,7 +1730,10 @@ mod tests { assert_eq!(packs.len(), 1); let pack = packs.pop().unwrap(); assert_eq!(pack.subpacks.len(), 1); - assert_eq!(pack.subpacks[0]["name"].1, SerdeObject::U32(1)); + assert_eq!( + pack.get_object_at(0, "name").unwrap(), + &SerdeObject::U32(1), + ); } #[test] @@ -1627,11 +1844,18 @@ mod tests { assert_eq!(packs.len(), 1); let pack = &packs[0]; assert_eq!(pack.subpacks.len(), 1); - let b = &pack.subpacks[0]["b"]; - assert_eq!(b.1, SerdeObject::Str(From::from("2"))); - assert_eq!(b.0.subpacks.len(), 1); - assert_eq!(b.0.subpacks[0]["x"].1, SerdeObject::Bool(true)); - assert_eq!(b.0.subpacks[0]["y"].1, SerdeObject::Bool(true)); + assert_eq!( + pack.get_object_at(0, "b").unwrap(), + &SerdeObject::Str(From::from("2")), + ); + assert_eq!( + pack.get_object_at(0, "x").unwrap(), + &SerdeObject::Bool(true), + ); + assert_eq!( + pack.get_object_at(0, "y").unwrap(), + &SerdeObject::Bool(true), + ); } #[test] @@ -1665,9 +1889,11 @@ mod tests { assert_eq!(packs.len(), 1); let pack = &packs[0]; assert_eq!(pack.subpacks.len(), 1); - let a = &pack.subpacks[0]["a"]; - assert_eq!(a.1, SerdeObject::Str(From::from("a"))); - assert_eq!(a.0.subpacks.len(), 0); + assert_eq!( + pack.get_object_at(0, "a").unwrap(), + &SerdeObject::Str(From::from("a")), + ); + assert_eq!(pack.get_subpack_at(0, "a").unwrap().subpacks.len(), 0); } #[test] @@ -1702,6 +1928,145 @@ mod tests { } #[test] + fn test_basic_seq() { + // test that sequences work + let consts = crate::parser::parse::<&'static str, &'static str, ()>( + " + :seq + ->[i:u64]v:u64 + ", + None, + None + ).unwrap(); + let data = r#"[10, 11, 12]"#; + let mut der = JsonDeserializer::from_str(data); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new( + &consts, + &mut err, + &mut frames, + //&mut output, + ); + let result = Packer::new( + interp, + MAX_CALLS, + ).deserialize(&mut der); + let (mut packs, obj) = result.unwrap(); + assert!(obj.is_none()); + assert_eq!(packs.len(), 1); + let pack = packs.pop().unwrap(); + assert_eq!(pack.subpacks.len(), 3); + assert_eq!( + pack.get_object_at(0, "i").unwrap(), + &SerdeObject::U64(0), + ); + assert_eq!( + pack.get_object_at(0, "v").unwrap(), + &SerdeObject::U64(10), + ); + assert_eq!( + pack.get_object_at(1, "i").unwrap(), + &SerdeObject::U64(1), + ); + assert_eq!( + pack.get_object_at(1, "v").unwrap(), + &SerdeObject::U64(11), + ); + assert_eq!( + pack.get_object_at(2, "i").unwrap(), + &SerdeObject::U64(2), + ); + assert_eq!( + pack.get_object_at(2, "v").unwrap(), + &SerdeObject::U64(12), + ); + } + + #[test] + fn test_list_key() { + let consts = crate::parser::parse::<&'static str, &'static str, ()>( + " + :map + ->[:seq->k:u64]:seq + ->v:str + ", + None, + None + ).unwrap(); + let data = &[ + 0x01, // map length (1) + 0x03, // list length (3) + 0x01, 0x02, 0x03, // [1, 2, 3] + 0x02, // list length (2) + 0x05, // string length (5) + b't', b'r', b'a', b'n', b's', + 0x06, // string length (6) + b'r', b'i', b'g', b'h', b't', b's' + ]; + let mut der = PostcardDeserializer::from_bytes(data); + let mut err = Default::default(); + let mut frames = Default::default(); + let interp = Interpreter::new( + &consts, + &mut err, + &mut frames, + //&mut output, + ); + let result = Packer::new( + interp, + MAX_CALLS, + ).deserialize(&mut der); + let (mut packs, obj) = result.unwrap(); + assert!(obj.is_none()); + assert_eq!(packs.len(), 1); + let pack = packs.pop().unwrap(); + assert_eq!(pack.subpacks.len(), 3); + let subpack = pack.get_subpack_at(0, "k").unwrap(); + assert_eq!(subpack.subpacks.len(), 2); + assert_eq!( + pack.get_object_at(0, "k").unwrap(), + &SerdeObject::U64(1), + ); + assert_eq!( + subpack.get_object_at(0, "v").unwrap(), + &SerdeObject::Str(From::from("trans")), + ); + assert_eq!( + subpack.get_object_at(1, "v").unwrap(), + &SerdeObject::Str(From::from("rights")), + ); + let subpack = pack.get_subpack_at(1, "k").unwrap(); + assert_eq!(subpack.subpacks.len(), 2); + assert_eq!( + pack.get_object_at(1, "k").unwrap(), + &SerdeObject::U64(2), + ); + assert_eq!( + subpack.get_object_at(0, "v").unwrap(), + &SerdeObject::Str(From::from("trans")), + ); + assert_eq!( + subpack.get_object_at(1, "v").unwrap(), + &SerdeObject::Str(From::from("rights")), + ); + let subpack = pack.get_subpack_at(2, "k").unwrap(); + assert_eq!(subpack.subpacks.len(), 2); + assert_eq!( + pack.get_object_at(2, "k").unwrap(), + &SerdeObject::U64(3), + ); + assert_eq!( + subpack.get_object_at(0, "v").unwrap(), + &SerdeObject::Str(From::from("trans")), + ); + assert_eq!( + subpack.get_object_at(1, "v").unwrap(), + &SerdeObject::Str(From::from("rights")), + ); + } + + #[test] fn test_realish_use_case() { // use a parsed pattern that might actually be used in the real world. let consts = crate::parser::parse::<&'static str, &'static str, ()>( @@ -1718,7 +2083,7 @@ mod tests { None ).unwrap(); let data = r#" - {"base_url": "https://ganarchy.autistic.space", "repo_list_srcs": {"https://ganarchy.autistic.space/index.toml": {"active": false}}, "projects": {"a8fb5087f79eafe312db270082c052c427b208c2": {"https://soniex2.autistic.space/git-repos/mmorfc.git": {"HEAD": {"active": true, "pinned": true}}}, "2d0b363fe3179087de59d9ef4a2d14af21d89071": {"https://soniex2.autistic.space/git-repos/chewstuff.git": {"HEAD": {"active": true, "pinned": true}}}}} + {"base_url": "https://ganarchy.autistic.space", "repo_list_srcs": {"https://ganarchy.autistic.space/index.toml": {"active": false}}, "projects": {"385e734a52e13949a7a5c71827f6de920dbfea43": {"https://github.com/ganarchy/GAnarchy": {"HEAD": {"active": true}}, "https://soniex2.autistic.space/git-repos/ganarchy.git": {"HEAD": {"active": true, "pinned": true}}}, "a8fb5087f79eafe312db270082c052c427b208c2": {"https://soniex2.autistic.space/git-repos/mmorfc.git": {"HEAD": {"active": true, "pinned": true}}}, "2d0b363fe3179087de59d9ef4a2d14af21d89071": {"https://soniex2.autistic.space/git-repos/chewstuff.git": {"HEAD": {"active": true, "pinned": true}}}}} "#; let mut der = JsonDeserializer::from_str(data); let mut err = Default::default(); @@ -1737,30 +2102,74 @@ mod tests { assert!(obj.is_none()); assert_eq!(packs.len(), 1); let pack = &packs[0]; - assert_eq!(pack.subpacks.len(), 2); + assert_eq!(pack.subpacks.len(), 3); + + let commit = pack.get_subpack_at(0, "commit").unwrap(); + assert_eq!( + pack.get_object_at(0, "commit").unwrap(), + &SerdeObject::Str(From::from("385e734a52e13949a7a5c71827f6de920dbfea43")), + ); + assert_eq!(commit.subpacks.len(), 2); + + assert_eq!( + commit.get_object_at(0, "url").unwrap(), + &SerdeObject::Str(From::from("https://github.com/ganarchy/GAnarchy")), + ); + assert_eq!( + commit.get_object_at(0, "branch").unwrap(), + &SerdeObject::Str(From::from("HEAD")), + ); + assert_eq!( + commit.get_object_at(0, "active").unwrap(), + &SerdeObject::Bool(true), + ); + + assert_eq!( + commit.get_object_at(1, "url").unwrap(), + &SerdeObject::Str(From::from("https://soniex2.autistic.space/git-repos/ganarchy.git")), + ); + assert_eq!( + commit.get_object_at(1, "branch").unwrap(), + &SerdeObject::Str(From::from("HEAD")), + ); + assert_eq!( + commit.get_object_at(1, "active").unwrap(), + &SerdeObject::Bool(true), + ); - let commit = &pack.subpacks[0]["commit"]; - assert_eq!(commit.1, SerdeObject::Str(From::from("a8fb5087f79eafe312db270082c052c427b208c2"))); - assert_eq!(commit.0.subpacks.len(), 1); - let url = &commit.0.subpacks[0]["url"]; - assert_eq!(url.1, SerdeObject::Str(From::from("https://soniex2.autistic.space/git-repos/mmorfc.git"))); - assert_eq!(url.0.subpacks.len(), 1); - let branch = &url.0.subpacks[0]["branch"]; - assert_eq!(branch.1, SerdeObject::Str(From::from("HEAD"))); - assert_eq!(branch.0.subpacks.len(), 1); - let active = &branch.0.subpacks[0]["active"]; - assert_eq!(active.1, SerdeObject::Bool(true)); - - let commit = &pack.subpacks[1]["commit"]; - assert_eq!(commit.1, SerdeObject::Str(From::from("2d0b363fe3179087de59d9ef4a2d14af21d89071"))); - assert_eq!(commit.0.subpacks.len(), 1); - let url = &commit.0.subpacks[0]["url"]; - assert_eq!(url.1, SerdeObject::Str(From::from("https://soniex2.autistic.space/git-repos/chewstuff.git"))); - assert_eq!(url.0.subpacks.len(), 1); - let branch = &url.0.subpacks[0]["branch"]; - assert_eq!(branch.1, SerdeObject::Str(From::from("HEAD"))); - assert_eq!(branch.0.subpacks.len(), 1); - assert_eq!(active.1, SerdeObject::Bool(true)); + assert_eq!( + pack.get_object_at(1, "commit").unwrap(), + &SerdeObject::Str(From::from("a8fb5087f79eafe312db270082c052c427b208c2")), + ); + assert_eq!( + pack.get_object_at(1, "url").unwrap(), + &SerdeObject::Str(From::from("https://soniex2.autistic.space/git-repos/mmorfc.git")), + ); + assert_eq!( + pack.get_object_at(1, "branch").unwrap(), + &SerdeObject::Str(From::from("HEAD")), + ); + assert_eq!( + pack.get_object_at(1, "active").unwrap(), + &SerdeObject::Bool(true), + ); + + assert_eq!( + pack.get_object_at(2, "commit").unwrap(), + &SerdeObject::Str(From::from("2d0b363fe3179087de59d9ef4a2d14af21d89071")), + ); + assert_eq!( + pack.get_object_at(2, "url").unwrap(), + &SerdeObject::Str(From::from("https://soniex2.autistic.space/git-repos/chewstuff.git")), + ); + assert_eq!( + pack.get_object_at(2, "branch").unwrap(), + &SerdeObject::Str(From::from("HEAD")), + ); + assert_eq!( + pack.get_object_at(2, "active").unwrap(), + &SerdeObject::Bool(true), + ); } } diff --git a/src/vm/de/unpacker.rs b/src/vm/de/unpacker.rs new file mode 100644 index 0000000..8b16aa3 --- /dev/null +++ b/src/vm/de/unpacker.rs @@ -0,0 +1,518 @@ +// Copyright (C) 2022 Soni L. +// SPDX-License-Identifier: MIT OR Apache-2.0 + +//! Unpacker-related parts of the VM. + +use std::collections::HashMap; +use std::collections::hash_map::IntoIter as HMIntoIter; + +use serde::de::IntoDeserializer; +use serde::de::value::StrDeserializer; + +use crate::errors::QueryError; +use crate::vm::Pack; + +/// A `Deserializer` for Datafu output. +/// +/// This converts from Datafu's internal representation (a "pack") into the +/// desired output type. +pub struct Unpacker<'pat, 'de> { + packs: Vec<Pack<'pat, 'de>>, + call_limit: usize, +} + +/// Wrapper for an `&mut Unpacker<'pat, 'de>` with additional field +/// unpacking logic. +struct UnpackerFields<'a, 'pat, 'de> { + unpacker: &'a mut Unpacker<'pat, 'de>, + fields: HMIntoIter<&'static str, Option<crate::vm::SerdeObject<'de>>>, + value: Option<crate::vm::SerdeObject<'de>>, +} + +impl<'pat, 'de> Unpacker<'pat, 'de> { + /// Creates an Unpacker for unpacking a given Pack. + pub fn new(pack: Pack<'pat, 'de>, call_limit: usize) -> Self { + // invariant: no empty packs + let packs = if pack.subpacks.is_empty() { + vec![] + } else { + vec![pack] + }; + Self { + packs, call_limit, + } + } + + /// Takes the next subpack from the last pack in the pack stack. + fn take_next_pack(&mut self) -> Option<Pack<'pat, 'de>> { + self.packs.last_mut().and_then(|x| { + x.subpacks.front_mut() + }).map(|&mut (_, ref mut pack)| { + std::mem::take(pack) + }).filter(|pack| !pack.subpacks.is_empty()) + } +} + +impl<'pat, 'de> serde::de::SeqAccess<'de> for Unpacker<'pat, 'de> { + type Error = QueryError; + + fn next_element_seed<T: serde::de::DeserializeSeed<'de>>( + &mut self, + seed: T, + ) -> Result<Option<T::Value>, Self::Error> { + if self.packs.is_empty() { + return Ok(None) + } + seed.deserialize(self).map(Some) + } +} + +impl Drop for UnpackerFields<'_, '_, '_> { + fn drop(&mut self) { + let unpacker = &mut *self.unpacker; + while let Some(mut pack) = unpacker.packs.pop() { + pack.subpacks.pop_front(); + if !pack.subpacks.is_empty() { + unpacker.packs.push(pack); + break; + } + } + } +} + +impl<'pat, 'de> serde::de::MapAccess<'de> for UnpackerFields<'_, 'pat, 'de> { + type Error = QueryError; + fn next_key_seed<T: serde::de::DeserializeSeed<'de>>( + &mut self, + seed: T, + ) -> Result<Option<T::Value>, Self::Error> { + while let Some((key, value)) = self.fields.next() { + if value.is_some() { + self.value = value; + return seed.deserialize(StrDeserializer::new(key)).map(Some) + } + } + Ok(None) + } + + fn next_value_seed<T: serde::de::DeserializeSeed<'de>>( + &mut self, + seed: T, + ) -> Result<T::Value, Self::Error> { + if let Some(value) = self.value.take() { + seed.deserialize(value.into_deserializer()) + } else { + panic!("broken visitor") + } + } +} + +impl<'pat, 'de> serde::Deserializer<'de> for &mut Unpacker<'pat, 'de> { + type Error = QueryError; + fn deserialize_any<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_bool<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_i8<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_i16<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_i32<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_i64<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_u8<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_u16<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_u32<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_u64<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_f32<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_f64<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_char<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_str<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_string<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_bytes<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_byte_buf<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_option<V: serde::de::Visitor<'de>>( + self, + visitor: V, + ) -> Result<V::Value, Self::Error> { + todo!() + } + fn deserialize_unit<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_unit_struct<V>(self, _: &'static str, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_newtype_struct<V>(self, _: &'static str, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!(); } + fn deserialize_tuple<V>(self, _: usize, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_tuple_struct<V>(self, _: &'static str, _: usize, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_map<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_struct<V>( + self, + name: &'static str, + fields: &'static [&'static str], + visitor: V, + ) -> Result<V::Value, Self::Error> + where + V: serde::de::Visitor<'de>, + { + let _ = name; + let mut field_map = fields.iter().copied().map(|n| { + (n, None::<crate::vm::SerdeObject<'de>>) + }).collect::<HashMap<&'static str, Option<_>>>(); + // unroll packs + while let Some(pack) = self.take_next_pack() { + self.packs.push(pack); + } + // roll them back up + 'roll: while let Some(pack) = self.packs.pop() { + let (ref entries, _) = pack.subpacks[0]; + for key in entries.keys().copied() { + if field_map.contains_key(key) { + self.packs.push(pack); + break 'roll; + } + } + match self.packs.last_mut().map(|x| &mut x.subpacks[0]) { + Some((_, subpack)) => { + *subpack = pack; + }, + None => todo!(), + } + } + for pack in self.packs.iter() { + let (ref entries, _) = pack.subpacks[0]; + for (key, value) in entries.iter() { + if let Some(entry) = field_map.get_mut(*key) { + *entry = Some(value.clone()); + } + } + } + visitor.visit_map(UnpackerFields { + unpacker: self, + fields: field_map.into_iter(), + value: None, + }) + } + fn deserialize_enum<V>(self, _: &'static str, _: &'static [&'static str], _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_identifier<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_ignored_any<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } +} + +impl<'pat, 'de> serde::Deserializer<'de> for Unpacker<'pat, 'de> { + type Error = QueryError; + fn deserialize_any<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_bool<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_i8<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_i16<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_i32<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_i64<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_u8<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_u16<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_u32<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_u64<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_f32<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_f64<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_char<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_str<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_string<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_bytes<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_byte_buf<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_option<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_unit<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_unit_struct<V>(self, _: &'static str, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_newtype_struct<V>(self, _: &'static str, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, Self::Error> + where + V: serde::de::Visitor<'de>, + { + visitor.visit_seq(self) + } + fn deserialize_tuple<V>(self, _: usize, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_tuple_struct<V>(self, _: &'static str, _: usize, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_map<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_struct<V>( + self, + name: &'static str, + fields: &'static [&'static str], + visitor: V, + ) -> Result<V::Value, Self::Error> + where + V: serde::de::Visitor<'de>, + { + todo!() + } + fn deserialize_enum<V>(self, _: &'static str, _: &'static [&'static str], _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_identifier<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } + fn deserialize_ignored_any<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } +} + +//#[cfg(test)] +//mod tests { +// use serde::Deserialize; +// use crate::vm::MAX_CALLS; +// +// /// Mock struct for repo options. +// #[derive(Deserialize)] +// #[derive(Debug, PartialEq, Eq)] +// struct Options { +// active: bool, +// federate: Option<bool>, +// pinned: Option<bool>, +// } +// +// /// Mock struct for a project branch. +// #[derive(Deserialize)] +// #[derive(Debug, PartialEq, Eq)] +// struct ProjectBranch { +// /// The project commit. +// commit: String, +// /// The URL to the repo. +// url: String, +// /// The relevant branch. +// branch: String, +// /// Branch options. +// options: Options, +// } +// +// fn get_real_data() -> crate::vm::Pack<'static, 'static> { +// use indexmap::indexmap; +// use crate::vm::Pack; +// use crate::vm::SerdeObject; +// use crate::vm::SerdeObject::*; +// +// fn mkstr<'a>(s: &'a str) -> SerdeObject<'a> { +// SerdeObject::Str(s.into()) +// } +// +// Pack { +// subpacks: vec![ +// indexmap!{ +// "commit" => ( +// Pack { +// subpacks: vec![ +// indexmap!{ +// "url" => ( +// Pack { +// subpacks: vec![ +// indexmap!{ +// "branch" => ( +// Pack { +// subpacks: vec![ +// indexmap!{ +// "active" => ( +// Pack { +// subpacks: vec![].into(), +// }, +// Bool( +// true, +// ), +// ), +// }, +// ].into(), +// }, +// mkstr( +// "HEAD", +// ), +// ), +// }, +// ].into(), +// }, +// mkstr( +// "https://github.com/ganarchy/GAnarchy", +// ), +// ), +// }, +// indexmap!{ +// "url" => ( +// Pack { +// subpacks: vec![ +// indexmap!{ +// "branch" => ( +// Pack { +// subpacks: vec![ +// indexmap!{ +// "pinned" => ( +// Pack { +// subpacks: vec![].into(), +// }, +// Bool( +// true, +// ), +// ), +// "active" => ( +// Pack { +// subpacks: vec![].into(), +// }, +// Bool( +// true, +// ), +// ), +// }, +// ].into(), +// }, +// mkstr( +// "HEAD", +// ), +// ), +// }, +// ].into(), +// }, +// mkstr( +// "https://soniex2.autistic.space/git-repos/ganarchy.git", +// ), +// ), +// }, +// ].into(), +// }, +// mkstr( +// "385e734a52e13949a7a5c71827f6de920dbfea43", +// ), +// ), +// }, +// indexmap!{ +// "commit" => ( +// Pack { +// subpacks: vec![ +// indexmap!{ +// "url" => ( +// Pack { +// subpacks: vec![ +// indexmap!{ +// "branch" => ( +// Pack { +// subpacks: vec![ +// indexmap!{ +// "pinned" => ( +// Pack { +// subpacks: vec![].into(), +// }, +// Bool( +// true, +// ), +// ), +// "active" => ( +// Pack { +// subpacks: vec![].into(), +// }, +// Bool( +// true, +// ), +// ), +// }, +// ].into(), +// }, +// mkstr( +// "HEAD", +// ), +// ), +// }, +// ].into(), +// }, +// mkstr( +// "https://soniex2.autistic.space/git-repos/mmorfc.git", +// ), +// ), +// }, +// ].into(), +// }, +// mkstr( +// "a8fb5087f79eafe312db270082c052c427b208c2", +// ), +// ), +// }, +// indexmap!{ +// "commit" => ( +// Pack { +// subpacks: vec![ +// indexmap!{ +// "url" => ( +// Pack { +// subpacks: vec![ +// indexmap!{ +// "branch" => ( +// Pack { +// subpacks: vec![ +// indexmap!{ +// "pinned" => ( +// Pack { +// subpacks: vec![].into(), +// }, +// Bool( +// true, +// ), +// ), +// "active" => ( +// Pack { +// subpacks: vec![].into(), +// }, +// Bool( +// true, +// ), +// ), +// }, +// ].into(), +// }, +// mkstr( +// "HEAD", +// ), +// ), +// }, +// ].into(), +// }, +// mkstr( +// "https://soniex2.autistic.space/git-repos/chewstuff.git", +// ), +// ), +// }, +// ].into(), +// }, +// mkstr( +// "2d0b363fe3179087de59d9ef4a2d14af21d89071", +// ), +// ), +// }, +// ].into(), +// } +// } +// +// #[test] +// fn to_vec_of_struct() { +// let der = super::Unpacker::new(get_real_data(), MAX_CALLS); +// let res: Vec<ProjectBranch> = Deserialize::deserialize(der).unwrap(); +// assert_eq!(res, [ +// ProjectBranch { +// commit: "385e734a52e13949a7a5c71827f6de920dbfea43".into(), +// url: "https://github.com/ganarchy/GAnarchy".into(), +// branch: "HEAD".into(), +// options: Options { +// active: true, +// federate: None, +// pinned: None, +// }, +// }, +// ProjectBranch { +// commit: "385e734a52e13949a7a5c71827f6de920dbfea43".into(), +// url: "https://soniex2.autistic.space/git-repos/ganarchy.git".into(), +// branch: "HEAD".into(), +// options: Options { +// active: true, +// federate: None, +// pinned: Some(true), +// }, +// }, +// ProjectBranch { +// commit: "a8fb5087f79eafe312db270082c052c427b208c2".into(), +// url: "https://soniex2.autistic.space/git-repos/mmorfc.git".into(), +// branch: "HEAD".into(), +// options: Options { +// active: true, +// federate: None, +// pinned: Some(true), +// }, +// }, +// ProjectBranch { +// commit: "2d0b363fe3179087de59d9ef4a2d14af21d89071".into(), +// url: "https://soniex2.autistic.space/git-repos/chewstuff.git".into(), +// branch: "HEAD".into(), +// options: Options { +// active: true, +// federate: None, +// pinned: Some(true), +// }, +// }, +// ]); +// } +//} diff --git a/src/vm/mod.rs b/src/vm/mod.rs index 21fec73..eb9a2a3 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -9,9 +9,11 @@ use std::borrow::Cow; use std::cell::Cell; use std::cell::RefCell; use std::collections::BTreeMap; +use std::collections::VecDeque; use std::marker::PhantomData; use indexmap::IndexMap; +use indexmap::IndexSet; use regex::Regex; use serde::Serialize; @@ -37,7 +39,7 @@ pub(crate) struct PatternConstants<O: Serialize> { pub(crate) protos: Vec<Vec<PatternElement>>, // Note that we can borrow these when creating the output map. // https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f - pub(crate) strings: Vec<String>, + pub(crate) strings: IndexSet<String>, pub(crate) regices: Vec<Regex>, pub(crate) predicates: Vec<Box<Predicate>>, pub(crate) defs: Vec<O>, @@ -348,7 +350,6 @@ where fn into_deserializer(self) -> Self::Deserializer { Self::Deserializer { obj: self, - value: None, _e: PhantomData, } } @@ -357,15 +358,25 @@ where /// Packed serde objects and datafu internal representation. /// /// This is an iterative store of key-value pairs. +/// +/// It's effectively a tree node. #[derive(Clone, Debug, Default)] pub struct Pack<'pat, 'de> { - subpacks: Vec<IndexMap<&'pat str, (Pack<'pat, 'de>, SerdeObject<'de>)>>, + subpacks: VecDeque<(IndexMap<&'pat str, SerdeObject<'de>>, Pack<'pat, 'de>)>, } impl<'pat, 'de> Pack<'pat, 'de> { - /// Merges two packs, with elements from `other` coming after `self`, as if - /// parts of the same iteration. - fn merge_breadth(&mut self, mut other: Self) { + /// Adds the elements of the `other` pack to the matching iterarions of the + /// current pack. If either pack is empty, pick the non-empty pack. + /// + /// The current pack will have the same number of iterations, but will + /// contain captures from both packs. In case of captures of the same name, + /// `other` will override `self`. + /// + /// # Panics + /// + /// Panics if the packs have different iteration lengths. + fn zip(&mut self, mut other: Self) { match (self.subpacks.len(), other.subpacks.len()) { (0, _) => { *self = other; @@ -373,26 +384,82 @@ impl<'pat, 'de> Pack<'pat, 'de> { (_, 0) => {}, (a, b) if a == b => { for (l, r) in self.subpacks.iter_mut().zip(other.subpacks) { - l.extend(r) + // note that we can't actually recurse deeper than the VM + // actually does itself, so we don't need to worry about + // blowing the stack. + l.0.extend(r.0); + l.1.zip(r.1); } }, - _ => unreachable!("merge_breadth unbalanced iterations"), + _ => unreachable!("zip unbalanced iterations"), } } - /// Merges two packs, with elements from `other` coming inside the last - /// element of `self` recursively, if any. - fn merge_depth(&mut self, mut other: Self) { - // note that we can't actually recurse deeper than the VM - // actually does itself, so we don't need to worry about - // blowing the stack. - if let Some(into) = self.subpacks.iter_mut().rev().filter(|map| { - !map.is_empty() - }).next() { - into.last_mut().unwrap().1.0.merge_depth(other); - } else { - *self = other; + /// Adds the elements of the `other` pack to all iterations captured by + /// this pack, such as to form a cartesian product. If either pack is + /// empty, pick the non-empty pack. If both packs have a length of 1, merge + /// their captures. + /// + /// The current pack will contain captures from both packs. In case of + /// captures of the same name, `other` will override `self`. + fn cartesian_product(&mut self, mut other: Self) { + match (self.subpacks.len(), other.subpacks.len()) { + (_, 0) => { + return; + }, + (0, _) => { + *self = other; + return; + }, + (1, 1) => { + let (robjects, rpack) = other.subpacks.pop_back().unwrap(); + let (ref mut lobjects, ref mut lpack) = self.subpacks[0]; + lobjects.extend(robjects); + lpack.cartesian_product(rpack); + return; + }, + (1, _) => { + self.subpacks[0].1.cartesian_product(other); + return; + }, + (_, 1) => { + // FIXME: need to be careful with this one. + // we want `other` to override entries from `self`, so we need + // to scan `self` for captures of the same name as those in + // `other`, and remove those captures. only then can we swap + // `self` and `other` and merge them. + // for now we can just do the inefficient thing tho. + }, + _ => {}, } + // FIXME instead of doing this, perhaps we should find the smaller one, + // and put clones of the larger one into it? + self.subpacks.iter_mut().for_each(|&mut (_, ref mut lpack)| { + lpack.cartesian_product(other.clone()) + }); + } + + /// Returns the serde object with the given name at the given iteration of + /// this pack. + #[cfg(test)] + fn get_object_at( + &self, + iter: usize, + name: &str, + ) -> Option<&SerdeObject<'de>> { + self.subpacks.get(iter).map(|x| &x.0).and_then(|x| x.get(name)) + } + + /// Returns the subpack related to the given name at the given iteration of + /// this pack. + #[cfg(test)] + fn get_subpack_at( + &self, + iter: usize, + name: &str, + ) -> Option<&Pack<'pat, 'de>> { + let _ = name; + self.subpacks.get(iter).map(|x| &x.1) } } diff --git a/tests/basic_match.rs b/tests/basic_match.rs index a1c62bb..b58457e 100644 --- a/tests/basic_match.rs +++ b/tests/basic_match.rs @@ -16,14 +16,47 @@ fn test_basic() { yk: String, y: usize, } - // should this error about needing multiple results/Vec requirement? - let matches: Values = pat.deserialize(&mut der).unwrap(); + let matches: Values = pat.deserialize(&mut der).unwrap().collect().unwrap(); assert_eq!(matches.x, "bar"); assert_eq!(matches.yk, "baz"); assert_eq!(matches.y, 2); assert!(der.end().is_ok()); } +#[test] +fn test_real_use_case() { + let pat = datafu::PatternBuilder::for_pattern( + " + :map + ->['projects'?]:map + ->[commit:?str]:?map + ->[url:?str]:?map + ->[branch:?str]:?map + (->['active'?]active:?bool)? + (->['federate'?]?federate:?bool)? + (->['pinned'?]?pinned:?bool)? + " + ).compile().unwrap(); + let data = r#" + {"base_url": "https://ganarchy.autistic.space", "repo_list_srcs": {"https://ganarchy.autistic.space/index.toml": {"active": false}}, "projects": {"385e734a52e13949a7a5c71827f6de920dbfea43": {"https://github.com/ganarchy/GAnarchy": {"HEAD": {"active": true}}, "https://soniex2.autistic.space/git-repos/ganarchy.git": {"HEAD": {"active": true, "pinned": true}}}, "a8fb5087f79eafe312db270082c052c427b208c2": {"https://soniex2.autistic.space/git-repos/mmorfc.git": {"HEAD": {"active": true, "pinned": true}}}, "2d0b363fe3179087de59d9ef4a2d14af21d89071": {"https://soniex2.autistic.space/git-repos/chewstuff.git": {"HEAD": {"active": true, "pinned": true}}}}} + "#; + let mut der = JsonDer::from_str(data); + #[derive(Deserialize)] + struct Values { + commit: String, + url: String, + branch: String, + active: bool, + #[serde(default)] + federate: bool, + #[serde(default)] + pinned: bool, + } + let graph = pat.deserialize(&mut der).unwrap(); + let matches: Vec<Values> = graph.collect().unwrap(); + assert!(der.end().is_ok()); +} + //#[test] //fn test_str() { // //let tree = Value::M(vec![ |