// Copyright (C) 2022 Soni L.
// SPDX-License-Identifier: MIT OR Apache-2.0
//! Deserialization-related parts of the VM.
use std::borrow::Cow;
use std::marker::PhantomData;
use indexmap::IndexMap;
use serde::Serialize;
use serde::de::Error as _;
use serde::de::IntoDeserializer as _;
use smallvec::SmallVec;
use super::Frame;
use super::Interpreter;
use super::Pack;
use super::PatternConstants;
use super::PatternElement;
use super::SerdeObject;
use super::Type;
use super::Value;
use crate::errors::MatchError;
/// A `DeserializeSeed` for Datafu input.
///
/// This converts from Serde to Datafu's internal representation (a "pack").
///
/// A `&mut Packer` serves both as the seed and as the visitor while the
/// input is being walked.
pub(crate) struct Packer<'pat, 'state, O: Serialize> {
/// The global interpreter state.
interp: Interpreter<'pat, 'state, O>,
/// Current call limit: decremented by `step_in`, incremented again by
/// `step_out`.
call_limit: usize,
/// Whether we're collecting values, i.e. whether visited values are also
/// returned as a `SerdeObject` next to the packs.
collecting: bool,
}
/// Mutable view over the interpreter's frame list.
///
/// Created by `Packer::frames_mut`; offers iteration over all frames or
/// only over the active ones.
struct FramesMut<'packer, 'pat> {
// the borrowed frame list
frames: &'packer mut Vec<Frame<'pat>>,
}
/// Shared (read-only) view over the interpreter's frame list.
///
/// Created by `Packer::frames`; offers iteration over all frames or only
/// over the active ones.
struct Frames<'packer, 'pat> {
// the borrowed frame list
frames: &'packer Vec<Frame<'pat>>,
}
impl<'packer, 'pat> FramesMut<'packer, 'pat> {
    /// Yields every frame mutably, in storage order.
    fn iter_mut<'a>(
        &'a mut self,
    ) -> impl Iterator<Item=&'a mut Frame<'pat>> + DoubleEndedIterator
    where
        'packer: 'a,
    {
        let every_frame = self.frames.iter_mut();
        every_frame
    }
    /// Yields mutably only the frames for which `active()` holds.
    fn iter_active_mut<'a>(
        &'a mut self,
    ) -> impl Iterator<Item=&'a mut Frame<'pat>> + DoubleEndedIterator
    where
        'packer: 'a,
    {
        let every_frame = self.iter_mut();
        every_frame.filter(|candidate| candidate.active())
    }
}
impl<'packer, 'pat> Frames<'packer, 'pat> {
    /// Yields every frame by shared reference, in storage order.
    fn iter<'a>(
        &'a self,
    ) -> impl Iterator<Item=&'a Frame<'pat>> + DoubleEndedIterator
    where
        'packer: 'a,
    {
        let every_frame = self.frames.iter();
        every_frame
    }
    /// Yields only the frames for which `active()` holds.
    fn iter_active<'a>(
        &'a self,
    ) -> impl Iterator<Item=&'a Frame<'pat>> + DoubleEndedIterator
    where
        'packer: 'a,
    {
        let every_frame = self.iter();
        every_frame.filter(|candidate| candidate.active())
    }
}
impl<'pat, 'state, 'de, O: Serialize> Packer<'pat, 'state, O> {
/// Creates a new Packer.
///
/// The packer takes ownership of the interpreter handle and the call
/// budget; value collection starts out disabled.
pub(crate) fn new(
    interp: Interpreter<'pat, 'state, O>,
    call_limit: usize,
) -> Self {
    let collecting = false;
    Self { interp, call_limit, collecting }
}
/// Borrows the interpreter's frame list mutably, wrapped in a `FramesMut`.
fn frames_mut(&mut self) -> FramesMut<'_, 'pat> {
    let frames = &mut *self.interp.frames;
    FramesMut { frames }
}
/// Borrows the interpreter's frame list read-only, wrapped in a `Frames`.
///
/// Only a shared borrow of the packer is needed, since the frames are
/// merely read through the returned view.
fn frames(&self) -> Frames<'_, 'pat> {
    Frames {
        frames: &*self.interp.frames,
    }
}
/// Steps the VM into the next operation.
///
/// Consumes one unit of the call budget and then advances every frame:
///
/// * frames that are already overstepped or not matching get one more
///   layer of overstep;
/// * frames that cannot advance (`next()` fails) are marked as
///   overstepped by one layer, which `step_out` undoes;
/// * a frame that lands on a `SubtreeMarker` is parked (overstepped and
///   non-matching) and one new frame per following `ValueSubtree` op is
///   inserted right after it.
///
/// # Errors
///
/// When the call budget is exhausted, `MatchError::StackOverflow` is
/// recorded in the interpreter and an error is returned.
fn step_in<E: serde::de::Error>(&mut self) -> Result<(), E> {
    if self.call_limit == 0 {
        self.interp.error.insert(MatchError::StackOverflow);
        // report the failure through serde instead of panicking.
        return Err(E::custom("call limit exceeded"));
    }
    self.call_limit -= 1;
    // iterate up to the *live* length (i.e. the loop is allowed to modify
    // the length). the length is re-read on every iteration so that frames
    // inserted below are visited too, and nothing stays borrowed across
    // iterations.
    let mut index = 0;
    while index < self.interp.frames.len() {
        let frame = &mut self.interp.frames[index];
        if frame.overstep > 0 || !frame.matches {
            // overstepped and non-matching frames
            frame.overstep += 1;
            // FIXME check if this is correct (it probably isn't)
            frame.matches = false;
        } else if !frame.next() {
            // empty/end-of frames
            // 1 layer of step-in.
            // step-out will undo this.
            // this is correct because this branch implies overstep = 0
            frame.overstep = 1;
        } else if matches!(frame.op(), PatternElement::SubtreeMarker) {
            // subtrees!
            // these are tricky, because the current frame can be moved
            // in memory. so we have to use indexing every time.
            // tho first we set it as overstep because it has special
            // handling.
            frame.overstep = 1;
            frame.matches = false;
            let mut at = index + 1;
            while self.interp.frames[index].next() {
                let op = self.interp.frames[index].raw_op();
                if let PatternElement::ValueSubtree {
                    index: subtree, ..
                } = op {
                    let new_frame = Frame {
                        ops: &self.interp.pat.protos[subtree][..],
                        iar: None,
                        overstep: 0,
                        matches: true,
                    };
                    // we want the "newest" frame last, so it is
                    // easier to unwind back.
                    self.interp.frames.insert(at, new_frame);
                    at += 1;
                } else {
                    unreachable!()
                }
            }
        }
        index += 1;
    }
    Ok(())
}
/// Steps the VM back into the previous operation.
///
/// Gives back one unit of the call budget and walks the frames from last
/// to first. Overstepped frames only have one layer of overstep removed.
/// Other frames are unwound with `prev()`; when that fails, the frame is
/// treated as a finished subtree frame: its parent is searched for, the
/// frame is removed, and its pack (if any) is moved or merged into the
/// parent's slot in `packs`.
///
/// Returns the adjusted list of packs.
///
/// # Errors
///
/// Records `MatchError::ValidationError` and fails with "subtree failed"
/// when a non-optional subtree frame finishes without a pack.
fn step_out<E: serde::de::Error>(
&mut self,
mut packs: Vec<Pack<'pat, 'de>>,
) -> Result<Vec<Pack<'pat, 'de>>, E> {
// this code attempts to maintain the logical invariant of:
// self.frames().iter_active().count() == packs.len()
self.call_limit += 1;
let mut index_iter = 0..;
// index into `packs` of the pack belonging to the frame being processed;
// starts past the end because frames are visited back to front.
let mut pack_index = packs.len();
// frames may be removed below, so the bound is fixed up front.
let orig_len = self.interp.frames.len();
while let Some(index) = index_iter.next().filter(|&i| {
i < orig_len
}) {
// iterate backwards
let index = orig_len - index - 1;
let frame = &mut self.interp.frames[index];
// a matching frame owns exactly one entry in `packs`.
let has_pack = frame.matches;
if frame.overstep > 0 {
// handle overstep
frame.overstep -= 1;
} else {
if has_pack {
pack_index -= 1;
}
// unwind frame
if frame.prev() {
// successfully unwound. do nothing.
} else {
// find parent frame.
// `count` tracks how many frames still have to be accounted for
// while walking towards the front; it reaches 0 at the parent.
let mut count = 1;
let mut target = index;
let mut target_pack = pack_index;
while count > 0 && target > 0 {
target -= 1;
if self.interp.frames[target].matches {
debug_assert!(target_pack > 0);
target_pack -= 1;
}
match self.interp.frames[target].num_subtrees() {
Some((num, _)) if num < count => {
count -= num;
},
Some((num, _)) => {
count = 0;
},
None => {
count += 1;
},
}
}
if count == 0 {
// found target frame
let frame = self.interp.frames.remove(index);
let target_frame = &mut self.interp.frames[target];
let (_, optional) = target_frame.value_subtree();
// the parent must be able to step back; anything else is a bug.
target_frame.prev().then(|| ()).unwrap();
if has_pack {
let pack = packs.remove(pack_index);
if !target_frame.matches {
// parent had no pack yet: hand it this one.
packs.insert(target_pack, pack);
target_frame.matches = true;
pack_index += 1;
} else {
// parent already has a pack: merge into it.
packs[target_pack].merge_from(pack);
}
} else {
if !optional {
self.interp.error.insert({
MatchError::ValidationError
});
return Err(E::custom("subtree failed"));
}
}
if let Some((0, _)) = target_frame.num_subtrees() {
target_frame.overstep = 0;
}
}
}
}
}
Ok(packs)
}
}
impl<'pat, 'state, 'de, O> serde::de::DeserializeSeed<'de>
for &mut Packer<'pat, 'state, O>
where
    O: Serialize,
{
    type Value = (Vec<Pack<'pat, 'de>>, Option<SerdeObject<'de>>);
    /// Packs one value from `deserializer`.
    ///
    /// Steps the VM in, works out which Serde type the active frames ask
    /// for, requests exactly that type from the deserializer (with the
    /// packer itself acting as visitor) and finally steps the VM out.
    ///
    /// Fails with "type conflict" (recording the `MatchError`) when the
    /// active frames require incompatible types.
    fn deserialize<D>(
        mut self,
        deserializer: D,
    ) -> Result<Self::Value, D::Error>
    where
        D: serde::Deserializer<'de>
    {
        self.step_in::<D::Error>()?;
        let wanted = self.frames().iter_active().try_fold(
            Type::IgnoredAny,
            |so_far, frame| match (so_far, frame.get_type()) {
                // FIXME handle None correctly
                // required type binds stronger than any/ignored_any
                (Type::IgnoredAny | Type::Any, Some((ty, true))) => Ok(ty),
                // and also stronger than optional any/ignored_any
                (ty, Some((Type::IgnoredAny | Type::Any, _))) => Ok(ty),
                // prefer owned if any branch prefers owned
                | (Type::String, Some((Type::Str, true)))
                | (Type::Str, Some((Type::String, true)))
                => Ok(Type::String),
                | (Type::Bytes, Some((Type::ByteBuf, true)))
                | (Type::ByteBuf, Some((Type::Bytes, true)))
                => Ok(Type::ByteBuf),
                // types which are the same are okay
                (left, Some((right, _))) if left == right => Ok(left),
                // optional type vs Any/IgnoredAny prefers Any
                (Type::IgnoredAny | Type::Any, Some((_, false))) => {
                    Ok(Type::Any)
                },
                // types which are not the same are an error because we
                // only request a specific type if it's actually required
                (_, Some(_)) => Err(MatchError::Unsatisfiable),
                _ => Ok(Type::Any),
            },
        );
        let wanted = match wanted {
            Ok(ty) => ty,
            Err(e) => {
                self.interp.error.insert(e);
                return Err(D::Error::custom("type conflict"));
            },
        };
        // read the flag up front so the visitor reborrow below is the only
        // live borrow during dispatch.
        let collecting = self.collecting;
        let visitor = &mut *self;
        let visited = match wanted {
            Type::Any => deserializer.deserialize_any(visitor),
            Type::IgnoredAny => deserializer.deserialize_ignored_any(visitor),
            Type::Bool => deserializer.deserialize_bool(visitor),
            Type::I8 => deserializer.deserialize_i8(visitor),
            Type::I16 => deserializer.deserialize_i16(visitor),
            Type::I32 => deserializer.deserialize_i32(visitor),
            Type::I64 => deserializer.deserialize_i64(visitor),
            Type::I128 => deserializer.deserialize_i128(visitor),
            Type::U8 => deserializer.deserialize_u8(visitor),
            Type::U16 => deserializer.deserialize_u16(visitor),
            Type::U32 => deserializer.deserialize_u32(visitor),
            Type::U64 => deserializer.deserialize_u64(visitor),
            Type::U128 => deserializer.deserialize_u128(visitor),
            Type::F32 => deserializer.deserialize_f32(visitor),
            Type::F64 => deserializer.deserialize_f64(visitor),
            Type::Char => deserializer.deserialize_char(visitor),
            // borrowed forms are only requested while not collecting
            Type::Str if !collecting => deserializer.deserialize_str(visitor),
            Type::Str | Type::String => {
                deserializer.deserialize_string(visitor)
            },
            Type::Bytes if !collecting => {
                deserializer.deserialize_bytes(visitor)
            },
            Type::Bytes | Type::ByteBuf => {
                deserializer.deserialize_byte_buf(visitor)
            },
            Type::Option => deserializer.deserialize_option(visitor),
            Type::Unit => deserializer.deserialize_unit(visitor),
            Type::Seq => deserializer.deserialize_seq(visitor),
            Type::Map => deserializer.deserialize_map(visitor),
            Type::Identifier => deserializer.deserialize_identifier(visitor),
            Type::Tuple(len) => deserializer.deserialize_tuple(len, visitor),
            Type::UnitStruct(name) => {
                deserializer.deserialize_unit_struct(name, visitor)
            },
            Type::NewtypeStruct(name) => {
                deserializer.deserialize_newtype_struct(name, visitor)
            },
            Type::TupleStruct { name, len } => {
                deserializer.deserialize_tuple_struct(name, len, visitor)
            },
            Type::Struct { name, fields } => {
                deserializer.deserialize_struct(name, fields, visitor)
            },
            Type::Enum { name, variants } => {
                deserializer.deserialize_enum(name, variants, visitor)
            },
        };
        let (packs, obj) = visited?;
        let packs = self.step_out::<D::Error>(packs)?;
        Ok((packs, obj))
    }
}
/// visit method generator for simple values (primitives).
///
/// can generate whole function or just the glue.
///
/// * `vs!(fn NAME VARIANT (TYPE_PATTERN) RUST_TYPE)` emits a complete
///   `fn NAME<E>(mut self, v: RUST_TYPE)` visitor method.
/// * `vs!(THIS ARGS VARIANT (TYPE_PATTERN))` emits only the body, building
///   the collected object as `SerdeObject::VARIANT ARGS`.
///
/// the generated body checks every active frame against `TYPE_PATTERN`:
/// frames asking for a different *optional* type are marked as not
/// matching, a different *required* type fails the whole visit with a
/// `MatchError::ValidationError`, and every remaining frame produces one
/// pack (holding the value under the frame's name, if it has one).
///
/// the body must be expanded inside a function whose error type
/// implements `serde::de::Error`.
macro_rules! vs {
    (fn $visit:ident $obj:ident ($data_type:pat) $rust_type:ty) => {
        fn $visit<E>(mut self, v: $rust_type) -> Result<Self::Value, E>
        where
            E: serde::de::Error,
        {
            vs!(self (v) $obj ($data_type))
        }
    };
    ($this:ident $v:tt $obj:ident ($data_type:pat)) => {
        {
            let pat = $this.interp.pat;
            let obj = if $this.collecting {
                Some(SerdeObject::$obj$v)
            } else {
                None
            };
            let mut packs = Vec::new();
            let result = {
                $this.frames_mut().iter_active_mut().try_for_each(|frame| {
                    let ty = frame.get_type();
                    match ty {
                        | Some(($data_type, _))
                        | Some((Type::Any, _))
                        | Some((Type::IgnoredAny, _))
                        | None
                        => {},
                        Some((_, false)) => {
                            frame.matches = false;
                            return Ok(());
                        },
                        Some((_, true)) => {
                            return Err(MatchError::ValidationError)
                        },
                    }
                    let mut pack = Pack::default();
                    if let Some(name) = frame.get_name(pat) {
                        let mut map = IndexMap::new();
                        map.insert(name, (Pack::default(), SerdeObject::$obj$v));
                        pack.subpacks.push(map);
                    }
                    packs.push(pack);
                    Ok(())
                })
            };
            if let Err(e) = result {
                $this.interp.error.insert(e);
                // surface the failure as a serde error rather than
                // panicking; the error type is taken from the enclosing
                // function's return type.
                return Err(serde::de::Error::custom("validation failed"));
            }
            Ok((packs, obj))
        }
    };
}
impl<'pat, 'state, 'de, O> serde::de::Visitor<'de>
for &mut Packer<'pat, 'state, O>
where
O: Serialize,
{
type Value = (Vec<Pack<'pat, 'de>>, Option<SerdeObject<'de>>);
/// Writes the expectation message, which is the fixed text `unsure`.
fn expecting(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    f.write_str("unsure")
}
// visitor methods for the primitive types, all generated by `vs!`.
// each line reads: method name, `SerdeObject` variant, accepted `Type`
// pattern, Rust type of the visited value.
vs!(fn visit_bool Bool (Type::Bool) bool);
vs!(fn visit_i8 I8 (Type::I8) i8);
vs!(fn visit_i16 I16 (Type::I16) i16);
vs!(fn visit_i32 I32 (Type::I32) i32);
vs!(fn visit_i64 I64 (Type::I64) i64);
vs!(fn visit_i128 I128 (Type::I128) i128);
vs!(fn visit_u8 U8 (Type::U8) u8);
vs!(fn visit_u16 U16 (Type::U16) u16);
vs!(fn visit_u32 U32 (Type::U32) u32);
vs!(fn visit_u64 U64 (Type::U64) u64);
vs!(fn visit_u128 U128 (Type::U128) u128);
vs!(fn visit_f32 F32 (Type::F32) f32);
vs!(fn visit_f64 F64 (Type::F64) f64);
vs!(fn visit_char Char (Type::Char) char);
/// Visits a transient string (one that does not outlive this call).
///
/// Every active frame is checked against the string: the frame's type
/// must accept strings, and a literal or regex constraint, if present,
/// must match `v`. Optional/skippable mismatches mark the frame as not
/// matching; required ones fail the visit. Each remaining frame yields
/// one pack holding an owned copy of `v` under the frame's name, if any.
///
/// # Errors
///
/// Records `MatchError::ValidationError` in the interpreter and returns
/// an error when a required constraint is violated.
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
    E: serde::de::Error,
{
    let pat = self.interp.pat;
    let obj = if self.collecting {
        Some(SerdeObject::Str(Cow::Owned(v.into())))
    } else {
        None
    };
    let mut packs = Vec::new();
    let result = {
        self.frames_mut().iter_active_mut().try_for_each(|frame| {
            let ty = frame.get_type();
            match ty {
                | Some((Type::String, _))
                | Some((Type::Str, _))
                | Some((Type::Any, _))
                | Some((Type::IgnoredAny, _))
                | None
                => {},
                Some((_, false)) => {
                    frame.matches = false;
                    return Ok(());
                },
                Some((_, true)) => {
                    return Err(MatchError::ValidationError)
                },
            }
            match frame.op() {
                PatternElement::Value { value: Some(value), .. } => {
                    match value {
                        | Value::String { index, skippable }
                        if pat.strings[index] != v => {
                            if skippable {
                                frame.matches = false;
                                return Ok(());
                            } else {
                                return Err(MatchError::ValidationError);
                            }
                        },
                        | Value::Regex { index, skippable }
                        if !pat.regices[index].is_match(v) => {
                            if skippable {
                                frame.matches = false;
                                return Ok(());
                            } else {
                                return Err(MatchError::ValidationError);
                            }
                        },
                        | Value::Type { .. }
                        | Value::Regex { .. }
                        | Value::String { .. }
                        => {}, // ok
                    }
                },
                PatternElement::Value { value: None, .. } => {},
                _ => unreachable!(),
            }
            let mut pack = Pack::default();
            if let Some(name) = frame.get_name(pat) {
                let mut map = IndexMap::new();
                map.insert(
                    name,
                    (
                        Pack::default(),
                        SerdeObject::Str(Cow::Owned(v.into())),
                    ),
                );
                pack.subpacks.push(map);
            }
            packs.push(pack);
            Ok(())
        })
    };
    if let Err(e) = result {
        self.interp.error.insert(e);
        // surface the failure as a serde error rather than panicking.
        return Err(E::custom("validation failed"));
    }
    Ok((packs, obj))
}
/// Visits a string borrowed from the input for `'de`.
///
/// Performs the same per-frame checks as `visit_str` (type, then literal
/// or regex constraint), but stores `v` borrowed instead of copying it.
///
/// # Errors
///
/// Records `MatchError::ValidationError` in the interpreter and returns
/// an error when a required constraint is violated.
fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E>
where
    E: serde::de::Error,
{
    let pat = self.interp.pat;
    let obj = if self.collecting {
        Some(SerdeObject::Str(Cow::Borrowed(v)))
    } else {
        None
    };
    let mut packs = Vec::new();
    let result = {
        self.frames_mut().iter_active_mut().try_for_each(|frame| {
            let ty = frame.get_type();
            match ty {
                | Some((Type::String, _))
                | Some((Type::Str, _))
                | Some((Type::Any, _))
                | Some((Type::IgnoredAny, _))
                | None
                => {},
                Some((_, false)) => {
                    frame.matches = false;
                    return Ok(());
                },
                Some((_, true)) => {
                    return Err(MatchError::ValidationError)
                },
            }
            match frame.op() {
                PatternElement::Value { value: Some(value), .. } => {
                    match value {
                        | Value::String { index, skippable }
                        if pat.strings[index] != v => {
                            if skippable {
                                frame.matches = false;
                                return Ok(());
                            } else {
                                return Err(MatchError::ValidationError);
                            }
                        },
                        | Value::Regex { index, skippable }
                        if !pat.regices[index].is_match(v) => {
                            if skippable {
                                frame.matches = false;
                                return Ok(());
                            } else {
                                return Err(MatchError::ValidationError);
                            }
                        },
                        | Value::Type { .. }
                        | Value::Regex { .. }
                        | Value::String { .. }
                        => {}, // ok
                    }
                },
                PatternElement::Value { value: None, .. } => {},
                _ => unreachable!(),
            }
            let mut pack = Pack::default();
            if let Some(name) = frame.get_name(pat) {
                let mut map = IndexMap::new();
                map.insert(
                    name,
                    (
                        Pack::default(),
                        SerdeObject::Str(Cow::Borrowed(v)),
                    ),
                );
                pack.subpacks.push(map);
            }
            packs.push(pack);
            Ok(())
        })
    };
    if let Err(e) = result {
        self.interp.error.insert(e);
        // surface the failure as a serde error rather than panicking.
        return Err(E::custom("validation failed"));
    }
    Ok((packs, obj))
}
/// Visits an owned string by handing it to `visit_str` as a slice.
fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
where
    E: serde::de::Error,
{
    // TODO try to avoid cloning
    let text = v.as_str();
    self.visit_str(text)
}
/// Visits a transient byte slice.
///
/// Accepted by frames asking for `Type::Bytes` or `Type::ByteBuf` (plus
/// the generic cases handled by `vs!`); the bytes are stored as an owned
/// copy.
fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
vs!(self (Cow::Owned(v.to_owned())) Bytes (Type::Bytes | Type::ByteBuf))
}
/// Visits a byte slice borrowed from the input for `'de`.
///
/// Same checks as `visit_bytes`, but the bytes are stored borrowed rather
/// than copied.
fn visit_borrowed_bytes<E>(self, v: &'de [u8]) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
vs!(self (Cow::Borrowed(v)) Bytes (Type::Bytes | Type::ByteBuf))
}
/// Visits an owned byte buffer by handing it to `visit_bytes` as a slice.
fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E>
where
    E: serde::de::Error,
{
    // TODO try to avoid cloning
    let bytes = v.as_slice();
    self.visit_bytes(bytes)
}
/// Visits an absent optional value.
///
/// Accepted by frames asking for `Type::Option`; recorded as
/// `SerdeObject::None`.
fn visit_none<E>(self) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
vs!(self {} None (Type::Option))
}
/// Visits a present optional value.
///
/// Not implemented yet: currently panics via `todo!()`.
fn visit_some<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
where
D: serde::de::Deserializer<'de>,
{
todo!()
}
/// Visits a unit value.
///
/// Accepted by frames asking for `Type::Unit`; recorded as
/// `SerdeObject::Unit`.
fn visit_unit<E>(self) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
vs!(self {} Unit (Type::Unit))
}
/// Visits a newtype struct.
///
/// Not implemented yet: currently panics via `todo!()`.
fn visit_newtype_struct<D>(
self,
deserializer: D
) -> Result<Self::Value, D::Error>
where
D: serde::de::Deserializer<'de>,
{
todo!()
}
/// Visits a sequence.
///
/// Not implemented yet: prepares an empty `SerdeObject::Seq` when
/// collecting and then panics via `todo!()`.
fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
where
A: serde::de::SeqAccess<'de>,
{
let mut obj = None;
if self.collecting {
obj = Some(SerdeObject::Seq(Vec::new()));
}
todo!()
}
/// Visits a map.
///
/// The work happens in four stages:
///
/// 1. type-check every active frame against `Type::Map`, and turn on
///    collecting if any accepting frame is named;
/// 2. step the VM in and create one key sub-frame per active frame that
///    has a key subtree;
/// 3. for every entry, pack the key with a sub-packer over those
///    sub-frames, pack the value with this packer, and line the key packs
///    up with the value packs;
/// 4. step the VM out and wrap the resulting packs under the names of the
///    map-level frames.
///
/// # Errors
///
/// Records `MatchError::ValidationError` and returns an error when a
/// frame requires a non-map type; errors from the map access and from
/// stepping in and out are passed through.
fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
where
    A: serde::de::MapAccess<'de>,
{
    let old_collecting = self.collecting;
    let pat = self.interp.pat;
    let mut collecting = old_collecting;
    let typeck = self.frames_mut().iter_active_mut().try_for_each(|frame| {
        let ty = frame.get_type();
        match ty {
            | Some((Type::Map, _))
            | Some((Type::Any, _))
            | Some((Type::IgnoredAny, _))
            | None
            => {},
            Some((_, false)) => {
                frame.matches = false;
                return Ok(());
            },
            Some((_, true)) => {
                return Err(MatchError::ValidationError)
            },
        }
        if frame.get_name(pat).is_some() {
            collecting = true;
        }
        Ok(())
    });
    if let Err(e) = typeck {
        self.interp.error.insert(e);
        // surface the failure as a serde error rather than panicking.
        return Err(A::Error::custom("validation failed"));
    }
    if let Err(e) = self.step_in() { return Err(e); }
    self.collecting = collecting;
    let mut subframes = Vec::new();
    self.frames().iter_active().for_each(|frame| {
        if let Some(key_subtree) = frame.key_subtree() {
            subframes.push(Frame {
                ops: &pat.protos[key_subtree],
                iar: None,
                overstep: 0,
                matches: true,
            });
        }
    });
    let mut obj_inner = Vec::new();
    let mut output_packs = Vec::new();
    while let Some(packed_key) = {
        // keys are packed by a short-lived sub-packer that runs over the
        // key sub-frames and shares the error slot with this packer.
        let subinterp = Interpreter {
            pat: pat,
            frames: &mut subframes,
            error: self.interp.error,
        };
        let mut subpacker = Packer {
            interp: subinterp,
            collecting: self.collecting,
            call_limit: self.call_limit,
        };
        map.next_key_seed(&mut subpacker)?
    } {
        self.frames_mut().iter_active_mut().filter(|frame| {
            frame.key_subtree().is_some()
        }).zip(&mut subframes).for_each(|(frame, subframe)| {
            frame.matches = subframe.matches;
            // reset subframe for next iteration
            // NOTE wait to reset subframe.matches when merging packs!!!
            subframe.iar = None;
        });
        self.frames_mut().iter_active_mut().for_each(|frame| {
            // mark every non-subtree key as matching.
            if frame.key_subtree().is_none() {
                frame.matches = true;
            }
        });
        let packed_value = map.next_value_seed(&mut *self)?;
        if self.collecting {
            obj_inner.push(
                (packed_key.1.unwrap(), packed_value.1.unwrap()),
            );
        }
        let mut key_packs_per_frame = packed_key.0.into_iter();
        let mut value_packs_per_frame = packed_value.0;
        // whatever is active in self.frames(), if matches, has a pack
        // whatever is in subframes, if matches, has a pack
        // count(active self.frames() with subtree which match) is always
        // smaller than count(subframes which match) because the former
        // gets updated by next_value_seed
        // count(active self.frames() with subtree) == count(subframes)
        // tl;dr: need to figure out which packs produced by subframes line
        // up with which packs produced by self, discarding extra subframes
        // (where the corresponding self frame doesn't match) and accepting
        // extra packs produced by self.
        // NOTE: key_packs_per_frame ~ subframes
        // value_packs_per_frame ~ self
        // keys come first tho (key.merge_from(value))
        let mut iter_subframes = subframes.iter_mut();
        // related to value_packs_per_frame
        let mut pack_index = 0;
        for frame in self.frames().iter_active() {
            // check if this frame has an associated subframe
            let subframe = if frame.key_subtree().is_some() {
                // if there are more frames with associated subframes
                // than there are subframes, panic
                Some(iter_subframes.next().unwrap())
            } else {
                None
            };
            if frame.matches && subframe.is_some() {
                // this already implies subframe.matches
                let key_pack = key_packs_per_frame.next().unwrap();
                let value_pack = &mut value_packs_per_frame[pack_index];
                key_pack.merge_into(value_pack);
                pack_index += 1;
            } else if frame.matches {
                // value matches but there's no subframe, carry on
                pack_index += 1;
            } else if !frame.matches && subframe.is_some() {
                // frame didn't match but there was a subframe
                let subframe = subframe.unwrap();
                if subframe.matches {
                    // subframe matched, remove key pack
                    let _ = key_packs_per_frame.next().unwrap();
                } else {
                    // neither matched, no relevant packs
                    // do reset subframe for next_key_seed tho!
                    subframe.matches = true;
                }
            } else {
                // no relevant packs
            }
        }
        if output_packs.is_empty() {
            output_packs = value_packs_per_frame;
        } else {
            for (left, right) in output_packs.iter_mut().zip(
                value_packs_per_frame,
            ) {
                left.subpacks.extend(right.subpacks)
            }
        }
    }
    let obj = SerdeObject::Map(obj_inner);
    let mut final_packs = self.step_out(output_packs)?;
    let mut iter_final_packs = 0..;
    self.frames_mut().iter_active_mut().for_each(|frame| {
        let ty = frame.get_type();
        match ty {
            | Some((Type::Map, _))
            | Some((Type::Any, _))
            | Some((Type::IgnoredAny, _))
            | None
            => {
                // frames that lost their match while walking the entries
                // get an empty pack and are marked as matching again.
                let matched = std::mem::replace(&mut frame.matches, true);
                if !matched {
                    final_packs.insert(
                        iter_final_packs.start,
                        Pack::default(),
                    );
                }
            },
            _ => return,
        }
        let pack = &mut final_packs[iter_final_packs.next().unwrap()];
        if let Some(name) = frame.get_name(pat) {
            // we can assume collecting == true
            let old_pack = std::mem::take(pack);
            let mut map = IndexMap::new();
            map.insert(name, (old_pack, obj.clone()));
            pack.subpacks.push(map);
        }
    });
    self.collecting = old_collecting;
    Ok((final_packs, collecting.then(|| obj)))
}
/// Visits an enum.
///
/// Not implemented yet: every path through this method currently ends in
/// a `todo!()` panic.
fn visit_enum<A>(self, data: A) -> Result<Self::Value, A::Error>
where
A: serde::de::EnumAccess<'de>,
{
let mut obj = None;
if self.collecting {
obj = Some(SerdeObject::Enum {
variant: todo!(),
data: todo!(),
});
}
todo!()
}
}
/// A `Deserializer` for Datafu output.
///
/// This converts from Datafu's internal representation (a "pack") into the
/// desired output type.
pub struct Unpacker<'pat, 'de> {
/// The pack to be unpacked.
pack: Pack<'pat, 'de>,
/// Call limit given at construction time.
call_limit: usize,
}
impl<'pat, 'de> Unpacker<'pat, 'de> {
/// Unpacks a Datafu "pack".
pub fn new(pack: Pack<'pat, 'de>, call_limit: usize) -> Self {
Self {
pack, call_limit,
}
}
}
// `Deserializer` implementation for `Unpacker`.
//
// Every method below is still a stub that panics via `todo!()`.
impl<'pat, 'de> serde::Deserializer<'de> for Unpacker<'pat, 'de> {
// TODO datafu errors
type Error = serde::de::value::Error;
fn deserialize_any<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_bool<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_i8<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_i16<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_i32<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_i64<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_u8<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_u16<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_u32<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_u64<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_f32<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_f64<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_char<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_str<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_string<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_bytes<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_byte_buf<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_option<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_unit<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_unit_struct<V>(self, _: &'static str, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_newtype_struct<V>(self, _: &'static str, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_seq<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_tuple<V>(self, _: usize, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_tuple_struct<V>(self, _: &'static str, _: usize, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_map<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
// the only stub that already names its arguments.
fn deserialize_struct<V>(
self,
_: &'static str,
fields: &'static [&'static str],
visitor: V,
) -> Result<V::Value, Self::Error>
where
V: serde::de::Visitor<'de>,
{
todo!()
}
fn deserialize_enum<V>(self, _: &'static str, _: &'static [&'static str], _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_identifier<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_ignored_any<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
}
/// Deserializes a SerdeObject
///
/// The error type `E` is chosen by whoever constructs the deserializer.
pub(crate) struct SerdeObjectDeserializer<'de, E> {
/// The object that `deserialize_any` dispatches on.
pub(crate) obj: SerdeObject<'de>,
/// An optional secondary object.
// NOTE(review): nothing in this part of the file reads `value`; confirm
// its intended use.
pub(crate) value: Option<SerdeObject<'de>>,
/// Marker that names the error type `E` without storing a value of it.
pub(crate) _e: PhantomData<fn() -> E>,
}
impl<'de, E> serde::de::Deserializer<'de> for SerdeObjectDeserializer<'de, E>
where
    E: serde::de::Error,
{
    type Error = E;
    /// Feeds the stored object to the visitor method matching its variant.
    ///
    /// Sequences, maps and enums are not supported yet and panic via
    /// `todo!()`.
    fn deserialize_any<V>(self, v: V) -> Result<V::Value, Self::Error>
    where
        V: serde::de::Visitor<'de>,
    {
        match self.obj {
            // not implemented yet
            | SerdeObject::Seq(_)
            | SerdeObject::Map(_)
            | SerdeObject::Enum { .. }
            => todo!(),
            // scalars
            SerdeObject::Bool(value) => v.visit_bool(value),
            SerdeObject::I8(value) => v.visit_i8(value),
            SerdeObject::I16(value) => v.visit_i16(value),
            SerdeObject::I32(value) => v.visit_i32(value),
            SerdeObject::I64(value) => v.visit_i64(value),
            SerdeObject::I128(value) => v.visit_i128(value),
            SerdeObject::U8(value) => v.visit_u8(value),
            SerdeObject::U16(value) => v.visit_u16(value),
            SerdeObject::U32(value) => v.visit_u32(value),
            SerdeObject::U64(value) => v.visit_u64(value),
            SerdeObject::U128(value) => v.visit_u128(value),
            SerdeObject::F32(value) => v.visit_f32(value),
            SerdeObject::F64(value) => v.visit_f64(value),
            SerdeObject::Char(value) => v.visit_char(value),
            // text and bytes: owned data is handed over, borrowed data
            // keeps its `'de` lifetime
            SerdeObject::Str(text) => match text {
                Cow::Owned(owned) => v.visit_string(owned),
                Cow::Borrowed(borrowed) => v.visit_borrowed_str(borrowed),
            },
            SerdeObject::Bytes(bytes) => match bytes {
                Cow::Owned(owned) => v.visit_byte_buf(owned),
                Cow::Borrowed(borrowed) => v.visit_borrowed_bytes(borrowed),
            },
            // options, unit and wrappers
            SerdeObject::Some(inner) => v.visit_some(inner.into_deserializer()),
            SerdeObject::None => v.visit_none(),
            SerdeObject::Unit => v.visit_unit(),
            SerdeObject::NewtypeStruct(inner) => {
                v.visit_newtype_struct(inner.into_deserializer())
            },
        }
    }
    /// Discards the stored object and reports a unit to the visitor.
    fn deserialize_ignored_any<V>(self, v: V) -> Result<V::Value, Self::Error>
    where
        V: serde::de::Visitor<'de>,
    {
        std::mem::drop(self);
        v.visit_unit()
    }
    // every typed request is answered the same way as `deserialize_any`.
    serde::forward_to_deserialize_any! {
        bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
        bytes byte_buf option unit unit_struct newtype_struct seq tuple
        tuple_struct map struct enum identifier
    }
}
#[cfg(test)]
mod tests {
use super::Packer;
use super::super::PatternConstants;
use crate::vm::MAX_CALLS;
use crate::vm::Interpreter;
use crate::vm::Type;
use crate::vm::Value;
use crate::vm::PatternElement;
use crate::vm::SerdeObject;
use crate::vm::Frame;
use postcard::Deserializer as PostcardDeserializer;
use serde::de::DeserializeSeed as _;
use serde_json::Deserializer as JsonDeserializer;
use crate::errors::MatchError;
#[test]
#[should_panic]
fn test_broken() {
    // a pattern without any protos is broken and should never be emitted
    // by the parser. make sure it is not accepted (i.e. that this panics).
    let pattern = PatternConstants::<()>::default();
    let mut error = Default::default();
    let mut frame_stack = Default::default();
    let interp = Interpreter::new(&pattern, &mut error, &mut frame_stack);
    let _ = Packer::new(interp, MAX_CALLS);
}
#[test]
fn test_empty_create() {
    // creating a packer for a pattern with a single, empty proto works.
    let mut pattern = PatternConstants::<()>::default();
    pattern.protos.push(Vec::new());
    let mut error = Default::default();
    let mut frame_stack = Default::default();
    let interp = Interpreter::new(&pattern, &mut error, &mut frame_stack);
    let _ = Packer::new(interp, MAX_CALLS);
}
#[test]
fn test_empty_match() {
    // an empty pattern matches an (empty) JSON object without errors.
    let mut pattern = PatternConstants::<()>::default();
    pattern.protos.push(Vec::new());
    let mut json = JsonDeserializer::from_str("{}");
    let mut error = Default::default();
    let mut frame_stack = Default::default();
    let interp = Interpreter::new(&pattern, &mut error, &mut frame_stack);
    let packer = Packer::new(interp, MAX_CALLS);
    let _packed = packer.deserialize(&mut json).unwrap();
}
#[test]
fn test_simple_match() {
    // a single named, required u64 value matched against the number 3.
    let mut pattern = PatternConstants::<()>::default();
    pattern.strings.push("hello".into());
    let named_u64 = PatternElement::Value {
        name: Some(0),
        value: Some(Value::Type {
            ty: Type::U64,
            skippable: false,
        }),
    };
    pattern.protos.push(vec![named_u64]);
    let mut json = JsonDeserializer::from_str("3");
    let mut error = Default::default();
    let mut frame_stack = Default::default();
    let interp = Interpreter::new(&pattern, &mut error, &mut frame_stack);
    let (packs, obj) = Packer::new(interp, MAX_CALLS)
        .deserialize(&mut json)
        .unwrap();
    assert!(obj.is_none());
    assert_eq!(packs[0].subpacks[0]["hello"].1, SerdeObject::U64(3));
}
#[test]
fn test_simple_error() {
    // a required u64 value fed a JSON string: the mismatch is reported as
    // an error (produced by serde_json).
    let mut pattern = PatternConstants::<()>::default();
    pattern.strings.push("hello".into());
    let named_u64 = PatternElement::Value {
        name: Some(0),
        value: Some(Value::Type {
            ty: Type::U64,
            skippable: false,
        }),
    };
    pattern.protos.push(vec![named_u64]);
    let mut json = JsonDeserializer::from_str("\"hello\"");
    let mut error = Default::default();
    let mut frame_stack = Default::default();
    let interp = Interpreter::new(&pattern, &mut error, &mut frame_stack);
    let outcome = Packer::new(interp, MAX_CALLS).deserialize(&mut json);
    // error produced by serde_json
    assert!(outcome.is_err());
}
#[test]
fn test_basic_multiframe() {
    // two frames over the same input: an optional u64 (matches) and an
    // optional bool (does not match).
    let mut pattern = PatternConstants::<()>::default();
    pattern.strings.push("a".into());
    pattern.strings.push("b".into());
    let optional_u64 = PatternElement::Value {
        name: Some(0),
        value: Some(Value::Type {
            ty: Type::U64,
            skippable: true,
        }),
    };
    let optional_bool = PatternElement::Value {
        name: Some(1),
        value: Some(Value::Type {
            ty: Type::Bool,
            skippable: true,
        }),
    };
    pattern.protos.push(vec![optional_u64]);
    pattern.protos.push(vec![optional_bool]);
    let mut json = JsonDeserializer::from_str(r#"10"#);
    let mut error = Default::default();
    let mut frame_stack: Vec<_> = vec![
        Frame {
            ops: &pattern.protos[0],
            iar: None,
            matches: true,
            overstep: 0,
        },
        Frame {
            ops: &pattern.protos[1],
            iar: None,
            matches: true,
            overstep: 0,
        },
    ];
    let interp = Interpreter {
        pat: &pattern,
        error: &mut error,
        frames: &mut frame_stack,
    };
    let (packs, obj) = Packer::new(interp, MAX_CALLS)
        .deserialize(&mut json)
        .unwrap();
    assert!(obj.is_none());
    assert_eq!(
        packs[0].subpacks[0]["a"].1,
        SerdeObject::U64(10),
    );
    assert_eq!(packs.len(), 1);
    assert!(frame_stack[0].matches);
    assert!(!frame_stack[1].matches);
}
#[test]
fn test_map() {
    // exercise visit_map: proto 0 captures the key, proto 1 is the map
    // itself with a tagged u64 value.
    let mut pattern = PatternConstants::<()>::default();
    pattern.strings.push("key".into());
    pattern.strings.push("value".into());
    let key_proto = vec![
        PatternElement::Value {
            name: Some(0),
            value: None,
        },
    ];
    let map_proto = vec![
        PatternElement::Value {
            name: None,
            value: Some(Value::Type {
                ty: Type::Map,
                skippable: false,
            }),
        },
        PatternElement::Tag {
            key_subtree: Some(0),
        },
        PatternElement::Value {
            name: Some(1),
            value: Some(Value::Type {
                ty: Type::U64,
                skippable: false,
            }),
        },
    ];
    pattern.protos.push(key_proto);
    pattern.protos.push(map_proto);
    let mut json = JsonDeserializer::from_str(r#"{"hello": 0, "world": 1}"#);
    let mut error = Default::default();
    let mut frame_stack = Default::default();
    let interp = Interpreter::new(&pattern, &mut error, &mut frame_stack);
    let (packs, obj) = Packer::new(interp, MAX_CALLS)
        .deserialize(&mut json)
        .unwrap();
    assert!(obj.is_none());
    // one subpack per map entry, in input order.
    let entries = &packs[0].subpacks;
    assert_eq!(entries[0]["key"].1, SerdeObject::Str("hello".into()));
    assert_eq!(entries[0]["value"].1, SerdeObject::U64(0));
    assert_eq!(entries[1]["key"].1, SerdeObject::Str("world".into()));
    assert_eq!(entries[1]["value"].1, SerdeObject::U64(1));
}
#[test]
fn test_parser_empty() {
    // Feeds the Packer a pattern produced by the parser from an empty
    // pattern string; the result should be a single pack without subpacks.
    let consts = crate::parser::parse::<&'static str, &'static str, ()>(
        "",
        None,
        None,
    )
    .unwrap();

    let mut input = JsonDeserializer::from_str(r#"{"hello": 0, "world": 1}"#);
    let mut match_err = Default::default();
    let mut frames = Default::default();
    let interp = Interpreter::new(&consts, &mut match_err, &mut frames);

    let result = Packer::new(interp, MAX_CALLS).deserialize(&mut input);
    let (mut packs, obj) = result.unwrap();

    assert!(obj.is_none());
    assert_eq!(packs.len(), 1);
    let only_pack = packs.pop().unwrap();
    assert!(only_pack.subpacks.is_empty());
}
#[test]
fn test_parser_basic() {
    // Feeds the Packer a simple parsed pattern and a postcard-encoded
    // two-entry map; one subpack per map entry is expected.
    let consts = crate::parser::parse::<&'static str, &'static str, ()>(
        ":map->[name:str]value:str",
        None,
        None,
    )
    .unwrap();

    // Postcard payload: {"name": "a", "value": "b"}.
    let payload = &[
        // map length (2)
        0x02,
        // key: string length (4), then "name"
        0x04, b'n', b'a', b'm', b'e',
        // value: string length (1), then "a"
        0x01, b'a',
        // key: string length (5), then "value"
        0x05, b'v', b'a', b'l', b'u', b'e',
        // value: string length (1), then "b"
        0x01, b'b',
    ];
    let mut input = PostcardDeserializer::from_bytes(payload);
    let mut match_err = Default::default();
    let mut frames = Default::default();
    let interp = Interpreter::new(&consts, &mut match_err, &mut frames);

    let (mut packs, obj) = Packer::new(interp, MAX_CALLS)
        .deserialize(&mut input)
        .unwrap();

    assert!(obj.is_none());
    assert_eq!(packs.len(), 1);
    let only_pack = packs.pop().unwrap();
    assert_eq!(only_pack.subpacks.len(), 2);
}
#[test]
fn test_parser_basic_subtree() {
    // Same input as the basic parsed-pattern test, but here the map body
    // of the pattern is written as a subtree. One subpack per map entry
    // is expected.
    let consts = crate::parser::parse::<&'static str, &'static str, ()>(
        ":map(->[name:str]value:str)",
        None,
        None,
    )
    .unwrap();

    // Postcard payload: {"name": "a", "value": "b"}.
    let payload = &[
        // map length (2)
        0x02,
        // key: string length (4), then "name"
        0x04, b'n', b'a', b'm', b'e',
        // value: string length (1), then "a"
        0x01, b'a',
        // key: string length (5), then "value"
        0x05, b'v', b'a', b'l', b'u', b'e',
        // value: string length (1), then "b"
        0x01, b'b',
    ];
    let mut input = PostcardDeserializer::from_bytes(payload);
    let mut match_err = Default::default();
    let mut frames = Default::default();
    let interp = Interpreter::new(&consts, &mut match_err, &mut frames);

    let (mut packs, obj) = Packer::new(interp, MAX_CALLS)
        .deserialize(&mut input)
        .unwrap();

    assert!(obj.is_none());
    assert_eq!(packs.len(), 1);
    let only_pack = packs.pop().unwrap();
    assert_eq!(only_pack.subpacks.len(), 2);
}
#[test]
fn test_parser_subtrees() {
    // Feeds the Packer a parsed pattern made of several subtrees, using a
    // non-self-describing format (postcard) as the input.
    let consts = crate::parser::parse::<&'static str, &'static str, ()>(
        ":map(->['name'?]name:str)?(->['value'?]value:u32)?(->[:str]:?ignored_any)",
        None,
        None,
    )
    .unwrap();

    // Postcard payload: {"name": "a", "value": 1}.
    let payload = &[
        // map length (2)
        0x02,
        // key: string length (4), then "name"
        0x04, b'n', b'a', b'm', b'e',
        // value: string length (1), then "a"
        0x01, b'a',
        // key: string length (5), then "value"
        0x05, b'v', b'a', b'l', b'u', b'e',
        // value: 1
        0x01,
    ];
    let mut input = PostcardDeserializer::from_bytes(payload);
    let mut match_err = Default::default();
    let mut frames = Default::default();
    let interp = Interpreter::new(&consts, &mut match_err, &mut frames);

    let (mut packs, obj) = Packer::new(interp, MAX_CALLS)
        .deserialize(&mut input)
        .unwrap();

    assert!(obj.is_none());
    assert_eq!(packs.len(), 1);
    let only_pack = packs.pop().unwrap();
    assert_eq!(only_pack.subpacks.len(), 2);
    // Each entry was captured by the subtree for its own key.
    assert_eq!(only_pack.subpacks[0]["name"].1, SerdeObject::Str("a".into()));
    assert_eq!(only_pack.subpacks[1]["value"].1, SerdeObject::U32(1));
}
#[test]
fn test_parser_subtrees_strict() {
    // Feeds the Packer a parsed pattern made of several subtrees, using a
    // non-self-describing format (postcard) as the input, and additionally
    // requires at least one subtree to match on every iteration.
    //
    // The third key in the input ("velue") is neither 'name' nor 'value',
    // and the test expects deserialization to end in a validation error.
    //
    // NOTE(review): the original comment here said "(also this test
    // fails)" - confirm whether that is still the case.
    let consts = crate::parser::parse::<&'static str, &'static str, ()>(
        ":map((->['name'?]name:u32)?(->['value'?]value:u32)?)(->[:str]:u32)",
        None,
        None,
    )
    .unwrap();

    // Postcard payload: {"name": 1, "value": 1, "velue": 1}.
    let payload = &[
        // map length (3)
        0x03,
        // key: string length (4), then "name"
        0x04, b'n', b'a', b'm', b'e',
        // value: 1
        0x01,
        // key: string length (5), then "value"
        0x05, b'v', b'a', b'l', b'u', b'e',
        // value: 1
        0x01,
        // key: string length (5), then "velue"
        0x05, b'v', b'e', b'l', b'u', b'e',
        // value: 1
        0x01,
    ];
    let mut input = PostcardDeserializer::from_bytes(payload);
    let mut err = Default::default();
    let mut frames = Default::default();
    let interp = Interpreter::new(&consts, &mut err, &mut frames);

    let outcome = Packer::new(interp, MAX_CALLS).deserialize(&mut input);

    // The match error is checked first, then the deserializer result.
    assert!(matches!(err, Some(MatchError::ValidationError)));
    assert!(outcome.is_err());
}
}