// Copyright (C) 2022 Soni L.
// SPDX-License-Identifier: MIT OR Apache-2.0
//! Deserialization-related parts of the VM.
use std::borrow::Cow;
use std::marker::PhantomData;
use indexmap::IndexMap;
use serde::Serialize;
use serde::de::Error as _;
use serde::de::IntoDeserializer as _;
use smallvec::SmallVec;
use these::These;
use super::Frame;
use super::Interpreter;
use super::Pack;
use super::PatternConstants;
use super::PatternElement;
use super::SerdeObject;
use super::Type;
use super::Value;
/// Seed that packs Serde input for Datafu.
///
/// Used as a `DeserializeSeed` (through `&mut Packer`), it turns whatever the
/// Serde deserializer produces into Datafu's internal representation, a
/// "pack".
pub(crate) struct Packer<'pat, 'state, O>
where
    O: Serialize,
{
    /// Global interpreter state.
    interp: Interpreter<'pat, 'state, O>,
    /// The current call limit.
    call_limit: usize,
    /// When `true`, visited values are additionally handed back as
    /// `SerdeObject`s instead of only being packed.
    collecting: bool,
}
struct FramesMut<'packer, 'pat> {
frames: &'packer mut Vec>,
}
struct Frames<'packer, 'pat> {
frames: &'packer Vec>,
}
impl<'packer, 'pat> FramesMut<'packer, 'pat> {
fn iter_mut<'a>(&'a mut self) -> impl Iterator- > where 'packer: 'a {
self.frames.iter_mut()
}
fn iter_active_mut<'a>(&'a mut self) -> impl Iterator
- > where 'packer: 'a {
self.iter_mut().filter(|frame| {
frame.matches()
})
}
/// Steps the VM into the next operation.
fn step_in(&mut self) {
self.iter_mut().for_each(|frame| {
if let Some(ref mut overstep) = frame.overstep {
*overstep += 1;
} else {
if !frame.next() {
frame.overstep = Some(1);
} else if matches!(
frame.op(),
PatternElement::ValueSubtree { .. },
) {
todo!()
}
}
});
}
fn step_out<'de>(&mut self, pack: Pack<'pat, 'de>) -> Pack<'pat, 'de> {
self.iter_mut().for_each(|frame| {
if let Some(ref mut overstep) = frame.overstep {
if *overstep > 0 {
*overstep -= 1;
}
} else {
if let Some(0) = frame.overstep {
frame.overstep = None;
}
if frame.overstep.is_none() {
if frame.prev() {
todo!();
}
}
}
});
pack
}
}
impl<'packer, 'pat> Frames<'packer, 'pat> {
fn iter<'a>(&'a self) -> impl Iterator
- > where 'packer: 'a {
self.frames.iter()
}
fn iter_active<'a>(&'a self) -> impl Iterator
- > where 'packer: 'a {
self.iter().filter(|frame| {
frame.matches()
})
}
}
impl<'pat, 'state, O: Serialize> Packer<'pat, 'state, O> {
    /// Creates a new Packer.
    ///
    /// The packer takes ownership of `interp` and remembers `call_limit`;
    /// value collection starts out disabled.
    pub(crate) fn new(
        interp: Interpreter<'pat, 'state, O>,
        call_limit: usize,
    ) -> Self {
        Self {
            interp,
            call_limit,
            collecting: false,
        }
    }

    /// Borrows the interpreter's frame stack mutably.
    fn frames_mut(&mut self) -> FramesMut<'_, 'pat> {
        FramesMut {
            frames: &mut *self.interp.frames,
        }
    }

    /// Borrows the interpreter's frame stack for reading.
    ///
    /// Only shared access is needed, so this takes `&self`; callers holding
    /// a `&mut Packer` can still call it.
    fn frames(&self) -> Frames<'_, 'pat> {
        Frames {
            frames: &*self.interp.frames,
        }
    }
}
// what steps do we have to take?
//
// 1. figure out what type we need to deserialize (and ask the deserializer
// for it).
// 2. visit value. figure out whether we need to store it or not?
// 3. if we need to store it how do we figure out *where* to store it?
// 4. if we *don't* need to store it, what do we do?
// 5. how do we tell if we do or don't need to store it? how do we propagate
// those requirements deeper into the Deserialize's and how do we bring
// the values back out (recursively?) to parent Deserialize's, without
// wasting time storing things we don't actually care about?
// 5.a. just have a flag in the DeserializeSeed for whether to capture the
// values. propagation is more or less trivial from there.
// 6. how do you handle value subtrees?
// 6.a. you don't. for now.
// 7. how do you handle errors?
// 7.a. put them into a "state" and raise a D::Error::custom. then
// override it in the relevant Pattern call.
impl<'pat, 'state, 'de, O> serde::de::DeserializeSeed<'de>
for &mut Packer<'pat, 'state, O>
where
O: Serialize,
{
type Value = (Pack<'pat, 'de>, Option>);
fn deserialize(
mut self,
deserializer: D,
) -> Result
where
D: serde::Deserializer<'de>
{
self.frames_mut().step_in();
let pat = self.interp.pat;
let target_type = self.frames().iter_active().fold(
Type::IgnoredAny,
|target_type, frame| {
match (target_type, frame.get_type(pat)) {
(Type::IgnoredAny, Some((ty, _))) => ty,
(ty, Some((Type::IgnoredAny, _))) => ty,
(Type::String, Some((Type::Str, _))) => {
Type::String
},
(Type::Str, Some((Type::String, _))) => {
Type::String
},
(Type::Bytes, Some((Type::ByteBuf, _))) => {
Type::ByteBuf
},
(Type::ByteBuf, Some((Type::Bytes, _))) => {
Type::ByteBuf
},
(left, Some((right, _))) if left == right => {
left
},
_ => Type::Any,
}
},
);
match target_type {
Type::Any => deserializer.deserialize_any(&mut *self),
Type::IgnoredAny => {
deserializer.deserialize_ignored_any(&mut *self)
},
Type::Bool => deserializer.deserialize_bool(&mut *self),
Type::I8 => deserializer.deserialize_i8(&mut *self),
Type::I16 => deserializer.deserialize_i16(&mut *self),
Type::I32 => deserializer.deserialize_i32(&mut *self),
Type::I64 => deserializer.deserialize_i64(&mut *self),
Type::I128 => deserializer.deserialize_i128(&mut *self),
Type::U8 => deserializer.deserialize_u8(&mut *self),
Type::U16 => deserializer.deserialize_u16(&mut *self),
Type::U32 => deserializer.deserialize_u32(&mut *self),
Type::U64 => deserializer.deserialize_u64(&mut *self),
Type::U128 => deserializer.deserialize_u128(&mut *self),
Type::F32 => deserializer.deserialize_f32(&mut *self),
Type::F64 => deserializer.deserialize_f64(&mut *self),
Type::Char => deserializer.deserialize_char(&mut *self),
Type::Str if !self.collecting => {
deserializer.deserialize_str(&mut *self)
},
Type::Str | Type::String => {
deserializer.deserialize_string(&mut *self)
},
Type::Bytes if !self.collecting => {
deserializer.deserialize_bytes(&mut *self)
},
Type::Bytes | Type::ByteBuf => {
deserializer.deserialize_byte_buf(&mut *self)
},
Type::Option => deserializer.deserialize_option(&mut *self),
Type::Unit => deserializer.deserialize_unit(&mut *self),
Type::Seq => deserializer.deserialize_seq(&mut *self),
Type::Map => deserializer.deserialize_map(&mut *self),
Type::Identifier => {
deserializer.deserialize_identifier(&mut *self)
},
Type::Tuple(len) => {
deserializer.deserialize_tuple(len, &mut *self)
},
Type::UnitStruct(name) => {
deserializer.deserialize_unit_struct(name, &mut *self)
},
Type::NewtypeStruct(name) => {
deserializer.deserialize_newtype_struct(name, &mut *self)
},
Type::TupleStruct { name, len } => {
deserializer.deserialize_tuple_struct(name, len, &mut *self)
},
Type::Struct { name, fields } => {
deserializer.deserialize_struct(name, fields, &mut *self)
},
Type::Enum { name, variants } => {
deserializer.deserialize_enum(name, variants, &mut *self)
},
}.map(|(pack, obj)| (self.frames_mut().step_out(pack), obj))
}
}
/// visit method generator for simple values (primitives).
///
/// `vs!(visit_x Variant ty)` expands to a `Visitor::visit_x` method taking a
/// `ty`. The generated method pushes one map per active frame onto the
/// resulting pack; the map holds the value (wrapped in
/// `SerdeObject::Variant`) under the frame's name when the frame has one,
/// and is empty otherwise. The value is also returned as the collected
/// object when the packer is collecting.
macro_rules! vs {
    ($visit:ident $obj:ident $t:ty) => {
        fn $visit<E>(self, v: $t) -> Result<Self::Value, E>
        where
            E: serde::de::Error,
        {
            // FIXME filtering/errors
            let pat = self.interp.pat;
            let obj = if self.collecting {
                Some(SerdeObject::$obj(v))
            } else {
                None
            };
            let mut pack = Pack::default();
            // Nothing in here can fail yet, so a plain loop is enough.
            for frame in self.frames_mut().iter_active_mut() {
                let mut map = IndexMap::new();
                if let Some(name) = frame.get_name(pat) {
                    map.insert(name, (Pack::default(), SerdeObject::$obj(v)));
                }
                pack.subpacks.push(map);
            }
            Ok((pack, obj))
        }
    };
}
impl<'pat, 'state, 'de, O> serde::de::Visitor<'de>
for &mut Packer<'pat, 'state, O>
where
O: Serialize,
{
type Value = (Pack<'pat, 'de>, Option>);
fn expecting(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "unsure")
}
vs!(visit_bool Bool bool);
vs!(visit_i8 I8 i8);
vs!(visit_i16 I16 i16);
vs!(visit_i32 I32 i32);
vs!(visit_i64 I64 i64);
vs!(visit_i128 I128 i128);
vs!(visit_u8 U8 u8);
vs!(visit_u16 U16 u16);
vs!(visit_u32 U32 u32);
vs!(visit_u64 U64 u64);
vs!(visit_u128 U128 u128);
vs!(visit_f32 F32 f32);
vs!(visit_f64 F64 f64);
vs!(visit_char Char char);
fn visit_str(self, v: &str) -> Result
where
E: serde::de::Error,
{
let mut obj = None;
if self.collecting {
obj = Some(SerdeObject::Str(Cow::Owned(String::from(v))));
}
todo!()
}
fn visit_borrowed_str(self, v: &'de str) -> Result
where
E: serde::de::Error,
{
let mut obj = None;
if self.collecting {
obj = Some(SerdeObject::Str(Cow::Borrowed(v)));
}
todo!()
}
fn visit_string(self, v: String) -> Result
where
E: serde::de::Error,
{
let mut obj = None;
if self.collecting {
obj = Some(SerdeObject::Str(Cow::Owned(v)));
}
todo!()
}
fn visit_bytes(self, v: &[u8]) -> Result
where
E: serde::de::Error,
{
let mut obj = None;
if self.collecting {
obj = Some(SerdeObject::Bytes(Cow::Owned(Vec::from(v))));
}
todo!()
}
fn visit_borrowed_bytes(self, v: &'de [u8]) -> Result
where
E: serde::de::Error,
{
let mut obj = None;
if self.collecting {
obj = Some(SerdeObject::Bytes(Cow::Borrowed(v)));
}
todo!()
}
fn visit_byte_buf(self, v: Vec) -> Result
where
E: serde::de::Error,
{
let mut obj = None;
if self.collecting {
obj = Some(SerdeObject::Bytes(Cow::Owned(v)));
}
todo!()
}
fn visit_none(self) -> Result
where
E: serde::de::Error,
{
let mut obj = None;
if self.collecting {
obj = Some(SerdeObject::None);
}
todo!()
}
fn visit_some(self, deserializer: D) -> Result
where
D: serde::de::Deserializer<'de>,
{
let mut obj = None;
if self.collecting {
obj = Some(SerdeObject::Some(todo!()));
}
todo!()
}
fn visit_unit(self) -> Result
where
E: serde::de::Error,
{
// FIXME subtrees
let mut obj = None;
let mut pack = Pack::default();
if self.collecting {
obj = Some(SerdeObject::Unit);
}
let mut map = IndexMap::new();
//for name in self.get_name() {
// map.insert(name, (Default::default(), SerdeObject::Unit));
//}
pack.subpacks.push(map);
Ok((pack, obj))
}
fn visit_newtype_struct(
self,
deserializer: D
) -> Result
where
D: serde::de::Deserializer<'de>,
{
let mut obj = None;
if self.collecting {
obj = Some(SerdeObject::NewtypeStruct(todo!()));
}
todo!()
}
fn visit_seq(self, seq: A) -> Result
where
A: serde::de::SeqAccess<'de>,
{
let mut obj = None;
if self.collecting {
obj = Some(SerdeObject::Seq(Vec::new()));
}
todo!()
}
fn visit_map(self, map: A) -> Result
where
A: serde::de::MapAccess<'de>,
{
let mut obj = None;
if self.collecting {
obj = Some(SerdeObject::Map(Vec::new()));
}
todo!()
}
fn visit_enum(self, data: A) -> Result
where
A: serde::de::EnumAccess<'de>,
{
let mut obj = None;
if self.collecting {
obj = Some(SerdeObject::Enum {
variant: todo!(),
data: todo!(),
});
}
todo!()
}
}
/// A `Deserializer` for Datafu output.
///
/// This converts from Datafu's internal representation (a "pack") into the
/// desired output type.
pub struct Unpacker<'pat, 'de> {
/// The pack to be unpacked.
pack: Pack<'pat, 'de>,
/// Call limit supplied at construction; nothing in the visible code reads
/// it yet.
call_limit: usize,
}
impl<'pat, 'de> Unpacker<'pat, 'de> {
    /// Builds an unpacker for the given Datafu "pack".
    ///
    /// Both arguments are stored as-is; no unpacking happens here.
    pub fn new(pack: Pack<'pat, 'de>, call_limit: usize) -> Self {
        Self { pack, call_limit }
    }
}
impl<'pat, 'de> serde::Deserializer<'de> for Unpacker<'pat, 'de> {
// TODO datafu errors
type Error = serde::de::value::Error;
fn deserialize_any(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_bool(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_i8(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_i16(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_i32(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_i64(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_u8(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_u16(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_u32(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_u64(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_f32(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_f64(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_char(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_str(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_string(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_bytes(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_byte_buf(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_option(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_unit(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_unit_struct(self, _: &'static str, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_newtype_struct(self, _: &'static str, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_seq(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_tuple(self, _: usize, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_tuple_struct(self, _: &'static str, _: usize, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_map(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_struct(
self,
_: &'static str,
fields: &'static [&'static str],
visitor: V,
) -> Result
where
V: serde::de::Visitor<'de>,
{
todo!()
}
fn deserialize_enum(self, _: &'static str, _: &'static [&'static str], _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_identifier(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
fn deserialize_ignored_any(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() }
}
/// Deserializes a SerdeObject
pub(crate) struct SerdeObjectDeserializer<'de, E> {
pub(crate) obj: SerdeObject<'de>,
pub(crate) value: Option>,
pub(crate) _e: PhantomData E>,
}
impl<'de, E> serde::de::Deserializer<'de> for SerdeObjectDeserializer<'de, E>
where
E: serde::de::Error,
{
type Error = E;
fn deserialize_any(self, v: V) -> Result
where
V: serde::de::Visitor<'de>,
{
match self.obj {
SerdeObject::Bool(x) => v.visit_bool(x),
SerdeObject::I8(x) => v.visit_i8(x),
SerdeObject::I16(x) => v.visit_i16(x),
SerdeObject::I32(x) => v.visit_i32(x),
SerdeObject::I64(x) => v.visit_i64(x),
SerdeObject::I128(x) => v.visit_i128(x),
SerdeObject::U8(x) => v.visit_u8(x),
SerdeObject::U16(x) => v.visit_u16(x),
SerdeObject::U32(x) => v.visit_u32(x),
SerdeObject::U64(x) => v.visit_u64(x),
SerdeObject::U128(x) => v.visit_u128(x),
SerdeObject::F32(x) => v.visit_f32(x),
SerdeObject::F64(x) => v.visit_f64(x),
SerdeObject::Char(x) => v.visit_char(x),
SerdeObject::Str(Cow::Owned(x)) => v.visit_string(x),
SerdeObject::Str(Cow::Borrowed(x)) => v.visit_borrowed_str(x),
SerdeObject::Bytes(Cow::Owned(x)) => v.visit_byte_buf(x),
SerdeObject::Bytes(Cow::Borrowed(x)) => v.visit_borrowed_bytes(x),
SerdeObject::Some(x) => v.visit_some(x.into_deserializer()),
SerdeObject::None => v.visit_none(),
SerdeObject::Unit => v.visit_unit(),
SerdeObject::Seq(x) => todo!(),
SerdeObject::Map(x) => todo!(),
SerdeObject::NewtypeStruct(x) => {
v.visit_newtype_struct(x.into_deserializer())
},
SerdeObject::Enum { variant, data } => todo!(),
}
}
fn deserialize_ignored_any(self, v: V) -> Result
where
V: serde::de::Visitor<'de>,
{
drop(self);
v.visit_unit()
}
serde::forward_to_deserialize_any! {
bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
bytes byte_buf option unit unit_struct newtype_struct seq tuple
tuple_struct map struct enum identifier
}
}
#[cfg(test)]
mod tests {
    use super::Packer;
    use super::super::PatternConstants;
    use crate::vm::MAX_CALLS;
    use crate::vm::Interpreter;
    use crate::vm::Type;
    use crate::vm::Value;
    use crate::vm::PatternElement;
    use crate::vm::SerdeObject;
    use these::These;
    use serde_json::Deserializer as JsonDeserializer;
    use serde::de::DeserializeSeed as _;

    /// Setting up a packer for pattern constants without any proto is
    /// expected to panic.
    #[test]
    #[should_panic]
    fn test_broken() {
        let pattern = PatternConstants::<()>::default();
        let mut error = Default::default();
        let mut frame_stack = Default::default();
        let interp = Interpreter::new(&pattern, &mut error, &mut frame_stack);
        let _ = Packer::new(interp, MAX_CALLS);
    }

    /// A single, empty proto is enough to construct a packer.
    #[test]
    fn test_empty_create() {
        let mut pattern = PatternConstants::<()>::default();
        pattern.protos.push(Vec::new());
        let mut error = Default::default();
        let mut frame_stack = Default::default();
        let interp = Interpreter::new(&pattern, &mut error, &mut frame_stack);
        let _ = Packer::new(interp, MAX_CALLS);
    }

    /// An empty proto deserializes an empty JSON object without error.
    #[test]
    fn test_empty_match() {
        let mut pattern = PatternConstants::<()>::default();
        pattern.protos.push(Vec::new());
        let mut input = JsonDeserializer::from_str("{}");
        let mut error = Default::default();
        let mut frame_stack = Default::default();
        let interp = Interpreter::new(&pattern, &mut error, &mut frame_stack);
        let _pack = Packer::new(interp, MAX_CALLS)
            .deserialize(&mut input)
            .unwrap();
    }

    /// A single named `u64` value is captured under its name, and nothing is
    /// collected.
    #[test]
    fn test_simple_match() {
        let mut pattern = PatternConstants::<()>::default();
        pattern.strings.push("hello".into());
        let named_u64 = PatternElement::Value {
            name_and_value: These::Both(0, Value::Type {
                ty: Type::U64,
                skippable: false,
            }),
        };
        pattern.protos.push(vec![named_u64]);
        let mut input = JsonDeserializer::from_str("3");
        let mut error = Default::default();
        let mut frame_stack = Default::default();
        let interp = Interpreter::new(&pattern, &mut error, &mut frame_stack);
        let outcome = Packer::new(interp, MAX_CALLS).deserialize(&mut input);
        let (pack, obj) = outcome.unwrap();
        assert!(obj.is_none());
        assert_eq!(pack.subpacks[0]["hello"].1, SerdeObject::U64(3));
    }
}
}