From f0e944696144016ca59aaed02381f7ea9d1ef848 Mon Sep 17 00:00:00 2001 From: SoniEx2 Date: Tue, 4 Oct 2022 22:44:46 -0300 Subject: Initial VM work --- src/vm/de.rs | 509 +++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 465 insertions(+), 44 deletions(-) (limited to 'src/vm/de.rs') diff --git a/src/vm/de.rs b/src/vm/de.rs index 4d0d097..2282484 100644 --- a/src/vm/de.rs +++ b/src/vm/de.rs @@ -3,68 +3,376 @@ //! Deserialization-related parts of the VM. +use std::borrow::Cow; +use std::marker::PhantomData; + +use indexmap::IndexMap; + use serde::Serialize; use serde::de::Error as _; +use smallvec::SmallVec; + +use these::These; + +use super::Interpreter; +use super::Pack; use super::PatternConstants; use super::PatternElement; -use super::Pack; +use super::SerdeObject; +use super::Type; +use super::Value; /// A `DeserializeSeed` for Datafu input. /// /// This converts from Serde to Datafu's internal representation (a "pack"). -pub struct Packer<'pat, O: Serialize> { - /// The pattern currently being processed. - pat: &'pat PatternConstants, - /// The instructions/function currently being processed. - ops: &'pat [PatternElement], - /// Maximum number of calls. +pub(crate) struct Packer<'pat, 'state, O: Serialize> { + /// The global interpreter state. + interp: Interpreter<'pat, 'state, O>, + /// Current call limit. call_limit: usize, + /// Whether we're collecting values. + collecting: bool, + /// Instructions currently being processed. + ops: SmallVec<[InstructionReg<'pat>; 1]>, +} + +/// Instruction currently being processed. +struct InstructionReg<'pat> { + /// The (current) program sequence. + instructions: &'pat [PatternElement], + /// Whether this instruction is required to match. + required: bool, } -impl<'pat, O: Serialize> Packer<'pat, O> { +impl<'pat, 'state, 'de, O: Serialize> Packer<'pat, 'state, O> { pub(crate) fn new( - pat: &'pat PatternConstants, + interp: Interpreter<'pat, 'state, O>, call_limit: usize, ) -> Self { + let ops = SmallVec::from_buf([ + InstructionReg { + instructions: &interp.pat.protos.last().unwrap()[..], + required: true, + } + ]); Self { - pat, call_limit, ops: &pat.protos.last().unwrap()[..], + interp: interp, + call_limit: call_limit, + collecting: false, + ops: ops, } } + + /// Extracts the name for this element. + fn get_name(&self) -> SmallVec<[&'pat str; 1]> { + let mut name = SmallVec::<[&'pat str; 1]>::new(); + for reg in &self.ops { + match reg.instructions.first() { + | Some(PatternElement::Tag { name_and_value, .. }) + | Some(PatternElement::Value { name_and_value }) + => { + if let Some(name_key) = name_and_value.here() { + name.push(&self.interp.pat.strings[name_key]); + } + }, + None => { + // FIXME is this correct? + }, + _ => unreachable!(), + } + } + name + } } -impl<'pat, 'de, O> serde::de::DeserializeSeed<'de> for Packer<'pat, O> +// what steps do we have to take? +// +// 1. figure out what type we need to deserialize (and ask the deserializer +// for it). +// 2. visit value. figure out whether we need to store it or not? +// 3. if we need to store it how do we figure out *where* to store it? +// 4. if we *don't* need to store it, what do we do? +// 5. how do we tell if we do or don't need to store it? how do we propagate +// those requirements deeper into the Deserialize's and how do we bring +// the values back out (recursively?) to parent Deserialize's, without +// wasting time storing things we don't actually care about? +// 5.a. just have a flag in the DeserializeSeed for whether to capture the +// values. propagation is more or less trivial from there. +// 6. how do you handle value subtrees? +// 6.a. you don't. for now. +// 7. how do you handle errors? +// 7.a. put them into a "state" and raise a D::Error::custom. then +// override it in the relevant Pattern call. + +impl<'pat, 'state, 'de, O> serde::de::DeserializeSeed<'de> +for Packer<'pat, 'state, O> where O: Serialize, { - type Value = Pack; - fn deserialize(self, deserializer: D) -> Result + type Value = (Pack<'pat, 'de>, Option>); + fn deserialize( + mut self, + deserializer: D, + ) -> Result where D: serde::Deserializer<'de> { - // check the first op - let first = self.ops.first(); - match first { - Some(PatternElement::ApplyPredicate(id, skippable)) => { - let predicate = &self.pat.predicates[*id]; - let ok = predicate(todo!()); - match (ok, skippable) { - (true, _) => { - todo!() - }, - (false, false) => { - return Err(D::Error::custom("predicate didn't match")); - }, - (false, true) => { - todo!() - }, - } + match &*self.ops { + [] => unreachable!(), + [InstructionReg { + instructions: [], + .. + }] => { + // FIXME is this correct? + deserializer.deserialize_ignored_any(self) }, - _ => { - dbg!(first); - todo!() + [InstructionReg { + instructions: [ins, ..], + .. + }] => match ins { + | PatternElement::Tag { name_and_value, .. } + | PatternElement::Value { name_and_value } + => { + match name_and_value.there() { + | Some(Value::String { .. }) + | Some(Value::Regex { .. }) => { + if name_and_value.is_here() { + deserializer.deserialize_string(self) + } else { + deserializer.deserialize_str(self) + } + }, + Some(Value::Type { ty, .. }) => match ty { + Type::Any => deserializer.deserialize_any(self), + Type::IgnoredAny => { + deserializer.deserialize_ignored_any(self) + }, + Type::Bool => deserializer.deserialize_bool(self), + Type::I8 => deserializer.deserialize_i8(self), + Type::I16 => deserializer.deserialize_i16(self), + Type::I32 => deserializer.deserialize_i32(self), + Type::I64 => deserializer.deserialize_i64(self), + Type::I128 => deserializer.deserialize_i128(self), + Type::U8 => deserializer.deserialize_u8(self), + Type::U16 => deserializer.deserialize_u16(self), + Type::U32 => deserializer.deserialize_u32(self), + Type::U64 => deserializer.deserialize_u64(self), + Type::U128 => deserializer.deserialize_u128(self), + Type::F32 => deserializer.deserialize_f32(self), + Type::F64 => deserializer.deserialize_f64(self), + Type::Char => deserializer.deserialize_char(self), + Type::Str => deserializer.deserialize_str(self), + Type::String => deserializer.deserialize_string(self), + Type::Bytes => deserializer.deserialize_bytes(self), + Type::ByteBuf => { + deserializer.deserialize_byte_buf(self) + }, + Type::Option => deserializer.deserialize_option(self), + Type::Unit => deserializer.deserialize_unit(self), + Type::Seq => deserializer.deserialize_seq(self), + Type::Map => deserializer.deserialize_map(self), + }, + None => todo!(), + } + }, + _ => todo!(), }, + _ => todo!(), + } + } +} + +/// visit method generator for simple values (primitives). +macro_rules! vs { + ($visit:ident $obj:ident $t:ty) => { + fn $visit(self, v: $t) -> Result + where + E: serde::de::Error, + { + // FIXME subtrees + let mut obj = None; + let mut pack = Pack::default(); + if self.collecting { + obj = Some(SerdeObject::$obj(v)); + } + let mut map = IndexMap::new(); + for name in self.get_name() { + map.insert(name, (Default::default(), SerdeObject::$obj(v))); + } + pack.subpacks.push(map); + Ok((pack, obj)) + } + } +} + +impl<'pat, 'state, 'de, O> serde::de::Visitor<'de> +for Packer<'pat, 'state, O> +where + O: Serialize, +{ + type Value = (Pack<'pat, 'de>, Option>); + fn expecting(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "unsure") + } + + vs!(visit_bool Bool bool); + vs!(visit_i8 I8 i8); + vs!(visit_i16 I16 i16); + vs!(visit_i32 I32 i32); + vs!(visit_i64 I64 i64); + vs!(visit_i128 I128 i128); + vs!(visit_u8 U8 u8); + vs!(visit_u16 U16 u16); + vs!(visit_u32 U32 u32); + vs!(visit_u64 U64 u64); + vs!(visit_u128 U128 u128); + vs!(visit_f32 F32 f32); + vs!(visit_f64 F64 f64); + vs!(visit_char Char char); + + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Str(Cow::Owned(String::from(v)))); + } + todo!() + } + fn visit_borrowed_str(self, v: &'de str) -> Result + where + E: serde::de::Error, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Str(Cow::Borrowed(v))); + } + todo!() + } + fn visit_string(self, v: String) -> Result + where + E: serde::de::Error, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Str(Cow::Owned(v))); + } + todo!() + } + fn visit_bytes(self, v: &[u8]) -> Result + where + E: serde::de::Error, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Bytes(Cow::Owned(Vec::from(v)))); + } + todo!() + } + fn visit_borrowed_bytes(self, v: &'de [u8]) -> Result + where + E: serde::de::Error, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Bytes(Cow::Borrowed(v))); + } + todo!() + } + fn visit_byte_buf(self, v: Vec) -> Result + where + E: serde::de::Error, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Bytes(Cow::Owned(v))); + } + todo!() + } + fn visit_none(self) -> Result + where + E: serde::de::Error, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::None); } + todo!() + } + fn visit_some(self, deserializer: D) -> Result + where + D: serde::de::Deserializer<'de>, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Some(todo!())); + } + todo!() + } + fn visit_unit(self) -> Result + where + E: serde::de::Error, + { + // FIXME subtrees + let mut obj = None; + let mut pack = Pack::default(); + if self.collecting { + obj = Some(SerdeObject::Unit); + } + let mut map = IndexMap::new(); + for name in self.get_name() { + map.insert(name, (Default::default(), SerdeObject::Unit)); + } + pack.subpacks.push(map); + Ok((pack, obj)) + } + fn visit_newtype_struct( + self, + deserializer: D + ) -> Result + where + D: serde::de::Deserializer<'de>, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::NewtypeStruct(todo!())); + } + todo!() + } + fn visit_seq(self, seq: A) -> Result + where + A: serde::de::SeqAccess<'de>, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Seq(Vec::new())); + } + todo!() + } + fn visit_map(self, map: A) -> Result + where + A: serde::de::MapAccess<'de>, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Map(Vec::new())); + } + todo!() + } + fn visit_enum(self, data: A) -> Result + where + A: serde::de::EnumAccess<'de>, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Enum { + variant: todo!(), + data: todo!(), + }); + } + todo!() } } @@ -72,21 +380,21 @@ where /// /// This converts from Datafu's internal representation (a "pack") into the /// desired output type. -pub struct Unpacker { - pack: Pack, +pub struct Unpacker<'pat, 'de> { + pack: Pack<'pat, 'de>, call_limit: usize, } -impl Unpacker { +impl<'pat, 'de> Unpacker<'pat, 'de> { /// Unpacks a Datafu "pack". - pub fn new(pack: Pack, call_limit: usize) -> Self { + pub fn new(pack: Pack<'pat, 'de>, call_limit: usize) -> Self { Self { pack, call_limit, } } } -impl<'de> serde::Deserializer<'de> for Unpacker { +impl<'pat, 'de> serde::Deserializer<'de> for Unpacker<'pat, 'de> { // TODO datafu errors type Error = serde::de::value::Error; fn deserialize_any(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } @@ -130,10 +438,123 @@ impl<'de> serde::Deserializer<'de> for Unpacker { fn deserialize_ignored_any(self, _: V) -> Result where V: serde::de::Visitor<'de> { todo!() } } -/// A Deserializer for collecting matches from [`crate::Predicate`]s. -/// -/// What are we doing? -/// -/// We certainly have regrets. -pub struct PredicateCollector { +/// Deserializes a SerdeObject +pub(crate) struct SerdeObjectDeserializer<'de, E> { + pub(crate) obj: SerdeObject<'de>, + pub(crate) value: Option>, + pub(crate) _e: PhantomData E>, } + +impl<'de, E> serde::de::Deserializer<'de> for SerdeObjectDeserializer<'de, E> +where + E: serde::de::Error, +{ + type Error = E; + fn deserialize_any(self, v: V) -> Result + where + V: serde::de::Visitor<'de>, + { + match self.obj { + SerdeObject::Bool(x) => v.visit_bool(x), + SerdeObject::I8(x) => v.visit_i8(x), + SerdeObject::I16(x) => v.visit_i16(x), + SerdeObject::I32(x) => v.visit_i32(x), + SerdeObject::I64(x) => v.visit_i64(x), + SerdeObject::I128(x) => v.visit_i128(x), + SerdeObject::U8(x) => v.visit_u8(x), + SerdeObject::U16(x) => v.visit_u16(x), + SerdeObject::U32(x) => v.visit_u32(x), + SerdeObject::U64(x) => v.visit_u64(x), + SerdeObject::U128(x) => v.visit_u128(x), + SerdeObject::F32(x) => v.visit_f32(x), + SerdeObject::F64(x) => v.visit_f64(x), + SerdeObject::Char(x) => v.visit_char(x), + SerdeObject::Str(Cow::Owned(x)) => v.visit_string(x), + SerdeObject::Str(Cow::Borrowed(x)) => v.visit_borrowed_str(x), + SerdeObject::Bytes(Cow::Owned(x)) => v.visit_byte_buf(x), + SerdeObject::Bytes(Cow::Borrowed(x)) => v.visit_borrowed_bytes(x), + SerdeObject::Some(x) => todo!(), + SerdeObject::None => v.visit_none(), + SerdeObject::Unit => v.visit_unit(), + SerdeObject::Seq(x) => todo!(), + SerdeObject::Map(x) => todo!(), + SerdeObject::NewtypeStruct(x) => todo!(), + SerdeObject::Enum { variant, data } => todo!(), + } + } + fn deserialize_ignored_any(self, v: V) -> Result + where + V: serde::de::Visitor<'de>, + { + drop(self); + v.visit_unit() + } + serde::forward_to_deserialize_any! { + bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string + bytes byte_buf option unit unit_struct newtype_struct seq tuple + tuple_struct map struct enum identifier + } +} + +#[cfg(test)] +mod tests { + use super::Packer; + use super::super::PatternConstants; + use crate::vm::MAX_CALLS; + use crate::vm::Interpreter; + use crate::vm::Type; + use crate::vm::Value; + use crate::vm::PatternElement; + use crate::vm::SerdeObject; + use these::These; + use serde_json::Deserializer as JsonDeserializer; + use serde::de::DeserializeSeed as _; + + #[test] + #[should_panic] + fn test_broken() { + let consts = PatternConstants::<()>::default(); + let mut err = Default::default(); + let interp = Interpreter::new(&consts, &mut err); + let _ = Packer::new(interp, MAX_CALLS); + } + + #[test] + fn test_empty_create() { + let mut consts = PatternConstants::<()>::default(); + consts.protos.push(Vec::new()); + let mut err = Default::default(); + let interp = Interpreter::new(&consts, &mut err); + let _ = Packer::new(interp, MAX_CALLS); + } + + #[test] + fn test_empty_match() { + let mut consts = PatternConstants::<()>::default(); + consts.protos.push(Vec::new()); + let mut der = JsonDeserializer::from_str("{}"); + let mut err = Default::default(); + let interp = Interpreter::new(&consts, &mut err); + let pack = Packer::new(interp, MAX_CALLS).deserialize(&mut der).unwrap(); + } + + #[test] + fn test_simple_match() { + let mut consts = PatternConstants::<()>::default(); + consts.strings.push("hello".into()); + consts.protos.push(vec![ + PatternElement::Value { + name_and_value: These::Both(0, Value::Type { + ty: Type::U64, + skippable: false, + }) + } + ]); + let mut der = JsonDeserializer::from_str("3"); + let mut err = Default::default(); + let interp = Interpreter::new(&consts, &mut err); + let pack = Packer::new(interp, MAX_CALLS).deserialize(&mut der).unwrap().0; + assert_eq!(pack.subpacks[0]["hello"].1, SerdeObject::U64(3)); + } +} + -- cgit 1.4.1