summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorSoniEx2 <endermoneymod@gmail.com>2022-09-04 20:45:19 -0300
committerSoniEx2 <endermoneymod@gmail.com>2022-09-04 20:45:19 -0300
commit937774ee1d172cf47a7fc12e435b7dd7c6464aaf (patch)
tree6ab0173d47997852c0054715d59421cc0b81d46a /src
parentf24123f943abaebffd098a12069bcca62181f862 (diff)
Initial work on Serde VM stuff
Diffstat (limited to 'src')
-rw-r--r--src/errors.rs6
-rw-r--r--src/lib.rs7
-rw-r--r--src/pattern.rs15
-rw-r--r--src/type_tree.rs70
-rw-r--r--src/vm/de.rs135
-rw-r--r--src/vm/mod.rs (renamed from src/vm.rs)97
6 files changed, 315 insertions, 15 deletions
diff --git a/src/errors.rs b/src/errors.rs
index b41b225..9b0025e 100644
--- a/src/errors.rs
+++ b/src/errors.rs
@@ -39,11 +39,11 @@ pub enum PatternError<'a> {
 // /// These are errors that may be returned by the matcher when matching a
 // /// pattern.
 // #[derive(Clone, Debug)]
-// pub enum MatchError {
+pub enum MatchError {
 //     /// Returned if the pattern nests too deeply.
 //     StackOverflow,
 //     /// Returned if the pattern rejects the input.
-//     ValidationError,
+     ValidationError,
 //     /// Returned if the pattern attempts an unsupported operation.
 //     ///
 //     /// In particular, if the [`PatternTypes`] doesn't support `get` or `pairs`
@@ -52,4 +52,4 @@ pub enum PatternError<'a> {
 //     UnsupportedOperation,
 //     /// Returned if an unspecified non-critical error occurred.
 //     Other
-// }
+}
diff --git a/src/lib.rs b/src/lib.rs
index a3a6e1e..f71d81c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,6 +1,8 @@
 // Copyright (C) 2021-2022 Soni L.
 // SPDX-License-Identifier: MIT OR Apache-2.0
 
+#![warn(elided_lifetimes_in_paths)]
+
 //! Datafu is a regex-inspired query language. It was primarily
 //! designed for processing object trees parsed from configuration files, but
 //! can be used with anything that supports serde.
@@ -99,6 +101,7 @@
 //! <!-- TODO -->
 
 pub mod errors;
+pub mod type_tree;
 mod parser;
 mod pattern;
 mod vm;
@@ -107,7 +110,7 @@ pub use pattern::Pattern;
 
 /// A predicate.
 pub type Predicate = dyn (Fn(
-    &mut dyn erased_serde::Deserializer<>
+    &mut dyn erased_serde::Deserializer<'_>
 ) -> bool) + Send + Sync;
 
 /// Helper to build predicates because closure inference is the worst.
@@ -133,7 +136,7 @@ pub type Predicate = dyn (Fn(
 pub fn pred<F>(f: F) -> Box<Predicate>
 where
     F: (Fn(
-        &mut dyn erased_serde::Deserializer<>
+        &mut dyn erased_serde::Deserializer<'_>
     ) -> bool) +  Send + Sync + 'static,
 {
     Box::new(f)
diff --git a/src/pattern.rs b/src/pattern.rs
index 2e69714..fc3c8a7 100644
--- a/src/pattern.rs
+++ b/src/pattern.rs
@@ -6,16 +6,17 @@
 use std::borrow::Borrow;
 use std::collections::BTreeMap;
 
-use serde::Deserialize;
-use serde::Deserializer;
-use serde::Serialize;
+use serde::de::Deserialize;
+use serde::de::DeserializeSeed;
+use serde::de::Deserializer;
+use serde::ser::Serialize;
 
 use crate::Predicate;
 use crate::errors::PatternError;
 use crate::parser::parse;
-//use crate::vm::Matcher;
+use crate::vm;
 use crate::vm::PatternConstants;
-//use crate::vm::MAX_CALLS;
+use crate::vm::MAX_CALLS;
 
 /// A compiled Datafu pattern.
 ///
@@ -83,11 +84,13 @@ impl<O: Serialize> Pattern<O> {
     }
 
     /// Matches the pattern against an input.
-    pub fn deserialize<'de, Der, De>(&self, de: Der) -> Result<De, Der::Error>
+    pub fn deserialize<'de, Der, De>(&self, der: Der) -> Result<De, Der::Error>
     where
         Der: Deserializer<'de>,
         De: Deserialize<'de>,
     {
+        let pack = vm::Packer::new(&self.consts, MAX_CALLS).deserialize(der)?;
+        let de = De::deserialize(vm::Unpacker::new(pack, MAX_CALLS));
         todo!()
     }
 }
diff --git a/src/type_tree.rs b/src/type_tree.rs
new file mode 100644
index 0000000..8e8098b
--- /dev/null
+++ b/src/type_tree.rs
@@ -0,0 +1,70 @@
+// Copyright (C) 2021-2022 Soni L.
+// SPDX-License-Identifier: MIT OR Apache-2.0
+
+//! Type Tree support.
+//!
+//! Type Trees are a Datafu feature for extracting types from a `serde`-based
+//! `Deserialize` in such a way that it can be used with Datafu patterns.
+//!
+//! They work by matching the `Deserialize` against some data, with the help of
+//! `serde_transmute`. Datafu then collects the relevant `Deserialize` calls,
+//! and uses them to infer an appropriate type tree for dynamic
+//! deserialization.
+//!
+//! When introspecting the `Deserialize`, all matching parts are extracted, and
+//! non-matching parts are ignored. Even if an error occurs, Datafu will gladly
+//! infer a type tree for what it could match.
+//!
+//! For example, given a struct and the corresponding data:
+//!
+//! ```
+//! struct Foo {
+//!   bar: i32,
+//! }
+//!
+//! let data = Foo { bar: 0 };
+//! ```
+//!
+//! Building a type tree will first inspect the struct like so:
+//!
+//! 1. call `deserialize()` on `Foo`.
+//! 2. inspect the `deserialize_struct` from `Foo`, storing the name and
+//!     fields.
+//! 3. give `Foo` the appropriate visitor (from `data`), through
+//!     `serde_transmute`.
+//! 4. inspect the `deserialize_i32` etc, also storing those.
+//!
+//! The resulting type tree can then be used in any pattern to effectively
+//! match a `Foo`, but more efficiently than with a predicate. Another big
+//! difference between predicates and type trees is how predicates are eager,
+//! and can consume values that would otherwise be matched by the rest of a
+//! pattern.
+//!
+//! Type trees are pretty flexible. Consider the following example:
+//!
+//! ```
+//! struct Foo {
+//!   bar: Vec<u32>,
+//! }
+//! 
+//! let data = Foo { bar: vec![1, 2, 3] };
+//! ```
+//!
+//! This will actually produce a type tree which checks that the first 3 items
+//! are `u32`! Further, when using different types for the predicate and the
+//! data, you can get even more flexibility. For example, with the following
+//! struct and data:
+//!
+//! ```
+//! struct Foo {
+//!   bar: Vec<u32>,
+//! }
+//!
+//! let data = ();
+//! ```
+//!
+//! Datafu will actually inspect the `deserialize_struct`, and then the
+//! struct visitor will error. But despite the error, it'll still create a type
+//! tree for the `deserialize_struct`!
+
+// TODO
diff --git a/src/vm/de.rs b/src/vm/de.rs
index 7039ea9..4d0d097 100644
--- a/src/vm/de.rs
+++ b/src/vm/de.rs
@@ -1,6 +1,139 @@
 // Copyright (C) 2022 Soni L.
 // SPDX-License-Identifier: MIT OR Apache-2.0
 
-use crate::vm;
+//! Deserialization-related parts of the VM.
 
+use serde::Serialize;
+use serde::de::Error as _;
 
+use super::PatternConstants;
+use super::PatternElement;
+use super::Pack;
+
+/// A `DeserializeSeed` for Datafu input.
+///
+/// This converts from Serde to Datafu's internal representation (a "pack").
+pub struct Packer<'pat, O: Serialize> {
+    /// The pattern currently being processed.
+    pat: &'pat PatternConstants<O>,
+    /// The instructions/function currently being processed.
+    ops: &'pat [PatternElement],
+    /// Maximum number of calls.
+    call_limit: usize,
+}
+
+impl<'pat, O: Serialize> Packer<'pat, O> {
+    pub(crate) fn new(
+        pat: &'pat PatternConstants<O>,
+        call_limit: usize,
+    ) -> Self {
+        Self {
+            pat, call_limit, ops: &pat.protos.last().unwrap()[..],
+        }
+    }
+}
+
+impl<'pat, 'de, O> serde::de::DeserializeSeed<'de> for Packer<'pat, O>
+where
+    O: Serialize,
+{
+    type Value = Pack;
+    fn deserialize<D>(self, deserializer: D) -> Result<Pack, D::Error>
+    where
+        D: serde::Deserializer<'de>
+    {
+        // check the first op
+        let first = self.ops.first();
+        match first {
+            Some(PatternElement::ApplyPredicate(id, skippable)) => {
+                let predicate = &self.pat.predicates[*id];
+                let ok = predicate(todo!());
+                match (ok, skippable) {
+                    (true, _) => {
+                        todo!()
+                    },
+                    (false, false) => {
+                        return Err(D::Error::custom("predicate didn't match"));
+                    },
+                    (false, true) => {
+                        todo!()
+                    },
+                }
+            },
+            _ => {
+                dbg!(first);
+                todo!()
+            },
+        }
+    }
+}
+
+/// A `Deserializer` for Datafu output.
+///
+/// This converts from Datafu's internal representation (a "pack") into the
+/// desired output type.
+pub struct Unpacker {
+    pack: Pack,
+    call_limit: usize,
+}
+
+impl Unpacker {
+    /// Unpacks a Datafu "pack".
+    pub fn new(pack: Pack, call_limit: usize) -> Self {
+        Self {
+            pack, call_limit,
+        }
+    }
+}
+
+impl<'de> serde::Deserializer<'de> for Unpacker {
+    // TODO datafu errors
+    type Error = serde::de::value::Error;
+    fn deserialize_any<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_bool<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_i8<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_i16<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_i32<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_i64<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_u8<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_u16<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_u32<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_u64<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_f32<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_f64<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_char<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_str<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_string<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_bytes<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_byte_buf<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_option<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_unit<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_unit_struct<V>(self, _: &'static str, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_newtype_struct<V>(self, _: &'static str, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_seq<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_tuple<V>(self, _: usize, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_tuple_struct<V>(self, _: &'static str, _: usize, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_map<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_struct<V>(
+        self,
+        _: &'static str,
+        fields: &'static [&'static str],
+        visitor: V,
+    ) -> Result<V::Value, Self::Error>
+    where
+        V: serde::de::Visitor<'de>,
+    {
+        todo!()
+    }
+    fn deserialize_enum<V>(self, _: &'static str, _: &'static [&'static str], _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_identifier<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+    fn deserialize_ignored_any<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() }
+}
+
+/// A Deserializer for collecting matches from [`crate::Predicate`]s.
+///
+/// What are we doing?
+/// 
+/// We certainly have regrets.
+pub struct PredicateCollector {
+}
diff --git a/src/vm.rs b/src/vm/mod.rs
index ac5c95d..92a99d7 100644
--- a/src/vm.rs
+++ b/src/vm/mod.rs
@@ -13,6 +13,9 @@ use crate::Predicate;
 
 mod de;
 
+pub use de::Unpacker;
+pub use de::Packer;
+
 /// Max depth for VM/serde recursion.
 pub(crate) const MAX_CALLS: usize = 250;
 
@@ -46,8 +49,28 @@ impl<O: Serialize> Default for PatternConstants<O> {
     }
 }
 
+impl<O: Serialize> std::fmt::Debug for PatternConstants<O> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct(
+            "PatternConstants"
+        ).field(
+            "protos", &self.protos,
+        ).field(
+            "strings", &self.strings,
+        ).field(
+            "regices", &self.regices,
+        ).field(
+            "predicates",
+            &format_args!("({} predicates)", self.predicates.len()),
+        ).field(
+            "defs",
+            &format_args!("FIXME"),
+        ).finish()
+    }
+}
+
 /// A pattern element.
-#[derive(Copy, Clone)]
+#[derive(Copy, Clone, Debug)]
 pub(crate) enum PatternElement {
     Arrow,
 
@@ -58,11 +81,79 @@ pub(crate) enum PatternElement {
     ParameterKey(usize, bool),
     KeySubtree(usize, bool),
     ValueSubtree(usize, bool),
-    ApplyPredicate(usize, bool),
 
-    End
+    /// Represents a predicate which must be applied.
+    ///
+    /// These are custom, arbitrary predicates, powered by serde. They're
+    /// represented by `:$foo` in a pattern.
+    ApplyPredicate(
+        /// The predicate index (in `PatternConstants.predicates`).
+        usize,
+        /// Whether to skip non-matching values, instead of erroring.
+        bool,
+    ),
+
+    /// Represents a type expectation.
+    ///
+    /// These are similar to predicates. They're represented by `:foo`, but are
+    /// built-in and provide functionality not supported by predicates.
+    ///
+    /// Specifically, predicates cannot ask serde for a map or a list directly.
+    /// Instead, they'd be required to parse a whole map/list/etc, which could
+    /// cause issues which datafu is designed to avoid. (Datafu is designed to
+    /// resist malicious input more so than arbitrary serde deserializers.)
+    Type(
+        /// The expected type.
+        Type,
+        /// Whether to skip non-matching values, instead of erroring.
+        bool,
+    ),
+
+    End,
+}
+
+/// The types datafu and serde currently support.
+///
+/// These are used as expectations for serde (e.g.
+/// `Deserializer::deserialize_string`).
+#[derive(Copy, Clone, Debug)]
+pub(crate) enum Type {
+    Bool,
+    I8,
+    I16,
+    I32,
+    I64,
+    I128,
+    U8,
+    U16,
+    U32,
+    U64,
+    U128,
+    F32,
+    F64,
+    Char,
+    Str,
+    String,
+    Bytes,
+    ByteBuf,
+    Option,
+    Unit,
+    Seq,
+    Tuple(usize),
+    Map,
+    // these aren't really supported:
+    // UnitStruct, UnitVariant, NewtypeStruct, NewtypeVariant, TupleStruct,
+    // TupleVariant, Struct, StructVariant
+    // instead we use type trees for that.
+    /// Adapter for Type Trees. See `crate::type_tree` for more details.
+    Of {
+        /// The type tree index (in `PatternConstants.type_trees`).
+        type_tree: usize,
+    },
 }
 
+pub struct Pack;
+
 //struct Frame<'a, 'b, T: PatternTypes> {
 //    //obj: RefOwn<'b, T::Ref, T::Own>,
 //    ops: &'a [PatternElement],