summary refs log tree commit diff stats
path: root/src/vm/mod.rs
diff options
context:
space:
mode:
authorSoniEx2 <endermoneymod@gmail.com>2022-10-04 22:44:46 -0300
committerSoniEx2 <endermoneymod@gmail.com>2022-10-04 22:44:46 -0300
commitf0e944696144016ca59aaed02381f7ea9d1ef848 (patch)
treed9c3232b920e3cd2358c3d91ee5ec5d26cec26a1 /src/vm/mod.rs
parent83d575f8a143ba031f1aa43995f6809470b8b15c (diff)
Initial VM work
Diffstat (limited to 'src/vm/mod.rs')
-rw-r--r--src/vm/mod.rs272
1 files changed, 245 insertions, 27 deletions
diff --git a/src/vm/mod.rs b/src/vm/mod.rs
index 92a99d7..5f1e86c 100644
--- a/src/vm/mod.rs
+++ b/src/vm/mod.rs
@@ -5,16 +5,23 @@
 //!
 //! This is the stuff that actually matches the pattern.
 
+use std::borrow::Cow;
+use std::cell::Cell;
+use std::collections::BTreeMap;
+use std::marker::PhantomData;
+
+use indexmap::IndexMap;
 use regex::Regex;
 use serde::Serialize;
+use these::These;
 
 use crate::Predicate;
 //use crate::errors::MatchError;
 
 mod de;
 
-pub use de::Unpacker;
-pub use de::Packer;
+pub(crate) use de::Unpacker;
+pub(crate) use de::Packer;
 
 /// Max depth for VM/serde recursion.
 pub(crate) const MAX_CALLS: usize = 250;
@@ -24,16 +31,15 @@ pub(crate) const MAX_CALLS: usize = 250;
 // maybe we should use a builder for this?
 /// The constant pool for a pattern.
 pub(crate) struct PatternConstants<O: Serialize> {
-    // last proto is implicitly the whole pattern.
+    /// The protos ("functions") in a pattern.
+    ///
+    /// The last proto is implicitly the main function/entry point.
     pub(crate) protos: Vec<Vec<PatternElement>>,
     // Note that we can borrow these when creating the output map.
     // https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f
     pub(crate) strings: Vec<String>,
     pub(crate) regices: Vec<Regex>,
     pub(crate) predicates: Vec<Box<Predicate>>,
-    // NOTE these are part of the constant pool and so have lifetime analogous
-    // to 'a (consistently used to indicate constant pool lifetime) when used
-    // elsewhere. In particular, they can't be yielded by the iterator.
     pub(crate) defs: Vec<O>,
 }
 
@@ -72,13 +78,102 @@ impl<O: Serialize> std::fmt::Debug for PatternConstants<O> {
 /// A pattern element.
 #[derive(Copy, Clone, Debug)]
 pub(crate) enum PatternElement {
+    /// A value is the entry point to a pattern. If present, it's the first
+    /// element.
+    Value {
+        /// The index of the (string) name to apply to this value and/or the
+        /// expected value of this entry.
+        name_and_value: These<usize, Value>,
+    },
+    /// A tag is the core iterative element, and is repeated up to the desired
+    /// depth of iteration.
+    Tag {
+        /// The index of the (proto) key to match against.
+        key_subtree: Option<usize>,
+        /// The index of the (string) name to apply to this value and/or the
+        /// expected value of this entry.
+        name_and_value: These<usize, Value>,
+    },
+    /// A value subtree is a subtree for values.
+    ///
+    /// It is applied *after* tags, and thus any value subtrees come last in
+    /// a pattern's elements.
+    ValueSubtree {
+        /// The proto index of the subtree.
+        index: usize,
+        /// Whether to allow this value subtree to produce no results.
+        ///
+        /// By default, a datafu pattern only matches a tree if every branch of
+        /// the tree produces results. This enables opting out of that.
+        optional: bool,
+    },
+}
+
+/// A value matcher.
+#[derive(Copy, Clone, Debug)]
+pub(crate) enum Value {
+    /// The value must match the specified string.
+    String {
+        /// The index of the string.
+        index: usize,
+        /// Whether to skip non-matching values, instead of erroring.
+        skippable: bool,
+    },
+    /// The value must match the specified regex.
+    Regex {
+        /// The index of the regex.
+        index: usize,
+        /// Whether to skip non-matching values, instead of erroring.
+        skippable: bool,
+    },
+    // /// The value must match the specified integer.
+    // Integer {
+    //     /// The integer.
+    //     value: usize,
+    //     /// Whether to skip non-matching values, instead of erroring.
+    //     skippable: bool,
+    // },
+    // /// The value must match the specified integer range.
+    // Range {
+    //     /// The range.
+    //     value: Range<usize>,
+    //     /// Whether to skip non-matching values, instead of erroring.
+    //     skippable: bool,
+    // },
+    // /// The value must match the specified predicate.
+    // Predicate {
+    //     /// The index of the predicate.
+    //     index: usize,
+    //     /// Whether to skip non-matching values, instead of erroring.
+    //     skippable: bool,
+    // },
+    // /// The value must match the specified parameter.
+    // Paameter {
+    //     /// The index of the parameter.
+    //     index: usize,
+    //     /// Whether to skip non-matching values, instead of erroring.
+    //     skippable: bool,
+    // },
+    /// The value must have the specified type.
+    Type {
+        /// The expected type.
+        ty: Type,
+        /// Whether to skip non-matching values, instead of erroring.
+        skippable: bool,
+    },
+}
+
+/// A pattern token.
+#[derive(Copy, Clone, Debug)]
+pub(crate) enum PatternToken {
+    /// Start of a tag.
     Arrow,
 
     Identifier(usize),
 
-    StringKey(usize, bool),
-    RegexKey(usize, bool),
-    ParameterKey(usize, bool),
+    String(usize, bool),
+    Regex(usize, bool),
+    Parameter(usize, bool),
     KeySubtree(usize, bool),
     ValueSubtree(usize, bool),
 
@@ -109,6 +204,7 @@ pub(crate) enum PatternElement {
         bool,
     ),
 
+    /// End of a tag.
     End,
 }
 
@@ -118,6 +214,8 @@ pub(crate) enum PatternElement {
 /// `Deserializer::deserialize_string`).
 #[derive(Copy, Clone, Debug)]
 pub(crate) enum Type {
+    Any,
+    IgnoredAny,
     Bool,
     I8,
     I16,
@@ -139,29 +237,149 @@ pub(crate) enum Type {
     Option,
     Unit,
     Seq,
-    Tuple(usize),
+    // Tuple(usize),
     Map,
-    // these aren't really supported:
-    // UnitStruct, UnitVariant, NewtypeStruct, NewtypeVariant, TupleStruct,
-    // TupleVariant, Struct, StructVariant
-    // instead we use type trees for that.
-    /// Adapter for Type Trees. See `crate::type_tree` for more details.
-    Of {
-        /// The type tree index (in `PatternConstants.type_trees`).
-        type_tree: usize,
+    // // these aren't really supported:
+    // // UnitStruct, UnitVariant, NewtypeStruct, NewtypeVariant, TupleStruct,
+    // // TupleVariant, Struct, StructVariant
+    // // instead we use type trees for that.
+    // /// Adapter for Type Trees. See `crate::type_tree` for more details.
+    // Of {
+    //     /// The type tree index (in `PatternConstants.type_trees`).
+    //     type_tree: usize,
+    // },
+}
+
+/// The types which can be deserialized by serde.
+///
+/// We guess this is basically the same thing as a serde_value?
+#[derive(Clone, Debug, PartialEq)]
+pub(crate) enum SerdeObject<'de> {
+    Bool(bool),
+    I8(i8),
+    I16(i16),
+    I32(i32),
+    I64(i64),
+    I128(i128),
+    U8(u8),
+    U16(u16),
+    U32(u32),
+    U64(u64),
+    U128(u128),
+    F32(f32),
+    F64(f64),
+    Char(char),
+    Str(Cow<'de, str>),
+    Bytes(Cow<'de, [u8]>),
+    Some(Box<SerdeObject<'de>>),
+    None,
+    Unit,
+    Seq(Vec<SerdeObject<'de>>),
+    // NOTE: support for multimaps!
+    Map(Vec<(SerdeObject<'de>, SerdeObject<'de>)>),
+    NewtypeStruct(Box<SerdeObject<'de>>),
+    // NOTE: currently unused!
+    #[allow(unused)]
+    Enum {
+        variant: Box<SerdeObject<'de>>,
+        data: Box<SerdeObject<'de>>,
     },
 }
 
-pub struct Pack;
+impl<'de> SerdeObject<'de> {
+    fn check<E: serde::de::Error>(self, ty: Option<Type>) -> Result<Self, E> {
+        let ty = match ty {
+            None => return Ok(self),
+            Some(ty) => ty,
+        };
+        match (ty, self) {
+            | (Type::Any, v)
+            | (Type::IgnoredAny, v)
+            => Ok(v),
+            | (Type::Bool, v @ SerdeObject::Bool(_))
+            | (Type::I8, v @ SerdeObject::I8(_))
+            | (Type::I16, v @ SerdeObject::I16(_))
+            | (Type::I32, v @ SerdeObject::I32(_))
+            | (Type::I64, v @ SerdeObject::I64(_))
+            | (Type::I128, v @ SerdeObject::I128(_))
+            | (Type::U8, v @ SerdeObject::U8(_))
+            | (Type::U16, v @ SerdeObject::U16(_))
+            | (Type::U32, v @ SerdeObject::U32(_))
+            | (Type::U64, v @ SerdeObject::U64(_))
+            | (Type::U128, v @ SerdeObject::U128(_))
+            | (Type::F32, v @ SerdeObject::F32(_))
+            | (Type::F64, v @ SerdeObject::F64(_))
+            | (Type::Char, v @ SerdeObject::Char(_))
+            | (Type::Str, v @ SerdeObject::Str(_))
+            | (Type::String, v @ SerdeObject::Str(_))
+            | (Type::Bytes, v @ SerdeObject::Bytes(_))
+            | (Type::ByteBuf, v @ SerdeObject::Bytes(_))
+            | (Type::Option, v @ SerdeObject::None)
+            | (Type::Option, v @ SerdeObject::Some(_))
+            | (Type::Unit, v @ SerdeObject::Unit)
+            | (Type::Seq, v @ SerdeObject::Seq(_))
+            | (Type::Map, v @ SerdeObject::Map(_))
+            => Ok(v),
+            _ => todo!(),
+        }
+    }
+}
 
-//struct Frame<'a, 'b, T: PatternTypes> {
-//    //obj: RefOwn<'b, T::Ref, T::Own>,
-//    ops: &'a [PatternElement],
-//    iar: Option<usize>,
-//    depth: usize,
-//    path: Vec<Holder<'a, 'b, T>>,
-//    in_key: bool,
-//}
+impl<'de, E> serde::de::IntoDeserializer<'de, E> for SerdeObject<'de>
+where
+    E: serde::de::Error,
+{
+    type Deserializer = self::de::SerdeObjectDeserializer<'de, E>;
+
+    fn into_deserializer(self) -> Self::Deserializer {
+        Self::Deserializer {
+            obj: self,
+            value: None,
+            _e: PhantomData,
+        }
+    }
+}
+
+#[derive(Clone, Debug, Default)]
+pub struct Pack<'pat, 'de> {
+    subpacks: Vec<IndexMap<&'pat str, (Pack<'pat, 'de>, SerdeObject<'de>)>>,
+}
+
+/// The Datafu interpreter, sorta.
+pub(crate) struct Interpreter<'pat, 'state, O: Serialize> {
+    /// The pattern currently being processed.
+    pat: &'pat PatternConstants<O>,
+    /// The error override (if any).
+    error: &'state Cell<Option<crate::errors::MatchError>>,
+}
+
+struct Frame<'pat, 'de> {
+    /// The instructions/function currently being processed.
+    ops: &'pat [PatternElement],
+    /// The instruction index being processed.
+    iar: Option<usize>,
+    /// Elements collected while processing this frame?
+    path: Vec<Pack<'pat, 'de>>,
+}
+
+impl<'pat, 'state, O: Serialize> Interpreter<'pat, 'state, O> {
+    pub(crate) fn new(
+        pat: &'pat PatternConstants<O>,
+        error: &'state mut Option<crate::errors::MatchError>,
+    ) -> Self {
+        Self {
+            pat: pat,
+            error: Cell::from_mut(error),
+            //frames: vec![
+            //    Frame {
+            //        ops: &pat.protos[0],
+            //        iar: None,
+            //        path: Vec::new(),
+            //    }
+            //],
+        }
+    }
+}
 //
 //impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> {
 //    /// Advances the instruction address register.