From f0e944696144016ca59aaed02381f7ea9d1ef848 Mon Sep 17 00:00:00 2001 From: SoniEx2 Date: Tue, 4 Oct 2022 22:44:46 -0300 Subject: Initial VM work --- src/vm/mod.rs | 272 ++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 245 insertions(+), 27 deletions(-) (limited to 'src/vm/mod.rs') diff --git a/src/vm/mod.rs b/src/vm/mod.rs index 92a99d7..5f1e86c 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -5,16 +5,23 @@ //! //! This is the stuff that actually matches the pattern. +use std::borrow::Cow; +use std::cell::Cell; +use std::collections::BTreeMap; +use std::marker::PhantomData; + +use indexmap::IndexMap; use regex::Regex; use serde::Serialize; +use these::These; use crate::Predicate; //use crate::errors::MatchError; mod de; -pub use de::Unpacker; -pub use de::Packer; +pub(crate) use de::Unpacker; +pub(crate) use de::Packer; /// Max depth for VM/serde recursion. pub(crate) const MAX_CALLS: usize = 250; @@ -24,16 +31,15 @@ pub(crate) const MAX_CALLS: usize = 250; // maybe we should use a builder for this? /// The constant pool for a pattern. pub(crate) struct PatternConstants { - // last proto is implicitly the whole pattern. + /// The protos ("functions") in a pattern. + /// + /// The last proto is implicitly the main function/entry point. pub(crate) protos: Vec>, // Note that we can borrow these when creating the output map. // https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f pub(crate) strings: Vec, pub(crate) regices: Vec, pub(crate) predicates: Vec>, - // NOTE these are part of the constant pool and so have lifetime analogous - // to 'a (consistently used to indicate constant pool lifetime) when used - // elsewhere. In particular, they can't be yielded by the iterator. pub(crate) defs: Vec, } @@ -72,13 +78,102 @@ impl std::fmt::Debug for PatternConstants { /// A pattern element. #[derive(Copy, Clone, Debug)] pub(crate) enum PatternElement { + /// A value is the entry point to a pattern. If present, it's the first + /// element. + Value { + /// The index of the (string) name to apply to this value and/or the + /// expected value of this entry. + name_and_value: These, + }, + /// A tag is the core iterative element, and is repeated up to the desired + /// depth of iteration. + Tag { + /// The index of the (proto) key to match against. + key_subtree: Option, + /// The index of the (string) name to apply to this value and/or the + /// expected value of this entry. + name_and_value: These, + }, + /// A value subtree is a subtree for values. + /// + /// It is applied *after* tags, and thus any value subtrees come last in + /// a pattern's elements. + ValueSubtree { + /// The proto index of the subtree. + index: usize, + /// Whether to allow this value subtree to produce no results. + /// + /// By default, a datafu pattern only matches a tree if every branch of + /// the tree produces results. This enables opting out of that. + optional: bool, + }, +} + +/// A value matcher. +#[derive(Copy, Clone, Debug)] +pub(crate) enum Value { + /// The value must match the specified string. + String { + /// The index of the string. + index: usize, + /// Whether to skip non-matching values, instead of erroring. + skippable: bool, + }, + /// The value must match the specified regex. + Regex { + /// The index of the regex. + index: usize, + /// Whether to skip non-matching values, instead of erroring. + skippable: bool, + }, + // /// The value must match the specified integer. + // Integer { + // /// The integer. + // value: usize, + // /// Whether to skip non-matching values, instead of erroring. + // skippable: bool, + // }, + // /// The value must match the specified integer range. + // Range { + // /// The range. + // value: Range, + // /// Whether to skip non-matching values, instead of erroring. + // skippable: bool, + // }, + // /// The value must match the specified predicate. + // Predicate { + // /// The index of the predicate. + // index: usize, + // /// Whether to skip non-matching values, instead of erroring. + // skippable: bool, + // }, + // /// The value must match the specified parameter. + // Paameter { + // /// The index of the parameter. + // index: usize, + // /// Whether to skip non-matching values, instead of erroring. + // skippable: bool, + // }, + /// The value must have the specified type. + Type { + /// The expected type. + ty: Type, + /// Whether to skip non-matching values, instead of erroring. + skippable: bool, + }, +} + +/// A pattern token. +#[derive(Copy, Clone, Debug)] +pub(crate) enum PatternToken { + /// Start of a tag. Arrow, Identifier(usize), - StringKey(usize, bool), - RegexKey(usize, bool), - ParameterKey(usize, bool), + String(usize, bool), + Regex(usize, bool), + Parameter(usize, bool), KeySubtree(usize, bool), ValueSubtree(usize, bool), @@ -109,6 +204,7 @@ pub(crate) enum PatternElement { bool, ), + /// End of a tag. End, } @@ -118,6 +214,8 @@ pub(crate) enum PatternElement { /// `Deserializer::deserialize_string`). #[derive(Copy, Clone, Debug)] pub(crate) enum Type { + Any, + IgnoredAny, Bool, I8, I16, @@ -139,29 +237,149 @@ pub(crate) enum Type { Option, Unit, Seq, - Tuple(usize), + // Tuple(usize), Map, - // these aren't really supported: - // UnitStruct, UnitVariant, NewtypeStruct, NewtypeVariant, TupleStruct, - // TupleVariant, Struct, StructVariant - // instead we use type trees for that. - /// Adapter for Type Trees. See `crate::type_tree` for more details. - Of { - /// The type tree index (in `PatternConstants.type_trees`). - type_tree: usize, + // // these aren't really supported: + // // UnitStruct, UnitVariant, NewtypeStruct, NewtypeVariant, TupleStruct, + // // TupleVariant, Struct, StructVariant + // // instead we use type trees for that. + // /// Adapter for Type Trees. See `crate::type_tree` for more details. + // Of { + // /// The type tree index (in `PatternConstants.type_trees`). + // type_tree: usize, + // }, +} + +/// The types which can be deserialized by serde. +/// +/// We guess this is basically the same thing as a serde_value? +#[derive(Clone, Debug, PartialEq)] +pub(crate) enum SerdeObject<'de> { + Bool(bool), + I8(i8), + I16(i16), + I32(i32), + I64(i64), + I128(i128), + U8(u8), + U16(u16), + U32(u32), + U64(u64), + U128(u128), + F32(f32), + F64(f64), + Char(char), + Str(Cow<'de, str>), + Bytes(Cow<'de, [u8]>), + Some(Box>), + None, + Unit, + Seq(Vec>), + // NOTE: support for multimaps! + Map(Vec<(SerdeObject<'de>, SerdeObject<'de>)>), + NewtypeStruct(Box>), + // NOTE: currently unused! + #[allow(unused)] + Enum { + variant: Box>, + data: Box>, }, } -pub struct Pack; +impl<'de> SerdeObject<'de> { + fn check(self, ty: Option) -> Result { + let ty = match ty { + None => return Ok(self), + Some(ty) => ty, + }; + match (ty, self) { + | (Type::Any, v) + | (Type::IgnoredAny, v) + => Ok(v), + | (Type::Bool, v @ SerdeObject::Bool(_)) + | (Type::I8, v @ SerdeObject::I8(_)) + | (Type::I16, v @ SerdeObject::I16(_)) + | (Type::I32, v @ SerdeObject::I32(_)) + | (Type::I64, v @ SerdeObject::I64(_)) + | (Type::I128, v @ SerdeObject::I128(_)) + | (Type::U8, v @ SerdeObject::U8(_)) + | (Type::U16, v @ SerdeObject::U16(_)) + | (Type::U32, v @ SerdeObject::U32(_)) + | (Type::U64, v @ SerdeObject::U64(_)) + | (Type::U128, v @ SerdeObject::U128(_)) + | (Type::F32, v @ SerdeObject::F32(_)) + | (Type::F64, v @ SerdeObject::F64(_)) + | (Type::Char, v @ SerdeObject::Char(_)) + | (Type::Str, v @ SerdeObject::Str(_)) + | (Type::String, v @ SerdeObject::Str(_)) + | (Type::Bytes, v @ SerdeObject::Bytes(_)) + | (Type::ByteBuf, v @ SerdeObject::Bytes(_)) + | (Type::Option, v @ SerdeObject::None) + | (Type::Option, v @ SerdeObject::Some(_)) + | (Type::Unit, v @ SerdeObject::Unit) + | (Type::Seq, v @ SerdeObject::Seq(_)) + | (Type::Map, v @ SerdeObject::Map(_)) + => Ok(v), + _ => todo!(), + } + } +} -//struct Frame<'a, 'b, T: PatternTypes> { -// //obj: RefOwn<'b, T::Ref, T::Own>, -// ops: &'a [PatternElement], -// iar: Option, -// depth: usize, -// path: Vec>, -// in_key: bool, -//} +impl<'de, E> serde::de::IntoDeserializer<'de, E> for SerdeObject<'de> +where + E: serde::de::Error, +{ + type Deserializer = self::de::SerdeObjectDeserializer<'de, E>; + + fn into_deserializer(self) -> Self::Deserializer { + Self::Deserializer { + obj: self, + value: None, + _e: PhantomData, + } + } +} + +#[derive(Clone, Debug, Default)] +pub struct Pack<'pat, 'de> { + subpacks: Vec, SerdeObject<'de>)>>, +} + +/// The Datafu interpreter, sorta. +pub(crate) struct Interpreter<'pat, 'state, O: Serialize> { + /// The pattern currently being processed. + pat: &'pat PatternConstants, + /// The error override (if any). + error: &'state Cell>, +} + +struct Frame<'pat, 'de> { + /// The instructions/function currently being processed. + ops: &'pat [PatternElement], + /// The instruction index being processed. + iar: Option, + /// Elements collected while processing this frame? + path: Vec>, +} + +impl<'pat, 'state, O: Serialize> Interpreter<'pat, 'state, O> { + pub(crate) fn new( + pat: &'pat PatternConstants, + error: &'state mut Option, + ) -> Self { + Self { + pat: pat, + error: Cell::from_mut(error), + //frames: vec![ + // Frame { + // ops: &pat.protos[0], + // iar: None, + // path: Vec::new(), + // } + //], + } + } +} // //impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> { // /// Advances the instruction address register. -- cgit 1.4.1