diff options
author | SoniEx2 <endermoneymod@gmail.com> | 2022-10-04 22:44:46 -0300 |
---|---|---|
committer | SoniEx2 <endermoneymod@gmail.com> | 2022-10-04 22:44:46 -0300 |
commit | f0e944696144016ca59aaed02381f7ea9d1ef848 (patch) | |
tree | d9c3232b920e3cd2358c3d91ee5ec5d26cec26a1 /src/vm/mod.rs | |
parent | 83d575f8a143ba031f1aa43995f6809470b8b15c (diff) |
Initial VM work
Diffstat (limited to 'src/vm/mod.rs')
-rw-r--r-- | src/vm/mod.rs | 272 |
1 files changed, 245 insertions, 27 deletions
diff --git a/src/vm/mod.rs b/src/vm/mod.rs index 92a99d7..5f1e86c 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -5,16 +5,23 @@ //! //! This is the stuff that actually matches the pattern. +use std::borrow::Cow; +use std::cell::Cell; +use std::collections::BTreeMap; +use std::marker::PhantomData; + +use indexmap::IndexMap; use regex::Regex; use serde::Serialize; +use these::These; use crate::Predicate; //use crate::errors::MatchError; mod de; -pub use de::Unpacker; -pub use de::Packer; +pub(crate) use de::Unpacker; +pub(crate) use de::Packer; /// Max depth for VM/serde recursion. pub(crate) const MAX_CALLS: usize = 250; @@ -24,16 +31,15 @@ pub(crate) const MAX_CALLS: usize = 250; // maybe we should use a builder for this? /// The constant pool for a pattern. pub(crate) struct PatternConstants<O: Serialize> { - // last proto is implicitly the whole pattern. + /// The protos ("functions") in a pattern. + /// + /// The last proto is implicitly the main function/entry point. pub(crate) protos: Vec<Vec<PatternElement>>, // Note that we can borrow these when creating the output map. // https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f pub(crate) strings: Vec<String>, pub(crate) regices: Vec<Regex>, pub(crate) predicates: Vec<Box<Predicate>>, - // NOTE these are part of the constant pool and so have lifetime analogous - // to 'a (consistently used to indicate constant pool lifetime) when used - // elsewhere. In particular, they can't be yielded by the iterator. pub(crate) defs: Vec<O>, } @@ -72,13 +78,102 @@ impl<O: Serialize> std::fmt::Debug for PatternConstants<O> { /// A pattern element. #[derive(Copy, Clone, Debug)] pub(crate) enum PatternElement { + /// A value is the entry point to a pattern. If present, it's the first + /// element. + Value { + /// The index of the (string) name to apply to this value and/or the + /// expected value of this entry. + name_and_value: These<usize, Value>, + }, + /// A tag is the core iterative element, and is repeated up to the desired + /// depth of iteration. + Tag { + /// The index of the (proto) key to match against. + key_subtree: Option<usize>, + /// The index of the (string) name to apply to this value and/or the + /// expected value of this entry. + name_and_value: These<usize, Value>, + }, + /// A value subtree is a subtree for values. + /// + /// It is applied *after* tags, and thus any value subtrees come last in + /// a pattern's elements. + ValueSubtree { + /// The proto index of the subtree. + index: usize, + /// Whether to allow this value subtree to produce no results. + /// + /// By default, a datafu pattern only matches a tree if every branch of + /// the tree produces results. This enables opting out of that. + optional: bool, + }, +} + +/// A value matcher. +#[derive(Copy, Clone, Debug)] +pub(crate) enum Value { + /// The value must match the specified string. + String { + /// The index of the string. + index: usize, + /// Whether to skip non-matching values, instead of erroring. + skippable: bool, + }, + /// The value must match the specified regex. + Regex { + /// The index of the regex. + index: usize, + /// Whether to skip non-matching values, instead of erroring. + skippable: bool, + }, + // /// The value must match the specified integer. + // Integer { + // /// The integer. + // value: usize, + // /// Whether to skip non-matching values, instead of erroring. + // skippable: bool, + // }, + // /// The value must match the specified integer range. + // Range { + // /// The range. + // value: Range<usize>, + // /// Whether to skip non-matching values, instead of erroring. + // skippable: bool, + // }, + // /// The value must match the specified predicate. + // Predicate { + // /// The index of the predicate. + // index: usize, + // /// Whether to skip non-matching values, instead of erroring. + // skippable: bool, + // }, + // /// The value must match the specified parameter. + // Paameter { + // /// The index of the parameter. + // index: usize, + // /// Whether to skip non-matching values, instead of erroring. + // skippable: bool, + // }, + /// The value must have the specified type. + Type { + /// The expected type. + ty: Type, + /// Whether to skip non-matching values, instead of erroring. + skippable: bool, + }, +} + +/// A pattern token. +#[derive(Copy, Clone, Debug)] +pub(crate) enum PatternToken { + /// Start of a tag. Arrow, Identifier(usize), - StringKey(usize, bool), - RegexKey(usize, bool), - ParameterKey(usize, bool), + String(usize, bool), + Regex(usize, bool), + Parameter(usize, bool), KeySubtree(usize, bool), ValueSubtree(usize, bool), @@ -109,6 +204,7 @@ pub(crate) enum PatternElement { bool, ), + /// End of a tag. End, } @@ -118,6 +214,8 @@ pub(crate) enum PatternElement { /// `Deserializer::deserialize_string`). #[derive(Copy, Clone, Debug)] pub(crate) enum Type { + Any, + IgnoredAny, Bool, I8, I16, @@ -139,29 +237,149 @@ pub(crate) enum Type { Option, Unit, Seq, - Tuple(usize), + // Tuple(usize), Map, - // these aren't really supported: - // UnitStruct, UnitVariant, NewtypeStruct, NewtypeVariant, TupleStruct, - // TupleVariant, Struct, StructVariant - // instead we use type trees for that. - /// Adapter for Type Trees. See `crate::type_tree` for more details. - Of { - /// The type tree index (in `PatternConstants.type_trees`). - type_tree: usize, + // // these aren't really supported: + // // UnitStruct, UnitVariant, NewtypeStruct, NewtypeVariant, TupleStruct, + // // TupleVariant, Struct, StructVariant + // // instead we use type trees for that. + // /// Adapter for Type Trees. See `crate::type_tree` for more details. + // Of { + // /// The type tree index (in `PatternConstants.type_trees`). + // type_tree: usize, + // }, +} + +/// The types which can be deserialized by serde. +/// +/// We guess this is basically the same thing as a serde_value? +#[derive(Clone, Debug, PartialEq)] +pub(crate) enum SerdeObject<'de> { + Bool(bool), + I8(i8), + I16(i16), + I32(i32), + I64(i64), + I128(i128), + U8(u8), + U16(u16), + U32(u32), + U64(u64), + U128(u128), + F32(f32), + F64(f64), + Char(char), + Str(Cow<'de, str>), + Bytes(Cow<'de, [u8]>), + Some(Box<SerdeObject<'de>>), + None, + Unit, + Seq(Vec<SerdeObject<'de>>), + // NOTE: support for multimaps! + Map(Vec<(SerdeObject<'de>, SerdeObject<'de>)>), + NewtypeStruct(Box<SerdeObject<'de>>), + // NOTE: currently unused! + #[allow(unused)] + Enum { + variant: Box<SerdeObject<'de>>, + data: Box<SerdeObject<'de>>, }, } -pub struct Pack; +impl<'de> SerdeObject<'de> { + fn check<E: serde::de::Error>(self, ty: Option<Type>) -> Result<Self, E> { + let ty = match ty { + None => return Ok(self), + Some(ty) => ty, + }; + match (ty, self) { + | (Type::Any, v) + | (Type::IgnoredAny, v) + => Ok(v), + | (Type::Bool, v @ SerdeObject::Bool(_)) + | (Type::I8, v @ SerdeObject::I8(_)) + | (Type::I16, v @ SerdeObject::I16(_)) + | (Type::I32, v @ SerdeObject::I32(_)) + | (Type::I64, v @ SerdeObject::I64(_)) + | (Type::I128, v @ SerdeObject::I128(_)) + | (Type::U8, v @ SerdeObject::U8(_)) + | (Type::U16, v @ SerdeObject::U16(_)) + | (Type::U32, v @ SerdeObject::U32(_)) + | (Type::U64, v @ SerdeObject::U64(_)) + | (Type::U128, v @ SerdeObject::U128(_)) + | (Type::F32, v @ SerdeObject::F32(_)) + | (Type::F64, v @ SerdeObject::F64(_)) + | (Type::Char, v @ SerdeObject::Char(_)) + | (Type::Str, v @ SerdeObject::Str(_)) + | (Type::String, v @ SerdeObject::Str(_)) + | (Type::Bytes, v @ SerdeObject::Bytes(_)) + | (Type::ByteBuf, v @ SerdeObject::Bytes(_)) + | (Type::Option, v @ SerdeObject::None) + | (Type::Option, v @ SerdeObject::Some(_)) + | (Type::Unit, v @ SerdeObject::Unit) + | (Type::Seq, v @ SerdeObject::Seq(_)) + | (Type::Map, v @ SerdeObject::Map(_)) + => Ok(v), + _ => todo!(), + } + } +} -//struct Frame<'a, 'b, T: PatternTypes> { -// //obj: RefOwn<'b, T::Ref, T::Own>, -// ops: &'a [PatternElement], -// iar: Option<usize>, -// depth: usize, -// path: Vec<Holder<'a, 'b, T>>, -// in_key: bool, -//} +impl<'de, E> serde::de::IntoDeserializer<'de, E> for SerdeObject<'de> +where + E: serde::de::Error, +{ + type Deserializer = self::de::SerdeObjectDeserializer<'de, E>; + + fn into_deserializer(self) -> Self::Deserializer { + Self::Deserializer { + obj: self, + value: None, + _e: PhantomData, + } + } +} + +#[derive(Clone, Debug, Default)] +pub struct Pack<'pat, 'de> { + subpacks: Vec<IndexMap<&'pat str, (Pack<'pat, 'de>, SerdeObject<'de>)>>, +} + +/// The Datafu interpreter, sorta. +pub(crate) struct Interpreter<'pat, 'state, O: Serialize> { + /// The pattern currently being processed. + pat: &'pat PatternConstants<O>, + /// The error override (if any). + error: &'state Cell<Option<crate::errors::MatchError>>, +} + +struct Frame<'pat, 'de> { + /// The instructions/function currently being processed. + ops: &'pat [PatternElement], + /// The instruction index being processed. + iar: Option<usize>, + /// Elements collected while processing this frame? + path: Vec<Pack<'pat, 'de>>, +} + +impl<'pat, 'state, O: Serialize> Interpreter<'pat, 'state, O> { + pub(crate) fn new( + pat: &'pat PatternConstants<O>, + error: &'state mut Option<crate::errors::MatchError>, + ) -> Self { + Self { + pat: pat, + error: Cell::from_mut(error), + //frames: vec![ + // Frame { + // ops: &pat.protos[0], + // iar: None, + // path: Vec::new(), + // } + //], + } + } +} // //impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> { // /// Advances the instruction address register. |