diff options
author | SoniEx2 <endermoneymod@gmail.com> | 2022-10-04 22:44:46 -0300 |
---|---|---|
committer | SoniEx2 <endermoneymod@gmail.com> | 2022-10-04 22:44:46 -0300 |
commit | f0e944696144016ca59aaed02381f7ea9d1ef848 (patch) | |
tree | d9c3232b920e3cd2358c3d91ee5ec5d26cec26a1 | |
parent | 83d575f8a143ba031f1aa43995f6809470b8b15c (diff) |
Initial VM work
-rw-r--r-- | Cargo.toml | 9 | ||||
-rw-r--r-- | src/errors.rs | 13 | ||||
-rw-r--r-- | src/lib.rs | 15 | ||||
-rw-r--r-- | src/parser.rs | 223 | ||||
-rw-r--r-- | src/pattern.rs | 66 | ||||
-rw-r--r-- | src/type_tree.rs | 46 | ||||
-rw-r--r-- | src/vm/de.rs | 509 | ||||
-rw-r--r-- | src/vm/mod.rs | 272 | ||||
-rw-r--r-- | tests/basic_match.rs | 5 | ||||
-rw-r--r-- | tests/parser_prop.rs | 2 |
10 files changed, 968 insertions, 192 deletions
diff --git a/Cargo.toml b/Cargo.toml index a9e6de1..e72ee6c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,11 +12,14 @@ homepage = "https://soniex2.github.io/ganarchy/project/c0b4a8a326a320ac33c5d9d6b # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -regex = "1" +erased-serde = "0.3.21" impl_trait = "0.1.7" -serde_transmute = "0.1.4" +indexmap = "1.9.1" +regex = "1" serde = "1.0.140" -erased-serde = "0.3.21" +serde_transmute = "0.1.4" +smallvec = "1.10.0" +these = "2.0.0" [dev-dependencies] proptest = "1.0.0" diff --git a/src/errors.rs b/src/errors.rs index 9b0025e..877e5f5 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -12,6 +12,7 @@ /// "String" here refers to a string literal in the pattern, not the input /// string. The input string is referred to as "the pattern". #[derive(Debug)] +#[non_exhaustive] pub enum PatternError<'a> { // Syntax Errors: @@ -34,15 +35,21 @@ pub enum PatternError<'a> { UnknownPredicate(usize, &'a str), /// The pattern contains an invalid regex. Regex(usize, &'a str, ::regex::Error), + + // Extra Errors: + + /// The pattern contains a token which has not yet been implemented. + Unimplemented(usize, &'a str), } // /// These are errors that may be returned by the matcher when matching a // /// pattern. // #[derive(Clone, Debug)] +#[non_exhaustive] pub enum MatchError { -// /// Returned if the pattern nests too deeply. -// StackOverflow, -// /// Returned if the pattern rejects the input. + /// Returned if the pattern nests too deeply. + StackOverflow, + /// Returned if the pattern rejects the input. ValidationError, // /// Returned if the pattern attempts an unsupported operation. // /// diff --git a/src/lib.rs b/src/lib.rs index f71d81c..bda97f4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -84,13 +84,13 @@ //! Datafu Expressions follow the given syntax, in (pseudo-)extended BNF: //! //! ```text -//! expression ::= {arrow tag} {subvalue} -//! tag ::= identifier [arg] {predicate} | arg {predicate} +//! expression ::= [type] [predicate] {arrow tag} {subvalue} +//! tag ::= identifier [arg] [predicate] | arg [predicate] //! arg ::= parameter | literal | regex | keymatch //! //! arrow ::= '->' -//! keymatch ::= '[' {tag} {predicate} expression ']' -//! subvalue ::= '(' {predicate} expression ')' ['?'] +//! keymatch ::= '[' [name] expression ']' +//! subvalue ::= '(' expression ')' ['?'] //! ``` //! //! For a description of the terminals "parameter", "literal", "regex" and @@ -101,15 +101,16 @@ //! <!-- TODO --> pub mod errors; -pub mod type_tree; +//pub mod type_tree; mod parser; mod pattern; mod vm; pub use pattern::Pattern; +pub use pattern::PatternBuilder; /// A predicate. -pub type Predicate = dyn (Fn( +type Predicate = dyn (Fn( &mut dyn erased_serde::Deserializer<'_> ) -> bool) + Send + Sync; @@ -133,7 +134,7 @@ pub type Predicate = dyn (Fn( /// /// let x = datafu::pred(|v| String::deserialize(v).is_ok()); /// ``` -pub fn pred<F>(f: F) -> Box<Predicate> +fn pred<F>(f: F) -> Box<Predicate> where F: (Fn( &mut dyn erased_serde::Deserializer<'_> diff --git a/src/parser.rs b/src/parser.rs index c929653..97185ac 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -15,6 +15,8 @@ use crate::Predicate; use crate::errors::PatternError; use crate::vm::PatternConstants; use crate::vm::PatternElement; +use crate::vm::PatternToken; +use crate::vm::Type; /// try! with bools. (the b comes from bool.) macro_rules! bry { @@ -93,7 +95,7 @@ impl_trait! { O: Serialize, { fn start(value: &'r mut Parser<'s, PKey, OKey, O>) -> Self { - value.consts.protos.push(Default::default()); + value.tokens.push(Default::default()); Self { root: value, } @@ -101,9 +103,9 @@ impl_trait! { fn commit(self) -> usize { let mut self_ = ManuallyDrop::new(self); - let proto = self_.root.consts.protos.pop().unwrap(); + let proto = self_.root.tokens.pop().unwrap(); let id = self_.root.closed_subtrees.next().unwrap(); - self_.root.consts.protos.insert(id, proto); + self_.root.tokens.insert(id, proto); id } @@ -124,7 +126,7 @@ impl_trait! { impl trait Drop { fn drop(&mut self) { // remove "partial" proto - self.root.consts.protos.pop().expect("SubtreeHelper"); + self.root.tokens.pop().expect("SubtreeHelper"); } } } @@ -154,7 +156,7 @@ impl_trait! { O: Serialize, { fn start(value: &'r mut Parser<'s, PKey, OKey, O>) -> Self { - let len = value.consts.protos.last().unwrap().len(); + let len = value.tokens.last().unwrap().len(); Self { root: value, len : len, @@ -162,7 +164,7 @@ impl_trait! { } fn commit(self) { - std::mem::forget(self) + let _self = std::mem::ManuallyDrop::new(self); } impl trait std::ops::Deref { @@ -181,7 +183,7 @@ impl_trait! { impl trait Drop { fn drop(&mut self) { - let proto = self.root.consts.protos.last_mut().unwrap(); + let proto = self.root.tokens.last_mut().unwrap(); assert!(proto.len() >= self.len); while proto.len() > self.len { let _ = proto.pop(); @@ -203,6 +205,7 @@ where pred_ids: BTreeMap<PKey, usize>, obj_ids: BTreeMap<OKey, usize>, consts: PatternConstants<O>, + tokens: Vec<Vec<PatternToken>>, closed_subtrees: std::ops::RangeFrom<usize>, } @@ -215,6 +218,24 @@ where OKey: Borrow<str> + Ord, O: Serialize, { + /// Creates a new `Parser`. + fn new( + base: &'s str, + preds: Option<BTreeMap<PKey, Box<Predicate>>>, + objs: Option<BTreeMap<OKey, O>>, + ) -> Self { + Self { + base: base, + preds: preds, + objs: objs, + tokens: Default::default(), + pred_ids: Default::default(), + obj_ids: Default::default(), + consts: Default::default(), + closed_subtrees: (0..), + } + } + /// str_literal <- sp ( ( "'" str_char* ( "'" / ( !. -> ErrorStrEnd ) ) ( '?' -> MarkSkippable ) ) -> String ) sp /// str_char <- ( str_escape / [^%'] ) /// str_escape <- '%' ( '%' / "'" ) / ( ( '%' .? ) -> ErrorStrEscape ) @@ -260,8 +281,8 @@ where self_.consts.strings.push(string); self_.consts.strings.len() - 1 }); - let proto = self.consts.protos.last_mut().expect("protos"); - proto.push(PatternElement::StringKey(id, skippable)); + let proto = self.tokens.last_mut().expect("protos"); + proto.push(PatternToken::String(id, skippable)); *s = cursor; true })) @@ -318,14 +339,14 @@ where self_.consts.regices.push(re); self_.consts.regices.len() - 1 }); - let proto = self.consts.protos.last_mut().expect("protos"); - proto.push(PatternElement::RegexKey(id, skippable)); + let proto = self.tokens.last_mut().expect("protos"); + proto.push(PatternToken::Regex(id, skippable)); *s = cursor; true })) } - /// matcher <- sp ( parameter / str_literal / re_literal / key_subtree ) sp + /// matcher <- sp ( parameter / str_literal / re_literal / predicate / ty ) sp fn matcher(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { let mut cursor = *s; Ok(lblock!('matches: { @@ -334,7 +355,9 @@ where _ if self.parameter(&mut cursor)? => {}, _ if self.str_literal(&mut cursor)? => {}, _ if self.re_literal(&mut cursor)? => {}, - _ if self.key_subtree(&mut cursor)? => {}, + _ if self.predicate(&mut cursor)? => {}, + _ if self.ty(&mut cursor)? => {}, + //_ if self.key_subtree(&mut cursor)? => {}, _ => bry!('matches false), } self.sp(&mut cursor); @@ -343,7 +366,7 @@ where })) } - /// tag <- sp ( '->' -> Arrow ) sp ( matcher / name sp matcher? ) sp predicate* ( '' -> End ) sp + /// tag <- sp ( '->' -> Arrow ) sp key_subtree? sp ( matcher / name sp matcher? ) ( '' -> End ) sp fn tag(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { let mut cursor = *s; Ok(lblock!('matches: { @@ -351,22 +374,21 @@ where bry!('matches strip_prefix(&mut cursor, "->")); let mut self_ = TagHelper::start(&mut *self); { - let proto = self_.consts.protos.last_mut().expect("protos"); - proto.push(PatternElement::Arrow); + let proto = self_.tokens.last_mut().expect("protos"); + proto.push(PatternToken::Arrow); } self_.sp(&mut cursor); + let _ = self_.key_subtree(&mut cursor)?; + self_.sp(&mut cursor); if !self_.matcher(&mut cursor)? { bry!('matches self_.name(&mut cursor)?); self_.sp(&mut cursor); - // NOTE: *optional* let _ = self_.matcher(&mut cursor)?; } self_.sp(&mut cursor); - while self_.predicate(&mut cursor)? { - } { - let proto = self_.consts.protos.last_mut().expect("protos"); - proto.push(PatternElement::End); + let proto = self_.tokens.last_mut().expect("protos"); + proto.push(PatternToken::End); } self_.commit(); *s = cursor; @@ -409,8 +431,8 @@ where self.consts.strings.push(name.into()); self.consts.strings.len() - 1 }); - let proto = self.consts.protos.last_mut().expect("protos"); - proto.push(PatternElement::Identifier(id)); + let proto = self.tokens.last_mut().expect("protos"); + proto.push(PatternToken::Identifier(id)); self.sp(&mut cursor); *s = cursor; true @@ -443,22 +465,69 @@ where }, Ok, )?; - let proto = self.consts.protos.last_mut().expect("protos"); - proto.push(PatternElement::ParameterKey(id, skippable)); + let proto = self.tokens.last_mut().expect("protos"); + proto.push(PatternToken::Parameter(id, skippable)); self.sp(&mut cursor); *s = cursor; true })) } - /// predicate <- sp ':' ( '?'? -> MarkSkippable ) '$' ( identifier -> Predicate ) sp + /// ty <- sp ':' ( '?'? -> MarkSkippable ) ( identifier -> Type ) sp + fn ty(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { + let mut cursor = *s; + Ok(lblock!('matches: { + self.sp(&mut cursor); + bry!('matches strip_prefix(&mut cursor, ":")); + let custom = strip_prefix(&mut cursor, "$"); + let skippable = strip_prefix(&mut cursor, "?"); + let start = cursor; + bry!('matches self.identifier(&mut cursor)?); + let name = &start[..pos_of(start, cursor).unwrap_or(start.len())]; + let proto = self.tokens.last_mut().expect("protos"); + proto.push(PatternToken::Type(match name { + "bool" => Type::Bool, + "i8" => Type::I8, + "i16" => Type::I16, + "i32" => Type::I32, + "i64" => Type::I64, + "i128" => Type::I128, + "u8" => Type::U8, + "u16" => Type::U16, + "u32" => Type::U32, + "u64" => Type::U64, + "u128" => Type::U128, + "f32" => Type::F32, + "f64" => Type::F64, + "char" => Type::Char, + "str" => Type::Str, + "string" => Type::String, + "bytes" => Type::Bytes, + "bytebuf" => Type::ByteBuf, + "option" => Type::Option, + "unit" => Type::Unit, + "seq" => Type::Seq, + "map" => Type::Map, + //"tuple" => Type::Tuple(usize), + _ => { + let pos = pos_of(self.base, start).unwrap(); + return Err(PatternError::UnknownPredicate(pos, name)) + } + }, skippable)); + self.sp(&mut cursor); + *s = cursor; + true + })) + } + + /// predicate <- sp ':' '$' ( '?'? -> MarkSkippable ) ( identifier -> Predicate ) sp fn predicate(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { let mut cursor = *s; Ok(lblock!('matches: { self.sp(&mut cursor); bry!('matches strip_prefix(&mut cursor, ":")); + let custom = strip_prefix(&mut cursor, "$"); let skippable = strip_prefix(&mut cursor, "?"); - bry!('matches strip_prefix(&mut cursor, "$")); let start = cursor; bry!('matches self.identifier(&mut cursor)?); let name = &start[..pos_of(start, cursor).unwrap_or(start.len())]; @@ -478,15 +547,17 @@ where }, Ok, )?; - let proto = self.consts.protos.last_mut().expect("protos"); - proto.push(PatternElement::ApplyPredicate(id, skippable)); + let proto = self.tokens.last_mut().expect("protos"); + proto.push(PatternToken::ApplyPredicate(id, skippable)); self.sp(&mut cursor); *s = cursor; - true + let pos = pos_of(self.base, start).unwrap(); + return Err(PatternError::Unimplemented(pos, cursor)); + //true })) } - /// key_subtree <- sp '[' sp name? sp predicate* sp subtree sp ( ']' / unexpected_token / unexpected_end ) sp ( '?'? -> MarkSkippable ) + /// key_subtree <- sp '[' sp ( matcher / name sp matcher? ) sp subtree sp ( ']' / unexpected_token / unexpected_end ) sp ( '?'? -> MarkSkippable ) fn key_subtree(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { let mut cursor = *s; Ok(lblock!('matches: { @@ -494,9 +565,10 @@ where bry!('matches strip_prefix(&mut cursor, "[")); self.sp(&mut cursor); let mut subtree = SubtreeHelper::start(&mut *self); - subtree.name(&mut cursor)?; - subtree.sp(&mut cursor); - while subtree.predicate(&mut cursor)? { + if !subtree.matcher(&mut cursor)? { + bry!('matches subtree.name(&mut cursor)?); + subtree.sp(&mut cursor); + let _ = subtree.matcher(&mut cursor)?; } subtree.sp(&mut cursor); bry!('matches subtree.subtree(&mut cursor)?); @@ -512,13 +584,13 @@ where let skippable = strip_prefix(&mut cursor, "?"); *s = cursor; let id = subtree.commit(); - let proto = self.consts.protos.last_mut().expect("protos"); - proto.push(PatternElement::KeySubtree(id, skippable)); + let proto = self.tokens.last_mut().expect("protos"); + proto.push(PatternToken::KeySubtree(id, skippable)); true })) } - /// value_subtree <- sp '(' sp predicate* sp subtree sp ( ')' / unexpected_token / unexpected_end ) sp ( '?'? -> MarkSkippable ) + /// value_subtree <- sp '(' sp subtree sp ( ')' / unexpected_token / unexpected_end ) sp ( '?'? -> MarkSkippable ) fn value_subtree(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { let mut cursor = *s; Ok(lblock!('matches: { @@ -526,9 +598,6 @@ where bry!('matches strip_prefix(&mut cursor, "(")); self.sp(&mut cursor); let mut subtree = SubtreeHelper::start(&mut *self); - while subtree.predicate(&mut cursor)? { - } - subtree.sp(&mut cursor); bry!('matches subtree.subtree(&mut cursor)?); subtree.sp(&mut cursor); bry!('matches @@ -542,8 +611,8 @@ where let skippable = strip_prefix(&mut cursor, "?"); *s = cursor; let id = subtree.commit(); - let proto = self.consts.protos.last_mut().expect("protos"); - proto.push(PatternElement::ValueSubtree(id, skippable)); + let proto = self.tokens.last_mut().expect("protos"); + proto.push(PatternToken::ValueSubtree(id, skippable)); true })) } @@ -572,8 +641,8 @@ where } self.sp(&mut cursor); while self.value_subtree(&mut cursor)? { - let proto = self.consts.protos.last_mut().expect("protos"); - proto.push(PatternElement::End); + let proto = self.tokens.last_mut().expect("protos"); + proto.push(PatternToken::End); } self.sp(&mut cursor); *s = cursor; @@ -581,11 +650,12 @@ where Ok(true) } - /// pattern <- ( subtree / unexpected_token ) ( !. / unexpected_token ) + /// pattern <- ( matcher? sp subtree / unexpected_token ) ( !. / unexpected_token ) fn pattern(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { let mut cursor = *s; Ok(lblock!('matches: { let mut subtree = SubtreeHelper::start(&mut *self); + let _ = subtree.matcher(&mut cursor)?; bry!('matches subtree.subtree(&mut cursor)? || @@ -616,22 +686,14 @@ where OKey: Borrow<str> + Ord, O: Serialize, { - let mut parser = Parser::<'s, PKey, OKey, O> { - base: input, - preds: preds, - objs: objs, - pred_ids: Default::default(), - obj_ids: Default::default(), - consts: Default::default(), - closed_subtrees: (0..), - }; + let mut parser = Parser::new(input, preds, objs); let mut parsed = input; let matched = parser.pattern(&mut parsed)?; assert!(matched); assert_eq!(parsed, ""); - assert_eq!(parser.closed_subtrees.next().unwrap(), parser.consts.protos.len()); + assert_eq!(parser.closed_subtrees.next().unwrap(), parser.tokens.len()); Ok(parser.consts) } @@ -643,18 +705,20 @@ mod tests { use proptest::prelude::*; + fn prep_parser<'s>(s: &'s str) -> Parser<'s, &'static str, &'static str, ()> { + let mut parser = Parser::< + 's, &'static str, &'static str, () + >::new(s, None, None); + parser.tokens.push(Default::default()); + parser + } + #[test] fn test_identifier() { fn identifier_input<'s>(s: &mut &'s str) -> Result<bool, PatternError<'s>> { - let mut parser = Parser::<'s, &'static str, &'static str, ()> { - base: *s, - preds: None, - objs: None, - pred_ids: Default::default(), - obj_ids: Default::default(), - consts: Default::default(), - closed_subtrees: (0..), - }; + let mut parser = Parser::< + 's, &'static str, &'static str, () + >::new(s, None, None); parser.identifier(s) } for mut identifier in vec!["test", "Test", "_test", "_Test", "_123",] { @@ -672,20 +736,6 @@ mod tests { proptest! { #[test] fn test_no_crash(s in ".{0,4096}") { - fn prep_parser<'s>(s: &'s str) -> Parser<'s, &'static str, &'static str, ()> { - let mut parser = Parser::<'s, &'static str, &'static str, ()> { - base: s, - preds: None, - objs: None, - pred_ids: Default::default(), - obj_ids: Default::default(), - consts: Default::default(), - closed_subtrees: (0..), - }; - parser.consts.protos.push(Default::default()); - parser - } - let _ = prep_parser(&s).str_literal(&mut &*s); let _ = prep_parser(&s).re_literal(&mut &*s); let _ = prep_parser(&s).matcher(&mut &*s); @@ -695,6 +745,7 @@ mod tests { let _ = prep_parser(&s).name(&mut &*s); let _ = prep_parser(&s).parameter(&mut &*s); let _ = prep_parser(&s).predicate(&mut &*s); + let _ = prep_parser(&s).ty(&mut &*s); let _ = prep_parser(&s).key_subtree(&mut &*s); let _ = prep_parser(&s).value_subtree(&mut &*s); let _ = prep_parser(&s).unexpected_end(&mut &*s); @@ -703,5 +754,21 @@ mod tests { let _ = prep_parser(&s).pattern(&mut &*s); } } + + #[test] + fn test_pattern_tag() { + fn check_tag<'s>(s: &mut &'s str) -> ( + Result<bool, PatternError<'s>>, + Parser::< + 's, &'static str, &'static str, () + > + ) { + let mut parser = prep_parser(s); + let result = parser.tag(s); + (result, parser) + } + + // TODO + } } diff --git a/src/pattern.rs b/src/pattern.rs index fc3c8a7..0fc6acd 100644 --- a/src/pattern.rs +++ b/src/pattern.rs @@ -7,7 +7,7 @@ use std::borrow::Borrow; use std::collections::BTreeMap; use serde::de::Deserialize; -use serde::de::DeserializeSeed; +use serde::de::DeserializeSeed as _; use serde::de::Deserializer; use serde::ser::Serialize; @@ -26,7 +26,7 @@ use crate::vm::MAX_CALLS; /// use datafu::Pattern; /// /// let pattern = Pattern::<()>::compile::<&str, &str>( -/// "->'hello'", +/// "->['value']'hello'", /// None, None /// ).expect("failed to compile pattern"); /// ``` @@ -35,7 +35,28 @@ pub struct Pattern<O: Serialize> { } impl<O: Serialize> Pattern<O> { - /// Compiles the input into a pattern. + /// Matches the pattern against an input. + pub fn deserialize<'de, Der, De>(&self, der: Der) -> Result<De, Der::Error> + where + Der: Deserializer<'de>, + De: Deserialize<'de>, + { + let mut err = Default::default(); + let interp = vm::Interpreter::new(&self.consts, &mut err); + let pack = vm::Packer::new(interp, MAX_CALLS).deserialize(der)?; + let de = De::deserialize(vm::Unpacker::new(pack.0, MAX_CALLS)); + todo!() + } +} + +pub struct PatternBuilder<'s, PKey=&'static str, OKey=&'static str, O=()> { + input: &'s str, + preds: Option<BTreeMap<PKey, Box<Predicate>>>, + objs: Option<BTreeMap<OKey, O>>, +} + +impl<'s> PatternBuilder<'s> { + /// Creates a PatternBuilder for a given pattern. /// /// # Examples /// @@ -69,28 +90,25 @@ impl<O: Serialize> Pattern<O> { /// Some(preds), None /// ).expect("failed to compile pattern"); /// ``` - pub fn compile<'s, PKey, OKey>( - input: &'s str, - preds: Option<BTreeMap<PKey, Box<Predicate>>>, - objs: Option<BTreeMap<OKey, O>> - ) -> Result<Self, PatternError<'s>> - where - PKey: Borrow<str> + Ord, - OKey: Borrow<str> + Ord, - { - Ok(Self { - consts: parse(input, preds, objs)? - }) + pub fn for_pattern(pattern: &'s str) -> Self { + Self { + input: pattern, + preds: None, + objs: None, + } } +} - /// Matches the pattern against an input. - pub fn deserialize<'de, Der, De>(&self, der: Der) -> Result<De, Der::Error> - where - Der: Deserializer<'de>, - De: Deserialize<'de>, - { - let pack = vm::Packer::new(&self.consts, MAX_CALLS).deserialize(der)?; - let de = De::deserialize(vm::Unpacker::new(pack, MAX_CALLS)); - todo!() +impl<'s, PKey, OKey, O> PatternBuilder<'s, PKey, OKey, O> +where + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, + O: Serialize, +{ + /// Compiles the pattern. + pub fn compile(self) -> Result<Pattern<O>, PatternError<'s>> { + Ok(Pattern { + consts: parse(self.input, self.preds, self.objs)? + }) } } diff --git a/src/type_tree.rs b/src/type_tree.rs index b1faa54..e7389d5 100644 --- a/src/type_tree.rs +++ b/src/type_tree.rs @@ -38,7 +38,7 @@ //! match a `Foo`, but more efficiently than with a predicate. Another big //! difference between predicates and type trees is how predicates are eager, //! and can consume values that would otherwise be matched by the rest of a -//! pattern. +//! pattern, like `IgnoredAny`. //! //! Type trees are pretty flexible. Consider the following example: //! @@ -67,4 +67,46 @@ //! struct visitor will error. But despite the error, it'll still create a type //! tree for the `deserialize_struct`! -// TODO +// use serde::Deserializer; + +// /// A deserializer which attempts to fill in a type tree. +// struct TypeTreeDeserializer<'tt, D> { +// inner: D, +// tt: &'tt mut TypeTreeNode, +// } + +/// A Type Tree entry type. +/// +/// This represents a type to be deserialized with Serde, with everything that +/// comes with that. It supports the 29 core Serde types, and 2 self-describing +/// ones. +#[derive(Clone, Debug, PartialEq, Eq, Hash, Default)] +pub enum TypeTreeType { + /// An open type, which can be anything. + /// + /// This represents [`Deserializer::deserialize_any`]. + #[default] + Any, + /// A type for a value which will be ignored. + /// + /// This represents [`Deserializer::deserialize_ignored_any`]. + IgnoredAny, + Bool, +} + +/// A node of a type tree. +#[derive(Clone, Debug, PartialEq, Eq, Hash, Default)] +pub struct TypeTreeNode { + /// The type to be requested for this node. + pub node_type: TypeTreeType, + /// The types for when this node is an enum. + pub enum_nodes: (), // TODO + /// The types for when this node is a map. + pub map_nodes: (), // TODO + /// The types for when this node is a seq. + pub seq_nodes: (), // TODO + /// The type for when this node is a some. + pub some_node: Option<Box<TypeTreeNode>>, + /// The type for when this node is a newtype struct. + pub newtype_node: Option<Box<TypeTreeNode>>, +} diff --git a/src/vm/de.rs b/src/vm/de.rs index 4d0d097..2282484 100644 --- a/src/vm/de.rs +++ b/src/vm/de.rs @@ -3,68 +3,376 @@ //! Deserialization-related parts of the VM. +use std::borrow::Cow; +use std::marker::PhantomData; + +use indexmap::IndexMap; + use serde::Serialize; use serde::de::Error as _; +use smallvec::SmallVec; + +use these::These; + +use super::Interpreter; +use super::Pack; use super::PatternConstants; use super::PatternElement; -use super::Pack; +use super::SerdeObject; +use super::Type; +use super::Value; /// A `DeserializeSeed` for Datafu input. /// /// This converts from Serde to Datafu's internal representation (a "pack"). -pub struct Packer<'pat, O: Serialize> { - /// The pattern currently being processed. - pat: &'pat PatternConstants<O>, - /// The instructions/function currently being processed. - ops: &'pat [PatternElement], - /// Maximum number of calls. +pub(crate) struct Packer<'pat, 'state, O: Serialize> { + /// The global interpreter state. + interp: Interpreter<'pat, 'state, O>, + /// Current call limit. call_limit: usize, + /// Whether we're collecting values. + collecting: bool, + /// Instructions currently being processed. + ops: SmallVec<[InstructionReg<'pat>; 1]>, +} + +/// Instruction currently being processed. +struct InstructionReg<'pat> { + /// The (current) program sequence. + instructions: &'pat [PatternElement], + /// Whether this instruction is required to match. + required: bool, } -impl<'pat, O: Serialize> Packer<'pat, O> { +impl<'pat, 'state, 'de, O: Serialize> Packer<'pat, 'state, O> { pub(crate) fn new( - pat: &'pat PatternConstants<O>, + interp: Interpreter<'pat, 'state, O>, call_limit: usize, ) -> Self { + let ops = SmallVec::from_buf([ + InstructionReg { + instructions: &interp.pat.protos.last().unwrap()[..], + required: true, + } + ]); Self { - pat, call_limit, ops: &pat.protos.last().unwrap()[..], + interp: interp, + call_limit: call_limit, + collecting: false, + ops: ops, } } + + /// Extracts the name for this element. + fn get_name(&self) -> SmallVec<[&'pat str; 1]> { + let mut name = SmallVec::<[&'pat str; 1]>::new(); + for reg in &self.ops { + match reg.instructions.first() { + | Some(PatternElement::Tag { name_and_value, .. }) + | Some(PatternElement::Value { name_and_value }) + => { + if let Some(name_key) = name_and_value.here() { + name.push(&self.interp.pat.strings[name_key]); + } + }, + None => { + // FIXME is this correct? + }, + _ => unreachable!(), + } + } + name + } } -impl<'pat, 'de, O> serde::de::DeserializeSeed<'de> for Packer<'pat, O> +// what steps do we have to take? +// +// 1. figure out what type we need to deserialize (and ask the deserializer +// for it). +// 2. visit value. figure out whether we need to store it or not? +// 3. if we need to store it how do we figure out *where* to store it? +// 4. if we *don't* need to store it, what do we do? +// 5. how do we tell if we do or don't need to store it? how do we propagate +// those requirements deeper into the Deserialize's and how do we bring +// the values back out (recursively?) to parent Deserialize's, without +// wasting time storing things we don't actually care about? +// 5.a. just have a flag in the DeserializeSeed for whether to capture the +// values. propagation is more or less trivial from there. +// 6. how do you handle value subtrees? +// 6.a. you don't. for now. +// 7. how do you handle errors? +// 7.a. put them into a "state" and raise a D::Error::custom. then +// override it in the relevant Pattern call. + +impl<'pat, 'state, 'de, O> serde::de::DeserializeSeed<'de> +for Packer<'pat, 'state, O> where O: Serialize, { - type Value = Pack; - fn deserialize<D>(self, deserializer: D) -> Result<Pack, D::Error> + type Value = (Pack<'pat, 'de>, Option<SerdeObject<'de>>); + fn deserialize<D>( + mut self, + deserializer: D, + ) -> Result<Self::Value, D::Error> where D: serde::Deserializer<'de> { - // check the first op - let first = self.ops.first(); - match first { - Some(PatternElement::ApplyPredicate(id, skippable)) => { - let predicate = &self.pat.predicates[*id]; - let ok = predicate(todo!()); - match (ok, skippable) { - (true, _) => { - todo!() - }, - (false, false) => { - return Err(D::Error::custom("predicate didn't match")); - }, - (false, true) => { - todo!() - }, - } + match &*self.ops { + [] => unreachable!(), + [InstructionReg { + instructions: [], + .. + }] => { + // FIXME is this correct? + deserializer.deserialize_ignored_any(self) }, - _ => { - dbg!(first); - todo!() + [InstructionReg { + instructions: [ins, ..], + .. + }] => match ins { + | PatternElement::Tag { name_and_value, .. } + | PatternElement::Value { name_and_value } + => { + match name_and_value.there() { + | Some(Value::String { .. }) + | Some(Value::Regex { .. }) => { + if name_and_value.is_here() { + deserializer.deserialize_string(self) + } else { + deserializer.deserialize_str(self) + } + }, + Some(Value::Type { ty, .. }) => match ty { + Type::Any => deserializer.deserialize_any(self), + Type::IgnoredAny => { + deserializer.deserialize_ignored_any(self) + }, + Type::Bool => deserializer.deserialize_bool(self), + Type::I8 => deserializer.deserialize_i8(self), + Type::I16 => deserializer.deserialize_i16(self), + Type::I32 => deserializer.deserialize_i32(self), + Type::I64 => deserializer.deserialize_i64(self), + Type::I128 => deserializer.deserialize_i128(self), + Type::U8 => deserializer.deserialize_u8(self), + Type::U16 => deserializer.deserialize_u16(self), + Type::U32 => deserializer.deserialize_u32(self), + Type::U64 => deserializer.deserialize_u64(self), + Type::U128 => deserializer.deserialize_u128(self), + Type::F32 => deserializer.deserialize_f32(self), + Type::F64 => deserializer.deserialize_f64(self), + Type::Char => deserializer.deserialize_char(self), + Type::Str => deserializer.deserialize_str(self), + Type::String => deserializer.deserialize_string(self), + Type::Bytes => deserializer.deserialize_bytes(self), + Type::ByteBuf => { + deserializer.deserialize_byte_buf(self) + }, + Type::Option => deserializer.deserialize_option(self), + Type::Unit => deserializer.deserialize_unit(self), + Type::Seq => deserializer.deserialize_seq(self), + Type::Map => deserializer.deserialize_map(self), + }, + None => todo!(), + } + }, + _ => todo!(), }, + _ => todo!(), + } + } +} + +/// visit method generator for simple values (primitives). +macro_rules! vs { + ($visit:ident $obj:ident $t:ty) => { + fn $visit<E>(self, v: $t) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + // FIXME subtrees + let mut obj = None; + let mut pack = Pack::default(); + if self.collecting { + obj = Some(SerdeObject::$obj(v)); + } + let mut map = IndexMap::new(); + for name in self.get_name() { + map.insert(name, (Default::default(), SerdeObject::$obj(v))); + } + pack.subpacks.push(map); + Ok((pack, obj)) + } + } +} + +impl<'pat, 'state, 'de, O> serde::de::Visitor<'de> +for Packer<'pat, 'state, O> +where + O: Serialize, +{ + type Value = (Pack<'pat, 'de>, Option<SerdeObject<'de>>); + fn expecting(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "unsure") + } + + vs!(visit_bool Bool bool); + vs!(visit_i8 I8 i8); + vs!(visit_i16 I16 i16); + vs!(visit_i32 I32 i32); + vs!(visit_i64 I64 i64); + vs!(visit_i128 I128 i128); + vs!(visit_u8 U8 u8); + vs!(visit_u16 U16 u16); + vs!(visit_u32 U32 u32); + vs!(visit_u64 U64 u64); + vs!(visit_u128 U128 u128); + vs!(visit_f32 F32 f32); + vs!(visit_f64 F64 f64); + vs!(visit_char Char char); + + fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Str(Cow::Owned(String::from(v)))); + } + todo!() + } + fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Str(Cow::Borrowed(v))); + } + todo!() + } + fn visit_string<E>(self, v: String) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Str(Cow::Owned(v))); + } + todo!() + } + fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Bytes(Cow::Owned(Vec::from(v)))); + } + todo!() + } + fn visit_borrowed_bytes<E>(self, v: &'de [u8]) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Bytes(Cow::Borrowed(v))); + } + todo!() + } + fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Bytes(Cow::Owned(v))); + } + todo!() + } + fn visit_none<E>(self) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::None); } + todo!() + } + fn visit_some<D>(self, deserializer: D) -> Result<Self::Value, D::Error> + where + D: serde::de::Deserializer<'de>, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Some(todo!())); + } + todo!() + } + fn visit_unit<E>(self) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + // FIXME subtrees + let mut obj = None; + let mut pack = Pack::default(); + if self.collecting { + obj = Some(SerdeObject::Unit); + } + let mut map = IndexMap::new(); + for name in self.get_name() { + map.insert(name, (Default::default(), SerdeObject::Unit)); + } + pack.subpacks.push(map); + Ok((pack, obj)) + } + fn visit_newtype_struct<D>( + self, + deserializer: D + ) -> Result<Self::Value, D::Error> + where + D: serde::de::Deserializer<'de>, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::NewtypeStruct(todo!())); + } + todo!() + } + fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error> + where + A: serde::de::SeqAccess<'de>, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Seq(Vec::new())); + } + todo!() + } + fn visit_map<A>(self, map: A) -> Result<Self::Value, A::Error> + where + A: serde::de::MapAccess<'de>, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Map(Vec::new())); + } + todo!() + } + fn visit_enum<A>(self, data: A) -> Result<Self::Value, A::Error> + where + A: serde::de::EnumAccess<'de>, + { + let mut obj = None; + if self.collecting { + obj = Some(SerdeObject::Enum { + variant: todo!(), + data: todo!(), + }); + } + todo!() } } @@ -72,21 +380,21 @@ where /// /// This converts from Datafu's internal representation (a "pack") into the /// desired output type. -pub struct Unpacker { - pack: Pack, +pub struct Unpacker<'pat, 'de> { + pack: Pack<'pat, 'de>, call_limit: usize, } -impl Unpacker { +impl<'pat, 'de> Unpacker<'pat, 'de> { /// Unpacks a Datafu "pack". - pub fn new(pack: Pack, call_limit: usize) -> Self { + pub fn new(pack: Pack<'pat, 'de>, call_limit: usize) -> Self { Self { pack, call_limit, } } } -impl<'de> serde::Deserializer<'de> for Unpacker { +impl<'pat, 'de> serde::Deserializer<'de> for Unpacker<'pat, 'de> { // TODO datafu errors type Error = serde::de::value::Error; fn deserialize_any<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } @@ -130,10 +438,123 @@ impl<'de> serde::Deserializer<'de> for Unpacker { fn deserialize_ignored_any<V>(self, _: V) -> Result<V::Value, Self::Error> where V: serde::de::Visitor<'de> { todo!() } } -/// A Deserializer for collecting matches from [`crate::Predicate`]s. -/// -/// What are we doing? -/// -/// We certainly have regrets. -pub struct PredicateCollector { +/// Deserializes a SerdeObject +pub(crate) struct SerdeObjectDeserializer<'de, E> { + pub(crate) obj: SerdeObject<'de>, + pub(crate) value: Option<SerdeObject<'de>>, + pub(crate) _e: PhantomData<fn() -> E>, } + +impl<'de, E> serde::de::Deserializer<'de> for SerdeObjectDeserializer<'de, E> +where + E: serde::de::Error, +{ + type Error = E; + fn deserialize_any<V>(self, v: V) -> Result<V::Value, Self::Error> + where + V: serde::de::Visitor<'de>, + { + match self.obj { + SerdeObject::Bool(x) => v.visit_bool(x), + SerdeObject::I8(x) => v.visit_i8(x), + SerdeObject::I16(x) => v.visit_i16(x), + SerdeObject::I32(x) => v.visit_i32(x), + SerdeObject::I64(x) => v.visit_i64(x), + SerdeObject::I128(x) => v.visit_i128(x), + SerdeObject::U8(x) => v.visit_u8(x), + SerdeObject::U16(x) => v.visit_u16(x), + SerdeObject::U32(x) => v.visit_u32(x), + SerdeObject::U64(x) => v.visit_u64(x), + SerdeObject::U128(x) => v.visit_u128(x), + SerdeObject::F32(x) => v.visit_f32(x), + SerdeObject::F64(x) => v.visit_f64(x), + SerdeObject::Char(x) => v.visit_char(x), + SerdeObject::Str(Cow::Owned(x)) => v.visit_string(x), + SerdeObject::Str(Cow::Borrowed(x)) => v.visit_borrowed_str(x), + SerdeObject::Bytes(Cow::Owned(x)) => v.visit_byte_buf(x), + SerdeObject::Bytes(Cow::Borrowed(x)) => v.visit_borrowed_bytes(x), + SerdeObject::Some(x) => todo!(), + SerdeObject::None => v.visit_none(), + SerdeObject::Unit => v.visit_unit(), + SerdeObject::Seq(x) => todo!(), + SerdeObject::Map(x) => todo!(), + SerdeObject::NewtypeStruct(x) => todo!(), + SerdeObject::Enum { variant, data } => todo!(), + } + } + fn deserialize_ignored_any<V>(self, v: V) -> Result<V::Value, Self::Error> + where + V: serde::de::Visitor<'de>, + { + drop(self); + v.visit_unit() + } + serde::forward_to_deserialize_any! { + bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string + bytes byte_buf option unit unit_struct newtype_struct seq tuple + tuple_struct map struct enum identifier + } +} + +#[cfg(test)] +mod tests { + use super::Packer; + use super::super::PatternConstants; + use crate::vm::MAX_CALLS; + use crate::vm::Interpreter; + use crate::vm::Type; + use crate::vm::Value; + use crate::vm::PatternElement; + use crate::vm::SerdeObject; + use these::These; + use serde_json::Deserializer as JsonDeserializer; + use serde::de::DeserializeSeed as _; + + #[test] + #[should_panic] + fn test_broken() { + let consts = PatternConstants::<()>::default(); + let mut err = Default::default(); + let interp = Interpreter::new(&consts, &mut err); + let _ = Packer::new(interp, MAX_CALLS); + } + + #[test] + fn test_empty_create() { + let mut consts = PatternConstants::<()>::default(); + consts.protos.push(Vec::new()); + let mut err = Default::default(); + let interp = Interpreter::new(&consts, &mut err); + let _ = Packer::new(interp, MAX_CALLS); + } + + #[test] + fn test_empty_match() { + let mut consts = PatternConstants::<()>::default(); + consts.protos.push(Vec::new()); + let mut der = JsonDeserializer::from_str("{}"); + let mut err = Default::default(); + let interp = Interpreter::new(&consts, &mut err); + let pack = Packer::new(interp, MAX_CALLS).deserialize(&mut der).unwrap(); + } + + #[test] + fn test_simple_match() { + let mut consts = PatternConstants::<()>::default(); + consts.strings.push("hello".into()); + consts.protos.push(vec![ + PatternElement::Value { + name_and_value: These::Both(0, Value::Type { + ty: Type::U64, + skippable: false, + }) + } + ]); + let mut der = JsonDeserializer::from_str("3"); + let mut err = Default::default(); + let interp = Interpreter::new(&consts, &mut err); + let pack = Packer::new(interp, MAX_CALLS).deserialize(&mut der).unwrap().0; + assert_eq!(pack.subpacks[0]["hello"].1, SerdeObject::U64(3)); + } +} + diff --git a/src/vm/mod.rs b/src/vm/mod.rs index 92a99d7..5f1e86c 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -5,16 +5,23 @@ //! //! This is the stuff that actually matches the pattern. +use std::borrow::Cow; +use std::cell::Cell; +use std::collections::BTreeMap; +use std::marker::PhantomData; + +use indexmap::IndexMap; use regex::Regex; use serde::Serialize; +use these::These; use crate::Predicate; //use crate::errors::MatchError; mod de; -pub use de::Unpacker; -pub use de::Packer; +pub(crate) use de::Unpacker; +pub(crate) use de::Packer; /// Max depth for VM/serde recursion. pub(crate) const MAX_CALLS: usize = 250; @@ -24,16 +31,15 @@ pub(crate) const MAX_CALLS: usize = 250; // maybe we should use a builder for this? /// The constant pool for a pattern. pub(crate) struct PatternConstants<O: Serialize> { - // last proto is implicitly the whole pattern. + /// The protos ("functions") in a pattern. + /// + /// The last proto is implicitly the main function/entry point. pub(crate) protos: Vec<Vec<PatternElement>>, // Note that we can borrow these when creating the output map. // https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f pub(crate) strings: Vec<String>, pub(crate) regices: Vec<Regex>, pub(crate) predicates: Vec<Box<Predicate>>, - // NOTE these are part of the constant pool and so have lifetime analogous - // to 'a (consistently used to indicate constant pool lifetime) when used - // elsewhere. In particular, they can't be yielded by the iterator. pub(crate) defs: Vec<O>, } @@ -72,13 +78,102 @@ impl<O: Serialize> std::fmt::Debug for PatternConstants<O> { /// A pattern element. #[derive(Copy, Clone, Debug)] pub(crate) enum PatternElement { + /// A value is the entry point to a pattern. If present, it's the first + /// element. + Value { + /// The index of the (string) name to apply to this value and/or the + /// expected value of this entry. + name_and_value: These<usize, Value>, + }, + /// A tag is the core iterative element, and is repeated up to the desired + /// depth of iteration. + Tag { + /// The index of the (proto) key to match against. + key_subtree: Option<usize>, + /// The index of the (string) name to apply to this value and/or the + /// expected value of this entry. + name_and_value: These<usize, Value>, + }, + /// A value subtree is a subtree for values. + /// + /// It is applied *after* tags, and thus any value subtrees come last in + /// a pattern's elements. + ValueSubtree { + /// The proto index of the subtree. + index: usize, + /// Whether to allow this value subtree to produce no results. + /// + /// By default, a datafu pattern only matches a tree if every branch of + /// the tree produces results. This enables opting out of that. + optional: bool, + }, +} + +/// A value matcher. +#[derive(Copy, Clone, Debug)] +pub(crate) enum Value { + /// The value must match the specified string. + String { + /// The index of the string. + index: usize, + /// Whether to skip non-matching values, instead of erroring. + skippable: bool, + }, + /// The value must match the specified regex. + Regex { + /// The index of the regex. + index: usize, + /// Whether to skip non-matching values, instead of erroring. + skippable: bool, + }, + // /// The value must match the specified integer. + // Integer { + // /// The integer. + // value: usize, + // /// Whether to skip non-matching values, instead of erroring. + // skippable: bool, + // }, + // /// The value must match the specified integer range. + // Range { + // /// The range. + // value: Range<usize>, + // /// Whether to skip non-matching values, instead of erroring. + // skippable: bool, + // }, + // /// The value must match the specified predicate. + // Predicate { + // /// The index of the predicate. + // index: usize, + // /// Whether to skip non-matching values, instead of erroring. + // skippable: bool, + // }, + // /// The value must match the specified parameter. + // Paameter { + // /// The index of the parameter. + // index: usize, + // /// Whether to skip non-matching values, instead of erroring. + // skippable: bool, + // }, + /// The value must have the specified type. + Type { + /// The expected type. + ty: Type, + /// Whether to skip non-matching values, instead of erroring. + skippable: bool, + }, +} + +/// A pattern token. +#[derive(Copy, Clone, Debug)] +pub(crate) enum PatternToken { + /// Start of a tag. Arrow, Identifier(usize), - StringKey(usize, bool), - RegexKey(usize, bool), - ParameterKey(usize, bool), + String(usize, bool), + Regex(usize, bool), + Parameter(usize, bool), KeySubtree(usize, bool), ValueSubtree(usize, bool), @@ -109,6 +204,7 @@ pub(crate) enum PatternElement { bool, ), + /// End of a tag. End, } @@ -118,6 +214,8 @@ pub(crate) enum PatternElement { /// `Deserializer::deserialize_string`). #[derive(Copy, Clone, Debug)] pub(crate) enum Type { + Any, + IgnoredAny, Bool, I8, I16, @@ -139,29 +237,149 @@ pub(crate) enum Type { Option, Unit, Seq, - Tuple(usize), + // Tuple(usize), Map, - // these aren't really supported: - // UnitStruct, UnitVariant, NewtypeStruct, NewtypeVariant, TupleStruct, - // TupleVariant, Struct, StructVariant - // instead we use type trees for that. - /// Adapter for Type Trees. See `crate::type_tree` for more details. - Of { - /// The type tree index (in `PatternConstants.type_trees`). - type_tree: usize, + // // these aren't really supported: + // // UnitStruct, UnitVariant, NewtypeStruct, NewtypeVariant, TupleStruct, + // // TupleVariant, Struct, StructVariant + // // instead we use type trees for that. + // /// Adapter for Type Trees. See `crate::type_tree` for more details. + // Of { + // /// The type tree index (in `PatternConstants.type_trees`). + // type_tree: usize, + // }, +} + +/// The types which can be deserialized by serde. +/// +/// We guess this is basically the same thing as a serde_value? +#[derive(Clone, Debug, PartialEq)] +pub(crate) enum SerdeObject<'de> { + Bool(bool), + I8(i8), + I16(i16), + I32(i32), + I64(i64), + I128(i128), + U8(u8), + U16(u16), + U32(u32), + U64(u64), + U128(u128), + F32(f32), + F64(f64), + Char(char), + Str(Cow<'de, str>), + Bytes(Cow<'de, [u8]>), + Some(Box<SerdeObject<'de>>), + None, + Unit, + Seq(Vec<SerdeObject<'de>>), + // NOTE: support for multimaps! + Map(Vec<(SerdeObject<'de>, SerdeObject<'de>)>), + NewtypeStruct(Box<SerdeObject<'de>>), + // NOTE: currently unused! + #[allow(unused)] + Enum { + variant: Box<SerdeObject<'de>>, + data: Box<SerdeObject<'de>>, }, } -pub struct Pack; +impl<'de> SerdeObject<'de> { + fn check<E: serde::de::Error>(self, ty: Option<Type>) -> Result<Self, E> { + let ty = match ty { + None => return Ok(self), + Some(ty) => ty, + }; + match (ty, self) { + | (Type::Any, v) + | (Type::IgnoredAny, v) + => Ok(v), + | (Type::Bool, v @ SerdeObject::Bool(_)) + | (Type::I8, v @ SerdeObject::I8(_)) + | (Type::I16, v @ SerdeObject::I16(_)) + | (Type::I32, v @ SerdeObject::I32(_)) + | (Type::I64, v @ SerdeObject::I64(_)) + | (Type::I128, v @ SerdeObject::I128(_)) + | (Type::U8, v @ SerdeObject::U8(_)) + | (Type::U16, v @ SerdeObject::U16(_)) + | (Type::U32, v @ SerdeObject::U32(_)) + | (Type::U64, v @ SerdeObject::U64(_)) + | (Type::U128, v @ SerdeObject::U128(_)) + | (Type::F32, v @ SerdeObject::F32(_)) + | (Type::F64, v @ SerdeObject::F64(_)) + | (Type::Char, v @ SerdeObject::Char(_)) + | (Type::Str, v @ SerdeObject::Str(_)) + | (Type::String, v @ SerdeObject::Str(_)) + | (Type::Bytes, v @ SerdeObject::Bytes(_)) + | (Type::ByteBuf, v @ SerdeObject::Bytes(_)) + | (Type::Option, v @ SerdeObject::None) + | (Type::Option, v @ SerdeObject::Some(_)) + | (Type::Unit, v @ SerdeObject::Unit) + | (Type::Seq, v @ SerdeObject::Seq(_)) + | (Type::Map, v @ SerdeObject::Map(_)) + => Ok(v), + _ => todo!(), + } + } +} -//struct Frame<'a, 'b, T: PatternTypes> { -// //obj: RefOwn<'b, T::Ref, T::Own>, -// ops: &'a [PatternElement], -// iar: Option<usize>, -// depth: usize, -// path: Vec<Holder<'a, 'b, T>>, -// in_key: bool, -//} +impl<'de, E> serde::de::IntoDeserializer<'de, E> for SerdeObject<'de> +where + E: serde::de::Error, +{ + type Deserializer = self::de::SerdeObjectDeserializer<'de, E>; + + fn into_deserializer(self) -> Self::Deserializer { + Self::Deserializer { + obj: self, + value: None, + _e: PhantomData, + } + } +} + +#[derive(Clone, Debug, Default)] +pub struct Pack<'pat, 'de> { + subpacks: Vec<IndexMap<&'pat str, (Pack<'pat, 'de>, SerdeObject<'de>)>>, +} + +/// The Datafu interpreter, sorta. +pub(crate) struct Interpreter<'pat, 'state, O: Serialize> { + /// The pattern currently being processed. + pat: &'pat PatternConstants<O>, + /// The error override (if any). + error: &'state Cell<Option<crate::errors::MatchError>>, +} + +struct Frame<'pat, 'de> { + /// The instructions/function currently being processed. + ops: &'pat [PatternElement], + /// The instruction index being processed. + iar: Option<usize>, + /// Elements collected while processing this frame? + path: Vec<Pack<'pat, 'de>>, +} + +impl<'pat, 'state, O: Serialize> Interpreter<'pat, 'state, O> { + pub(crate) fn new( + pat: &'pat PatternConstants<O>, + error: &'state mut Option<crate::errors::MatchError>, + ) -> Self { + Self { + pat: pat, + error: Cell::from_mut(error), + //frames: vec![ + // Frame { + // ops: &pat.protos[0], + // iar: None, + // path: Vec::new(), + // } + //], + } + } +} // //impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> { // /// Advances the instruction address register. diff --git a/tests/basic_match.rs b/tests/basic_match.rs index 5c03a48..b8d0dd8 100644 --- a/tests/basic_match.rs +++ b/tests/basic_match.rs @@ -4,13 +4,12 @@ use serde_json::Deserializer as JsonDer; use serde::Deserialize; -use datafu::Predicate; +//use datafu::Predicate; #[test] fn test_basic() { let mut der = JsonDer::from_str(r#"{"foo": 1, "bar": {"baz": 2}}"#); - let preds = vec![("dict", datafu::pred(|v| { todo!(); false }))].into_iter().collect(); - let pat = datafu::Pattern::<()>::compile::<&str, &str>("->[x]:?$dict->y[yk]", Some(preds), None).unwrap(); + let pat = datafu::PatternBuilder::for_pattern("->[x]:map->[yk]y").compile().unwrap(); #[derive(Deserialize)] struct Values { x: String, diff --git a/tests/parser_prop.rs b/tests/parser_prop.rs index 0b57171..8a4cbeb 100644 --- a/tests/parser_prop.rs +++ b/tests/parser_prop.rs @@ -6,6 +6,6 @@ use proptest::prelude::*; proptest! { #[test] fn doesnt_panic(s in "\\PC*") { - let _ = datafu::Pattern::<Box<dyn erased_serde::Serialize>>::compile::<&str, &str>(&s, None, None); + let _ = datafu::PatternBuilder::for_pattern(&s).compile(); } } |