diff options
author | SoniEx2 <endermoneymod@gmail.com> | 2022-10-30 00:49:56 -0300 |
---|---|---|
committer | SoniEx2 <endermoneymod@gmail.com> | 2022-10-30 00:49:56 -0300 |
commit | a66111d9f9c99f91d9256209b5e9a65e42cde7f5 (patch) | |
tree | aba85fd481ade96ba1be135c459af9d20eb409d7 | |
parent | c1210b511af8ffada948550180360859b64009d2 (diff) |
Implement parser
-rw-r--r-- | Cargo.toml | 1 | ||||
-rw-r--r-- | src/parser.rs | 174 | ||||
-rw-r--r-- | src/pattern.rs | 4 | ||||
-rw-r--r-- | src/vm/de.rs | 61 | ||||
-rw-r--r-- | src/vm/mod.rs | 29 | ||||
-rw-r--r-- | tests/basic_match.rs | 2 |
6 files changed, 191 insertions, 80 deletions
diff --git a/Cargo.toml b/Cargo.toml index e72ee6c..780a162 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,6 @@ regex = "1" serde = "1.0.140" serde_transmute = "0.1.4" smallvec = "1.10.0" -these = "2.0.0" [dev-dependencies] proptest = "1.0.0" diff --git a/src/parser.rs b/src/parser.rs index 97185ac..595f157 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -13,6 +13,7 @@ use serde::Serialize; use crate::Predicate; use crate::errors::PatternError; +use crate::vm; use crate::vm::PatternConstants; use crate::vm::PatternElement; use crate::vm::PatternToken; @@ -72,6 +73,48 @@ fn pos_of<'a>(base: &'a str, sub: &'a str) -> Option<usize> { Some((sub.as_ptr() as usize) - (base.as_ptr() as usize)) } +/// Collects value-ish PatternTokens into a PatternElement::Value with the +/// given already-collected name. +fn collect_value( + name: Option<usize>, + tokens: &[PatternToken], +) -> PatternElement { + let value = match tokens { + &[PatternToken::String(index, skippable)] => { + vm::Value::String { index, skippable } + }, + &[PatternToken::Regex(index, skippable)] => { + vm::Value::Regex { index, skippable } + }, + &[PatternToken::Type(ty, skippable)] => { + vm::Value::Type { ty, skippable } + }, + other => { + unreachable!("{other:?}") + }, + }; + PatternElement::Value { + name, + value: Some(value), + } +} + +/// Collects a slice of PatternToken into a PatternElement::Value. +fn collect_name_and_value(tokens: &[PatternToken]) -> PatternElement { + match tokens { + &[PatternToken::Identifier(name)] => { + PatternElement::Value { + name: Some(name), + value: None, + } + }, + &[PatternToken::Identifier(name), ref value @ ..] => { + collect_value(Some(name), value) + }, + value => collect_value(None, value), + } +} + /// Helper to collect "subtree" sections of the pattern. /// /// This is a RAII-like guard which handles cleaning up the parsed pattern when @@ -95,7 +138,7 @@ impl_trait! { O: Serialize, { fn start(value: &'r mut Parser<'s, PKey, OKey, O>) -> Self { - value.tokens.push(Default::default()); + value.consts.protos.push(Default::default()); Self { root: value, } @@ -103,9 +146,9 @@ impl_trait! { fn commit(self) -> usize { let mut self_ = ManuallyDrop::new(self); - let proto = self_.root.tokens.pop().unwrap(); + let proto = self_.root.consts.protos.pop().unwrap(); let id = self_.root.closed_subtrees.next().unwrap(); - self_.root.tokens.insert(id, proto); + self_.root.consts.protos.insert(id, proto); id } @@ -126,7 +169,7 @@ impl_trait! { impl trait Drop { fn drop(&mut self) { // remove "partial" proto - self.root.tokens.pop().expect("SubtreeHelper"); + self.root.consts.protos.pop().expect("SubtreeHelper"); } } } @@ -156,7 +199,7 @@ impl_trait! { O: Serialize, { fn start(value: &'r mut Parser<'s, PKey, OKey, O>) -> Self { - let len = value.tokens.last().unwrap().len(); + let len = value.tokens.len(); Self { root: value, len : len, @@ -164,7 +207,40 @@ impl_trait! { } fn commit(self) { - let _self = std::mem::ManuallyDrop::new(self); + let _self = &mut *std::mem::ManuallyDrop::new(self); + // we could write a proper parser for the token stream. + // + // we could also just do this instead. + match _self.root.tokens.drain(_self.len..).as_slice() { + &[ + PatternToken::Arrow, + PatternToken::KeySubtree(index), + ref name_value @ .., + PatternToken::End, + ] => { + let tag = PatternElement::Tag { + key_subtree: Some(index), + }; + _self.root.consts.protos.last_mut().unwrap().push(tag); + let value = collect_name_and_value(name_value); + _self.root.consts.protos.last_mut().unwrap().push(value); + }, + &[ + PatternToken::Arrow, + ref name_value @ .., + PatternToken::End, + ] => { + let tag = PatternElement::Tag { + key_subtree: None, + }; + _self.root.consts.protos.last_mut().unwrap().push(tag); + let value = collect_name_and_value(name_value); + _self.root.consts.protos.last_mut().unwrap().push(value); + }, + other => { + unreachable!("{other:?}"); + }, + }; } impl trait std::ops::Deref { @@ -183,11 +259,9 @@ impl_trait! { impl trait Drop { fn drop(&mut self) { - let proto = self.root.tokens.last_mut().unwrap(); + let proto = &mut self.root.tokens; assert!(proto.len() >= self.len); - while proto.len() > self.len { - let _ = proto.pop(); - } + proto.drain(self.len..); } } } @@ -205,7 +279,7 @@ where pred_ids: BTreeMap<PKey, usize>, obj_ids: BTreeMap<OKey, usize>, consts: PatternConstants<O>, - tokens: Vec<Vec<PatternToken>>, + tokens: Vec<PatternToken>, closed_subtrees: std::ops::RangeFrom<usize>, } @@ -281,7 +355,7 @@ where self_.consts.strings.push(string); self_.consts.strings.len() - 1 }); - let proto = self.tokens.last_mut().expect("protos"); + let proto = &mut self.tokens; proto.push(PatternToken::String(id, skippable)); *s = cursor; true @@ -339,7 +413,7 @@ where self_.consts.regices.push(re); self_.consts.regices.len() - 1 }); - let proto = self.tokens.last_mut().expect("protos"); + let proto = &mut self.tokens; proto.push(PatternToken::Regex(id, skippable)); *s = cursor; true @@ -357,7 +431,6 @@ where _ if self.re_literal(&mut cursor)? => {}, _ if self.predicate(&mut cursor)? => {}, _ if self.ty(&mut cursor)? => {}, - //_ if self.key_subtree(&mut cursor)? => {}, _ => bry!('matches false), } self.sp(&mut cursor); @@ -374,7 +447,7 @@ where bry!('matches strip_prefix(&mut cursor, "->")); let mut self_ = TagHelper::start(&mut *self); { - let proto = self_.tokens.last_mut().expect("protos"); + let proto = &mut self_.tokens; proto.push(PatternToken::Arrow); } self_.sp(&mut cursor); @@ -387,7 +460,7 @@ where } self_.sp(&mut cursor); { - let proto = self_.tokens.last_mut().expect("protos"); + let proto = &mut self_.tokens; proto.push(PatternToken::End); } self_.commit(); @@ -431,7 +504,7 @@ where self.consts.strings.push(name.into()); self.consts.strings.len() - 1 }); - let proto = self.tokens.last_mut().expect("protos"); + let proto = &mut self.tokens; proto.push(PatternToken::Identifier(id)); self.sp(&mut cursor); *s = cursor; @@ -465,7 +538,7 @@ where }, Ok, )?; - let proto = self.tokens.last_mut().expect("protos"); + let proto = &mut self.tokens; proto.push(PatternToken::Parameter(id, skippable)); self.sp(&mut cursor); *s = cursor; @@ -484,7 +557,7 @@ where let start = cursor; bry!('matches self.identifier(&mut cursor)?); let name = &start[..pos_of(start, cursor).unwrap_or(start.len())]; - let proto = self.tokens.last_mut().expect("protos"); + let proto = &mut self.tokens; proto.push(PatternToken::Type(match name { "bool" => Type::Bool, "i8" => Type::I8, @@ -526,7 +599,7 @@ where Ok(lblock!('matches: { self.sp(&mut cursor); bry!('matches strip_prefix(&mut cursor, ":")); - let custom = strip_prefix(&mut cursor, "$"); + bry!('matches strip_prefix(&mut cursor, "$")); let skippable = strip_prefix(&mut cursor, "?"); let start = cursor; bry!('matches self.identifier(&mut cursor)?); @@ -547,7 +620,7 @@ where }, Ok, )?; - let proto = self.tokens.last_mut().expect("protos"); + let proto = &mut self.tokens; proto.push(PatternToken::ApplyPredicate(id, skippable)); self.sp(&mut cursor); *s = cursor; @@ -557,7 +630,8 @@ where })) } - /// key_subtree <- sp '[' sp ( matcher / name sp matcher? ) sp subtree sp ( ']' / unexpected_token / unexpected_end ) sp ( '?'? -> MarkSkippable ) + /// key_subtree <- sp '[' sp ( matcher / name sp matcher? ) sp subtree sp ( ']' / unexpected_token / unexpected_end ) sp + // ( '?'? -> MarkSkippable ) fn key_subtree(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { let mut cursor = *s; Ok(lblock!('matches: { @@ -565,11 +639,17 @@ where bry!('matches strip_prefix(&mut cursor, "[")); self.sp(&mut cursor); let mut subtree = SubtreeHelper::start(&mut *self); + // FIXME handle `?` + let marker = subtree.tokens.len(); if !subtree.matcher(&mut cursor)? { bry!('matches subtree.name(&mut cursor)?); subtree.sp(&mut cursor); let _ = subtree.matcher(&mut cursor)?; } + let value = match subtree.tokens.drain(marker..).as_slice() { + name_value => collect_name_and_value(name_value), + }; + subtree.consts.protos.last_mut().unwrap().push(value); subtree.sp(&mut cursor); bry!('matches subtree.subtree(&mut cursor)?); subtree.sp(&mut cursor); @@ -581,11 +661,11 @@ where subtree.unexpected_end(&mut cursor)? ); subtree.sp(&mut cursor); - let skippable = strip_prefix(&mut cursor, "?"); + //let skippable = strip_prefix(&mut cursor, "?"); *s = cursor; let id = subtree.commit(); - let proto = self.tokens.last_mut().expect("protos"); - proto.push(PatternToken::KeySubtree(id, skippable)); + let proto = &mut self.tokens; + proto.push(PatternToken::KeySubtree(id)); true })) } @@ -611,7 +691,7 @@ where let skippable = strip_prefix(&mut cursor, "?"); *s = cursor; let id = subtree.commit(); - let proto = self.tokens.last_mut().expect("protos"); + let proto = &mut self.tokens; proto.push(PatternToken::ValueSubtree(id, skippable)); true })) @@ -641,7 +721,7 @@ where } self.sp(&mut cursor); while self.value_subtree(&mut cursor)? { - let proto = self.tokens.last_mut().expect("protos"); + let proto = &mut self.tokens; proto.push(PatternToken::End); } self.sp(&mut cursor); @@ -655,7 +735,19 @@ where let mut cursor = *s; Ok(lblock!('matches: { let mut subtree = SubtreeHelper::start(&mut *self); + // FIXME handle `?` + let marker = subtree.tokens.len(); let _ = subtree.matcher(&mut cursor)?; + let value = match subtree.tokens.drain(marker..).as_slice() { + &[] => { + PatternElement::Value { + name: None, + value: None, + } + }, + value => collect_value(None, value), + }; + subtree.consts.protos.last_mut().unwrap().push(value); bry!('matches subtree.subtree(&mut cursor)? || @@ -693,7 +785,11 @@ where assert!(matched); assert_eq!(parsed, ""); - assert_eq!(parser.closed_subtrees.next().unwrap(), parser.tokens.len()); + assert_eq!( + parser.closed_subtrees.next().unwrap(), + parser.consts.protos.len(), + ); + assert!(parser.consts.protos.iter().all(|proto| !proto.is_empty())); Ok(parser.consts) } @@ -709,7 +805,6 @@ mod tests { let mut parser = Parser::< 's, &'static str, &'static str, () >::new(s, None, None); - parser.tokens.push(Default::default()); parser } @@ -756,19 +851,16 @@ mod tests { } #[test] - fn test_pattern_tag() { - fn check_tag<'s>(s: &mut &'s str) -> ( - Result<bool, PatternError<'s>>, - Parser::< - 's, &'static str, &'static str, () - > - ) { - let mut parser = prep_parser(s); - let result = parser.tag(s); - (result, parser) + fn test_no_crash_some_patterns() { + fn run_pattern(mut s: &str) { + let _ = prep_parser(s).pattern(&mut s); } - - // TODO + run_pattern("hello"); + run_pattern("/test/"); + run_pattern("'this'"); + run_pattern(":map"); + run_pattern(":?map"); + run_pattern(":map->[:str]:str"); } } diff --git a/src/pattern.rs b/src/pattern.rs index 2f7166a..6286ff9 100644 --- a/src/pattern.rs +++ b/src/pattern.rs @@ -56,8 +56,8 @@ impl<O: Serialize> Pattern<O> { ).deserialize(der)?; // this should always be None debug_assert!(obj.is_none()); - debug_assert!(packs.len() == 1); - let pack = packs.pop().unwrap(); + debug_assert!(packs.len() <= 1); + let pack = packs.pop().unwrap_or_else(Default::default); let de = De::deserialize(vm::Unpacker::new(pack, MAX_CALLS)); todo!() } diff --git a/src/vm/de.rs b/src/vm/de.rs index 985b1b2..e26ec5e 100644 --- a/src/vm/de.rs +++ b/src/vm/de.rs @@ -14,7 +14,6 @@ use serde::de::IntoDeserializer as _; use smallvec::SmallVec; -use these::These; use super::Frame; use super::Interpreter; @@ -275,31 +274,51 @@ where { if let Err(e) = self.step_in() { return Err(e); } let pat = self.interp.pat; - let target_type = self.frames().iter_active().fold( + let target_type = self.frames().iter_active().try_fold( Type::IgnoredAny, |target_type, frame| { - match (target_type, frame.get_type()) { - (Type::IgnoredAny, Some((ty, _))) => ty, - (ty, Some((Type::IgnoredAny, _))) => ty, - (Type::String, Some((Type::Str, _))) => { + Ok(match (target_type, frame.get_type()) { + // required type binds stronger than any/ignored_any + (Type::IgnoredAny, Some((ty, true))) => ty, + (Type::Any, Some((ty, true))) => ty, + (ty, Some((Type::IgnoredAny, true))) => ty, + (ty, Some((Type::Any, true))) => ty, + // prefer owned if any branch prefers owned + (Type::String, Some((Type::Str, true))) => { Type::String }, - (Type::Str, Some((Type::String, _))) => { + (Type::Str, Some((Type::String, true))) => { Type::String }, - (Type::Bytes, Some((Type::ByteBuf, _))) => { + (Type::Bytes, Some((Type::ByteBuf, true))) => { Type::ByteBuf }, - (Type::ByteBuf, Some((Type::Bytes, _))) => { + (Type::ByteBuf, Some((Type::Bytes, true))) => { Type::ByteBuf }, + // types which are the same are okay (left, Some((right, _))) if left == right => { left }, + // optional type vs Any/IgnoredAny prefers Any + (Type::IgnoredAny, Some((_, false))) => Type::Any, + (Type::Any, Some((_, false))) => Type::Any, + // types which are not the same are an error because we + // only request a specific type if it's actually required + (left, Some((right, _))) => { + return Err(todo!()); + }, _ => Type::Any, - } + }) }, ); + let target_type = match target_type { + Ok(target_type) => target_type, + Err(e) => { + self.interp.error.insert(e); + return Err(D::Error::custom("type conflict")); + }, + }; match target_type { Type::Any => deserializer.deserialize_any(&mut *self), Type::IgnoredAny => { @@ -840,7 +859,6 @@ mod tests { use crate::vm::PatternElement; use crate::vm::SerdeObject; use crate::vm::Frame; - use these::These; use serde_json::Deserializer as JsonDeserializer; use serde::de::DeserializeSeed as _; @@ -886,7 +904,8 @@ mod tests { consts.strings.push("hello".into()); consts.protos.push(vec![ PatternElement::Value { - name_and_value: These::Both(0, Value::Type { + name: Some(0), + value: Some(Value::Type { ty: Type::U64, skippable: false, }), @@ -909,7 +928,8 @@ mod tests { consts.strings.push("hello".into()); consts.protos.push(vec![ PatternElement::Value { - name_and_value: These::Both(0, Value::Type { + name: Some(0), + value: Some(Value::Type { ty: Type::U64, skippable: false, }), @@ -933,7 +953,8 @@ mod tests { consts.strings.push("b".into()); consts.protos.push(vec![ PatternElement::Value { - name_and_value: These::Both(0, Value::Type { + name: Some(0), + value: Some(Value::Type { ty: Type::U64, skippable: true, }), @@ -941,7 +962,8 @@ mod tests { ]); consts.protos.push(vec![ PatternElement::Value { - name_and_value: These::Both(1, Value::Type { + name: Some(1), + value: Some(Value::Type { ty: Type::Bool, skippable: true, }), @@ -986,12 +1008,14 @@ mod tests { consts.strings.push("value".into()); consts.protos.push(vec![ PatternElement::Value { - name_and_value: These::This(0), + name: Some(0), + value: None, }, ]); consts.protos.push(vec![ PatternElement::Value { - name_and_value: These::That(Value::Type { + name: None, + value: Some(Value::Type { ty: Type::Map, skippable: false, }), @@ -1000,7 +1024,8 @@ mod tests { key_subtree: Some(0), }, PatternElement::Value { - name_and_value: These::Both(1, Value::Type { + name: Some(1), + value: Some(Value::Type { ty: Type::U64, skippable: false, }), diff --git a/src/vm/mod.rs b/src/vm/mod.rs index 8f20aae..06f12e5 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -14,7 +14,6 @@ use std::marker::PhantomData; use indexmap::IndexMap; use regex::Regex; use serde::Serialize; -use these::These; use crate::Predicate; //use crate::errors::MatchError; @@ -82,9 +81,10 @@ impl<O: Serialize> std::fmt::Debug for PatternConstants<O> { pub(crate) enum PatternElement { /// A value is the core capturing element. Value { - /// The index of the (string) name to apply to this value and/or the - /// expected value of this entry. - name_and_value: These<usize, Value>, + /// The index of the (string) name to apply to this value. + name: Option<usize>, + /// The expected value of this entry. + value: Option<Value>, }, /// A tag is the core iterative element. It is always followed by a value. Tag { @@ -173,7 +173,7 @@ pub(crate) enum PatternToken { String(usize, bool), Regex(usize, bool), Parameter(usize, bool), - KeySubtree(usize, bool), + KeySubtree(usize), ValueSubtree(usize, bool), /// Represents a predicate which must be applied. @@ -424,19 +424,16 @@ impl<'pat> Frame<'pat> { &self, ) -> Option<(Type, bool)> { match self.op() { - | PatternElement::Value { name_and_value, .. } - if name_and_value.is_there() - => { - match name_and_value.there() { - | Some(Value::String { skippable, .. }) - | Some(Value::Regex { skippable, .. }) + PatternElement::Value { value: Some(value), .. } => { + match value { + | Value::String { skippable, .. } + | Value::Regex { skippable, .. } => { Some((Type::Str, !skippable)) }, - Some(Value::Type { ty, skippable }) => { + Value::Type { ty, skippable } => { Some((ty, !skippable)) }, - None => todo!(), } }, PatternElement::Tag { .. } => panic!("attempt to get type of tag"), @@ -451,10 +448,8 @@ impl<'pat> Frame<'pat> { ) -> Option<&'pat str> { let strings = &pat.strings; match self.op() { - | PatternElement::Value { name_and_value, .. } - if name_and_value.is_here() - => { - Some(&*strings[name_and_value.here().unwrap()]) + PatternElement::Value { name: Some(name), .. } => { + Some(&*strings[name]) }, PatternElement::Tag { .. } => panic!("attempt to get name of tag"), _ => None, diff --git a/tests/basic_match.rs b/tests/basic_match.rs index b8d0dd8..a1c62bb 100644 --- a/tests/basic_match.rs +++ b/tests/basic_match.rs @@ -9,7 +9,7 @@ use serde::Deserialize; #[test] fn test_basic() { let mut der = JsonDer::from_str(r#"{"foo": 1, "bar": {"baz": 2}}"#); - let pat = datafu::PatternBuilder::for_pattern("->[x]:map->[yk]y").compile().unwrap(); + let pat = datafu::PatternBuilder::for_pattern("->[x]:?map->[yk]y").compile().unwrap(); #[derive(Deserialize)] struct Values { x: String, |