summary refs log tree commit diff stats
diff options
context:
space:
mode:
author SoniEx2 <endermoneymod@gmail.com> 2022-10-30 00:49:56 -0300
committer SoniEx2 <endermoneymod@gmail.com> 2022-10-30 00:49:56 -0300
commit a66111d9f9c99f91d9256209b5e9a65e42cde7f5 (patch)
tree aba85fd481ade96ba1be135c459af9d20eb409d7
parent c1210b511af8ffada948550180360859b64009d2 (diff)
Implement parser
-rw-r--r-- Cargo.toml 1
-rw-r--r-- src/parser.rs 174
-rw-r--r-- src/pattern.rs 4
-rw-r--r-- src/vm/de.rs 61
-rw-r--r-- src/vm/mod.rs 29
-rw-r--r-- tests/basic_match.rs 2
6 files changed, 191 insertions, 80 deletions
diff --git a/Cargo.toml b/Cargo.toml
index e72ee6c..780a162 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -19,7 +19,6 @@ regex = "1"
 serde = "1.0.140"
 serde_transmute = "0.1.4"
 smallvec = "1.10.0"
-these = "2.0.0"
 
 [dev-dependencies]
 proptest = "1.0.0"
diff --git a/src/parser.rs b/src/parser.rs
index 97185ac..595f157 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -13,6 +13,7 @@ use serde::Serialize;
 
 use crate::Predicate;
 use crate::errors::PatternError;
+use crate::vm;
 use crate::vm::PatternConstants;
 use crate::vm::PatternElement;
 use crate::vm::PatternToken;
@@ -72,6 +73,48 @@ fn pos_of<'a>(base: &'a str, sub: &'a str) -> Option<usize> {
     Some((sub.as_ptr() as usize) - (base.as_ptr() as usize))
 }
 
+/// Collects value-ish PatternTokens into a PatternElement::Value with the
+/// given already-collected name.
+fn collect_value(
+    name: Option<usize>,
+    tokens: &[PatternToken],
+) -> PatternElement {
+    let value = match tokens {
+        &[PatternToken::String(index, skippable)] => {
+            vm::Value::String { index, skippable }
+        },
+        &[PatternToken::Regex(index, skippable)] => {
+            vm::Value::Regex { index, skippable }
+        },
+        &[PatternToken::Type(ty, skippable)] => {
+            vm::Value::Type { ty, skippable }
+        },
+        other => {
+            unreachable!("{other:?}")
+        },
+    };
+    PatternElement::Value {
+        name,
+        value: Some(value),
+    }
+}
+
+/// Collects a slice of PatternToken into a PatternElement::Value.
+fn collect_name_and_value(tokens: &[PatternToken]) -> PatternElement {
+    match tokens {
+        &[PatternToken::Identifier(name)] => {
+            PatternElement::Value {
+                name: Some(name),
+                value: None,
+            }
+        },
+        &[PatternToken::Identifier(name), ref value @ ..] => {
+            collect_value(Some(name), value)
+        },
+        value => collect_value(None, value),
+    }
+}
+
 /// Helper to collect "subtree" sections of the pattern.
 ///
 /// This is a RAII-like guard which handles cleaning up the parsed pattern when
@@ -95,7 +138,7 @@ impl_trait! {
         O: Serialize,
     {
         fn start(value: &'r mut Parser<'s, PKey, OKey, O>) -> Self {
-            value.tokens.push(Default::default());
+            value.consts.protos.push(Default::default());
             Self {
                 root: value,
             }
@@ -103,9 +146,9 @@ impl_trait! {
 
         fn commit(self) -> usize {
             let mut self_ = ManuallyDrop::new(self);
-            let proto = self_.root.tokens.pop().unwrap();
+            let proto = self_.root.consts.protos.pop().unwrap();
             let id = self_.root.closed_subtrees.next().unwrap();
-            self_.root.tokens.insert(id, proto);
+            self_.root.consts.protos.insert(id, proto);
             id
         }
 
@@ -126,7 +169,7 @@ impl_trait! {
         impl trait Drop {
             fn drop(&mut self) {
                 // remove "partial" proto
-                self.root.tokens.pop().expect("SubtreeHelper");
+                self.root.consts.protos.pop().expect("SubtreeHelper");
             }
         }
     }
@@ -156,7 +199,7 @@ impl_trait! {
         O: Serialize,
     {
         fn start(value: &'r mut Parser<'s, PKey, OKey, O>) -> Self {
-            let len = value.tokens.last().unwrap().len();
+            let len = value.tokens.len();
             Self {
                 root: value,
                 len : len,
@@ -164,7 +207,40 @@ impl_trait! {
         }
 
         fn commit(self) {
-            let _self = std::mem::ManuallyDrop::new(self);
+            let _self = &mut *std::mem::ManuallyDrop::new(self);
+            // we could write a proper parser for the token stream.
+            //
+            // we could also just do this instead.
+            match _self.root.tokens.drain(_self.len..).as_slice() {
+                &[
+                    PatternToken::Arrow,
+                    PatternToken::KeySubtree(index),
+                    ref name_value @ ..,
+                    PatternToken::End,
+                ] => {
+                    let tag = PatternElement::Tag {
+                        key_subtree: Some(index),
+                    };
+                    _self.root.consts.protos.last_mut().unwrap().push(tag);
+                    let value = collect_name_and_value(name_value);
+                    _self.root.consts.protos.last_mut().unwrap().push(value);
+                },
+                &[
+                    PatternToken::Arrow,
+                    ref name_value @ ..,
+                    PatternToken::End,
+                ] => {
+                    let tag = PatternElement::Tag {
+                        key_subtree: None,
+                    };
+                    _self.root.consts.protos.last_mut().unwrap().push(tag);
+                    let value = collect_name_and_value(name_value);
+                    _self.root.consts.protos.last_mut().unwrap().push(value);
+                },
+                other => {
+                    unreachable!("{other:?}");
+                },
+            };
         }
 
         impl trait std::ops::Deref {
@@ -183,11 +259,9 @@ impl_trait! {
 
         impl trait Drop {
             fn drop(&mut self) {
-                let proto = self.root.tokens.last_mut().unwrap();
+                let proto = &mut self.root.tokens;
                 assert!(proto.len() >= self.len);
-                while proto.len() > self.len {
-                    let _ = proto.pop();
-                }
+                proto.drain(self.len..);
             }
         }
     }
@@ -205,7 +279,7 @@ where
     pred_ids: BTreeMap<PKey, usize>,
     obj_ids: BTreeMap<OKey, usize>,
     consts: PatternConstants<O>,
-    tokens: Vec<Vec<PatternToken>>,
+    tokens: Vec<PatternToken>,
     closed_subtrees: std::ops::RangeFrom<usize>,
 }
 
@@ -281,7 +355,7 @@ where
                 self_.consts.strings.push(string);
                 self_.consts.strings.len() - 1
             });
-            let proto = self.tokens.last_mut().expect("protos");
+            let proto = &mut self.tokens;
             proto.push(PatternToken::String(id, skippable));
             *s = cursor;
             true
@@ -339,7 +413,7 @@ where
                 self_.consts.regices.push(re);
                 self_.consts.regices.len() - 1
             });
-            let proto = self.tokens.last_mut().expect("protos");
+            let proto = &mut self.tokens;
             proto.push(PatternToken::Regex(id, skippable));
             *s = cursor;
             true
@@ -357,7 +431,6 @@ where
                 _ if self.re_literal(&mut cursor)? => {},
                 _ if self.predicate(&mut cursor)? => {},
                 _ if self.ty(&mut cursor)? => {},
-                //_ if self.key_subtree(&mut cursor)? => {},
                 _ => bry!('matches false),
             }
             self.sp(&mut cursor);
@@ -374,7 +447,7 @@ where
             bry!('matches strip_prefix(&mut cursor, "->"));
             let mut self_ = TagHelper::start(&mut *self);
             {
-                let proto = self_.tokens.last_mut().expect("protos");
+                let proto = &mut self_.tokens;
                 proto.push(PatternToken::Arrow);
             }
             self_.sp(&mut cursor);
@@ -387,7 +460,7 @@ where
             }
             self_.sp(&mut cursor);
             {
-                let proto = self_.tokens.last_mut().expect("protos");
+                let proto = &mut self_.tokens;
                 proto.push(PatternToken::End);
             }
             self_.commit();
@@ -431,7 +504,7 @@ where
                 self.consts.strings.push(name.into());
                 self.consts.strings.len() - 1
             });
-            let proto = self.tokens.last_mut().expect("protos");
+            let proto = &mut self.tokens;
             proto.push(PatternToken::Identifier(id));
             self.sp(&mut cursor);
             *s = cursor;
@@ -465,7 +538,7 @@ where
                 },
                 Ok,
             )?;
-            let proto = self.tokens.last_mut().expect("protos");
+            let proto = &mut self.tokens;
             proto.push(PatternToken::Parameter(id, skippable));
             self.sp(&mut cursor);
             *s = cursor;
@@ -484,7 +557,7 @@ where
             let start = cursor;
             bry!('matches self.identifier(&mut cursor)?);
             let name = &start[..pos_of(start, cursor).unwrap_or(start.len())];
-            let proto = self.tokens.last_mut().expect("protos");
+            let proto = &mut self.tokens;
             proto.push(PatternToken::Type(match name {
                 "bool" => Type::Bool,
                 "i8" => Type::I8,
@@ -526,7 +599,7 @@ where
         Ok(lblock!('matches: {
             self.sp(&mut cursor);
             bry!('matches strip_prefix(&mut cursor, ":"));
-            let custom = strip_prefix(&mut cursor, "$");
+            bry!('matches strip_prefix(&mut cursor, "$"));
             let skippable = strip_prefix(&mut cursor, "?");
             let start = cursor;
             bry!('matches self.identifier(&mut cursor)?);
@@ -547,7 +620,7 @@ where
                 },
                 Ok,
             )?;
-            let proto = self.tokens.last_mut().expect("protos");
+            let proto = &mut self.tokens;
             proto.push(PatternToken::ApplyPredicate(id, skippable));
             self.sp(&mut cursor);
             *s = cursor;
@@ -557,7 +630,8 @@ where
         }))
     }
 
-    /// key_subtree <- sp '[' sp ( matcher / name sp matcher? ) sp subtree sp ( ']' / unexpected_token / unexpected_end ) sp ( '?'? -> MarkSkippable )
+    /// key_subtree <- sp '[' sp ( matcher / name sp matcher? ) sp subtree sp ( ']' / unexpected_token / unexpected_end ) sp
+    // ( '?'? -> MarkSkippable )
     fn key_subtree(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
         let mut cursor = *s;
         Ok(lblock!('matches: {
@@ -565,11 +639,17 @@ where
             bry!('matches strip_prefix(&mut cursor, "["));
             self.sp(&mut cursor);
             let mut subtree = SubtreeHelper::start(&mut *self);
+            // FIXME handle `?`
+            let marker = subtree.tokens.len();
             if !subtree.matcher(&mut cursor)? {
                 bry!('matches subtree.name(&mut cursor)?);
                 subtree.sp(&mut cursor);
                 let _ = subtree.matcher(&mut cursor)?;
             }
+            let value = match subtree.tokens.drain(marker..).as_slice() {
+                name_value => collect_name_and_value(name_value),
+            };
+            subtree.consts.protos.last_mut().unwrap().push(value);
             subtree.sp(&mut cursor);
             bry!('matches subtree.subtree(&mut cursor)?);
             subtree.sp(&mut cursor);
@@ -581,11 +661,11 @@ where
                 subtree.unexpected_end(&mut cursor)?
             );
             subtree.sp(&mut cursor);
-            let skippable = strip_prefix(&mut cursor, "?");
+            //let skippable = strip_prefix(&mut cursor, "?");
             *s = cursor;
             let id = subtree.commit();
-            let proto = self.tokens.last_mut().expect("protos");
-            proto.push(PatternToken::KeySubtree(id, skippable));
+            let proto = &mut self.tokens;
+            proto.push(PatternToken::KeySubtree(id));
             true
         }))
     }
@@ -611,7 +691,7 @@ where
             let skippable = strip_prefix(&mut cursor, "?");
             *s = cursor;
             let id = subtree.commit();
-            let proto = self.tokens.last_mut().expect("protos");
+            let proto = &mut self.tokens;
             proto.push(PatternToken::ValueSubtree(id, skippable));
             true
         }))
@@ -641,7 +721,7 @@ where
         }
         self.sp(&mut cursor);
         while self.value_subtree(&mut cursor)? {
-            let proto = self.tokens.last_mut().expect("protos");
+            let proto = &mut self.tokens;
             proto.push(PatternToken::End);
         }
         self.sp(&mut cursor);
@@ -655,7 +735,19 @@ where
         let mut cursor = *s;
         Ok(lblock!('matches: {
             let mut subtree = SubtreeHelper::start(&mut *self);
+            // FIXME handle `?`
+            let marker = subtree.tokens.len();
             let _ = subtree.matcher(&mut cursor)?;
+            let value = match subtree.tokens.drain(marker..).as_slice() {
+                &[] => {
+                    PatternElement::Value {
+                        name: None,
+                        value: None,
+                    }
+                },
+                value => collect_value(None, value),
+            };
+            subtree.consts.protos.last_mut().unwrap().push(value);
             bry!('matches
                 subtree.subtree(&mut cursor)?
                 ||
@@ -693,7 +785,11 @@ where
 
     assert!(matched);
     assert_eq!(parsed, "");
-    assert_eq!(parser.closed_subtrees.next().unwrap(), parser.tokens.len());
+    assert_eq!(
+        parser.closed_subtrees.next().unwrap(),
+        parser.consts.protos.len(),
+    );
+    assert!(parser.consts.protos.iter().all(|proto| !proto.is_empty()));
 
     Ok(parser.consts)
 }
@@ -709,7 +805,6 @@ mod tests {
         let mut parser = Parser::<
             's, &'static str, &'static str, ()
         >::new(s, None, None);
-        parser.tokens.push(Default::default());
         parser
     }
 
@@ -756,19 +851,16 @@ mod tests {
     }
 
     #[test]
-    fn test_pattern_tag() {
-        fn check_tag<'s>(s: &mut &'s str) -> (
-            Result<bool, PatternError<'s>>,
-            Parser::<
-                's, &'static str, &'static str, ()
-            >
-        ) {
-            let mut parser = prep_parser(s);
-            let result = parser.tag(s);
-            (result, parser)
+    fn test_no_crash_some_patterns() {
+        fn run_pattern(mut s: &str) {
+            let _ = prep_parser(s).pattern(&mut s);
         }
-
-        // TODO
+        run_pattern("hello");
+        run_pattern("/test/");
+        run_pattern("'this'");
+        run_pattern(":map");
+        run_pattern(":?map");
+        run_pattern(":map->[:str]:str");
     }
 }
 
diff --git a/src/pattern.rs b/src/pattern.rs
index 2f7166a..6286ff9 100644
--- a/src/pattern.rs
+++ b/src/pattern.rs
@@ -56,8 +56,8 @@ impl<O: Serialize> Pattern<O> {
         ).deserialize(der)?;
         // this should always be None
         debug_assert!(obj.is_none());
-        debug_assert!(packs.len() == 1);
-        let pack = packs.pop().unwrap();
+        debug_assert!(packs.len() <= 1);
+        let pack = packs.pop().unwrap_or_else(Default::default);
         let de = De::deserialize(vm::Unpacker::new(pack, MAX_CALLS));
         todo!()
     }
diff --git a/src/vm/de.rs b/src/vm/de.rs
index 985b1b2..e26ec5e 100644
--- a/src/vm/de.rs
+++ b/src/vm/de.rs
@@ -14,7 +14,6 @@ use serde::de::IntoDeserializer as _;
 
 use smallvec::SmallVec;
 
-use these::These;
 
 use super::Frame;
 use super::Interpreter;
@@ -275,31 +274,51 @@ where
     {
         if let Err(e) = self.step_in() { return Err(e); }
         let pat = self.interp.pat;
-        let target_type = self.frames().iter_active().fold(
+        let target_type = self.frames().iter_active().try_fold(
             Type::IgnoredAny,
             |target_type, frame| {
-                match (target_type, frame.get_type()) {
-                    (Type::IgnoredAny, Some((ty, _))) => ty,
-                    (ty, Some((Type::IgnoredAny, _))) => ty,
-                    (Type::String, Some((Type::Str, _))) => {
+                Ok(match (target_type, frame.get_type()) {
+                    // required type binds stronger than any/ignored_any
+                    (Type::IgnoredAny, Some((ty, true))) => ty,
+                    (Type::Any, Some((ty, true))) => ty,
+                    (ty, Some((Type::IgnoredAny, true))) => ty,
+                    (ty, Some((Type::Any, true))) => ty,
+                    // prefer owned if any branch prefers owned
+                    (Type::String, Some((Type::Str, true))) => {
                         Type::String
                     },
-                    (Type::Str, Some((Type::String, _))) => {
+                    (Type::Str, Some((Type::String, true))) => {
                         Type::String
                     },
-                    (Type::Bytes, Some((Type::ByteBuf, _))) => {
+                    (Type::Bytes, Some((Type::ByteBuf, true))) => {
                         Type::ByteBuf
                     },
-                    (Type::ByteBuf, Some((Type::Bytes, _))) => {
+                    (Type::ByteBuf, Some((Type::Bytes, true))) => {
                         Type::ByteBuf
                     },
+                    // types which are the same are okay
                     (left, Some((right, _))) if left == right => {
                         left
                     },
+                    // optional type vs Any/IgnoredAny prefers Any
+                    (Type::IgnoredAny, Some((_, false))) => Type::Any,
+                    (Type::Any, Some((_, false))) => Type::Any,
+                    // types which are not the same are an error because we
+                    // only request a specific type if it's actually required
+                    (left, Some((right, _))) => {
+                        return Err(todo!());
+                    },
                     _ => Type::Any,
-                }
+                })
             },
         );
+        let target_type = match target_type {
+            Ok(target_type) => target_type,
+            Err(e) => {
+                self.interp.error.insert(e);
+                return Err(D::Error::custom("type conflict"));
+            },
+        };
         match target_type {
             Type::Any => deserializer.deserialize_any(&mut *self),
             Type::IgnoredAny => {
@@ -840,7 +859,6 @@ mod tests {
     use crate::vm::PatternElement;
     use crate::vm::SerdeObject;
     use crate::vm::Frame;
-    use these::These;
     use serde_json::Deserializer as JsonDeserializer;
     use serde::de::DeserializeSeed as _;
 
@@ -886,7 +904,8 @@ mod tests {
         consts.strings.push("hello".into());
         consts.protos.push(vec![
             PatternElement::Value {
-                name_and_value: These::Both(0, Value::Type {
+                name: Some(0),
+                value: Some(Value::Type {
                     ty: Type::U64,
                     skippable: false,
                 }),
@@ -909,7 +928,8 @@ mod tests {
         consts.strings.push("hello".into());
         consts.protos.push(vec![
             PatternElement::Value {
-                name_and_value: These::Both(0, Value::Type {
+                name: Some(0),
+                value: Some(Value::Type {
                     ty: Type::U64,
                     skippable: false,
                 }),
@@ -933,7 +953,8 @@ mod tests {
         consts.strings.push("b".into());
         consts.protos.push(vec![
             PatternElement::Value {
-                name_and_value: These::Both(0, Value::Type {
+                name: Some(0),
+                value: Some(Value::Type {
                     ty: Type::U64,
                     skippable: true,
                 }),
@@ -941,7 +962,8 @@ mod tests {
         ]);
         consts.protos.push(vec![
             PatternElement::Value {
-                name_and_value: These::Both(1, Value::Type {
+                name: Some(1),
+                value: Some(Value::Type {
                     ty: Type::Bool,
                     skippable: true,
                 }),
@@ -986,12 +1008,14 @@ mod tests {
         consts.strings.push("value".into());
         consts.protos.push(vec![
             PatternElement::Value {
-                name_and_value: These::This(0),
+                name: Some(0),
+                value: None,
             },
         ]);
         consts.protos.push(vec![
             PatternElement::Value {
-                name_and_value: These::That(Value::Type {
+                name: None,
+                value: Some(Value::Type {
                     ty: Type::Map,
                     skippable: false,
                 }),
@@ -1000,7 +1024,8 @@ mod tests {
                 key_subtree: Some(0),
             },
             PatternElement::Value {
-                name_and_value: These::Both(1, Value::Type {
+                name: Some(1),
+                value: Some(Value::Type {
                     ty: Type::U64,
                     skippable: false,
                 }),
diff --git a/src/vm/mod.rs b/src/vm/mod.rs
index 8f20aae..06f12e5 100644
--- a/src/vm/mod.rs
+++ b/src/vm/mod.rs
@@ -14,7 +14,6 @@ use std::marker::PhantomData;
 use indexmap::IndexMap;
 use regex::Regex;
 use serde::Serialize;
-use these::These;
 
 use crate::Predicate;
 //use crate::errors::MatchError;
@@ -82,9 +81,10 @@ impl<O: Serialize> std::fmt::Debug for PatternConstants<O> {
 pub(crate) enum PatternElement {
     /// A value is the core capturing element.
     Value {
-        /// The index of the (string) name to apply to this value and/or the
-        /// expected value of this entry.
-        name_and_value: These<usize, Value>,
+        /// The index of the (string) name to apply to this value.
+        name: Option<usize>,
+        /// The expected value of this entry.
+        value: Option<Value>,
     },
     /// A tag is the core iterative element. It is always followed by a value.
     Tag {
@@ -173,7 +173,7 @@ pub(crate) enum PatternToken {
     String(usize, bool),
     Regex(usize, bool),
     Parameter(usize, bool),
-    KeySubtree(usize, bool),
+    KeySubtree(usize),
     ValueSubtree(usize, bool),
 
     /// Represents a predicate which must be applied.
@@ -424,19 +424,16 @@ impl<'pat> Frame<'pat> {
         &self,
     ) -> Option<(Type, bool)> {
         match self.op() {
-            | PatternElement::Value { name_and_value, .. }
-            if name_and_value.is_there()
-            => {
-                match name_and_value.there() {
-                    | Some(Value::String { skippable, .. })
-                    | Some(Value::Regex { skippable, .. })
+            PatternElement::Value { value: Some(value), .. } => {
+                match value {
+                    | Value::String { skippable, .. }
+                    | Value::Regex { skippable, .. }
                     => {
                         Some((Type::Str, !skippable))
                     },
-                    Some(Value::Type { ty, skippable }) => {
+                    Value::Type { ty, skippable } => {
                         Some((ty, !skippable))
                     },
-                    None => todo!(),
                 }
             },
             PatternElement::Tag { .. } => panic!("attempt to get type of tag"),
@@ -451,10 +448,8 @@ impl<'pat> Frame<'pat> {
     ) -> Option<&'pat str> {
         let strings = &pat.strings;
         match self.op() {
-            | PatternElement::Value { name_and_value, .. }
-            if name_and_value.is_here()
-            => {
-                Some(&*strings[name_and_value.here().unwrap()])
+            PatternElement::Value { name: Some(name), .. } => {
+                Some(&*strings[name])
             },
             PatternElement::Tag { .. } => panic!("attempt to get name of tag"),
             _ => None,
diff --git a/tests/basic_match.rs b/tests/basic_match.rs
index b8d0dd8..a1c62bb 100644
--- a/tests/basic_match.rs
+++ b/tests/basic_match.rs
@@ -9,7 +9,7 @@ use serde::Deserialize;
 #[test]
 fn test_basic() {
     let mut der = JsonDer::from_str(r#"{"foo": 1, "bar": {"baz": 2}}"#);
-    let pat = datafu::PatternBuilder::for_pattern("->[x]:map->[yk]y").compile().unwrap();
+    let pat = datafu::PatternBuilder::for_pattern("->[x]:?map->[yk]y").compile().unwrap();
     #[derive(Deserialize)]
     struct Values {
         x: String,