summary refs log tree commit diff stats
path: root/src/parser.rs
diff options
context:
space:
mode:
author SoniEx2 <endermoneymod@gmail.com> 2022-10-30 00:49:56 -0300
committer SoniEx2 <endermoneymod@gmail.com> 2022-10-30 00:49:56 -0300
commit a66111d9f9c99f91d9256209b5e9a65e42cde7f5 (patch)
tree aba85fd481ade96ba1be135c459af9d20eb409d7 /src/parser.rs
parent c1210b511af8ffada948550180360859b64009d2 (diff)
Implement parser
Diffstat (limited to 'src/parser.rs')
-rw-r--r-- src/parser.rs 174
1 files changed, 133 insertions, 41 deletions
diff --git a/src/parser.rs b/src/parser.rs
index 97185ac..595f157 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -13,6 +13,7 @@ use serde::Serialize;
 
 use crate::Predicate;
 use crate::errors::PatternError;
+use crate::vm;
 use crate::vm::PatternConstants;
 use crate::vm::PatternElement;
 use crate::vm::PatternToken;
@@ -72,6 +73,48 @@ fn pos_of<'a>(base: &'a str, sub: &'a str) -> Option<usize> {
     Some((sub.as_ptr() as usize) - (base.as_ptr() as usize))
 }
 
+/// Collects value-ish PatternTokens into a PatternElement::Value with the
+/// given already-collected name.
+fn collect_value(
+    name: Option<usize>,
+    tokens: &[PatternToken],
+) -> PatternElement {
+    let value = match tokens {
+        &[PatternToken::String(index, skippable)] => {
+            vm::Value::String { index, skippable }
+        },
+        &[PatternToken::Regex(index, skippable)] => {
+            vm::Value::Regex { index, skippable }
+        },
+        &[PatternToken::Type(ty, skippable)] => {
+            vm::Value::Type { ty, skippable }
+        },
+        other => {
+            unreachable!("{other:?}")
+        },
+    };
+    PatternElement::Value {
+        name,
+        value: Some(value),
+    }
+}
+
+/// Collects a slice of PatternToken into a PatternElement::Value.
+fn collect_name_and_value(tokens: &[PatternToken]) -> PatternElement {
+    match tokens {
+        &[PatternToken::Identifier(name)] => {
+            PatternElement::Value {
+                name: Some(name),
+                value: None,
+            }
+        },
+        &[PatternToken::Identifier(name), ref value @ ..] => {
+            collect_value(Some(name), value)
+        },
+        value => collect_value(None, value),
+    }
+}
+
 /// Helper to collect "subtree" sections of the pattern.
 ///
 /// This is a RAII-like guard which handles cleaning up the parsed pattern when
@@ -95,7 +138,7 @@ impl_trait! {
         O: Serialize,
     {
         fn start(value: &'r mut Parser<'s, PKey, OKey, O>) -> Self {
-            value.tokens.push(Default::default());
+            value.consts.protos.push(Default::default());
             Self {
                 root: value,
             }
@@ -103,9 +146,9 @@ impl_trait! {
 
         fn commit(self) -> usize {
             let mut self_ = ManuallyDrop::new(self);
-            let proto = self_.root.tokens.pop().unwrap();
+            let proto = self_.root.consts.protos.pop().unwrap();
             let id = self_.root.closed_subtrees.next().unwrap();
-            self_.root.tokens.insert(id, proto);
+            self_.root.consts.protos.insert(id, proto);
             id
         }
 
@@ -126,7 +169,7 @@ impl_trait! {
         impl trait Drop {
             fn drop(&mut self) {
                 // remove "partial" proto
-                self.root.tokens.pop().expect("SubtreeHelper");
+                self.root.consts.protos.pop().expect("SubtreeHelper");
             }
         }
     }
@@ -156,7 +199,7 @@ impl_trait! {
         O: Serialize,
     {
         fn start(value: &'r mut Parser<'s, PKey, OKey, O>) -> Self {
-            let len = value.tokens.last().unwrap().len();
+            let len = value.tokens.len();
             Self {
                 root: value,
                 len : len,
@@ -164,7 +207,40 @@ impl_trait! {
         }
 
         fn commit(self) {
-            let _self = std::mem::ManuallyDrop::new(self);
+            let _self = &mut *std::mem::ManuallyDrop::new(self);
+            // we could write a proper parser for the token stream.
+            //
+            // we could also just do this instead.
+            match _self.root.tokens.drain(_self.len..).as_slice() {
+                &[
+                    PatternToken::Arrow,
+                    PatternToken::KeySubtree(index),
+                    ref name_value @ ..,
+                    PatternToken::End,
+                ] => {
+                    let tag = PatternElement::Tag {
+                        key_subtree: Some(index),
+                    };
+                    _self.root.consts.protos.last_mut().unwrap().push(tag);
+                    let value = collect_name_and_value(name_value);
+                    _self.root.consts.protos.last_mut().unwrap().push(value);
+                },
+                &[
+                    PatternToken::Arrow,
+                    ref name_value @ ..,
+                    PatternToken::End,
+                ] => {
+                    let tag = PatternElement::Tag {
+                        key_subtree: None,
+                    };
+                    _self.root.consts.protos.last_mut().unwrap().push(tag);
+                    let value = collect_name_and_value(name_value);
+                    _self.root.consts.protos.last_mut().unwrap().push(value);
+                },
+                other => {
+                    unreachable!("{other:?}");
+                },
+            };
         }
 
         impl trait std::ops::Deref {
@@ -183,11 +259,9 @@ impl_trait! {
 
         impl trait Drop {
             fn drop(&mut self) {
-                let proto = self.root.tokens.last_mut().unwrap();
+                let proto = &mut self.root.tokens;
                 assert!(proto.len() >= self.len);
-                while proto.len() > self.len {
-                    let _ = proto.pop();
-                }
+                proto.drain(self.len..);
             }
         }
     }
@@ -205,7 +279,7 @@ where
     pred_ids: BTreeMap<PKey, usize>,
     obj_ids: BTreeMap<OKey, usize>,
     consts: PatternConstants<O>,
-    tokens: Vec<Vec<PatternToken>>,
+    tokens: Vec<PatternToken>,
     closed_subtrees: std::ops::RangeFrom<usize>,
 }
 
@@ -281,7 +355,7 @@ where
                 self_.consts.strings.push(string);
                 self_.consts.strings.len() - 1
             });
-            let proto = self.tokens.last_mut().expect("protos");
+            let proto = &mut self.tokens;
             proto.push(PatternToken::String(id, skippable));
             *s = cursor;
             true
@@ -339,7 +413,7 @@ where
                 self_.consts.regices.push(re);
                 self_.consts.regices.len() - 1
             });
-            let proto = self.tokens.last_mut().expect("protos");
+            let proto = &mut self.tokens;
             proto.push(PatternToken::Regex(id, skippable));
             *s = cursor;
             true
@@ -357,7 +431,6 @@ where
                 _ if self.re_literal(&mut cursor)? => {},
                 _ if self.predicate(&mut cursor)? => {},
                 _ if self.ty(&mut cursor)? => {},
-                //_ if self.key_subtree(&mut cursor)? => {},
                 _ => bry!('matches false),
             }
             self.sp(&mut cursor);
@@ -374,7 +447,7 @@ where
             bry!('matches strip_prefix(&mut cursor, "->"));
             let mut self_ = TagHelper::start(&mut *self);
             {
-                let proto = self_.tokens.last_mut().expect("protos");
+                let proto = &mut self_.tokens;
                 proto.push(PatternToken::Arrow);
             }
             self_.sp(&mut cursor);
@@ -387,7 +460,7 @@ where
             }
             self_.sp(&mut cursor);
             {
-                let proto = self_.tokens.last_mut().expect("protos");
+                let proto = &mut self_.tokens;
                 proto.push(PatternToken::End);
             }
             self_.commit();
@@ -431,7 +504,7 @@ where
                 self.consts.strings.push(name.into());
                 self.consts.strings.len() - 1
             });
-            let proto = self.tokens.last_mut().expect("protos");
+            let proto = &mut self.tokens;
             proto.push(PatternToken::Identifier(id));
             self.sp(&mut cursor);
             *s = cursor;
@@ -465,7 +538,7 @@ where
                 },
                 Ok,
             )?;
-            let proto = self.tokens.last_mut().expect("protos");
+            let proto = &mut self.tokens;
             proto.push(PatternToken::Parameter(id, skippable));
             self.sp(&mut cursor);
             *s = cursor;
@@ -484,7 +557,7 @@ where
             let start = cursor;
             bry!('matches self.identifier(&mut cursor)?);
             let name = &start[..pos_of(start, cursor).unwrap_or(start.len())];
-            let proto = self.tokens.last_mut().expect("protos");
+            let proto = &mut self.tokens;
             proto.push(PatternToken::Type(match name {
                 "bool" => Type::Bool,
                 "i8" => Type::I8,
@@ -526,7 +599,7 @@ where
         Ok(lblock!('matches: {
             self.sp(&mut cursor);
             bry!('matches strip_prefix(&mut cursor, ":"));
-            let custom = strip_prefix(&mut cursor, "$");
+            bry!('matches strip_prefix(&mut cursor, "$"));
             let skippable = strip_prefix(&mut cursor, "?");
             let start = cursor;
             bry!('matches self.identifier(&mut cursor)?);
@@ -547,7 +620,7 @@ where
                 },
                 Ok,
             )?;
-            let proto = self.tokens.last_mut().expect("protos");
+            let proto = &mut self.tokens;
             proto.push(PatternToken::ApplyPredicate(id, skippable));
             self.sp(&mut cursor);
             *s = cursor;
@@ -557,7 +630,8 @@ where
         }))
     }
 
-    /// key_subtree <- sp '[' sp ( matcher / name sp matcher? ) sp subtree sp ( ']' / unexpected_token / unexpected_end ) sp ( '?'? -> MarkSkippable )
+    /// key_subtree <- sp '[' sp ( matcher / name sp matcher? ) sp subtree sp ( ']' / unexpected_token / unexpected_end ) sp
+    // ( '?'? -> MarkSkippable )
     fn key_subtree(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
         let mut cursor = *s;
         Ok(lblock!('matches: {
@@ -565,11 +639,17 @@ where
             bry!('matches strip_prefix(&mut cursor, "["));
             self.sp(&mut cursor);
             let mut subtree = SubtreeHelper::start(&mut *self);
+            // FIXME handle `?`
+            let marker = subtree.tokens.len();
             if !subtree.matcher(&mut cursor)? {
                 bry!('matches subtree.name(&mut cursor)?);
                 subtree.sp(&mut cursor);
                 let _ = subtree.matcher(&mut cursor)?;
             }
+            let value = match subtree.tokens.drain(marker..).as_slice() {
+                name_value => collect_name_and_value(name_value),
+            };
+            subtree.consts.protos.last_mut().unwrap().push(value);
             subtree.sp(&mut cursor);
             bry!('matches subtree.subtree(&mut cursor)?);
             subtree.sp(&mut cursor);
@@ -581,11 +661,11 @@ where
                 subtree.unexpected_end(&mut cursor)?
             );
             subtree.sp(&mut cursor);
-            let skippable = strip_prefix(&mut cursor, "?");
+            //let skippable = strip_prefix(&mut cursor, "?");
             *s = cursor;
             let id = subtree.commit();
-            let proto = self.tokens.last_mut().expect("protos");
-            proto.push(PatternToken::KeySubtree(id, skippable));
+            let proto = &mut self.tokens;
+            proto.push(PatternToken::KeySubtree(id));
             true
         }))
     }
@@ -611,7 +691,7 @@ where
             let skippable = strip_prefix(&mut cursor, "?");
             *s = cursor;
             let id = subtree.commit();
-            let proto = self.tokens.last_mut().expect("protos");
+            let proto = &mut self.tokens;
             proto.push(PatternToken::ValueSubtree(id, skippable));
             true
         }))
@@ -641,7 +721,7 @@ where
         }
         self.sp(&mut cursor);
         while self.value_subtree(&mut cursor)? {
-            let proto = self.tokens.last_mut().expect("protos");
+            let proto = &mut self.tokens;
             proto.push(PatternToken::End);
         }
         self.sp(&mut cursor);
@@ -655,7 +735,19 @@ where
         let mut cursor = *s;
         Ok(lblock!('matches: {
             let mut subtree = SubtreeHelper::start(&mut *self);
+            // FIXME handle `?`
+            let marker = subtree.tokens.len();
             let _ = subtree.matcher(&mut cursor)?;
+            let value = match subtree.tokens.drain(marker..).as_slice() {
+                &[] => {
+                    PatternElement::Value {
+                        name: None,
+                        value: None,
+                    }
+                },
+                value => collect_value(None, value),
+            };
+            subtree.consts.protos.last_mut().unwrap().push(value);
             bry!('matches
                 subtree.subtree(&mut cursor)?
                 ||
@@ -693,7 +785,11 @@ where
 
     assert!(matched);
     assert_eq!(parsed, "");
-    assert_eq!(parser.closed_subtrees.next().unwrap(), parser.tokens.len());
+    assert_eq!(
+        parser.closed_subtrees.next().unwrap(),
+        parser.consts.protos.len(),
+    );
+    assert!(parser.consts.protos.iter().all(|proto| !proto.is_empty()));
 
     Ok(parser.consts)
 }
@@ -709,7 +805,6 @@ mod tests {
         let mut parser = Parser::<
             's, &'static str, &'static str, ()
         >::new(s, None, None);
-        parser.tokens.push(Default::default());
         parser
     }
 
@@ -756,19 +851,16 @@ mod tests {
     }
 
     #[test]
-    fn test_pattern_tag() {
-        fn check_tag<'s>(s: &mut &'s str) -> (
-            Result<bool, PatternError<'s>>,
-            Parser::<
-                's, &'static str, &'static str, ()
-            >
-        ) {
-            let mut parser = prep_parser(s);
-            let result = parser.tag(s);
-            (result, parser)
+    fn test_no_crash_some_patterns() {
+        fn run_pattern(mut s: &str) {
+            let _ = prep_parser(s).pattern(&mut s);
         }
-
-        // TODO
+        run_pattern("hello");
+        run_pattern("/test/");
+        run_pattern("'this'");
+        run_pattern(":map");
+        run_pattern(":?map");
+        run_pattern(":map->[:str]:str");
     }
 }