summary refs log tree commit diff stats
path: root/src/parser.rs
diff options
context:
space:
mode:
author SoniEx2 <endermoneymod@gmail.com> 2022-07-30 11:22:23 -0300
committer SoniEx2 <endermoneymod@gmail.com> 2022-07-30 11:22:23 -0300
commit a8778ff35bde88bb63d9fec769edf66e68d7969e (patch)
tree e9a2b7d347420e3a88f831b6f20513fcb211df4a /src/parser.rs
parent c2279c63912a47bf3078f5df3b3156ba0d9afe9f (diff)
Initial work on 0.1.0
Missing the VM.
Diffstat (limited to 'src/parser.rs')
-rw-r--r-- src/parser.rs 182
1 files changed, 90 insertions, 92 deletions
diff --git a/src/parser.rs b/src/parser.rs
index ff3407a..c929653 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1,20 +1,7 @@
-/*
- * This file is part of Datafu
- * Copyright (C) 2021  Soni L.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
- */
+// Copyright (C) 2021-2022 Soni L.
+// SPDX-License-Identifier: MIT OR Apache-2.0
+
+//! The recursive-descent datafu language parser.
 
 use std::borrow::Borrow;
 use std::collections::BTreeMap;
@@ -22,14 +9,13 @@ use std::mem::ManuallyDrop;
 
 use impl_trait::impl_trait;
 use regex::Regex;
+use serde::Serialize;
 
-use crate::PatternTypes;
 use crate::Predicate;
 use crate::errors::PatternError;
 use crate::vm::PatternConstants;
 use crate::vm::PatternElement;
 
-
 /// try! with bools. (the b comes from bool.)
 macro_rules! bry {
     ($l:lifetime $e:expr) => {
@@ -47,6 +33,7 @@ macro_rules! bry {
 // the following macros rely on unlabeled-break-through-labeled-block being an
 // error.
 // NOTE: always test changes to this module on nightly!
+// still waiting for label-break-value stabilization...
 
 #[cfg(not(feature = "stable"))]
 /// labeled block. on nightly: better compile errors. but also works on stable.
@@ -67,23 +54,45 @@ macro_rules! lblock {
     }
 }
 
+/// Attempts to shift `s` forward by removing `prefix`.
+///
+/// Returns whether `s` has had `prefix` removed.
 // can't use Pattern here :(
 fn strip_prefix(s: &mut &str, prefix: &str) -> bool {
     s.strip_prefix(prefix).map(|ns| *s = ns).is_some()
 }
 
+/// Returns the position (index) of `sub` within `base`, in bytes.
+///
+/// Returns bogus results (or panics on subtraction overflow when overflow checks are enabled) if `base` and `sub` are unrelated.
 fn pos_of<'a>(base: &'a str, sub: &'a str) -> Option<usize> {
-    // FIXME
+    // FIXME is there any non-UB way to check if `sub` is in `base`?
     Some((sub.as_ptr() as usize) - (base.as_ptr() as usize))
 }
 
-struct SubtreeHelper<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> where Self: 'r {
-    root: &'r mut Parser<'s, P, O, T>,
+/// Helper to collect "subtree" sections of the pattern.
+///
+/// This is a RAII-like guard which handles cleaning up the parsed pattern when
+/// dropped.
+struct SubtreeHelper<'r, 's, PKey, OKey, O>
+where
+    Self: 'r,
+    PKey: Borrow<str> + Ord,
+    OKey: Borrow<str> + Ord,
+    O: Serialize,
+{
+    root: &'r mut Parser<'s, PKey, OKey, O>,
 }
 
 impl_trait! {
-    impl<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> SubtreeHelper<'r, 's, P, O, T> where Self: 'r {
-        fn start(value: &'r mut Parser<'s, P, O, T>) -> Self {
+    impl<'r, 's, PKey, OKey, O> SubtreeHelper<'r, 's, PKey, OKey, O>
+    where
+        Self: 'r,
+        PKey: Borrow<str> + Ord,
+        OKey: Borrow<str> + Ord,
+        O: Serialize,
+    {
+        fn start(value: &'r mut Parser<'s, PKey, OKey, O>) -> Self {
             value.consts.protos.push(Default::default());
             Self {
                 root: value,
@@ -99,7 +108,7 @@ impl_trait! {
         }
 
         impl trait std::ops::Deref {
-            type Target = Parser<'s, P, O, T>;
+            type Target = Parser<'s, PKey, OKey, O>;
 
             fn deref(&self) -> &Self::Target {
                 &*self.root
@@ -121,14 +130,30 @@ impl_trait! {
     }
 }
 
-struct TagHelper<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> where Self: 'r {
-    root: &'r mut Parser<'s, P, O, T>,
+/// Helper to collect "tag" sections of the pattern.
+///
+/// This is a RAII-like guard which handles cleaning up the parsed pattern when
+/// dropped.
+struct TagHelper<'r, 's, PKey, OKey, O>
+where
+    Self: 'r,
+    PKey: Borrow<str> + Ord,
+    OKey: Borrow<str> + Ord,
+    O: Serialize,
+{
+    root: &'r mut Parser<'s, PKey, OKey, O>,
     len: usize,
 }
 
 impl_trait! {
-    impl<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> TagHelper<'r, 's, P, O, T> where Self: 'r {
-        fn start(value: &'r mut Parser<'s, P, O, T>) -> Self {
+    impl<'r, 's, PKey, OKey, O> TagHelper<'r, 's, PKey, OKey, O>
+    where
+        Self: 'r,
+        PKey: Borrow<str> + Ord,
+        OKey: Borrow<str> + Ord,
+        O: Serialize,
+    {
+        fn start(value: &'r mut Parser<'s, PKey, OKey, O>) -> Self {
             let len = value.consts.protos.last().unwrap().len();
             Self {
                 root: value,
@@ -141,7 +166,7 @@ impl_trait! {
         }
 
         impl trait std::ops::Deref {
-            type Target = Parser<'s, P, O, T>;
+            type Target = Parser<'s, PKey, OKey, O>;
 
             fn deref(&self) -> &Self::Target {
                 &*self.root
@@ -166,20 +191,30 @@ impl_trait! {
     }
 }
 
-struct Parser<'s, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> {
+struct Parser<'s, PKey, OKey, O>
+where
+    PKey: Borrow<str> + Ord,
+    OKey: Borrow<str> + Ord,
+    O: Serialize,
+{
     base: &'s str,
-    preds: Option<BTreeMap<P, Box<Predicate<T>>>>,
-    objs: Option<BTreeMap<O, T::Own>>,
-    pred_ids: BTreeMap<P, usize>,
-    obj_ids: BTreeMap<O, usize>,
-    consts: PatternConstants<T>,
+    preds: Option<BTreeMap<PKey, Box<Predicate>>>,
+    objs: Option<BTreeMap<OKey, O>>,
+    pred_ids: BTreeMap<PKey, usize>,
+    obj_ids: BTreeMap<OKey, usize>,
+    consts: PatternConstants<O>,
     closed_subtrees: std::ops::RangeFrom<usize>,
 }
 
 // These are documented using LPeg.re syntax
 // http://www.inf.puc-rio.br/~roberto/lpeg/re.html
 #[rustfmt::skip]
-impl<'s, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> Parser<'s, P, O, T> {
+impl<'s, PKey, OKey, O> Parser<'s, PKey, OKey, O>
+where
+    PKey: Borrow<str> + Ord,
+    OKey: Borrow<str> + Ord,
+    O: Serialize,
+{
     /// str_literal <- sp ( ( "'" str_char*  ( "'" / ( !. -> ErrorStrEnd ) ) ( '?' -> MarkSkippable ) ) -> String ) sp
     /// str_char <- ( str_escape / [^%'] )
     /// str_escape <- '%' ( '%' / "'" ) / ( ( '%' .? ) -> ErrorStrEscape )
@@ -451,7 +486,7 @@ impl<'s, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> Parser<'s,
         }))
     }
 
-    /// key_subtree <- sp '[' sp predicate* sp subtree sp ( ']' / unexpected_token / unexpected_end ) sp ( '?'? -> MarkSkippable )
+    /// key_subtree <- sp '[' sp name? sp predicate* sp subtree sp ( ']' / unexpected_token / unexpected_end ) sp ( '?'? -> MarkSkippable )
     fn key_subtree(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> {
         let mut cursor = *s;
         Ok(lblock!('matches: {
@@ -459,6 +494,8 @@ impl<'s, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> Parser<'s,
             bry!('matches strip_prefix(&mut cursor, "["));
             self.sp(&mut cursor);
             let mut subtree = SubtreeHelper::start(&mut *self);
+            subtree.name(&mut cursor)?;
+            subtree.sp(&mut cursor);
             while subtree.predicate(&mut cursor)? {
             }
             subtree.sp(&mut cursor);
@@ -566,17 +603,20 @@ impl<'s, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> Parser<'s,
     }
 }
 
-pub(crate) fn parse<'s, P, O, T>(
+/// Parses a DFU expression.
+///
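+/// The given `preds` and `objs` are optional maps, keyed by string-like names, that are handed to the parser along with `input`.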
+pub(crate) fn parse<'s, PKey, OKey, O>(
     input: &'s str,
-    preds: Option<BTreeMap<P, Box<Predicate<T>>>>,
-    objs: Option<BTreeMap<O, T::Own>>
-) -> Result<PatternConstants<T>, PatternError<'s>>
-    where
-        P: Borrow<str> + Ord,
-        O: Borrow<str> + Ord,
-        T: PatternTypes,
+    preds: Option<BTreeMap<PKey, Box<Predicate>>>,
+    objs: Option<BTreeMap<OKey, O>>
+) -> Result<PatternConstants<O>, PatternError<'s>>
+where
+    PKey: Borrow<str> + Ord,
+    OKey: Borrow<str> + Ord,
+    O: Serialize,
 {
-    let mut parser = Parser::<'s, P, O, T> {
+    let mut parser = Parser::<'s, PKey, OKey, O> {
         base: input,
         preds: preds,
         objs: objs,
@@ -598,57 +638,15 @@ pub(crate) fn parse<'s, P, O, T>(
 
 #[cfg(test)]
 mod tests {
-    use crate::PatternTypes;
-    use crate::RefOwn;
-    use crate::KVPair;
     use crate::errors::PatternError;
     use super::Parser;
 
     use proptest::prelude::*;
 
-    struct Dummy;
-    impl PatternTypes for Dummy {
-        type Ref = ();
-        type Own = ();
-        fn pairs<'b>(
-            item: RefOwn<'b, Self::Ref, Self::Own>
-        ) -> Option<Box<dyn Iterator<Item=KVPair<'b, Self>> + 'b>> {
-            let _ = item;
-            None
-        }
-
-        fn get<'a, 'b>(
-            item: RefOwn<'b, Self::Ref, Self::Own>,
-            key: RefOwn<'a, Self::Ref, Self::Own>
-        ) -> Option<Option<KVPair<'b, Self>>> {
-            let _ = item;
-            let _ = key;
-            None
-        }
-
-        fn matches(
-            left: RefOwn<'_, Self::Ref, Self::Own>,
-            right: RefOwn<'_, Self::Ref, Self::Own>
-        ) -> bool {
-            let _ = left;
-            let _ = right;
-            false
-        }
-
-        fn as_str<'b>(
-            item: RefOwn<'b, Self::Ref, Self::Own>
-        ) -> Option<&'b str> {
-            match item {
-                RefOwn::Str(key) => Some(key),
-                _ => None,
-            }
-        }
-    }
-
     #[test]
     fn test_identifier() {
         fn identifier_input<'s>(s: &mut &'s str) -> Result<bool, PatternError<'s>> {
-            let mut parser = Parser::<'s, &'static str, &'static str, Dummy> {
+            let mut parser = Parser::<'s, &'static str, &'static str, ()> {
                 base: *s,
                 preds: None,
                 objs: None,
@@ -674,8 +672,8 @@ mod tests {
     proptest! {
         #[test]
         fn test_no_crash(s in ".{0,4096}") {
-            fn prep_parser<'s>(s: &'s str) -> Parser<'s, &'static str, &'static str, Dummy> {
-                let mut parser = Parser::<'s, &'static str, &'static str, Dummy> {
+            fn prep_parser<'s>(s: &'s str) -> Parser<'s, &'static str, &'static str, ()> {
+                let mut parser = Parser::<'s, &'static str, &'static str, ()> {
                     base: s,
                     preds: None,
                     objs: None,