diff options
author | SoniEx2 <endermoneymod@gmail.com> | 2022-07-30 11:22:23 -0300 |
---|---|---|
committer | SoniEx2 <endermoneymod@gmail.com> | 2022-07-30 11:22:23 -0300 |
commit | a8778ff35bde88bb63d9fec769edf66e68d7969e (patch) | |
tree | e9a2b7d347420e3a88f831b6f20513fcb211df4a /src/parser.rs | |
parent | c2279c63912a47bf3078f5df3b3156ba0d9afe9f (diff) |
Initial work on 0.1.0
Missing the VM.
Diffstat (limited to 'src/parser.rs')
-rw-r--r-- | src/parser.rs | 182 |
1 files changed, 90 insertions, 92 deletions
diff --git a/src/parser.rs b/src/parser.rs index ff3407a..c929653 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,20 +1,7 @@ -/* - * This file is part of Datafu - * Copyright (C) 2021 Soni L. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <https://www.gnu.org/licenses/>. - */ +// Copyright (C) 2021-2022 Soni L. +// SPDX-License-Identifier: MIT OR Apache-2.0 + +//! The recursive-descent datafu language parser. use std::borrow::Borrow; use std::collections::BTreeMap; @@ -22,14 +9,13 @@ use std::mem::ManuallyDrop; use impl_trait::impl_trait; use regex::Regex; +use serde::Serialize; -use crate::PatternTypes; use crate::Predicate; use crate::errors::PatternError; use crate::vm::PatternConstants; use crate::vm::PatternElement; - /// try! with bools. (the b comes from bool.) macro_rules! bry { ($l:lifetime $e:expr) => { @@ -47,6 +33,7 @@ macro_rules! bry { // the following macros rely on unlabeled-break-through-labeled-block being an // error. // NOTE: always test changes to this module on nightly! +// still waiting for label-break-value stabilization... #[cfg(not(feature = "stable"))] /// labeled block. on nightly: better compile errors. but also works on stable. @@ -67,23 +54,45 @@ macro_rules! lblock { } } +/// Attempts to shift `s` forward by removing `prefix`. +/// +/// Returns whether `s` has had `prefix` removed. // can't use Pattern here :( fn strip_prefix(s: &mut &str, prefix: &str) -> bool { s.strip_prefix(prefix).map(|ns| *s = ns).is_some() } +/// Returns the position (index) of `sub` within `base`, in bytes. +/// +/// Returns bogus results if `base` and `sub` are unrelated. fn pos_of<'a>(base: &'a str, sub: &'a str) -> Option<usize> { - // FIXME + // FIXME is there any non-UB way to check if `sub` is in `base`? Some((sub.as_ptr() as usize) - (base.as_ptr() as usize)) } -struct SubtreeHelper<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> where Self: 'r { - root: &'r mut Parser<'s, P, O, T>, +/// Helper to collect "subtree" sections of the pattern. +/// +/// This is a RAII-like guard which handles cleaning up the parsed pattern when +/// dropped. +struct SubtreeHelper<'r, 's, PKey, OKey, O> +where + Self: 'r, + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, + O: Serialize, +{ + root: &'r mut Parser<'s, PKey, OKey, O>, } impl_trait! { - impl<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> SubtreeHelper<'r, 's, P, O, T> where Self: 'r { - fn start(value: &'r mut Parser<'s, P, O, T>) -> Self { + impl<'r, 's, PKey, OKey, O> SubtreeHelper<'r, 's, PKey, OKey, O> + where + Self: 'r, + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, + O: Serialize, + { + fn start(value: &'r mut Parser<'s, PKey, OKey, O>) -> Self { value.consts.protos.push(Default::default()); Self { root: value, @@ -99,7 +108,7 @@ impl_trait! { } impl trait std::ops::Deref { - type Target = Parser<'s, P, O, T>; + type Target = Parser<'s, PKey, OKey, O>; fn deref(&self) -> &Self::Target { &*self.root @@ -121,14 +130,30 @@ impl_trait! { } } -struct TagHelper<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> where Self: 'r { - root: &'r mut Parser<'s, P, O, T>, +/// Helper to collect "tag" sections of the pattern. +/// +/// This is a RAII-like guard which handles cleaning up the parsed pattern when +/// dropped. +struct TagHelper<'r, 's, PKey, OKey, O> +where + Self: 'r, + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, + O: Serialize, +{ + root: &'r mut Parser<'s, PKey, OKey, O>, len: usize, } impl_trait! { - impl<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> TagHelper<'r, 's, P, O, T> where Self: 'r { - fn start(value: &'r mut Parser<'s, P, O, T>) -> Self { + impl<'r, 's, PKey, OKey, O> TagHelper<'r, 's, PKey, OKey, O> + where + Self: 'r, + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, + O: Serialize, + { + fn start(value: &'r mut Parser<'s, PKey, OKey, O>) -> Self { let len = value.consts.protos.last().unwrap().len(); Self { root: value, @@ -141,7 +166,7 @@ impl_trait! { } impl trait std::ops::Deref { - type Target = Parser<'s, P, O, T>; + type Target = Parser<'s, PKey, OKey, O>; fn deref(&self) -> &Self::Target { &*self.root @@ -166,20 +191,30 @@ impl_trait! { } } -struct Parser<'s, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> { +struct Parser<'s, PKey, OKey, O> +where + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, + O: Serialize, +{ base: &'s str, - preds: Option<BTreeMap<P, Box<Predicate<T>>>>, - objs: Option<BTreeMap<O, T::Own>>, - pred_ids: BTreeMap<P, usize>, - obj_ids: BTreeMap<O, usize>, - consts: PatternConstants<T>, + preds: Option<BTreeMap<PKey, Box<Predicate>>>, + objs: Option<BTreeMap<OKey, O>>, + pred_ids: BTreeMap<PKey, usize>, + obj_ids: BTreeMap<OKey, usize>, + consts: PatternConstants<O>, closed_subtrees: std::ops::RangeFrom<usize>, } // These are documented using LPeg.re syntax // http://www.inf.puc-rio.br/~roberto/lpeg/re.html #[rustfmt::skip] -impl<'s, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> Parser<'s, P, O, T> { +impl<'s, PKey, OKey, O> Parser<'s, PKey, OKey, O> +where + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, + O: Serialize, +{ /// str_literal <- sp ( ( "'" str_char* ( "'" / ( !. -> ErrorStrEnd ) ) ( '?' -> MarkSkippable ) ) -> String ) sp /// str_char <- ( str_escape / [^%'] ) /// str_escape <- '%' ( '%' / "'" ) / ( ( '%' .? ) -> ErrorStrEscape ) @@ -451,7 +486,7 @@ impl<'s, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> Parser<'s, })) } - /// key_subtree <- sp '[' sp predicate* sp subtree sp ( ']' / unexpected_token / unexpected_end ) sp ( '?'? -> MarkSkippable ) + /// key_subtree <- sp '[' sp name? sp predicate* sp subtree sp ( ']' / unexpected_token / unexpected_end ) sp ( '?'? -> MarkSkippable ) fn key_subtree(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { let mut cursor = *s; Ok(lblock!('matches: { @@ -459,6 +494,8 @@ impl<'s, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> Parser<'s, bry!('matches strip_prefix(&mut cursor, "[")); self.sp(&mut cursor); let mut subtree = SubtreeHelper::start(&mut *self); + subtree.name(&mut cursor)?; + subtree.sp(&mut cursor); while subtree.predicate(&mut cursor)? { } subtree.sp(&mut cursor); @@ -566,17 +603,20 @@ impl<'s, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> Parser<'s, } } -pub(crate) fn parse<'s, P, O, T>( +/// Parses a DFU expression. +/// +/// The given +pub(crate) fn parse<'s, PKey, OKey, O>( input: &'s str, - preds: Option<BTreeMap<P, Box<Predicate<T>>>>, - objs: Option<BTreeMap<O, T::Own>> -) -> Result<PatternConstants<T>, PatternError<'s>> - where - P: Borrow<str> + Ord, - O: Borrow<str> + Ord, - T: PatternTypes, + preds: Option<BTreeMap<PKey, Box<Predicate>>>, + objs: Option<BTreeMap<OKey, O>> +) -> Result<PatternConstants<O>, PatternError<'s>> +where + PKey: Borrow<str> + Ord, + OKey: Borrow<str> + Ord, + O: Serialize, { - let mut parser = Parser::<'s, P, O, T> { + let mut parser = Parser::<'s, PKey, OKey, O> { base: input, preds: preds, objs: objs, @@ -598,57 +638,15 @@ pub(crate) fn parse<'s, P, O, T>( #[cfg(test)] mod tests { - use crate::PatternTypes; - use crate::RefOwn; - use crate::KVPair; use crate::errors::PatternError; use super::Parser; use proptest::prelude::*; - struct Dummy; - impl PatternTypes for Dummy { - type Ref = (); - type Own = (); - fn pairs<'b>( - item: RefOwn<'b, Self::Ref, Self::Own> - ) -> Option<Box<dyn Iterator<Item=KVPair<'b, Self>> + 'b>> { - let _ = item; - None - } - - fn get<'a, 'b>( - item: RefOwn<'b, Self::Ref, Self::Own>, - key: RefOwn<'a, Self::Ref, Self::Own> - ) -> Option<Option<KVPair<'b, Self>>> { - let _ = item; - let _ = key; - None - } - - fn matches( - left: RefOwn<'_, Self::Ref, Self::Own>, - right: RefOwn<'_, Self::Ref, Self::Own> - ) -> bool { - let _ = left; - let _ = right; - false - } - - fn as_str<'b>( - item: RefOwn<'b, Self::Ref, Self::Own> - ) -> Option<&'b str> { - match item { - RefOwn::Str(key) => Some(key), - _ => None, - } - } - } - #[test] fn test_identifier() { fn identifier_input<'s>(s: &mut &'s str) -> Result<bool, PatternError<'s>> { - let mut parser = Parser::<'s, &'static str, &'static str, Dummy> { + let mut parser = Parser::<'s, &'static str, &'static str, ()> { base: *s, preds: None, objs: None, @@ -674,8 +672,8 @@ mod tests { proptest! { #[test] fn test_no_crash(s in ".{0,4096}") { - fn prep_parser<'s>(s: &'s str) -> Parser<'s, &'static str, &'static str, Dummy> { - let mut parser = Parser::<'s, &'static str, &'static str, Dummy> { + fn prep_parser<'s>(s: &'s str) -> Parser<'s, &'static str, &'static str, ()> { + let mut parser = Parser::<'s, &'static str, &'static str, ()> { base: s, preds: None, objs: None, |