diff options
author | SoniEx2 <endermoneymod@gmail.com> | 2021-02-07 22:19:21 -0300 |
---|---|---|
committer | SoniEx2 <endermoneymod@gmail.com> | 2021-02-07 22:20:34 -0300 |
commit | 69652efe8ad9738a94fef571c8b81e342f96e7b4 (patch) | |
tree | 9b02efcb139894ac3b5df2667be313f2a9df4319 | |
parent | d81ce99e0d1f1371ba9165a67280a810ee27bf82 (diff) |
Finish porting parser
-rw-r--r-- | Cargo.toml | 9 | ||||
-rw-r--r-- | HACKING.md | 22 | ||||
-rw-r--r-- | src/errors.rs | 36 | ||||
-rw-r--r-- | src/lib.rs | 14 | ||||
-rw-r--r-- | src/parser.rs | 669 | ||||
-rw-r--r-- | src/pattern.rs | 17 | ||||
-rw-r--r-- | src/vm.rs | 64 | ||||
-rw-r--r-- | tests/basic_match.rs | 51 | ||||
-rw-r--r-- | tests/common/mod.rs | 10 | ||||
-rw-r--r-- | tests/parser_prop.rs | 33 |
10 files changed, 873 insertions, 52 deletions
diff --git a/Cargo.toml b/Cargo.toml index ba09f9d..dbdb60a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,3 +10,12 @@ repository = "https://soniex2.autistic.space/git-repos/dfu.git" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +boolinator = "2.4.0" +regex = "1" + +[dev-dependencies] +proptest = "0.10.1" + +[features] +default = [] +stable = [] diff --git a/HACKING.md b/HACKING.md new file mode 100644 index 0000000..3d3b52e --- /dev/null +++ b/HACKING.md @@ -0,0 +1,22 @@ +Developer notes +=============== + +This file documents some potentially-unfamiliar patterns contained within this +codebase. + +`boolinator` +------------ + +This crate makes use of `boolinator` in places. Make sure you're familiar with +it. + +`bool?` (or close enough) +------------------------- + +Rust doesn't have `?` on `bool`. This crate uses an `&& { side_effects; true }` +pattern sometimes. The PEG-like parser uses a `bry!` macro (`try!` for bools). + +`proptest` +---------- + +This crate makes extensive use of proptests, because they are awesome! diff --git a/src/errors.rs b/src/errors.rs index 5c6a0c8..e98483f 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -16,10 +16,38 @@ * along with this program. If not, see <https://www.gnu.org/licenses/>. */ -pub struct PatternError; +/// These are errors that may be returned by the pattern compiler when +/// compiling a pattern. +/// +/// "String" here refers to a string literal in the pattern, not the input +/// string. The input string is referred to as "the input". +#[derive(Debug)] +pub enum PatternError<'a> { + // Syntax Errors: + + /// The input contains an invalid string escape. + StringEscape(usize, &'a str), + /// The input ends in the middle of a string literal. + StringEnd(usize, &'a str), + /// The input contains an invalid regex escape. + RegexEscape(usize, &'a str), + /// The input ends in the middle of a regex literal. + RegexEnd(usize, &'a str), + /// The input contains characters that don't make up a token. + Token(usize, &'a str), + + // Link Errors: + + /// The input requests a parameter that wasn't provided. + UnknownParameter(usize, &'a str), + /// The input requests a predicate that wasn't provided. + UnknownPredicate(usize, &'a str), + /// The input contains an invalid regex. + Regex(usize, &'a str, ::regex::Error), +} /// Error type returned by the matcher. -#[derive(Clone)] +#[derive(Clone, Debug)] pub enum MatchError { /// Returned if the pattern nests too deeply. StackOverflow, @@ -27,8 +55,8 @@ pub enum MatchError { ValidationError, /// Returned if the pattern attempts an unsupported operation. /// - /// In particular, if the PatternTypes doesn't support get or pairs for a - /// given value, this error will be returned. It can be treated as a + /// In particular, if the PatternTypes doesn't support `get` or `pairs` + /// for a given value, this error will be returned. It can be treated as a /// ValidationError, or as a bug in the pattern, at the user's discretion. UnsupportedOperation, /// Returned if an unspecified non-critical error occurred. diff --git a/src/lib.rs b/src/lib.rs index 0e14d97..2407115 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,6 +15,14 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <https://www.gnu.org/licenses/>. */ +#![warn(rust_2018_idioms)] +#![cfg_attr(not(feature = "stable"), feature(label_break_value))] + +extern crate boolinator; +extern crate regex; + +#[cfg(test)] +extern crate proptest; pub mod errors; mod parser; @@ -25,6 +33,7 @@ pub use pattern::Pattern; // TODO replace with GATs /// A borrowed or owned value of various types. +#[derive(Debug)] pub enum RefOwn<'b, T: ?Sized, U> { /// Borrowed T. Ref(&'b T), @@ -93,6 +102,7 @@ pub trait PatternTypes { item: RefOwn<'b, Self::Ref, Self::Own> ) -> Option<Box<dyn Iterator<Item=KVPair<'b, Self>> + 'b>> { // TODO remove these default impls that only exist for testing purposes + let _ = item; let x = None; Some(Box::new(x.into_iter())) } @@ -104,6 +114,8 @@ pub trait PatternTypes { key: RefOwn<'a, Self::Ref, Self::Own> ) -> Option<Option<KVPair<'b, Self>>> { // TODO remove these default impls that only exist for testing purposes + let _ = item; + let _ = key; Some(None) } @@ -118,4 +130,4 @@ pub trait PatternTypes { } // TODO -type Predicate<T> = dyn (Fn(RefOwn<<T as PatternTypes>::Ref, <T as PatternTypes>::Own>) -> bool) + Send + Sync; +pub type Predicate<T> = dyn (Fn(RefOwn<'_, <T as PatternTypes>::Ref, <T as PatternTypes>::Own>) -> bool) + Send + Sync; diff --git a/src/parser.rs b/src/parser.rs index e2caa4f..00fbcb1 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -16,13 +16,674 @@ * along with this program. If not, see <https://www.gnu.org/licenses/>. */ -// TODO +use std::borrow::Borrow; +use std::collections::BTreeMap; +use std::mem::ManuallyDrop; + +use boolinator::Boolinator; +use regex::Regex; use crate::PatternTypes; +use crate::Predicate; use crate::errors::PatternError; use crate::vm::PatternConstants; -//use crate::vm::PatternElement; +use crate::vm::PatternElement; + +/// try! with bools. (the b comes from bool.) +macro_rules! bry { + ($l:lifetime $e:expr) => { + if !$e { + break $l false; + } + }; + ($e:expr) => { + if !$e { + return false; + } + }; +} + +// the following macros rely on unlabeled-break-through-labeled-block being an +// error. +// NOTE: always test changes to this module on nightly! + +#[cfg(not(feature = "stable"))] +/// labeled block. on nightly: better compile errors. but also works on stable. +macro_rules! lblock { + // unfortunately ($l:lifetime : $b:block) => { $l: $b } didn't work. + ($($t:tt)+) => { + $($t)+ + } +} + +#[cfg(feature = "stable")] +/// labeled block. on nightly: better compile errors. but also works on stable. +macro_rules! lblock { + ($l:lifetime : $b:block) => { + $l: loop { + break $b + } + } +} + +// can't use Pattern here :( +fn strip_prefix(s: &mut &str, prefix: &str) -> bool { + s.strip_prefix(prefix).map(|ns| *s = ns).is_some() +} + +fn pos_of<'a>(base: &'a str, sub: &'a str) -> Option<usize> { + // FIXME + Some((sub.as_ptr() as usize) - (base.as_ptr() as usize)) +} + +struct SubtreeHelper<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> where Self: 'r { + root: &'r mut Parser<'s, P, O, T>, +} + +impl<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> SubtreeHelper<'r, 's, P, O, T> where Self: 'r { + fn start(value: &'r mut Parser<'s, P, O, T>) -> Self { + value.consts.protos.push(Default::default()); + Self { + root: value, + } + } + + fn commit(mut self) -> usize { + let mut self_ = ManuallyDrop::new(self); + let proto = self_.root.consts.protos.pop().unwrap(); + let id = self_.root.closed_subtrees.next().unwrap(); + self_.root.consts.protos.insert(id, proto); + id + } +} + +impl<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> std::ops::Deref for SubtreeHelper<'r, 's, P, O, T> where Self: 'r { + type Target = Parser<'s, P, O, T>; + + fn deref(&self) -> &Self::Target { + &*self.root + } +} + +impl<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> std::ops::DerefMut for SubtreeHelper<'r, 's, P, O, T> where Self: 'r { + fn deref_mut(&mut self) -> &mut Self::Target { + self.root + } +} + +impl<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> Drop for SubtreeHelper<'r, 's, P, O, T> where Self: 'r { + fn drop(&mut self) { + // remove "partial" proto + self.root.consts.protos.pop().expect("SubtreeHelper"); + } +} + +struct TagHelper<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> where Self: 'r { + root: &'r mut Parser<'s, P, O, T>, + len: usize, +} + +impl<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> TagHelper<'r, 's, P, O, T> where Self: 'r { + fn start(value: &'r mut Parser<'s, P, O, T>) -> Self { + let len = value.consts.protos.last().unwrap().len(); + Self { + root: value, + len : len, + } + } + + fn commit(mut self) { + std::mem::forget(self) + } +} + +impl<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> std::ops::Deref for TagHelper<'r, 's, P, O, T> where Self: 'r { + type Target = Parser<'s, P, O, T>; + + fn deref(&self) -> &Self::Target { + &*self.root + } +} + +impl<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> std::ops::DerefMut for TagHelper<'r, 's, P, O, T> where Self: 'r { + fn deref_mut(&mut self) -> &mut Self::Target { + self.root + } +} -pub(crate) fn parse<T: PatternTypes>(s: &str, defs: Option<()>/*TODO*/) -> Result<PatternConstants<T>, PatternError> { - unimplemented!() +impl<'r, 's, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> Drop for TagHelper<'r, 's, P, O, T> where Self: 'r { + fn drop(&mut self) { + let proto = self.root.consts.protos.last_mut().unwrap(); + assert!(proto.len() >= self.len); + while proto.len() > self.len { + let _ = proto.pop(); + } + } } + +struct Parser<'s, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> { + base: &'s str, + preds: Option<BTreeMap<P, Box<Predicate<T>>>>, + objs: Option<BTreeMap<O, T::Own>>, + pred_ids: BTreeMap<P, usize>, + obj_ids: BTreeMap<O, usize>, + consts: PatternConstants<T>, + closed_subtrees: std::ops::RangeFrom<usize>, +} + +// These are documented using LPeg.re syntax +// http://www.inf.puc-rio.br/~roberto/lpeg/re.html +#[rustfmt::skip] +impl<'s, P: Borrow<str> + Ord, O: Borrow<str> + Ord, T: PatternTypes> Parser<'s, P, O, T> { + /// str_literal <- sp ( ( "'" str_char* ( "'" / ( !. -> ErrorStrEnd ) ) ( '?' -> MarkSkippable ) ) -> String ) sp + /// str_char <- ( str_escape / [^%'] ) + /// str_escape <- '%' ( '%' / "'" ) / ( ( '%' .? ) -> ErrorStrEscape ) + fn str_literal(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { + const STRING_TOKEN: &str = "'"; + let mut cursor = *s; + Ok(lblock!('matches: { + self.sp(&mut cursor); + bry!('matches strip_prefix(&mut cursor, STRING_TOKEN)); + let mut string = String::new(); + loop { + let base = cursor; + if strip_prefix(&mut cursor, "%") { + if strip_prefix(&mut cursor, STRING_TOKEN) { + string.push_str(STRING_TOKEN); + } else if strip_prefix(&mut cursor, "%") { + string.push_str("%"); + } else { + return Err(PatternError::StringEscape(pos_of(self.base, base).unwrap(), cursor)); + } + } else { + cursor = cursor.trim_start_matches(|c: char| { + !matches!(c, '%' | '\'') + }); + if base == cursor { // no match? + break; + } + string.push_str(&base[..pos_of(base, cursor).unwrap()]); + } + } + if !strip_prefix(&mut cursor, STRING_TOKEN) { + match cursor { + "" => return Err(PatternError::StringEnd(self.base.len(), cursor)), + _ => unreachable!(), + } + } + let skippable = strip_prefix(&mut cursor, "?"); + self.sp(&mut cursor); + let id = self.consts.strings.iter().position(|c| c == &string); + let self_ = &mut *self; + let id = id.unwrap_or_else(move || { + string.shrink_to_fit(); + self_.consts.strings.push(string); + self_.consts.strings.len() - 1 + }); + let proto = self.consts.protos.last_mut().expect("protos"); + proto.push(PatternElement::StringKey(id, skippable)); + *s = cursor; + true + })) + } + + /// re_literal <- sp ( ( "/" re_char* ( "/" / ( !. -> ErrorReEnd ) ) ( '?' -> MarkSkippable ) ) -> Regex ) sp + /// re_char <- ( re_escape / [^%/] ) + /// re_escape <- '%' ( '%' / "/" ) / ( ( '%' .? ) -> ErrorReEscape ) + #[allow(unused_variables)] + #[allow(unreachable_code)] + fn re_literal(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { + const RE_TOKEN: &str = "/"; + let mut cursor = *s; + Ok(lblock!('matches: { + self.sp(&mut cursor); + let orig = cursor; + bry!('matches strip_prefix(&mut cursor, RE_TOKEN)); + let mut string = String::new(); + loop { + let base = cursor; + if strip_prefix(&mut cursor, "%") { + if strip_prefix(&mut cursor, RE_TOKEN) { + string.push_str(RE_TOKEN); + } else if strip_prefix(&mut cursor, "%") { + string.push_str("%"); + } else { + return Err(PatternError::RegexEscape(pos_of(self.base, base).unwrap(), cursor)); + } + } else { + cursor = cursor.trim_start_matches(|c: char| { + !matches!(c, '%' | '/') + }); + if base == cursor { // no match? + break; + } + string.push_str(&base[..pos_of(base, cursor).unwrap()]); + } + } + if !strip_prefix(&mut cursor, RE_TOKEN) { + match cursor { + "" => return Err(PatternError::RegexEnd(self.base.len(), cursor)), + _ => unreachable!(), + } + } + let end = cursor; + let skippable = strip_prefix(&mut cursor, "?"); + self.sp(&mut cursor); + let re = Regex::new(&string).map_err(|e| { + PatternError::Regex(pos_of(self.base, orig).unwrap(), &orig[..pos_of(orig, end).unwrap()], e) + })?; + let id = self.consts.regices.iter().position(|c| c.as_str() == re.as_str()); + let self_ = &mut *self; + let id = id.unwrap_or_else(move || { + self_.consts.regices.push(re); + self_.consts.regices.len() - 1 + }); + let proto = self.consts.protos.last_mut().expect("protos"); + proto.push(PatternElement::RegexKey(id, skippable)); + *s = cursor; + true + })) + } + + /// matcher <- sp ( parameter / str_literal / re_literal / key_subtree ) sp + fn matcher(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { + let mut cursor = *s; + Ok(lblock!('matches: { + self.sp(&mut cursor); + match () { + _ if self.parameter(&mut cursor)? => {}, + _ if self.str_literal(&mut cursor)? => {}, + _ if self.re_literal(&mut cursor)? => {}, + _ if self.key_subtree(&mut cursor)? => {}, + _ => bry!('matches false), + } + self.sp(&mut cursor); + *s = cursor; + true + })) + } + + /// tag <- sp ( '->' -> Arrow ) sp ( matcher / name sp matcher? ) sp predicate* ( '' -> End ) sp + fn tag(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { + let mut cursor = *s; + Ok(lblock!('matches: { + self.sp(&mut cursor); + bry!('matches strip_prefix(&mut cursor, "->")); + let mut self_ = TagHelper::start(&mut *self); + { + let proto = self_.consts.protos.last_mut().expect("protos"); + proto.push(PatternElement::Arrow); + } + self_.sp(&mut cursor); + if !self_.matcher(&mut cursor)? { + bry!('matches self_.name(&mut cursor)?); + self_.sp(&mut cursor); + // NOTE: *optional* + let _ = self_.matcher(&mut cursor)?; + } + self_.sp(&mut cursor); + while self_.predicate(&mut cursor)? { + } + { + let proto = self_.consts.protos.last_mut().expect("protos"); + proto.push(PatternElement::End); + } + self_.commit(); + *s = cursor; + true + })) + } + + /// sp <- spaces* + /// + /// (always succeeds, so doesn't return anything) + fn sp(&mut self, s: &mut &'s str) { + *s = s.trim_start_matches(|c: char| { + matches!(c, ' ' | '\n' | '\r' | '\t') + }); + } + + /// identifier <- [A-Za-z_][A-Za-z0-9_]* + fn identifier(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { + if let Some('A'..='Z') | Some('a'..='z') | Some('_') = s.chars().next() { + *s = s.trim_start_matches(|c: char| { + matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '_') + }); + Ok(true) + } else { + Ok(false) + } + } + + /// name <- sp ( identifier -> Identifier ) sp + fn name(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { + let mut cursor = *s; + Ok(lblock!('matches: { + self.sp(&mut cursor); + let start = cursor; + bry!('matches self.identifier(&mut cursor)?); + let name = &start[..pos_of(start, cursor).unwrap_or(start.len())]; + // no processing of `name` is required for this. + let id = self.consts.strings.iter().position(|c| c == name); + let id = id.unwrap_or_else(|| { + self.consts.strings.push(name.into()); + self.consts.strings.len() - 1 + }); + let proto = self.consts.protos.last_mut().expect("protos"); + proto.push(PatternElement::Identifier(id)); + self.sp(&mut cursor); + *s = cursor; + true + })) + } + + /// parameter <- sp '$' ( '?'? -> MarkSkippable ) ( identifier -> Parameter ) sp + fn parameter(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { + let mut cursor = *s; + Ok(lblock!('matches: { + self.sp(&mut cursor); + bry!('matches strip_prefix(&mut cursor, "$")); + let skippable = strip_prefix(&mut cursor, "?"); + let start = cursor; + bry!('matches self.identifier(&mut cursor)?); + let name = &start[..pos_of(start, cursor).unwrap_or(start.len())]; + // no processing of `name` is required for this. + let id = self.obj_ids.get(name).copied().map_or_else( + || { + let (key, obj) = self.objs.as_mut().and_then(|map| { + map.remove_entry(name) + }).ok_or_else(|| { + let pos = pos_of(self.base, start).unwrap(); + PatternError::UnknownParameter(pos, name) + })?; + let id = self.consts.defs.len(); + self.consts.defs.push(obj); + self.obj_ids.insert(key, id); + Ok(id) + }, + Ok, + )?; + let proto = self.consts.protos.last_mut().expect("protos"); + proto.push(PatternElement::ParameterKey(id, skippable)); + self.sp(&mut cursor); + *s = cursor; + true + })) + } + + /// predicate <- sp ':' ( '?'? -> MarkSkippable ) '$' ( identifier -> Predicate ) sp + fn predicate(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { + let mut cursor = *s; + Ok(lblock!('matches: { + self.sp(&mut cursor); + bry!('matches strip_prefix(&mut cursor, ":")); + let skippable = strip_prefix(&mut cursor, "?"); + bry!('matches strip_prefix(&mut cursor, "$")); + let start = cursor; + bry!('matches self.identifier(&mut cursor)?); + let name = &start[..pos_of(start, cursor).unwrap_or(start.len())]; + // no processing of `name` is required for this. + let id = self.pred_ids.get(name).copied().map_or_else( + || { + let (key, pred) = self.preds.as_mut().and_then(|map| { + map.remove_entry(name) + }).ok_or_else(|| { + let pos = pos_of(self.base, start).unwrap(); + PatternError::UnknownPredicate(pos, name) + })?; + let id = self.consts.predicates.len(); + self.consts.predicates.push(pred); + self.pred_ids.insert(key, id); + Ok(id) + }, + Ok, + )?; + let proto = self.consts.protos.last_mut().expect("protos"); + proto.push(PatternElement::ApplyPredicate(id, skippable, std::marker::PhantomData)); + self.sp(&mut cursor); + *s = cursor; + true + })) + } + + /// key_subtree <- sp '[' sp predicate* sp subtree sp ( ']' / unexpected_token / unexpected_end ) sp ( '?'? -> MarkSkippable ) + fn key_subtree(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { + let mut cursor = *s; + Ok(lblock!('matches: { + self.sp(&mut cursor); + bry!('matches strip_prefix(&mut cursor, "[")); + self.sp(&mut cursor); + let mut subtree = SubtreeHelper::start(&mut *self); + while subtree.predicate(&mut cursor)? { + } + subtree.sp(&mut cursor); + bry!('matches subtree.subtree(&mut cursor)?); + subtree.sp(&mut cursor); + bry!('matches + strip_prefix(&mut cursor, "]") + || + subtree.unexpected_token(&mut cursor)? + || + subtree.unexpected_end(&mut cursor)? + ); + subtree.sp(&mut cursor); + let skippable = strip_prefix(&mut cursor, "?"); + *s = cursor; + let id = subtree.commit(); + let proto = self.consts.protos.last_mut().expect("protos"); + proto.push(PatternElement::KeySubtree(id, skippable)); + true + })) + } + + /// value_subtree <- sp '(' sp predicate* sp subtree sp ( ')' / unexpected_token / unexpected_end ) sp ( '?'? -> MarkSkippable ) + fn value_subtree(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { + let mut cursor = *s; + Ok(lblock!('matches: { + self.sp(&mut cursor); + bry!('matches strip_prefix(&mut cursor, "(")); + self.sp(&mut cursor); + let mut subtree = SubtreeHelper::start(&mut *self); + while subtree.predicate(&mut cursor)? { + } + subtree.sp(&mut cursor); + bry!('matches subtree.subtree(&mut cursor)?); + subtree.sp(&mut cursor); + bry!('matches + strip_prefix(&mut cursor, ")") + || + subtree.unexpected_token(&mut cursor)? + || + subtree.unexpected_end(&mut cursor)? + ); + subtree.sp(&mut cursor); + let skippable = strip_prefix(&mut cursor, "?"); + *s = cursor; + let id = subtree.commit(); + let proto = self.consts.protos.last_mut().expect("protos"); + proto.push(PatternElement::ValueSubtree(id, skippable)); + true + })) + } + + /// unexpected_end <- ( !. ) -> ErrorToken + fn unexpected_end(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { + match *s { + "" => Err(PatternError::Token(self.base.len(), s)), + _ => Ok(false), + } + } + + /// unexpected_token <- . -> ErrorToken + fn unexpected_token(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { + match *s { + "" => Ok(false), + _ => Err(PatternError::Token(pos_of(self.base, s).unwrap(), s)), + } + } + + /// subtree <- sp tag* sp ( value_subtree '' -> End )* sp + fn subtree(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { + let mut cursor = *s; + self.sp(&mut cursor); + while self.tag(&mut cursor)? { + } + self.sp(&mut cursor); + while self.value_subtree(&mut cursor)? { + let proto = self.consts.protos.last_mut().expect("protos"); + proto.push(PatternElement::End); + } + self.sp(&mut cursor); + *s = cursor; + // always matches (may match empty) + Ok(true) + } + + /// pattern <- ( subtree / unexpected_token ) ( !. / unexpected_token ) + fn pattern(&mut self, s: &mut &'s str) -> Result<bool, PatternError<'s>> { + let mut cursor = *s; + Ok(lblock!('matches: { + let mut subtree = SubtreeHelper::start(&mut *self); + bry!('matches + subtree.subtree(&mut cursor)? + || + subtree.unexpected_token(&mut cursor)? + ); + bry!('matches + cursor == "" + || + subtree.unexpected_token(&mut cursor)? + ); + subtree.commit(); + *s = cursor; + true + })) + } +} + +pub(crate) fn parse<'s, P, O, T>( + input: &'s str, + preds: Option<BTreeMap<P, Box<Predicate<T>>>>, + objs: Option<BTreeMap<O, T::Own>> +) -> Result<PatternConstants<T>, PatternError<'s>> + where + P: Borrow<str> + Ord, + O: Borrow<str> + Ord, + T: PatternTypes, +{ + let mut parser = Parser::<'s, P, O, T> { + base: input, + preds: preds, + objs: objs, + pred_ids: Default::default(), + obj_ids: Default::default(), + consts: Default::default(), + closed_subtrees: (0..), + }; + + let mut parsed = input; + parser.pattern(&mut parsed)?.expect(""); + + assert_eq!(parsed, ""); + assert_eq!(parser.closed_subtrees.next().unwrap(), parser.consts.protos.len()); + + Ok(parser.consts) +} + +#[cfg(test)] +mod tests { + use crate::PatternTypes; + use crate::RefOwn; + use crate::KVPair; + use crate::errors::PatternError; + use super::Parser; + + use proptest::prelude::*; + + struct Dummy; + impl PatternTypes for Dummy { + type Ref = (); + type Own = (); + fn pairs<'b>( + item: RefOwn<'b, Self::Ref, Self::Own> + ) -> Option<Box<dyn Iterator<Item=KVPair<'b, Self>> + 'b>> { + None + } + + fn get<'a, 'b>( + item: RefOwn<'b, Self::Ref, Self::Own>, + key: RefOwn<'a, Self::Ref, Self::Own> + ) -> Option<Option<KVPair<'b, Self>>> { + None + } + + fn matches( + left: RefOwn<'_, Self::Ref, Self::Own>, + right: RefOwn<'_, Self::Ref, Self::Own> + ) -> bool { + false + } + } + + #[test] + fn test_identifier() { + fn identifier_input<'s>(s: &mut &'s str) -> Result<bool, PatternError<'s>> { + let mut parser = Parser::<'s, &'static str, &'static str, Dummy> { + base: *s, + preds: None, + objs: None, + pred_ids: Default::default(), + obj_ids: Default::default(), + consts: Default::default(), + closed_subtrees: (0..), + }; + parser.identifier(s) + } + for mut identifier in vec!["test", "Test", "_test", "_Test", "_123",] { + println!("{}", identifier); + assert!(identifier_input(&mut identifier).unwrap()); + assert_eq!(identifier, ""); + } + for mut identifier in vec!["123", "1_", " ",] { + println!("{}", identifier); + assert!(!identifier_input(&mut identifier).unwrap()); + assert_ne!(identifier, ""); + } + } + + proptest! { + #[test] + fn test_no_crash(s in ".{0,4096}") { + fn prep_parser<'s>(s: &'s str) -> Parser<'s, &'static str, &'static str, Dummy> { + let mut parser = Parser::<'s, &'static str, &'static str, Dummy> { + base: s, + preds: None, + objs: None, + pred_ids: Default::default(), + obj_ids: Default::default(), + consts: Default::default(), + closed_subtrees: (0..), + }; + parser.consts.protos.push(Default::default()); + parser + } + + let _ = prep_parser(&s).str_literal(&mut &*s); + let _ = prep_parser(&s).re_literal(&mut &*s); + let _ = prep_parser(&s).matcher(&mut &*s); + let _ = prep_parser(&s).tag(&mut &*s); + let _ = prep_parser(&s).sp(&mut &*s); + let _ = prep_parser(&s).identifier(&mut &*s); + let _ = prep_parser(&s).name(&mut &*s); + let _ = prep_parser(&s).parameter(&mut &*s); + let _ = prep_parser(&s).predicate(&mut &*s); + let _ = prep_parser(&s).key_subtree(&mut &*s); + let _ = prep_parser(&s).value_subtree(&mut &*s); + let _ = prep_parser(&s).unexpected_end(&mut &*s); + let _ = prep_parser(&s).unexpected_token(&mut &*s); + let _ = prep_parser(&s).subtree(&mut &*s); + let _ = prep_parser(&s).pattern(&mut &*s); + } + } +} + diff --git a/src/pattern.rs b/src/pattern.rs index e0a5174..3349db8 100644 --- a/src/pattern.rs +++ b/src/pattern.rs @@ -16,8 +16,12 @@ * along with this program. If not, see <https://www.gnu.org/licenses/>. */ +use std::borrow::Borrow; +use std::collections::BTreeMap; + use crate::PatternTypes; use crate::RefOwn; +use crate::Predicate; use crate::errors::PatternError; use crate::parser::parse; use crate::vm::Matcher; @@ -29,9 +33,18 @@ pub struct Pattern<T: PatternTypes> { } impl<T: PatternTypes> Pattern<T> { - pub fn compile(s: &str, defs: Option<()>/*TODO*/) -> Result<Self, PatternError> { + /// Compiles the input into a pattern. + pub fn compile<'s, P, O>( + input: &'s str, + preds: Option<BTreeMap<P, Box<Predicate<T>>>>, + objs: Option<BTreeMap<O, T::Own>> + ) -> Result<Self, PatternError<'s>> + where + P: Borrow<str> + Ord, + O: Borrow<str> + Ord, + { Ok(Self { - consts: parse(s, defs)? + consts: parse(input, preds, objs)? }) } diff --git a/src/vm.rs b/src/vm.rs index 44675f3..a02010f 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -16,15 +16,17 @@ * along with this program. If not, see <https://www.gnu.org/licenses/>. */ +use std::collections::BTreeMap; +use std::marker::PhantomData; + +use regex::Regex; + use crate::KVPair; use crate::RefOwn; use crate::PatternTypes; use crate::Predicate; use crate::errors::MatchError; -use std::collections::BTreeMap; -use std::marker::PhantomData; - pub(crate) const MAX_CALLS: usize = 250; type Matches<'a, 'b, T> = BTreeMap<&'a str, KVPair<'b, T>>; @@ -37,7 +39,7 @@ pub(crate) struct PatternConstants<T: PatternTypes> { // Note that we can borrow these when creating the output map. // https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f pub(crate) strings: Vec<String>, - pub(crate) regices: Vec<()/* TODO */>, + pub(crate) regices: Vec<Regex>, pub(crate) predicates: Vec<Box<Predicate<T>>>, // NOTE these are part of the constant pool and so have lifetime analogous // to 'a (consistently used to indicate constant pool lifetime) when used @@ -45,6 +47,18 @@ pub(crate) struct PatternConstants<T: PatternTypes> { pub(crate) defs: Vec<T::Own>, } +impl<T: PatternTypes> Default for PatternConstants<T> { + fn default() -> Self { + Self { + protos: Default::default(), + strings: Default::default(), + regices: Default::default(), + predicates: Default::default(), + defs: Default::default(), + } + } +} + /// A pattern element. pub(crate) enum PatternElement<T: PatternTypes> { Arrow, @@ -113,10 +127,16 @@ impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> { /// /// See also Holder. enum HolderState<'a, 'b, T: PatternTypes> { + /// Empty holder, for KV pair. EmptyKey, + /// Empty holder, for subtree. EmptySubtree, + /// Non-empty KV pair. Key(KVPair<'b, T>), + /// Non-empty subtree. Subtree(Matches<'a, 'b, T>, RefOwn<'b, T::Ref, T::Own>), + /// Preloaded value. Usually the first holder in a frame. + Value(RefOwn<'b, T::Ref, T::Own>), } impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> { @@ -126,6 +146,7 @@ impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> { HolderState::EmptySubtree => HolderState::EmptySubtree, HolderState::Key(v) => HolderState::Key(*v), HolderState::Subtree(m, v) => HolderState::Subtree(m.clone(), *v), + HolderState::Value(v) => HolderState::Value(*v), } } } @@ -143,6 +164,7 @@ impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> { match *self { HolderState::Key((_, value)) => Some(value), HolderState::Subtree(_, value) => Some(value), + HolderState::Value(value) => Some(value), _ => None } } @@ -156,13 +178,14 @@ struct Holder<'a, 'b, T: PatternTypes> { name: Option<&'a str>, value: HolderState<'a, 'b, T>, parent: Option<RefOwn<'b, T::Ref, T::Own>>, - iterator: Box<dyn Iterator<Item=KVPair<'b, T>> + 'b>, + iterator: Option<Box<dyn Iterator<Item=KVPair<'b, T>> + 'b>>, filters: Vec<Box<dyn for<'c> Fn(&'c mut HolderState<'a, 'b, T>) + 'a>>, } impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> { fn next(&mut self) -> Option<Result<(), MatchError>> { - if let Self { value: ref mut v, iterator: ref mut it, .. } = self { + // FIXME what even is the point of this? + if let Self { value: ref mut v, iterator: Some(ref mut it), .. } = self { let is_subtree = v.is_subtree(); *v = match it.next() { Some(pair) => HolderState::Key(pair), @@ -182,8 +205,7 @@ impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> { name: Default::default(), value: HolderState::EmptyKey, parent: Default::default(), - // TODO https://github.com/rust-lang/rfcs/issues/3059 - iterator: Box::new(std::iter::empty()), + iterator: Default::default(), filters: Default::default(), } } @@ -206,9 +228,12 @@ impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { iar: None, depth: depth, path: if ops.is_empty() { - Default::default() + // TODO decide on empty matcher behaviour + todo!() } else { - vec![Holder::default()] + let mut holder = Holder::default(); + holder.value = HolderState::Value(obj); + vec![holder] }, in_key: false, }, @@ -218,7 +243,7 @@ impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { fn on_in_key(&mut self) -> Result<bool, MatchError> { match self.frame.op() { PatternElement::End => { - unimplemented!() + todo!() } _ => unreachable!("on_in_key") } @@ -226,6 +251,19 @@ impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { fn on_not_in_key(&mut self) -> Result<bool, MatchError> { match self.frame.op() { + PatternElement::Arrow => { + assert!(!self.frame.path.last().expect("path").value.is_empty()); + let mut holder = Holder::default(); + holder.parent = self.frame.path.last().expect("path").value.value(); + self.frame.path.push(holder); + Ok(false) + }, + PatternElement::Identifier(id) => { + let name = self.defs.strings.get(id).map(|s| &**s); + self.frame.path.last_mut().expect("path").name = name; + todo!() + //Ok(true) + }, _ => unreachable!("on_not_in_key") } } @@ -247,6 +285,7 @@ impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { res.insert(name, pair); } }, + HolderState::Value(_) => (), _ => unreachable!("on_end (End)"), } } @@ -256,7 +295,7 @@ impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { assert!(false, "frame.prev()"); } Ok((true, res)) - }, + } PatternElement::ApplyPredicate {..} => { assert!(!self.frame.in_key); let mut res: Matches<'a, 'b, T> = Default::default(); @@ -270,6 +309,7 @@ impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { res.insert(name, pair); } }, + HolderState::Value(_) => (), _ => unreachable!("on_end (ApplyPredicate)"), } } diff --git a/tests/basic_match.rs b/tests/basic_match.rs index 2f65451..d6f4da3 100644 --- a/tests/basic_match.rs +++ b/tests/basic_match.rs @@ -22,6 +22,8 @@ mod common; use common::Value; +use datafu::RefOwn; + #[test] fn test_basic_example() { let tree = Value::M(vec![ @@ -30,14 +32,13 @@ fn test_basic_example() { ("baz".into(), Value::U(2)), ].into_iter().collect())), ].into_iter().collect()); - let pat = datafu::Pattern::<Value>::compile("->X:?$dict->Y", None).ok().unwrap(); + let preds = vec![("dict", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::M(_)))) as Box<datafu::Predicate<Value>>)].into_iter().collect(); + let pat = datafu::Pattern::<Value>::compile::<&str, &str>("->X:?$dict->Y", Some(preds), None).unwrap(); let mut matcher = pat.attempt_match(&tree); - let m = matcher.next(); - // TODO - todo!(); - //assert m['X'][0] == 'bar' - //assert m['Y'][0] == 'baz' - //assert m['Y'][1] == 2 + let m = matcher.next().unwrap().unwrap(); + assert_eq!(m["X"].0, RefOwn::Ref(&Value::from("bar"))); + assert_eq!(m["Y"].0, RefOwn::Ref(&Value::from("baz"))); + assert_eq!(m["Y"].1, RefOwn::Ref(&Value::U(2))); } #[test] @@ -53,15 +54,14 @@ fn test_basic_2() { ].into_iter().collect())), ].into_iter().collect())), ].into_iter().collect()); - let pat = datafu::Pattern::<Value>::compile("->'projects':?$d->P/[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/?:?$d->U:?$d->B", None).ok().unwrap(); + let preds = vec![("d", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::M(_)))) as Box<datafu::Predicate<Value>>)].into_iter().collect(); + let pat = datafu::Pattern::<Value>::compile::<&str, &str>("->'projects':?$d->P/[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/?:?$d->U:?$d->B", Some(preds), None).unwrap(); let mut matcher = pat.attempt_match(&tree); - let m = matcher.next(); - // TODO - todo!(); - //assert m['P'][0] == "385e734a52e13949a7a5c71827f6de920dbfea43" - //assert m['U'][0] == "https://soniex2.autistic.space/git-repos/ganarchy.git" - //assert m['B'][0] == "HEAD" - //assert m['B'][1] == {"active": True} + let m = matcher.next().unwrap().unwrap(); + assert_eq!(m["P"].0, RefOwn::Ref(&Value::from("385e734a52e13949a7a5c71827f6de920dbfea43"))); + assert_eq!(m["U"].0, RefOwn::Ref(&Value::from("https://soniex2.autistic.space/git-repos/ganarchy.git"))); + assert_eq!(m["B"].0, RefOwn::Ref(&Value::from("HEAD"))); + assert_eq!(m["B"].1, RefOwn::Ref(&Value::M(vec![(Value::from("active"), Value::B(true))].into_iter().collect()))); } #[test] @@ -77,16 +77,15 @@ fn test_spaces() { ].into_iter().collect())), ].into_iter().collect())), ].into_iter().collect()); - let pat = datafu::Pattern::<Value>::compile("-> 'projects'? - -> commit /[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/? :?$dict - -> url :?$dict - -> branch :?$dict", None).ok().unwrap(); + let preds = vec![("dict", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::M(_)))) as Box<datafu::Predicate<Value>>)].into_iter().collect(); + let pat = datafu::Pattern::<Value>::compile::<_, &str>("-> 'projects'? + -> commit /[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/? :?$dict + -> url :?$dict + -> branch :?$dict", Some(preds), None).unwrap(); let mut matcher = pat.attempt_match(&tree); - let m = matcher.next(); - // TODO - todo!(); - //assert m['commit'][0] == "385e734a52e13949a7a5c71827f6de920dbfea43" - //assert m['url'][0] == "https://soniex2.autistic.space/git-repos/ganarchy.git" - //assert m['branch'][0] == "HEAD" - //assert m['branch'][1] == {"active": True} + let m = matcher.next().unwrap().unwrap(); + assert_eq!(m["P"].0, RefOwn::Ref(&Value::from("385e734a52e13949a7a5c71827f6de920dbfea43"))); + assert_eq!(m["U"].0, RefOwn::Ref(&Value::from("https://soniex2.autistic.space/git-repos/ganarchy.git"))); + assert_eq!(m["B"].0, RefOwn::Ref(&Value::from("HEAD"))); + assert_eq!(m["B"].1, RefOwn::Ref(&Value::M(vec![(Value::from("active"), Value::B(true))].into_iter().collect()))); } diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 48153af..9680504 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -24,7 +24,7 @@ use datafu::RefOwn; use datafu::PatternTypes; use datafu::KVPair; -#[derive(PartialEq, Eq, PartialOrd, Ord)] +#[derive(PartialEq, Eq, PartialOrd, Ord, Debug)] pub enum Value { U(usize), B(bool), @@ -32,7 +32,7 @@ pub enum Value { S(Cow<'static, str>), } -#[derive(Copy, Clone, Eq, PartialEq)] +#[derive(Copy, Clone, Eq, PartialEq, Debug)] pub enum Dummy { } @@ -66,24 +66,28 @@ impl PartialEq<Value> for str { impl PartialEq<Dummy> for Value { fn eq(&self, other: &Dummy) -> bool { + let _ = other; unreachable!() } } impl PartialEq<Value> for Dummy { fn eq(&self, other: &Value) -> bool { + let _ = other; unreachable!() } } impl PartialEq<str> for Dummy { fn eq(&self, other: &str) -> bool { + let _ = other; unreachable!() } } impl PartialEq<Dummy> for str { fn eq(&self, other: &Dummy) -> bool { + let _ = other; unreachable!() } } @@ -158,7 +162,7 @@ impl PatternTypes for Value { RefOwn::Ref(Value::M(map)) => { Some(match key { RefOwn::Ref(key) => map.get_key_value(key), - RefOwn::Own(key) => unreachable!(),//map.get_key_value(&Value::U(key)), + RefOwn::Own(_key) => unreachable!(), RefOwn::Str(key) => map.get_key_value(&key as &dyn ValueHelper), }.map(|(k,v)| (k.into(), v.into()))) }, diff --git a/tests/parser_prop.rs b/tests/parser_prop.rs new file mode 100644 index 0000000..57976cb --- /dev/null +++ b/tests/parser_prop.rs @@ -0,0 +1,33 @@ +/* + * This file is part of Datafu + * Copyright (C) 2021 Soni L. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +extern crate proptest; +extern crate datafu; + +mod common; + +use common::Value; + +use proptest::prelude::*; + +proptest! { + #[test] + fn doesnt_panic(s in "\\PC*") { + let _ = datafu::Pattern::<Value>::compile::<&str, &str>(&s, None, None); + } +} |