diff options
-rw-r--r-- | Cargo.toml | 4 | ||||
-rw-r--r-- | README.md | 25 | ||||
-rw-r--r-- | src/lib.rs | 5 | ||||
-rw-r--r-- | src/parser.rs | 9 | ||||
-rw-r--r-- | src/vm.rs | 218 | ||||
-rw-r--r-- | tests/basic_match.rs | 74 | ||||
-rw-r--r-- | tests/common/mod.rs | 10 |
7 files changed, 314 insertions, 31 deletions
diff --git a/Cargo.toml b/Cargo.toml index dbdb60a..b63dce6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,11 +1,13 @@ [package] name = "datafu" -version = "0.0.2" +version = "0.0.3" authors = ["SoniEx2 <endermoneymod@gmail.com>"] license = "AGPL-3.0-or-later" description = "A Rust library for extracting data from config objects and other arbitrary object graphs." edition = "2015" repository = "https://soniex2.autistic.space/git-repos/dfu.git" +readme = "README.md" +homepage = "https://soniex2.github.io/ganarchy/project/c0b4a8a326a320ac33c5d9d6bac2f7ea7eb703ce/" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/README.md b/README.md new file mode 100644 index 0000000..0c20949 --- /dev/null +++ b/README.md @@ -0,0 +1,25 @@ +Datafu +====== + +A Rust library for extracting data from config objects and other arbitrary object graphs. + +License +------- + +```text +Datafu - Rust library for extracting data from object graphs. +Copyright (C) 2021 Soni L. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. +``` diff --git a/src/lib.rs b/src/lib.rs index 2407115..cbd32b2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -127,6 +127,11 @@ pub trait PatternTypes { left: RefOwn<'_, Self::Ref, Self::Own>, right: RefOwn<'_, Self::Ref, Self::Own> ) -> bool; + + /// Returns the value as an &str. + fn as_str<'b>( + value: RefOwn<'b, Self::Ref, Self::Own> + ) -> Option<&'b str>; } // TODO diff --git a/src/parser.rs b/src/parser.rs index 00fbcb1..eb54c86 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -623,6 +623,15 @@ mod tests { ) -> bool { false } + + fn as_str<'b>( + item: RefOwn<'b, Self::Ref, Self::Own> + ) -> Option<&'b str> { + match item { + RefOwn::Str(key) => Some(key), + _ => None, + } + } } #[test] diff --git a/src/vm.rs b/src/vm.rs index a02010f..62fb074 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -156,6 +156,10 @@ impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> { matches!(self, HolderState::EmptyKey | HolderState::EmptySubtree) } + fn has_value(&self) -> bool { + !self.is_empty() + } + fn is_subtree(&self) -> bool { matches!(self, HolderState::EmptySubtree | HolderState::Subtree {..}) } @@ -168,34 +172,85 @@ impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> { _ => None } } + + fn key(&self) -> Option<RefOwn<'b, T::Ref, T::Own>> { + match *self { + HolderState::Key((key, _)) => Some(key), + _ => None + } + } + + fn clear(&mut self) { + match *self { + HolderState::Key(_) => *self = HolderState::EmptyKey, + HolderState::Subtree(_, _) => *self = HolderState::EmptySubtree, + HolderState::Value(_) => unreachable!(), + _ => {}, + }; + assert!(self.is_empty()); + } } /// Stores a single match and associated metadata. /// /// A single match is generally a key-value pair, but may be a collection of -/// named pairs in the case of subtree matches. +/// named pairs in the case of subtree matches, or just a value for the initial +/// holder. struct Holder<'a, 'b, T: PatternTypes> { name: Option<&'a str>, value: HolderState<'a, 'b, T>, parent: Option<RefOwn<'b, T::Ref, T::Own>>, iterator: Option<Box<dyn Iterator<Item=KVPair<'b, T>> + 'b>>, - filters: Vec<Box<dyn for<'c> Fn(&'c mut HolderState<'a, 'b, T>) + 'a>>, + filters: Vec<Box<dyn (for<'c> Fn(&'c mut HolderState<'a, 'b, T>) -> Result<(), MatchError>) + 'a>>, } impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> { - fn next(&mut self) -> Option<Result<(), MatchError>> { - // FIXME what even is the point of this? - if let Self { value: ref mut v, iterator: Some(ref mut it), .. } = self { - let is_subtree = v.is_subtree(); - *v = match it.next() { - Some(pair) => HolderState::Key(pair), - None => return None - }; - // just try to make sure the type doesn't change. - // (and that if we get to this point the result isn't empty.) - assert!(!v.is_empty() && v.is_subtree() == is_subtree); + fn next(&mut self) -> Result<bool, MatchError> { + self.ensure_iterator()?; + match self { + Self { + value: ref mut v, + iterator: Some(ref mut it), + ref filters, + .. + } => { + let is_subtree = v.is_subtree(); + let mut next_v; + loop { + next_v = match it.next() { + Some(pair) => HolderState::Key(pair), + None => return Ok(false) + }; + for filter in filters { + filter(&mut next_v)?; + if next_v.is_empty() { + break; + } + } + if next_v.has_value() { + break; + } + } + assert!(next_v.has_value()); + assert!(next_v.is_subtree() == is_subtree); + *v = next_v; + Ok(true) + }, + _ => unreachable!() } - Some(Ok(())) + } + + /// Ensure `self.iterator.is_some()`, creating an iterator if necessary. + fn ensure_iterator(&mut self) -> Result<(), MatchError> { + if self.iterator.is_none() { + let iter = T::pairs(self.parent.unwrap()); + if iter.is_none() { + return Err(MatchError::UnsupportedOperation); + } + self.iterator = iter; + } + assert!(self.iterator.is_some()); + Ok(()) } } @@ -216,6 +271,60 @@ pub struct Matcher<'a, 'b, T: PatternTypes> { frame: Frame<'a, 'b, T>, } +// TODO: +// +// [x] Arrow +// [x] StringKey +// [x] RegexKey +// [ ] KeySubtree +// [ ] ValueSubtree +// [x] Ident +// [ ] Param +// [x] ApplyPredicate +// [x] End + +/// Helper for `PatternElement::StringKey`. +fn on_string_key<'a, 'b, T: PatternTypes>( + matcher: &mut Matcher<'a, 'b, T>, + id: usize, + skippable: bool, +) -> Result<bool, MatchError> { + let path = matcher.frame.path.last_mut().unwrap(); + assert!(path.iterator.is_none()); + let key = &matcher.defs.strings[id]; + let iter = T::get(path.parent.unwrap(), RefOwn::Str(key)); + match iter { + None => Err(MatchError::UnsupportedOperation), + Some(opt) => { + path.iterator = Some(Box::new(opt.into_iter())); + Ok(true) + } + } +} + +/// Helper for `PatternElement::RegexKey`. +fn on_regex_key<'a, 'b, T: PatternTypes>( + matcher: &mut Matcher<'a, 'b, T>, + id: usize, + skippable: bool, +) -> Result<bool, MatchError> { + matcher.frame.path.last_mut().unwrap().ensure_iterator()?; + let re = &matcher.defs.regices[id]; + let path = matcher.frame.path.last_mut().unwrap(); + path.filters.push(Box::new(move |value| { + let s = T::as_str(value.key().unwrap()); + match (s.map_or(false, |s| re.is_match(s)), skippable) { + (true, _) => Ok(()), + (false, true) => { + value.clear(); + Ok(()) + }, + (false, false) => Err(MatchError::ValidationError), + } + })); + Ok(true) +} + impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { pub(crate) fn new(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants<T>, proto: usize, rlimit: usize) -> Result<Self, MatchError> { let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?; @@ -233,6 +342,7 @@ impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { } else { let mut holder = Holder::default(); holder.value = HolderState::Value(obj); + holder.iterator = Some(Box::new(std::iter::empty())); vec![holder] }, in_key: false, @@ -243,8 +353,49 @@ impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { fn on_in_key(&mut self) -> Result<bool, MatchError> { match self.frame.op() { PatternElement::End => { - todo!() - } + let path = self.frame.path.last_mut().unwrap(); + if path.next()? { + Ok(false) + } else { + drop(path); + self.frame.path.pop().unwrap(); + // stop at previous End, or start of frame + while self.frame.prev() { + if matches!(self.frame.op(), PatternElement::End) { + break; + } + } + // is start of frame? + if !self.frame.prev() { + self.frame.path.clear(); + } + Ok(true) + } + }, + PatternElement::ApplyPredicate(id, skippable, _) => { + // failing on T::get() is already handled, but we may need a + // T::pairs(). construct it here. + self.frame.path.last_mut().unwrap().ensure_iterator()?; + let pred = &self.defs.predicates[id]; + let path = self.frame.path.last_mut().unwrap(); + path.filters.push(Box::new(move |value| { + match (pred(value.value().unwrap()), skippable) { + (true, _) => Ok(()), + (false, true) => { + value.clear(); + Ok(()) + }, + (false, false) => Err(MatchError::ValidationError), + } + })); + Ok(true) + }, + PatternElement::StringKey(id, skippable) => { + on_string_key(self, id, skippable) + }, + PatternElement::RegexKey(id, skippable) => { + on_regex_key(self, id, skippable) + }, _ => unreachable!("on_in_key") } } @@ -252,17 +403,42 @@ impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> { fn on_not_in_key(&mut self) -> Result<bool, MatchError> { match self.frame.op() { PatternElement::Arrow => { - assert!(!self.frame.path.last().expect("path").value.is_empty()); + // this *should* always pass. + assert!(self.frame.path.last().unwrap().iterator.is_some()); let mut holder = Holder::default(); - holder.parent = self.frame.path.last().expect("path").value.value(); + holder.parent = self.frame.path.last().unwrap().value.value(); + assert!(holder.parent.is_some()); self.frame.path.push(holder); Ok(false) }, PatternElement::Identifier(id) => { let name = self.defs.strings.get(id).map(|s| &**s); - self.frame.path.last_mut().expect("path").name = name; - todo!() - //Ok(true) + let path = self.frame.path.last_mut().unwrap(); + path.name = name; + assert!(path.iterator.is_none()); + // we don't actually create the iterator here, + // as we may still wanna use T::get() instead. + Ok(true) + }, + PatternElement::ApplyPredicate(id, skippable, _) => { + assert!(self.frame.path.len() == 1); + let pred = &self.defs.predicates[id]; + let value = self.frame.path.last().unwrap().value.value(); + match (pred(value.unwrap()), skippable) { + (true, _) => Ok(false), + (false, true) => { + self.frame.path.clear(); + // any Ok(_) will do + Ok(false) + }, + (false, false) => Err(MatchError::ValidationError), + } + }, + PatternElement::StringKey(id, skippable) => { + on_string_key(self, id, skippable) + }, + PatternElement::RegexKey(id, skippable) => { + on_regex_key(self, id, skippable) }, _ => unreachable!("on_not_in_key") } diff --git a/tests/basic_match.rs b/tests/basic_match.rs index d6f4da3..0b5b546 100644 --- a/tests/basic_match.rs +++ b/tests/basic_match.rs @@ -25,7 +25,7 @@ use common::Value; use datafu::RefOwn; #[test] -fn test_basic_example() { +fn test_basic() { let tree = Value::M(vec![ ("foo".into(), Value::U(1)), ("bar".into(), Value::M(vec![ @@ -39,6 +39,25 @@ fn test_basic_example() { assert_eq!(m["X"].0, RefOwn::Ref(&Value::from("bar"))); assert_eq!(m["Y"].0, RefOwn::Ref(&Value::from("baz"))); assert_eq!(m["Y"].1, RefOwn::Ref(&Value::U(2))); + assert!(matcher.next().is_none()); +} + +#[test] +fn test_str() { + let tree = Value::M(vec![ + ("foo".into(), Value::U(1)), + ("bar".into(), Value::M(vec![ + ("baz".into(), Value::U(2)), + ].into_iter().collect())), + ].into_iter().collect()); + let preds = vec![("dict", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::M(_)))) as Box<datafu::Predicate<Value>>)].into_iter().collect(); + let pat = datafu::Pattern::<Value>::compile::<&str, &str>("->X'bar'->Y", Some(preds), None).unwrap(); + let mut matcher = pat.attempt_match(&tree); + let m = matcher.next().unwrap().unwrap(); + assert_eq!(m["X"].0, RefOwn::Ref(&Value::from("bar"))); + assert_eq!(m["Y"].0, RefOwn::Ref(&Value::from("baz"))); + assert_eq!(m["Y"].1, RefOwn::Ref(&Value::U(2))); + assert!(matcher.next().is_none()); } #[test] @@ -62,6 +81,7 @@ fn test_basic_2() { assert_eq!(m["U"].0, RefOwn::Ref(&Value::from("https://soniex2.autistic.space/git-repos/ganarchy.git"))); assert_eq!(m["B"].0, RefOwn::Ref(&Value::from("HEAD"))); assert_eq!(m["B"].1, RefOwn::Ref(&Value::M(vec![(Value::from("active"), Value::B(true))].into_iter().collect()))); + assert!(matcher.next().is_none()); } #[test] @@ -78,14 +98,50 @@ fn test_spaces() { ].into_iter().collect())), ].into_iter().collect()); let preds = vec![("dict", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::M(_)))) as Box<datafu::Predicate<Value>>)].into_iter().collect(); - let pat = datafu::Pattern::<Value>::compile::<_, &str>("-> 'projects'? - -> commit /[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/? :?$dict - -> url :?$dict - -> branch :?$dict", Some(preds), None).unwrap(); + let pat = datafu::Pattern::<Value>::compile::<_, &str>(" + -> 'projects'? + -> commit /[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/? :?$dict + -> url :?$dict + -> branch :?$dict", + Some(preds), + None, + ).unwrap(); let mut matcher = pat.attempt_match(&tree); let m = matcher.next().unwrap().unwrap(); - assert_eq!(m["P"].0, RefOwn::Ref(&Value::from("385e734a52e13949a7a5c71827f6de920dbfea43"))); - assert_eq!(m["U"].0, RefOwn::Ref(&Value::from("https://soniex2.autistic.space/git-repos/ganarchy.git"))); - assert_eq!(m["B"].0, RefOwn::Ref(&Value::from("HEAD"))); - assert_eq!(m["B"].1, RefOwn::Ref(&Value::M(vec![(Value::from("active"), Value::B(true))].into_iter().collect()))); + assert_eq!(m["commit"].0, RefOwn::Ref(&Value::from("385e734a52e13949a7a5c71827f6de920dbfea43"))); + assert_eq!(m["url"].0, RefOwn::Ref(&Value::from("https://soniex2.autistic.space/git-repos/ganarchy.git"))); + assert_eq!(m["branch"].0, RefOwn::Ref(&Value::from("HEAD"))); + assert_eq!(m["branch"].1, RefOwn::Ref(&Value::M(vec![(Value::from("active"), Value::B(true))].into_iter().collect()))); + assert!(matcher.next().is_none()); } + +//#[test] +//fn test_harder() { +// let tree = Value::M(vec![ +// ("projects".into(), Value::M(vec![ +// ("385e734a52e13949a7a5c71827f6de920dbfea43".into(), Value::M(vec![ +// ("https://soniex2.autistic.space/git-repos/ganarchy.git".into(), Value::M(vec![ +// ("HEAD".into(), Value::M(vec![ +// ("active".into(), Value::B(true)), +// ].into_iter().collect())), +// ].into_iter().collect())), +// ].into_iter().collect())), +// ].into_iter().collect())), +// ].into_iter().collect()); +// let preds = vec![("dict", Box::new(|v: RefOwn<'_, _, _>| matches!(v, RefOwn::Ref(&Value::M(_)))) as Box<datafu::Predicate<Value>>)].into_iter().collect(); +// let pat = datafu::Pattern::<Value>::compile::<_, &str>(" +// -> 'projects'? +// -> commit /[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/? :?$dict +// -> url :?$dict +// -> branch :?$dict", +// Some(preds), +// None, +// ).unwrap(); +// let mut matcher = pat.attempt_match(&tree); +// let m = matcher.next().unwrap().unwrap(); +// assert_eq!(m["commit"].0, RefOwn::Ref(&Value::from("385e734a52e13949a7a5c71827f6de920dbfea43"))); +// assert_eq!(m["url"].0, RefOwn::Ref(&Value::from("https://soniex2.autistic.space/git-repos/ganarchy.git"))); +// assert_eq!(m["branch"].0, RefOwn::Ref(&Value::from("HEAD"))); +// assert_eq!(m["branch"].1, RefOwn::Ref(&Value::M(vec![(Value::from("active"), Value::B(true))].into_iter().collect()))); +// assert!(matcher.next().is_none()); +//} diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 9680504..d0b9e89 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -176,4 +176,14 @@ impl PatternTypes for Value { ) -> bool { left == right } + + fn as_str<'b>( + item: RefOwn<'b, Self, Dummy> + ) -> Option<&'b str> { + match item { + RefOwn::Str(key) => Some(key), + RefOwn::Ref(Value::S(key)) => Some(key), + _ => None, + } + } } |