summary refs log tree commit diff stats
path: root/src/lib.rs
blob: 897618b07fe285f0c2ebfa177cf2dad10aec9f44 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
// Copyright (C) 2021-2022 Soni L.
// SPDX-License-Identifier: MIT OR Apache-2.0

#![warn(elided_lifetimes_in_paths)]

//! Datafu is a regex-inspired query language. It was primarily
//! designed for processing object trees parsed from configuration files, but
//! can be used with anything that supports serde.
//!
//! # Language Reference
//!
//! Datafu expressions have the ability to iterate, index, validate and filter
//! data structures, through the use of the syntax elements below.
//!
//! ## Syntax Elements of Datafu Expressions
//!
//! A datafu pattern starts with an optional value matcher and is otherwise a
//! sequence of iterative elements (see "arrow" below) followed by subvalues.
//!
//! A datafu pattern is composed of the following elements:
//!
//! 1. An arrow, `->` indicates iteration. (Note that datafu operates directly
//!     on serde and deserialization is an iterative process.)
//!
//!     Always followed by a matcher, or a name and an optional matcher.
//! 2. A name is a sequence of alphanumeric characters, not starting with a
//!     digit. A name collects a matched value. The value will be identified by
//!     the name.
//! 3. A matcher is one of the following 5 elements.
//! 4. A literal is a quoted string delimited by `'` with `%` as the escape
//!     character. A literal can contain any Unicode scalar value, and the only
//!     allowed escape codes are `%%` and `%'`, for `%` and `'` respectively,
//!     which must be escaped. A literal matches a string value and can be
//!     optional.
//! 5. A parameter is `$`, optionally followed by `?`, followed by a sequence
//!     of alphanumeric characters, not starting with a digit. This is
//!     currently unimplemented.
//! 6. A regex is a quoted string delimited by `/`, with `%` as the escape
//!     character. A regex can contain any Unicode scalar value, and the only
//!     allowed escape codes are `%%` and `%/`, for `%` and `/` respectively,
//!     which must be escaped. A regex element matches a string value against
//!     the regex it represents, and can be optional.
//! 7. A predicate is `:`, optionally followed by `?`, followed by `$` and a
//!     sequence of alphanumeric characters, not starting with a digit. This is
//!     currently unimplemented.
//! 8. A type is `:`, optionally followed by `?`, followed by one of the
//!     following keywords: TODO
//! 9. A key match is a sequence of iterative elements followed by subvalues,
//!     prefixed by either a matcher or a name and an optional matcher,
//!     and enclosed within `[` and `]`, optionally followed by `?`. A key
//!     match applies to map keys and sequence indices, and aside from the
//!     previously mentioned requirement is otherwise just a datafu pattern.
//! 10. A subvalue is a sequence of iterative elements followed by subvalues,
//!     enclosed within `(` and `)`, optionally followed by `?`. A sequence of
//!     subvalues enables matching distinct patterns on the same value, like
//!     distinct fields of a struct.
//!
//! Some syntax elements can be validating or non-validating. Validating
//! syntax elements will return an [`errors::MatchError::ValidationError`]
//! whenever a value or subpattern fails to match, whereas non-validating
//! ones will skip them. In general, whether an element is validating is
//! determined by the absence of an optional `?` in the documented position,
//! with the exception of key matches, which instead use it to filter entries
//! that didn't match.
//!
//! The empty pattern matches anything, but only does so once. Empty subvalues
//! are ignored.
//!
//! ## Syntax of Datafu Expressions
//!
//! Datafu Expressions follow the given syntax, in (pseudo-)extended BNF:
//!
//! ```text
//! pattern ::= [matcher] tree
//! tree ::= {tag} {subvalue}
//! tag ::= arrow [keymatch] valuematch
//! valuematch ::= matcher | name [matcher]
//! matcher ::= parameter | literal | regex | predicate | type
//!
//! arrow ::= '->'
//! keymatch ::= '[' valuematch tree ']' ['?']
//! subvalue ::= '(' tree ')' ['?']
//! ```
//!
//! For a description of the terminals "parameter", "literal", "regex", "type"
//! and "predicate", see "Syntax Elements of Datafu Expressions" above.
//!
//! # Examples
//!
//! The Datafu pattern
//!
//! ```datafu
//! :map
//! ->['a'?]:map
//!   ->[b:?str]:?map
//!     (->['x'?]x:?bool)
//!     (->['y'?]y:?bool)?
//! ```
//!
//! When matched against the JSON
//!
//! ```json
//! {"a": {"1": {"y": true}, "2": {"x": true, "y": true}}}
//! ```
//!
//! Produces the same results as if matched against the sub-JSON
//!
//! ```json
//! {"a": {"2": {"x": true, "y": true}}}
//! ```

pub mod errors;
mod graph;
//pub mod type_tree;
mod parser;
mod pattern;
mod vm;

pub use pattern::Pattern;
pub use pattern::PatternBuilder;

/// The (unsized) trait-object type of a predicate.
///
/// A predicate is a callable that is handed a type-erased deserializer
/// (`&mut dyn erased_serde::Deserializer<'_>`) and answers with a `bool`.
/// It is additionally required to be `Send + Sync`.
///
/// Being a `dyn` type, it can only be used behind a pointer; [`pred`] is the
/// convenient way to obtain a `Box<Predicate>` from a closure.
// NOTE(review): presumably `true` means "the value is accepted" — confirm
// against the code that actually invokes predicates.
pub type Predicate =
    dyn Fn(&mut dyn erased_serde::Deserializer<'_>) -> bool + Send + Sync;

/// Helper to build predicates because closure inference is the worst.
///
/// Takes any `Send + Sync + 'static` closure (or function) with the predicate
/// signature and returns it boxed as a [`Predicate`]. Routing the closure
/// through this function's `Fn` bound gives the compiler the expected
/// signature up front, so the closure's argument does not need a type
/// annotation.
///
/// # Examples
///
/// This doesn't work:
///
/// ```rust compile_fail
/// use serde::Deserialize;
/// use datafu::Predicate;
///
/// let x = Box::new(|v| String::deserialize(v).is_ok()) as Box<Predicate>;
/// ```
///
/// But this does:
///
/// ```rust
/// use serde::Deserialize;
///
/// let x = datafu::pred(|v| String::deserialize(v).is_ok());
/// ```
// Both this function and `Predicate` are `pub` because the examples above
// reach them through the crate's public path (`datafu::pred`,
// `datafu::Predicate`).
pub fn pred<F>(f: F) -> Box<Predicate>
where
    F: Fn(&mut dyn erased_serde::Deserializer<'_>) -> bool
        + Send
        + Sync
        + 'static,
{
    Box::new(f)
}