diff options
-rw-r--r-- | abdl.py | 195 |
-rw-r--r-- | testing/test_abdl.py | 4 |
-rw-r--r-- | testing/test_examples.py | 25 |
-rw-r--r-- | testing/test_ops_abdl.py | 161 |
4 files changed, 319 insertions, 66 deletions
diff --git a/abdl.py b/abdl.py index 02efd38..d8cb86c 100644 --- a/abdl.py +++ b/abdl.py @@ -16,66 +16,75 @@ """A Boneless Datastructure Language, version 2.0.0. -This is a language for matching mixed-type data-structures simiarly to how you'd match a string with regex. +ABDL expressions are regex-like constructs for matching and validating object structures. They can be used +with JSON and similar formats, and even self-referential data structures. -The language has two parts, the Input Langauge and the Output Language. +Language Reference: -The Input Language: + ABDL expressions have the ability to iterate, index, validate and filter data structures. This is + done with the use of the syntax elements listed below. - The input language is used for matching the input and setting up variables. An ABDL expression - is made of tokens that can represent variables, literals, commands or parameters. It must start with - an arrow, which must be followed by a variable, literal, parameter, regex or key match. Additionally, - variables may be followed by a literal, parameter or regex. In turn, those may be followed by one - or more type tests. + Syntax Elements: - A variable is a string of alphanumeric characters, not starting with a digit. + An ABDL expression is a sequence of zero or more sequences starting with arrows followed by zero or + more subvalues. - A literal is a string delimited by single quotes. (use ``%'`` to escape ``'`` and ``%%`` to escape ``%``) - A literal can be made "non-validating" by appending an ``?`` after it. + An arrow is ``->`` and indicates indexing/iteration (Mappings, Sequences, Sets). It must be followed + by a variable, literal, parameter, regex or key match. - A regex is a regex delimited by forward slashes. (use ``%/`` to escape ``/`` and ``%%`` to escape ``%``) - A regex can be made "non-validating" by appending an ``?`` after it. + A variable is a string of alphanumeric characters, not starting with a digit. 
It may be followed by a + literal, parameter, regex, key match, or one or more type tests. A ``(key, value)`` tuple containing + the corresponding matched element will be identified by this name in the results dict. - A parameter is the symbol ``$`` followed by a string of alphanumeric characters, not starting with - a digit. A parameter can be made "non-validating" by appending an ``?`` after it. + A literal is a string delimited by single quotes (use ``%'`` to escape ``'`` and ``%%`` to escape ``%``). + A literal can be made "non-validating" by appending an ``?`` after it. It may be followed by one or more + type tests. It is exactly equivalent to indexing an object with a string key. - An arrow is ``->`` and indicates indexing/iteration (dicts, sets, frozensets, lists, tuples). + A parameter is the symbol ``$`` followed by a string of alphanumeric characters, not starting with + a digit. A parameter can be made "non-validating" by appending an ``?`` after it. It may be followed by + one or more type tests. It is exactly equivalent to indexing an object with an arbitrary object key. - A type test is ``:`` followed by a parameter. A type test can be made "non-validating" by appending - an ``?`` after the ``:``. + A regex is an RE, as defined by the ``re`` module, delimited by forward slashes (use ``%/`` to escape + ``/`` and ``%%`` to escape ``%``). A regex can be made "non-validating" by appending an ``?`` after it. + It may be followed by one or more type tests. It attempts to match each key in the object. - A key match is an ABDL expression enclosed in ``[`` and ``]``, optionally prefixed with one or more type - tests. This matches keys. + A type test is ``:`` followed by a parameter. A type test can be made "non-validating" by appending + an ``?`` after the ``:``. It attempts to match the type of each matched value in the object. + + A key match is an ABDL expression enclosed in ``[`` and ``]``, optionally prefixed with one or more type + tests. 
This matches keys (including the type tests). + + A subvalue is an ABDL expression enclosed in ``(`` and ``)``. This allows matching multiple values on + the same object. + + Some syntax elements can be validating or non-validating. Validating syntax elements will raise a + :py:exc:`abdl.ValidationError` whenever a non-matching element is encountered, whereas non-validating + ones will skip them. Note that it is possible for a validating syntax element to still yield results + before raising a :py:exc:`abdl.ValidationError`, so one needs to be careful when writing code where such + behaviour could result in a security vulnerability. Examples: - + + >>> import abdl >>> for m in abdl.match("->X:?$dict->Y", {"foo": 1, "bar": {"baz": 2}}, {'dict': dict}): ... print(m['X'][0], m['Y'][0], m['Y'][1]) bar baz 2 + >>> pat = abdl.compile('''-> 'projects'? + ... -> commit /[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/? :?$dict + ... -> url :?$dict + ... -> branch :?$dict''', {'dict': dict}) + >>> data = {"projects": { + ... "385e734a52e13949a7a5c71827f6de920dbfea43": { + ... "https://soniex2.autistic.space/git-repos/ganarchy.git": {"HEAD": {"active": True}} + ... } + ... }} + >>> for m in pat.match(data): + ... print(m['commit'][0], m['url'][0], m['branch'][0], m['branch'][1]) + 385e734a52e13949a7a5c71827f6de920dbfea43 https://soniex2.autistic.space/git-repos/ganarchy.git HEAD {'active': True} + (If ``:?$dict`` wasn't present, a TypeError would be raised when trying to iterate the ``1`` from ``"foo": 1``.) """ -#""" -#The Output Language [NYI]: -# -# The output language is used for transforming the input data into something potentially more useful. -# Its tokens represent variables or commands. -# -# A variable must be bound on the pattern before being used on the transformer. -# -# The following commands are accepted: -# * ``!`` - indicates that the *key* corresponding to the variable shall be used, not the value. -# -# An output expression always looks like a tuple. 
That is, it starts with ``(`` and ends with ``)``, -# and contains comma-separated values. At least one comma is required, and a trailing comma should -# always be used. -# -# Example [NYI]: -# -# >>> for m in abdl.transform("'projects'->?j2/[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/->?j3->?j4", "(j2!,j3!,j4!,j4)", {"projects": {"385e734a52e13949a7a5c71827f6de920dbfea43": {"https://soniex2.autistic.space/git-repos/ganarchy.git": {"HEAD": {"active": True}}}}}): -# ... print(m) -# ('385e734a52e13949a7a5c71827f6de920dbfea43', 'https://soniex2.autistic.space/git-repos/ganarchy.git', 'HEAD', {'active': True}) -#""" import re @@ -90,6 +99,7 @@ class DeprecationError(Exception): @classmethod def warn_all(cls): + """Enables all deprecation warnings.""" pass class PatternError(Exception): @@ -137,6 +147,7 @@ class PatternError(Exception): class ValidationError(Exception): """Raised when the object tree doesn't validate against the given pattern.""" + # FIXME TODO? class _PatternElement: def on_not_in_key(self, frame, path, defs): @@ -159,6 +170,9 @@ class _StringKey(_PatternElement): self.key = toks[0] self.skippable = toks[1] == '?' + def on_in_key(self, frame, path, defs): + return self.on_not_in_key(frame, path, defs) + def on_not_in_key(self, frame, path, defs): path[-1].iterator = self.extract(path[-1].parent) path[-1].empty = False @@ -199,7 +213,7 @@ class _RegexKey(_PatternElement): if not self.skippable: raise ValidationError -class _Subtree(_PatternElement): +class _KeySubtree(_PatternElement): def __init__(self, toks): self.key = toks[0] self.skippable = toks[1] == '?' @@ -222,6 +236,25 @@ class _Subtree(_PatternElement): for sub in self.key: sub.collect_params(res) +class _ValueSubtree(_PatternElement): + def __init__(self, toks): + self.key = toks[0] + self.skippable = toks[1] == '?' 
+ + def on_not_in_key(self, frame, path, defs): + assert not path[-1].empty + path.append(_Holder(key=None, value=None, name=None, parent=path[-1].value, empty=False, subtree=True)) + path[-1].iterator = self.filter(path[-1].parent, defs) + return True + + def filter(self, parent, defs): + for x in _match_helper(self.key, defs, parent): + yield (x, parent) + + def collect_params(self, res: list): + for sub in self.key: + sub.collect_params(res) + class _Ident(_PatternElement): def __init__(self, toks): self.key = toks[0] @@ -237,6 +270,9 @@ class _Param(_PatternElement): self.skippable = toks[0] == '?' self.key = toks[1].key + def on_in_key(self, frame, path, defs): + return self.on_not_in_key(frame, path, defs) + def on_not_in_key(self, frame, path, defs): path[-1].iterator = self.extract(path[-1].parent, defs[self.key]) path[-1].empty = False @@ -326,12 +362,14 @@ def _build_syntax(): parameter = (Suppress("$") + skippable + identifier).setParseAction(lambda toks: [_Param(toks)]) ty = (Suppress(":") + skippable + Suppress("$") + identifier).setParseAction(lambda toks: [_Ty(toks)]) # support for objects-as-keys - keysubtree = (Suppress("[") + Group(ty[...] + subtree[1,...]) + (Suppress("]") | CharsNotIn("").setParseAction(PatternError._unexpected_tok) | StringEnd().setParseAction(PatternError._unexpected_tok)) + Optional("?", default="")).setParseAction(lambda toks: [_Subtree(toks)]) + keysubtree = (Suppress("[") + Group(ty[...] + subtree) + (Suppress("]") | CharsNotIn("").setParseAction(PatternError._unexpected_tok) | StringEnd().setParseAction(PatternError._unexpected_tok)) + Optional("?", default="")).setParseAction(lambda toks: [_KeySubtree(toks)]) # represents key matching - switches from "key" to "value" - tag = (identifier + Optional(parameter | re_literal | keysubtree) | parameter | str_literal | re_literal | keysubtree) + ty[...] 
+ Empty().setParseAction(lambda: [_End()]) - # arrow and tag or we give up - subtree <<= arrow + tag - return (subtree | CharsNotIn("").setParseAction(PatternError._unexpected_tok))[...].parseWithTabs() + tag = (identifier + Optional(parameter | str_literal | re_literal | keysubtree) | parameter | str_literal | re_literal | keysubtree) + ty[...] + Empty().setParseAction(lambda: [_End()]) + # multiple value matching + valuesubtree = (Suppress("(") + Group(subtree) + (Suppress(")") | CharsNotIn("").setParseAction(PatternError._unexpected_tok) | StringEnd().setParseAction(PatternError._unexpected_tok)) + Optional("?", default="")).setParseAction(lambda toks: [_ValueSubtree(toks)]) + # arrow and tag, value subtree + subtree <<= (arrow + tag)[...] + (valuesubtree + Empty().setParseAction(lambda: [_End()]))[...] + return ((subtree | CharsNotIn("").setParseAction(PatternError._unexpected_tok)) + StringEnd()).parseWithTabs() _built_syntax = _build_syntax() @@ -347,14 +385,14 @@ def _pairs(o): raise TypeError class _Holder: - def __init__(self, key, value, name, parent=None, it=None, empty=False): + def __init__(self, key, value, name, parent=None, it=None, empty=False, subtree=False): self.name = name self.key = key self.value = value self.empty = empty self._it = it self.parent = parent - self.subtree = False + self.subtree = subtree @property def iterator(self): @@ -424,30 +462,63 @@ def _match_helper(ops, defs, tree): in_key = op.on_not_in_key(frame, path, defs) class Pattern: + """A compiled pattern. 
+ """ + def __init__(self, pattern, defs): try: - self.ops = _built_syntax.parseString(pattern) + self._ops = _built_syntax.parseString(pattern) except PatternError as e: e._normalize(pattern, defs) raise else: - self.params = [] - for op in self.ops: - op.collect_params(self.params) - self.defs = {param: defs[param] for param in self.params} + self._params = [] + for op in self._ops: + op.collect_params(self._params) + self._defs = {param: defs[param] for param in self._params} - def match(self, tree): - return _match_helper(self.ops, self.defs, tree) + def match(self, obj): + """Matches this compiled pattern against the given object. -# def transform(self, tree, replacement): -# pass + Args: + obj: The object to match against. + + Returns: + An iterator. This iterator yields ``(key, value)`` pairs + wrapped in a dict for each variable in the pattern. + + """ + return _match_helper(self._ops, self._defs, obj) def compile(pattern, defs={}): + """Compiles the pattern and returns a compiled :py:class:`abdl.Pattern` object. + + Args: + pattern (str): The pattern. Refer to module-level documentation for + pattern syntax. + defs (dict): The parameter list. Used by parameters in the pattern. + + Returns: + Pattern: A compiled pattern object. + + """ # TODO caching return Pattern(pattern, defs) def match(pattern, obj, defs={}): - return compile(pattern, defs).match(obj) + """Matches the pattern against the given obj. + + This method is equivalent to ``abdl.compile(pattern, defs).match(obj)``. + + Args: + pattern (str): The pattern. Refer to module-level documentation for + pattern syntax. + obj: The object to match against. + defs (dict): The parameter list. Used by parameters in the pattern. -#def transform(pattern, replacement, obj, defs={}): -# raise NotImplementedError + Returns: + An iterator. This iterator yields ``(key, value)`` pairs + wrapped in a dict for each variable in the pattern. 
+ + """ + return compile(pattern, defs).match(obj) diff --git a/testing/test_abdl.py b/testing/test_abdl.py index 080ab3f..4f5241f 100644 --- a/testing/test_abdl.py +++ b/testing/test_abdl.py @@ -182,6 +182,10 @@ def test_param(foo, pat): yield {"X": x, "Z": z} assert all(LogAndCompare(pat.match(foo), deep(foo))) +def test_basic_value_subtree(): + matcher = abdl.match("(->foo'foo')(->bar'bar')", {'foo': 1, 'bar': 2}) + assert list(matcher) == [{'foo': ('foo', 1), 'bar': ('bar', 2)}] + # FIXME #@hypothesis.given(objtree, st.text()) #def test_exhaustive(foo, pat): diff --git a/testing/test_examples.py b/testing/test_examples.py index 7341341..02532cc 100644 --- a/testing/test_examples.py +++ b/testing/test_examples.py @@ -1,9 +1,26 @@ import abdl def test_basic_example(): - for m in abdl.match("->X:?$dict->Y", {"foo": 1, "bar": {"baz": 2}}, {'dict': dict}): - assert m['X'][0] == 'bar' and m['Y'][0] == 'baz' and m['Y'][1] == 2 + m = next(abdl.match("->X:?$dict->Y", {"foo": 1, "bar": {"baz": 2}}, {'dict': dict})) + assert m['X'][0] == 'bar' + assert m['Y'][0] == 'baz' + assert m['Y'][1] == 2 def test_basic_2(): - for m in abdl.match("->'projects':?$d->P/[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/?:?$d->U:?$d->B", {"projects": {"385e734a52e13949a7a5c71827f6de920dbfea43": {"https://soniex2.autistic.space/git-repos/ganarchy.git": {"HEAD": {"active": True}}}}}, {'d': dict}): - assert m['P'][0] == "385e734a52e13949a7a5c71827f6de920dbfea43" and m['U'][0] == "https://soniex2.autistic.space/git-repos/ganarchy.git" and m['B'][0] == "HEAD" and m['B'][1] == {"active": True} + m = next(abdl.match("->'projects':?$d->P/[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/?:?$d->U:?$d->B", {"projects": {"385e734a52e13949a7a5c71827f6de920dbfea43": {"https://soniex2.autistic.space/git-repos/ganarchy.git": {"HEAD": {"active": True}}}}}, {'d': dict})) + assert m['P'][0] == "385e734a52e13949a7a5c71827f6de920dbfea43" + assert m['U'][0] == "https://soniex2.autistic.space/git-repos/ganarchy.git" + assert m['B'][0] == 
"HEAD" + assert m['B'][1] == {"active": True} + +def test_spaces(): + pat = abdl.compile("""-> 'projects'? + -> commit /[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/? :?$dict + -> url :?$dict + -> branch :?$dict""", {'dict': dict}) + data = {"projects": {"385e734a52e13949a7a5c71827f6de920dbfea43": {"https://soniex2.autistic.space/git-repos/ganarchy.git": {"HEAD": {"active": True}}}}} + m = next(pat.match(data)) + assert m['commit'][0] == "385e734a52e13949a7a5c71827f6de920dbfea43" + assert m['url'][0] == "https://soniex2.autistic.space/git-repos/ganarchy.git" + assert m['branch'][0] == "HEAD" + assert m['branch'][1] == {"active": True} diff --git a/testing/test_ops_abdl.py b/testing/test_ops_abdl.py new file mode 100644 index 0000000..fe50369 --- /dev/null +++ b/testing/test_ops_abdl.py @@ -0,0 +1,161 @@ +# Tests abdl.py internals + +import abdl + +import re + +class OpHelper: + def __init__(self, pat, ops=None): + self.pat = pat + if not ops: + self.ops = pat._ops + else: + self.ops = ops + self.pos = -1 + + def done(self): + assert self.pos + 1 == len(self.ops) + + def __enter__(self): + self.pos += 1 + first = self.pos + assert not isinstance(self.ops[first], abdl._End) + while not isinstance(self.ops[self.pos], abdl._End): + self.pos += 1 + assert isinstance(self.ops[self.pos], abdl._End) + return self.ops[first:self.pos] + + def __exit__(self, exc_type, exc_value, traceback): + pass + +def expect_types(seq, *tys): + assert len(seq) == len(tys) + assert(all(map(lambda x: isinstance(*x), zip(seq, tys)))) + +def expect_idents(oph, *idents): + for ident in idents: + with oph as ops: + expect_types(ops, abdl._Arrow, abdl._Ident) + assert ops[1].key == ident + +def test_empty_iterator_pattern(): + oph = OpHelper(abdl.compile("")) + oph.done() + +def test_four_depths_pattern(): + oph = OpHelper(abdl.compile("->X->Y->Z->W")) + expect_idents(oph, "X", "Y", "Z", "W") + oph.done() + +def test_regex_pattern(): + oph = OpHelper(abdl.compile("->/.../")) + with oph as ops: + 
expect_types(ops, abdl._Arrow, abdl._RegexKey) + assert ops[1].key == '...' + assert ops[1].compiled == re.compile('...') + assert ops[1].skippable == False + oph.done() + +def test_regex_skippable_pattern(): + oph = OpHelper(abdl.compile("->/.../?")) + with oph as ops: + expect_types(ops, abdl._Arrow, abdl._RegexKey) + assert ops[1].key == '...' + assert ops[1].compiled == re.compile('...') + assert ops[1].skippable == True + oph.done() + +def test_regex_and_bind_pattern(): + oph = OpHelper(abdl.compile("->/.../->Y")) + with oph as ops: + expect_types(ops, abdl._Arrow, abdl._RegexKey) + assert ops[1].key == '...' + assert ops[1].compiled == re.compile('...') + assert ops[1].skippable == False + expect_idents(oph, "Y") + oph.done() + +def test_empty_literal_skippable_and_bind_pattern(): + oph = OpHelper(abdl.compile("->''?->Y")) + with oph as ops: + expect_types(ops, abdl._Arrow, abdl._StringKey) + assert ops[1].key == '' + assert ops[1].skippable == True + expect_idents(oph, "Y") + oph.done() + +def test_type_pattern(): + oph = OpHelper(abdl.compile("->X:?$a->Y", defs={'a': (dict, list, set)})) + assert oph.pat._defs['a'] == (dict, list, set) + with oph as ops: + expect_types(ops, abdl._Arrow, abdl._Ident, abdl._Ty) + assert ops[1].key == 'X' + assert ops[2].key == 'a' + assert ops[2].skippable == True + expect_idents(oph, "Y") + oph.done() + +def test_multi_type_pattern(): + oph = OpHelper(abdl.compile("->X:$a:?$b:?$c->Y", defs={'a': (dict, list, set), 'b': (dict, set), 'c': dict})) + assert oph.pat._defs['a'] == (dict, list, set) + assert oph.pat._defs['b'] == (dict, set) + assert oph.pat._defs['c'] == dict + with oph as ops: + expect_types(ops, abdl._Arrow, abdl._Ident, abdl._Ty, abdl._Ty, abdl._Ty) + assert ops[1].key == 'X' + assert ops[2].key == 'a' + assert ops[2].skippable == False + assert ops[3].key == 'b' + assert ops[3].skippable == True + assert ops[4].key == 'c' + assert ops[4].skippable == True + expect_idents(oph, "Y") + oph.done() + +def 
test_key_subtree_pattern(): + oph = OpHelper(abdl.compile("->[:?$set->A]->D", defs={'set': set})) + assert oph.pat._defs['set'] == set + with oph as ops: + expect_types(ops, abdl._Arrow, abdl._KeySubtree) + oph2 = OpHelper(None, ops=ops[1].key) + with oph2 as ops2: + expect_types(ops2, abdl._Ty, abdl._Arrow, abdl._Ident) + assert ops2[0].key == 'set' + assert ops2[0].skippable == True + assert ops2[2].key == 'A' + oph2.done() + expect_idents(oph, "D") + oph.done() + +def test_param_pattern(): + oph = OpHelper(abdl.compile("->X->$a->Z", defs={'a': '0'})) + assert oph.pat._defs['a'] == '0' + expect_idents(oph, "X") + with oph as ops: + expect_types(ops, abdl._Arrow, abdl._Param) + assert ops[1].key == 'a' + assert ops[1].skippable == False + expect_idents(oph, "Z") + oph.done() + +def test_value_subtree_pattern(): + oph = OpHelper(abdl.compile("(->foo'foo')(->bar'bar')")) + with oph as ops: + expect_types(ops, abdl._ValueSubtree) + oph2 = OpHelper(None, ops=ops[0].key) + with oph2 as ops2: + expect_types(ops2, abdl._Arrow, abdl._Ident, abdl._StringKey) + assert ops2[1].key == 'foo' + assert ops2[2].key == 'foo' + assert ops2[2].skippable == False + oph2.done() + with oph as ops: + expect_types(ops, abdl._ValueSubtree) + oph2 = OpHelper(None, ops=ops[0].key) + with oph2 as ops2: + expect_types(ops2, abdl._Arrow, abdl._Ident, abdl._StringKey) + assert ops2[1].key == 'bar' + assert ops2[2].key == 'bar' + assert ops2[2].skippable == False + oph2.done() + oph.done() |