summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--abdl.py195
-rw-r--r--testing/test_abdl.py4
-rw-r--r--testing/test_examples.py25
-rw-r--r--testing/test_ops_abdl.py161
4 files changed, 319 insertions, 66 deletions
diff --git a/abdl.py b/abdl.py
index 02efd38..d8cb86c 100644
--- a/abdl.py
+++ b/abdl.py
@@ -16,66 +16,75 @@
 
 """A Boneless Datastructure Language, version 2.0.0.
 
-This is a language for matching mixed-type data-structures simiarly to how you'd match a string with regex.
+ABDL expressions are regex-like constructs for matching and validating object structures. They can be used
+with JSON and similar formats, and even self-referential data structures.
 
-The language has two parts, the Input Langauge and the Output Language.
+Language Reference:
 
-The Input Language:
+    ABDL expressions have the ability to iterate, index, validate and filter data structures. This is
+    done with the use of the syntax elements listed below.
 
-    The input language is used for matching the input and setting up variables. An ABDL expression
-    is made of tokens that can represent variables, literals, commands or parameters. It must start with
-    an arrow, which must be followed by a variable, literal, parameter, regex or key match. Additionally,
-    variables may be followed by a literal, parameter or regex. In turn, those may be followed by one
-    or more type tests.
+    Syntax Elements:
 
-    A variable is a string of alphanumeric characters, not starting with a digit.
+        An ABDL expression consists of zero or more arrows (each followed by the element it applies to),
+        followed by zero or more subvalues.
 
-    A literal is a string delimited by single quotes. (use ``%'`` to escape ``'`` and ``%%`` to escape ``%``)
-    A literal can be made "non-validating" by appending an ``?`` after it.
+        An arrow is ``->`` and indicates indexing/iteration (Mappings, Sequences, Sets). It must be followed
+        by a variable, literal, parameter, regex or key match.
 
-    A regex is a regex delimited by forward slashes. (use ``%/`` to escape ``/`` and ``%%`` to escape ``%``)
-    A regex can be made "non-validating" by appending an ``?`` after it.
+        A variable is a string of alphanumeric characters, not starting with a digit. It may be followed by a
+        literal, parameter, regex, key match, or one or more type tests. A ``(key, value)`` tuple containing
+        the corresponding matched element will be identified by this name in the results dict.
 
-    A parameter is the symbol ``$`` followed by a string of alphanumeric characters, not starting with
-    a digit. A parameter can be made "non-validating" by appending an ``?`` after it.
+        A literal is a string delimited by single quotes (use ``%'`` to escape ``'`` and ``%%`` to escape ``%``).
+        A literal can be made "non-validating" by appending a ``?`` after it. It may be followed by one or more
+        type tests. It is exactly equivalent to indexing an object with a string key.
 
-    An arrow is ``->`` and indicates indexing/iteration (dicts, sets, frozensets, lists, tuples).
+        A parameter is the symbol ``$`` followed by a string of alphanumeric characters, not starting with
+        a digit. A parameter can be made "non-validating" by appending a ``?`` after it. It may be followed by
+        one or more type tests. It is exactly equivalent to indexing an object with an arbitrary object key.
 
-    A type test is ``:`` followed by a parameter. A type test can be made "non-validating" by appending
-    an ``?`` after the ``:``.
+        A regex is an RE, as defined by the ``re`` module, delimited by forward slashes (use ``%/`` to escape
+        ``/`` and ``%%`` to escape ``%``). A regex can be made "non-validating" by appending a ``?`` after it.
+        It may be followed by one or more type tests. It attempts to match each key in the object.
 
-    A key match is an ABDL expression enclosed in ``[`` and ``]``, optionally prefixed with one or more type
-    tests. This matches keys.
+        A type test is ``:`` followed by a parameter. A type test can be made "non-validating" by appending
+        a ``?`` after the ``:``. It attempts to match the type of each matched value in the object.
+
+        A key match is an ABDL expression enclosed in ``[`` and ``]``, optionally prefixed with one or more type
+        tests. This matches keys (including the type tests).
+
+        A subvalue is an ABDL expression enclosed in ``(`` and ``)``. This allows matching multiple values on
+        the same object.
+
+        Some syntax elements can be validating or non-validating. Validating syntax elements will raise a
+        :py:exc:`abdl.ValidationError` whenever a non-matching element is encountered, whereas non-validating
+        ones will skip them. Note that it is possible for a validating syntax element to still yield results
+        before raising a :py:exc:`abdl.ValidationError`, so one needs to be careful when writing code where such
+        behaviour could result in a security vulnerability.
 
     Examples:
-        
+
+        >>> import abdl
         >>> for m in abdl.match("->X:?$dict->Y", {"foo": 1, "bar": {"baz": 2}}, {'dict': dict}):
         ...     print(m['X'][0], m['Y'][0], m['Y'][1])
         bar baz 2
 
+        >>> pat = abdl.compile('''-> 'projects'?
+        ...                          -> commit /[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/? :?$dict
+        ...                             -> url :?$dict
+        ...                                -> branch :?$dict''', {'dict': dict})
+        >>> data = {"projects": {
+        ...     "385e734a52e13949a7a5c71827f6de920dbfea43": {
+        ...         "https://soniex2.autistic.space/git-repos/ganarchy.git": {"HEAD": {"active": True}}
+        ...     }
+        ... }}
+        >>> for m in pat.match(data):
+        ...     print(m['commit'][0], m['url'][0], m['branch'][0], m['branch'][1])
+        385e734a52e13949a7a5c71827f6de920dbfea43 https://soniex2.autistic.space/git-repos/ganarchy.git HEAD {'active': True}
+
     (If ``:?$dict`` wasn't present, a TypeError would be raised when trying to iterate the ``1`` from ``"foo": 1``.)
 """
-#"""
-#The Output Language [NYI]:
-#
-#    The output language is used for transforming the input data into something potentially more useful.
-#    Its tokens represent variables or commands.
-#
-#    A variable must be bound on the pattern before being used on the transformer.
-#
-#    The following commands are accepted:
-#        * ``!`` - indicates that the *key* corresponding to the variable shall be used, not the value.
-#
-#    An output expression always looks like a tuple. That is, it starts with ``(`` and ends with ``)``,
-#    and contains comma-separated values. At least one comma is required, and a trailing comma should
-#    always be used.
-#
-#    Example [NYI]:
-#
-#        >>> for m in abdl.transform("'projects'->?j2/[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/->?j3->?j4", "(j2!,j3!,j4!,j4)", {"projects": {"385e734a52e13949a7a5c71827f6de920dbfea43": {"https://soniex2.autistic.space/git-repos/ganarchy.git": {"HEAD": {"active": True}}}}}):
-#        ...     print(m)
-#        ('385e734a52e13949a7a5c71827f6de920dbfea43', 'https://soniex2.autistic.space/git-repos/ganarchy.git', 'HEAD', {'active': True})
-#"""
 
 import re
 
@@ -90,6 +99,7 @@ class DeprecationError(Exception):
 
     @classmethod
     def warn_all(cls):
+        """Enables all deprecation warnings."""
         pass
 
 class PatternError(Exception):
@@ -137,6 +147,7 @@ class PatternError(Exception):
 
 class ValidationError(Exception):
     """Raised when the object tree doesn't validate against the given pattern."""
+    # FIXME TODO?
 
 class _PatternElement:
     def on_not_in_key(self, frame, path, defs):
@@ -159,6 +170,9 @@ class _StringKey(_PatternElement):
         self.key = toks[0]
         self.skippable = toks[1] == '?'
 
+    def on_in_key(self, frame, path, defs):
+        return self.on_not_in_key(frame, path, defs)
+
     def on_not_in_key(self, frame, path, defs):
         path[-1].iterator = self.extract(path[-1].parent)
         path[-1].empty = False
@@ -199,7 +213,7 @@ class _RegexKey(_PatternElement):
                 if not self.skippable:
                     raise ValidationError
 
-class _Subtree(_PatternElement):
+class _KeySubtree(_PatternElement):
     def __init__(self, toks):
         self.key = toks[0]
         self.skippable = toks[1] == '?'
@@ -222,6 +236,25 @@ class _Subtree(_PatternElement):
         for sub in self.key:
             sub.collect_params(res)
 
+class _ValueSubtree(_PatternElement):
+    def __init__(self, toks):
+        self.key = toks[0]
+        self.skippable = toks[1] == '?'
+
+    def on_not_in_key(self, frame, path, defs):
+        assert not path[-1].empty
+        path.append(_Holder(key=None, value=None, name=None, parent=path[-1].value, empty=False, subtree=True))
+        path[-1].iterator = self.filter(path[-1].parent, defs)
+        return True
+
+    def filter(self, parent, defs):
+        for x in _match_helper(self.key, defs, parent):
+            yield (x, parent)
+
+    def collect_params(self, res: list):
+        for sub in self.key:
+            sub.collect_params(res)
+
 class _Ident(_PatternElement):
     def __init__(self, toks):
         self.key = toks[0]
@@ -237,6 +270,9 @@ class _Param(_PatternElement):
         self.skippable = toks[0] == '?'
         self.key = toks[1].key
 
+    def on_in_key(self, frame, path, defs):
+        return self.on_not_in_key(frame, path, defs)
+
     def on_not_in_key(self, frame, path, defs):
         path[-1].iterator = self.extract(path[-1].parent, defs[self.key])
         path[-1].empty = False
@@ -326,12 +362,14 @@ def _build_syntax():
     parameter = (Suppress("$") + skippable + identifier).setParseAction(lambda toks: [_Param(toks)])
     ty = (Suppress(":") + skippable + Suppress("$") + identifier).setParseAction(lambda toks: [_Ty(toks)])
     # support for objects-as-keys
-    keysubtree = (Suppress("[") + Group(ty[...] + subtree[1,...]) + (Suppress("]") | CharsNotIn("").setParseAction(PatternError._unexpected_tok) | StringEnd().setParseAction(PatternError._unexpected_tok)) + Optional("?", default="")).setParseAction(lambda toks: [_Subtree(toks)])
+    keysubtree = (Suppress("[") + Group(ty[...] + subtree) + (Suppress("]") | CharsNotIn("").setParseAction(PatternError._unexpected_tok) | StringEnd().setParseAction(PatternError._unexpected_tok)) + Optional("?", default="")).setParseAction(lambda toks: [_KeySubtree(toks)])
     # represents key matching - switches from "key" to "value"
-    tag = (identifier + Optional(parameter | re_literal | keysubtree) | parameter | str_literal | re_literal | keysubtree) + ty[...] + Empty().setParseAction(lambda: [_End()])
-    # arrow and tag or we give up
-    subtree <<= arrow + tag
-    return (subtree | CharsNotIn("").setParseAction(PatternError._unexpected_tok))[...].parseWithTabs()
+    tag = (identifier + Optional(parameter | str_literal | re_literal | keysubtree) | parameter | str_literal | re_literal | keysubtree) + ty[...] + Empty().setParseAction(lambda: [_End()])
+    # multiple value matching
+    valuesubtree = (Suppress("(") + Group(subtree) + (Suppress(")") | CharsNotIn("").setParseAction(PatternError._unexpected_tok) | StringEnd().setParseAction(PatternError._unexpected_tok)) + Optional("?", default="")).setParseAction(lambda toks: [_ValueSubtree(toks)])
+    # arrow and tag, value subtree
+    subtree <<= (arrow + tag)[...] + (valuesubtree + Empty().setParseAction(lambda: [_End()]))[...]
+    return ((subtree | CharsNotIn("").setParseAction(PatternError._unexpected_tok)) + StringEnd()).parseWithTabs()
 
 _built_syntax = _build_syntax()
 
@@ -347,14 +385,14 @@ def _pairs(o):
         raise TypeError
 
 class _Holder:
-    def __init__(self, key, value, name, parent=None, it=None, empty=False):
+    def __init__(self, key, value, name, parent=None, it=None, empty=False, subtree=False):
         self.name = name
         self.key = key
         self.value = value
         self.empty = empty
         self._it = it
         self.parent = parent
-        self.subtree = False
+        self.subtree = subtree
 
     @property
     def iterator(self):
@@ -424,30 +462,63 @@ def _match_helper(ops, defs, tree):
                 in_key = op.on_not_in_key(frame, path, defs)
 
 class Pattern:
+    """A compiled pattern.
+    """
+
     def __init__(self, pattern, defs):
         try:
-            self.ops = _built_syntax.parseString(pattern)
+            self._ops = _built_syntax.parseString(pattern)
         except PatternError as e:
             e._normalize(pattern, defs)
             raise
         else:
-            self.params = []
-            for op in self.ops:
-                op.collect_params(self.params)
-            self.defs = {param: defs[param] for param in self.params}
+            self._params = []
+            for op in self._ops:
+                op.collect_params(self._params)
+            self._defs = {param: defs[param] for param in self._params}
 
-    def match(self, tree):
-        return _match_helper(self.ops, self.defs, tree)
+    def match(self, obj):
+        """Matches this compiled pattern against the given object.
 
-#    def transform(self, tree, replacement):
-#        pass
+        Args:
+            obj: The object to match against.
+
+        Returns:
+            An iterator. Each item it yields is a dict mapping every
+            variable in the pattern to its matched ``(key, value)`` pair.
+
+        """
+        return _match_helper(self._ops, self._defs, obj)
 
 def compile(pattern, defs={}):
+    """Compiles the pattern and returns a compiled :py:class:`abdl.Pattern` object.
+
+    Args:
+        pattern (str): The pattern. Refer to module-level documentation for
+            pattern syntax.
+        defs (dict): Maps parameter names to their values. Used by parameters in the pattern.
+
+    Returns:
+        Pattern: A compiled pattern object.
+
+    """
     # TODO caching
     return Pattern(pattern, defs)
 
 def match(pattern, obj, defs={}):
-    return compile(pattern, defs).match(obj)
+    """Matches the pattern against the given obj.
+
+    This function is equivalent to ``abdl.compile(pattern, defs).match(obj)``.
+
+    Args:
+        pattern (str): The pattern. Refer to module-level documentation for
+            pattern syntax.
+        obj: The object to match against.
+        defs (dict): Maps parameter names to their values. Used by parameters in the pattern.
 
-#def transform(pattern, replacement, obj, defs={}):
-#    raise NotImplementedError
+    Returns:
+        An iterator. Each item it yields is a dict mapping every
+        variable in the pattern to its matched ``(key, value)`` pair.
+
+    """
+    return compile(pattern, defs).match(obj)
diff --git a/testing/test_abdl.py b/testing/test_abdl.py
index 080ab3f..4f5241f 100644
--- a/testing/test_abdl.py
+++ b/testing/test_abdl.py
@@ -182,6 +182,10 @@ def test_param(foo, pat):
                     yield {"X": x, "Z": z}
     assert all(LogAndCompare(pat.match(foo), deep(foo)))
 
+def test_basic_value_subtree():
+    matcher = abdl.match("(->foo'foo')(->bar'bar')", {'foo': 1, 'bar': 2})
+    assert list(matcher) == [{'foo': ('foo', 1), 'bar': ('bar', 2)}]
+
 # FIXME
 #@hypothesis.given(objtree, st.text())
 #def test_exhaustive(foo, pat):
diff --git a/testing/test_examples.py b/testing/test_examples.py
index 7341341..02532cc 100644
--- a/testing/test_examples.py
+++ b/testing/test_examples.py
@@ -1,9 +1,26 @@
 import abdl
 
 def test_basic_example():
-    for m in abdl.match("->X:?$dict->Y", {"foo": 1, "bar": {"baz": 2}}, {'dict': dict}):
-        assert m['X'][0] == 'bar' and  m['Y'][0] == 'baz' and m['Y'][1] == 2
+    m = next(abdl.match("->X:?$dict->Y", {"foo": 1, "bar": {"baz": 2}}, {'dict': dict}))
+    assert m['X'][0] == 'bar'
+    assert m['Y'][0] == 'baz'
+    assert m['Y'][1] == 2
 
 def test_basic_2():
-    for m in abdl.match("->'projects':?$d->P/[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/?:?$d->U:?$d->B", {"projects": {"385e734a52e13949a7a5c71827f6de920dbfea43": {"https://soniex2.autistic.space/git-repos/ganarchy.git": {"HEAD": {"active": True}}}}}, {'d': dict}):
-        assert m['P'][0] == "385e734a52e13949a7a5c71827f6de920dbfea43" and m['U'][0] == "https://soniex2.autistic.space/git-repos/ganarchy.git" and m['B'][0] == "HEAD" and m['B'][1] == {"active": True}
+    m = next(abdl.match("->'projects':?$d->P/[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/?:?$d->U:?$d->B", {"projects": {"385e734a52e13949a7a5c71827f6de920dbfea43": {"https://soniex2.autistic.space/git-repos/ganarchy.git": {"HEAD": {"active": True}}}}}, {'d': dict}))
+    assert m['P'][0] == "385e734a52e13949a7a5c71827f6de920dbfea43"
+    assert m['U'][0] == "https://soniex2.autistic.space/git-repos/ganarchy.git"
+    assert m['B'][0] == "HEAD"
+    assert m['B'][1] == {"active": True}
+
+def test_spaces():
+    pat = abdl.compile("""-> 'projects'?
+                             -> commit /[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/? :?$dict
+                                -> url :?$dict
+                                   -> branch :?$dict""", {'dict': dict})
+    data = {"projects": {"385e734a52e13949a7a5c71827f6de920dbfea43": {"https://soniex2.autistic.space/git-repos/ganarchy.git": {"HEAD": {"active": True}}}}}
+    m = next(pat.match(data))
+    assert m['commit'][0] == "385e734a52e13949a7a5c71827f6de920dbfea43"
+    assert m['url'][0] == "https://soniex2.autistic.space/git-repos/ganarchy.git"
+    assert m['branch'][0] == "HEAD"
+    assert m['branch'][1] == {"active": True}
diff --git a/testing/test_ops_abdl.py b/testing/test_ops_abdl.py
new file mode 100644
index 0000000..fe50369
--- /dev/null
+++ b/testing/test_ops_abdl.py
@@ -0,0 +1,161 @@
+# Tests abdl.py internals
+
+import abdl
+
+import re
+
+class OpHelper:
+    def __init__(self, pat, ops=None):
+        self.pat = pat
+        if not ops:
+            self.ops = pat._ops
+        else:
+            self.ops = ops
+        self.pos = -1
+
+    def done(self):
+        assert self.pos + 1 == len(self.ops)
+
+    def __enter__(self):
+        self.pos += 1
+        first = self.pos
+        assert not isinstance(self.ops[first], abdl._End)
+        while not isinstance(self.ops[self.pos], abdl._End):
+            self.pos += 1
+        assert isinstance(self.ops[self.pos], abdl._End)
+        return self.ops[first:self.pos]
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        pass
+
+def expect_types(seq, *tys):
+    assert len(seq) == len(tys)
+    assert(all(map(lambda x: isinstance(*x), zip(seq, tys))))
+
+def expect_idents(oph, *idents):
+    for ident in idents:
+        with oph as ops:
+            expect_types(ops, abdl._Arrow, abdl._Ident)
+            assert ops[1].key == ident
+
+def test_empty_iterator_pattern():
+    oph = OpHelper(abdl.compile(""))
+    oph.done()
+
+def test_four_depths_pattern():
+    oph = OpHelper(abdl.compile("->X->Y->Z->W"))
+    expect_idents(oph, "X", "Y", "Z", "W")
+    oph.done()
+
+def test_regex_pattern():
+    oph = OpHelper(abdl.compile("->/.../"))
+    with oph as ops:
+        expect_types(ops, abdl._Arrow, abdl._RegexKey)
+        assert ops[1].key == '...'
+        assert ops[1].compiled == re.compile('...')
+        assert ops[1].skippable == False
+    oph.done()
+
+def test_regex_skippable_pattern():
+    oph = OpHelper(abdl.compile("->/.../?"))
+    with oph as ops:
+        expect_types(ops, abdl._Arrow, abdl._RegexKey)
+        assert ops[1].key == '...'
+        assert ops[1].compiled == re.compile('...')
+        assert ops[1].skippable == True
+    oph.done()
+
+def test_regex_and_bind_pattern():
+    oph = OpHelper(abdl.compile("->/.../->Y"))
+    with oph as ops:
+        expect_types(ops, abdl._Arrow, abdl._RegexKey)
+        assert ops[1].key == '...'
+        assert ops[1].compiled == re.compile('...')
+        assert ops[1].skippable == False
+    expect_idents(oph, "Y")
+    oph.done()
+
+def test_empty_literal_skippable_and_bind_pattern():
+    oph = OpHelper(abdl.compile("->''?->Y"))
+    with oph as ops:
+        expect_types(ops, abdl._Arrow, abdl._StringKey)
+        assert ops[1].key == ''
+        assert ops[1].skippable == True
+    expect_idents(oph, "Y")
+    oph.done()
+
+def test_type_pattern():
+    oph = OpHelper(abdl.compile("->X:?$a->Y", defs={'a': (dict, list, set)}))
+    assert oph.pat._defs['a'] == (dict, list, set)
+    with oph as ops:
+        expect_types(ops, abdl._Arrow, abdl._Ident, abdl._Ty)
+        assert ops[1].key == 'X'
+        assert ops[2].key == 'a'
+        assert ops[2].skippable == True
+    expect_idents(oph, "Y")
+    oph.done()
+
+def test_multi_type_pattern():
+    oph = OpHelper(abdl.compile("->X:$a:?$b:?$c->Y", defs={'a': (dict, list, set), 'b': (dict, set), 'c': dict}))
+    assert oph.pat._defs['a'] == (dict, list, set)
+    assert oph.pat._defs['b'] == (dict, set)
+    assert oph.pat._defs['c'] == dict
+    with oph as ops:
+        expect_types(ops, abdl._Arrow, abdl._Ident, abdl._Ty, abdl._Ty, abdl._Ty)
+        assert ops[1].key == 'X'
+        assert ops[2].key == 'a'
+        assert ops[2].skippable == False
+        assert ops[3].key == 'b'
+        assert ops[3].skippable == True
+        assert ops[4].key == 'c'
+        assert ops[4].skippable == True
+    expect_idents(oph, "Y")
+    oph.done()
+
+def test_key_subtree_pattern():
+    oph = OpHelper(abdl.compile("->[:?$set->A]->D", defs={'set': set}))
+    assert oph.pat._defs['set'] == set
+    with oph as ops:
+        expect_types(ops, abdl._Arrow, abdl._KeySubtree)
+        oph2 = OpHelper(None, ops=ops[1].key)
+        with oph2 as ops2:
+            expect_types(ops2, abdl._Ty, abdl._Arrow, abdl._Ident)
+            assert ops2[0].key == 'set'
+            assert ops2[0].skippable == True
+            assert ops2[2].key == 'A'
+        oph2.done()
+    expect_idents(oph, "D")
+    oph.done()
+
+def test_param_pattern():
+    oph = OpHelper(abdl.compile("->X->$a->Z", defs={'a': '0'}))
+    assert oph.pat._defs['a'] == '0'
+    expect_idents(oph, "X")
+    with oph as ops:
+        expect_types(ops, abdl._Arrow, abdl._Param)
+        assert ops[1].key == 'a'
+        assert ops[1].skippable == False
+    expect_idents(oph, "Z")
+    oph.done()
+
+def test_value_subtree_pattern():
+    oph = OpHelper(abdl.compile("(->foo'foo')(->bar'bar')"))
+    with oph as ops:
+        expect_types(ops, abdl._ValueSubtree)
+        oph2 = OpHelper(None, ops=ops[0].key)
+        with oph2 as ops2:
+            expect_types(ops2, abdl._Arrow, abdl._Ident, abdl._StringKey)
+            assert ops2[1].key == 'foo'
+            assert ops2[2].key == 'foo'
+            assert ops2[2].skippable == False
+        oph2.done()
+    with oph as ops:
+        expect_types(ops, abdl._ValueSubtree)
+        oph2 = OpHelper(None, ops=ops[0].key)
+        with oph2 as ops2:
+            expect_types(ops2, abdl._Arrow, abdl._Ident, abdl._StringKey)
+            assert ops2[1].key == 'bar'
+            assert ops2[2].key == 'bar'
+            assert ops2[2].skippable == False
+        oph2.done()
+    oph.done()