# A Boneless Datastructure Language
# Copyright (C) 2019-2020 Soni L.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""A Boneless Datastructure Language, version 2.2.
ABDL expressions are regex-like constructs for matching and validating object
structures. They can be used with JSON and similar formats, and even
self-referential data structures.
Language Reference:
ABDL expressions have the ability to iterate, index, validate and filter
data structures, through the use of the syntax elements below.
Syntax Elements of ABDL Expressions:
An arrow is ``->`` and indicates indexing/iteration (Mappings,
Sequences, Sets). Whether indexing or iteration is used is defined by
the elements that follow, with iteration being used by default.
A variable is a sequence of alphanumeric characters, not starting with
a digit. A ``(key, value)`` tuple containing the respective matched
element will be identified by this name in the results dict.
A literal is a sequence of characters delimited by ``'``, optionally
followed by ``?``, with ``%`` as the escape character, and defines a
string-keyed indexing operation. A literal can contain any character,
except unescaped ``%`` or ``'`` symbols, which must be escaped as
``%%`` and ``%'``, respectively. The sequence of characters defined by
a literal is used as the string object in the indexing operation.
A parameter is ``$``, optionally followed by ``?``, followed by a
sequence of alphanumeric characters, not starting with a digit, and
defines an object-keyed indexing operation. The sequence of characters
defined by a parameter is used to retrieve, from the pattern's
definitions, the object to be used in the indexing operation.
A regex is a sequence of characters delimited by ``/``, optionally
followed by ``?``, with ``%`` as the escape character. A regex can
contain any character, except unescaped ``%`` or ``/`` symbols, which
must be escaped as ``%%`` and ``%/``, respectively. The sequence of
characters defined by a regex is passed to the ``re`` module, which
may apply further restrictions on the characters used, and is used to
accept the respective keys processed by the iterator.
A predicate is ``:``, optionally followed by ``?``, followed by a
``$`` and a sequence of alphanumeric characters, not starting with a
digit, and is used to accept values to be processed based on an
external ``abdl.predicates.Predicate``, type (through
``abdl.predicates.IsInstance``), or tuple (through
``abdl.predicates.Union``).
A key match is an ABDL expression (including, but not limited to, the
empty ABDL expression) enclosed within ``[`` and ``]``, optionally
prefixed with one or more predicates, and applies the enclosed
predicates and ABDL expression to the key (or index) being processed.
A key match enables additional validation of keys and/or extraction of
values from keys, and accepts a key if and only if the enclosed
predicates accept the key and the enclosed expression matches the key.
A subvalue is an ABDL expression (including, but not limited to, the
empty ABDL expression) enclosed within ``(`` and ``)``, and applies
the enclosed ABDL expression to the value (or index) being processed.
A subvalue enables the ability to match multiple values on the same
object, and accepts a value if and only if the enclosed expression
matches the value. A subvalue can be made optional by the presence of
a ``?`` after the subvalue - in case of no match, it will just omit
the relevant keys in the result. Optional subvalues are unrelated to
non-validating syntax elements (see below), they just use the same
syntax.
Some syntax elements can be validating or non-validating. Validating
syntax elements will raise a :py:exc:`abdl.exceptions.ValidationError`
whenever a non-accepted element is encountered, whereas non-validating
ones will skip them. Whether an element is validating is determined by
the absence of an optional ``?`` in the documented position. Note that
it is possible for a validating syntax element to still yield results
before raising a :py:exc:`abdl.exceptions.ValidationError`, so one
needs to be careful when writing code where such behaviour could
result in a security vulnerability.
The empty pattern matches anything, but only does so once.
Syntax of ABDL Expressions:
ABDL Expressions follow the given syntax, in (pseudo-)extended BNF::
abdlexpression ::= {arrow tag} {subvalue}
tag ::= identifier [arg] {predicate} | arg {predicate}
arg ::= parameter | literal | regex | keymatch
arrow ::= '->'
keymatch ::= '[' {predicate} abdlexpression ']'
subvalue ::= '(' {predicate} abdlexpression ')' ['?']
For a description of the terminals "parameter", "literal", "regex" and
"predicate", see "Syntax Elements of ABDL Expressions" above.
Examples:
A simple (and yet unnecessarily complicated) by-value list and dict
iterator:
>>> import abdl
>>> for m in abdl.match("->X", [1, 2, 3]):
... print(m['X'][1])
1
2
3
>>> for m in abdl.match("->X", {'a': 1, 'b': 2, 'c': 3}):
... print(m['X'][1])
1
2
3
A not so simple nested dict iterator:
>>> import abdl
>>> for m in abdl.match("->X:?$dict->Y", {"foo": 1, "bar": {"baz": 2}}, {'dict': dict}):
... print(m['X'][0], m['Y'][0], m['Y'][1])
bar baz 2
(If ``:?$dict`` wasn't present, a TypeError would be raised when
trying to iterate the ``1`` from ``"foo": 1``.)
Extracting data from non-flat config files:
>>> import abdl
>>> pat = abdl.compile('''-> 'projects'?
... -> commit /[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/? :?$dict
... -> url :?$dict
... -> branch :?$dict''', {'dict': dict})
>>> data = {"projects": {
... "385e734a52e13949a7a5c71827f6de920dbfea43": {
... "https://soniex2.autistic.space/git-repos/ganarchy.git": {"HEAD": {"active": True}}
... }
... }}
>>> for m in pat.match(data):
... print(m['commit'][0], m['url'][0], m['branch'][0], m['branch'][1])
385e734a52e13949a7a5c71827f6de920dbfea43 https://soniex2.autistic.space/git-repos/ganarchy.git HEAD {'active': True}
"""
import re
from collections.abc import Mapping, Sequence, Iterator, Set
from abdl import _parser
from abdl import _vm
from abdl import exceptions
# backwards compatibility TODO: remove in 3.0.0
from abdl.exceptions import ValidationError, DeprecationError, PatternError
class Pattern:
    """A pattern in compiled form, ready to be matched against objects.

    Warning:
        Instances are not meant to be constructed by hand. Obtain them
        through :py:func:`abdl.compile` instead.
    """

    def __init__(self, pattern, defs):
        """Parse ``pattern`` and bind the parameters it references.

        Args:
            pattern (str): The pattern source, handed to the parser as-is.
            defs: The parameter definitions. It is subscripted once for
                every parameter name collected from the parsed pattern,
                and is not touched at all if no parameter is collected.

        Raises:
            abdl.exceptions.PatternError: Re-raised from the parser, after
                being normalized with ``pattern`` and ``defs``.
        """
        try:
            parsed = _parser.BUILT_SYNTAX.parseString(pattern)
        except exceptions.PatternError as err:
            # Let the exception record the pattern and definitions it was
            # raised for, then propagate that very same exception object.
            err._normalize(pattern, defs)
            raise

        self._ops = parsed

        # Each parsed instruction appends the parameter names it uses.
        collected = []
        self._params = collected
        for op in parsed:
            op.collect_params(collected)

        # Keep only the definitions the pattern actually refers to.
        resolved = {}
        for name in collected:
            resolved[name] = defs[name]
        self._defs = resolved

    def match(self, obj):
        """Run this compiled pattern over ``obj``.

        Args:
            obj: The object to match against.

        Returns:
            An iterator. This iterator yields ``(key, value)`` pairs
            wrapped in a dict for each variable in the pattern.
        """
        ops, defs = self._ops, self._defs
        return _vm.match_helper(ops, defs, obj)
def compile(pattern, defs=None):
    """Build a compiled :py:class:`abdl.Pattern` object from a pattern.

    Args:
        pattern (str): The pattern. Refer to module-level documentation for
            pattern syntax.
        defs (dict): The parameter list. Used by parameters in the pattern.
            Forwarded unchanged (including the ``None`` default) to
            :py:class:`abdl.Pattern`.

    Returns:
        Pattern: A compiled pattern object.
    """
    # TODO caching
    compiled = Pattern(pattern, defs)
    return compiled
def match(pattern, obj, defs=None):
    """Compile the pattern and immediately match it against ``obj``.

    This is a shorthand for ``abdl.compile(pattern, defs).match(obj)``.

    Args:
        pattern (str): The pattern. Refer to module-level documentation for
            pattern syntax.
        obj: The object to match against.
        defs (dict): The parameter list. Used by parameters in the pattern.

    Returns:
        An iterator. This iterator yields ``(key, value)`` pairs
        wrapped in a dict for each variable in the pattern.
    """
    compiled = compile(pattern, defs)
    return compiled.match(obj)