# A Boneless Datastructure Language
# Copyright (C) 2019-2020 Soni L.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""A Boneless Datastructure Language, version 2.1.
ABDL expressions are regex-like constructs for matching and validating object structures. They can be used
with JSON and similar formats, and even self-referential data structures.
Language Reference:
ABDL expressions have the ability to iterate, index, validate and filter data structures, through the
use of the syntax elements below.
Syntax Elements of ABDL Expressions:
An arrow is ``->`` and indicates indexing/iteration (Mappings, Sequences, Sets). Whether indexing or
iteration is used is defined by the elements that follow, with iteration being used by default.
A variable is a sequence of alphanumeric characters, not starting with a digit. A ``(key, value)``
tuple containing the respective matched element will be identified by this name in the results dict.
A literal is a sequence of characters delimited by ``'``, optionally followed by ``?``, with ``%``
as the escape character, and defines a string-keyed indexing operation. A literal can contain any
character, except unescaped ``%`` or ``'`` symbols, which must be escaped as ``%%`` and ``%'``,
respectively. The sequence of characters defined by a literal is used as the string object in the
indexing operation.
A parameter is ``$``, optionally followed by ``?``, followed by a sequence of alphanumeric
characters, not starting with a digit, and defines an object-keyed indexing operation. The sequence
of characters defined by a parameter is used to retrieve, from the pattern's definitions, the object
to be used in the indexing operation.
A regex is a sequence of characters delimited by ``/``, optionally followed by ``?``, with ``%`` as
the escape character. A regex can contain any character, except unescaped ``%`` or ``/`` symbols,
which must be escaped as ``%%`` and ``%/``, respectively. The sequence of characters defined by a
regex is passed to the ``re`` module, which may apply further restrictions on the characters used,
and is used to accept the respective keys processed by the iterator.
A predicate is ``:``, optionally followed by ``?``, followed by a ``$`` and a sequence of
alphanumeric characters, not starting with a digit, and is used to accept values to be
processed based on an external ``abdl.predicates.Predicate``, type (through
``abdl.predicates.IsInstance``), or tuple (through ``abdl.predicates.Union``).
A key match is an ABDL expression (including, but not limited to, the empty ABDL expression)
enclosed within ``[`` and ``]``, optionally prefixed with one or more predicates, and applies the
enclosed predicates and ABDL expression to the key (or index) being processed. A key match enables
additional validation of keys and/or extraction of values from keys, and accepts a key if and only
if the enclosed predicates accept the key and the enclosed expression matches the key.
A subvalue is an ABDL expression (including, but not limited to, the empty ABDL expression)
enclosed within ``(`` and ``)``, and applies the enclosed ABDL expression to the value (or
index) being processed. A subvalue enables the ability to match multiple values on the same object,
and accepts a value if and only if the enclosed expression matches the value.
Some syntax elements can be validating or non-validating. Validating syntax elements will raise a
:py:exc:`abdl.ValidationError` whenever a non-accepted element is encountered, whereas non-validating
ones will skip them. Whether an element is validating is determined by the absence of an optional ``?``
in the documented position. Note that it is possible for a validating syntax element to still yield
results before raising a :py:exc:`abdl.ValidationError`, so one needs to be careful when writing code
where such behaviour could result in a security vulnerability.
Syntax of ABDL Expressions:
ABDL Expressions follow the given syntax, in (pseudo-)extended BNF::
abdlexpression ::= {arrow tag} {subvalue}
tag ::= identifier [arg] {predicate} | arg {predicate}
arg ::= parameter | literal | regex | keymatch
arrow ::= '->'
keymatch ::= '[' {predicate} abdlexpression ']'
subvalue ::= '(' {predicate} abdlexpression ')'
For a description of the terminals "parameter", "literal", "regex" and "predicate", see
"Syntax Elements of ABDL Expressions" above.
Examples:
A simple (and yet unnecessarily complicated) by-value list and dict iterator:
>>> import abdl
>>> for m in abdl.match("->X", [1, 2, 3]):
... print(m['X'][1])
1
2
3
>>> for m in abdl.match("->X", {'a': 1, 'b': 2, 'c': 3}):
... print(m['X'][1])
1
2
3
A not so simple nested dict iterator:
>>> import abdl
>>> for m in abdl.match("->X:?$dict->Y", {"foo": 1, "bar": {"baz": 2}}, {'dict': dict}):
... print(m['X'][0], m['Y'][0], m['Y'][1])
bar baz 2
(If ``:?$dict`` wasn't present, a TypeError would be raised when trying to iterate the ``1`` from ``"foo": 1``.)
Extracting data from non-flat config files:
>>> import abdl
>>> pat = abdl.compile('''-> 'projects'?
... -> commit /[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/? :?$dict
... -> url :?$dict
... -> branch :?$dict''', {'dict': dict})
>>> data = {"projects": {
... "385e734a52e13949a7a5c71827f6de920dbfea43": {
... "https://soniex2.autistic.space/git-repos/ganarchy.git": {"HEAD": {"active": True}}
... }
... }}
>>> for m in pat.match(data):
... print(m['commit'][0], m['url'][0], m['branch'][0], m['branch'][1])
385e734a52e13949a7a5c71827f6de920dbfea43 https://soniex2.autistic.space/git-repos/ganarchy.git HEAD {'active': True}
"""
import re
from collections.abc import Mapping, Sequence, Iterator, Set
from abdl import _parser
from abdl import _vm
from abdl import exceptions
# backwards compatibility TODO: remove in 3.0.0
from abdl.exceptions import ValidationError, DeprecationError, PatternError
class Pattern:
    """Compiled form of an ABDL expression.

    Warning:
        Instances are not meant to be constructed by hand; obtain them
        through :py:func:`abdl.compile`.
    """

    def __init__(self, pattern, defs):
        # Parse the expression. On a pattern error, let the exception
        # normalize itself with the pattern text and the definitions, then
        # propagate it unchanged.
        try:
            parsed = _parser.BUILT_SYNTAX.parseString(pattern)
        except exceptions.PatternError as err:
            err._normalize(pattern, defs)
            raise

        # Only reached when parsing succeeded (the handler always re-raises).
        self._ops = parsed

        # Every op is handed the same list through ``collect_params`` --
        # presumably each one appends the parameter names it refers to.
        collected = []
        self._params = collected
        for operation in parsed:
            operation.collect_params(collected)

        # Keep just the definitions named in the collected list; the lookup
        # ``defs[name]`` fails if a collected name is missing from ``defs``.
        self._defs = {name: defs[name] for name in collected}

    def match(self, obj):
        """Run this compiled pattern over the given object.

        Args:
            obj: The object to match against.

        Returns:
            An iterator. Each item it yields is a dict holding a
            ``(key, value)`` pair for every variable in the pattern.
        """
        ops, definitions = self._ops, self._defs
        return _vm.match_helper(ops, definitions, obj)
def compile(pattern, defs=None):
    """Compiles the pattern and returns a compiled :py:class:`abdl.Pattern` object.

    Args:
        pattern (str): The pattern. Refer to module-level documentation for
            pattern syntax.
        defs (dict, optional): The parameter list. Used by parameters in the
            pattern. When omitted (or ``None``), an empty parameter list is
            used.

    Returns:
        Pattern: A compiled pattern object.
    """
    # A fresh dict per call: a ``{}`` default would be a single object shared
    # by every call and handed to code that could hold on to or mutate it.
    if defs is None:
        defs = {}
    # TODO caching
    return Pattern(pattern, defs)
def match(pattern, obj, defs=None):
    """Matches the pattern against the given obj.

    This method is equivalent to ``abdl.compile(pattern, defs).match(obj)``.

    Args:
        pattern (str): The pattern. Refer to module-level documentation for
            pattern syntax.
        obj: The object to match against.
        defs (dict, optional): The parameter list. Used by parameters in the
            pattern. When omitted (or ``None``), an empty parameter list is
            used.

    Returns:
        An iterator. This iterator yields ``(key, value)`` pairs
        wrapped in a dict for each variable in the pattern.
    """
    # Normalize here rather than forwarding ``None``, so a fresh dict is used
    # per call instead of one shared mutable default.
    if defs is None:
        defs = {}
    return compile(pattern, defs).match(obj)