diff options
Diffstat (limited to 'abdl/__init__.py')
-rw-r--r-- | abdl/__init__.py | 136 |
1 files changed, 77 insertions, 59 deletions
diff --git a/abdl/__init__.py b/abdl/__init__.py index 36a629c..547bb68 100644 --- a/abdl/__init__.py +++ b/abdl/__init__.py @@ -16,61 +16,77 @@ """A Boneless Datastructure Language, version 2.1. -ABDL expressions are regex-like constructs for matching and validating object structures. They can be used -with JSON and similar formats, and even self-referential data structures. +ABDL expressions are regex-like constructs for matching and validating object +structures. They can be used with JSON and similar formats, and even +self-referential data structures. Language Reference: - ABDL expressions have the ability to iterate, index, validate and filter data structures, through the - use of the syntax elements below. + ABDL expressions have the ability to iterate, index, validate and filter + data structures, through the use of the syntax elements below. Syntax Elements of ABDL Expressions: - An arrow is ``->`` and indicates indexing/iteration (Mappings, Sequences, Sets). Whether indexing or - iteration is used is defined by the elements that follow, with iteration being used by default. - - A variable is a sequence of alphanumeric characters, not starting with a digit. A ``(key, value)`` - tuple containing the respective matched element will be identified by this name in the results dict. - - A literal is a sequence of characters delimited by ``'``, optionally followed by ``?``, with ``%`` - as the escape character, and defines a string-keyed indexing operation. A literal can contain any - character, except unescaped ``%`` or ``'`` symbols, which must be escaped as ``%%`` and ``%'``, - respectively. The sequence of characters defined by a literal is used as the string object in the - indexing operation. - - A parameter is ``$``, optionally followed by ``?``, followed by a sequence of alphanumeric - characters, not starting with a digit, and defines an object-keyed indexing operation. 
The sequence - of characters defined by a parameter is used to retrieve, from the pattern's definitions, the object - to be used in the indexing operation. - - A regex is a sequence of characters delimited by ``/``, optionally followed by ``?``, with ``%`` as - the escape character. A regex can contain any character, except unescaped ``%`` or ``/`` symbols, - which must be escaped as ``%%`` and ``%/``, respectively. The sequence of characters defined by a - regex is passed to the ``re`` module, which may apply further restrictions on the characters used, - and is used to accept the respective keys processed by the iterator. - - A predicate is ``:``, optionally followed by ``?``, followed by an ``$`` and a sequence of - alphanumeric characters, not starting with a digit, and is used to accept values to be - processed based on an external ``abdl.predicates.Predicate``, type (through - ``abdl.predicates.IsInstance``), or tuple (through ``abdl.predicates.Union``). - - A key match is an ABDL expression (including, but not limited to, the empty ABDL expression) - enclosed within ``[`` and ``]``, optionally prefixed with one or more predicates, and applies the - enclosed predicates and ABDL expression to the key (or index) being processed. A key match enables - additional validation of keys and/or extraction of values from keys, and accepts a key if and only - if the enclosed predicates accept the key and the enclosed expression matches the key. - - A subvalue is an ABDL expression (including, but not limited to, the empty ABDL expression) - enclosed within ``(`` and ``)``, and applies the enclosed ABDL expression to the value (or - index) being processed. A subvalue enables the ability to match multiple values on the same object, - and accepts a value if and only the enclosed expression matches the value. - - Some syntax elements can be validating or non-validating. 
Validating syntax elements will raise a - :py:exc:`abdl.ValidationError` whenever a non-accepted element is encountered, whereas non-validating - ones will skip them. Whether an element is validating is determined by the absence of an optional ``?`` - in the documented position. Note that it is possible for a validating syntax element to still yield - results before raising a :py:exc:`abdl.ValidationError`, so one needs to be careful when writing code - where such behaviour could result in a security vulnerability. + An arrow is ``->`` and indicates indexing/iteration (Mappings, + Sequences, Sets). Whether indexing or iteration is used is defined by + the elements that follow, with iteration being used by default. + + A variable is a sequence of alphanumeric characters, not starting with + a digit. A ``(key, value)`` tuple containing the respective matched + element will be identified by this name in the results dict. + + A literal is a sequence of characters delimited by ``'``, optionally + followed by ``?``, with ``%`` as the escape character, and defines a + string-keyed indexing operation. A literal can contain any character, + except unescaped ``%`` or ``'`` symbols, which must be escaped as + ``%%`` and ``%'``, respectively. The sequence of characters defined by + a literal is used as the string object in the indexing operation. + + A parameter is ``$``, optionally followed by ``?``, followed by a + sequence of alphanumeric characters, not starting with a digit, and + defines an object-keyed indexing operation. The sequence of characters + defined by a parameter is used to retrieve, from the pattern's + definitions, the object to be used in the indexing operation. + + A regex is a sequence of characters delimited by ``/``, optionally + followed by ``?``, with ``%`` as the escape character. A regex can + contain any character, except unescaped ``%`` or ``/`` symbols, which + must be escaped as ``%%`` and ``%/``, respectively. 
The sequence of + characters defined by a regex is passed to the ``re`` module, which + may apply further restrictions on the characters used, and is used to + accept the respective keys processed by the iterator. + + A predicate is ``:``, optionally followed by ``?``, followed by a + ``$`` and a sequence of alphanumeric characters, not starting with a + digit, and is used to accept values to be processed based on an + external ``abdl.predicates.Predicate``, type (through + ``abdl.predicates.IsInstance``), or tuple (through + ``abdl.predicates.Union``). + + A key match is an ABDL expression (including, but not limited to, the + empty ABDL expression) enclosed within ``[`` and ``]``, optionally + prefixed with one or more predicates, and applies the enclosed + predicates and ABDL expression to the key (or index) being processed. + A key match enables additional validation of keys and/or extraction of + values from keys, and accepts a key if and only if the enclosed + predicates accept the key and the enclosed expression matches the key. + + A subvalue is an ABDL expression (including, but not limited to, the + empty ABDL expression) enclosed within ``(`` and ``)``, and applies + the enclosed ABDL expression to the value (or index) being processed. + A subvalue enables the ability to match multiple values on the same + object, and accepts a value if and only if the enclosed expression + matches the value. + + Some syntax elements can be validating or non-validating. Validating + syntax elements will raise a :py:exc:`abdl.exceptions.ValidationError` + whenever a non-accepted element is encountered, whereas non-validating + ones will skip them. Whether an element is validating is determined by + the absence of an optional ``?`` in the documented position. 
Note that + it is possible for a validating syntax element to still yield results + before raising a :py:exc:`abdl.exceptions.ValidationError`, so one + needs to be careful when writing code where such behaviour could + result in a security vulnerability. Syntax of ABDL Expressions: @@ -84,12 +100,13 @@ Language Reference: keymatch ::= '[' {predicate} abdlexpression ']' subvalue ::= '(' {predicate} abdlexpression ')' - For a description of the terminals "parameter", "literal", "regex" and "predicate", see - "Syntax Elements of ABDL Expressions" above. + For a description of the terminals "parameter", "literal", "regex" and + "predicate", see "Syntax Elements of ABDL Expressions" above. Examples: - A simple (and yet unnecessarily complicated) by-value list and dict iterator: + A simple (and yet unnecessarily complicated) by-value list and dict + iterator: >>> import abdl >>> for m in abdl.match("->X", [1, 2, 3]): @@ -110,7 +127,8 @@ Language Reference: ... print(m['X'][0], m['Y'][0], m['Y'][1]) bar baz 2 - (If ``:?$dict`` wasn't present, a TypeError would be raised when trying to iterate the ``1`` from ``"foo": 1``.) + (If ``:?$dict`` wasn't present, a TypeError would be raised when + trying to iterate the ``1`` from ``"foo": 1``.) 
Extracting data from non-flat config files: @@ -151,13 +169,13 @@ class Pattern: def __init__(self, pattern, defs): try: self._ops = _parser.BUILT_SYNTAX.parseString(pattern) - except exceptions.PatternError as e: - e._normalize(pattern, defs) + except exceptions.PatternError as exc: + exc._normalize(pattern, defs) raise else: self._params = [] - for op in self._ops: - op.collect_params(self._params) + for ins in self._ops: + ins.collect_params(self._params) self._defs = {param: defs[param] for param in self._params} def match(self, obj): @@ -173,7 +191,7 @@ class Pattern: """ return _vm.match_helper(self._ops, self._defs, obj) -def compile(pattern, defs={}): +def compile(pattern, defs=None): """Compiles the pattern and returns a compiled :py:class:`abdl.Pattern` object. Args: @@ -188,7 +206,7 @@ def compile(pattern, defs={}): # TODO caching return Pattern(pattern, defs) -def match(pattern, obj, defs={}): +def match(pattern, obj, defs=None): """Matches the pattern against the given obj. This method is equivalent to ``abdl.compile(pattern, defs).match(obj)``. |