summary refs log blame commit diff stats
path: root/abdl/__init__.py
blob: 7208e44660d728d64abcd28467d7787bb2768603 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
                                   
                                  













                                                                             
                                                  
 


                                                                             
 
                   
 

                                                                            
 
                                        
 
















































                                                                              




                                                                             









                                                                              
 

                                                                  









                                                                            
                                                                 
 

                                                                              
 
             
 

                                                                           














                                                                 
                       



                                                                                                

                                                                        



                                                   











                                                                                                                            
   




                                                            





                                                                           

              




                                                                                    

       

                                      
                                                                 

                                              

                 
                             

                                                
                                                                       
 

                                                                  
 







                                                                    
                                                           
 
                                










                                                                                   


                                 
                                   








                                                                            
 





                                                                
# A Boneless Datastructure Language
# Copyright (C) 2019-2020  Soni L.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""A Boneless Datastructure Language, version 2.2.

ABDL expressions are regex-like constructs for matching and validating object
structures. They can be used with JSON and similar formats, and even
self-referential data structures.

Language Reference:

    ABDL expressions have the ability to iterate, index, validate and filter
    data structures, through the use of the syntax elements below.

    Syntax Elements of ABDL Expressions:

        An arrow is ``->`` and indicates indexing/iteration (Mappings,
        Sequences, Sets). Whether indexing or iteration is used is defined by
        the elements that follow, with iteration being used by default.

        A variable is a sequence of alphanumeric characters, not starting with
        a digit. A ``(key, value)`` tuple containing the respective matched
        element will be identified by this name in the results dict.

        A literal is a sequence of characters delimited by ``'``, optionally
        followed by ``?``, with ``%`` as the escape character, and defines a
        string-keyed indexing operation. A literal can contain any character,
        except unescaped ``%`` or ``'`` symbols, which must be escaped as
        ``%%`` and ``%'``, respectively. The sequence of characters defined by
        a literal is used as the string object in the indexing operation.

        A parameter is ``$``, optionally followed by ``?``, followed by a
        sequence of alphanumeric characters, not starting with a digit, and
        defines an object-keyed indexing operation. The sequence of characters
        defined by a parameter is used to retrieve, from the pattern's
        definitions, the object to be used in the indexing operation.

        A regex is a sequence of characters delimited by ``/``, optionally
        followed by ``?``, with ``%`` as the escape character. A regex can
        contain any character, except unescaped ``%`` or ``/`` symbols, which
        must be escaped as ``%%`` and ``%/``, respectively. The sequence of
        characters defined by a regex is passed to the ``re`` module, which
        may apply further restrictions on the characters used, and is used to
        accept the respective keys processed by the iterator.

        A predicate is ``:``, optionally followed by ``?``, followed by a
        ``$`` and a sequence of alphanumeric characters, not starting with a
        digit, and is used to accept values to be processed based on an
        external ``abdl.predicates.Predicate``, type (through
        ``abdl.predicates.IsInstance``), or tuple (through
        ``abdl.predicates.Union``).

        A key match is an ABDL expression (including, but not limited to, the
        empty ABDL expression) enclosed within ``[`` and ``]``, optionally
        prefixed with one or more predicates, and applies the enclosed
        predicates and ABDL expression to the key (or index) being processed.
        A key match enables additional validation of keys and/or extraction of
        values from keys, and accepts a key if and only if the enclosed
        predicates accept the key and the enclosed expression matches the key.

        A subvalue is an ABDL expression (including, but not limited to, the
        empty ABDL expression) enclosed within ``(`` and ``)``, and applies
        the enclosed ABDL expression to the value (or index) being processed.
        A subvalue enables the ability to match multiple values on the same
        object, and accepts a value if and only if the enclosed expression
        matches the value. A subvalue can be made optional by the presence of
        a ``?`` after the subvalue - in case of no match, it will just omit
        the relevant keys in the result. Optional subvalues are unrelated to
        non-validating syntax elements (see below), they just use the same
        syntax.

        Some syntax elements can be validating or non-validating. Validating
        syntax elements will raise a :py:exc:`abdl.exceptions.ValidationError`
        whenever a non-accepted element is encountered, whereas non-validating
        ones will skip them. Whether an element is validating is determined by
        the absence of an optional ``?`` in the documented position. Note that
        it is possible for a validating syntax element to still yield results
        before raising a :py:exc:`abdl.exceptions.ValidationError`, so one
        needs to be careful when writing code where such behaviour could
        result in a security vulnerability.

        The empty pattern matches anything, but only does so once.

    Syntax of ABDL Expressions:

        ABDL Expressions follow the given syntax, in (pseudo-)extended BNF::

            abdlexpression ::= {arrow tag} {subvalue}
            tag ::= identifier [arg] {predicate} | arg {predicate}
            arg ::= parameter | literal | regex | keymatch

            arrow ::= '->'
            keymatch ::= '[' {predicate} abdlexpression ']'
            subvalue ::= '(' {predicate} abdlexpression ')' ['?']

        For a description of the terminals "parameter", "literal", "regex" and
        "predicate", see "Syntax Elements of ABDL Expressions" above.

    Examples:

        A simple (and yet unnecessarily complicated) by-value list and dict
        iterator:

        >>> import abdl
        >>> for m in abdl.match("->X", [1, 2, 3]):
        ...     print(m['X'][1])
        1
        2
        3
        >>> for m in abdl.match("->X", {'a': 1, 'b': 2, 'c': 3}):
        ...     print(m['X'][1])
        1
        2
        3

        A not so simple nested dict iterator:

        >>> import abdl
        >>> for m in abdl.match("->X:?$dict->Y", {"foo": 1, "bar": {"baz": 2}}, {'dict': dict}):
        ...     print(m['X'][0], m['Y'][0], m['Y'][1])
        bar baz 2

        (If ``:?$dict`` wasn't present, a TypeError would be raised when
        trying to iterate the ``1`` from ``"foo": 1``.)

        Extracting data from non-flat config files:

        >>> import abdl
        >>> pat = abdl.compile('''-> 'projects'?
        ...                          -> commit /[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/? :?$dict
        ...                             -> url :?$dict
        ...                                -> branch :?$dict''', {'dict': dict})
        >>> data = {"projects": {
        ...     "385e734a52e13949a7a5c71827f6de920dbfea43": {
        ...         "https://soniex2.autistic.space/git-repos/ganarchy.git": {"HEAD": {"active": True}}
        ...     }
        ... }}
        >>> for m in pat.match(data):
        ...     print(m['commit'][0], m['url'][0], m['branch'][0], m['branch'][1])
        385e734a52e13949a7a5c71827f6de920dbfea43 https://soniex2.autistic.space/git-repos/ganarchy.git HEAD {'active': True}
"""

import re

from collections.abc import Mapping, Sequence, Iterator, Set

from abdl import _parser
from abdl import _vm
from abdl import exceptions

# backwards compatibility TODO: remove in 3.0.0
from abdl.exceptions import ValidationError, DeprecationError, PatternError

class Pattern:
    """A compiled pattern object.

    Holds the operations produced by parsing the pattern, together with the
    definitions for the parameters those operations collected.

    Warning:
        Do not create instances of this class manually. Use :py:func:`abdl.compile`.

    """

    def __init__(self, pattern, defs):
        """Parses ``pattern`` and binds the parameters it uses to ``defs``.

        Args:
            pattern (str): The pattern. Refer to module-level documentation
                for pattern syntax.
            defs (dict): The parameter list. Used by parameters in the
                pattern. ``None`` is treated as an empty parameter list when
                looking up parameters.

        Raises:
            abdl.exceptions.PatternError: If parsing raises it. The error is
                normalized with ``pattern`` and ``defs`` before being
                re-raised.
            KeyError: If the pattern uses a parameter that is missing from
                a ``dict`` passed as ``defs`` (or ``defs`` is ``None``).

        """
        try:
            self._ops = _parser.BUILT_SYNTAX.parseString(pattern)
        except exceptions.PatternError as exc:
            # Hand the caller's original arguments to the error untouched.
            exc._normalize(pattern, defs)
            raise

        self._params = []
        for ins in self._ops:
            ins.collect_params(self._params)

        # compile() and match() default ``defs`` to None. Looking parameters
        # up in an empty dict makes a missing parameter fail the same way
        # (KeyError naming the parameter) whether ``defs`` was omitted or
        # merely incomplete, instead of "NoneType is not subscriptable".
        available = {} if defs is None else defs
        self._defs = {param: available[param] for param in self._params}

    def match(self, obj):
        """Matches this compiled pattern against the given object.

        Args:
            obj: The object to match against.

        Returns:
            An iterator. This iterator yields ``(key, value)`` pairs
            wrapped in a dict for each variable in the pattern.

        """
        return _vm.match_helper(self._ops, self._defs, obj)

def compile(pattern, defs=None):
    """Builds a reusable :py:class:`abdl.Pattern` from a pattern string.

    Args:
        pattern (str): The pattern. Refer to module-level documentation for
            pattern syntax.
        defs (dict): The parameter list. Used by parameters in the pattern.

    Returns:
        Pattern: A compiled pattern object.

    """
    # TODO caching
    compiled = Pattern(pattern, defs)
    return compiled

def match(pattern, obj, defs=None):
    """Compiles the pattern and matches it against the given obj in one step.

    This method is equivalent to ``abdl.compile(pattern, defs).match(obj)``.

    Args:
        pattern (str): The pattern. Refer to module-level documentation for
            pattern syntax.
        obj: The object to match against.
        defs (dict): The parameter list. Used by parameters in the pattern.

    Returns:
        An iterator. This iterator yields ``(key, value)`` pairs
        wrapped in a dict for each variable in the pattern.

    """
    compiled = compile(pattern, defs)
    results = compiled.match(obj)
    return results