summary refs log blame commit diff stats
path: root/abdl/__init__.py
blob: 36a629caddbb71eed332a5d55ded21923a54a5d6 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
                                   
                                  













                                                                             
                                                  
 

                                                                                                          
 
                   
 

                                                                                                         
 
                                        
 

                                                                                                            
 

                                                                                                            
 




                                                                                                          
 



                                                                                                            
 




                                                                                                           
 



                                                                                                  
 




                                                                                                          
 



                                                                                                           

                                                                                                         



















                                                                                                               
 
             
 















                                                                                     
                       



                                                                                                




                                                                                                                        











                                                                                                                            
   




                                                            





                                                                           

              




                                                                                    

       

                                      

                                                                 


                                       



                                                                       
 

                                                                  
 







                                                                    
                                                           

                              










                                                                                   



                                 








                                                                            
 





                                                                
# A Boneless Datastructure Language
# Copyright (C) 2019-2020  Soni L.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""A Boneless Datastructure Language, version 2.1.

ABDL expressions are regex-like constructs for matching and validating object structures. They can be used
with JSON and similar formats, and even self-referential data structures.

Language Reference:

    ABDL expressions have the ability to iterate, index, validate and filter data structures, through the
    use of the syntax elements below.

    Syntax Elements of ABDL Expressions:

        An arrow is ``->`` and indicates indexing/iteration (Mappings, Sequences, Sets). Whether indexing or
        iteration is used is defined by the elements that follow, with iteration being used by default.

        A variable is a sequence of alphanumeric characters, not starting with a digit. A ``(key, value)``
        tuple containing the respective matched element will be identified by this name in the results dict.

        A literal is a sequence of characters delimited by ``'``, optionally followed by ``?``, with ``%``
        as the escape character, and defines a string-keyed indexing operation. A literal can contain any
        character, except unescaped ``%`` or ``'`` symbols, which must be escaped as ``%%`` and ``%'``,
        respectively. The sequence of characters defined by a literal is used as the string object in the
        indexing operation.

        A parameter is ``$``, optionally followed by ``?``, followed by a sequence of alphanumeric
        characters, not starting with a digit, and defines an object-keyed indexing operation. The sequence
        of characters defined by a parameter is used to retrieve, from the pattern's definitions, the object
        to be used in the indexing operation.

        A regex is a sequence of characters delimited by ``/``, optionally followed by ``?``, with ``%`` as
        the escape character. A regex can contain any character, except unescaped ``%`` or ``/`` symbols,
        which must be escaped as ``%%`` and ``%/``, respectively. The sequence of characters defined by a
        regex is passed to the ``re`` module, which may apply further restrictions on the characters used,
        and is used to accept the respective keys processed by the iterator.

        A predicate is ``:``, optionally followed by ``?``, followed by a ``$`` and a sequence of
        alphanumeric characters, not starting with a digit, and is used to accept values to be
        processed based on an external ``abdl.predicates.Predicate``, type (through
        ``abdl.predicates.IsInstance``), or tuple (through ``abdl.predicates.Union``).

        A key match is an ABDL expression (including, but not limited to, the empty ABDL expression)
        enclosed within ``[`` and ``]``, optionally prefixed with one or more predicates, and applies the
        enclosed predicates and ABDL expression to the key (or index) being processed. A key match enables
        additional validation of keys and/or extraction of values from keys, and accepts a key if and only
        if the enclosed predicates accept the key and the enclosed expression matches the key.

        A subvalue is an ABDL expression (including, but not limited to, the empty ABDL expression)
        enclosed within ``(`` and ``)``, and applies the enclosed ABDL expression to the value (or
        index) being processed. A subvalue enables the ability to match multiple values on the same object,
        and accepts a value if and only if the enclosed expression matches the value.

        Some syntax elements can be validating or non-validating. Validating syntax elements will raise a
        :py:exc:`abdl.ValidationError` whenever a non-accepted element is encountered, whereas non-validating
        ones will skip them. Whether an element is validating is determined by the absence of an optional ``?``
        in the documented position. Note that it is possible for a validating syntax element to still yield
        results before raising a :py:exc:`abdl.ValidationError`, so one needs to be careful when writing code
        where such behaviour could result in a security vulnerability.

    Syntax of ABDL Expressions:

        ABDL Expressions follow the given syntax, in (pseudo-)extended BNF::

            abdlexpression ::= {arrow tag} {subvalue}
            tag ::= identifier [arg] {predicate} | arg {predicate}
            arg ::= parameter | literal | regex | keymatch

            arrow ::= '->'
            keymatch ::= '[' {predicate} abdlexpression ']'
            subvalue ::= '(' {predicate} abdlexpression ')'

        For a description of the terminals "parameter", "literal", "regex" and "predicate", see
        "Syntax Elements of ABDL Expressions" above.

    Examples:

        A simple (and yet unnecessarily complicated) by-value list and dict iterator:

        >>> import abdl
        >>> for m in abdl.match("->X", [1, 2, 3]):
        ...     print(m['X'][1])
        1
        2
        3
        >>> for m in abdl.match("->X", {'a': 1, 'b': 2, 'c': 3}):
        ...     print(m['X'][1])
        1
        2
        3

        A not so simple nested dict iterator:

        >>> import abdl
        >>> for m in abdl.match("->X:?$dict->Y", {"foo": 1, "bar": {"baz": 2}}, {'dict': dict}):
        ...     print(m['X'][0], m['Y'][0], m['Y'][1])
        bar baz 2

        (If ``:?$dict`` wasn't present, a TypeError would be raised when trying to iterate the ``1`` from ``"foo": 1``.)

        Extracting data from non-flat config files:

        >>> import abdl
        >>> pat = abdl.compile('''-> 'projects'?
        ...                          -> commit /[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/? :?$dict
        ...                             -> url :?$dict
        ...                                -> branch :?$dict''', {'dict': dict})
        >>> data = {"projects": {
        ...     "385e734a52e13949a7a5c71827f6de920dbfea43": {
        ...         "https://soniex2.autistic.space/git-repos/ganarchy.git": {"HEAD": {"active": True}}
        ...     }
        ... }}
        >>> for m in pat.match(data):
        ...     print(m['commit'][0], m['url'][0], m['branch'][0], m['branch'][1])
        385e734a52e13949a7a5c71827f6de920dbfea43 https://soniex2.autistic.space/git-repos/ganarchy.git HEAD {'active': True}
"""

import re

from collections.abc import Mapping, Sequence, Iterator, Set

from abdl import _parser
from abdl import _vm
from abdl import exceptions

# backwards compatibility TODO: remove in 3.0.0
from abdl.exceptions import ValidationError, DeprecationError, PatternError

class Pattern:
    """A compiled ABDL pattern.

    Holds the parsed operations of a pattern together with the subset of
    definitions that the pattern's operations reported as parameters.

    Warning:
        Do not create instances of this class manually. Use :py:func:`abdl.compile`.

    """

    def __init__(self, pattern, defs):
        # Parse first; a PatternError is normalized against the original
        # pattern/defs and then propagated unchanged to the caller.
        try:
            parsed_ops = _parser.BUILT_SYNTAX.parseString(pattern)
        except exceptions.PatternError as err:
            err._normalize(pattern, defs)
            raise

        self._ops = parsed_ops

        # Each op appends the parameter names it uses to the shared list.
        self._params = param_names = []
        for operation in parsed_ops:
            operation.collect_params(param_names)

        # Keep only the definitions the pattern actually refers to; a name
        # missing from ``defs`` surfaces here as a lookup error.
        resolved = {}
        for name in param_names:
            resolved[name] = defs[name]
        self._defs = resolved

    def match(self, obj):
        """Runs this compiled pattern against ``obj``.

        Args:
            obj: The object to match against.

        Returns:
            An iterator. This iterator yields ``(key, value)`` pairs
            wrapped in a dict for each variable in the pattern.

        """
        ops, defs = self._ops, self._defs
        return _vm.match_helper(ops, defs, obj)

def compile(pattern, defs=None):
    """Compiles the pattern and returns a compiled :py:class:`abdl.Pattern` object.

    Args:
        pattern (str): The pattern. Refer to module-level documentation for
            pattern syntax.
        defs (dict, optional): The parameter list. Used by parameters in the
            pattern. When omitted (or ``None``), an empty parameter list is
            used.

    Returns:
        Pattern: A compiled pattern object.

    Raises:
        abdl.exceptions.PatternError: Propagated from :py:class:`abdl.Pattern`
            when parsing the pattern fails.

    """
    # Use a fresh dict per call instead of a shared mutable ``{}`` default,
    # so nothing can leak between calls through the default argument.
    if defs is None:
        defs = {}
    # TODO caching
    return Pattern(pattern, defs)

def match(pattern, obj, defs=None):
    """Matches the pattern against the given obj.

    This method is equivalent to ``abdl.compile(pattern, defs).match(obj)``.

    Args:
        pattern (str): The pattern. Refer to module-level documentation for
            pattern syntax.
        obj: The object to match against.
        defs (dict, optional): The parameter list. Used by parameters in the
            pattern. When omitted (or ``None``), an empty parameter list is
            used.

    Returns:
        An iterator. This iterator yields ``(key, value)`` pairs
        wrapped in a dict for each variable in the pattern.

    """
    # Normalize here (rather than relying on ``compile``) so a fresh dict is
    # always handed on and no mutable default is shared between calls.
    if defs is None:
        defs = {}
    return compile(pattern, defs).match(obj)