diff options
Diffstat (limited to 'abdl')
-rw-r--r-- | abdl/__init__.py | 490 | ||||
-rw-r--r-- | abdl/_parser.py | 70 | ||||
-rw-r--r-- | abdl/_vm.py | 298 | ||||
-rw-r--r-- | abdl/exceptions.py | 74 | ||||
-rw-r--r-- | abdl/predicates.py | 94 |
5 files changed, 619 insertions, 407 deletions
diff --git a/abdl/__init__.py b/abdl/__init__.py index 8dde742..d751187 100644 --- a/abdl/__init__.py +++ b/abdl/__init__.py @@ -1,5 +1,5 @@ # A Boneless Datastructure Language -# Copyright (C) 2019 Soni L. +# Copyright (C) 2019-2020 Soni L. # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -14,62 +14,107 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. -"""A Boneless Datastructure Language, version 2.0.1. +"""A Boneless Datastructure Language, version 2.1.0. ABDL expressions are regex-like constructs for matching and validating object structures. They can be used with JSON and similar formats, and even self-referential data structures. Language Reference: - ABDL expressions have the ability to iterate, index, validate and filter data structures. This is - done with the use of the syntax elements listed below. + ABDL expressions have the ability to iterate, index, validate and filter data structures, through the + use of the syntax elements below. - Syntax Elements: + Syntax Elements of ABDL Expressions: - An ABDL expression is a sequence of zero or more sequences starting with arrows followed by zero or - more subvalues. + An arrow is ``->`` and indicates indexing/iteration (Mappings, Sequences, Sets). Whether indexing or + iteration is used is defined by the elements that follow, with iteration being used by default. - An arrow is ``->`` and indicates indexing/iteration (Mappings, Sequences, Sets). It must be followed - by a variable, literal, parameter, regex or key match. + A variable is a sequence of alphanumeric characters, not starting with a digit. A ``(key, value)`` + tuple containing the respective matched element will be identified by this name in the results dict. - A variable is a string of alphanumeric characters, not starting with a digit. 
It may be followed by a - literal, parameter, regex, key match, or one or more type tests. A ``(key, value)`` tuple containing - the corresponding matched element will be identified by this name in the results dict. + A literal is a sequence of characters delimited by ``'``, optionally followed by ``?``, with ``%`` + as the escape character, and defines a string-keyed indexing operation. A literal can contain any + character, except unescaped ``%`` or ``'`` symbols, which must be escaped as ``%%`` and ``%'``, + respectively. The sequence of characters defined by a literal is used as the string object in the + indexing operation. - A literal is a string delimited by single quotes (use ``%'`` to escape ``'`` and ``%%`` to escape ``%``). - A literal can be made "non-validating" by appending an ``?`` after it. It may be followed by one or more - type tests. It is exactly equivalent to indexing an object with a string key. + A parameter is ``$``, optionally followed by ``?``, followed by a sequence of alphanumeric + characters, not starting with a digit, and defines an object-keyed indexing operation. The sequence + of characters defined by a parameter is used to retrieve, from the pattern's definitions, the object + to be used in the indexing operation. - A parameter is the symbol ``$`` followed by a string of alphanumeric characters, not starting with - a digit. A parameter can be made "non-validating" by appending an ``?`` after it. It may be followed by - one or more type tests. It is exactly equivalent to indexing an object with an arbitrary object key. + A regex is a sequence of characters delimited by ``/``, optionally followed by ``?``, with ``%`` as + the escape character. A regex can contain any character, except unescaped ``%`` or ``/`` symbols, + which must be escaped as ``%%`` and ``%/``, respectively. 
The sequence of characters defined by a + regex is passed to the ``re`` module, which may apply further restrictions on the characters used, + and is used to accept the respective keys processed by the iterator. - A regex is an RE, as defined by the ``re`` module, delimited by forward slashes (use ``%/`` to escape - ``/`` and ``%%`` to escape ``%``). A regex can be made "non-validating" by appending an ``?`` after it. - It may be followed by one or more type tests. It attempts to match each key in the object. + A predicate is ``:``, optionally followed by ``?``, followed by an ``$`` and a sequence of + alphanumeric characters, not starting with a digit, and is used to accept values to be + processed based on an external ``abdl.predicates.Predicate``, type (through + ``abdl.predicates.IsInstance``), or tuple (through ``abdl.predicates.Union``). - A type test is ``:`` followed by a parameter. A type test can be made "non-validating" by appending - an ``?`` after the ``:``. It attempts to match the type of each matched value in the object. + A key match is an ABDL expression (including, but not limited to, the empty ABDL expression) + enclosed within ``[`` and ``]``, optionally prefixed with one or more predicates, and applies the + enclosed predicates and ABDL expression to the key (or index) being processed. A key match enables + additional validation of keys and/or extraction of values from keys, and accepts a key if and only + if the enclosed predicates accept the key and the enclosed expression matches the key. - A key match is an ABDL expression enclosed in ``[`` and ``]``, optionally prefixed with one or more type - tests. This matches keys (including the type tests). - - A subvalue is an ABDL expression enclosed in ``(`` and ``)``. This allows matching multiple values on - the same object. 
+ A subvalue is an ABDL expression (including, but not limited to, the empty ABDL expression) + enclosed within ``(`` and ``)``, and applies the enclosed ABDL expression to the value (or + index) being processed. A subvalue enables the ability to match multiple values on the same object, + and accepts a value if and only if the enclosed expression matches the value. Some syntax elements can be validating or non-validating. Validating syntax elements will raise a - :py:exc:`abdl.ValidationError` whenever a non-matching element is encountered, whereas non-validating - ones will skip them. Note that it is possible for a validating syntax element to still yield results - before raising a :py:exc:`abdl.ValidationError`, so one needs to be careful when writing code where such - behaviour could result in a security vulnerability. + :py:exc:`abdl.ValidationError` whenever a non-accepted element is encountered, whereas non-validating + ones will skip them. Whether an element is validating is determined by the absence of an optional ``?`` + in the documented position. Note that it is possible for a validating syntax element to still yield + results before raising a :py:exc:`abdl.ValidationError`, so one needs to be careful when writing code + where such behaviour could result in a security vulnerability. + + Syntax of ABDL Expressions: + + ABDL Expressions follow the given syntax, in (pseudo-)extended BNF:: + + abdlexpression ::= {arrow tag} {subvalue} + tag ::= identifier [arg] {predicate} | arg {predicate} + arg ::= parameter | literal | regex | keymatch + + arrow ::= '->' + keymatch ::= '[' {predicate} abdlexpression ']' + subvalue ::= '(' {predicate} abdlexpression ')' + + For a description of the terminals "parameter", "literal", "regex" and "predicate", see + "Syntax Elements of ABDL Expressions" above. Examples: + A simple (and yet unnecessarily complicated) by-value list and dict iterator: + + >>> import abdl + >>> for m in abdl.match("->X", [1, 2, 3]): + ... 
print(m['X'][1]) + 1 + 2 + 3 + >>> for m in abdl.match("->X", {'a': 1, 'b': 2, 'c': 3}): + ... print(m['X'][1]) + 1 + 2 + 3 + + A not so simple nested dict iterator: + >>> import abdl >>> for m in abdl.match("->X:?$dict->Y", {"foo": 1, "bar": {"baz": 2}}, {'dict': dict}): ... print(m['X'][0], m['Y'][0], m['Y'][1]) bar baz 2 + (If ``:?$dict`` wasn't present, a TypeError would be raised when trying to iterate the ``1`` from ``"foo": 1``.) + + Extracting data from non-flat config files: + + >>> import abdl >>> pat = abdl.compile('''-> 'projects'? ... -> commit /[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/? :?$dict ... -> url :?$dict @@ -82,384 +127,15 @@ Language Reference: >>> for m in pat.match(data): ... print(m['commit'][0], m['url'][0], m['branch'][0], m['branch'][1]) 385e734a52e13949a7a5c71827f6de920dbfea43 https://soniex2.autistic.space/git-repos/ganarchy.git HEAD {'active': True} - - (If ``:?$dict`` wasn't present, a TypeError would be raised when trying to iterate the ``1`` from ``"foo": 1``.) """ import re from collections.abc import Mapping, Sequence, Iterator, Set -class DeprecationError(Exception): - """Raised for deprecated features, if they are disabled. 
- - This class controls warning/error behaviour of deprecated features.""" - #enable_key_match_compat = False - #warn_key_match_compat = False - - @classmethod - def warn_all(cls): - """Enables all deprecation warnings.""" - pass - -class PatternError(Exception): - """Raised for invalid input or output expressions.""" - # TODO implement formatting - - def __init__(self, msg, pattern, defs, pos, toks): - self.msg = msg - self.pattern = pattern - self.defs = defs - self.pos = pos - self._toks = toks # internal use - - def _normalize(self, pattern, defs): - if pattern is not None: - if self.pattern is not None: - raise ValueError("Attempt to normalize normalized pattern") - else: - self.pattern = pattern - if defs is not None: - if self.defs is not None: - raise ValueError("Attempt to normalize normalized defs") - else: - self.defs = defs - - @classmethod - def _str_escape(cls, s, pos, toks): - raise cls("Error in string escape", None, None, pos, toks) - - @classmethod - def _str_end(cls, s, pos, toks): - raise cls("Unfinished string", None, None, pos, toks) - - @classmethod - def _re_escape(cls, s, pos, toks): - raise cls("Error in regex escape", None, None, pos, toks) - - @classmethod - def _re_end(cls, s, pos, toks): - raise cls("Unfinished regex", None, None, pos, toks) - - @classmethod - def _unexpected_tok(cls, s, pos, toks): - raise cls("Unexpected token", None, None, pos, toks) - -class ValidationError(Exception): - """Raised when the object tree doesn't validate against the given pattern.""" - # FIXME TODO? 
- -class _PatternElement: - def on_not_in_key(self, frame, path, defs): - raise NotImplementedError - - def on_in_key(self, frame, path, defs): - raise NotImplementedError - - def collect_params(self, res: list): - pass - -class _Arrow(_PatternElement): - def on_not_in_key(self, frame, path, defs): - assert not path[-1].empty - path.append(_Holder(key=None, value=None, name=None, parent=path[-1].value, empty=True)) - return False - -class _StringKey(_PatternElement): - def __init__(self, toks): - self.key = toks[0] - self.skippable = toks[1] == '?' - - def on_in_key(self, frame, path, defs): - return self.on_not_in_key(frame, path, defs) - - def on_not_in_key(self, frame, path, defs): - path[-1].iterator = self.extract(path[-1].parent) - path[-1].empty = False - return True - - def extract(self, obj): - try: - yield (self.key, obj[self.key]) - except (TypeError, IndexError, KeyError): - if not self.skippable: - raise ValidationError - -class _RegexKey(_PatternElement): - def __init__(self, toks): - self.key = toks[0] - self.compiled = re.compile(self.key) - self.skippable = toks[1] == '?' - - def on_in_key(self, frame, path, defs): - return self.on_not_in_key(frame, path, defs) - - def on_not_in_key(self, frame, path, defs): - filtered_iterator = self.filter(path[-1].iterator) - del path[-1].iterator - path[-1].iterator = filtered_iterator - del filtered_iterator - path[-1].empty = False - return True - - def filter(self, it): - for el in it: - try: - if self.compiled.search(el[0]): - yield el - elif not self.skippable: - raise ValidationError - except TypeError: - if not self.skippable: - raise ValidationError - -class _KeySubtree(_PatternElement): - def __init__(self, toks): - self.key = toks[0] - self.skippable = toks[1] == '?' 
- - def on_not_in_key(self, frame, path, defs): - path[-1].subtree = True - filtered_iterator = self.filter(path[-1].iterator, defs) - del path[-1].iterator - path[-1].iterator = filtered_iterator - del filtered_iterator - path[-1].empty = False - return True - - def filter(self, it, defs): - for x in it: - for y in _match_helper(self.key, defs, x[0]): - yield (y, x[1]) - - def collect_params(self, res: list): - for sub in self.key: - sub.collect_params(res) - -class _ValueSubtree(_PatternElement): - def __init__(self, toks): - self.key = toks[0] - self.skippable = toks[1] == '?' - - def on_not_in_key(self, frame, path, defs): - assert not path[-1].empty - path.append(_Holder(key=None, value=None, name=None, parent=path[-1].value, empty=False, subtree=True)) - path[-1].iterator = self.filter(path[-1].parent, defs) - return True - - def filter(self, parent, defs): - for x in _match_helper(self.key, defs, parent): - yield (x, parent) - - def collect_params(self, res: list): - for sub in self.key: - sub.collect_params(res) - -class _Ident(_PatternElement): - def __init__(self, toks): - self.key = toks[0] - - def on_not_in_key(self, frame, path, defs): - path[-1].name = self.key - path[-1].empty = False - return True - -class _Param(_PatternElement): - def __init__(self, toks): - assert isinstance(toks[1], _Ident) - self.skippable = toks[0] == '?' 
- self.key = toks[1].key - - def on_in_key(self, frame, path, defs): - return self.on_not_in_key(frame, path, defs) - - def on_not_in_key(self, frame, path, defs): - path[-1].iterator = self.extract(path[-1].parent, defs[self.key]) - path[-1].empty = False - return True - - def extract(self, obj, key): - try: - yield (key, obj[key]) - except (TypeError, IndexError, KeyError): - if not self.skippable: - raise ValidationError - - def collect_params(self, res: list): - res.append(self.key) - - def get_value(self, defs): - return defs[self.key] - -class _Ty(_PatternElement): - def __init__(self, toks): - assert isinstance(toks[1], _Ident) - self.skippable = toks[0] == '?' - self.key = toks[1].key - - def on_in_key(self, frame, path, defs): - filtered_iterator = self.filter(path[-1].iterator, defs[self.key]) - del path[-1].iterator - path[-1].iterator = filtered_iterator - del filtered_iterator - path[-1].empty = False - return True - - def on_not_in_key(self, frame, path, defs): - assert len(path) == 1 - if isinstance(path[-1].value, defs[self.key]): - return False - elif not self.skippable: - raise ValidationError - path.clear() - return False - - def filter(self, it, ty): - for el in it: - # this may TypeError if ty is not a type nor a tuple of types - # but that's actually the programmer's error - if isinstance(el[1], ty): - yield el - elif not self.skippable: - # and this one is for actual validation - raise ValidationError - - def collect_params(self, res: list): - res.append(self.key) - -class _End(_PatternElement): - def on_in_key(self, frame, path, defs): - try: - path[-1].next() - return False - except StopIteration: - path.pop() - while frame.prev() and not isinstance(frame.current_op, _End): - pass - if not frame.prev(): - # FIXME? - path.clear() - return True # FIXME? 
- -def _build_syntax(): - from pyparsing import Suppress, Literal, Forward, CharsNotIn, StringEnd, Combine, Optional, Group, Word, srange, Empty - # original regex order: arrow, type/parameter/identifier, string, regex, failure - # better syntax: "arrow" changes from "value" to "key" and thus you need at least one key match before an arrow - subtree = Forward() - # where relevant, enforces match behaviour (skippable object tree branch vs required object tree branch) - skippable = Optional("?", default="") - # r"|'(?:%'|%%|%(?P<EES>.|$)|[^%'])*?(?:'|(?P<ES>$))\??" # string literals - str_literal = (Combine(Suppress("'") - + (Suppress("%") + "'" | Suppress("%") + "%" | Literal("%") + (CharsNotIn("") | StringEnd()).setParseAction(PatternError._str_escape) | CharsNotIn("%'"))[...] - + (Suppress("'") | StringEnd().setParseAction(PatternError._str_end))) + skippable).setParseAction(lambda toks: [_StringKey(toks)]) - # r"|/(?:%/|%%|%(?P<EER>.|$)|[^%/])*?(?:/|(?P<ER>$))\??" # regex - re_literal = (Combine(Suppress("/") - + (Suppress("%") + "/" | Suppress("%") + "%" | Literal("%") + (CharsNotIn("") | StringEnd()).setParseAction(PatternError._re_escape) | CharsNotIn("%/"))[...] - + (Suppress("/") | StringEnd().setParseAction(PatternError._re_end))) + skippable).setParseAction(lambda toks: [_RegexKey(toks)]) - arrow = Literal("->").setParseAction(lambda: [_Arrow()]) - # r"|(?::\??)?\$?[A-Za-z][A-Za-z0-9]*" # identifiers, parameters and type matches - identifier = Word(srange("[A-Za-z_]"), srange("[A-Za-z0-9_]")).setParseAction(lambda toks: [_Ident(toks)]) - parameter = (Suppress("$") + skippable + identifier).setParseAction(lambda toks: [_Param(toks)]) - ty = (Suppress(":") + skippable + Suppress("$") + identifier).setParseAction(lambda toks: [_Ty(toks)]) - # support for objects-as-keys - keysubtree = (Suppress("[") + Group(ty[...] 
+ subtree) + (Suppress("]") | CharsNotIn("").setParseAction(PatternError._unexpected_tok) | StringEnd().setParseAction(PatternError._unexpected_tok)) + Optional("?", default="")).setParseAction(lambda toks: [_KeySubtree(toks)]) - # represents key matching - switches from "key" to "value" - tag = (identifier + Optional(parameter | str_literal | re_literal | keysubtree) | parameter | str_literal | re_literal | keysubtree) + ty[...] + Empty().setParseAction(lambda: [_End()]) - # multiple value matching - valuesubtree = (Suppress("(") + Group(subtree) + (Suppress(")") | CharsNotIn("").setParseAction(PatternError._unexpected_tok) | StringEnd().setParseAction(PatternError._unexpected_tok)) + Optional("?", default="")).setParseAction(lambda toks: [_ValueSubtree(toks)]) - # arrow and tag, value subtree - subtree <<= (arrow + tag)[...] + (valuesubtree + Empty().setParseAction(lambda: [_End()]))[...] - return ((subtree | CharsNotIn("").setParseAction(PatternError._unexpected_tok)) + StringEnd()).parseWithTabs() - -_built_syntax = _build_syntax() - -def _pairs(o): - if isinstance(o, Mapping): - return iter(o.items()) - elif isinstance(o, Sequence): - return iter(enumerate(o, 0)) - elif isinstance(o, Set): - return iter(((e, e) for e in o)) - else: - # maybe there's more stuff I can implement later - raise TypeError - -class _Holder: - def __init__(self, key, value, name, parent=None, it=None, empty=False, subtree=False): - self.name = name - self.key = key - self.value = value - self.empty = empty - self._it = it - self.parent = parent - self.subtree = subtree - - @property - def iterator(self): - if self._it is None: - self._it = _pairs(self.parent) - return self._it - - @iterator.setter - def iterator(self, value): - assert self._it is None - self._it = value - - @iterator.deleter - def iterator(self): - self._it = None - - def next(self): - self.key, self.value = next(self.iterator) - -class _Frame: - def __init__(self, ops): - self.ops = ops - self.pc = -1 - - def 
next(self): - pc = self.pc + 1 - if pc >= len(self.ops): - return False - self.pc = pc - return True - - @property - def current_op(self): - return self.ops[self.pc] - - def prev(self): - pc = self.pc - 1 - if pc < 0: - return False - self.pc = pc - return True - -def _match_helper(ops, defs, tree): - frame = _Frame(ops) - - path = [_Holder(key=None, value=tree, parent=None, it=iter(()), name=None)] - in_key = False - while path: - if not frame.next(): - assert not path[-1].empty - res = {} - for h in path: - if h.subtree: - for name, kv in h.key.items(): - res[name] = kv - elif h.name is not None: - res[h.name] = (h.key, h.value) - yield res - assert len(path) == 1 or isinstance(frame.current_op, _End) - frame.prev() - in_key = True - else: - op = frame.current_op - if in_key: - in_key = op.on_in_key(frame, path, defs) - else: - in_key = op.on_not_in_key(frame, path, defs) +from abdl import _parser +from abdl import _vm +from abdl import exceptions class Pattern: """A compiled pattern object. @@ -471,8 +147,8 @@ class Pattern: def __init__(self, pattern, defs): try: - self._ops = _built_syntax.parseString(pattern) - except PatternError as e: + self._ops = _parser.BUILT_SYNTAX.parseString(pattern) + except exceptions.PatternError as e: e._normalize(pattern, defs) raise else: @@ -492,7 +168,7 @@ class Pattern: wrapped in a dict for each variable in the pattern. """ - return _match_helper(self._ops, self._defs, obj) + return _vm.match_helper(self._ops, self._defs, obj) def compile(pattern, defs={}): """Compiles the pattern and returns a compiled :py:class:`abdl.Pattern` object. diff --git a/abdl/_parser.py b/abdl/_parser.py new file mode 100644 index 0000000..c39a45e --- /dev/null +++ b/abdl/_parser.py @@ -0,0 +1,70 @@ +# This file is part of A Boneless Datastructure Language +# Copyright (C) 2020 Soni L. 
+# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +import pyparsing + +import abdl.exceptions +from abdl import _vm + +def _build_syntax(): + # pylint: disable=protected-access + from pyparsing import Suppress, Literal, Forward, CharsNotIn, StringEnd, Combine, Optional, Group, Word, srange, Empty + + subtree = Forward() + + skippable = Optional("?", default="") + + str_literal = (Combine(Suppress("'") + + (Suppress("%") + ("'" | "%") | Literal("%") + (CharsNotIn("") | StringEnd()).setParseAction(abdl.exceptions.PatternError._str_escape) | CharsNotIn("%'"))[...] + + (Suppress("'") | StringEnd().setParseAction(abdl.exceptions.PatternError._str_end))) + skippable) + str_literal.setParseAction(lambda toks: [_vm.StringKey(toks)]) + + re_literal = (Combine(Suppress("/") + + (Suppress("%") + ("/" | "%") | Literal("%") + (CharsNotIn("") | StringEnd()).setParseAction(abdl.exceptions.PatternError._re_escape) | CharsNotIn("%/"))[...] 
+ + (Suppress("/") | StringEnd().setParseAction(abdl.exceptions.PatternError._re_end))) + skippable) + re_literal.setParseAction(lambda toks: [_vm.RegexKey(toks)]) + + arrow = Literal("->") + arrow.setParseAction(lambda: [_vm.Arrow()]) + + identifier = Word(srange("[A-Za-z_]"), srange("[A-Za-z0-9_]")) + identifier.setParseAction(lambda toks: [_vm.Ident(toks)]) + + parameter = (Suppress("$") + skippable + identifier) + parameter.setParseAction(lambda toks: [_vm.Param(toks)]) + + type_ = (Suppress(":") + skippable + Optional(Suppress("$")) + identifier) + type_.setParseAction(lambda toks: [_vm.ApplyPredicate(toks)]) + + # support for objects-as-keys + keysubtree = (Suppress("[") + + Group(type_[...] + subtree) + + (Suppress("]") | (CharsNotIn("") | StringEnd()).setParseAction(abdl.exceptions.PatternError._unexpected_tok)) + skippable) + keysubtree.setParseAction(lambda toks: [_vm.KeySubtree(toks)]) + + # represents key matching - switches from "key" to "value" + tag = (identifier + Optional(parameter | str_literal | re_literal | keysubtree) | parameter | str_literal | re_literal | keysubtree) + type_[...] + Empty().setParseAction(lambda: [_vm.End()]) + + # multiple value matching + valuesubtree = (Suppress("(") + Group(subtree) + (Suppress(")") | CharsNotIn("").setParseAction(abdl.exceptions.PatternError._unexpected_tok) | StringEnd().setParseAction(abdl.exceptions.PatternError._unexpected_tok)) + Optional("?", default="")) + valuesubtree.setParseAction(lambda toks: [_vm.ValueSubtree(toks)]) + + # arrow and tag, value subtree + subtree <<= (arrow + tag)[...] + (valuesubtree + Empty().setParseAction(lambda: [_vm.End()]))[...] 
+ + return ((subtree | CharsNotIn("").setParseAction(abdl.exceptions.PatternError._unexpected_tok)) + StringEnd()).parseWithTabs() + +BUILT_SYNTAX = _build_syntax() diff --git a/abdl/_vm.py b/abdl/_vm.py new file mode 100644 index 0000000..1de2e15 --- /dev/null +++ b/abdl/_vm.py @@ -0,0 +1,298 @@ +# This file is part of A Boneless Datastructure Language +# Copyright (C) 2020 Soni L. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +import collections.abc +import re +from abdl import predicates +from abdl import exceptions + +class PatternElement: + def on_not_in_key(self, frame, path, defs): + raise NotImplementedError + + def on_in_key(self, frame, path, defs): + raise NotImplementedError + + def collect_params(self, res: list): + pass + +class Arrow(PatternElement): + def on_not_in_key(self, frame, path, defs): + assert not path[-1].empty + path.append(Holder(key=None, value=None, name=None, parent=path[-1].value, empty=True)) + return False + +class StringKey(PatternElement): + def __init__(self, toks): + self.key = toks[0] + self.skippable = toks[1] == '?' 
+ + def on_in_key(self, frame, path, defs): + return self.on_not_in_key(frame, path, defs) + + def on_not_in_key(self, frame, path, defs): + path[-1].iterator = self.extract(path[-1].parent) + path[-1].empty = False + return True + + def extract(self, obj): + try: + yield (self.key, obj[self.key]) + except (TypeError, IndexError, KeyError): + if not self.skippable: + raise exceptions.ValidationError + +class RegexKey(PatternElement): + def __init__(self, toks): + self.key = toks[0] + self.compiled = re.compile(self.key) + self.skippable = toks[1] == '?' + + def on_in_key(self, frame, path, defs): + return self.on_not_in_key(frame, path, defs) + + def on_not_in_key(self, frame, path, defs): + filtered_iterator = self.filter(path[-1].iterator) + del path[-1].iterator + path[-1].iterator = filtered_iterator + del filtered_iterator + path[-1].empty = False + return True + + def filter(self, iter_): + for el in iter_: + try: + if self.compiled.search(el[0]): + yield el + elif not self.skippable: + raise exceptions.ValidationError + except TypeError: + if not self.skippable: + raise exceptions.ValidationError + +class KeySubtree(PatternElement): + def __init__(self, toks): + self.key = toks[0] + self.skippable = toks[1] == '?' + + def on_not_in_key(self, frame, path, defs): + path[-1].subtree = True + filtered_iterator = self.filter(path[-1].iterator, defs) + del path[-1].iterator + path[-1].iterator = filtered_iterator + del filtered_iterator + path[-1].empty = False + return True + + def filter(self, iter_, defs): + for x in iter_: + for y in match_helper(self.key, defs, x[0]): + yield (y, x[1]) + + def collect_params(self, res: list): + for sub in self.key: + sub.collect_params(res) + +class ValueSubtree(PatternElement): + def __init__(self, toks): + self.key = toks[0] + self.skippable = toks[1] == '?' 
+ + def on_not_in_key(self, frame, path, defs): + assert not path[-1].empty + path.append(Holder(key=None, value=None, name=None, parent=path[-1].value, empty=False, subtree=True)) + path[-1].iterator = self.filter(path[-1].parent, defs) + return True + + def filter(self, parent, defs): + for x in match_helper(self.key, defs, parent): + yield (x, parent) + + def collect_params(self, res: list): + for sub in self.key: + sub.collect_params(res) + +class Ident(PatternElement): + def __init__(self, toks): + self.key = toks[0] + + def on_not_in_key(self, frame, path, defs): + path[-1].name = self.key + path[-1].empty = False + return True + +class Param(PatternElement): + def __init__(self, toks): + assert isinstance(toks[1], Ident) + self.skippable = toks[0] == '?' + self.key = toks[1].key + + def on_in_key(self, frame, path, defs): + return self.on_not_in_key(frame, path, defs) + + def on_not_in_key(self, frame, path, defs): + path[-1].iterator = self.extract(path[-1].parent, defs[self.key]) + path[-1].empty = False + return True + + def extract(self, obj, key): + try: + yield (key, obj[key]) + except (TypeError, IndexError, KeyError): + if not self.skippable: + raise exceptions.ValidationError + + def collect_params(self, res: list): + res.append(self.key) + + def get_value(self, defs): + return defs[self.key] + +class ApplyPredicate(PatternElement): + def __init__(self, toks): + assert isinstance(toks[1], Ident) + self.skippable = toks[0] == '?' 
+ self.key = toks[1].key + + def on_in_key(self, frame, path, defs): + filtered_iterator = self.filter(path[-1].iterator, defs) + del path[-1].iterator + path[-1].iterator = filtered_iterator + del filtered_iterator + path[-1].empty = False + return True + + def check(self, defs, obj): + if predicates._to_predicate(defs[self.key]).accept(obj): + return True + if self.skippable: + return False + raise exceptions.ValidationError + + def on_not_in_key(self, frame, path, defs): + assert len(path) == 1 + if not self.check(defs, path[-1].value): + path.clear() + return False + + def filter(self, iter_, defs): + for el in iter_: + if self.check(defs, el[1]): + yield el + + def collect_params(self, res: list): + res.append(self.key) + +class End(PatternElement): + def on_in_key(self, frame, path, defs): + try: + path[-1].next() + return False + except StopIteration: + path.pop() + while frame.prev() and not isinstance(frame.current_op, End): + pass + if not frame.prev(): + # FIXME? + path.clear() + return True # FIXME? 
+ +def _pairs(obj): + if isinstance(obj, collections.abc.Mapping): + return iter(obj.items()) + elif isinstance(obj, collections.abc.Sequence): + return iter(enumerate(obj, 0)) + elif isinstance(obj, collections.abc.Set): + return iter(((e, e) for e in obj)) + else: + # maybe there's more stuff I can implement later + raise TypeError + +class Holder: + def __init__(self, key, value, name, parent=None, iterator=None, empty=False, subtree=False): + self.name = name + self.key = key + self.value = value + self.empty = empty + self._iterator = iterator + self.parent = parent + self.subtree = subtree + + @property + def iterator(self): + if self._iterator is None: + self._iterator = _pairs(self.parent) + return self._iterator + + @iterator.setter + def iterator(self, value): + assert self._iterator is None + self._iterator = value + + @iterator.deleter + def iterator(self): + self._iterator = None + + def next(self): + self.key, self.value = next(self.iterator) + +class Frame: + def __init__(self, ops): + self.ops = ops + self.pc = -1 + + def next(self): + pc = self.pc + 1 + if pc >= len(self.ops): + return False + self.pc = pc + return True + + @property + def current_op(self): + return self.ops[self.pc] + + def prev(self): + pc = self.pc - 1 + if pc < 0: + return False + self.pc = pc + return True + +def match_helper(ops, defs, tree): + frame = Frame(ops) + + path = [Holder(key=None, value=tree, parent=None, iterator=iter(()), name=None)] + in_key = False + while path: + if not frame.next(): + assert not path[-1].empty + res = {} + for h in path: + if h.subtree: + for name, kv in h.key.items(): + res[name] = kv + elif h.name is not None: + res[h.name] = (h.key, h.value) + yield res + assert len(path) == 1 or isinstance(frame.current_op, End) + frame.prev() + in_key = True + else: + if in_key: + in_key = frame.current_op.on_in_key(frame, path, defs) + else: + in_key = frame.current_op.on_not_in_key(frame, path, defs) diff --git a/abdl/exceptions.py b/abdl/exceptions.py 
# new file mode 100644
# index 0000000..961acff
# --- /dev/null
# +++ b/abdl/exceptions.py
# @@ -0,0 +1,74 @@
# This file is part of A Boneless Datastructure Language
# Copyright (C) 2020 Soni L.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

class DeprecationError(Exception):
    """Raised for deprecated features, if they are disabled.

    This class controls warning/error behaviour of deprecated features."""
    #enable_key_match_compat = False
    #warn_key_match_compat = False

    @classmethod
    def warn_all(cls):
        """Enables all deprecation warnings.

        Currently a no-op: no deprecation switches are defined yet.
        """

class PatternError(Exception):
    """Raised for invalid input or output expressions.

    Args:
        msg: Short description of the problem.
        pattern: The offending pattern, or ``None`` if not yet known.
        defs: The defs used with the pattern, or ``None`` if not yet known.
        pos: Position of the error.
        toks: Tokens associated with the error (internal use).
    """

    def __init__(self, msg, pattern, defs, pos, toks):
        self.msg = msg
        self.pattern = pattern
        self.defs = defs
        self.pos = pos
        self._toks = toks # internal use

    def __str__(self):
        # Without this, the exception renders as the raw constructor args
        # tuple, which includes the internal tokens.
        if self.pos is None:
            return str(self.msg)
        return "{} (at position {})".format(self.msg, self.pos)

    def _normalize(self, pattern, defs):
        """Fill in ``pattern`` and/or ``defs`` once they become known.

        Arguments that are ``None`` are ignored.

        Raises:
            ValueError: If a value is given for a field that is already set.
        """
        if pattern is not None:
            if self.pattern is not None:
                raise ValueError("Attempt to normalize normalized pattern")
            self.pattern = pattern
        if defs is not None:
            if self.defs is not None:
                raise ValueError("Attempt to normalize normalized defs")
            self.defs = defs

    # The helpers below always raise. They take ``(s, pos, toks)`` and leave
    # ``pattern``/``defs`` unset so they can be filled in by ``_normalize``.
    # NOTE(review): presumably installed as parser callbacks -- confirm in
    # abdl/_parser.py.

    @classmethod
    def _str_escape(cls, s, pos, toks):
        raise cls("Error in string escape", None, None, pos, toks)

    @classmethod
    def _str_end(cls, s, pos, toks):
        raise cls("Unfinished string", None, None, pos, toks)

    @classmethod
    def _re_escape(cls, s, pos, toks):
        raise cls("Error in regex escape", None, None, pos, toks)

    @classmethod
    def _re_end(cls, s, pos, toks):
        raise cls("Unfinished regex", None, None, pos, toks)

    @classmethod
    def _unexpected_tok(cls, s, pos, toks):
        raise cls("Unexpected token", None, None, pos, toks)

class ValidationError(Exception):
    """Raised when the object tree doesn't validate against the given pattern."""
    # FIXME TODO?

# diff --git a/abdl/predicates.py b/abdl/predicates.py
# new file mode 100644
# index 0000000..4df9cdf
# --- /dev/null
# +++ b/abdl/predicates.py
# @@ -0,0 +1,94 @@
# This file is part of A Boneless Datastructure Language
# Copyright (C) 2020 Soni L.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""Classes for use with ABDL's Predicate system.

See ``abdl.predicates.Predicate`` and the language reference for details.
"""

# pylint: disable=too-few-public-methods

class Predicate:
    """A predicate checks if an object is accepted in an ABDL expression.
    """

    def accept(self, obj):
        """Checks if ``obj`` is accepted by this predicate.

        Args:
            obj: The object to be accepted.

        Returns:
            bool: True if the object is accepted, False otherwise.
        """
        raise NotImplementedError

class IsInstance(Predicate):
    """A ``Predicate`` that accepts objects according to ``isinstance``.

    Used to implement ``:$foo`` when ``defs['foo']`` is neither a ``Predicate`` nor a
    ``tuple``.

    One generally does not need to explicitly create instances of this class, as abdl
    accepts types wherever instances of this class are accepted.

    Args:
        type_ (type): The expected type.
    """

    def __init__(self, type_):
        self.type_ = type_
        """The expected type."""

    def accept(self, obj):
        return isinstance(obj, self.type_)

class Union(Predicate):
    """A ``Predicate`` built from multiple predicates.

    Used to implement ``:$foo`` when ``defs['foo']`` is not a ``Predicate`` but is a
    ``tuple``. Can be constructed from any iterable. If the iterable yields different
    results on different runs (e.g. is a generator), the behaviour is undefined.

    Has semantics equivalent to ``any(p.accept(obj) for p in predicates)``, except p
    can also be a type or a tuple.

    One generally does not need to explicitly create instances of this class, as abdl
    accepts tuples wherever instances of this class are accepted.

    Args:
        predicates: An iterable of predicates, types and tuples that yields the same
            sequence on different iterations.
    """

    def __init__(self, predicates):
        self.predicates = predicates
        """The iterable of predicates, types and tuples."""

    def accept(self, obj):
        # Each member is converted on the fly so types and nested tuples work.
        return any(_to_predicate(predicate).accept(obj) for predicate in self.predicates)

def _to_predicate(obj):
    """Coerce ``obj`` into a ``Predicate``.

    ``Predicate`` instances are returned unchanged, tuples become a ``Union``,
    and anything else is wrapped in ``IsInstance``.
    """
    if isinstance(obj, Predicate):
        return obj
    if isinstance(obj, tuple):
        return Union(obj)
    # I don't know if anyone relies on the old behaviour of passing the thing directly to isinstance
    # but this lets the exceptions be raised almost exactly like before
    return IsInstance(obj)