From 34551d96ce021d2264094a4941ef15a64224d195 Mon Sep 17 00:00:00 2001 From: SoniEx2 Date: Tue, 25 Feb 2020 22:54:47 -0300 Subject: Refactor * Cleaned up many pylint messages * Cleaned up parser * Other minor changes --- abdl/__init__.py | 136 ++++++++++++++++++++---------------- abdl/_parser.py | 91 +++++++++++++++++------- abdl/_vm.py | 200 ++++++++++++++++++++++++++++++++++++----------------- abdl/exceptions.py | 53 +++++++------- 4 files changed, 306 insertions(+), 174 deletions(-) diff --git a/abdl/__init__.py b/abdl/__init__.py index 36a629c..547bb68 100644 --- a/abdl/__init__.py +++ b/abdl/__init__.py @@ -16,61 +16,77 @@ """A Boneless Datastructure Language, version 2.1. -ABDL expressions are regex-like constructs for matching and validating object structures. They can be used -with JSON and similar formats, and even self-referential data structures. +ABDL expressions are regex-like constructs for matching and validating object +structures. They can be used with JSON and similar formats, and even +self-referential data structures. Language Reference: - ABDL expressions have the ability to iterate, index, validate and filter data structures, through the - use of the syntax elements below. + ABDL expressions have the ability to iterate, index, validate and filter + data structures, through the use of the syntax elements below. Syntax Elements of ABDL Expressions: - An arrow is ``->`` and indicates indexing/iteration (Mappings, Sequences, Sets). Whether indexing or - iteration is used is defined by the elements that follow, with iteration being used by default. - - A variable is a sequence of alphanumeric characters, not starting with a digit. A ``(key, value)`` - tuple containing the respective matched element will be identified by this name in the results dict. - - A literal is a sequence of characters delimited by ``'``, optionally followed by ``?``, with ``%`` - as the escape character, and defines a string-keyed indexing operation. 
A literal can contain any - character, except unescaped ``%`` or ``'`` symbols, which must be escaped as ``%%`` and ``%'``, - respectively. The sequence of characters defined by a literal is used as the string object in the - indexing operation. - - A parameter is ``$``, optionally followed by ``?``, followed by a sequence of alphanumeric - characters, not starting with a digit, and defines an object-keyed indexing operation. The sequence - of characters defined by a parameter is used to retrieve, from the pattern's definitions, the object - to be used in the indexing operation. - - A regex is a sequence of characters delimited by ``/``, optionally followed by ``?``, with ``%`` as - the escape character. A regex can contain any character, except unescaped ``%`` or ``/`` symbols, - which must be escaped as ``%%`` and ``%/``, respectively. The sequence of characters defined by a - regex is passed to the ``re`` module, which may apply further restrictions on the characters used, - and is used to accept the respective keys processed by the iterator. - - A predicate is ``:``, optionally followed by ``?``, followed by an ``$`` and a sequence of - alphanumeric characters, not starting with a digit, and is used to accept values to be - processed based on an external ``abdl.predicates.Predicate``, type (through - ``abdl.predicates.IsInstance``), or tuple (through ``abdl.predicates.Union``). - - A key match is an ABDL expression (including, but not limited to, the empty ABDL expression) - enclosed within ``[`` and ``]``, optionally prefixed with one or more predicates, and applies the - enclosed predicates and ABDL expression to the key (or index) being processed. A key match enables - additional validation of keys and/or extraction of values from keys, and accepts a key if and only - if the enclosed predicates accept the key and the enclosed expression matches the key. 
- - A subvalue is an ABDL expression (including, but not limited to, the empty ABDL expression) - enclosed within ``(`` and ``)``, and applies the enclosed ABDL expression to the value (or - index) being processed. A subvalue enables the ability to match multiple values on the same object, - and accepts a value if and only the enclosed expression matches the value. - - Some syntax elements can be validating or non-validating. Validating syntax elements will raise a - :py:exc:`abdl.ValidationError` whenever a non-accepted element is encountered, whereas non-validating - ones will skip them. Whether an element is validating is determined by the absence of an optional ``?`` - in the documented position. Note that it is possible for a validating syntax element to still yield - results before raising a :py:exc:`abdl.ValidationError`, so one needs to be careful when writing code - where such behaviour could result in a security vulnerability. + An arrow is ``->`` and indicates indexing/iteration (Mappings, + Sequences, Sets). Whether indexing or iteration is used is defined by + the elements that follow, with iteration being used by default. + + A variable is a sequence of alphanumeric characters, not starting with + a digit. A ``(key, value)`` tuple containing the respective matched + element will be identified by this name in the results dict. + + A literal is a sequence of characters delimited by ``'``, optionally + followed by ``?``, with ``%`` as the escape character, and defines a + string-keyed indexing operation. A literal can contain any character, + except unescaped ``%`` or ``'`` symbols, which must be escaped as + ``%%`` and ``%'``, respectively. The sequence of characters defined by + a literal is used as the string object in the indexing operation. + + A parameter is ``$``, optionally followed by ``?``, followed by a + sequence of alphanumeric characters, not starting with a digit, and + defines an object-keyed indexing operation. 
The sequence of characters + defined by a parameter is used to retrieve, from the pattern's + definitions, the object to be used in the indexing operation. + + A regex is a sequence of characters delimited by ``/``, optionally + followed by ``?``, with ``%`` as the escape character. A regex can + contain any character, except unescaped ``%`` or ``/`` symbols, which + must be escaped as ``%%`` and ``%/``, respectively. The sequence of + characters defined by a regex is passed to the ``re`` module, which + may apply further restrictions on the characters used, and is used to + accept the respective keys processed by the iterator. + + A predicate is ``:``, optionally followed by ``?``, followed by a + ``$`` and a sequence of alphanumeric characters, not starting with a + digit, and is used to accept values to be processed based on an + external ``abdl.predicates.Predicate``, type (through + ``abdl.predicates.IsInstance``), or tuple (through + ``abdl.predicates.Union``). + + A key match is an ABDL expression (including, but not limited to, the + empty ABDL expression) enclosed within ``[`` and ``]``, optionally + prefixed with one or more predicates, and applies the enclosed + predicates and ABDL expression to the key (or index) being processed. + A key match enables additional validation of keys and/or extraction of + values from keys, and accepts a key if and only if the enclosed + predicates accept the key and the enclosed expression matches the key. + + A subvalue is an ABDL expression (including, but not limited to, the + empty ABDL expression) enclosed within ``(`` and ``)``, and applies + the enclosed ABDL expression to the value (or index) being processed. + A subvalue enables the ability to match multiple values on the same + object, and accepts a value if and only if the enclosed expression + matches the value. + + Some syntax elements can be validating or non-validating. 
Validating + syntax elements will raise a :py:exc:`abdl.exceptions.ValidationError` + whenever a non-accepted element is encountered, whereas non-validating + ones will skip them. Whether an element is validating is determined by + the absence of an optional ``?`` in the documented position. Note that + it is possible for a validating syntax element to still yield results + before raising a :py:exc:`abdl.exceptions.ValidationError`, so one + needs to be careful when writing code where such behaviour could + result in a security vulnerability. Syntax of ABDL Expressions: @@ -84,12 +100,13 @@ Language Reference: keymatch ::= '[' {predicate} abdlexpression ']' subvalue ::= '(' {predicate} abdlexpression ')' - For a description of the terminals "parameter", "literal", "regex" and "predicate", see - "Syntax Elements of ABDL Expressions" above. + For a description of the terminals "parameter", "literal", "regex" and + "predicate", see "Syntax Elements of ABDL Expressions" above. Examples: - A simple (and yet unnecessarily complicated) by-value list and dict iterator: + A simple (and yet unnecessarily complicated) by-value list and dict + iterator: >>> import abdl >>> for m in abdl.match("->X", [1, 2, 3]): @@ -110,7 +127,8 @@ Language Reference: ... print(m['X'][0], m['Y'][0], m['Y'][1]) bar baz 2 - (If ``:?$dict`` wasn't present, a TypeError would be raised when trying to iterate the ``1`` from ``"foo": 1``.) + (If ``:?$dict`` wasn't present, a TypeError would be raised when + trying to iterate the ``1`` from ``"foo": 1``.) 
Extracting data from non-flat config files: @@ -151,13 +169,13 @@ class Pattern: def __init__(self, pattern, defs): try: self._ops = _parser.BUILT_SYNTAX.parseString(pattern) - except exceptions.PatternError as e: - e._normalize(pattern, defs) + except exceptions.PatternError as exc: + exc._normalize(pattern, defs) raise else: self._params = [] - for op in self._ops: - op.collect_params(self._params) + for ins in self._ops: + ins.collect_params(self._params) self._defs = {param: defs[param] for param in self._params} def match(self, obj): @@ -173,7 +191,7 @@ class Pattern: """ return _vm.match_helper(self._ops, self._defs, obj) -def compile(pattern, defs={}): +def compile(pattern, defs=None): """Compiles the pattern and returns a compiled :py:class:`abdl.Pattern` object. Args: @@ -188,7 +206,7 @@ def compile(pattern, defs={}): # TODO caching return Pattern(pattern, defs) -def match(pattern, obj, defs={}): +def match(pattern, obj, defs=None): """Matches the pattern against the given obj. This method is equivalent to ``abdl.compile(pattern, defs).match(obj)``. diff --git a/abdl/_parser.py b/abdl/_parser.py index 3e179a2..a8b17ce 100644 --- a/abdl/_parser.py +++ b/abdl/_parser.py @@ -14,61 +14,104 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -import pyparsing +"""[Internal] pyparsing-based Parser. -from abdl import exceptions +Provides `BUILT_SYNTAX`. 
+""" + +from pyparsing import Suppress, Literal, Forward, CharsNotIn, StringEnd +from pyparsing import Combine, Optional, Group, Word, srange, Empty + +from abdl import exceptions as exc from abdl import _vm +def _err_str_esc(match_str, pos, toks): + raise exc.PatternError("Error in string escape", pos=pos, toks=toks) + +def _err_str_end(match_str, pos, toks): + raise exc.PatternError("Unfinished string", pos=pos, toks=toks) + +def _err_re_esc(match_str, pos, toks): + raise exc.PatternError("Error in regex escape", pos=pos, toks=toks) + +def _err_re_end(match_str, pos, toks): + raise exc.PatternError("Unfinished regex", pos=pos, toks=toks) + +def _err_tok(match_str, pos, toks): + raise exc.PatternError("Unexpected token", pos=pos, toks=toks) + def _build_syntax(): - # pylint: disable=protected-access - from pyparsing import Suppress, Literal, Forward, CharsNotIn, StringEnd, Combine, Optional, Group, Word, srange, Empty + # pylint: disable=too-many-locals subtree = Forward() skippable = Optional("?", default="") escape_char = Literal("%") + any_char = CharsNotIn("", exact=1) str_token = Literal("'") re_token = Literal("/") - unexpected_token = CharsNotIn("", exact=1).setParseAction(exceptions.PatternError._unexpected_tok) - unexpected_end = StringEnd().setParseAction(exceptions.PatternError._unexpected_tok) + unexpected_token = any_char.copy().setParseAction(_err_tok) + unexpected_end = StringEnd().setParseAction(_err_tok) + + # TODO reformat these + unexpected_str_escape = any_char.copy().setParseAction(_err_str_esc) + str_escape = Suppress(escape_char) + (str_token | escape_char) + str_escape |= escape_char + unexpected_str_escape + str_char = (str_escape | CharsNotIn("%'")) + + str_literal = (Combine(Suppress(str_token) + str_char[...] 
+ + (Suppress(str_token) + | StringEnd().setParseAction(_err_str_end))) + + skippable) + str_literal.setParseAction(_vm.StringKey.action) - str_literal = (Combine(Suppress(str_token) - + (Suppress(escape_char) + (str_token | escape_char) | escape_char + CharsNotIn("", exact=1).setParseAction(exceptions.PatternError._str_escape) | CharsNotIn("%'"))[...] - + (Suppress(str_token) | StringEnd().setParseAction(exceptions.PatternError._str_end))) + skippable) - str_literal.setParseAction(lambda toks: [_vm.StringKey(toks)]) + unexpected_re_escape = any_char.copy().setParseAction(_err_re_esc) + re_escape = Suppress(escape_char) + (re_token | escape_char) + re_escape |= escape_char + unexpected_re_escape + re_char = (re_escape | CharsNotIn("%/")) - re_literal = (Combine(Suppress(re_token) - + (Suppress(escape_char) + (re_token | escape_char) | escape_char + CharsNotIn("", exact=1).setParseAction(exceptions.PatternError._re_escape) | CharsNotIn("%/"))[...] - + (Suppress(re_token) | StringEnd().setParseAction(exceptions.PatternError._re_end))) + skippable) - re_literal.setParseAction(lambda toks: [_vm.RegexKey(toks)]) + re_literal = (Combine(Suppress(re_token) + re_char[...] 
+ + (Suppress(re_token) + | StringEnd().setParseAction(_err_re_end))) + + skippable) + re_literal.setParseAction(_vm.RegexKey.action) arrow = Literal("->") - arrow.setParseAction(lambda: [_vm.Arrow()]) + arrow.setParseAction(_vm.Arrow.action) identifier = Word(srange("[A-Za-z_]"), srange("[A-Za-z0-9_]")) - identifier.setParseAction(lambda toks: [_vm.Ident(toks)]) + identifier.setParseAction(_vm.Ident.action) parameter = (Suppress("$") + skippable + identifier) - parameter.setParseAction(lambda toks: [_vm.Param(toks)]) + parameter.setParseAction(_vm.Param.action) type_ = (Suppress(":") + skippable + Suppress("$") + identifier) - type_.setParseAction(lambda toks: [_vm.ApplyPredicate(toks)]) + type_.setParseAction(_vm.ApplyPredicate.action) # support for objects-as-keys - keysubtree = (Suppress("[") + Group(type_[...] + subtree) + (Suppress("]") | unexpected_token | unexpected_end) + skippable) - keysubtree.setParseAction(lambda toks: [_vm.KeySubtree(toks)]) + keysubtree = (Suppress("[") + Group(type_[...] + subtree) + + (Suppress("]") | unexpected_token | unexpected_end) + + skippable) + keysubtree.setParseAction(_vm.KeySubtree.action) # represents key matching - switches from "key" to "value" - tag = (identifier + Optional(parameter | str_literal | re_literal | keysubtree) | parameter | str_literal | re_literal | keysubtree) + type_[...] + Empty().setParseAction(lambda: [_vm.End()]) + tag = ((identifier + + Optional(parameter | str_literal | re_literal | keysubtree) + | parameter | str_literal | re_literal | keysubtree) + type_[...] 
+ + Empty().setParseAction(_vm.End.action)) # multiple value matching - valuesubtree = (Suppress("(") + Group(subtree) + (Suppress(")") | unexpected_token | unexpected_end) + Optional("?", default="")) - valuesubtree.setParseAction(lambda toks: [_vm.ValueSubtree(toks)]) + valuesubtree = (Suppress("(") + Group(subtree) + + (Suppress(")") | unexpected_token | unexpected_end) + + Optional("?", default="")) + valuesubtree.setParseAction(_vm.ValueSubtree.action) # arrow and tag, value subtree - subtree <<= (arrow + tag)[...] + (valuesubtree + Empty().setParseAction(lambda: [_vm.End()]))[...] + subtree <<= ((arrow + tag)[...] + + (valuesubtree + + Empty().setParseAction(_vm.End.action))[...]) return ((subtree | unexpected_token) + StringEnd()).parseWithTabs() diff --git a/abdl/_vm.py b/abdl/_vm.py index 41f28eb..0ec1018 100644 --- a/abdl/_vm.py +++ b/abdl/_vm.py @@ -14,28 +14,61 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +"""[Internal] The VM Interpreter. + +This module holds the VM instructions and the interpreter loop. +""" + import collections.abc import re from abdl import predicates from abdl import exceptions class PatternElement: + """A parsed pattern element (token) and VM instruction.""" + def on_not_in_key(self, frame, path, defs): - raise NotImplementedError + """Called while the key matching step of the pattern isn't done. + + Returns whether the key matching step is ready. + """ + raise RuntimeError(self) def on_in_key(self, frame, path, defs): - raise NotImplementedError + """Called while the key matching step of the pattern is done. + + Returns whether the key matching step is ready. + """ + raise RuntimeError(self) def collect_params(self, res: list): - pass + """Appends parameter names used in this pattern to ``res``. + """ + + @classmethod + def action(cls, toks): + """Parse action, for pyparsing. + + Returns: + PatternElement: Parsed token. 
+ """ + return [cls(toks)] class Arrow(PatternElement): + """The 'arrow' token.""" + def on_not_in_key(self, frame, path, defs): assert not path[-1].empty - path.append(Holder(key=None, value=None, name=None, parent=path[-1].value, empty=True)) + path.append(Holder(value=None, parent=path[-1].value, empty=True)) return False + @classmethod + def action(cls, toks): + return [cls()] + class StringKey(PatternElement): + """The 'literal' token.""" + def __init__(self, toks): self.key = toks[0] self.skippable = toks[1] == '?' @@ -44,11 +77,11 @@ class StringKey(PatternElement): return self.on_not_in_key(frame, path, defs) def on_not_in_key(self, frame, path, defs): - path[-1].iterator = self.extract(path[-1].parent) + path[-1].iterator = self._extract(path[-1].parent) path[-1].empty = False return True - def extract(self, obj): + def _extract(self, obj): try: yield (self.key, obj[self.key]) except (TypeError, IndexError, KeyError): @@ -56,6 +89,8 @@ class StringKey(PatternElement): raise exceptions.ValidationError class RegexKey(PatternElement): + """The 'regex' token.""" + def __init__(self, toks): self.key = toks[0] self.compiled = re.compile(self.key) @@ -65,18 +100,18 @@ class RegexKey(PatternElement): return self.on_not_in_key(frame, path, defs) def on_not_in_key(self, frame, path, defs): - filtered_iterator = self.filter(path[-1].iterator) + filtered_iterator = self._filter(path[-1].iterator) del path[-1].iterator path[-1].iterator = filtered_iterator del filtered_iterator path[-1].empty = False return True - def filter(self, iter_): - for el in iter_: + def _filter(self, iter_): + for elem in iter_: try: - if self.compiled.search(el[0]): - yield el + if self.compiled.search(elem[0]): + yield elem elif not self.skippable: raise exceptions.ValidationError except TypeError: @@ -84,6 +119,8 @@ class RegexKey(PatternElement): raise exceptions.ValidationError class KeySubtree(PatternElement): + """The 'keymatch' token.""" + def __init__(self, toks): self.key = toks[0] 
self.skippable = toks[1] == '?' @@ -93,45 +130,49 @@ class KeySubtree(PatternElement): def on_not_in_key(self, frame, path, defs): path[-1].subtree = True - filtered_iterator = self.filter(path[-1].iterator, defs, name=path[-1].name) + filtered_iterator = self._filter(path[-1].iterator, defs, name=path[-1].name) del path[-1].iterator path[-1].iterator = filtered_iterator del filtered_iterator path[-1].empty = False return True - def filter(self, iter_, defs, name): - for x in iter_: - for y in match_helper(self.key, defs, x[0]): + def _filter(self, iter_, defs, name): + for pair in iter_: + for matches in match_helper(self.key, defs, pair[0]): if name: # FIXME this "name" thing is a bit suboptimal - y.setdefault(name, x) - yield (y, x[1]) + matches.setdefault(name, pair) + yield (matches, pair[1]) def collect_params(self, res: list): for sub in self.key: sub.collect_params(res) class ValueSubtree(PatternElement): + """The 'subvalue' token.""" + def __init__(self, toks): self.key = toks[0] self.skippable = toks[1] == '?' 
def on_not_in_key(self, frame, path, defs): assert not path[-1].empty - path.append(Holder(key=None, value=None, name=None, parent=path[-1].value, empty=False, subtree=True)) - path[-1].iterator = self.filter(path[-1].parent, defs) + path.append(Holder(value=None, parent=path[-1].value, empty=False, subtree=True)) + path[-1].iterator = self._filter(path[-1].parent, defs) return True - def filter(self, parent, defs): - for x in match_helper(self.key, defs, parent): - yield (x, parent) + def _filter(self, parent, defs): + for pair in match_helper(self.key, defs, parent): + yield (pair, parent) def collect_params(self, res: list): for sub in self.key: sub.collect_params(res) class Ident(PatternElement): + """The 'identifier' token.""" + def __init__(self, toks): self.key = toks[0] @@ -141,6 +182,8 @@ class Ident(PatternElement): return True class Param(PatternElement): + """The 'parameter' token.""" + def __init__(self, toks): assert isinstance(toks[1], Ident) self.skippable = toks[0] == '?' @@ -150,11 +193,11 @@ class Param(PatternElement): return self.on_not_in_key(frame, path, defs) def on_not_in_key(self, frame, path, defs): - path[-1].iterator = self.extract(path[-1].parent, defs[self.key]) + path[-1].iterator = self._extract(path[-1].parent, defs[self.key]) path[-1].empty = False return True - def extract(self, obj, key): + def _extract(self, obj, key): try: yield (key, obj[key]) except (TypeError, IndexError, KeyError): @@ -164,24 +207,24 @@ class Param(PatternElement): def collect_params(self, res: list): res.append(self.key) - def get_value(self, defs): - return defs[self.key] - class ApplyPredicate(PatternElement): + """The 'predicate' token.""" + def __init__(self, toks): assert isinstance(toks[1], Ident) self.skippable = toks[0] == '?' 
self.key = toks[1].key def on_in_key(self, frame, path, defs): - filtered_iterator = self.filter(path[-1].iterator, defs) + filtered_iterator = self._filter(path[-1].iterator, defs) del path[-1].iterator path[-1].iterator = filtered_iterator del filtered_iterator path[-1].empty = False return True - def check(self, defs, obj): + def _check(self, defs, obj): + # pylint: disable=protected-access if predicates._to_predicate(defs[self.key]).accept(obj): return True if self.skippable: @@ -190,19 +233,21 @@ class ApplyPredicate(PatternElement): def on_not_in_key(self, frame, path, defs): assert len(path) == 1 - if not self.check(defs, path[-1].value): + if not self._check(defs, path[-1].value): path.clear() return False - def filter(self, iter_, defs): - for el in iter_: - if self.check(defs, el[1]): - yield el + def _filter(self, iter_, defs): + for elem in iter_: + if self._check(defs, elem[1]): + yield elem def collect_params(self, res: list): res.append(self.key) class End(PatternElement): + """Pseudo-token, used to advance iteration.""" + def on_in_key(self, frame, path, defs): try: path[-1].next() @@ -212,25 +257,33 @@ class End(PatternElement): while frame.prev() and not isinstance(frame.current_op, End): pass if not frame.prev(): - # FIXME? path.clear() - return True # FIXME? 
+ return True + + @classmethod + def action(cls, toks): + return [cls()] def _pairs(obj): if isinstance(obj, collections.abc.Mapping): return iter(obj.items()) - elif isinstance(obj, collections.abc.Sequence): + if isinstance(obj, collections.abc.Sequence): return iter(enumerate(obj, 0)) - elif isinstance(obj, collections.abc.Set): + if isinstance(obj, collections.abc.Set): return iter(((e, e) for e in obj)) - else: - # maybe there's more stuff I can implement later - raise TypeError + # maybe there's more stuff I can implement later + raise TypeError class Holder: - def __init__(self, key, value, name, parent=None, iterator=None, empty=False, subtree=False): - self.name = name - self.key = key + """Stores a single match and associated metadata. + + A single match is generally a key-value pair, but may be a collection of + named pairs in the case of subtree matches. + """ + + def __init__(self, value, parent=None, iterator=None, empty=False, subtree=False): + self.name = None + self.match = None self.value = value self.empty = empty self._iterator = iterator @@ -239,6 +292,7 @@ class Holder: @property def iterator(self): + """Returns the iterator for this match.""" if self._iterator is None: self._iterator = _pairs(self.parent) return self._iterator @@ -253,46 +307,68 @@ class Holder: self._iterator = None def next(self): - self.key, self.value = next(self.iterator) + """Updates the stored match.""" + self.match, self.value = next(self.iterator) -class Frame: +class _Frame: def __init__(self, ops): self.ops = ops - self.pc = -1 + self.iar = -1 def next(self): - pc = self.pc + 1 - if pc >= len(self.ops): + """Advances the instruction address register. + + Returns: + ``True`` if successful, ``False`` otherwise. 
+ """ + + iar = self.iar + 1 + if iar >= len(self.ops): return False - self.pc = pc + self.iar = iar return True @property def current_op(self): - return self.ops[self.pc] + """Returns the current instruction.""" + return self.ops[self.iar] def prev(self): - pc = self.pc - 1 - if pc < 0: + """Rewinds the instruction address register. + + Returns: + ``True`` if successful, ``False`` otherwise. + """ + + iar = self.iar - 1 + if iar < 0: return False - self.pc = pc + self.iar = iar return True def match_helper(ops, defs, tree): - frame = Frame(ops) + """The interpreter loop itself. + + The opcode/token dispatch logic is implemented through ``PatternElement``. + + Yields: + dict: Matches. + """ + + frame = _Frame(ops) - path = [Holder(key=None, value=tree, parent=None, iterator=iter(()), name=None)] + path = [Holder(value=tree, parent=None, iterator=iter(()))] in_key = False while path: if not frame.next(): assert not path[-1].empty res = {} - for h in path: - if h.subtree: - for name, kv in h.key.items(): - res[name] = kv - elif h.name is not None: - res[h.name] = (h.key, h.value) + for holder in path: + if holder.subtree: + for name, pair in holder.match.items(): + res[name] = pair + elif holder.name is not None: + res[holder.name] = (holder.match, holder.value) yield res assert len(path) == 1 or isinstance(frame.current_op, End) if not frame.prev(): diff --git a/abdl/exceptions.py b/abdl/exceptions.py index 961acff..83ca2ca 100644 --- a/abdl/exceptions.py +++ b/abdl/exceptions.py @@ -14,23 +14,40 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +"""ABDL Exceptions. + +The exceptions for pattern and validation errors are defined here. +""" + +import warnings + class DeprecationError(Exception): """Raised for deprecated features, if they are disabled. 
- This class controls warning/error behaviour of deprecated features.""" - #enable_key_match_compat = False - #warn_key_match_compat = False + This class controls warning/error behaviour of deprecated features. + + Note: This class is deprecated. Use ``abdl.feature`` instead. + """ + # enable_key_match_compat = False + # warn_key_match_compat = False + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + warnings.warn("DeprecationError is deprecated. " + "Use ``abdl.feature`` instead.", DeprecationWarning) @classmethod def warn_all(cls): """Enables all deprecation warnings.""" - pass + warnings.warn("DeprecationError is deprecated. " + "Use ``abdl.feature`` instead.", DeprecationWarning) class PatternError(Exception): """Raised for invalid input or output expressions.""" # TODO implement formatting - def __init__(self, msg, pattern, defs, pos, toks): + def __init__(self, msg, pattern=None, defs=None, pos=None, toks=None): + super().__init__(msg, pattern, defs, pos, toks) self.msg = msg self.pattern = pattern self.defs = defs @@ -41,33 +58,11 @@ class PatternError(Exception): if pattern is not None: if self.pattern is not None: raise ValueError("Attempt to normalize normalized pattern") - else: - self.pattern = pattern + self.pattern = pattern if defs is not None: if self.defs is not None: raise ValueError("Attempt to normalize normalized defs") - else: - self.defs = defs - - @classmethod - def _str_escape(cls, s, pos, toks): - raise cls("Error in string escape", None, None, pos, toks) - - @classmethod - def _str_end(cls, s, pos, toks): - raise cls("Unfinished string", None, None, pos, toks) - - @classmethod - def _re_escape(cls, s, pos, toks): - raise cls("Error in regex escape", None, None, pos, toks) - - @classmethod - def _re_end(cls, s, pos, toks): - raise cls("Unfinished regex", None, None, pos, toks) - - @classmethod - def _unexpected_tok(cls, s, pos, toks): - raise cls("Unexpected token", None, None, pos, toks) + self.defs = defs 
class ValidationError(Exception): """Raised when the object tree doesn't validate against the given pattern.""" -- cgit 1.4.1