From 499a362d81f3523ffcd3354e518987e7b5fd243d Mon Sep 17 00:00:00 2001 From: SoniEx2 Date: Wed, 25 Dec 2019 18:59:27 -0300 Subject: Move things around a bit --- abdl.py | 528 --------------------------------------- abdl/__init__.py | 528 +++++++++++++++++++++++++++++++++++++++ ganarchy.py | 683 --------------------------------------------------- ganarchy/__init__.py | 593 ++++++++++++++++++++++++++++++++++++++++++++ ganarchy/__main__.py | 23 ++ ganarchy/config.py | 157 ++++++++++++ ganarchy/debug.py | 36 +++ 7 files changed, 1337 insertions(+), 1211 deletions(-) delete mode 100644 abdl.py create mode 100644 abdl/__init__.py delete mode 100755 ganarchy.py create mode 100644 ganarchy/__init__.py create mode 100644 ganarchy/__main__.py create mode 100644 ganarchy/config.py create mode 100644 ganarchy/debug.py diff --git a/abdl.py b/abdl.py deleted file mode 100644 index 8dde742..0000000 --- a/abdl.py +++ /dev/null @@ -1,528 +0,0 @@ -# A Boneless Datastructure Language -# Copyright (C) 2019 Soni L. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -"""A Boneless Datastructure Language, version 2.0.1. - -ABDL expressions are regex-like constructs for matching and validating object structures. They can be used -with JSON and similar formats, and even self-referential data structures. - -Language Reference: - - ABDL expressions have the ability to iterate, index, validate and filter data structures. This is - done with the use of the syntax elements listed below. - - Syntax Elements: - - An ABDL expression is a sequence of zero or more sequences starting with arrows followed by zero or - more subvalues. - - An arrow is ``->`` and indicates indexing/iteration (Mappings, Sequences, Sets). It must be followed - by a variable, literal, parameter, regex or key match. - - A variable is a string of alphanumeric characters, not starting with a digit. It may be followed by a - literal, parameter, regex, key match, or one or more type tests. A ``(key, value)`` tuple containing - the corresponding matched element will be identified by this name in the results dict. - - A literal is a string delimited by single quotes (use ``%'`` to escape ``'`` and ``%%`` to escape ``%``). - A literal can be made "non-validating" by appending an ``?`` after it. It may be followed by one or more - type tests. It is exactly equivalent to indexing an object with a string key. - - A parameter is the symbol ``$`` followed by a string of alphanumeric characters, not starting with - a digit. A parameter can be made "non-validating" by appending an ``?`` after it. It may be followed by - one or more type tests. It is exactly equivalent to indexing an object with an arbitrary object key. - - A regex is an RE, as defined by the ``re`` module, delimited by forward slashes (use ``%/`` to escape - ``/`` and ``%%`` to escape ``%``). A regex can be made "non-validating" by appending an ``?`` after it. - It may be followed by one or more type tests. 
It attempts to match each key in the object. - - A type test is ``:`` followed by a parameter. A type test can be made "non-validating" by appending - an ``?`` after the ``:``. It attempts to match the type of each matched value in the object. - - A key match is an ABDL expression enclosed in ``[`` and ``]``, optionally prefixed with one or more type - tests. This matches keys (including the type tests). - - A subvalue is an ABDL expression enclosed in ``(`` and ``)``. This allows matching multiple values on - the same object. - - Some syntax elements can be validating or non-validating. Validating syntax elements will raise a - :py:exc:`abdl.ValidationError` whenever a non-matching element is encountered, whereas non-validating - ones will skip them. Note that it is possible for a validating syntax element to still yield results - before raising a :py:exc:`abdl.ValidationError`, so one needs to be careful when writing code where such - behaviour could result in a security vulnerability. - - Examples: - - >>> import abdl - >>> for m in abdl.match("->X:?$dict->Y", {"foo": 1, "bar": {"baz": 2}}, {'dict': dict}): - ... print(m['X'][0], m['Y'][0], m['Y'][1]) - bar baz 2 - - >>> pat = abdl.compile('''-> 'projects'? - ... -> commit /[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/? :?$dict - ... -> url :?$dict - ... -> branch :?$dict''', {'dict': dict}) - >>> data = {"projects": { - ... "385e734a52e13949a7a5c71827f6de920dbfea43": { - ... "https://soniex2.autistic.space/git-repos/ganarchy.git": {"HEAD": {"active": True}} - ... } - ... }} - >>> for m in pat.match(data): - ... print(m['commit'][0], m['url'][0], m['branch'][0], m['branch'][1]) - 385e734a52e13949a7a5c71827f6de920dbfea43 https://soniex2.autistic.space/git-repos/ganarchy.git HEAD {'active': True} - - (If ``:?$dict`` wasn't present, a TypeError would be raised when trying to iterate the ``1`` from ``"foo": 1``.) -""" - -import re - -from collections.abc import Mapping, Sequence, Iterator, Set - -class DeprecationError(Exception): - """Raised for deprecated features, if they are disabled. 
- - This class controls warning/error behaviour of deprecated features.""" - #enable_key_match_compat = False - #warn_key_match_compat = False - - @classmethod - def warn_all(cls): - """Enables all deprecation warnings.""" - pass - -class PatternError(Exception): - """Raised for invalid input or output expressions.""" - # TODO implement formatting - - def __init__(self, msg, pattern, defs, pos, toks): - self.msg = msg - self.pattern = pattern - self.defs = defs - self.pos = pos - self._toks = toks # internal use - - def _normalize(self, pattern, defs): - if pattern is not None: - if self.pattern is not None: - raise ValueError("Attempt to normalize normalized pattern") - else: - self.pattern = pattern - if defs is not None: - if self.defs is not None: - raise ValueError("Attempt to normalize normalized defs") - else: - self.defs = defs - - @classmethod - def _str_escape(cls, s, pos, toks): - raise cls("Error in string escape", None, None, pos, toks) - - @classmethod - def _str_end(cls, s, pos, toks): - raise cls("Unfinished string", None, None, pos, toks) - - @classmethod - def _re_escape(cls, s, pos, toks): - raise cls("Error in regex escape", None, None, pos, toks) - - @classmethod - def _re_end(cls, s, pos, toks): - raise cls("Unfinished regex", None, None, pos, toks) - - @classmethod - def _unexpected_tok(cls, s, pos, toks): - raise cls("Unexpected token", None, None, pos, toks) - -class ValidationError(Exception): - """Raised when the object tree doesn't validate against the given pattern.""" - # FIXME TODO? - -class _PatternElement: - def on_not_in_key(self, frame, path, defs): - raise NotImplementedError - - def on_in_key(self, frame, path, defs): - raise NotImplementedError - - def collect_params(self, res: list): - pass - -class _Arrow(_PatternElement): - def on_not_in_key(self, frame, path, defs): - assert not path[-1].empty - path.append(_Holder(key=None, value=None, name=None, parent=path[-1].value, empty=True)) - return False - -class _StringKey(_PatternElement): - def __init__(self, toks): - self.key = toks[0] - self.skippable = toks[1] == '?' - - def on_in_key(self, frame, path, defs): - return self.on_not_in_key(frame, path, defs) - - def on_not_in_key(self, frame, path, defs): - path[-1].iterator = self.extract(path[-1].parent) - path[-1].empty = False - return True - - def extract(self, obj): - try: - yield (self.key, obj[self.key]) - except (TypeError, IndexError, KeyError): - if not self.skippable: - raise ValidationError - -class _RegexKey(_PatternElement): - def __init__(self, toks): - self.key = toks[0] - self.compiled = re.compile(self.key) - self.skippable = toks[1] == '?' - - def on_in_key(self, frame, path, defs): - return self.on_not_in_key(frame, path, defs) - - def on_not_in_key(self, frame, path, defs): - filtered_iterator = self.filter(path[-1].iterator) - del path[-1].iterator - path[-1].iterator = filtered_iterator - del filtered_iterator - path[-1].empty = False - return True - - def filter(self, it): - for el in it: - try: - if self.compiled.search(el[0]): - yield el - elif not self.skippable: - raise ValidationError - except TypeError: - if not self.skippable: - raise ValidationError - -class _KeySubtree(_PatternElement): - def __init__(self, toks): - self.key = toks[0] - self.skippable = toks[1] == '?' 
- - def on_not_in_key(self, frame, path, defs): - path[-1].subtree = True - filtered_iterator = self.filter(path[-1].iterator, defs) - del path[-1].iterator - path[-1].iterator = filtered_iterator - del filtered_iterator - path[-1].empty = False - return True - - def filter(self, it, defs): - for x in it: - for y in _match_helper(self.key, defs, x[0]): - yield (y, x[1]) - - def collect_params(self, res: list): - for sub in self.key: - sub.collect_params(res) - -class _ValueSubtree(_PatternElement): - def __init__(self, toks): - self.key = toks[0] - self.skippable = toks[1] == '?' - - def on_not_in_key(self, frame, path, defs): - assert not path[-1].empty - path.append(_Holder(key=None, value=None, name=None, parent=path[-1].value, empty=False, subtree=True)) - path[-1].iterator = self.filter(path[-1].parent, defs) - return True - - def filter(self, parent, defs): - for x in _match_helper(self.key, defs, parent): - yield (x, parent) - - def collect_params(self, res: list): - for sub in self.key: - sub.collect_params(res) - -class _Ident(_PatternElement): - def __init__(self, toks): - self.key = toks[0] - - def on_not_in_key(self, frame, path, defs): - path[-1].name = self.key - path[-1].empty = False - return True - -class _Param(_PatternElement): - def __init__(self, toks): - assert isinstance(toks[1], _Ident) - self.skippable = toks[0] == '?' - self.key = toks[1].key - - def on_in_key(self, frame, path, defs): - return self.on_not_in_key(frame, path, defs) - - def on_not_in_key(self, frame, path, defs): - path[-1].iterator = self.extract(path[-1].parent, defs[self.key]) - path[-1].empty = False - return True - - def extract(self, obj, key): - try: - yield (key, obj[key]) - except (TypeError, IndexError, KeyError): - if not self.skippable: - raise ValidationError - - def collect_params(self, res: list): - res.append(self.key) - - def get_value(self, defs): - return defs[self.key] - -class _Ty(_PatternElement): - def __init__(self, toks): - assert isinstance(toks[1], _Ident) - self.skippable = toks[0] == '?' - self.key = toks[1].key - - def on_in_key(self, frame, path, defs): - filtered_iterator = self.filter(path[-1].iterator, defs[self.key]) - del path[-1].iterator - path[-1].iterator = filtered_iterator - del filtered_iterator - path[-1].empty = False - return True - - def on_not_in_key(self, frame, path, defs): - assert len(path) == 1 - if isinstance(path[-1].value, defs[self.key]): - return False - elif not self.skippable: - raise ValidationError - path.clear() - return False - - def filter(self, it, ty): - for el in it: - # this may TypeError if ty is not a type nor a tuple of types - # but that's actually the programmer's error - if isinstance(el[1], ty): - yield el - elif not self.skippable: - # and this one is for actual validation - raise ValidationError - - def collect_params(self, res: list): - res.append(self.key) - -class _End(_PatternElement): - def on_in_key(self, frame, path, defs): - try: - path[-1].next() - return False - except StopIteration: - path.pop() - while frame.prev() and not isinstance(frame.current_op, _End): - pass - if not frame.prev(): - # FIXME? - path.clear() - return True # FIXME? 
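For illustration, a minimal sketch of the validating vs. non-validating ("?") literal behaviour implemented by _StringKey above; the pattern and data are made up for the example:

    import abdl

    # key present: one result dict, with the (key, value) pair under 'x'
    list(abdl.match("->x 'name'", {"name": 1}))    # [{'x': ('name', 1)}]
    # key missing, non-validating literal: the entry is skipped silently
    list(abdl.match("->x 'name'?", {"other": 1}))  # []
    # key missing, validating literal: abdl.ValidationError is raised
    list(abdl.match("->x 'name'", {"other": 1}))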
- -def _build_syntax(): - from pyparsing import Suppress, Literal, Forward, CharsNotIn, StringEnd, Combine, Optional, Group, Word, srange, Empty - # original regex order: arrow, type/parameter/identifier, string, regex, failure - # better syntax: "arrow" changes from "value" to "key" and thus you need at least one key match before an arrow - subtree = Forward() - # where relevant, enforces match behaviour (skippable object tree branch vs required object tree branch) - skippable = Optional("?", default="") - # r"|'(?:%'|%%|%(?P.|$)|[^%'])*?(?:'|(?P$))\??" # string literals - str_literal = (Combine(Suppress("'") - + (Suppress("%") + "'" | Suppress("%") + "%" | Literal("%") + (CharsNotIn("") | StringEnd()).setParseAction(PatternError._str_escape) | CharsNotIn("%'"))[...] - + (Suppress("'") | StringEnd().setParseAction(PatternError._str_end))) + skippable).setParseAction(lambda toks: [_StringKey(toks)]) - # r"|/(?:%/|%%|%(?P.|$)|[^%/])*?(?:/|(?P$))\??" # regex - re_literal = (Combine(Suppress("/") - + (Suppress("%") + "/" | Suppress("%") + "%" | Literal("%") + (CharsNotIn("") | StringEnd()).setParseAction(PatternError._re_escape) | CharsNotIn("%/"))[...] - + (Suppress("/") | StringEnd().setParseAction(PatternError._re_end))) + skippable).setParseAction(lambda toks: [_RegexKey(toks)]) - arrow = Literal("->").setParseAction(lambda: [_Arrow()]) - # r"|(?::\??)?\$?[A-Za-z][A-Za-z0-9]*" # identifiers, parameters and type matches - identifier = Word(srange("[A-Za-z_]"), srange("[A-Za-z0-9_]")).setParseAction(lambda toks: [_Ident(toks)]) - parameter = (Suppress("$") + skippable + identifier).setParseAction(lambda toks: [_Param(toks)]) - ty = (Suppress(":") + skippable + Suppress("$") + identifier).setParseAction(lambda toks: [_Ty(toks)]) - # support for objects-as-keys - keysubtree = (Suppress("[") + Group(ty[...] + subtree) + (Suppress("]") | CharsNotIn("").setParseAction(PatternError._unexpected_tok) | StringEnd().setParseAction(PatternError._unexpected_tok)) + Optional("?", default="")).setParseAction(lambda toks: [_KeySubtree(toks)]) - # represents key matching - switches from "key" to "value" - tag = (identifier + Optional(parameter | str_literal | re_literal | keysubtree) | parameter | str_literal | re_literal | keysubtree) + ty[...] + Empty().setParseAction(lambda: [_End()]) - # multiple value matching - valuesubtree = (Suppress("(") + Group(subtree) + (Suppress(")") | CharsNotIn("").setParseAction(PatternError._unexpected_tok) | StringEnd().setParseAction(PatternError._unexpected_tok)) + Optional("?", default="")).setParseAction(lambda toks: [_ValueSubtree(toks)]) - # arrow and tag, value subtree - subtree <<= (arrow + tag)[...] + (valuesubtree + Empty().setParseAction(lambda: [_End()]))[...] 
- return ((subtree | CharsNotIn("").setParseAction(PatternError._unexpected_tok)) + StringEnd()).parseWithTabs() - -_built_syntax = _build_syntax() - -def _pairs(o): - if isinstance(o, Mapping): - return iter(o.items()) - elif isinstance(o, Sequence): - return iter(enumerate(o, 0)) - elif isinstance(o, Set): - return iter(((e, e) for e in o)) - else: - # maybe there's more stuff I can implement later - raise TypeError - -class _Holder: - def __init__(self, key, value, name, parent=None, it=None, empty=False, subtree=False): - self.name = name - self.key = key - self.value = value - self.empty = empty - self._it = it - self.parent = parent - self.subtree = subtree - - @property - def iterator(self): - if self._it is None: - self._it = _pairs(self.parent) - return self._it - - @iterator.setter - def iterator(self, value): - assert self._it is None - self._it = value - - @iterator.deleter - def iterator(self): - self._it = None - - def next(self): - self.key, self.value = next(self.iterator) - -class _Frame: - def __init__(self, ops): - self.ops = ops - self.pc = -1 - - def next(self): - pc = self.pc + 1 - if pc >= len(self.ops): - return False - self.pc = pc - return True - - @property - def current_op(self): - return self.ops[self.pc] - - def prev(self): - pc = self.pc - 1 - if pc < 0: - return False - self.pc = pc - return True - -def _match_helper(ops, defs, tree): - frame = _Frame(ops) - - path = [_Holder(key=None, value=tree, parent=None, it=iter(()), name=None)] - in_key = False - while path: - if not frame.next(): - assert not path[-1].empty - res = {} - for h in path: - if h.subtree: - for name, kv in h.key.items(): - res[name] = kv - elif h.name is not None: - res[h.name] = (h.key, h.value) - yield res - assert len(path) == 1 or isinstance(frame.current_op, _End) - frame.prev() - in_key = True - else: - op = frame.current_op - if in_key: - in_key = op.on_in_key(frame, path, defs) - else: - in_key = op.on_not_in_key(frame, path, defs) - -class Pattern: - """A compiled pattern object. - - Warning: - Do not create instances of this class manually. Use :py:func:`abdl.compile`. - - """ - - def __init__(self, pattern, defs): - try: - self._ops = _built_syntax.parseString(pattern) - except PatternError as e: - e._normalize(pattern, defs) - raise - else: - self._params = [] - for op in self._ops: - op.collect_params(self._params) - self._defs = {param: defs[param] for param in self._params} - - def match(self, obj): - """Matches this compiled pattern against the given object. - - Args: - obj: The object to match against. - - Returns: - An iterator. This iterator yields ``(key, value)`` pairs - wrapped in a dict for each variable in the pattern. - - """ - return _match_helper(self._ops, self._defs, obj) - -def compile(pattern, defs={}): - """Compiles the pattern and returns a compiled :py:class:`abdl.Pattern` object. - - Args: - pattern (str): The pattern. Refer to module-level documentation for - pattern syntax. - defs (dict): The parameter list. Used by parameters in the pattern. - - Returns: - Pattern: A compiled pattern object. - - """ - # TODO caching - return Pattern(pattern, defs) - -def match(pattern, obj, defs={}): - """Matches the pattern against the given obj. - - This method is equivalent to ``abdl.compile(pattern, defs).match(obj)``. - - Args: - pattern (str): The pattern. Refer to module-level documentation for - pattern syntax. - obj: The object to match against. - defs (dict): The parameter list. Used by parameters in the pattern. - - Returns: - An iterator. 
This iterator yields ``(key, value)`` pairs - wrapped in a dict for each variable in the pattern. - - """ - return compile(pattern, defs).match(obj) diff --git a/abdl/__init__.py b/abdl/__init__.py new file mode 100644 index 0000000..8dde742 --- /dev/null +++ b/abdl/__init__.py @@ -0,0 +1,528 @@ +# A Boneless Datastructure Language +# Copyright (C) 2019 Soni L. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +"""A Boneless Datastructure Language, version 2.0.1. + +ABDL expressions are regex-like constructs for matching and validating object structures. They can be used +with JSON and similar formats, and even self-referential data structures. + +Language Reference: + + ABDL expressions have the ability to iterate, index, validate and filter data structures. This is + done with the use of the syntax elements listed below. + + Syntax Elements: + + An ABDL expression is a sequence of zero or more sequences starting with arrows followed by zero or + more subvalues. + + An arrow is ``->`` and indicates indexing/iteration (Mappings, Sequences, Sets). It must be followed + by a variable, literal, parameter, regex or key match. + + A variable is a string of alphanumeric characters, not starting with a digit. It may be followed by a + literal, parameter, regex, key match, or one or more type tests. A ``(key, value)`` tuple containing + the corresponding matched element will be identified by this name in the results dict. + + A literal is a string delimited by single quotes (use ``%'`` to escape ``'`` and ``%%`` to escape ``%``). + A literal can be made "non-validating" by appending an ``?`` after it. It may be followed by one or more + type tests. It is exactly equivalent to indexing an object with a string key. + + A parameter is the symbol ``$`` followed by a string of alphanumeric characters, not starting with + a digit. A parameter can be made "non-validating" by appending an ``?`` after it. It may be followed by + one or more type tests. It is exactly equivalent to indexing an object with an arbitrary object key. + + A regex is an RE, as defined by the ``re`` module, delimited by forward slashes (use ``%/`` to escape + ``/`` and ``%%`` to escape ``%``). A regex can be made "non-validating" by appending an ``?`` after it. + It may be followed by one or more type tests. It attempts to match each key in the object. + + A type test is ``:`` followed by a parameter. A type test can be made "non-validating" by appending + an ``?`` after the ``:``. It attempts to match the type of each matched value in the object. + + A key match is an ABDL expression enclosed in ``[`` and ``]``, optionally prefixed with one or more type + tests. This matches keys (including the type tests). + + A subvalue is an ABDL expression enclosed in ``(`` and ``)``. This allows matching multiple values on + the same object. + + Some syntax elements can be validating or non-validating. 
Validating syntax elements will raise a + :py:exc:`abdl.ValidationError` whenever a non-matching element is encountered, whereas non-validating + ones will skip them. Note that it is possible for a validating syntax element to still yield results + before raising a :py:exc:`abdl.ValidationError`, so one needs to be careful when writing code where such + behaviour could result in a security vulnerability. + + Examples: + + >>> import abdl + >>> for m in abdl.match("->X:?$dict->Y", {"foo": 1, "bar": {"baz": 2}}, {'dict': dict}): + ... print(m['X'][0], m['Y'][0], m['Y'][1]) + bar baz 2 + + >>> pat = abdl.compile('''-> 'projects'? + ... -> commit /[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/? :?$dict + ... -> url :?$dict + ... -> branch :?$dict''', {'dict': dict}) + >>> data = {"projects": { + ... "385e734a52e13949a7a5c71827f6de920dbfea43": { + ... "https://soniex2.autistic.space/git-repos/ganarchy.git": {"HEAD": {"active": True}} + ... } + ... }} + >>> for m in pat.match(data): + ... print(m['commit'][0], m['url'][0], m['branch'][0], m['branch'][1]) + 385e734a52e13949a7a5c71827f6de920dbfea43 https://soniex2.autistic.space/git-repos/ganarchy.git HEAD {'active': True} + + (If ``:?$dict`` wasn't present, a TypeError would be raised when trying to iterate the ``1`` from ``"foo": 1``.) +""" + +import re + +from collections.abc import Mapping, Sequence, Iterator, Set + +class DeprecationError(Exception): + """Raised for deprecated features, if they are disabled. + + This class controls warning/error behaviour of deprecated features.""" + #enable_key_match_compat = False + #warn_key_match_compat = False + + @classmethod + def warn_all(cls): + """Enables all deprecation warnings.""" + pass + +class PatternError(Exception): + """Raised for invalid input or output expressions.""" + # TODO implement formatting + + def __init__(self, msg, pattern, defs, pos, toks): + self.msg = msg + self.pattern = pattern + self.defs = defs + self.pos = pos + self._toks = toks # internal use + + def _normalize(self, pattern, defs): + if pattern is not None: + if self.pattern is not None: + raise ValueError("Attempt to normalize normalized pattern") + else: + self.pattern = pattern + if defs is not None: + if self.defs is not None: + raise ValueError("Attempt to normalize normalized defs") + else: + self.defs = defs + + @classmethod + def _str_escape(cls, s, pos, toks): + raise cls("Error in string escape", None, None, pos, toks) + + @classmethod + def _str_end(cls, s, pos, toks): + raise cls("Unfinished string", None, None, pos, toks) + + @classmethod + def _re_escape(cls, s, pos, toks): + raise cls("Error in regex escape", None, None, pos, toks) + + @classmethod + def _re_end(cls, s, pos, toks): + raise cls("Unfinished regex", None, None, pos, toks) + + @classmethod + def _unexpected_tok(cls, s, pos, toks): + raise cls("Unexpected token", None, None, pos, toks) + +class ValidationError(Exception): + """Raised when the object tree doesn't validate against the given pattern.""" + # FIXME TODO? 
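To complement the doctest examples in the module docstring, a small sketch of the subvalue syntax ("(" ... ")"), which matches several values on the same object; the field names and data are illustrative only:

    import abdl

    # each parenthesised group matches a different key of the same dict
    for m in abdl.match("(->n'name')(->a'age')", {"name": "Alice", "age": 7}):
        print(m['n'][1], m['a'][1])   # prints: Alice 7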
+ +class _PatternElement: + def on_not_in_key(self, frame, path, defs): + raise NotImplementedError + + def on_in_key(self, frame, path, defs): + raise NotImplementedError + + def collect_params(self, res: list): + pass + +class _Arrow(_PatternElement): + def on_not_in_key(self, frame, path, defs): + assert not path[-1].empty + path.append(_Holder(key=None, value=None, name=None, parent=path[-1].value, empty=True)) + return False + +class _StringKey(_PatternElement): + def __init__(self, toks): + self.key = toks[0] + self.skippable = toks[1] == '?' + + def on_in_key(self, frame, path, defs): + return self.on_not_in_key(frame, path, defs) + + def on_not_in_key(self, frame, path, defs): + path[-1].iterator = self.extract(path[-1].parent) + path[-1].empty = False + return True + + def extract(self, obj): + try: + yield (self.key, obj[self.key]) + except (TypeError, IndexError, KeyError): + if not self.skippable: + raise ValidationError + +class _RegexKey(_PatternElement): + def __init__(self, toks): + self.key = toks[0] + self.compiled = re.compile(self.key) + self.skippable = toks[1] == '?' + + def on_in_key(self, frame, path, defs): + return self.on_not_in_key(frame, path, defs) + + def on_not_in_key(self, frame, path, defs): + filtered_iterator = self.filter(path[-1].iterator) + del path[-1].iterator + path[-1].iterator = filtered_iterator + del filtered_iterator + path[-1].empty = False + return True + + def filter(self, it): + for el in it: + try: + if self.compiled.search(el[0]): + yield el + elif not self.skippable: + raise ValidationError + except TypeError: + if not self.skippable: + raise ValidationError + +class _KeySubtree(_PatternElement): + def __init__(self, toks): + self.key = toks[0] + self.skippable = toks[1] == '?' + + def on_not_in_key(self, frame, path, defs): + path[-1].subtree = True + filtered_iterator = self.filter(path[-1].iterator, defs) + del path[-1].iterator + path[-1].iterator = filtered_iterator + del filtered_iterator + path[-1].empty = False + return True + + def filter(self, it, defs): + for x in it: + for y in _match_helper(self.key, defs, x[0]): + yield (y, x[1]) + + def collect_params(self, res: list): + for sub in self.key: + sub.collect_params(res) + +class _ValueSubtree(_PatternElement): + def __init__(self, toks): + self.key = toks[0] + self.skippable = toks[1] == '?' + + def on_not_in_key(self, frame, path, defs): + assert not path[-1].empty + path.append(_Holder(key=None, value=None, name=None, parent=path[-1].value, empty=False, subtree=True)) + path[-1].iterator = self.filter(path[-1].parent, defs) + return True + + def filter(self, parent, defs): + for x in _match_helper(self.key, defs, parent): + yield (x, parent) + + def collect_params(self, res: list): + for sub in self.key: + sub.collect_params(res) + +class _Ident(_PatternElement): + def __init__(self, toks): + self.key = toks[0] + + def on_not_in_key(self, frame, path, defs): + path[-1].name = self.key + path[-1].empty = False + return True + +class _Param(_PatternElement): + def __init__(self, toks): + assert isinstance(toks[1], _Ident) + self.skippable = toks[0] == '?' 
+ self.key = toks[1].key + + def on_in_key(self, frame, path, defs): + return self.on_not_in_key(frame, path, defs) + + def on_not_in_key(self, frame, path, defs): + path[-1].iterator = self.extract(path[-1].parent, defs[self.key]) + path[-1].empty = False + return True + + def extract(self, obj, key): + try: + yield (key, obj[key]) + except (TypeError, IndexError, KeyError): + if not self.skippable: + raise ValidationError + + def collect_params(self, res: list): + res.append(self.key) + + def get_value(self, defs): + return defs[self.key] + +class _Ty(_PatternElement): + def __init__(self, toks): + assert isinstance(toks[1], _Ident) + self.skippable = toks[0] == '?' + self.key = toks[1].key + + def on_in_key(self, frame, path, defs): + filtered_iterator = self.filter(path[-1].iterator, defs[self.key]) + del path[-1].iterator + path[-1].iterator = filtered_iterator + del filtered_iterator + path[-1].empty = False + return True + + def on_not_in_key(self, frame, path, defs): + assert len(path) == 1 + if isinstance(path[-1].value, defs[self.key]): + return False + elif not self.skippable: + raise ValidationError + path.clear() + return False + + def filter(self, it, ty): + for el in it: + # this may TypeError if ty is not a type nor a tuple of types + # but that's actually the programmer's error + if isinstance(el[1], ty): + yield el + elif not self.skippable: + # and this one is for actual validation + raise ValidationError + + def collect_params(self, res: list): + res.append(self.key) + +class _End(_PatternElement): + def on_in_key(self, frame, path, defs): + try: + path[-1].next() + return False + except StopIteration: + path.pop() + while frame.prev() and not isinstance(frame.current_op, _End): + pass + if not frame.prev(): + # FIXME? + path.clear() + return True # FIXME? + +def _build_syntax(): + from pyparsing import Suppress, Literal, Forward, CharsNotIn, StringEnd, Combine, Optional, Group, Word, srange, Empty + # original regex order: arrow, type/parameter/identifier, string, regex, failure + # better syntax: "arrow" changes from "value" to "key" and thus you need at least one key match before an arrow + subtree = Forward() + # where relevant, enforces match behaviour (skippable object tree branch vs required object tree branch) + skippable = Optional("?", default="") + # r"|'(?:%'|%%|%(?P.|$)|[^%'])*?(?:'|(?P$))\??" # string literals + str_literal = (Combine(Suppress("'") + + (Suppress("%") + "'" | Suppress("%") + "%" | Literal("%") + (CharsNotIn("") | StringEnd()).setParseAction(PatternError._str_escape) | CharsNotIn("%'"))[...] + + (Suppress("'") | StringEnd().setParseAction(PatternError._str_end))) + skippable).setParseAction(lambda toks: [_StringKey(toks)]) + # r"|/(?:%/|%%|%(?P.|$)|[^%/])*?(?:/|(?P$))\??" # regex + re_literal = (Combine(Suppress("/") + + (Suppress("%") + "/" | Suppress("%") + "%" | Literal("%") + (CharsNotIn("") | StringEnd()).setParseAction(PatternError._re_escape) | CharsNotIn("%/"))[...] 
+ + (Suppress("/") | StringEnd().setParseAction(PatternError._re_end))) + skippable).setParseAction(lambda toks: [_RegexKey(toks)]) + arrow = Literal("->").setParseAction(lambda: [_Arrow()]) + # r"|(?::\??)?\$?[A-Za-z][A-Za-z0-9]*" # identifiers, parameters and type matches + identifier = Word(srange("[A-Za-z_]"), srange("[A-Za-z0-9_]")).setParseAction(lambda toks: [_Ident(toks)]) + parameter = (Suppress("$") + skippable + identifier).setParseAction(lambda toks: [_Param(toks)]) + ty = (Suppress(":") + skippable + Suppress("$") + identifier).setParseAction(lambda toks: [_Ty(toks)]) + # support for objects-as-keys + keysubtree = (Suppress("[") + Group(ty[...] + subtree) + (Suppress("]") | CharsNotIn("").setParseAction(PatternError._unexpected_tok) | StringEnd().setParseAction(PatternError._unexpected_tok)) + Optional("?", default="")).setParseAction(lambda toks: [_KeySubtree(toks)]) + # represents key matching - switches from "key" to "value" + tag = (identifier + Optional(parameter | str_literal | re_literal | keysubtree) | parameter | str_literal | re_literal | keysubtree) + ty[...] + Empty().setParseAction(lambda: [_End()]) + # multiple value matching + valuesubtree = (Suppress("(") + Group(subtree) + (Suppress(")") | CharsNotIn("").setParseAction(PatternError._unexpected_tok) | StringEnd().setParseAction(PatternError._unexpected_tok)) + Optional("?", default="")).setParseAction(lambda toks: [_ValueSubtree(toks)]) + # arrow and tag, value subtree + subtree <<= (arrow + tag)[...] + (valuesubtree + Empty().setParseAction(lambda: [_End()]))[...] + return ((subtree | CharsNotIn("").setParseAction(PatternError._unexpected_tok)) + StringEnd()).parseWithTabs() + +_built_syntax = _build_syntax() + +def _pairs(o): + if isinstance(o, Mapping): + return iter(o.items()) + elif isinstance(o, Sequence): + return iter(enumerate(o, 0)) + elif isinstance(o, Set): + return iter(((e, e) for e in o)) + else: + # maybe there's more stuff I can implement later + raise TypeError + +class _Holder: + def __init__(self, key, value, name, parent=None, it=None, empty=False, subtree=False): + self.name = name + self.key = key + self.value = value + self.empty = empty + self._it = it + self.parent = parent + self.subtree = subtree + + @property + def iterator(self): + if self._it is None: + self._it = _pairs(self.parent) + return self._it + + @iterator.setter + def iterator(self, value): + assert self._it is None + self._it = value + + @iterator.deleter + def iterator(self): + self._it = None + + def next(self): + self.key, self.value = next(self.iterator) + +class _Frame: + def __init__(self, ops): + self.ops = ops + self.pc = -1 + + def next(self): + pc = self.pc + 1 + if pc >= len(self.ops): + return False + self.pc = pc + return True + + @property + def current_op(self): + return self.ops[self.pc] + + def prev(self): + pc = self.pc - 1 + if pc < 0: + return False + self.pc = pc + return True + +def _match_helper(ops, defs, tree): + frame = _Frame(ops) + + path = [_Holder(key=None, value=tree, parent=None, it=iter(()), name=None)] + in_key = False + while path: + if not frame.next(): + assert not path[-1].empty + res = {} + for h in path: + if h.subtree: + for name, kv in h.key.items(): + res[name] = kv + elif h.name is not None: + res[h.name] = (h.key, h.value) + yield res + assert len(path) == 1 or isinstance(frame.current_op, _End) + frame.prev() + in_key = True + else: + op = frame.current_op + if in_key: + in_key = op.on_in_key(frame, path, defs) + else: + in_key = op.on_not_in_key(frame, path, defs) + +class 
Pattern: + """A compiled pattern object. + + Warning: + Do not create instances of this class manually. Use :py:func:`abdl.compile`. + + """ + + def __init__(self, pattern, defs): + try: + self._ops = _built_syntax.parseString(pattern) + except PatternError as e: + e._normalize(pattern, defs) + raise + else: + self._params = [] + for op in self._ops: + op.collect_params(self._params) + self._defs = {param: defs[param] for param in self._params} + + def match(self, obj): + """Matches this compiled pattern against the given object. + + Args: + obj: The object to match against. + + Returns: + An iterator. This iterator yields ``(key, value)`` pairs + wrapped in a dict for each variable in the pattern. + + """ + return _match_helper(self._ops, self._defs, obj) + +def compile(pattern, defs={}): + """Compiles the pattern and returns a compiled :py:class:`abdl.Pattern` object. + + Args: + pattern (str): The pattern. Refer to module-level documentation for + pattern syntax. + defs (dict): The parameter list. Used by parameters in the pattern. + + Returns: + Pattern: A compiled pattern object. + + """ + # TODO caching + return Pattern(pattern, defs) + +def match(pattern, obj, defs={}): + """Matches the pattern against the given obj. + + This method is equivalent to ``abdl.compile(pattern, defs).match(obj)``. + + Args: + pattern (str): The pattern. Refer to module-level documentation for + pattern syntax. + obj: The object to match against. + defs (dict): The parameter list. Used by parameters in the pattern. + + Returns: + An iterator. This iterator yields ``(key, value)`` pairs + wrapped in a dict for each variable in the pattern. + + """ + return compile(pattern, defs).match(obj) diff --git a/ganarchy.py b/ganarchy.py deleted file mode 100755 index 82f1a7b..0000000 --- a/ganarchy.py +++ /dev/null @@ -1,683 +0,0 @@ -#!/usr/bin/env python3 - -# GAnarchy - project homepage generator -# Copyright (C) 2019 Soni L. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -import abc -import builtins -import hashlib -import hmac -import os -import re -import sqlite3 -import subprocess - -import click -import jinja2 -import qtoml -import requests - -import abdl - -from collections import defaultdict -from urllib.parse import urlparse - -MIGRATIONS = { - "toml-config": ( - ( - '''UPDATE "repo_history" SET "project" = (SELECT "git_commit" FROM "config") WHERE "project" IS NULL''', - '''ALTER TABLE "repos" RENAME TO "repos_old"''',), - ( - '''UPDATE "repo_history" SET "project" = NULL WHERE "project" = (SELECT "git_commit" FROM "config")''', - '''ALTER TABLE "repos_old" RENAME TO "repos"''',), - "switches to toml config format. 
the old 'repos' table is preserved as 'repos_old'" - ), - "better-project-management": ( - ( - '''ALTER TABLE "repos" ADD COLUMN "branch" TEXT''', - '''ALTER TABLE "repos" ADD COLUMN "project" TEXT''', - '''CREATE UNIQUE INDEX "repos_url_branch_project" ON "repos" ("url", "branch", "project")''', - '''CREATE INDEX "repos_project" ON "repos" ("project")''', - '''ALTER TABLE "repo_history" ADD COLUMN "branch" TEXT''', - '''ALTER TABLE "repo_history" ADD COLUMN "project" TEXT''', - '''CREATE INDEX "repo_history_url_branch_project" ON "repo_history" ("url", "branch", "project")''',), - ( - '''DELETE FROM "repos" WHERE "branch" IS NOT NULL OR "project" IS NOT NULL''', - '''DELETE FROM "repo_history" WHERE "branch" IS NOT NULL OR "project" IS NOT NULL''',), - "supports multiple projects, and allows choosing non-default branches" - ), - "test": ( - ('''-- apply''',), - ('''-- revert''',), - "does nothing" - ) - } - -data_home = os.environ.get('XDG_DATA_HOME', '') -if not data_home: - data_home = os.environ['HOME'] + '/.local/share' -data_home = data_home + "/ganarchy" - -cache_home = os.environ.get('XDG_CACHE_HOME', '') -if not cache_home: - cache_home = os.environ['HOME'] + '/.cache' -cache_home = cache_home + "/ganarchy" - -config_home = os.environ.get('XDG_CONFIG_HOME', '') -if not config_home: - config_home = os.environ['HOME'] + '/.config' -config_home = config_home + "/ganarchy" - -config_dirs = os.environ.get('XDG_CONFIG_DIRS', '') -if not config_dirs: - config_dirs = '/etc/xdg' -# TODO check if this is correct -config_dirs = [config_dir + "/ganarchy" for config_dir in config_dirs.split(':')] - -def get_template_loader(): - from jinja2 import DictLoader, FileSystemLoader, ChoiceLoader - return ChoiceLoader([ - FileSystemLoader([config_home + "/templates"] + [config_dir + "/templates" for config_dir in config_dirs]), - DictLoader({ - ## index.html - 'index.html': """ - - - - - {{ ganarchy.title|e }} - - - - - -

- {{ ganarchy.title|e }}
- This is {{ ganarchy.title|e }}. Currently tracking the following projects:
- {% for project in ganarchy.projects -%}
-   • {{ project.title|e }}: {{ project.description|e }}
- {% endfor -%}
- Powered by GAnarchy. AGPLv3-licensed. Source Code.
- Register web+ganarchy: URI handler.

- - -""", - ## index.toml - 'index.toml': """# Generated by GAnarchy - -{%- for project, repos in config.projects.items() %} -[projects.{{project}}] -{%- for repo_url, branches in repos.items() %}{% for branch, options in branches.items() %}{% if options.active %} -"{{repo_url|tomle}}".{% if branch %}"{{branch|tomle}}"{% else %}HEAD{% endif %} = { active=true } -{%- endif %}{% endfor %} -{%- endfor %} -{% endfor -%} -""", - ## project.html FIXME - 'project.html': """ - - - - - {{ project_title|e }} - {% if project_desc %}{% endif %} - - - -

- {{ project_title|e }}
- Tracking {{ project_commit }}
- {{ project_body|e|replace("\n\n", "
- ") }}
- {% for url, msg, img, branch in repos -%}
-   • {{ url|e }}{% if branch %} [{{ branch|e }}]{% endif %}: {{ msg|e }}
- {% endfor -%}
- Powered by GAnarchy. AGPLv3-licensed. Source Code.
- Main page. Register web+ganarchy: URI handler.

- - -""", - ## history.svg FIXME - 'history.svg': """""", - }) - ]) - -tomletrans = str.maketrans({ - 0: '\\u0000', 1: '\\u0001', 2: '\\u0002', 3: '\\u0003', 4: '\\u0004', - 5: '\\u0005', 6: '\\u0006', 7: '\\u0007', 8: '\\b', 9: '\\t', 10: '\\n', - 11: '\\u000B', 12: '\\f', 13: '\\r', 14: '\\u000E', 15: '\\u000F', - 16: '\\u0010', 17: '\\u0011', 18: '\\u0012', 19: '\\u0013', 20: '\\u0014', - 21: '\\u0015', 22: '\\u0016', 23: '\\u0017', 24: '\\u0018', 25: '\\u0019', - 26: '\\u001A', 27: '\\u001B', 28: '\\u001C', 29: '\\u001D', 30: '\\u001E', - 31: '\\u001F', '"': '\\"', '\\': '\\\\' - }) -def tomlescape(value): - return value.translate(tomletrans) - -def get_env(): - env = jinja2.Environment(loader=get_template_loader(), autoescape=False) - env.filters['tomlescape'] = tomlescape - env.filters['tomle'] = env.filters['tomlescape'] - return env - - -@click.group() -def ganarchy(): - pass - -@ganarchy.command() -def initdb(): - """Initializes the ganarchy database.""" - os.makedirs(data_home, exist_ok=True) - conn = sqlite3.connect(data_home + "/ganarchy.db") - c = conn.cursor() - c.execute('''CREATE TABLE "repo_history" ("entry" INTEGER PRIMARY KEY ASC AUTOINCREMENT, "url" TEXT, "count" INTEGER, "head_commit" TEXT, "branch" TEXT, "project" TEXT)''') - c.execute('''CREATE INDEX "repo_history_url_branch_project" ON "repo_history" ("url", "branch", "project")''') - conn.commit() - conn.close() - -def migrations(): - @ganarchy.group() - def migrations(): - """Modifies the DB to work with a newer/older version. - - WARNING: THIS COMMAND CAN BE EXTREMELY DESTRUCTIVE!""" - - @migrations.command() - @click.argument('migration') - def apply(migration): - """Applies the migration with the given name.""" - conn = sqlite3.connect(data_home + "/ganarchy.db") - c = conn.cursor() - click.echo(MIGRATIONS[migration][0]) - for migration in MIGRATIONS[migration][0]: - c.execute(migration) - conn.commit() - conn.close() - - @click.argument('migration') - @migrations.command() - def revert(migration): - """Reverts the migration with the given name.""" - conn = sqlite3.connect(data_home + "/ganarchy.db") - c = conn.cursor() - click.echo(MIGRATIONS[migration][1]) - for migration in MIGRATIONS[migration][1]: - c.execute(migration) - conn.commit() - conn.close() - - @click.argument('migration', required=False) - @migrations.command() - def info(migration): - """Shows information about the migration with the given name.""" - if not migration: - # TODO could be improved - click.echo(MIGRATIONS.keys()) - else: - click.echo(MIGRATIONS[migration][2]) - -migrations() - -class GitError(LookupError): - """Raised when a git operation fails, generally due to a missing commit or branch, or network connection issues.""" - pass - -class Git: - def __init__(self, path): - self.path = path - self.base = ("git", "-C", path) - - def get_hash(self, target): - try: - return subprocess.check_output(self.base + ("show", target, "-s", "--format=format:%H", "--"), stderr=subprocess.DEVNULL).decode("utf-8") - except subprocess.CalledProcessError as e: - raise GitError from e - - def get_commit_message(self, target): - try: - return subprocess.check_output(self.base + ("show", target, "-s", "--format=format:%B", "--"), stderr=subprocess.DEVNULL).decode("utf-8", "replace") - except subprocess.CalledProcessError as e: - raise GitError from e - -# Currently we only use one git repo, at cache_home -GIT = Git(cache_home) - -class Repo: - def __init__(self, dbconn, project_commit, url, branch, head_commit, list_metadata=False): - self.url = url - 
self.branch = branch - self.project_commit = project_commit - self.erroring = False - - if not branch: - self.branchname = "gan" + hashlib.sha256(url.encode("utf-8")).hexdigest() - self.head = "HEAD" - else: - self.branchname = "gan" + hmac.new(branch.encode("utf-8"), url.encode("utf-8"), "sha256").hexdigest() - self.head = "refs/heads/" + branch - - if head_commit: - self.hash = head_commit - else: - try: # FIXME should we even do this? - self.hash = GIT.get_hash(self.branchname) - except GitError: - self.erroring = True - self.hash = None - - self.message = None - if list_metadata: - try: - self.update_metadata() - except GitError: - self.erroring = True - pass - - def update_metadata(self): - self.message = GIT.get_commit_message(self.branchname) - - def update(self, updating=True): - """ - Updates the git repo, returning new metadata. - """ - if updating: - try: - subprocess.check_output(["git", "-C", cache_home, "fetch", "-q", self.url, "+" + self.head + ":" + self.branchname], stderr=subprocess.STDOUT) - except subprocess.CalledProcessError as e: - # This may error for various reasons, but some are important: dead links, etc - click.echo(e.output, err=True) - self.erroring = True - return None - pre_hash = self.hash - try: - post_hash = GIT.get_hash(self.branchname) - except GitError as e: - # This should never happen, but maybe there's some edge cases? - # TODO check - self.erroring = True - return None - self.hash = post_hash - if not pre_hash: - pre_hash = post_hash - try: - count = int(subprocess.check_output(["git", "-C", cache_home, "rev-list", "--count", pre_hash + ".." + post_hash, "--"]).decode("utf-8").strip()) - except subprocess.CalledProcessError: - count = 0 # force-pushed - try: - if updating: - subprocess.check_call(["git", "-C", cache_home, "merge-base", "--is-ancestor", self.project_commit, self.branchname], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - self.update_metadata() - return count - except (subprocess.CalledProcessError, GitError) as e: - click.echo(e, err=True) - self.erroring = True - return None - -class Project: - def __init__(self, dbconn, project_commit, list_repos=False): - self.commit = project_commit - self.refresh_metadata() - self.repos = None - if list_repos: - self.list_repos(dbconn) - - def list_repos(self, dbconn): - repos = [] - with dbconn: - for (e, url, branch, head_commit) in dbconn.execute('''SELECT "max"("e"), "url", "branch", "head_commit" FROM (SELECT "max"("T1"."entry") "e", "T1"."url", "T1"."branch", "T1"."head_commit" FROM "repo_history" "T1" - WHERE (SELECT "active" FROM "repos" "T2" WHERE "url" = "T1"."url" AND "branch" IS "T1"."branch" AND "project" IS ?1) - GROUP BY "T1"."url", "T1"."branch" - UNION - SELECT null, "T3"."url", "T3"."branch", null FROM "repos" "T3" WHERE "active" AND "project" IS ?1) - GROUP BY "url" ORDER BY "e"''', (self.commit,)): - repos.append(Repo(dbconn, self.commit, url, branch, head_commit)) - self.repos = repos - - def refresh_metadata(self): - try: - project = GIT.get_commit_message(self.commit) - project_title, project_desc = (lambda x: x.groups() if x is not None else ('', None))(re.fullmatch('^\\[Project\\]\s+(.+?)(?:\n\n(.+))?$', project, flags=re.ASCII|re.DOTALL|re.IGNORECASE)) - if not project_title.strip(): # FIXME - project_title, project_desc = ("Error parsing project commit",)*2 - # if project_desc: # FIXME - # project_desc = project_desc.strip() - self.commit_body = project - self.title = project_title - self.description = project_desc - except GitError: - self.commit_body = None 
- self.title = None - self.description = None - - def update(self, updating=True): - # TODO? check if working correctly - results = [(repo, repo.update(updating)) for repo in self.repos] - self.refresh_metadata() - return results - -class GAnarchy: - def __init__(self, dbconn, config, list_projects=False, list_repos=False): - base_url = config.base_url - title = config.title - if not base_url: - # FIXME use a more appropriate error type - raise ValueError - if not title: - title = "GAnarchy on " + urlparse(base_url).hostname - self.title = title - self.base_url = base_url - # load config onto DB - c = dbconn.cursor() - c.execute('''CREATE TEMPORARY TABLE "repos" ("url" TEXT PRIMARY KEY, "active" INT, "branch" TEXT, "project" TEXT)''') - c.execute('''CREATE UNIQUE INDEX "temp"."repos_url_branch_project" ON "repos" ("url", "branch", "project")''') - c.execute('''CREATE INDEX "temp"."repos_project" ON "repos" ("project")''') - c.execute('''CREATE INDEX "temp"."repos_active" ON "repos" ("active")''') - for (project_commit, repos) in config.projects.items(): - for (repo_url, branches) in repos.items(): - for (branchname, options) in branches.items(): - if options['active']: # no need to insert inactive repos since they get ignored anyway - c.execute('''INSERT INTO "repos" VALUES (?, ?, ?, ?)''', (repo_url, 1, branchname, project_commit)) - dbconn.commit() - if list_projects: - projects = [] - with dbconn: - for (project,) in dbconn.execute('''SELECT DISTINCT "project" FROM "repos" '''): - projects.append(Project(dbconn, project, list_repos=list_repos)) - projects.sort(key=lambda project: project.title) # sort projects by title - self.projects = projects - else: - self.projects = None - -class ConfigSource(abc.ABC): - @abc.abstractmethod - def update(self): - """Refreshes the config if necessary.""" - pass - - def is_domain_blocked(self, domain): - """Returns True if the given domain is blocked.""" - return False - - @abc.abstractmethod - def get_project_commit_tree_paths(self): - """Returns an iterator of (project, URI, branch, options) tuples. 
- - project is the project commit hash, URI is the repo URI, branch is the branch name and - options are the options for the given project commit-tree path.""" - pass - - def __getitem__(self, key): - raise KeyError - -class FileConfigSource(ConfigSource): - def __init__(self, filename): - self.exists = False - self.last_updated = None - self.filename = filename - self.tomlobj = None - self.update() - - def update(self): - try: - updtime = self.last_updated - self.last_updated = os.stat(self.filename).st_mtime - if not self.exists or updtime != self.last_updated: - with open(self.filename) as f: - self.tomlobj = qtoml.load(f) - self.exists = True - except OSError: - return - - def get_project_commit_tree_paths(self): - for r in Config.CONFIG_PATTERN_SANITIZE.match(self.tomlobj): - yield (v['commit'][0], v['url'][0], v['branch'][0], v['branch'][1]) - - def __getitem__(self, key): - if key in ('title', 'base_url', 'config_srcs'): - return self.tomlobj[key] - return super().__getitem__(self, key) - -class RemoteConfigSource(ConfigSource): - def __init__(self, uri): - self.uri = uri - self.tomlobj = None - - def update(self): - raise NotImplementedError - - def get_project_commit_tree_paths(self): - for r in Config.CONFIG_PATTERN_SANITIZE.match(self.tomlobj): - if v['branch'][1].get('active', False) in (True, False): - yield (v['commit'][0], v['url'][0], v['branch'][0], v['branch'][1]) - -class Config: - # sanitize = skip invalid entries - # validate = error on invalid entries - CONFIG_PATTERN_SANITIZE = abdl.compile("->commit/[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/?:?$dict->url:?$dict->branch:?$dict", {'dict': dict}) - # TODO use a validating pattern instead? - CONFIG_PATTERN = abdl.compile("->commit->url->branch", {'dict': dict}) - - def __init__(self, toml_file, base=None, remove=True): - self.projects = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))) - config_data = qtoml.load(toml_file) - self.remote_configs = config_data.get('config_srcs', []) - self.title = config_data.get('title', '') - self.base_url = config_data.get('base_url', '') - # TODO blocked domains (but only read them from config_data if remove is True) - self.blocked_domains = [] - self.blocked_domain_suffixes = [] - self.blocked_domains.sort() - self.blocked_domain_suffixes.sort(key=lambda x: x[::-1]) - # FIXME remove duplicates and process invalid entries - self.blocked_domains = tuple(self.blocked_domains) - self.blocked_domain_suffixes = tuple(self.blocked_domain_suffixes) # MUST be tuple - # TODO re.compile("(^" + "|^".join(map(re.escape, domains)) + "|" + "|".join(map(re.escape, suffixes) + ")$") - if base: - # FIXME is remove=remove the right thing to do? - self._update_projects(base.projects, remove=remove, sanitize=False) # already sanitized - projects = config_data.get('projects', {}) - self._update_projects(projects, remove=remove) - - def _update_projects(self, projects, remove, sanitize=True): - m = (Config.CONFIG_PATTERN_SANITIZE if sanitize else Config.CONFIG_PATTERN).match(projects) - for v in m: - commit, repo_url, branchname, options = v['commit'][0], v['url'][0], v['branch'][0], v['branch'][1] - try: - u = urlparse(repo_url) - if not u: - raise ValueError - # also raises for invalid ports, see https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse - # "Reading the port attribute will raise a ValueError if an invalid port is specified in the URL. 
[...]" - if u.port == 0: - raise ValueError - if u.scheme not in ('http', 'https'): - raise ValueError - if (u.hostname in self.blocked_domains) or (u.hostname.endswith(self.blocked_domain_suffixes)): - raise ValueError - except ValueError: - continue - if branchname == "HEAD": - branchname = None - active = options.get('active', None) - if active not in (True, False): - continue - branch = self.projects[commit][repo_url][branchname] - branch['active'] = active or (branch.get('active', False) and not remove) - -def debug(): - @ganarchy.group() - def debug(): - pass - - @debug.command() - def paths(): - click.echo('Config home: {}'.format(config_home)) - click.echo('Additional config search path: {}'.format(config_dirs)) - click.echo('Cache home: {}'.format(cache_home)) - click.echo('Data home: {}'.format(data_home)) - - @debug.command() - def configs(): - pass - -debug() - -@ganarchy.command() -@click.option('--skip-errors/--no-skip-errors', default=False) -@click.argument('files', type=click.File('r', encoding='utf-8'), nargs=-1) -def merge_configs(skip_errors, files): - """Merges config files.""" - config = None - for f in files: - try: - f.reconfigure(newline='') - config = Config(f, config, remove=False) - except (UnicodeDecodeError, qtoml.decoder.TOMLDecodeError): - if not skip_errors: - raise - if config: - env = get_env() - template = env.get_template('index.toml') - click.echo(template.render(config=config)) - -def update_remote_configs(): - pass - -@ganarchy.command() -@click.argument('out', required=True) -def run(out): - """Runs ganarchy standalone. - - This will run ganarchy so it regularly updates the output directory given by OUT. - Additionally, it'll also search for the following hooks in its config dirs: - - - post_object_update_hook - executed after an object is updated. - - - post_update_cycle_hook - executed after all objects in an update cycle are updated.""" - pass - -@ganarchy.command() -@click.option('--update/--no-update', default=True) -@click.argument('project', required=False) -def cron_target(update, project): - """Runs ganarchy as a cron target.""" - conf = None - # reverse order is intentional - for d in reversed(config_dirs): - try: - conf = Config(open(d + "/config.toml", 'r', encoding='utf-8', newline=''), conf) - except (OSError, UnicodeDecodeError, qtoml.decoder.TOMLDecodeError): - pass - with open(config_home + "/config.toml", 'r', encoding='utf-8', newline='') as f: - conf = Config(f, conf) - env = get_env() - if project == "config": - # render the config - # doesn't have access to a GAnarchy object. this is deliberate. 
- template = env.get_template('index.toml') - click.echo(template.render(config = conf)) - return - if project == "project-list": - # could be done with a template but eh w/e, this is probably better - for project in conf.projects.keys(): - click.echo(project) - return - # make sure the cache dir exists - os.makedirs(cache_home, exist_ok=True) - # make sure it is a git repo - subprocess.call(["git", "-C", cache_home, "init", "-q"]) - conn = sqlite3.connect(data_home + "/ganarchy.db") - instance = GAnarchy(conn, conf, list_projects=project in ["index", "config"]) - if project == "index": - # render the index - template = env.get_template('index.html') - click.echo(template.render(ganarchy = instance)) - return - if not instance.base_url or not project: - click.echo("No base URL or project commit specified", err=True) - return - entries = [] - generate_html = [] - c = conn.cursor() - p = Project(conn, project, list_repos=True) - results = p.update(update) - for (repo, count) in results: - if count is not None: - entries.append((repo.url, count, repo.hash, repo.branch, project)) - generate_html.append((repo.url, repo.message, count, repo.branch)) - # sort stuff twice because reasons - entries.sort(key=lambda x: x[1], reverse=True) - generate_html.sort(key=lambda x: x[2], reverse=True) - if update: - c.executemany('''INSERT INTO "repo_history" ("url", "count", "head_commit", "branch", "project") VALUES (?, ?, ?, ?, ?)''', entries) - conn.commit() - html_entries = [] - for (url, msg, count, branch) in generate_html: - history = c.execute('''SELECT "count" FROM "repo_history" WHERE "url" = ? AND "branch" IS ? AND "project" IS ? ORDER BY "entry" ASC''', (url, branch, project)).fetchall() - # TODO process history into SVG - html_entries.append((url, msg, "", branch)) - template = env.get_template('project.html') - click.echo(template.render(project_title = p.title, - project_desc = p.description, - project_body = p.commit_body, - project_commit = p.commit, - repos = html_entries, - base_url = instance.base_url, - # I don't think this thing supports deprecating the above? - project = p, - ganarchy = instance)) - -if __name__ == "__main__": - ganarchy() diff --git a/ganarchy/__init__.py b/ganarchy/__init__.py new file mode 100644 index 0000000..0f13b44 --- /dev/null +++ b/ganarchy/__init__.py @@ -0,0 +1,593 @@ +# GAnarchy - decentralized project hub +# Copyright (C) 2019 Soni L. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
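The new ganarchy/__main__.py added by this commit is not shown in this hunk; presumably it follows the usual package entry-point pattern so the tool can be run with "python -m ganarchy". A hypothetical sketch only (the real file may differ):

    # hypothetical sketch -- not the actual ganarchy/__main__.py; it assumes
    # the click group is still exposed as ganarchy.ganarchy, as it was in the
    # old ganarchy.py
    import ganarchy

    if __name__ == "__main__":
        ganarchy.ganarchy()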
+ +import abc +import builtins +import hashlib +import hmac +import os +import re +import sqlite3 +import subprocess + +import abdl +import click +import jinja2 +import qtoml +import requests + +from collections import defaultdict +from urllib.parse import urlparse + +import ganarchy.config + +MIGRATIONS = { + "toml-config": ( + ( + '''UPDATE "repo_history" SET "project" = (SELECT "git_commit" FROM "config") WHERE "project" IS NULL''', + '''ALTER TABLE "repos" RENAME TO "repos_old"''',), + ( + '''UPDATE "repo_history" SET "project" = NULL WHERE "project" = (SELECT "git_commit" FROM "config")''', + '''ALTER TABLE "repos_old" RENAME TO "repos"''',), + "switches to toml config format. the old 'repos' table is preserved as 'repos_old'" + ), + "better-project-management": ( + ( + '''ALTER TABLE "repos" ADD COLUMN "branch" TEXT''', + '''ALTER TABLE "repos" ADD COLUMN "project" TEXT''', + '''CREATE UNIQUE INDEX "repos_url_branch_project" ON "repos" ("url", "branch", "project")''', + '''CREATE INDEX "repos_project" ON "repos" ("project")''', + '''ALTER TABLE "repo_history" ADD COLUMN "branch" TEXT''', + '''ALTER TABLE "repo_history" ADD COLUMN "project" TEXT''', + '''CREATE INDEX "repo_history_url_branch_project" ON "repo_history" ("url", "branch", "project")''',), + ( + '''DELETE FROM "repos" WHERE "branch" IS NOT NULL OR "project" IS NOT NULL''', + '''DELETE FROM "repo_history" WHERE "branch" IS NOT NULL OR "project" IS NOT NULL''',), + "supports multiple projects, and allows choosing non-default branches" + ), + "test": ( + ('''-- apply''',), + ('''-- revert''',), + "does nothing" + ) + } + +data_home = os.environ.get('XDG_DATA_HOME', '') +if not data_home: + data_home = os.environ['HOME'] + '/.local/share' +data_home = data_home + "/ganarchy" + +cache_home = os.environ.get('XDG_CACHE_HOME', '') +if not cache_home: + cache_home = os.environ['HOME'] + '/.cache' +cache_home = cache_home + "/ganarchy" + +config_home = os.environ.get('XDG_CONFIG_HOME', '') +if not config_home: + config_home = os.environ['HOME'] + '/.config' +config_home = config_home + "/ganarchy" + +config_dirs = os.environ.get('XDG_CONFIG_DIRS', '') +if not config_dirs: + config_dirs = '/etc/xdg' +# TODO check if this is correct +config_dirs = [config_dir + "/ganarchy" for config_dir in config_dirs.split(':')] + +def get_template_loader(): + from jinja2 import DictLoader, FileSystemLoader, ChoiceLoader + return ChoiceLoader([ + FileSystemLoader([config_home + "/templates"] + [config_dir + "/templates" for config_dir in config_dirs]), + DictLoader({ + ## index.html + 'index.html': """ + + + + + {{ ganarchy.title|e }} + + + + + +
+        <h1>{{ ganarchy.title|e }}</h1>
+        <p>
+            This is {{ ganarchy.title|e }}. Currently tracking the following projects:
+        </p>
+        <ul>
+        {% for project in ganarchy.projects -%}
+            <li>{{ project.title|e }}: {{ project.description|e }}</li>
+        {% endfor -%}
+        </ul>
+        <p>
+            Powered by GAnarchy. AGPLv3-licensed. Source Code.
+        </p>
+        <p>
+            Register web+ganarchy: URI handler.
+        </p>
+    </body>
+</html>
+""",
+        ## index.toml
+        'index.toml': """# Generated by GAnarchy
+
+{%- for project, repos in config.projects.items() %}
+[projects.{{project}}]
+{%- for repo_url, branches in repos.items() %}{% for branch, options in branches.items() %}{% if options.active %}
+"{{repo_url|tomle}}".{% if branch %}"{{branch|tomle}}"{% else %}HEAD{% endif %} = { active=true }
+{%- endif %}{% endfor %}
+{%- endfor %}
+{% endfor -%}
+""",
+        ## project.html FIXME
+        'project.html': """<!DOCTYPE html>
+<html lang="en">
+    <head>
+        <meta charset="utf-8" />
+        <title>{{ project_title|e }}</title>
+        {% if project_desc %}<meta name="description" content="{{ project_desc|e }}" />{% endif %}
+    </head>
+    <body>
+        <h1>{{ project_title|e }}</h1>
+        <p>
+            Tracking {{ project_commit }}
+        </p>
+        <p>
+            {{ project_body|e|replace("\n\n", "</p><p>") }}
+        </p>
+        <ul>
+        {% for url, msg, img, branch in repos -%}
+            <li>{{ url|e }}{% if branch %} [{{ branch|e }}]{% endif %}: {{ msg|e }}</li>
+        {% endfor -%}
+        </ul>
+        <p>
+            Powered by GAnarchy. AGPLv3-licensed. Source Code.
+        </p>
+        <p>
+            Main page.
+            Register web+ganarchy: URI handler.
+        </p>
+ + +""", + ## history.svg FIXME + 'history.svg': """""", + }) + ]) + +tomletrans = str.maketrans({ + 0: '\\u0000', 1: '\\u0001', 2: '\\u0002', 3: '\\u0003', 4: '\\u0004', + 5: '\\u0005', 6: '\\u0006', 7: '\\u0007', 8: '\\b', 9: '\\t', 10: '\\n', + 11: '\\u000B', 12: '\\f', 13: '\\r', 14: '\\u000E', 15: '\\u000F', + 16: '\\u0010', 17: '\\u0011', 18: '\\u0012', 19: '\\u0013', 20: '\\u0014', + 21: '\\u0015', 22: '\\u0016', 23: '\\u0017', 24: '\\u0018', 25: '\\u0019', + 26: '\\u001A', 27: '\\u001B', 28: '\\u001C', 29: '\\u001D', 30: '\\u001E', + 31: '\\u001F', '"': '\\"', '\\': '\\\\' + }) +def tomlescape(value): + return value.translate(tomletrans) + +def get_env(): + env = jinja2.Environment(loader=get_template_loader(), autoescape=False) + env.filters['tomlescape'] = tomlescape + env.filters['tomle'] = env.filters['tomlescape'] + return env + + +@click.group() +def ganarchy(): + pass + +@ganarchy.command() +def initdb(): + """Initializes the ganarchy database.""" + os.makedirs(data_home, exist_ok=True) + conn = sqlite3.connect(data_home + "/ganarchy.db") + c = conn.cursor() + c.execute('''CREATE TABLE "repo_history" ("entry" INTEGER PRIMARY KEY ASC AUTOINCREMENT, "url" TEXT, "count" INTEGER, "head_commit" TEXT, "branch" TEXT, "project" TEXT)''') + c.execute('''CREATE INDEX "repo_history_url_branch_project" ON "repo_history" ("url", "branch", "project")''') + conn.commit() + conn.close() + +def migrations(): + @ganarchy.group() + def migrations(): + """Modifies the DB to work with a newer/older version. + + WARNING: THIS COMMAND CAN BE EXTREMELY DESTRUCTIVE!""" + + @migrations.command() + @click.argument('migration') + def apply(migration): + """Applies the migration with the given name.""" + conn = sqlite3.connect(data_home + "/ganarchy.db") + c = conn.cursor() + click.echo(MIGRATIONS[migration][0]) + for migration in MIGRATIONS[migration][0]: + c.execute(migration) + conn.commit() + conn.close() + + @click.argument('migration') + @migrations.command() + def revert(migration): + """Reverts the migration with the given name.""" + conn = sqlite3.connect(data_home + "/ganarchy.db") + c = conn.cursor() + click.echo(MIGRATIONS[migration][1]) + for migration in MIGRATIONS[migration][1]: + c.execute(migration) + conn.commit() + conn.close() + + @click.argument('migration', required=False) + @migrations.command() + def info(migration): + """Shows information about the migration with the given name.""" + if not migration: + # TODO could be improved + click.echo(MIGRATIONS.keys()) + else: + click.echo(MIGRATIONS[migration][2]) + +migrations() + +class GitError(LookupError): + """Raised when a git operation fails, generally due to a missing commit or branch, or network connection issues.""" + pass + +class Git: + def __init__(self, path): + self.path = path + self.base = ("git", "-C", path) + + def get_hash(self, target): + try: + return subprocess.check_output(self.base + ("show", target, "-s", "--format=format:%H", "--"), stderr=subprocess.DEVNULL).decode("utf-8") + except subprocess.CalledProcessError as e: + raise GitError from e + + def get_commit_message(self, target): + try: + return subprocess.check_output(self.base + ("show", target, "-s", "--format=format:%B", "--"), stderr=subprocess.DEVNULL).decode("utf-8", "replace") + except subprocess.CalledProcessError as e: + raise GitError from e + +# Currently we only use one git repo, at cache_home +GIT = Git(cache_home) + +class Repo: + def __init__(self, dbconn, project_commit, url, branch, head_commit, list_metadata=False): + self.url = url + 
self.branch = branch + self.project_commit = project_commit + self.erroring = False + + if not branch: + self.branchname = "gan" + hashlib.sha256(url.encode("utf-8")).hexdigest() + self.head = "HEAD" + else: + self.branchname = "gan" + hmac.new(branch.encode("utf-8"), url.encode("utf-8"), "sha256").hexdigest() + self.head = "refs/heads/" + branch + + if head_commit: + self.hash = head_commit + else: + try: # FIXME should we even do this? + self.hash = GIT.get_hash(self.branchname) + except GitError: + self.erroring = True + self.hash = None + + self.message = None + if list_metadata: + try: + self.update_metadata() + except GitError: + self.erroring = True + pass + + def update_metadata(self): + self.message = GIT.get_commit_message(self.branchname) + + def update(self, updating=True): + """ + Updates the git repo, returning new metadata. + """ + if updating: + try: + subprocess.check_output(["git", "-C", cache_home, "fetch", "-q", self.url, "+" + self.head + ":" + self.branchname], stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as e: + # This may error for various reasons, but some are important: dead links, etc + click.echo(e.output, err=True) + self.erroring = True + return None + pre_hash = self.hash + try: + post_hash = GIT.get_hash(self.branchname) + except GitError as e: + # This should never happen, but maybe there's some edge cases? + # TODO check + self.erroring = True + return None + self.hash = post_hash + if not pre_hash: + pre_hash = post_hash + try: + count = int(subprocess.check_output(["git", "-C", cache_home, "rev-list", "--count", pre_hash + ".." + post_hash, "--"]).decode("utf-8").strip()) + except subprocess.CalledProcessError: + count = 0 # force-pushed + try: + if updating: + subprocess.check_call(["git", "-C", cache_home, "merge-base", "--is-ancestor", self.project_commit, self.branchname], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + self.update_metadata() + return count + except (subprocess.CalledProcessError, GitError) as e: + click.echo(e, err=True) + self.erroring = True + return None + +class Project: + def __init__(self, dbconn, project_commit, list_repos=False): + self.commit = project_commit + self.refresh_metadata() + self.repos = None + if list_repos: + self.list_repos(dbconn) + + def list_repos(self, dbconn): + repos = [] + with dbconn: + for (e, url, branch, head_commit) in dbconn.execute('''SELECT "max"("e"), "url", "branch", "head_commit" FROM (SELECT "max"("T1"."entry") "e", "T1"."url", "T1"."branch", "T1"."head_commit" FROM "repo_history" "T1" + WHERE (SELECT "active" FROM "repos" "T2" WHERE "url" = "T1"."url" AND "branch" IS "T1"."branch" AND "project" IS ?1) + GROUP BY "T1"."url", "T1"."branch" + UNION + SELECT null, "T3"."url", "T3"."branch", null FROM "repos" "T3" WHERE "active" AND "project" IS ?1) + GROUP BY "url" ORDER BY "e"''', (self.commit,)): + repos.append(Repo(dbconn, self.commit, url, branch, head_commit)) + self.repos = repos + + def refresh_metadata(self): + try: + project = GIT.get_commit_message(self.commit) + project_title, project_desc = (lambda x: x.groups() if x is not None else ('', None))(re.fullmatch('^\\[Project\\]\s+(.+?)(?:\n\n(.+))?$', project, flags=re.ASCII|re.DOTALL|re.IGNORECASE)) + if not project_title.strip(): # FIXME + project_title, project_desc = ("Error parsing project commit",)*2 + # if project_desc: # FIXME + # project_desc = project_desc.strip() + self.commit_body = project + self.title = project_title + self.description = project_desc + except GitError: + self.commit_body = None 
+ self.title = None + self.description = None + + def update(self, updating=True): + # TODO? check if working correctly + results = [(repo, repo.update(updating)) for repo in self.repos] + self.refresh_metadata() + return results + +class GAnarchy: + def __init__(self, dbconn, config, list_projects=False, list_repos=False): + base_url = config.base_url + title = config.title + if not base_url: + # FIXME use a more appropriate error type + raise ValueError + if not title: + title = "GAnarchy on " + urlparse(base_url).hostname + self.title = title + self.base_url = base_url + # load config onto DB + c = dbconn.cursor() + c.execute('''CREATE TEMPORARY TABLE "repos" ("url" TEXT PRIMARY KEY, "active" INT, "branch" TEXT, "project" TEXT)''') + c.execute('''CREATE UNIQUE INDEX "temp"."repos_url_branch_project" ON "repos" ("url", "branch", "project")''') + c.execute('''CREATE INDEX "temp"."repos_project" ON "repos" ("project")''') + c.execute('''CREATE INDEX "temp"."repos_active" ON "repos" ("active")''') + for (project_commit, repos) in config.projects.items(): + for (repo_url, branches) in repos.items(): + for (branchname, options) in branches.items(): + if options['active']: # no need to insert inactive repos since they get ignored anyway + c.execute('''INSERT INTO "repos" VALUES (?, ?, ?, ?)''', (repo_url, 1, branchname, project_commit)) + dbconn.commit() + if list_projects: + projects = [] + with dbconn: + for (project,) in dbconn.execute('''SELECT DISTINCT "project" FROM "repos" '''): + projects.append(Project(dbconn, project, list_repos=list_repos)) + projects.sort(key=lambda project: project.title) # sort projects by title + self.projects = projects + else: + self.projects = None + +class Config: + def __init__(self, toml_file, base=None, remove=True): + self.projects = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))) + config_data = qtoml.load(toml_file) + self.remote_configs = config_data.get('config_srcs', []) + self.title = config_data.get('title', '') + self.base_url = config_data.get('base_url', '') + # TODO blocked domains (but only read them from config_data if remove is True) + self.blocked_domains = [] + self.blocked_domain_suffixes = [] + self.blocked_domains.sort() + self.blocked_domain_suffixes.sort(key=lambda x: x[::-1]) + # FIXME remove duplicates and process invalid entries + self.blocked_domains = tuple(self.blocked_domains) + self.blocked_domain_suffixes = tuple(self.blocked_domain_suffixes) # MUST be tuple + # TODO re.compile("(^" + "|^".join(map(re.escape, domains)) + "|" + "|".join(map(re.escape, suffixes) + ")$") + if base: + # FIXME is remove=remove the right thing to do? + self._update_projects(base.projects, remove=remove, sanitize=False) # already sanitized + projects = config_data.get('projects', {}) + self._update_projects(projects, remove=remove) + + def _update_projects(self, projects, remove, sanitize=True): + m = (ganarchy.config.CONFIG_PATTERN_SANITIZE if sanitize else ganarchy.config.CONFIG_PATTERN).match(projects) + for v in m: + commit, repo_url, branchname, options = v['commit'][0], v['url'][0], v['branch'][0], v['branch'][1] + try: + u = urlparse(repo_url) + if not u: + raise ValueError + # also raises for invalid ports, see https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse + # "Reading the port attribute will raise a ValueError if an invalid port is specified in the URL. 
[...]" + if u.port == 0: + raise ValueError + if u.scheme not in ('http', 'https'): + raise ValueError + if (u.hostname in self.blocked_domains) or (u.hostname.endswith(self.blocked_domain_suffixes)): + raise ValueError + except ValueError: + continue + if branchname == "HEAD": + branchname = None + active = options.get('active', None) + if active not in (True, False): + continue + branch = self.projects[commit][repo_url][branchname] + branch['active'] = active or (branch.get('active', False) and not remove) + +@ganarchy.command() +@click.option('--skip-errors/--no-skip-errors', default=False) +@click.argument('files', type=click.File('r', encoding='utf-8'), nargs=-1) +def merge_configs(skip_errors, files): + """Merges config files.""" + config = None + for f in files: + try: + f.reconfigure(newline='') + config = Config(f, config, remove=False) + except (UnicodeDecodeError, qtoml.decoder.TOMLDecodeError): + if not skip_errors: + raise + if config: + env = get_env() + template = env.get_template('index.toml') + click.echo(template.render(config=config)) + +def update_remote_configs(): + pass + +@ganarchy.command() +@click.argument('out', required=True) +def run(out): + """Runs ganarchy standalone. + + This will run ganarchy so it regularly updates the output directory given by OUT. + Additionally, it'll also search for the following hooks in its config dirs: + + - post_object_update_hook - executed after an object is updated. + + - post_update_cycle_hook - executed after all objects in an update cycle are updated.""" + pass + +@ganarchy.command() +@click.option('--update/--no-update', default=True) +@click.argument('project', required=False) +def cron_target(update, project): + """Runs ganarchy as a cron target.""" + conf = None + # reverse order is intentional + for d in reversed(config_dirs): + try: + conf = Config(open(d + "/config.toml", 'r', encoding='utf-8', newline=''), conf) + except (OSError, UnicodeDecodeError, qtoml.decoder.TOMLDecodeError): + pass + with open(config_home + "/config.toml", 'r', encoding='utf-8', newline='') as f: + conf = Config(f, conf) + env = get_env() + if project == "config": + # render the config + # doesn't have access to a GAnarchy object. this is deliberate. 
+ template = env.get_template('index.toml') + click.echo(template.render(config = conf)) + return + if project == "project-list": + # could be done with a template but eh w/e, this is probably better + for project in conf.projects.keys(): + click.echo(project) + return + # make sure the cache dir exists + os.makedirs(cache_home, exist_ok=True) + # make sure it is a git repo + subprocess.call(["git", "-C", cache_home, "init", "-q"]) + conn = sqlite3.connect(data_home + "/ganarchy.db") + instance = GAnarchy(conn, conf, list_projects=project in ["index", "config"]) + if project == "index": + # render the index + template = env.get_template('index.html') + click.echo(template.render(ganarchy = instance)) + return + if not instance.base_url or not project: + click.echo("No base URL or project commit specified", err=True) + return + entries = [] + generate_html = [] + c = conn.cursor() + p = Project(conn, project, list_repos=True) + results = p.update(update) + for (repo, count) in results: + if count is not None: + entries.append((repo.url, count, repo.hash, repo.branch, project)) + generate_html.append((repo.url, repo.message, count, repo.branch)) + # sort stuff twice because reasons + entries.sort(key=lambda x: x[1], reverse=True) + generate_html.sort(key=lambda x: x[2], reverse=True) + if update: + c.executemany('''INSERT INTO "repo_history" ("url", "count", "head_commit", "branch", "project") VALUES (?, ?, ?, ?, ?)''', entries) + conn.commit() + html_entries = [] + for (url, msg, count, branch) in generate_html: + history = c.execute('''SELECT "count" FROM "repo_history" WHERE "url" = ? AND "branch" IS ? AND "project" IS ? ORDER BY "entry" ASC''', (url, branch, project)).fetchall() + # TODO process history into SVG + html_entries.append((url, msg, "", branch)) + template = env.get_template('project.html') + click.echo(template.render(project_title = p.title, + project_desc = p.description, + project_body = p.commit_body, + project_commit = p.commit, + repos = html_entries, + base_url = instance.base_url, + # I don't think this thing supports deprecating the above? + project = p, + ganarchy = instance)) diff --git a/ganarchy/__main__.py b/ganarchy/__main__.py new file mode 100644 index 0000000..b270251 --- /dev/null +++ b/ganarchy/__main__.py @@ -0,0 +1,23 @@ +# GAnarchy - decentralized project hub +# Copyright (C) 2019 Soni L. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +# The base CLI +import ganarchy + +# Additional CLI commands +import ganarchy.debug + +ganarchy.ganarchy(prog_name='ganarchy') diff --git a/ganarchy/config.py b/ganarchy/config.py new file mode 100644 index 0000000..154447b --- /dev/null +++ b/ganarchy/config.py @@ -0,0 +1,157 @@ +# This file is part of GAnarchy - decentralized project hub +# Copyright (C) 2019 Soni L. 
+# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import abc +import os + +import abdl +import qtoml + +from enum import Enum + +# sanitize = skip invalid entries +# validate = error on invalid entries +CONFIG_REPOS_SANITIZE = abdl.compile("""->'projects'?:?$dict + ->commit/[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/?:?$dict + ->url:?$dict + ->branch:?$dict(->'active'?:?$bool)""", {'bool': bool, 'dict': dict}) +CONFIG_REPOS = abdl.compile("->'projects'->commit->url->branch", {'dict': dict}) + +CONFIG_TITLE_SANITIZE = abdl.compile("""->title'title'?:?$str""", {'str': str}) +CONFIG_BASE_URL_SANITIZE = abdl.compile("""->base_url'base_url'?:?$str""", {'str': str}) +CONFIG_SRCS_SANITIZE = abdl.compile("""->'config_srcs'?:?$list->src:?$str""", {'list': list, 'str': str}) + +CONFIG_TITLE_VALIDATE = abdl.compile("""->title'title':$str""", {'str': str}) +CONFIG_BASE_URL_VALIDATE = abdl.compile("""->base_url'base_url':$str""", {'str': str}) +CONFIG_SRCS_VALIDATE = abdl.compile("""->'config_srcs':$list->src:$str""", {'list': list, 'str': str}) + +class ConfigProperty(Enum): + TITLE = 1 + BASE_URL = 2 + +class ConfigSource(abc.ABC): + @abc.abstractmethod + def update(self): + """Refreshes the config if necessary.""" + pass + + @abc.abstractmethod + def exists(self): + """Returns whether the config exists.""" + pass + + def is_domain_blocked(self, domain): + """Returns whether the given domain is blocked.""" + return False + + def get_remote_config_sources(self): + """Yields URI strings for additional configs. + + Yields: + str: A remote config URI. + + """ + yield from () + + @abc.abstractmethod + def get_project_commit_tree_paths(self): + """Yields (project, URI, branch, options) tuples. + + Yields: + tuple of (str, str, str, dict): A project commit-tree path. + + Composed of a project commit hash, a repo URI, a branch name + and a dict of options respectively. + + """ + pass + + def get_supported_properties(self): + """Returns an iterable of properties supported by this config source. + + Returns: + Iterable of ConfigProperty: Supported properties. + + """ + return () + + def get_property_value(self, prop): + """Returns the value associated with the given property. + + Args: + prop (ConfigProperty): The property. + + Returns: + The value associated with the given property. + + Raises: + ValueError: If the property is not supported by this config + source. 
+ + """ + raise ValueError + +class FileConfigSource(ConfigSource): + SUPPORTED_PROPERTIES = {} + + def __init__(self, filename): + self.file_exists = False + self.last_updated = None + self.filename = filename + self.tomlobj = None + + def update(self): + try: + updtime = self.last_updated + self.last_updated = os.stat(self.filename).st_mtime + if not self.file_exists or updtime != self.last_updated: + with open(self.filename) as f: + self.tomlobj = qtoml.load(f) + self.file_exists = True + except OSError: + return + + def exists(self): + return self.file_exists + + def get_remote_config_sources(self): + for r in CONFIG_SRCS_SANITIZE.match(self.tomlobj): + yield r['src'][1] + + def get_project_commit_tree_paths(self): + for r in CONFIG_PATTERN_SANITIZE.match(self.tomlobj): + yield (r['commit'][0], r['url'][0], r['branch'][0], r['branch'][1]) + + @classmethod + def get_supported_properties(cls): + return cls.SUPPORTED_PROPERTIES + +class RemoteConfigSource(ConfigSource): + def __init__(self, uri): + self.uri = uri + self.tomlobj = None + self.remote_exists = False + + def update(self): + raise NotImplementedError + + def exists(self): + return self.remote_exists + + def get_project_commit_tree_paths(self): + for r in CONFIG_PATTERN_SANITIZE.match(self.tomlobj): + yield (r['commit'][0], r['url'][0], r['branch'][0], r['branch'][1]) + diff --git a/ganarchy/debug.py b/ganarchy/debug.py new file mode 100644 index 0000000..1310549 --- /dev/null +++ b/ganarchy/debug.py @@ -0,0 +1,36 @@ +# This file is part of GAnarchy - decentralized project hub +# Copyright (C) 2019 Soni L. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import click + +import ganarchy +import ganarchy.config + +@ganarchy.ganarchy.group() +def debug(): + pass + +@debug.command() +def paths(): + click.echo('Config home: {}'.format(ganarchy.config_home)) + click.echo('Additional config search path: {}'.format(ganarchy.config_dirs)) + click.echo('Cache home: {}'.format(ganarchy.cache_home)) + click.echo('Data home: {}'.format(ganarchy.data_home)) + +@debug.command() +def configs(): + pass + -- cgit 1.4.1
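All remotes are fetched into the single cache repository at cache_home, so Repo (in ganarchy/__init__.py above) derives a deterministic local tracking branch name for each (URL, branch) pair: "gan" followed by a SHA-256 of the URL when tracking the remote HEAD, or an HMAC keyed on the branch name otherwise. A small sketch of that derivation, not part of the patch; the URL and branch below are made-up examples.

    # Mirrors the branch-name derivation in Repo.__init__ (illustrative only).
    import hashlib
    import hmac

    def tracking_branch_name(url, branch=None):
        if not branch:
            # Tracking the remote HEAD: the name depends only on the URL.
            return "gan" + hashlib.sha256(url.encode("utf-8")).hexdigest()
        # Tracking a named branch: keyed hash, so (url, branch) pairs stay distinct.
        return "gan" + hmac.new(branch.encode("utf-8"), url.encode("utf-8"), "sha256").hexdigest()

    print(tracking_branch_name("https://example.com/example.git"))
    print(tracking_branch_name("https://example.com/example.git", "dev"))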
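The new ganarchy/config.py module defines ConfigSource as an abstract base class, with FileConfigSource and RemoteConfigSource as the two concrete implementations in this patch. The sketch below is hypothetical and not part of the patch: it shows what a minimal additional implementation might look like, backed by an already-parsed dict (for example in tests). Only the overridden methods come from the ABC; the class name and data layout are invented.

    # Hypothetical ConfigSource implementation (illustrative only).
    import ganarchy.config

    class DictConfigSource(ganarchy.config.ConfigSource):
        """Config source backed by an in-memory dict."""

        def __init__(self, data):
            self.data = data

        def update(self):
            # Nothing to refresh; the data is already in memory.
            pass

        def exists(self):
            return True

        def get_project_commit_tree_paths(self):
            # Yields (project_commit, repo_url, branch, options) tuples,
            # mirroring FileConfigSource but without ABDL-based sanitizing.
            for commit, repos in self.data.get('projects', {}).items():
                for url, branches in repos.items():
                    for branch, options in branches.items():
                        yield (commit, url, branch, options)

    # Example usage with the config layout used throughout this patch:
    src = DictConfigSource({"projects": {
        "0123456789abcdef0123456789abcdef01234567": {
            "https://example.com/example.git": {"HEAD": {"active": True}},
        },
    }})
    for commit, url, branch, options in src.get_project_commit_tree_paths():
        print(commit, url, branch, options.get("active"))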