From b083832cc463265c6777b1c9d18cc50c45a30c27 Mon Sep 17 00:00:00 2001 From: SoniEx2 Date: Fri, 24 Apr 2020 20:02:29 -0300 Subject: Add basic support for remote repo lists --- ganarchy/__init__.py | 49 +---- ganarchy/cli/debug.py | 81 +++++-- ganarchy/config.py | 252 ---------------------- ganarchy/data.py | 585 ++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 648 insertions(+), 319 deletions(-) delete mode 100644 ganarchy/config.py create mode 100644 ganarchy/data.py (limited to 'ganarchy') diff --git a/ganarchy/__init__.py b/ganarchy/__init__.py index 6d24994..42ff1ea 100644 --- a/ganarchy/__init__.py +++ b/ganarchy/__init__.py @@ -32,7 +32,7 @@ import requests from collections import defaultdict from urllib.parse import urlparse -import ganarchy.config as m_ganarchy_config +import ganarchy as m_ganarchy MIGRATIONS = { "toml-config": ( @@ -437,53 +437,6 @@ class GAnarchy: else: self.projects = None -class Config: - def __init__(self, toml_file, base=None, remove=True): - self.projects = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))) - config_data = qtoml.load(toml_file) - self.remote_configs = config_data.get('config_srcs', []) - self.title = config_data.get('title', '') - self.base_url = config_data.get('base_url', '') - # TODO blocked domains (but only read them from config_data if remove is True) - self.blocked_domains = [] - self.blocked_domain_suffixes = [] - self.blocked_domains.sort() - self.blocked_domain_suffixes.sort(key=lambda x: x[::-1]) - # FIXME remove duplicates and process invalid entries - self.blocked_domains = tuple(self.blocked_domains) - self.blocked_domain_suffixes = tuple(self.blocked_domain_suffixes) # MUST be tuple - # TODO re.compile("(^" + "|^".join(map(re.escape, domains)) + "|" + "|".join(map(re.escape, suffixes) + ")$") - if base: - # FIXME is remove=remove the right thing to do? 
- self._update_projects({'projects': base.projects}, remove=remove, sanitize=False) # already sanitized - self._update_projects(config_data, remove=remove) - - def _update_projects(self, projects, remove, sanitize=True): - m = (m_ganarchy_config.CONFIG_REPOS_SANITIZE if sanitize else m_ganarchy_config.CONFIG_REPOS).match(projects) - for v in m: - commit, repo_url, branchname, options = v['commit'][0], v['url'][0], v['branch'][0], v['branch'][1] - try: - u = urlparse(repo_url) - if not u: - raise ValueError - # also raises for invalid ports, see https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse - # "Reading the port attribute will raise a ValueError if an invalid port is specified in the URL. [...]" - if u.port == 0: - raise ValueError - if u.scheme not in ('http', 'https'): - raise ValueError - if (u.hostname in self.blocked_domains) or (u.hostname.endswith(self.blocked_domain_suffixes)): - raise ValueError - except ValueError: - continue - if branchname == "HEAD": - branchname = None - active = options.get('active', None) - if active not in (True, False): - continue - branch = self.projects[commit][repo_url][branchname] - branch['active'] = active or (branch.get('active', False) and not remove) - @ganarchy.command() @click.option('--skip-errors/--no-skip-errors', default=False) @click.argument('files', type=click.File('r', encoding='utf-8'), nargs=-1) diff --git a/ganarchy/cli/debug.py b/ganarchy/cli/debug.py index 16f9e6f..5bff054 100644 --- a/ganarchy/cli/debug.py +++ b/ganarchy/cli/debug.py @@ -19,7 +19,7 @@ import qtoml import ganarchy import ganarchy.cli -import ganarchy.config +import ganarchy.data @ganarchy.cli.main.group() def debug(): @@ -32,24 +32,67 @@ def paths(): click.echo('Cache home: {}'.format(ganarchy.cache_home)) click.echo('Data home: {}'.format(ganarchy.data_home)) +def print_data_source(data_source): + if ganarchy.data.DataProperty.REPO_LIST_SOURCES in data_source.get_supported_properties(): + click.echo("\tRepo list 
sources:") + try: + iterator = data_source.get_property_values(ganarchy.data.DataProperty.REPO_LIST_SOURCES) + except LookupError: + click.echo("\t\tNone") + else: + for i, rls in enumerate(iterator, 1): + click.echo("\t\t{}.".format(i)) + click.echo("\t\t\tURI: {}".format(rls.uri)) + click.echo("\t\t\tOptions: {}".format(rls.options)) + click.echo("\t\t\tActive: {}".format(rls.active)) + + if ganarchy.data.DataProperty.VCS_REPOS in data_source.get_supported_properties(): + click.echo("\tRepos:") + try: + iterator = data_source.get_property_values(ganarchy.data.DataProperty.VCS_REPOS) + except LookupError: + click.echo("\t\tNone") + else: + for i, pctp in enumerate(iterator, 1): + click.echo("\t\t{}.".format(i)) + click.echo("\t\t\tProject: {}".format(pctp.project_commit)) + click.echo("\t\t\tURI: {}".format(pctp.uri)) + click.echo("\t\t\tBranch: {}".format(pctp.branch)) + click.echo("\t\t\tOptions: {}".format(pctp.options)) + click.echo("\t\t\tActive: {}".format(pctp.active)) + @debug.command() def configs(): - def print_conf(conf): - click.echo("\tRepos:") - for i, pctp in enumerate(conf.get_project_commit_tree_paths()): - click.echo("\t\t{}.".format(i)) - click.echo("\t\t\tProject: {}".format(pctp.project_commit)) - click.echo("\t\t\tURI: {}".format(pctp.uri)) - click.echo("\t\t\tBranch: {}".format(pctp.branch)) - click.echo("\t\t\tActive: {}".format(pctp.options == {'active': True})) - - confs = ganarchy.config.ConfigManager.new_default() - click.echo("Configs: {}".format(confs.sources)) + confs = ganarchy.data.ConfigManager.new_default() + click.echo("Configs (raw): {}".format(confs.sources)) click.echo("Breaking down the configs.") - for conf in reversed(confs.sources): - click.echo("Config: {}".format(conf.filename)) - e = conf.update() - if e is None: - print_conf(conf) - else: - click.echo("\tError: {}".format(e)) + update_excs = confs.update() + for conf, exc in zip(reversed(confs.sources), reversed(update_excs)): + click.echo("Config: {}".format(conf)) + 
if exc is not None: + click.echo("\tError(s): {}".format(exc)) + if conf.exists(): + print_data_source(conf) + click.echo("ConfigManager (raw):") + print_data_source(confs) + click.echo("ConfigManager (effective):") + print_data_source(ganarchy.data.EffectiveSource(confs)) + +@debug.command() +def repo_lists(): + confs = ganarchy.data.ConfigManager.new_default() + repo_lists = ganarchy.data.RepoListManager(confs) + update_excs = repo_lists.update() + click.echo("Repo lists (raw): {}".format(repo_lists.sources)) + click.echo("Breaking down the repo lists.") + for repo_list, exc in zip(reversed(repo_lists.sources), reversed(update_excs)): + click.echo("Repo list: {}".format(repo_list)) + if exc is not None: + click.echo("\tError(s): {}".format(exc)) + if repo_list.exists(): + print_data_source(repo_list) + click.echo("RepoListManager (raw):") + print_data_source(repo_lists) + click.echo("RepoListManager (effective):") + print_data_source(ganarchy.data.EffectiveSource(repo_lists)) + diff --git a/ganarchy/config.py b/ganarchy/config.py deleted file mode 100644 index f753698..0000000 --- a/ganarchy/config.py +++ /dev/null @@ -1,252 +0,0 @@ -# This file is part of GAnarchy - decentralized project hub -# Copyright (C) 2019 Soni L. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <https://www.gnu.org/licenses/>. 
- -import abc -import os - -import abdl -import qtoml - -from enum import Enum -from urllib.parse import urlparse - -class URIPredicate(abdl.predicates.Predicate): - def __init__(self, ports=range(1,65536), schemes=('http', 'https')): - self.ports = ports - self.schemes = schemes - - def accept(self, obj): - try: - u = urlparse(obj) - if not u: - return False - # also raises for invalid ports, see https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse - # "Reading the port attribute will raise a ValueError if an invalid port is specified in the URL. [...]" - if u.port is not None and u.port not in self.ports: - return False - if u.scheme not in self.schemes: - return False - except ValueError: - return False - return True - -# sanitize = skip invalid entries -# validate = error on invalid entries -CONFIG_REPOS_SANITIZE = abdl.compile("""->'projects'?:?$dict - ->commit/[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/?:?$dict - ->url[:?$uri]:?$dict - ->branch:?$dict(->'active'?:?$bool)""", {'bool': bool, 'dict': dict, 'uri': URIPredicate()}) -CONFIG_REPOS = abdl.compile("->'projects'->commit->url->branch", {'dict': dict}) - -CONFIG_TITLE_SANITIZE = abdl.compile("""->title'title'?:?$str""", {'str': str}) -CONFIG_BASE_URL_SANITIZE = abdl.compile("""->base_url'base_url'?:?$str""", {'str': str}) -CONFIG_SRCS_SANITIZE = abdl.compile("""->'config_srcs'?:?$list->src:?$str""", {'list': list, 'str': str}) - -CONFIG_TITLE_VALIDATE = abdl.compile("""->title'title':$str""", {'str': str}) -CONFIG_BASE_URL_VALIDATE = abdl.compile("""->base_url'base_url':$str""", {'str': str}) -CONFIG_SRCS_VALIDATE = abdl.compile("""->'config_srcs':$list->src:$str""", {'list': list, 'str': str}) - -class ConfigProperty(Enum): - TITLE = 1 - BASE_URL = 2 - -class PCTP: - def __init__(self, project_commit, uri, branch, options): - self.project_commit = project_commit - self.uri = uri - self.branch = branch - self.options = options - -class ConfigSource(abc.ABC): - @abc.abstractmethod - def 
update(self): - """Refreshes the config if necessary.""" - pass - - @abc.abstractmethod - def exists(self): - """Returns whether the config exists.""" - pass - - def is_domain_blocked(self, domain): - """Returns whether the given domain is blocked.""" - return False - - def get_remote_config_sources(self): - """Yields URI strings for additional configs. - - Yields: - str: A remote config URI. - - """ - yield from () - - @abc.abstractmethod - def get_project_commit_tree_paths(self): - """Yields (project, URI, branch, options) tuples. - - Yields: - tuple of (str, str, str, dict): A project commit-tree path. - - Composed of a project commit hash, a repo URI, a branch name - and a dict of options respectively. - - """ - pass - - def get_supported_properties(self): - """Returns an iterable of properties supported by this config source. - - Returns: - Iterable of ConfigProperty: Supported properties. - - """ - return () - - def get_property_value(self, prop): - """Returns the value associated with the given property. - - Args: - prop (ConfigProperty): The property. - - Returns: - The value associated with the given property. - - Raises: - ValueError: If the property is not supported by this config - source. 
- - """ - raise ValueError - -class FileConfigSource(ConfigSource): - SUPPORTED_PROPERTIES = {} - - def __init__(self, filename): - self.file_exists = False - self.last_updated = None - self.filename = filename - self.tomlobj = None - - def update(self): - try: - updtime = self.last_updated - self.last_updated = os.stat(self.filename).st_mtime - if not self.file_exists or updtime != self.last_updated: - with open(self.filename) as f: - self.tomlobj = qtoml.load(f) - self.file_exists = True - except (OSError, UnicodeDecodeError, qtoml.decoder.TOMLDecodeError) as e: - return e - - def exists(self): - return self.file_exists - - def get_remote_config_sources(self): - for r in CONFIG_SRCS_SANITIZE.match(self.tomlobj): - yield r['src'][1] - - def get_project_commit_tree_paths(self): - for r in CONFIG_REPOS_SANITIZE.match(self.tomlobj): - yield PCTP(r['commit'][0], r['url'][0], r['branch'][0], r['branch'][1]) - - @classmethod - def get_supported_properties(cls): - return cls.SUPPORTED_PROPERTIES - -class RemoteConfigSource(ConfigSource): - def __init__(self, uri): - self.uri = uri - self.tomlobj = None - self.remote_exists = False - - def update(self): - raise NotImplementedError - - def exists(self): - return self.remote_exists - - def get_project_commit_tree_paths(self): - for r in CONFIG_REPOS_SANITIZE.match(self.tomlobj): - yield (r['commit'][0], r['url'][0], r['branch'][0], r['branch'][1]) - -class ConfigManager: - """A ConfigManager takes care of managing config sources and - collecting their details.""" - def __init__(self, sources): - self.sources = sources - - def update(self): - for source in self.sources: - try: - source.update() - except: - raise # TODO - - @classmethod - def new_default(cls): - from ganarchy import config_home, config_dirs - base_src = [FileConfigSource(config_home + "/config.toml")] - extra_srcs = [FileConfigSource(d + "/config.toml") for d in config_dirs] - return cls(base_src + extra_srcs) - -# class Config: -# def __init__(self, 
toml_file, base=None, remove=True): -# self.projects = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))) -# config_data = qtoml.load(toml_file) -# self.remote_configs = config_data.get('config_srcs', []) -# self.title = config_data.get('title', '') -# self.base_url = config_data.get('base_url', '') -# # TODO blocked domains (but only read them from config_data if remove is True) -# self.blocked_domains = [] -# self.blocked_domain_suffixes = [] -# self.blocked_domains.sort() -# self.blocked_domain_suffixes.sort(key=lambda x: x[::-1]) -# # FIXME remove duplicates and process invalid entries -# self.blocked_domains = tuple(self.blocked_domains) -# self.blocked_domain_suffixes = tuple(self.blocked_domain_suffixes) # MUST be tuple -# # TODO re.compile("(^" + "|^".join(map(re.escape, domains)) + "|" + "|".join(map(re.escape, suffixes) + ")$") -# if base: -# # FIXME is remove=remove the right thing to do? -# self._update_projects(base.projects, remove=remove, sanitize=False) # already sanitized -# projects = config_data.get('projects', {}) -# self._update_projects(projects, remove=remove) -# -# def _update_projects(self, projects, remove, sanitize=True): -# m = (m_ganarchy_config.CONFIG_PATTERN_SANITIZE if sanitize else m_ganarchy_config.CONFIG_PATTERN).match(projects) -# for v in m: -# commit, repo_url, branchname, options = v['commit'][0], v['url'][0], v['branch'][0], v['branch'][1] -# try: -# u = urlparse(repo_url) -# if not u: -# raise ValueError -# # also raises for invalid ports, see https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse -# # "Reading the port attribute will raise a ValueError if an invalid port is specified in the URL. 
[...]" -# if u.port == 0: -# raise ValueError -# if u.scheme not in ('http', 'https'): -# raise ValueError -# if (u.hostname in self.blocked_domains) or (u.hostname.endswith(self.blocked_domain_suffixes)): -# raise ValueError -# except ValueError: -# continue -# if branchname == "HEAD": -# branchname = None -# active = options.get('active', None) -# if active not in (True, False): -# continue -# branch = self.projects[commit][repo_url][branchname] -# branch['active'] = active or (branch.get('active', False) and not remove) diff --git a/ganarchy/data.py b/ganarchy/data.py new file mode 100644 index 0000000..bc5a1b6 --- /dev/null +++ b/ganarchy/data.py @@ -0,0 +1,585 @@ +# This file is part of GAnarchy - decentralized project hub +# Copyright (C) 2019 Soni L. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +"""This module handles GAnarchy's data and config sources. + +A data source can be either a config source or a repo list source, but be +careful: they use identical syntax, but have different semantics! Mistaking +a repo list source for a config source is a recipe for security bugs! 
+""" + +import abc +import itertools +import os +import re +import time + +import abdl +import abdl.exceptions +import qtoml +import requests + +from enum import Enum +from urllib.parse import urlparse + +# TODO move elsewhere +class URIPredicate(abdl.predicates.Predicate): + def __init__(self, ports=range(1,65536), schemes=('https',)): + self.ports = ports + self.schemes = schemes + + def accept(self, obj): + try: + u = urlparse(obj) + if not u: + return False + # also raises for invalid ports, see https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse + # "Reading the port attribute will raise a ValueError if an invalid port is specified in the URL. [...]" + if u.port is not None and u.port not in self.ports: + return False + if u.scheme not in self.schemes: + return False + except ValueError: + return False + return True + +class CommitPredicate(abdl.predicates.Predicate): + def __init__(self, sha256ready=True): + if sha256ready: + self.re = re.compile(r"^[0-9a-fA-F]{40}$|^[0-9a-fA-F]{64}$") + else: + self.re = re.compile(r"^[0-9a-fA-F]{40}$") + + def accept(self, obj): + return self.re.match(obj) + +# sanitize = skip invalid entries +# validate = error on invalid entries +# LEGACY. DO NOT USE. +CONFIG_REPOS_SANITIZE = abdl.compile("""->'projects'?:?$dict + ->commit[:?$commit]:?$dict + ->url[:?$str:?$uri]:?$dict + ->branch:?$dict(->'active'?:?$bool)""", + dict(bool=bool, dict=dict, str=str, uri=URIPredicate(), commit=CommitPredicate())) + +CONFIG_TITLE_SANITIZE = abdl.compile("""->title'title'?:?$str""", dict(str=str)) +CONFIG_BASE_URL_SANITIZE = abdl.compile("""->base_url'base_url'?:?$str:?$uri""", dict(str=str, uri=URIPredicate())) + +# modern matchers, raise ValidationError if the data doesn't exist. +# they still skip "bad" entries, just like the old matchers. 
+ +_MATCHER_REPOS = abdl.compile("""->'projects':$dict + ->commit[:?$commit]:?$dict + ->url[:?$str:?$uri]:?$dict + ->branch:?$dict(->'active'?:?$bool)""", + dict(bool=bool, dict=dict, str=str, uri=URIPredicate(), commit=CommitPredicate())) +_MATCHER_REPO_LIST_SRCS = abdl.compile("""->'repo_list_srcs':$dict + ->src[:?$str:?$uri]:?$dict + (->'active'?:?$bool)""", + dict(bool=bool, list=list, dict=dict, str=str, uri=URIPredicate(schemes=('https','file',)))) +# TODO +#_MATCHER_ALIASES = abdl.compile("""->'project_settings':$dict +# ->commit/[0-9a-fA-F]{40}|[0-9a-fA-F]{64}/?:?$dict +# """, {'dict': dict}) # FIXME check for aliases, might require changes to abdl + +_MATCHER_TITLE = abdl.compile("""->title'title':$str""", dict(str=str)) +_MATCHER_BASE_URL = abdl.compile("""->base_url'base_url':$str:$uri""", dict(str=str, uri=URIPredicate())) + +class OverridableProperty(abc.ABC): + """An overridable property, with options. + + Attributes: + options (dict): Options. + """ + + @abc.abstractmethod + def as_key(self): + """Returns an opaque representation of this OverridableProperty + suitable for use as a dict key. + + The returned object is not suitable for other purposes. + """ + return () + + @property + def active(self): + """Whether this property is active. + """ + return self.options.get('active', False) + +class PCTP(OverridableProperty): + """A Project Commit-Tree Path. + + Attributes: + project_commit (str): The project commit. + uri (str): The URI of a fork of the project. + branch (str): The branch name, or "HEAD" for the default branch. + options (dict): A dict of fork-specific options. + """ + + def __init__(self, project_commit, uri, branch, options): + self.project_commit = project_commit + self.uri = uri + self.branch = branch + self.options = options + + def as_key(self): + return (self.project_commit, self.uri, self.branch, ) + +class RepoListSource(OverridableProperty): + """A source for a repo list. + + Attributes: + uri (str): The URI of the repo list. 
+ options (dict): A dict of repo list-specific options. + """ + + def __init__(self, uri, options): + self.uri = uri + self.options = options + + def as_key(self): + return (self.uri, ) + +class DataProperty(Enum): + """Represents values that can be returned by a data source. + + See documentation for DataSource get_property_value and + DataSource get_property_values for more details. + """ + INSTANCE_TITLE = (1, str) + INSTANCE_BASE_URL = (2, str) + VCS_REPOS = (3, PCTP) + REPO_LIST_SOURCES = (4, RepoListSource) + + def get_type(self): + """Returns the expected type for values from this DataProperty. + """ + return self.value[1] + +class PropertyError(LookupError): + """Raised to indicate improper use of a DataProperty. + """ + pass + +class DataSource(abc.ABC): + @abc.abstractmethod + def update(self): + """Refreshes the data associated with this source, if necessary. + """ + pass + + @abc.abstractmethod + def exists(self): + """Returns whether this source has usable data. + """ + pass + + @abc.abstractmethod + def get_supported_properties(self): + """Returns an iterable of properties supported by this data source. + + Returns: + Iterable of DataProperty: Supported properties. + + """ + return () + + def get_property_value(self, prop): + """Returns the value associated with the given property. + + If duplicated, an earlier value should override a later value. + + Args: + prop (DataProperty): The property. + + Returns: + The value associated with the given property. + + Raises: + PropertyError: If the property is not supported by this data + source. + LookupError: If the property is supported, but isn't available. + ValueError: If the property doesn't have exactly one value. 
+ """ + iterator = self.get_property_values(prop) + try: + # note: unpacking + ret, = iterator + except LookupError as exc: raise RuntimeError from exc # don't accidentally swallow bugs in the iterator + return ret + + @abc.abstractmethod + def get_property_values(self, prop): + """Yields the values associated with the given property. + + If duplicated, earlier values should override later values. + + Args: + prop (DataProperty): The property. + + Yields: + The values associated with the given property. + + Raises: + PropertyError: If the property is not supported by this data + source. + LookupError: If the property is supported, but isn't available. + + """ + raise PropertyError + +class ObjectDataSource(DataSource): + """A DataSource backed by a Python object. + + Updates to the backing object will be immediately reflected in this + DataSource. + """ + _SUPPORTED_PROPERTIES = { + DataProperty.INSTANCE_TITLE: lambda obj: (d['title'][1] for d in _MATCHER_TITLE.match(obj)), + DataProperty.INSTANCE_BASE_URL: lambda obj: (d['base_url'][1] for d in _MATCHER_BASE_URL.match(obj)), + DataProperty.VCS_REPOS: lambda obj: (PCTP(r['commit'][0], r['url'][0], r['branch'][0], r['branch'][1]) for r in _MATCHER_REPOS.match(obj)), + DataProperty.REPO_LIST_SOURCES: lambda obj: (RepoListSource(d['src'][0], d['src'][1]) for d in _MATCHER_REPO_LIST_SRCS.match(obj)), + } + + def __init__(self, obj): + self._obj = obj + + def update(self): + pass + + def exists(self): + return True + + def get_property_values(self, prop): + try: + factory = self.get_supported_properties()[prop] + except KeyError as exc: raise PropertyError from exc + iterator = factory(self._obj) + try: + first = next(iterator) + except StopIteration: return (x for x in ()) + except abdl.exceptions.ValidationError as exc: raise LookupError from exc + except LookupError as exc: raise RuntimeError from exc # don't accidentally swallow bugs in the iterator + return itertools.chain([first], iterator) + + @classmethod + def 
get_supported_properties(cls): + return cls._SUPPORTED_PROPERTIES + +class LocalDataSource(ObjectDataSource): + def __init__(self, filename): + super().__init__({}) + self.file_exists = False + self.last_updated = None + self.filename = filename + + def update(self): + try: + updtime = self.last_updated + self.last_updated = os.stat(self.filename).st_mtime + if not self.file_exists or updtime != self.last_updated: + with open(self.filename) as f: + self._obj = qtoml.load(f) + self.file_exists = True + except (OSError, UnicodeDecodeError, qtoml.decoder.TOMLDecodeError) as e: + self.file_exists = False + self.last_updated = None + self._obj = {} + return e + + def exists(self): + return self.file_exists + + def __repr__(self): + return "LocalDataSource({!r})".format(self.filename) + +class RemoteDataSource(ObjectDataSource): + def __init__(self, uri): + super().__init__({}) + self.uri = uri + self.remote_exists = False + self.next_update = 0 + + def update(self): + if self.next_update > time.time(): + return + # I long for the day when toml has a registered media type + response = requests.get(self.uri, headers={'user-agent': 'ganarchy/0.0.0', 'accept': '*/*'}) + self.remote_exists = response.status_code == 200 + seconds = 3600 + if (refresh := response.headers.get('Refresh', None)) is not None: + try: + seconds = int(refresh) + except ValueError: + refresh = refresh.split(';', 1) + try: + seconds = int(refresh[0]) + except ValueError: + pass + self.next_update = time.time() + seconds + if self.remote_exists: + response.encoding = 'utf-8' + try: + self._obj = qtoml.loads(response.text) + except (UnicodeDecodeError, qtoml.decoder.TOMLDecodeError) as e: + self._obj = {} + return e + else: + return response + + def exists(self): + return self.remote_exists + + def __repr__(self): + return "RemoteDataSource({!r})".format(self.uri) + +class DefaultsDataSource(ObjectDataSource): + """Provides a way for contributors to define/encourage some default + settings. 
+ + In particular, enables contributors to have a say in default domain + blocks. + """ + DEFAULTS = {} + + def __init__(self): + super().__init__(self.DEFAULTS) + + def exists(self): + return True + + def update(self): + return + + def __repr__(self): + return "DefaultsDataSource()" + + +class ConfigManager(DataSource): + """A ConfigManager takes care of managing config sources and + collecting their details. + + Args: + sources (list of DataSource): The config sources to be managed. + """ + def __init__(self, sources): + self.sources = sources + + @classmethod + def new_default(cls): + from ganarchy import config_home, config_dirs + srcs = [LocalDataSource(d + "/config.toml") for d in [config_home] + config_dirs] + return cls(srcs) + + def exists(self): + return True + + def update(self): + excs = [] + for source in self.sources: + excs.append(source.update()) + return excs + + def get_supported_properties(self): + return DataProperty + + def get_property_values(self, prop): + if prop not in self.get_supported_properties(): + raise PropertyError + elif prop == DataProperty.VCS_REPOS: + return self._get_vcs_repos() + elif prop == DataProperty.REPO_LIST_SOURCES: + return self._get_repo_list_sources() + else: + # short-circuiting, as these are only supposed to return a single value + for source in self.sources: + try: + return source.get_property_values(prop) + except PropertyError: + pass + except LookupError: + pass + raise LookupError + + def _get_vcs_repos(self): + for source in self.sources: + if DataProperty.VCS_REPOS in source.get_supported_properties(): + try: + iterator = source.get_property_values(DataProperty.VCS_REPOS) + except LookupError: + pass + else: + yield from iterator + + def _get_repo_list_sources(self): + for source in self.sources: + if DataProperty.REPO_LIST_SOURCES in source.get_supported_properties(): + try: + iterator = source.get_property_values(DataProperty.REPO_LIST_SOURCES) + except LookupError: + pass + else: + yield from iterator + 
+class RepoListManager(DataSource): + """A RepoListManager takes care of managing repo lists. + + Args: + config_manager (DataSource): The config manager from which the repo + lists come. + """ + def __init__(self, config_manager): + self.config_manager = EffectiveSource(config_manager) + self.sources = [self.config_manager] + + def exists(self): + return True + + def update(self): + excs = [self.config_manager.update()] + if DataProperty.REPO_LIST_SOURCES in self.config_manager.get_supported_properties(): + self.sources = [self.config_manager] + try: + it = self.config_manager.get_property_values(DataProperty.REPO_LIST_SOURCES) + except LookupError: + pass + else: + self.sources.extend(RemoteDataSource(rls.uri) for rls in it if rls.active) + for source in self.sources: + excs.append(source.update()) + return excs + + def get_supported_properties(self): + return {DataProperty.VCS_REPOS} + + def get_property_values(self, prop): + if prop not in self.get_supported_properties(): + raise PropertyError + assert prop == DataProperty.VCS_REPOS + # must raise exceptions *now* + # not when the generator runs + return self._get_vcs_repos(self.config_manager.get_property_values(DataProperty.VCS_REPOS)) + + def _get_vcs_repos(self, it): + assert self.config_manager == self.sources[0] + # config manager may override repo lists + yield from it + for source in self.sources: + if DataProperty.VCS_REPOS in source.get_supported_properties(): + try: + iterator = source.get_property_values(DataProperty.VCS_REPOS) + except LookupError: + pass + else: + for pctp in iterator: + # but repo lists aren't allowed to override anything + if pctp.active: + yield pctp + +class EffectiveSource(DataSource): + """Wraps another ``DataSource`` and yields "unique" results suitable + for general use. + + Methods on this class, in particular ``get_property_values``, handle + ``OverridableProperty`` overrides both to avoid code duplication and + so the user doesn't have to. 
+ + Args: + raw_source (DataSource): The raw backing source. + """ + def __init__(self, raw_source): + self.raw_source = raw_source + + def exists(self): + return self.raw_source.exists() + + def update(self): + return self.raw_source.update() + + def get_property_value(self, prop): + return self.raw_source.get_property_value(prop) + + def get_supported_properties(self): + return self.raw_source.get_supported_properties() + + def get_property_values(self, prop): + # must raise exceptions *now* + # not when the generator runs + return self._wrap_values(prop, self.raw_source.get_property_values(prop)) + + def _wrap_values(self, prop, it): + if issubclass(prop.get_type(), OverridableProperty): + seen = {} + for v in it: + k = v.as_key() + if k in seen: + continue + seen[k] = v + yield v + else: + yield from it + + def __repr__(self): + return "EffectiveSource({!r})".format(self.raw_source) + +# class Config: +# def __init__(self, toml_file, base=None, remove=True): +# self.projects = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))) +# config_data = qtoml.load(toml_file) +# self.remote_configs = config_data.get('config_srcs', []) +# self.title = config_data.get('title', '') +# self.base_url = config_data.get('base_url', '') +# # TODO blocked domains (but only read them from config_data if remove is True) +# self.blocked_domains = [] +# self.blocked_domain_suffixes = [] +# self.blocked_domains.sort() +# self.blocked_domain_suffixes.sort(key=lambda x: x[::-1]) +# # FIXME remove duplicates and process invalid entries +# self.blocked_domains = tuple(self.blocked_domains) +# self.blocked_domain_suffixes = tuple(self.blocked_domain_suffixes) # MUST be tuple +# # TODO re.compile("(^" + "|^".join(map(re.escape, domains)) + "|" + "|".join(map(re.escape, suffixes) + ")$") +# if base: +# # FIXME is remove=remove the right thing to do? 
+# self._update_projects(base.projects, remove=remove, sanitize=False) # already sanitized +# projects = config_data.get('projects', {}) +# self._update_projects(projects, remove=remove) +# +# def _update_projects(self, projects, remove, sanitize=True): +# m = (m_ganarchy_config.CONFIG_PATTERN_SANITIZE if sanitize else m_ganarchy_config.CONFIG_PATTERN).match(projects) +# for v in m: +# commit, repo_url, branchname, options = v['commit'][0], v['url'][0], v['branch'][0], v['branch'][1] +# try: +# u = urlparse(repo_url) +# if not u: +# raise ValueError +# # also raises for invalid ports, see https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse +# # "Reading the port attribute will raise a ValueError if an invalid port is specified in the URL. [...]" +# if u.port == 0: +# raise ValueError +# if u.scheme not in ('http', 'https'): +# raise ValueError +# if (u.hostname in self.blocked_domains) or (u.hostname.endswith(self.blocked_domain_suffixes)): +# raise ValueError +# except ValueError: +# continue +# if branchname == "HEAD": +# branchname = None +# active = options.get('active', None) +# if active not in (True, False): +# continue +# branch = self.projects[commit][repo_url][branchname] +# branch['active'] = active or (branch.get('active', False) and not remove) -- cgit 1.4.1