From bdf5cb78b9d7daf702e0e05660580ab311473fdc Mon Sep 17 00:00:00 2001 From: SoniEx2 Date: Thu, 23 Jul 2020 13:15:56 -0300 Subject: Restore functionality Everything works again, as far as I can tell. --- MISTAKES.md | 1 + ganarchy/cli/run_targets.py | 29 +++--- ganarchy/core.py | 184 ++++++++++++++++++++++++++++++--------- ganarchy/data.py | 55 ++---------- ganarchy/db.py | 8 +- ganarchy/git.py | 72 +++++++++++---- ganarchy/templating/templates.py | 5 +- 7 files changed, 224 insertions(+), 130 deletions(-) diff --git a/MISTAKES.md b/MISTAKES.md index e6db15e..6f8bf51 100644 --- a/MISTAKES.md +++ b/MISTAKES.md @@ -2,3 +2,4 @@ Mistakes were made ------------------ 1. All config and repo list keys should've been under a "ganarchy" group. This can be changed, but might be painful, especially for remote sources. +2. Everything should've been URI. But instead I ended up with mixed URL/URI. diff --git a/ganarchy/cli/run_targets.py b/ganarchy/cli/run_targets.py index 5e2f8c0..401665e 100644 --- a/ganarchy/cli/run_targets.py +++ b/ganarchy/cli/run_targets.py @@ -42,9 +42,9 @@ from ganarchy.templating import environment # pass @cli.main.command() -@click.option('--update/--no-update', default=True) +@click.option('--dry-run/--no-dry-run', '--no-update/--update', default=False) @click.argument('project', required=False) -def cron_target(update, project): +def cron_target(dry_run, project): """Runs ganarchy as a cron target. "Deprecated". Useful if you want full control over how GAnarchy @@ -95,27 +95,26 @@ def cron_target(update, project): click.echo(template.render(ganarchy=instance)) return - # FIXME this should be in core, as it belongs to core logic! - entries = [] + p = core.Project(database, project) + p.load_repos() + generate_html = [] - c = conn.cursor() - p = Project(conn, project, list_repos=True) - results = p.update(update) + results = p.update(dry_run=dry_run) + #if not p.exists: + # ... for (repo, count) in results: if count is not None: - entries.append((repo.url, count, repo.hash, repo.branch, project)) generate_html.append((repo.url, repo.message, count, repo.branch)) - # sort stuff twice because reasons - entries.sort(key=lambda x: x[1], reverse=True) - generate_html.sort(key=lambda x: x[2], reverse=True) - if update: - c.executemany('''INSERT INTO "repo_history" ("url", "count", "head_commit", "branch", "project") VALUES (?, ?, ?, ?, ?)''', entries) - conn.commit() + else: + click.echo(repo.errormsg, err=True) html_entries = [] for (url, msg, count, branch) in generate_html: - history = c.execute('''SELECT "count" FROM "repo_history" WHERE "url" = ? AND "branch" IS ? AND "project" IS ? ORDER BY "entry" ASC''', (url, branch, project)).fetchall() + history = database.list_repobranch_activity(project, url, branch) # TODO process history into SVG + # TODO move this into a separate system + # (e.g. ``if project.startswith("svg-"):``) html_entries.append((url, msg, "", branch)) + template = env.get_template('project.html') click.echo(template.render(project_title = p.title, project_desc = p.description, diff --git a/ganarchy/core.py b/ganarchy/core.py index 3bdd820..c225735 100644 --- a/ganarchy/core.py +++ b/ganarchy/core.py @@ -14,6 +14,11 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +"""Core logic of GAnarchy. +""" + +import hashlib +import hmac import re from urllib import parse @@ -26,11 +31,34 @@ import ganarchy.data GIT = ganarchy.git.Git(ganarchy.dirs.CACHE_HOME) class Repo: - def __init__(self, dbconn, project_commit, url, branch, head_commit, list_metadata=False): + """A GAnarchy repo. + + Args: + dbconn (ganarchy.db.Database): The database connection. + project_commit (str): The project commit. + url (str): The git URL. + branch (str): The branch. + head_commit (str): The last known head commit. + + Attributes: + branch (str or None): The remote git branch. + branchname (str): The local git branch. + """ + # TODO fill in Attributes. + + def __init__(self, dbconn, project_commit, url, branch, head_commit): self.url = url self.branch = branch self.project_commit = project_commit + self.errormsg = None self.erroring = False + self.message = None + self.hash = None + self.branchname = None + self.head = None + + if not self._check_branch(): + return if not branch: self.branchname = "gan" + hashlib.sha256(url.encode("utf-8")).hexdigest() @@ -46,30 +74,59 @@ class Repo: self.hash = GIT.get_hash(self.branchname) except ganarchy.git.GitError: self.erroring = True - self.hash = None - self.message = None - if list_metadata: - try: - self.update_metadata() - except ganarchy.git.GitError: - self.erroring = True - pass + self.refresh_metadata() - def update_metadata(self): - self.message = GIT.get_commit_message(self.branchname) + def _check_branch(self): + """Checks if ``self.branch`` is a valid git branch name, or None. Sets + ``self.errormsg`` and ``self.erroring`` accordingly. - def update(self, updating=True): - """Updates the git repo, returning new metadata. + Returns: + bool: True if valid, False otherwise. """ - if updating: + if not self.branch: + return True + try: + GIT.check_branchname(self.branch) + return True + except ganarchy.git.GitError as e: + self.erroring = True + self.errormsg = e + return False + + def refresh_metadata(self): + """Refreshes repo metadata. + """ + if not self._check_branch(): + return + try: + self.message = GIT.get_commit_message(self.branchname) + except ganarchy.git.GitError as e: + self.erroring = True + self.errormsg = e + + # FIXME maybe this shouldn't be "public"? + # reasoning: this update() isn't reflected in the db. + # but this might be handy for dry runs. + # alternatively: change the return to be the new head commit, + # and update things accordingly. + def update(self, *, dry_run=False): + """Updates the git repo, returning a commit count. + + Args: + dry_run (bool): To simulate an update without doing anything. + In particular, without fetching commits. + """ + if not self._check_branch(): + return None + if not dry_run: try: GIT.force_fetch(self.url, self.head, self.branchname) except ganarchy.git.GitError as e: # This may error for various reasons, but some # are important: dead links, etc - click.echo(e.output, err=True) self.erroring = True + self.errormsg = e return None pre_hash = self.hash try: @@ -78,44 +135,59 @@ class Repo: # This should never happen, but maybe there's some edge cases? # TODO check self.erroring = True + self.errormsg = e return None self.hash = post_hash if not pre_hash: pre_hash = post_hash count = GIT.get_count(pre_hash, post_hash) try: - if updating: - GIT.check_history(self.branchname, self.project_commit) - self.update_metadata() + GIT.check_history(self.branchname, self.project_commit) + self.refresh_metadata() return count except ganarchy.git.GitError as e: - click.echo(e, err=True) self.erroring = True + self.errormsg = e return None class Project: - # FIXME add docs + """A GAnarchy project. + + Args: + dbconn (ganarchy.db.Database): The database connection. + project_commit (str): The project commit. - def __init__(self, dbconn, project_commit, list_repos=False): + Attributes: + commit (str): The project commit. + repos (list, optional): Repos associated with this project. + title (str, optional): Title of the project. + description (str, optional): Description of the project. + commit_body (str, optional): Raw commit message for title and + description. + exists (bool): Whether the project exists in our git cache. + """ + + def __init__(self, dbconn, project_commit): self.commit = project_commit self.refresh_metadata() self.repos = None - if list_repos: - self.list_repos(dbconn) + self._dbconn = dbconn + + def load_repos(self): + """Loads the repos into this project. - def list_repos(self, dbconn): + If repos have already been loaded, re-loads them. + """ repos = [] - with dbconn: - for (e, url, branch, head_commit) in dbconn.execute('''SELECT "max"("e"), "url", "branch", "head_commit" FROM (SELECT "max"("T1"."entry") "e", "T1"."url", "T1"."branch", "T1"."head_commit" FROM "repo_history" "T1" - WHERE (SELECT "active" FROM "repos" "T2" WHERE "url" = "T1"."url" AND "branch" IS "T1"."branch" AND "project" IS ?1) - GROUP BY "T1"."url", "T1"."branch" - UNION - SELECT null, "T3"."url", "T3"."branch", null FROM "repos" "T3" WHERE "active" AND "project" IS ?1) - GROUP BY "url" ORDER BY "e"''', (self.commit,)): - repos.append(Repo(dbconn, self.commit, url, branch, head_commit)) + for url, branch, head_commit in self._dbconn.list_repobranches(self.commit): + repos.append( + Repo(self._dbconn, self.commit, url, branch, head_commit) + ) self.repos = repos def refresh_metadata(self): + """Refreshes project metadata. + """ try: project = GIT.get_commit_message(self.commit) project_title, project_desc = (lambda x: x.groups() if x is not None else ('', None))(re.fullmatch('^\\[Project\\]\s+(.+?)(?:\n\n(.+))?$', project, flags=re.ASCII|re.DOTALL|re.IGNORECASE)) @@ -133,14 +205,43 @@ class Project: self.title = None self.description = None - def update(self, updating=True): + def update(self, *, dry_run=False): + """Updates the project and its repos. + """ # TODO? check if working correctly - results = [(repo, repo.update(updating)) for repo in self.repos] + results = [] + if self.repos is not None: + for repo in self.repos: + results.append((repo, repo.update(dry_run=dry_run))) self.refresh_metadata() + if self.repos is not None: + results.sort(key=lambda x: x[1] or -1, reverse=True) + if not dry_run: + entries = [] + for (repo, count) in results: + if count is not None: + entries.append(( + self.commit, + repo.url, + repo.branch, + repo.hash, + count + )) + self._dbconn.insert_activities(entries) return results class GAnarchy: - # FIXME add docs + """A GAnarchy instance. + + Args: + dbconn (ganarchy.db.Database): The database connection. + config (ganarchy.data.DataSource): The (effective) config. + + Attributes: + base_url (str): Instance base URL. + title (str): Instance title. + projects (list, optional): Projects associated with this instance. + """ def __init__(self, dbconn, config): try: @@ -161,12 +262,15 @@ class GAnarchy: self.title = title self.base_url = base_url self.projects = None - self.dbconn = dbconn + self._dbconn = dbconn - def load_projects(self, list_repos=False): - # FIXME add docs, get rid of list_repos + def load_projects(self): + """Loads the projects into this GAnarchy instance. + + If projects have already been loaded, re-loads them. + """ projects = [] - for project in self.dbconn.list_projects(): - projects.append(Project(self.dbconn, project, list_repos=list_repos)) + for project in self._dbconn.list_projects(): + projects.append(Project(self._dbconn, project)) projects.sort(key=lambda p: p.title or "") # sort projects by title self.projects = projects diff --git a/ganarchy/data.py b/ganarchy/data.py index 730a5a9..1f4cd19 100644 --- a/ganarchy/data.py +++ b/ganarchy/data.py @@ -134,14 +134,17 @@ class PCTP(OverridableProperty): Attributes: project_commit (str): The project commit. uri (str): The URI of a fork of the project. - branch (str): The branch name, or "HEAD" for the default branch. + branch (str): The branch name, or None for the default branch. options (dict): A dict of fork-specific options. """ def __init__(self, project_commit, uri, branch, options): self.project_commit = project_commit self.uri = uri - self.branch = branch + if branch == "HEAD": + self.branch = None + else: + self.branch = branch or None self.options = options def as_key(self): @@ -549,51 +552,3 @@ class EffectiveSource(DataSource): def __repr__(self): return "EffectiveSource({!r})".format(self.raw_source) - -# class Config: -# def __init__(self, toml_file, base=None, remove=True): -# self.projects = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))) -# config_data = qtoml.load(toml_file) -# self.remote_configs = config_data.get('config_srcs', []) -# self.title = config_data.get('title', '') -# self.base_url = config_data.get('base_url', '') -# # TODO blocked domains (but only read them from config_data if remove is True) -# self.blocked_domains = [] -# self.blocked_domain_suffixes = [] -# self.blocked_domains.sort() -# self.blocked_domain_suffixes.sort(key=lambda x: x[::-1]) -# # FIXME remove duplicates and process invalid entries -# self.blocked_domains = tuple(self.blocked_domains) -# self.blocked_domain_suffixes = tuple(self.blocked_domain_suffixes) # MUST be tuple -# # TODO re.compile("(^" + "|^".join(map(re.escape, domains)) + "|" + "|".join(map(re.escape, suffixes) + ")$") -# if base: -# # FIXME is remove=remove the right thing to do? -# self._update_projects(base.projects, remove=remove, sanitize=False) # already sanitized -# projects = config_data.get('projects', {}) -# self._update_projects(projects, remove=remove) -# -# def _update_projects(self, projects, remove, sanitize=True): -# m = (m_ganarchy_config.CONFIG_PATTERN_SANITIZE if sanitize else m_ganarchy_config.CONFIG_PATTERN).match(projects) -# for v in m: -# commit, repo_url, branchname, options = v['commit'][0], v['url'][0], v['branch'][0], v['branch'][1] -# try: -# u = urlparse(repo_url) -# if not u: -# raise ValueError -# # also raises for invalid ports, see https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse -# # "Reading the port attribute will raise a ValueError if an invalid port is specified in the URL. [...]" -# if u.port == 0: -# raise ValueError -# if u.scheme not in ('http', 'https'): -# raise ValueError -# if (u.hostname in self.blocked_domains) or (u.hostname.endswith(self.blocked_domain_suffixes)): -# raise ValueError -# except ValueError: -# continue -# if branchname == "HEAD": -# branchname = None -# active = options.get('active', None) -# if active not in (True, False): -# continue -# branch = self.projects[commit][repo_url][branchname] -# branch['active'] = active or (branch.get('active', False) and not remove) diff --git a/ganarchy/db.py b/ganarchy/db.py index 37509a5..328b682 100644 --- a/ganarchy/db.py +++ b/ganarchy/db.py @@ -160,7 +160,7 @@ class Database: c = self.conn.cursor() c.execute(''' CREATE TEMPORARY TABLE "repos" ( - "url" TEXT PRIMARY KEY, + "url" TEXT, "active" INT, "branch" TEXT, "project" TEXT @@ -222,7 +222,7 @@ class Database: ''', activities ) - conn.commit() + self.conn.commit() c.close() def list_projects(self): @@ -282,7 +282,7 @@ class Database: FROM "repos" "T3" WHERE "active" AND "project" IS ?1 ) - GROUP BY "url" + GROUP BY "url", "branch" ORDER BY "e" ''', (project_commit,) @@ -313,7 +313,7 @@ class Database: AND "project" IS ? ORDER BY "entry" ASC ''', - (url, branch, project) + (uri, branch, project_commit) ).fetchall() history = [x for [x] in history] c.close() diff --git a/ganarchy/git.py b/ganarchy/git.py index a658022..f8ccfcd 100644 --- a/ganarchy/git.py +++ b/ganarchy/git.py @@ -23,7 +23,7 @@ import subprocess -class GitError(LookupError): +class GitError(Exception): """Raised when a git operation fails, generally due to a missing commit or branch, or network connection issues. """ @@ -54,13 +54,39 @@ class Git: GitError: If an error occurs. """ try: - subprocess.check_call( + subprocess.run( self.base + ("merge-base", "--is-ancestor", commit, local_head), - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE ) except subprocess.CalledProcessError as e: - raise GitError from e + raise GitError("check history") from e + + def check_branchname(self, branchname): + """Checks if the given branchname is a valid branch name. + Raises if it isn't. + + Args: + branchname (str): Name of branch. + + Raises: + GitError: If an error occurs. + """ + try: + # TODO check that this rstrip is safe + out = subprocess.run( + self.base + ("check-ref-format", "--branch", branchname), + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ).stdout.decode("utf-8").rstrip('\r\n') + # protect against @{-1}/@{-n} ("previous checkout operation") + # is also fairly future-proofed, I hope? + if out != branchname: + raise GitError("check branchname", out, branchname) + except subprocess.CalledProcessError as e: + raise GitError("check branchname") from e def force_fetch(self, url, remote_head, local_head): """Fetches a remote head into a local head. @@ -76,12 +102,14 @@ class Git: GitError: If an error occurs. """ try: - subprocess.check_output( + subprocess.run( self.base + ("fetch", "-q", url, "+" + remote_head + ":" + local_head), - stderr=subprocess.STDOUT + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE ) except subprocess.CalledProcessError as e: - raise GitError from e + raise GitError(e.output) from e def get_count(self, first_hash, last_hash): """Returns a count of the commits added since ``first_hash`` @@ -96,10 +124,12 @@ class Git: if an error occurs. """ try: - res = subprocess.check_output( + res = subprocess.run( self.base + ("rev-list", "--count", first_hash + ".." + last_hash, "--"), - stderr=subprocess.DEVNULL - ).decode("utf-8").strip() + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ).stdout.decode("utf-8").strip() return int(res) except subprocess.CalledProcessError as e: return 0 @@ -114,12 +144,14 @@ class Git: GitError: If an error occurs. """ try: - return subprocess.check_output( + return subprocess.run( self.base + ("show", target, "-s", "--format=format:%H", "--"), - stderr=subprocess.DEVNULL - ).decode("utf-8") + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ).stdout.decode("utf-8") except subprocess.CalledProcessError as e: - raise GitError from e + raise GitError("") from e def get_commit_message(self, target): """Returns the commit message for a given target. @@ -131,9 +163,11 @@ class Git: GitError: If an error occurs. """ try: - return subprocess.check_output( + return subprocess.run( self.base + ("show", target, "-s", "--format=format:%B", "--"), - stderr=subprocess.DEVNULL - ).decode("utf-8", "replace") + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ).stdout.decode("utf-8", "replace") except subprocess.CalledProcessError as e: - raise GitError from e + raise GitError("") from e diff --git a/ganarchy/templating/templates.py b/ganarchy/templating/templates.py index 213a7fa..1435940 100644 --- a/ganarchy/templating/templates.py +++ b/ganarchy/templating/templates.py @@ -70,11 +70,12 @@ def get_template_loader(): {%- for project in database.list_projects() %} [projects.{{project}}] {%- for repo_url, branch, _head_commit in database.list_repobranches(project) %} -"{{repo_url|tomle}}".{% if branch == "HEAD" %}HEAD{% else %}"{{branch|tomle}}"{% endif %} = { active=true } +"{{repo_url|tomle}}".{% if not branch %}HEAD{% else %}"{{branch|tomle}}"{% endif %} = { active=true } {%- endfor %} {% endfor -%} """, - ## project.html FIXME + ## project.html + # FIXME convert to project.title/etc instead of project_title/etc. 'project.html': """ -- cgit 1.4.1