diff options
-rw-r--r-- | ganarchy/cli/run_targets.py | 40 | ||||
-rw-r--r-- | ganarchy/core.py | 93 | ||||
-rw-r--r-- | ganarchy/db.py | 2 | ||||
-rw-r--r-- | ganarchy/git.py | 32 |
4 files changed, 97 insertions, 70 deletions
diff --git a/ganarchy/cli/run_targets.py b/ganarchy/cli/run_targets.py
index a74ef78..53e1b0f 100644
--- a/ganarchy/cli/run_targets.py
+++ b/ganarchy/cli/run_targets.py
@@ -19,6 +19,7 @@
 
 import os
 import shutil
+import threading
 
 import click
 
@@ -31,8 +32,9 @@ from ganarchy.templating import environment
 
 @cli.main.command()
 @click.option('--keep-stale-projects/--no-keep-stale-projects', default=True)
+@click.option('--n-threads', default=4)
 @click.argument('out', required=True, type=click.Path(file_okay=False, resolve_path=True))
-def run_once(out, keep_stale_projects):
+def run_once(out, n_threads, keep_stale_projects):
     """Runs GAnarchy once.
 
     Processes any necessary updates and updates the output directory to match.
@@ -66,13 +68,17 @@ def run_once(out, keep_stale_projects):
     # make sure it is a git repo
     core.GIT.create()
 
+    # default number of threads to use
+    #n_threads = 4
+
     if True:
         # reload config and repo data
         effective_repos.update()
         database = db.connect_database(effective_conf)
+        dblock = threading.Lock()
         database.load_repos(effective_repos)
 
-        instance = core.GAnarchy(database, effective_conf)
+        instance = core.GAnarchy(database, dblock, effective_conf)
 
         if not instance.base_url:
             click.echo("No base URL specified", err=True)
@@ -87,11 +93,12 @@ def run_once(out, keep_stale_projects):
         os.makedirs(out + "/project", exist_ok=True)
 
         template_project = env.get_template('project.html')
-        for p in instance.projects:
+        n_threads = min(n_threads, len(instance.projects))
+        def update_project(p, work_repo):
             p.load_repos()
 
             generate_html = []
-            results = p.update()
+            results = p.update(work_repo)
             #if not p.exists:
             #    ...
             for (repo, count) in results:
@@ -105,7 +112,8 @@ def run_once(out, keep_stale_projects):
                     click.echo(repo.errormsg, err=True)
             html_entries = []
             for (url, msg, count, branch) in generate_html:
-                history = database.list_repobranch_activity(p.commit, url, branch)
+                with dblock:
+                    history = database.list_repobranch_activity(p.commit, url, branch)
                 # TODO process history into SVG
                 # TODO move this into a separate system
                 # (e.g. ``if project.startswith("svg-"):``)
@@ -126,6 +134,20 @@ def run_once(out, keep_stale_projects):
                     ganarchy = instance
                 ).dump(f)
 
+        def run_thread(i, work_repo):
+            for p in instance.projects[i::n_threads]:
+                update_project(p, work_repo)
+
+        with core.GIT.with_work_repos(n_threads) as work_repos:
+            threads = []
+            for i, work_repo in enumerate(work_repos):
+                t = threading.Thread(target=run_thread, args=(i, work_repo), name="ganarchy-fetch-{}".format(i))  # args= binds i/work_repo per thread; a lambda would late-bind the loop variables
+                t.start()
+                threads.append(t)
+            for t in threads:
+                t.join()
+
+    if True:
         # render the config
         template = env.get_template('index.toml')
         with open(out + "/index.toml", "w") as f:
@@ -160,6 +182,7 @@ def cron_target(dry_run, project):
     # load config and repo data
     effective_repos.update()
     database = db.connect_database(effective_conf)
+    dblock = threading.Lock()
     database.load_repos(effective_repos)
 
     # load template environment
@@ -183,7 +206,7 @@ def cron_target(dry_run, project):
     # make sure it is a git repo
     core.GIT.create()
 
-    instance = core.GAnarchy(database, effective_conf)
+    instance = core.GAnarchy(database, dblock, effective_conf)
 
     if not instance.base_url or not project:
         click.echo("No base URL or project commit specified", err=True)
@@ -196,11 +219,12 @@ def cron_target(dry_run, project):
         click.echo(template.render(ganarchy=instance), nl=False)
         return
 
-    p = core.Project(database, project)
+    p = core.Project(database, dblock, project)
     p.load_repos()
 
     generate_html = []
-    results = p.update(dry_run=dry_run)
+    with core.GIT.with_work_repos(1) as work_repos:
+        results = p.update(work_repos[0], dry_run=dry_run)
     #if not p.exists:
     #    ...
     for (repo, count) in results:
diff --git a/ganarchy/core.py b/ganarchy/core.py
index 81b098c..872705c 100644
--- a/ganarchy/core.py
+++ b/ganarchy/core.py
@@ -56,7 +56,7 @@ class Repo:
         self.branchname = None
         self.head = None
 
-        if not self._check_branch():
+        if not self._check_branch(GIT):
             return
 
         if not branch:
@@ -74,9 +74,9 @@ class Repo:
         except ganarchy.git.GitError:
             self.erroring = True
 
-        self.refresh_metadata()
+        self.refresh_metadata(GIT)
 
-    def _check_branch(self):
+    def _check_branch(self, work_repo):
         """Checks if ``self.branch`` is a valid git branch name, or None. Sets
         ``self.errormsg`` and ``self.erroring`` accordingly.
 
@@ -86,20 +86,20 @@ class Repo:
         if not self.branch:
             return True
         try:
-            GIT.check_branchname(self.branch)
+            work_repo.check_branchname(self.branch)
             return True
         except ganarchy.git.GitError as e:
             self.erroring = True
             self.errormsg = e
             return False
 
-    def refresh_metadata(self):
+    def refresh_metadata(self, work_repo):
         """Refreshes repo metadata.
         """
-        if not self._check_branch():
+        if not self._check_branch(work_repo):
             return
         try:
-            self.message = GIT.get_commit_message(self.branchname)
+            self.message = work_repo.get_commit_message(self.branchname)
         except ganarchy.git.GitError as e:
             self.erroring = True
             self.errormsg = e
@@ -109,42 +109,40 @@ class Repo:
     # but this might be handy for dry runs.
     # alternatively: change the return to be the new head commit,
     # and update things accordingly.
-    def update(self, *, dry_run=False):
+    def update(self, work_repo, *, dry_run=False):
         """Updates the git repo, returning a commit count.
 
         Args:
             dry_run (bool): To simulate an update without doing anything.
                 In particular, without fetching commits.
         """
-        if not self._check_branch():
+        if not self._check_branch(work_repo):
             return None
-        with GIT.with_work_repos(1) as work_repos: # FIXME
-            work_repo = work_repos[0]
-            if not dry_run:
-                try:
-                    work_repo.force_fetch(self.url, self.head, self.branchname)
-                except ganarchy.git.GitError as e:
-                    # This may error for various reasons, but some
-                    # are important: dead links, etc
-                    self.erroring = True
-                    self.errormsg = e
-                    return None
-            pre_hash = self.hash
+        if not dry_run:
             try:
-                post_hash = work_repo.get_hash(self.branchname)
+                work_repo.force_fetch(self.url, self.head, self.branchname)
             except ganarchy.git.GitError as e:
-                # This should never happen, but maybe there's some edge cases?
-                # TODO check
+                # This may error for various reasons, but some
+                # are important: dead links, etc
                 self.erroring = True
                 self.errormsg = e
                 return None
-            self.hash = post_hash
-            if not pre_hash:
-                pre_hash = post_hash
-            count = work_repo.get_count(pre_hash, post_hash)
+        pre_hash = self.hash
         try:
-            GIT.check_history(self.branchname, self.project_commit)
-            self.refresh_metadata()
+            post_hash = work_repo.get_hash(self.branchname)
+        except ganarchy.git.GitError as e:
+            # This should never happen, but maybe there's some edge cases?
+            # TODO check
+            self.erroring = True
+            self.errormsg = e
+            return None
+        self.hash = post_hash
+        if not pre_hash:
+            pre_hash = post_hash
+        count = work_repo.get_count(pre_hash, post_hash)
+        try:
+            work_repo.check_history(self.branchname, self.project_commit)
+            self.refresh_metadata(work_repo)
             return count
         except ganarchy.git.GitError as e:
             self.erroring = True
@@ -168,11 +166,12 @@ class Project:
         exists (bool): Whether the project exists in our git cache.
     """
 
-    def __init__(self, dbconn, project_commit):
+    def __init__(self, dbconn, dblock, project_commit):
         self.commit = project_commit
-        self.refresh_metadata()
+        self.refresh_metadata(GIT)
         self.repos = None
         self._dbconn = dbconn
+        self._dblock = dblock
 
     def load_repos(self):
         """Loads the repos into this project.
@@ -180,17 +179,18 @@ class Project:
         If repos have already been loaded, re-loads them.
         """
         repos = []
-        for url, branch, head_commit in self._dbconn.list_repobranches(self.commit):
-            repos.append(
-                Repo(self._dbconn, self.commit, url, branch, head_commit)
-            )
+        with self._dblock:
+            for url, branch, head_commit in self._dbconn.list_repobranches(self.commit):
+                repos.append(
+                    Repo(self._dbconn, self.commit, url, branch, head_commit)
+                )
         self.repos = repos
 
-    def refresh_metadata(self):
+    def refresh_metadata(self, work_repo):
         """Refreshes project metadata.
         """
         try:
-            project = GIT.get_commit_message(self.commit)
+            project = work_repo.get_commit_message(self.commit)
             project_title, project_desc = (lambda x: x.groups() if x is not None else ('', None))(re.fullmatch('^\\[Project\\]\s+(.+?)(?:\n\n(.+))?$', project, flags=re.ASCII|re.DOTALL|re.IGNORECASE))
             if not project_title.strip(): # FIXME
                 project_title, project_desc = ("Error parsing project commit",)*2
@@ -206,15 +206,15 @@ class Project:
             self.title = None
             self.description = None
 
-    def update(self, *, dry_run=False):
+    def update(self, work_repo, *, dry_run=False):
         """Updates the project and its repos.
         """
         # TODO? check if working correctly
         results = []
         if self.repos is not None:
             for repo in self.repos:
-                results.append((repo, repo.update(dry_run=dry_run)))
-        self.refresh_metadata()
+                results.append((repo, repo.update(work_repo, dry_run=dry_run)))
+        self.refresh_metadata(work_repo)
         if self.repos is not None:
             results.sort(key=lambda x: x[1] or -1, reverse=True)
             if not dry_run:
@@ -228,7 +228,8 @@ class Project:
                             repo.hash,
                             count
                         ))
-                self._dbconn.insert_activities(entries)
+                with self._dblock:
+                    self._dbconn.insert_activities(entries)
         return results
 
 class GAnarchy:
@@ -244,11 +245,12 @@ class GAnarchy:
         projects (list, optional): Projects associated with this instance.
     """
 
-    def __init__(self, dbconn, config):
+    def __init__(self, dbconn, dblock, config):
         self.title = None
         self.base_url = None
         self.projects = None
         self._dbconn = dbconn
+        self._dblock = dblock
         self._config = config
 
         self.load_metadata()
@@ -281,7 +283,8 @@ class GAnarchy:
         If projects have already been loaded, re-loads them.
         """
         projects = []
-        for project in self._dbconn.list_projects():
-            projects.append(Project(self._dbconn, project))
+        with self._dblock:
+            for project in self._dbconn.list_projects():
+                projects.append(Project(self._dbconn, self._dblock, project))
         projects.sort(key=lambda p: p.title or "") # sort projects by title
         self.projects = projects
diff --git a/ganarchy/db.py b/ganarchy/db.py
index b7aa29b..14d0d47 100644
--- a/ganarchy/db.py
+++ b/ganarchy/db.py
@@ -365,5 +365,5 @@ def connect_database(effective_config):
             for the config.
     """
     del effective_config  # currently unused, intended for the future
-    conn = sqlite3.connect(ganarchy.dirs.DATA_HOME + "/ganarchy.db")
+    conn = sqlite3.connect(ganarchy.dirs.DATA_HOME + "/ganarchy.db", check_same_thread=False)
     return Database(conn)
diff --git a/ganarchy/git.py b/ganarchy/git.py
index f0193cd..a88a979 100644
--- a/ganarchy/git.py
+++ b/ganarchy/git.py
@@ -100,6 +100,22 @@ class Git:
         except subprocess.CalledProcessError as e:
             raise GitError("get commit message") from e
 
+    def check_history(self, local_head, commit):
+        """Checks if the local head contains commit in its history.
+        Raises if it doesn't.
+
+        Args:
+            local_head (str): Name of local head.
+            commit (str): Commit hash.
+
+        Raises:
+            GitError: If an error occurs.
+        """
+        try:
+            self._cmd("merge-base", "--is-ancestor", commit, local_head)
+        except subprocess.CalledProcessError as e:
+            raise GitError("check history") from e
+
     ########################
     # Low-level operations #
     ########################
@@ -222,22 +238,6 @@ class GitCache(Git):
             physical_work_repos.append(repo)
         return _WithWorkRepos(self, physical_work_repos)
 
-    def check_history(self, local_head, commit):
-        """Checks if the local head contains commit in its history.
-        Raises if it doesn't.
-
-        Args:
-            local_head (str): Name of local head.
-            commit (str): Commit hash.
-
-        Raises:
-            GitError: If an error occurs.
-        """
-        try:
-            self._cmd("merge-base", "--is-ancestor", commit, local_head)
-        except subprocess.CalledProcessError as e:
-            raise GitError("check history") from e
-
     #######################
     # Internal operations #
     #######################