summary refs log tree commit diff stats
diff options
context:
space:
mode:
author SoniEx2 <endermoneymod@gmail.com> 2022-01-20 20:34:41 -0300
committer SoniEx2 <endermoneymod@gmail.com> 2022-01-20 20:34:41 -0300
commit e205879303c2aaeb9ea1a537ce3a8145ebe6fce0 (patch)
tree b997b7a5d5ed6755ba1a900dfcf83ffc6941ad75
parent 332368115922b8b61ae4471681172a413fb42d63 (diff)
Add support for threads
This is not very good but it does provide a huge speedup!
-rw-r--r-- ganarchy/cli/run_targets.py 40
-rw-r--r-- ganarchy/core.py 93
-rw-r--r-- ganarchy/db.py 2
-rw-r--r-- ganarchy/git.py 32
4 files changed, 97 insertions, 70 deletions
diff --git a/ganarchy/cli/run_targets.py b/ganarchy/cli/run_targets.py
index a74ef78..53e1b0f 100644
--- a/ganarchy/cli/run_targets.py
+++ b/ganarchy/cli/run_targets.py
@@ -19,6 +19,7 @@
 
 import os
 import shutil
+import threading
 
 import click
 
@@ -31,8 +32,9 @@ from ganarchy.templating import environment
 
 @cli.main.command()
 @click.option('--keep-stale-projects/--no-keep-stale-projects', default=True)
+@click.option('--n-threads', default=4)
 @click.argument('out', required=True, type=click.Path(file_okay=False, resolve_path=True))
-def run_once(out, keep_stale_projects):
+def run_once(out, n_threads, keep_stale_projects):
     """Runs GAnarchy once.
 
     Processes any necessary updates and updates the output directory to match.
@@ -66,13 +68,17 @@ def run_once(out, keep_stale_projects):
     # make sure it is a git repo
     core.GIT.create()
 
+    # default number of threads to use
+    #n_threads = 4
+
     if True:
         # reload config and repo data
         effective_repos.update()
         database = db.connect_database(effective_conf)
+        dblock = threading.Lock()
         database.load_repos(effective_repos)
 
-        instance = core.GAnarchy(database, effective_conf)
+        instance = core.GAnarchy(database, dblock, effective_conf)
 
         if not instance.base_url:
             click.echo("No base URL specified", err=True)
@@ -87,11 +93,12 @@ def run_once(out, keep_stale_projects):
         os.makedirs(out + "/project", exist_ok=True)
 
         template_project = env.get_template('project.html')
-        for p in instance.projects:
+        n_threads = min(n_threads, len(instance.projects))
+        def update_project(p, work_repo):
             p.load_repos()
 
             generate_html = []
-            results = p.update()
+            results = p.update(work_repo)
             #if not p.exists:
             #    ...
             for (repo, count) in results:
@@ -105,7 +112,8 @@ def run_once(out, keep_stale_projects):
                     click.echo(repo.errormsg, err=True)
             html_entries = []
             for (url, msg, count, branch) in generate_html:
-                history = database.list_repobranch_activity(p.commit, url, branch)
+                with dblock:
+                    history = database.list_repobranch_activity(p.commit, url, branch)
                 # TODO process history into SVG
                 # TODO move this into a separate system
                 # (e.g. ``if project.startswith("svg-"):``)
@@ -126,6 +134,20 @@ def run_once(out, keep_stale_projects):
                     ganarchy       = instance
                 ).dump(f)
 
+        def run_thread(i, work_repo):
+            for p in instance.projects[i::n_threads]:
+                update_project(p, work_repo)
+
+    with core.GIT.with_work_repos(n_threads) as work_repos:
+        threads = []
+        for i, work_repo in enumerate(work_repos):
+            t = threading.Thread(target=lambda: run_thread(i, work_repo), name="ganarchy-fetch-{}".format(i))
+            t.start()
+            threads.append(t)
+        for t in threads:
+            t.join()
+
+    if True:
         # render the config
         template = env.get_template('index.toml')
         with open(out + "/index.toml", "w") as f:
@@ -160,6 +182,7 @@ def cron_target(dry_run, project):
     # load config and repo data
     effective_repos.update()
     database = db.connect_database(effective_conf)
+    dblock = threading.Lock()
     database.load_repos(effective_repos)
 
     # load template environment
@@ -183,7 +206,7 @@ def cron_target(dry_run, project):
     # make sure it is a git repo
     core.GIT.create()
 
-    instance = core.GAnarchy(database, effective_conf)
+    instance = core.GAnarchy(database, dblock, effective_conf)
 
     if not instance.base_url or not project:
         click.echo("No base URL or project commit specified", err=True)
@@ -196,11 +219,12 @@ def cron_target(dry_run, project):
         click.echo(template.render(ganarchy=instance), nl=False)
         return
 
-    p = core.Project(database, project)
+    p = core.Project(database, dblock, project)
     p.load_repos()
 
     generate_html = []
-    results = p.update(dry_run=dry_run)
+    with core.GIT.with_work_repos(1) as work_repos:
+        results = p.update(work_repos[0], dry_run=dry_run)
     #if not p.exists:
     #    ...
     for (repo, count) in results:
diff --git a/ganarchy/core.py b/ganarchy/core.py
index 81b098c..872705c 100644
--- a/ganarchy/core.py
+++ b/ganarchy/core.py
@@ -56,7 +56,7 @@ class Repo:
         self.branchname = None
         self.head = None
 
-        if not self._check_branch():
+        if not self._check_branch(GIT):
             return
 
         if not branch:
@@ -74,9 +74,9 @@ class Repo:
             except ganarchy.git.GitError:
                 self.erroring = True
 
-        self.refresh_metadata()
+        self.refresh_metadata(GIT)
 
-    def _check_branch(self):
+    def _check_branch(self, work_repo):
         """Checks if ``self.branch`` is a valid git branch name, or None. Sets
         ``self.errormsg`` and ``self.erroring`` accordingly.
 
@@ -86,20 +86,20 @@ class Repo:
         if not self.branch:
             return True
         try:
-            GIT.check_branchname(self.branch)
+            work_repo.check_branchname(self.branch)
             return True
         except ganarchy.git.GitError as e:
             self.erroring = True
             self.errormsg = e
             return False
 
-    def refresh_metadata(self):
+    def refresh_metadata(self, work_repo):
         """Refreshes repo metadata.
         """
-        if not self._check_branch():
+        if not self._check_branch(work_repo):
             return
         try:
-            self.message = GIT.get_commit_message(self.branchname)
+            self.message = work_repo.get_commit_message(self.branchname)
         except ganarchy.git.GitError as e:
             self.erroring = True
             self.errormsg = e
@@ -109,42 +109,40 @@ class Repo:
     # but this might be handy for dry runs.
     # alternatively: change the return to be the new head commit,
     # and update things accordingly.
-    def update(self, *, dry_run=False):
+    def update(self, work_repo, *, dry_run=False):
         """Updates the git repo, returning a commit count.
 
         Args:
             dry_run (bool): To simulate an update without doing anything.
                 In particular, without fetching commits.
         """
-        if not self._check_branch():
+        if not self._check_branch(work_repo):
             return None
-        with GIT.with_work_repos(1) as work_repos: # FIXME
-            work_repo = work_repos[0]
-            if not dry_run:
-                try:
-                    work_repo.force_fetch(self.url, self.head, self.branchname)
-                except ganarchy.git.GitError as e:
-                    # This may error for various reasons, but some
-                    # are important: dead links, etc
-                    self.erroring = True
-                    self.errormsg = e
-                    return None
-            pre_hash = self.hash
+        if not dry_run:
             try:
-                post_hash = work_repo.get_hash(self.branchname)
+                work_repo.force_fetch(self.url, self.head, self.branchname)
             except ganarchy.git.GitError as e:
-                # This should never happen, but maybe there's some edge cases?
-                # TODO check
+                # This may error for various reasons, but some
+                # are important: dead links, etc
                 self.erroring = True
                 self.errormsg = e
                 return None
-            self.hash = post_hash
-            if not pre_hash:
-                pre_hash = post_hash
-            count = work_repo.get_count(pre_hash, post_hash)
+        pre_hash = self.hash
         try:
-            GIT.check_history(self.branchname, self.project_commit)
-            self.refresh_metadata()
+            post_hash = work_repo.get_hash(self.branchname)
+        except ganarchy.git.GitError as e:
+            # This should never happen, but maybe there's some edge cases?
+            # TODO check
+            self.erroring = True
+            self.errormsg = e
+            return None
+        self.hash = post_hash
+        if not pre_hash:
+            pre_hash = post_hash
+        count = work_repo.get_count(pre_hash, post_hash)
+        try:
+            work_repo.check_history(self.branchname, self.project_commit)
+            self.refresh_metadata(work_repo)
             return count
         except ganarchy.git.GitError as e:
             self.erroring = True
@@ -168,11 +166,12 @@ class Project:
         exists (bool): Whether the project exists in our git cache.
     """
 
-    def __init__(self, dbconn, project_commit):
+    def __init__(self, dbconn, dblock, project_commit):
         self.commit = project_commit
-        self.refresh_metadata()
+        self.refresh_metadata(GIT)
         self.repos = None
         self._dbconn = dbconn
+        self._dblock = dblock
 
     def load_repos(self):
         """Loads the repos into this project.
@@ -180,17 +179,18 @@ class Project:
         If repos have already been loaded, re-loads them.
         """
         repos = []
-        for url, branch, head_commit in self._dbconn.list_repobranches(self.commit):
-            repos.append(
-                Repo(self._dbconn, self.commit, url, branch, head_commit)
-            )
+        with self._dblock:
+            for url, branch, head_commit in self._dbconn.list_repobranches(self.commit):
+                repos.append(
+                    Repo(self._dbconn, self.commit, url, branch, head_commit)
+                )
         self.repos = repos
 
-    def refresh_metadata(self):
+    def refresh_metadata(self, work_repo):
         """Refreshes project metadata.
         """
         try:
-            project = GIT.get_commit_message(self.commit)
+            project = work_repo.get_commit_message(self.commit)
             project_title, project_desc = (lambda x: x.groups() if x is not None else ('', None))(re.fullmatch('^\\[Project\\]\s+(.+?)(?:\n\n(.+))?$', project, flags=re.ASCII|re.DOTALL|re.IGNORECASE))
             if not project_title.strip(): # FIXME
                 project_title, project_desc = ("Error parsing project commit",)*2
@@ -206,15 +206,15 @@ class Project:
             self.title = None
             self.description = None
 
-    def update(self, *, dry_run=False):
+    def update(self, work_repo, *, dry_run=False):
         """Updates the project and its repos.
         """
         # TODO? check if working correctly
         results = []
         if self.repos is not None:
             for repo in self.repos:
-                results.append((repo, repo.update(dry_run=dry_run)))
-        self.refresh_metadata()
+                results.append((repo, repo.update(work_repo, dry_run=dry_run)))
+        self.refresh_metadata(work_repo)
         if self.repos is not None:
             results.sort(key=lambda x: x[1] or -1, reverse=True)
             if not dry_run:
@@ -228,7 +228,8 @@ class Project:
                             repo.hash,
                             count
                         ))
-                self._dbconn.insert_activities(entries)
+                with self._dblock:
+                    self._dbconn.insert_activities(entries)
         return results
 
 class GAnarchy:
@@ -244,11 +245,12 @@ class GAnarchy:
         projects (list, optional): Projects associated with this instance.
     """
 
-    def __init__(self, dbconn, config):
+    def __init__(self, dbconn, dblock, config):
         self.title = None
         self.base_url = None
         self.projects = None
         self._dbconn = dbconn
+        self._dblock = dblock
         self._config = config
         self.load_metadata()
 
@@ -281,7 +283,8 @@ class GAnarchy:
         If projects have already been loaded, re-loads them.
         """
         projects = []
-        for project in self._dbconn.list_projects():
-            projects.append(Project(self._dbconn, project))
+        with self._dblock:
+            for project in self._dbconn.list_projects():
+                projects.append(Project(self._dbconn, self._dblock, project))
         projects.sort(key=lambda p: p.title or "") # sort projects by title
         self.projects = projects
diff --git a/ganarchy/db.py b/ganarchy/db.py
index b7aa29b..14d0d47 100644
--- a/ganarchy/db.py
+++ b/ganarchy/db.py
@@ -365,5 +365,5 @@ def connect_database(effective_config):
         for the config.
     """
     del effective_config  # currently unused, intended for the future
-    conn = sqlite3.connect(ganarchy.dirs.DATA_HOME + "/ganarchy.db")
+    conn = sqlite3.connect(ganarchy.dirs.DATA_HOME + "/ganarchy.db", check_same_thread=False)
     return Database(conn)
diff --git a/ganarchy/git.py b/ganarchy/git.py
index f0193cd..a88a979 100644
--- a/ganarchy/git.py
+++ b/ganarchy/git.py
@@ -100,6 +100,22 @@ class Git:
         except subprocess.CalledProcessError as e:
             raise GitError("get commit message") from e
 
+    def check_history(self, local_head, commit):
+        """Checks if the local head contains commit in its history.
+        Raises if it doesn't.
+
+        Args:
+            local_head (str): Name of local head.
+            commit (str): Commit hash.
+
+        Raises:
+            GitError: If an error occurs.
+        """
+        try:
+            self._cmd("merge-base", "--is-ancestor", commit, local_head)
+        except subprocess.CalledProcessError as e:
+            raise GitError("check history") from e
+
     ########################
     # Low-level operations #
     ########################
@@ -222,22 +238,6 @@ class GitCache(Git):
             physical_work_repos.append(repo)
         return _WithWorkRepos(self, physical_work_repos)
 
-    def check_history(self, local_head, commit):
-        """Checks if the local head contains commit in its history.
-        Raises if it doesn't.
-
-        Args:
-            local_head (str): Name of local head.
-            commit (str): Commit hash.
-
-        Raises:
-            GitError: If an error occurs.
-        """
-        try:
-            self._cmd("merge-base", "--is-ancestor", commit, local_head)
-        except subprocess.CalledProcessError as e:
-            raise GitError("check history") from e
-
     #######################
     # Internal operations #
     #######################