summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--ganarchy/core.py41
-rw-r--r--ganarchy/git.py337
2 files changed, 289 insertions, 89 deletions
diff --git a/ganarchy/core.py b/ganarchy/core.py
index b1025d1..81b098c 100644
--- a/ganarchy/core.py
+++ b/ganarchy/core.py
@@ -19,6 +19,7 @@
 
 import hashlib
 import hmac
+from pathlib import Path
 import re
 from urllib import parse
 
@@ -26,9 +27,7 @@ import ganarchy.git
 import ganarchy.dirs
 import ganarchy.data
 
-# Currently we only use one git repo, at CACHE_HOME
-# TODO optimize
-GIT = ganarchy.git.Git(ganarchy.dirs.CACHE_HOME)
+GIT = ganarchy.git.GitCache(Path(ganarchy.dirs.CACHE_HOME)/'ganarchy-cache.git')
 
 class Repo:
     """A GAnarchy repo.
@@ -119,28 +118,30 @@ class Repo:
         """
         if not self._check_branch():
             return None
-        if not dry_run:
+        with GIT.with_work_repos(1) as work_repos: # FIXME
+            work_repo = work_repos[0]
+            if not dry_run:
+                try:
+                    work_repo.force_fetch(self.url, self.head, self.branchname)
+                except ganarchy.git.GitError as e:
+                    # This may error for various reasons, but some
+                    # are important: dead links, etc
+                    self.erroring = True
+                    self.errormsg = e
+                    return None
+            pre_hash = self.hash
             try:
-                GIT.force_fetch(self.url, self.head, self.branchname)
+                post_hash = work_repo.get_hash(self.branchname)
             except ganarchy.git.GitError as e:
-                # This may error for various reasons, but some
-                # are important: dead links, etc
+                # This should never happen, but maybe there's some edge cases?
+                # TODO check
                 self.erroring = True
                 self.errormsg = e
                 return None
-        pre_hash = self.hash
-        try:
-            post_hash = GIT.get_hash(self.branchname)
-        except ganarchy.git.GitError as e:
-            # This should never happen, but maybe there's some edge cases?
-            # TODO check
-            self.erroring = True
-            self.errormsg = e
-            return None
-        self.hash = post_hash
-        if not pre_hash:
-            pre_hash = post_hash
-        count = GIT.get_count(pre_hash, post_hash)
+            self.hash = post_hash
+            if not pre_hash:
+                pre_hash = post_hash
+            count = work_repo.get_count(pre_hash, post_hash)
         try:
             GIT.check_history(self.branchname, self.project_commit)
             self.refresh_metadata()
diff --git a/ganarchy/git.py b/ganarchy/git.py
index f8ccfcd..f0193cd 100644
--- a/ganarchy/git.py
+++ b/ganarchy/git.py
@@ -21,6 +21,7 @@
 # For example, we return 0 for counts instead of raising, but raise
 # instead of returning empty strings for commit hashes and messages.
 
+import shutil
 import subprocess
 
 class GitError(Exception):
@@ -30,17 +31,196 @@ class GitError(Exception):
     pass
 
 class Git:
+    """A git repo.
+
+    Takes a ``pathlib.Path`` as argument.
+    """
+
     def __init__(self, path):
         self.path = path
-        self.base = ("git", "-C", path)
+
+    #########################################
+    # Operations supported on any git repo. #
+    #########################################
+
+    def check_branchname(self, branchname):
+        """Checks if the given branchname is a valid branch name.
+        Raises if it isn't.
+
+        Args:
+            branchname (str): Name of branch.
+
+        Raises:
+            GitError: If an error occurs.
+        """
+        try:
+            if branchname.startswith("-"):
+                raise GitError("check branchname", branchname)
+            out = self._cmd(
+                "check-ref-format", "--branch", branchname
+            ).stdout.decode("utf-8")
+            # protect against @{-1}/@{-n} ("previous checkout operation")
+            # is also fairly future-proofed, I hope?
+            if (not out.startswith(branchname)) or (
+                out.removeprefix(branchname) not in ('\r\n', '\n', '')
+            ):
+                raise GitError("check branchname", out, branchname)
+        except subprocess.CalledProcessError as e:
+            raise GitError("check branchname") from e
+
+    def get_hash(self, target):
+        """Returns the commit hash for a given target.
+
+        Args:
+            target (str): a refspec.
+
+        Raises:
+            GitError: If an error occurs.
+        """
+        try:
+            return self._cmd(
+                "show", target, "-s", "--format=format:%H", "--"
+            ).stdout.decode("utf-8")
+        except subprocess.CalledProcessError as e:
+            raise GitError("get hash") from e
+
+    def get_commit_message(self, target):
+        """Returns the commit message for a given target.
+
+        Args:
+            target (str): a refspec.
+
+        Raises:
+            GitError: If an error occurs.
+        """
+        try:
+            return self._cmd(
+                "show", target, "-s", "--format=format:%B", "--"
+            ).stdout.decode("utf-8", "replace")
+        except subprocess.CalledProcessError as e:
+            raise GitError("get commit message") from e
+
+    ########################
+    # Low-level operations #
+    ########################
+
+    def _cmd_init(self, *args):
+        """Runs a command for initializing this git repo.
+
+        Always uses ``--bare``.
+
+        Returns:
+            subprocess.CompletedProcess: The results of running the command.
+
+        Raises:
+            subprocess.CalledProcessError: If the command exited with a non-zero
+            status.
+        """
+        return self._cmd_common('init', '--bare', *args, self.path)
+
+    def _cmd_clone_from(self, from_, *args):
+        """Runs a command for cloning into this git repo.
+
+        Always uses ``--bare``.
+
+        Returns:
+            subprocess.CompletedProcess: The results of running the command.
+
+        Raises:
+            subprocess.CalledProcessError: If the command exited with a non-zero
+            status.
+        """
+        return self._cmd_common('clone', '--bare', *args, from_, self.path)
+
+    def _cmd(self, *args):
+        """Runs a command for operating on this git repo.
+
+        Note: Doesn't work for git init and git clone operations. Use
+        ``_cmd_init`` and ``_cmd_clone_from`` instead.
+
+        Always uses ``--bare``.
+
+        Returns:
+            subprocess.CompletedProcess: The results of running the command.
+
+        Raises:
+            subprocess.CalledProcessError: If the command exited with a non-zero
+            status.
+        """
+        return self._cmd_common('-C', self.path, '--bare', *args)
+
+    def _cmd_common(self, *args):
+        """Runs a git command with the given args.
+
+        This is a simple wrapper around ``subprocess.run``.
+
+        Returns:
+            subprocess.CompletedProcess: The results of running the command.
+
+        Raises:
+            subprocess.CalledProcessError: If the command exited with a non-zero
+            status.
+        """
+        return subprocess.run(
+            ('git',) + args,
+            check=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE
+        )
+
+class GitCache(Git):
+    """A permanent repository used to cache remote objects.
+    """
+
+    #####################
+    # Public operations #
+    #####################
 
     def create(self):
         """Creates the local repo.
 
         Can safely be called on an existing repo.
         """
-        subprocess.call(self.base + ("init", "-q"))
+        try:
+            return self._cmd_init()
+        except subprocess.CalledProcessError as e:
+            raise GitError("create") from e
 
+    def with_work_repos(self, count):
+        """Creates a context manager for managing work repos.
+
+        Args:
+            count (int): The number of work repos.
+        """
+        """From Rust:
+        /// Creates the given number of work repos, and calls the closure to run
+        /// operations on them.
+        ///
+        /// The operations can be done on the individual repos, and they'll be
+        /// merged into the main repo as this function returns.
+        ///
+        /// If the callback fails, the work repos will be deleted. If the function
+        /// succeeds, the work repos will be merged back into the main repo.
+        ///
+        /// # Panics
+        ///
+        /// Panics if a merge conflict is detected. Specifically, if two work repos
+        /// modify the same work branch.
+        ///
+        /// # "Poisoning"
+        ///
+        /// If this method unwinds, the underlying git repos, if any, will not be
+        /// deleted. Instead, future calls to this method will return a GitError.
+        """
+        work_repos = []
+        for i in range(0, count):
+            new_path = self.path.with_name('ganarchy-fetch-{}.git'.format(i))
+            work_repos.append(GitFetch(new_path))
+        physical_work_repos = []
+        for repo in work_repos:
+            self._fork(repo)
+            physical_work_repos.append(repo)
+        return _WithWorkRepos(self, physical_work_repos)
 
     def check_history(self, local_head, commit):
         """Checks if the local head contains commit in its history.
@@ -54,39 +234,50 @@ class Git:
             GitError: If an error occurs.
         """
         try:
-            subprocess.run(
-                self.base + ("merge-base", "--is-ancestor", commit, local_head),
-                check=True,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE
-            )
+            self._cmd("merge-base", "--is-ancestor", commit, local_head)
         except subprocess.CalledProcessError as e:
             raise GitError("check history") from e
 
-    def check_branchname(self, branchname):
-        """Checks if the given branchname is a valid branch name.
-        Raises if it isn't.
+    #######################
+    # Internal operations #
+    #######################
 
-        Args:
-            branchname (str): Name of branch.
+    def _fetch_work(self, from_, branch, from_branch):
+        try:
+            self._cmd(
+                "fetch", from_.path, "+{}:{}".format(from_branch, branch)
+            )
+        except subprocess.CalledProcessError as e:
+            raise GitError("fetch work") from e
 
-        Raises:
-            GitError: If an error occurs.
+    def _replace(self, old_name, new_name):
+        try:
+            self._cmd(
+                "branch", "-M", old_name, new_name
+            )
+        except subprocess.CalledProcessError as e:
+            raise GitError("replace") from e
+
+    def _fork(self, into):
+        """Makes a shared clone of this local repo into the given work repo.
+
+        Equivalent to ``git clone --bare --shared``, which is very dangerous!
         """
         try:
-            # TODO check that this rstrip is safe
-            out = subprocess.run(
-                self.base + ("check-ref-format", "--branch", branchname),
-                check=True,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE
-            ).stdout.decode("utf-8").rstrip('\r\n')
-            # protect against @{-1}/@{-n} ("previous checkout operation")
-            # is also fairly future-proofed, I hope?
-            if out != branchname:
-                raise GitError("check branchname", out, branchname)
+            return into._cmd_clone_from(self.path, '--shared')
         except subprocess.CalledProcessError as e:
-            raise GitError("check branchname") from e
+            raise GitError("fork") from e
+
+class GitFetch(Git):
+    """A temporary repository used to fetch remote objects.
+    """
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.pending_branches = set()
+
+    #####################
+    # Public operations #
+    #####################
 
     def force_fetch(self, url, remote_head, local_head):
         """Fetches a remote head into a local head.
@@ -102,14 +293,12 @@ class Git:
             GitError: If an error occurs.
         """
         try:
-            subprocess.run(
-                self.base + ("fetch", "-q", url, "+" + remote_head + ":" + local_head),
-                check=True,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE
+            self._cmd(
+                "fetch", url, "+" + remote_head + ":" + local_head
             )
+            self.pending_branches.add(local_head)
         except subprocess.CalledProcessError as e:
-            raise GitError(e.output) from e
+            raise GitError("fetch source") from e
 
     def get_count(self, first_hash, last_hash):
         """Returns a count of the commits added since ``first_hash``
@@ -124,50 +313,60 @@ class Git:
             if an error occurs.
         """
         try:
-            res = subprocess.run(
-                self.base + ("rev-list", "--count", first_hash + ".." + last_hash, "--"),
-                check=True,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE
+            res = self._cmd(
+                "rev-list", "--count", first_hash + ".." + last_hash, "--"
             ).stdout.decode("utf-8").strip()
             return int(res)
         except subprocess.CalledProcessError as e:
             return 0
 
-    def get_hash(self, target):
-        """Returns the commit hash for a given target.
-
-        Args:
-            target (str): a refspec.
+    #######################
+    # Internal operations #
+    #######################
 
-        Raises:
-            GitError: If an error occurs.
-        """
+    def _rm_branch(self, branch):
         try:
-            return subprocess.run(
-                self.base + ("show", target, "-s", "--format=format:%H", "--"),
-                check=True,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE
-            ).stdout.decode("utf-8")
+            self._cmd("branch", "-D", branch)
         except subprocess.CalledProcessError as e:
-            raise GitError("") from e
+            raise GitError("rm branch") from e
 
-    def get_commit_message(self, target):
-        """Returns the commit message for a given target.
+    def _delete(self):
+        try:
+            shutil.rmtree(self.path)
+        except IOError as e:
+            raise GitError("delete", self.path) from e
 
-        Args:
-            target (str): a refspec.
+class _WithWorkRepos:
+    """Context manager for merging forked repos in ``with_work_repos``.
+    """
+    def __init__(self, cache_repo, work_repos):
+        self.cache_repo = cache_repo
+        self.work_repos = work_repos
 
-        Raises:
-            GitError: If an error occurs.
-        """
-        try:
-            return subprocess.run(
-                self.base + ("show", target, "-s", "--format=format:%B", "--"),
-                check=True,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE
-            ).stdout.decode("utf-8", "replace")
-        except subprocess.CalledProcessError as e:
-            raise GitError("") from e
+    def __enter__(self):
+        return self.work_repos
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if exc_type is None:
+            branches = set()
+            for work in self.work_repos:
+                for branch in work.pending_branches:
+                    if branch in branches:
+                        raise GitError("Branch {} is in conflict!".format(branch))
+                    branches.add(branch)
+            del branches
+            del work
+            del branch
+
+            for i, repo in enumerate(self.work_repos):
+                for branch in repo.pending_branches:
+                    fetch_head = "{}-{}".format(branch, i)
+                    # First collect the work branch into a fetch head
+                    self.cache_repo._fetch_work(repo, fetch_head, branch)
+                    # If that succeeds, delete the work branch to free up disk
+                    repo._rm_branch(branch)
+                    # We have all the objects in the main repo and we probably
+                    # have enough disk, so just replace the fetch head into
+                    # the main branch and hope nothing errors.
+                    self.cache_repo._replace(fetch_head, branch)
+                repo._delete()