summary refs log tree commit diff stats
path: root/src/git.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/git.rs')
-rw-r--r--src/git.rs728
1 files changed, 728 insertions, 0 deletions
diff --git a/src/git.rs b/src/git.rs
new file mode 100644
index 0000000..df9614a
--- /dev/null
+++ b/src/git.rs
@@ -0,0 +1,728 @@
+// This file is part of GAnarchy - decentralized development hub
+// Copyright (C) 2021  Soni L.
+// 
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Affero General Public License for more details.
+// 
+// You should have received a copy of the GNU Affero General Public License
+// along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+//! This module provides some abstractions over git.
+//!
+//! Having this module allows easily(-ish) replacing the git backend, for
+//! example from calling the git CLI directly to using a git library.
+
+use std::collections::BTreeSet;
+use std::error;
+use std::ffi::{OsStr, OsString};
+use std::fmt;
+use std::fs;
+use std::io;
+use std::path::Path;
+use std::path::PathBuf;
+//use std::process;
+use std::process::{Command, Output};
+
+use impl_trait::impl_trait;
+
+use crate::util::NamePurpose;
+use crate::marker::Initializer;
+
+#[cfg(test)]
+mod tests;
+
+/// Represents a local git repo.
+pub struct Git {
+    /// Location of the repo on disk. Handed to git via `-C`, or as the
+    /// target directory of `init`/`clone`.
+    path: PathBuf,
+    /// Branches fetched into this repo by `fetch_source` that still have to
+    /// be merged into the main repo. `Some` for the work repos built by
+    /// `with_work_repos`; `None` for repos built by `at_path`.
+    pending_branches: Option<BTreeSet<String>>,
+    /// Selects the `*64` variants of `NamePurpose` when validating repo
+    /// names and commit hashes. `at_path` currently always sets it to
+    /// `false`.
+    sha256: bool,
+}
+
+/// Error returned by operations on a git repo.
+///
+/// Its `Display` output lists the command, followed by the cause.
+#[derive(Debug)]
+pub struct GitError {
+    /// What went wrong.
+    inner: GitErrorInner,
+    /// Program name and args of the command this error is reported for.
+    command: Vec<OsString>,
+}
+
+/// The underlying cause of a `GitError`.
+#[derive(Debug)]
+enum GitErrorInner {
+    /// An I/O operation failed, e.g. running the command or removing a
+    /// repo directory.
+    IoError(io::Error),
+    /// The command ran, but either exited unsuccessfully or printed output
+    /// that the caller could not accept.
+    Output(Output),
+}
+
+/// Helper for tracking args to a Command.
+///
+/// `Command` is built up alongside a plain list of the same args, so that
+/// the list can be attached to a `GitError` later.
+struct Args {
+    /// The command being built.
+    inner: Command,
+    /// The program name followed by every arg added so far.
+    args: Vec<OsString>,
+}
+
+impl_trait! {
+    impl Args {
+        /// Starts tracking a fresh invocation of the program `cmd`.
+        ///
+        /// The program name itself is recorded as the first tracked arg.
+        pub fn new_cmd<S: AsRef<OsStr>>(cmd: S) -> Self {
+            let program = cmd.as_ref();
+            let args = vec![program.to_os_string()];
+            let inner = Command::new(program);
+            Self { inner, args }
+        }
+
+        /// Appends `arg` to both the underlying Command and the tracked
+        /// list.
+        ///
+        /// Returns `self` so that calls can be chained.
+        pub fn arg<S: AsRef<OsStr>>(&mut self, arg: S) -> &mut Self {
+            let value = arg.as_ref();
+            self.args.push(value.to_os_string());
+            self.inner.arg(value);
+            self
+        }
+
+        // /// Adds multiple args to the Command.
+        // pub fn args<I, S>(&mut self, args: I) -> &mut Self
+        // where I: IntoIterator<Item=S>, S: AsRef<OsStr> {
+        //     for arg in args {
+        //         self.arg(arg);
+        //     }
+        //     self
+        // }
+
+        // impl trait Into<Result<Output, GitError>> {
+        //     fn into(self) -> Result<Output, GitError> {
+        //         todo!()
+        //     }
+        // }
+    }
+}
+
+/// RAII transaction guard for merging forked repos in with_work_repos.
+///
+/// Field `0` is the main repo and field `1` holds the work repos. Dropping
+/// the guard deletes whatever work repos it still holds, unless the thread
+/// is panicking.
+struct Merger<'a>(&'a mut Git, Vec<Git>);
+
+impl From<io::Error> for GitErrorInner {
+    /// Wraps an I/O error as the `IoError` variant.
+    fn from(err: io::Error) -> Self {
+        GitErrorInner::IoError(err)
+    }
+}
+
+impl From<Output> for GitErrorInner {
+    /// Wraps the captured output of a command as the `Output` variant.
+    fn from(output: Output) -> Self {
+        GitErrorInner::Output(output)
+    }
+}
+
+impl_trait! {
+    impl GitError {
+        /// Builds a GitError from its cause and the command it is reported
+        /// for.
+        fn new(inner: impl Into<GitErrorInner>, cmd: Vec<OsString>) -> Self {
+            let inner = inner.into();
+            Self { inner, command: cmd }
+        }
+
+        /// Prints the command, then either the I/O cause or the captured
+        /// stdout and stderr. Anything that is not UTF-8 is replaced by a
+        /// placeholder.
+        impl trait fmt::Display {
+            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+                f.write_str("Error running")?;
+                for word in self.command.iter() {
+                    write!(f, " {}", word.to_str().unwrap_or("[not UTF-8]"))?;
+                }
+                let output = match &self.inner {
+                    GitErrorInner::IoError(cause) => {
+                        return write!(f, ", caused by: {}", cause);
+                    },
+                    GitErrorInner::Output(output) => output,
+                };
+                let stdout = std::str::from_utf8(&output.stdout)
+                    .unwrap_or("[not UTF-8]");
+                let stderr = std::str::from_utf8(&output.stderr)
+                    .unwrap_or("[not UTF-8]");
+                write!(f, "\nstdout:\n{}", stdout)?;
+                write!(f, "\nstderr:\n{}", stderr)
+            }
+        }
+
+        /// Exposes the wrapped I/O error, if any, as the source.
+        impl trait error::Error {
+            fn source(&self) -> Option<&(dyn error::Error + 'static)> {
+                if let GitErrorInner::IoError(cause) = &self.inner {
+                    Some(cause)
+                } else {
+                    None
+                }
+            }
+        }
+    }
+}
+
+impl_trait! {
+    impl<'a> Merger<'a> {
+        /// Returns a shared, immutable reference to the main repo.
+        fn main(&self) -> &Git {
+            &*self.0
+        }
+
+        /// Merges the work repos back into the main repo.
+        ///
+        /// Every pending branch of every work repo is fetched into a
+        /// per-repo fetch head in the main repo, removed from the work
+        /// repo, and then moved over the main repo's branch of the same
+        /// name. A work repo is deleted once all of its pending branches
+        /// have been handled.
+        ///
+        /// # Errors
+        ///
+        /// Returns the first GitError hit while fetching, removing,
+        /// replacing or deleting. By then the work repos have already been
+        /// taken out of `self`, so the `Drop` impl has nothing left to
+        /// delete and the repos that were not fully handled stay on disk.
+        ///
+        /// # Panics
+        ///
+        /// Panics if there are branches in conflict. Also panics if a work
+        /// repo has no pending branch set, or if its file name is not of
+        /// the form `ganarchy-fetch-<id>.git`.
+        fn merge(mut self) -> Result<(), GitError> {
+            // check for conflicts first!
+            // A branch name showing up in two work repos is a conflict.
+            let mut branches = BTreeSet::<&String>::new();
+            for work in &*self {
+                for branch in work.pending_branches.as_ref().unwrap() {
+                    if !branches.insert(branch) {
+                        panic!("Branch {} is in conflict!", branch);
+                    }
+                }
+            }
+            // Ends the borrows of the work repos before they are moved out.
+            drop(branches);
+
+            // Taking the Vec leaves `self.1` empty, which is what `Drop`
+            // will see from here on.
+            for mut repo in std::mem::take(&mut self.1) {
+                // TODO clean up
+                // Recovers `<id>` from `ganarchy-fetch-<id>.git`.
+                let repo_id = repo.path.file_name().unwrap().to_str().unwrap()
+                    .strip_prefix("ganarchy-fetch-").unwrap()
+                    .strip_suffix(".git").unwrap()
+                    .to_owned();
+                let pending = repo.pending_branches.take().unwrap();
+                for branch in pending {
+                    // `fetch_head` is `<branch>-<id>`; `branch` is then
+                    // re-borrowed as its prefix to avoid a second String.
+                    let len = branch.len();
+                    let fetch_head = branch + "-" + &repo_id;
+                    let branch = &fetch_head[..len];
+                    // First collect the work branch into a fetch head
+                    self.0.fetch_work(&repo, &fetch_head, branch)?;
+                    // If that succeeds, delete the work branch to free up disk
+                    repo.rm_branch(branch)?;
+                    // We have all the objects in the main repo and we probably
+                    // have enough disk, so just replace the fetch head into
+                    // the main branch and hope nothing errors.
+                    self.0.replace(&fetch_head, branch)?;
+                }
+                repo.delete()?;
+            }
+            Ok(())
+        }
+
+        /// Accesses the work repos.
+        impl trait std::ops::Deref {
+            type Target = Vec<Git>;
+
+            fn deref(&self) -> &Vec<Git> {
+                &self.1
+            }
+        }
+
+        /// Accesses the work repos.
+        impl trait std::ops::DerefMut {
+            fn deref_mut(&mut self) -> &mut Vec<Git> {
+                &mut self.1
+            }
+        }
+
+        /// Cleans up (deletes) the work repos, if not panicking.
+        ///
+        /// NOTE(review): a failed delete is unwrapped here, so it panics
+        /// from within `drop`.
+        impl trait Drop {
+            fn drop(&mut self) {
+                // While unwinding, the repos are deliberately left on disk.
+                if !std::thread::panicking() {
+                    for repo in std::mem::take(&mut self.1) {
+                        repo.delete().unwrap();
+                    }
+                }
+            }
+        }
+    }
+}
+
+/// Initializer operations on the `Git` struct.
+impl Git {
+    /// Builds a `Git` handle for the cache repo located at `path`.
+    ///
+    /// Returns `None` if `path` has no final component, if that component
+    /// is not valid UTF-8, or if it is not a fit for
+    /// `NamePurpose::CacheRepo`. Only the struct is built; no command is
+    /// run.
+    pub fn at_path<T: AsRef<Path>>(_: Initializer, path: T) -> Option<Git> {
+        let path = path.as_ref();
+        let filename = path.file_name()?.to_str()?;
+        // TODO SHA-2
+        if !NamePurpose::CacheRepo.is_fit(filename) {
+            return None;
+        }
+        Some(Git {
+            path: path.to_path_buf(),
+            pending_branches: None,
+            sha256: false,
+        })
+    }
+}
+
+/// Operations on a git repo.
+///
+/// # Race conditions
+///
+/// These operate on the filesystem. Calling them from multiple threads
+/// can result in data corruption.
+impl Git {
+    /// Forks `count` work repos off this repo, hands them to `f`, and merges
+    /// them back afterwards.
+    ///
+    /// The closure may operate on the individual work repos. When it
+    /// returns `Ok`, the work repos are merged into the main repo before
+    /// this function returns.
+    ///
+    /// If forking or the callback fails, the work repos forked so far are
+    /// deleted and the error is returned.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a merge conflict is detected. Specifically, if two work repos
+    /// modify the same work branch. Also panics if this isn't a cache repo.
+    ///
+    /// # "Poisoning"
+    ///
+    /// If this method unwinds, the underlying git repos, if any, will not be
+    /// deleted. Instead, future calls to this method will return a GitError.
+    pub fn with_work_repos<F, R>(&mut self, count: usize, f: F)
+        -> Result<R, GitError>
+    where F: FnOnce(&mut [Git]) -> Result<R, GitError> {
+        assert!(self.is_cache_repo());
+        // Describe every work repo first; nothing touches the disk yet.
+        let planned: Vec<Git> = (0..count).map(|id| {
+            let mut new_path = self.path.clone();
+            new_path.set_file_name(format!("ganarchy-fetch-{}.git", id));
+            let git = Git {
+                path: new_path,
+                pending_branches: Some(BTreeSet::new()),
+                sha256: self.sha256,
+            };
+            assert!(git.is_work_repo());
+            git
+        }).collect();
+        // Fork into each work repo. The guard only learns about a repo once
+        // its fork succeeded, so an early return cleans up exactly those.
+        let mut merger = Merger(self, Vec::new());
+        for mut repo in planned {
+            merger.main().fork(&mut repo)?;
+            merger.push(repo);
+        }
+        let result = f(&mut *merger)?;
+        // merge the on-disk stuff
+        merger.merge()?;
+        Ok(result)
+    }
+
+    /// Force-fetches `from_ref` of source `from` into branch `branch`, and
+    /// records `branch` as pending.
+    ///
+    /// Runs `git fetch <from> +<from_ref>:<branch>` in this repo.
+    ///
+    /// # Panics
+    ///
+    /// Panics if called on a non-work repo, if `from` starts with `-`, if
+    /// `from_ref` starts with `-`, or if `branch` isn't a fit for
+    /// `NamePurpose::WorkBranch`.
+    pub fn fetch_source(&mut self, from: &str, branch: &str, from_ref: &str)
+        -> Result<(), GitError>
+    {
+        assert!(self.is_work_repo());
+        assert!(!from.starts_with("-"));
+        assert!(!from_ref.starts_with("-"));
+        assert!(NamePurpose::WorkBranch.is_fit(branch));
+        let refspec = format!("+{}:{}", from_ref, branch);
+        self.cmd(|args| {
+            args.arg("fetch").arg(from).arg(refspec);
+        })?;
+        // Only reached when the fetch succeeded.
+        let pending = self.pending_branches.as_mut().unwrap();
+        pending.insert(branch.to_owned());
+        Ok(())
+    }
+
+    /// Initializes this repo by running `git init --bare` on its path.
+    ///
+    /// # Panics
+    ///
+    /// Panics if called on a non-cache repo.
+    pub fn ensure_exists(&mut self) -> Result<(), GitError> {
+        assert!(self.is_cache_repo());
+        self.cmd_init(|_| {}).map(|_output| ())
+    }
+
+    /// Checks that `commit` is part of the history of `branch`.
+    ///
+    /// Runs `git merge-base --is-ancestor <commit> refs/heads/<branch>`;
+    /// any unsuccessful exit status is reported as an error.
+    ///
+    /// # Panics
+    ///
+    /// Panics if this isn't a cache branch on a cache repo or if commit isn't
+    /// a commit.
+    pub fn check_history(&self, branch: &str, commit: &str)
+        -> Result<(), GitError>
+    {
+        assert!(self.is_cache_repo());
+        assert!(NamePurpose::CacheBranch.is_fit(branch));
+        assert!(self.is_commit_hash(commit));
+        let branch_ref = format!("refs/heads/{}", branch);
+        self.cmd(|args| {
+            args.arg("merge-base").arg("--is-ancestor");
+            args.arg(commit).arg(branch_ref);
+        }).map(|_output| ())
+    }
+
+    /// Checks if the given branch is a valid branch.
+    ///
+    /// The name is accepted only when `git check-ref-format --branch`
+    /// succeeds and prints the name back unchanged, optionally followed by
+    /// a single line ending.
+    ///
+    /// Note: "HEAD" is **not** a branch.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `branch` starts with `-`.
+    pub fn check_branch(&self, branch: &str) -> Result<(), GitError> {
+        assert!(!branch.starts_with("-"));
+        let mut output = self.cmd(|args| {
+            args.arg("check-ref-format").arg("--branch").arg(branch);
+        })?;
+        // perf: Vec::default doesn't allocate.
+        let stdout = String::from_utf8(std::mem::take(&mut output.stdout));
+        // Whatever follows the echoed branch name, if it was echoed at all.
+        let rest = stdout.as_deref().ok().and_then(|text| {
+            text.strip_prefix(branch)
+        });
+        if matches!(rest, Some("") | Some("\n") | Some("\r\n")) {
+            return Ok(());
+        }
+        // Put the bytes back so the error carries the full output.
+        output.stdout = stdout.map_or_else(
+            |e| e.into_bytes(),
+            |s| s.into_bytes(),
+        );
+        let command: Vec<OsString> = vec![
+            "git".into(),
+            "check-ref-format".into(),
+            "--branch".into(),
+            branch.into(),
+        ];
+        Err(GitError::new(output, command))
+    }
+
+    /// Returns the number of commits removed and the number of added between
+    /// from and to, respectively.
+    ///
+    /// The numbers are read from the output of
+    /// `git rev-list --left-right --count <from>...<to> --`, which must be
+    /// two runs of ASCII digits separated by a single tab.
+    ///
+    /// # Errors
+    ///
+    /// Fails if git fails, or if its output doesn't have that shape or
+    /// doesn't fit into `u64`s.
+    ///
+    /// # Panics
+    ///
+    /// Panics if called on a non-work repo, or if `from` or `to` isn't a
+    /// commit hash.
+    pub fn get_counts(&self, from: &str, to: &str)
+        -> Result<(u64, u64), GitError>
+    {
+        // if called on a cache repo, `from` may no longer exist.
+        // this check makes sure `from` has not been garbage-collected.
+        assert!(self.is_work_repo());
+        assert!(self.is_commit_hash(from));
+        assert!(self.is_commit_hash(to));
+        let range = format!("{}...{}", from, to);
+        let mut output = self.cmd(|args| {
+            args.arg("rev-list").arg("--left-right").arg("--count");
+            args.arg(&range).arg("--");
+        })?;
+        // perf: Vec::default doesn't allocate.
+        let stdout = String::from_utf8(std::mem::take(&mut output.stdout));
+        let counts = stdout.as_deref().ok().and_then(|text| {
+            let (removed, added) = text.trim().split_once('\t')?;
+            // `parse` alone would also accept a leading `+`.
+            let numeric = |field: &str| {
+                field.bytes().all(|b| b.is_ascii_digit())
+            };
+            if !(numeric(removed) && numeric(added)) {
+                return None;
+            }
+            Some((removed.parse::<u64>().ok()?, added.parse::<u64>().ok()?))
+        });
+        if let Some(counts) = counts {
+            return Ok(counts);
+        }
+        // Put the bytes back so the error carries the full output.
+        output.stdout = stdout.map_or_else(
+            |e| e.into_bytes(),
+            |s| s.into_bytes(),
+        );
+        let command: Vec<OsString> = vec![
+            "git".into(),
+            "rev-list".into(),
+            "--left-right".into(),
+            "--count".into(),
+            range.into(),
+            "--".into(),
+        ];
+        Err(GitError::new(output, command))
+    }
+
+    /// Returns the commit hash at the given target.
+    ///
+    /// Runs `git show <target> -s --format=format:%H --` and returns its
+    /// output with surrounding whitespace removed, provided that is a
+    /// commit hash.
+    ///
+    /// # Errors
+    ///
+    /// Fails if git fails, or if its output isn't UTF-8 or isn't a commit
+    /// hash once trimmed.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `target` starts with `-`.
+    pub fn get_hash(&self, target: &str)
+        -> Result<String, GitError>
+    {
+        assert!(!target.starts_with("-"));
+        let mut output = self.cmd(|args| {
+            args.arg("show");
+            args.arg(target);
+            args.arg("-s");
+            args.arg("--format=format:%H");
+            args.arg("--");
+        })?;
+        // perf: Vec::default doesn't allocate.
+        let stdout = std::mem::take(&mut output.stdout);
+        output.stdout = match String::from_utf8(stdout) {
+            Ok(text) => {
+                // Return exactly the slice that was validated. Truncating
+                // the original string to the trimmed length would keep any
+                // leading whitespace and cut off the end of the hash.
+                let hash = text.trim();
+                if self.is_commit_hash(hash) {
+                    return Ok(hash.to_owned())
+                }
+                text.into_bytes()
+            },
+            Err(e) => e.into_bytes(),
+        };
+        let v = vec![
+            OsString::from("git"),
+            "show".into(),
+            target.into(),
+            "-s".into(),
+            "--format=format:%H".into(),
+            "--".into(),
+        ];
+        Err(GitError::new(output, v))
+    }
+
+    /// Returns the commit message for the given target.
+    ///
+    /// This is the untouched output of
+    /// `git show <target> -s --format=format:%B --`.
+    ///
+    /// # Errors
+    ///
+    /// Fails if git fails or if its output isn't UTF-8.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `target` starts with `-`.
+    pub fn get_message(&self, target: &str)
+        -> Result<String, GitError>
+    {
+        assert!(!target.starts_with("-"));
+        let mut output = self.cmd(|args| {
+            args.arg("show").arg(target).arg("-s");
+            args.arg("--format=format:%B").arg("--");
+        })?;
+        // perf: Vec::default doesn't allocate.
+        match String::from_utf8(std::mem::take(&mut output.stdout)) {
+            Ok(message) => Ok(message),
+            Err(e) => {
+                // Put the bytes back so the error carries the full output.
+                output.stdout = e.into_bytes();
+                let command: Vec<OsString> = vec![
+                    "git".into(),
+                    "show".into(),
+                    target.into(),
+                    "-s".into(),
+                    "--format=format:%B".into(),
+                    "--".into(),
+                ];
+                Err(GitError::new(output, command))
+            },
+        }
+    }
+}
+
+/// Private operations on a git repo.
+impl Git {
+    /// Force-fetches `from_branch` of work repo `from` into branch `branch`
+    /// of this repo.
+    ///
+    /// Runs `git fetch <path of from> +<from_branch>:<branch>`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if this isn't a cache repo, if `from` isn't a work repo, if
+    /// `branch` isn't a fetch head or if `from_branch` isn't a cache branch.
+    /// Also panics if the two repos disagree on `sha256`.
+    fn fetch_work(&mut self, from: &Git, branch: &str, from_branch: &str)
+        -> Result<(), GitError>
+    {
+        assert_eq!(self.sha256, from.sha256);
+        assert!(self.is_cache_repo());
+        assert!(from.is_work_repo());
+        assert!(NamePurpose::CacheBranch.is_fit(from_branch));
+        assert!(NamePurpose::CacheFetchHead.is_fit(branch));
+        let refspec = format!("+{}:{}", from_branch, branch);
+        self.cmd(|args| {
+            args.arg("fetch").arg(&from.path).arg(refspec);
+        }).map(|_output| ())
+    }
+
+    /// Replaces branch `new_name` with branch `old_name`.
+    ///
+    /// Runs `git branch -M <old_name> <new_name>`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if this isn't a cache repo, if `old_name` isn't a fetch head,
+    /// or if `new_name` isn't a cache branch.
+    fn replace(&mut self, old_name: &str, new_name: &str)
+        -> Result<(), GitError>
+    {
+        assert!(self.is_cache_repo());
+        assert!(NamePurpose::CacheBranch.is_fit(new_name));
+        assert!(NamePurpose::CacheFetchHead.is_fit(old_name));
+        self.cmd(|args| {
+            args.arg("branch").arg("-M").arg(old_name).arg(new_name);
+        }).map(|_output| ())
+    }
+
+    /// Deletes work branch `branch` by running `git branch -D <branch>`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the branch isn't a work branch or if this isn't a work
+    /// repo.
+    fn rm_branch(&mut self, branch: &str) -> Result<(), GitError> {
+        assert!(self.is_work_repo());
+        assert!(NamePurpose::WorkBranch.is_fit(branch));
+        self.cmd(|args| {
+            args.arg("branch").arg("-D").arg(branch);
+        }).map(|_output| ())
+    }
+
+    /// Makes a shared clone of this local repo into the given work repo.
+    ///
+    /// Equivalent to `git clone --bare --shared`, which is very dangerous!
+    ///
+    /// # Panics
+    ///
+    /// Panics if this repo isn't a cache repo, and/or if the given repo isn't
+    /// a work repo. Also panics if the two repos disagree on `sha256`.
+    fn fork(&self, into: &mut Git) -> Result<(), GitError> {
+        assert_eq!(self.sha256, into.sha256);
+        // check that this is a cache repo
+        assert!(self.is_cache_repo());
+        assert!(into.is_work_repo());
+        let cloned = into.cmd_clone_from(&self.path, |args| {
+            args.arg("--shared");
+        });
+        cloned.map(|_output| ())
+    }
+
+    /// Deletes this repo by recursively removing its directory.
+    ///
+    /// No git command is involved; on failure the error is reported
+    /// against a synthetic `rm -rf` command line.
+    ///
+    /// # Panics
+    ///
+    /// Panics if called on a non-work repo.
+    fn delete(self) -> Result<(), GitError> {
+        assert!(self.is_work_repo());
+        match fs::remove_dir_all(&self.path) {
+            Ok(()) => Ok(()),
+            Err(e) => {
+                let command = vec![
+                    "(synthetic)".into(),
+                    "rm".into(),
+                    "-rf".into(),
+                    OsString::from(&self.path),
+                ];
+                Err(GitError::new(e, command))
+            },
+        }
+    }
+}
+
+/// Helpers.
+impl Git {
+    /// Returns true if this is a cache repo.
+    ///
+    /// The decision is made on the final path component alone, using the
+    /// `NamePurpose` variant that matches `sha256`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the path has no final component or it isn't UTF-8.
+    fn is_cache_repo(&self) -> bool {
+        let filename = self.path.file_name().and_then(OsStr::to_str).unwrap();
+        match self.sha256 {
+            true => NamePurpose::CacheRepo64.is_fit(filename),
+            false => NamePurpose::CacheRepo.is_fit(filename),
+        }
+    }
+
+    /// Returns true if this is a work repo.
+    ///
+    /// The decision is made on the final path component alone, using the
+    /// `NamePurpose` variant that matches `sha256`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the path has no final component or it isn't UTF-8.
+    fn is_work_repo(&self) -> bool {
+        let filename = self.path.file_name().and_then(OsStr::to_str).unwrap();
+        match self.sha256 {
+            true => NamePurpose::WorkRepo64.is_fit(filename),
+            false => NamePurpose::WorkRepo.is_fit(filename),
+        }
+    }
+
+    /// Returns true if the string is a commit hash for this repo's hash
+    /// kind, as selected by `sha256`.
+    ///
+    /// Does not check if the commit exists.
+    fn is_commit_hash(&self, commit: &str) -> bool {
+        match self.sha256 {
+            true => NamePurpose::Commit64.is_fit(commit),
+            false => NamePurpose::Commit.is_fit(commit),
+        }
+    }
+}
+
+/// Raw commands on a git repo.
+impl Git {
+    /// Runs a command for initializing this git repo.
+    ///
+    /// The command line is `git init --bare`, then whatever `f` adds, then
+    /// this repo's path.
+    ///
+    /// Always uses `--bare`.
+    fn cmd_init(&self, f: impl FnOnce(&mut Args)) -> Result<Output, GitError> {
+        self.cmd_common(|args| {
+            args.arg("init");
+            args.arg("--bare");
+            f(&mut *args);
+            args.arg(&self.path);
+        })
+    }
+
+    /// Runs a command for cloning into this git repo.
+    ///
+    /// The command line is `git clone --bare`, then whatever `f` adds, then
+    /// `from` and this repo's path.
+    ///
+    /// Always uses `--bare`.
+    fn cmd_clone_from(
+        &self,
+        from: impl AsRef<OsStr>,
+        f: impl FnOnce(&mut Args)
+    ) -> Result<Output, GitError> {
+        self.cmd_common(|args| {
+            args.arg("clone");
+            args.arg("--bare");
+            f(&mut *args);
+            args.arg(from);
+            args.arg(&self.path);
+        })
+    }
+
+    /// Runs a command for operating on this git repo.
+    ///
+    /// The command line is `git -C <path> --bare`, then whatever `f` adds.
+    ///
+    /// Note: Doesn't work for git init and git clone operations. Use
+    /// [`Self::cmd_init`] and [`Self::cmd_clone_from`] instead.
+    ///
+    /// Always uses `--bare`.
+    fn cmd(&self, f: impl FnOnce(&mut Args)) -> Result<Output, GitError> {
+        self.cmd_common(|args| {
+            args.arg("-C");
+            args.arg(&self.path);
+            args.arg("--bare");
+            f(&mut *args);
+        })
+    }
+
+    /// Common handling of raw commands.
+    ///
+    /// `"git" + f()` and error handling.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the command can't be run, or if it exits with an
+    /// unsuccessful status. Either way the error carries the full command
+    /// line.
+    fn cmd_common(
+        &self,
+        f: impl FnOnce(&mut Args)
+    ) -> Result<Output, GitError> {
+        let mut tracked = Args::new_cmd("git");
+        f(&mut tracked);
+        // run the command and make nicer Error
+        let Args { inner: mut command, args } = tracked;
+        match command.output() {
+            Ok(output) if output.status.success() => Ok(output),
+            Ok(output) => Err(GitError::new(output, args)),
+            Err(e) => Err(GitError::new(e, args)),
+        }
+    }
+
+}