summary refs log blame commit diff stats
path: root/src/git.rs
blob: c892306cdfdffdbd3f200ac47a601a780a59c0e4 (plain) (tree)







































                                                                              















































                                                                    
                  
                 
             

























































                                                                       
                                                               





























































                                                                             
                                           











                                                          
                                                            












                                                                              
                                                                                


















                                                                               
                                              
 
                                                     





                                       
                                                                 

















                                                                 
                     
                                                                           
                                                                                       
                                 

                                            
                     
                  
                              
                          



                                      


     

                               











                                                                               
                                    






                                                                             
                                                                   






                                                                         


                                                         

                                    
                                         














                                               
                              
                                                             







                                                                          
                                                              



                                                           









                                                         
 
 


                                                                          
       
                                        


                











                                                                            
            

                                                          



                                                                             


                                                  

                                                                               






































                                                                  






































                                                                      









































































                                                           

                                           





                                                                                
                 
                                                                             
                                                                                    


                                             













                                                              
                                                



                                                         









                                                              














                                                                           



                                     
                                                 
                                                                   







                                                        
                          
                                             












                                                    


                                                  
                                                               
                                                             














                                                    
                          































































                                                                               
// This file is part of GAnarchy - decentralized development hub
// Copyright (C) 2021  Soni L.
// 
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// 
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
// 
// You should have received a copy of the GNU Affero General Public License
// along with this program.  If not, see <https://www.gnu.org/licenses/>.

//! This module provides some abstractions over git.
//!
//! Having this module allows easily(-ish) replacing the git backend, for
//! example from calling the git CLI directly to using a git library.

use std::collections::BTreeSet;
use std::error;
use std::ffi::{OsStr, OsString};
use std::fmt;
use std::fs;
use std::io;
use std::path::Path;
use std::path::PathBuf;
//use std::process;
use std::process::{Command, Output};

use impl_trait::impl_trait;

use crate::util::NamePurpose;
use crate::marker::Initializer;

#[cfg(test)]
mod tests;

// Private module holding the supertrait of `RepoKind`. Because the module is
// not `pub`, code outside this module tree cannot name `Sealed`, and so
// cannot implement `RepoKind` for its own types.
mod sealed { 
    /// Supertrait of `RepoKind` carrying the per-kind path validation.
    pub trait Sealed {
        /// Returns whether `filename` (the final component of a repo path)
        /// is acceptable for this repository kind.
        ///
        /// `sha256` selects which naming rule is applied; the implementors
        /// in this file switch between two `NamePurpose` values based on it.
        fn is_repo_path_valid(filename: &str, sha256: bool) -> bool;
    }
}

/// A repository kind.
///
/// Used as the type parameter of `Git` to distinguish repositories by role.
/// It requires the private `sealed::Sealed` supertrait; in this file it is
/// implemented for `CacheRepo` and `FetchRepo`.
pub trait RepoKind: sealed::Sealed {
}

/// A permanent repository used to cache remote objects.
pub struct CacheRepo {
    // Private unit field: prevents construction of `CacheRepo` with a struct
    // literal from outside this module.
    _non_exhaustive: (),
}

impl sealed::Sealed for CacheRepo {
    /// Validates `filename` against the cache-repo naming rule.
    ///
    /// When `sha256` is set the `CacheRepo64` rule is applied, otherwise the
    /// `CacheRepo` rule.
    fn is_repo_path_valid(filename: &str, sha256: bool) -> bool {
        let purpose = if sha256 {
            NamePurpose::CacheRepo64
        } else {
            NamePurpose::CacheRepo
        };
        purpose.is_fit(filename)
    }
}

/// Marks `CacheRepo` as usable for the type parameter of `Git`.
impl RepoKind for CacheRepo {}

/// A temporary repository used to fetch remote objects.
pub struct FetchRepo {
    /// Names of the branches fetched into this repo so far.
    ///
    /// `fetch_source` inserts a name after each successful fetch, and
    /// `Merger::merge` drains the set when moving branches into the main repo.
    pending_branches: BTreeSet<String>,
}

impl sealed::Sealed for FetchRepo {
    /// Validates `filename` against the work-repo naming rule.
    ///
    /// When `sha256` is set the `WorkRepo64` rule is applied, otherwise the
    /// `WorkRepo` rule.
    fn is_repo_path_valid(filename: &str, sha256: bool) -> bool {
        let purpose = if sha256 {
            NamePurpose::WorkRepo64
        } else {
            NamePurpose::WorkRepo
        };
        purpose.is_fit(filename)
    }
}

/// Marks `FetchRepo` as usable for the type parameter of `Git`.
impl RepoKind for FetchRepo {}

/// A local git repository.
///
/// The type parameter selects the repository kind, which decides both the
/// path naming rule and which operations are available.
pub struct Git<T: RepoKind> {
    /// Location of the repository on disk; passed to git via `-C`.
    path: PathBuf,
    /// Selects the `...64` variants of the `NamePurpose` checks (repo names
    /// and commit hashes). `at_path` currently always sets this to `false`.
    sha256: bool,
    /// Kind-specific state.
    inner: T,
}

/// Error returned by operations on a git repo.
///
/// Pairs the underlying failure with the command line it is attributed to;
/// the `Display` impl prints both.
#[derive(Debug)]
pub struct GitError {
    /// The underlying failure.
    inner: GitErrorInner,
    /// The command line this error is reported against, one entry per
    /// argument.
    command: Vec<OsString>,
}

/// The underlying cause of a `GitError`.
#[derive(Debug)]
enum GitErrorInner {
    /// An I/O error, e.g. the command could not be run or a directory could
    /// not be removed.
    IoError(io::Error),
    /// The captured output of a command whose result was rejected, either
    /// because of a non-success exit status or because its stdout was not in
    /// the expected form.
    Output(Output),
}

/// Helper for tracking args to a Command.
///
/// Every argument is forwarded to the wrapped `Command` and also recorded in
/// `args`, so the full command line can later be attached to a `GitError`.
struct Args {
    /// The command being built.
    inner: Command,
    /// Copy of the program name followed by each argument added so far.
    args: Vec<OsString>,
}

impl_trait! {
    impl Args {
        /// Starts tracking a command line whose program is `cmd`.
        ///
        /// The program name is recorded as the first entry of the tracked
        /// argument list.
        pub fn new_cmd<S: AsRef<OsStr>>(cmd: S) -> Self {
            let program = cmd.as_ref();
            let inner = Command::new(program);
            let args = vec![program.to_os_string()];
            Self { inner, args }
        }

        /// Appends one argument, both to the wrapped Command and to the
        /// tracked argument list. Returns `self` to allow chaining.
        pub fn arg<S: AsRef<OsStr>>(&mut self, arg: S) -> &mut Self {
            let value = arg.as_ref();
            self.args.push(value.to_os_string());
            self.inner.arg(value);
            self
        }

        // /// Adds multiple args to the Command.
        // pub fn args<I, S>(&mut self, args: I) -> &mut Self
        // where I: IntoIterator<Item=S>, S: AsRef<OsStr> {
        //     for arg in args {
        //         self.arg(arg);
        //     }
        //     self
        // }

        // impl trait Into<Result<Output, GitError>> {
        //     fn into(self) -> Result<Output, GitError> {
        //         todo!()
        //     }
        // }
    }
}

/// RAII transaction guard for merging forked repos in with_work_repos.
///
/// Field `0` is the main (cache) repo; field `1` holds the work repos that
/// were forked from it. Dropping the guard deletes any work repos still held,
/// unless the thread is panicking.
struct Merger<'a>(&'a mut Git<CacheRepo>, Vec<Git<FetchRepo>>);

impl From<io::Error> for GitErrorInner {
    fn from(e: io::Error) -> Self {
        Self::IoError(e)
    }
}

impl From<Output> for GitErrorInner {
    fn from(e: Output) -> Self {
        Self::Output(e)
    }
}

impl_trait! {
    impl GitError {
        /// Builds a GitError from a cause and the command line it is
        /// attributed to.
        fn new(inner: impl Into<GitErrorInner>, cmd: Vec<OsString>) -> Self {
            let inner = inner.into();
            Self { inner, command: cmd }
        }

        /// Prints the command line followed by the cause.
        ///
        /// Command parts, stdout and stderr that are not valid UTF-8 are
        /// replaced by a placeholder.
        impl trait fmt::Display {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                write!(f, "Error running")?;
                for part in self.command.iter() {
                    write!(f, " {}", part.to_str().unwrap_or("[not UTF-8]"))?;
                }
                match &self.inner {
                    GitErrorInner::IoError(cause) => {
                        write!(f, ", caused by: {}", cause)
                    },
                    GitErrorInner::Output(output) => {
                        let stdout = std::str::from_utf8(&output.stdout)
                            .unwrap_or("[not UTF-8]");
                        let stderr = std::str::from_utf8(&output.stderr)
                            .unwrap_or("[not UTF-8]");
                        write!(f, "\nstdout:\n{}", stdout)?;
                        write!(f, "\nstderr:\n{}", stderr)
                    },
                }
            }
        }

        /// Exposes the I/O error as the source, when there is one.
        impl trait error::Error {
            fn source(&self) -> Option<&(dyn error::Error + 'static)> {
                match &self.inner {
                    GitErrorInner::IoError(cause) => Some(cause),
                    GitErrorInner::Output(_) => None,
                }
            }
        }
    }
}

impl_trait! {
    impl<'a> Merger<'a> {
        /// Borrows the main repo immutably.
        fn main(&self) -> &Git<CacheRepo> {
            &*self.0
        }

        /// Merges the work repos back into the main repo.
        ///
        /// For every pending branch of every work repo: the branch is
        /// fetched into the main repo under a fetch head named
        /// `<branch>-<repo id>`, the branch is deleted from the work repo,
        /// and the fetch head is then moved over the branch in the main
        /// repo. Each work repo is deleted once its branches are done.
        ///
        /// # Panics
        ///
        /// Panics if there are branches in conflict, i.e. the same branch
        /// name is pending in more than one work repo. Also panics if a work
        /// repo's file name is not of the form `ganarchy-fetch-<id>.git`.
        ///
        /// # Errors
        ///
        /// Returns the first error from any git command or from deleting a
        /// work repo.
        /// NOTE(review): on such an early return the work repos not yet
        /// processed appear to be left on disk, since they have already been
        /// taken out of the guard. Confirm this is intended.
        fn merge(mut self) -> Result<(), GitError> {
            // check for conflicts first!
            {
                let mut seen = BTreeSet::<&String>::new();
                for work in self.1.iter() {
                    for name in work.inner.pending_branches.iter() {
                        if !seen.insert(name) {
                            panic!("Branch {} is in conflict!", name);
                        }
                    }
                }
            }

            // Take the repos out of the guard so Drop has nothing left to do.
            let work_repos = std::mem::take(&mut self.1);
            for mut repo in work_repos {
                // TODO clean up
                let file_name = repo.path.file_name().unwrap().to_str().unwrap();
                let repo_id = file_name
                    .strip_prefix("ganarchy-fetch-").unwrap()
                    .strip_suffix(".git").unwrap()
                    .to_owned();
                let pending = std::mem::take(&mut repo.inner.pending_branches);
                for name in pending {
                    let fetch_head = format!("{}-{}", name, repo_id);
                    let branch = name.as_str();
                    // First collect the work branch into a fetch head
                    self.0.fetch_work(&repo, &fetch_head, branch)?;
                    // If that succeeds, delete the work branch to free up disk
                    repo.rm_branch(branch)?;
                    // We have all the objects in the main repo and we probably
                    // have enough disk, so just replace the fetch head into
                    // the main branch and hope nothing errors.
                    self.0.replace(&fetch_head, branch)?;
                }
                repo.delete()?;
            }
            Ok(())
        }

        /// Accesses the work repos.
        impl trait std::ops::Deref {
            type Target = Vec<Git<FetchRepo>>;

            fn deref(&self) -> &Self::Target {
                &self.1
            }
        }

        /// Accesses the work repos.
        impl trait std::ops::DerefMut {
            fn deref_mut(&mut self) -> &mut Self::Target {
                &mut self.1
            }
        }

        /// Cleans up (deletes) the work repos, if not panicking.
        impl trait Drop {
            fn drop(&mut self) {
                if std::thread::panicking() {
                    // Leave everything on disk while unwinding.
                    return;
                }
                let remaining = std::mem::take(&mut self.1);
                for repo in remaining {
                    repo.delete().unwrap();
                }
            }
        }
    }
}

/// Initializer operations on the `Git` struct.
impl Git<CacheRepo> {
    /// Creates a cache-repo handle for `path`.
    ///
    /// Returns `None` if `path` has no final component, if that component is
    /// not valid UTF-8, or if it does not satisfy the cache-repo naming rule.
    /// Only the `Git` value is constructed here.
    pub fn at_path<T: AsRef<Path>>(_: Initializer, path: T) -> Option<Git<CacheRepo>> {
        let path = path.as_ref();
        // `is_path_valid` unwraps both of these, so reject such paths first.
        let _ = path.file_name()?.to_str()?;
        // TODO SHA-2
        let git = Git {
            path: path.to_path_buf(),
            sha256: false,
            inner: CacheRepo {
                _non_exhaustive: (),
            },
        };
        if git.is_path_valid() {
            Some(git)
        } else {
            None
        }
    }
}

/// Operations on a cache repo.
impl Git<CacheRepo> {
    /// Creates `count` work repos next to this repo, hands them to `f`, and
    /// merges them back into this repo afterwards.
    ///
    /// The work repos are named `ganarchy-fetch-<id>.git`, with `id` counting
    /// up from zero, and are shared clones of this repo.
    ///
    /// If creating a work repo or the callback fails, the work repos created
    /// so far are deleted and the error is returned. If the callback
    /// succeeds, the work repos are merged back into the main repo and the
    /// callback's value is returned.
    ///
    /// # Panics
    ///
    /// Panics if a merge conflict is detected. Specifically, if two work repos
    /// modify the same work branch.
    ///
    /// # "Poisoning"
    ///
    /// If this method unwinds, the underlying git repos, if any, will not be
    /// deleted. Instead, future calls to this method will return a GitError.
    pub fn with_work_repos<F, R>(&mut self, count: usize, f: F)
        -> Result<R, GitError>
    where F: FnOnce(&mut [Git<FetchRepo>]) -> Result<R, GitError> {
        // create some Git structs
        let sha256 = self.sha256;
        let base = &self.path;
        let planned: Vec<Git<FetchRepo>> = (0..count).map(|id| {
            let mut path = base.clone();
            path.set_file_name(format!("ganarchy-fetch-{}.git", id));
            let git = Git {
                path,
                sha256,
                inner: FetchRepo {
                    pending_branches: Default::default(),
                },
            };
            assert!(git.is_path_valid());
            git
        }).collect();
        // create the on-disk stuff; the guard only tracks repos that were
        // forked successfully, and deletes them if we bail out early.
        let mut merger = Merger(self, Vec::new());
        for mut repo in planned {
            merger.main().fork(&mut repo)?;
            merger.push(repo);
        }
        let result = f(&mut *merger)?;
        // merge the on-disk stuff
        merger.merge().map(|()| result)
    }

    /// Initializes this repo by running `git init --bare` on its path.
    pub fn ensure_exists(&mut self) -> Result<(), GitError> {
        self.cmd_init(|_| {}).map(|_output| ())
    }

    /// Checks if a given commit is present in the given branch's history.
    ///
    /// Runs `git merge-base --is-ancestor`; any unsuccessful exit status is
    /// reported as an error.
    ///
    /// # Panics
    ///
    /// Panics if this isn't a cache branch or if commit isn't
    /// a commit.
    pub fn check_history(&self, branch: &str, commit: &str)
        -> Result<(), GitError>
    {
        assert!(NamePurpose::CacheBranch.is_fit(branch));
        assert!(self.is_commit_hash(commit));
        let branch_ref = format!("refs/heads/{}", branch);
        self.cmd(|args| {
            args.arg("merge-base")
                .arg("--is-ancestor")
                .arg(commit)
                .arg(&branch_ref);
        })?;
        Ok(())
    }
}

/// Operations on a fetch repo.
impl Git<FetchRepo> {
    /// Fetches `from_ref` from source `from` into branch `branch`.
    ///
    /// The fetch used is a force-fetch. On success, `branch` is recorded as
    /// pending so it gets merged into the main repo later.
    ///
    /// # Panics
    ///
    /// Panics if `from` starts with `-`, if `from_ref` starts with `-`, or
    /// if `branch` isn't a work branch.
    pub fn fetch_source(&mut self, from: &str, branch: &str, from_ref: &str)
        -> Result<(), GitError>
    {
        assert!(!from.starts_with('-'));
        assert!(!from_ref.starts_with('-'));
        assert!(NamePurpose::WorkBranch.is_fit(branch));
        let refspec = format!("+{}:{}", from_ref, branch);
        self.cmd(|args| {
            args.arg("fetch").arg(from).arg(&refspec);
        })?;
        self.inner.pending_branches.insert(branch.to_owned());
        Ok(())
    }

    /// Returns the number of commits removed and the number of added between
    /// from and to, respectively.
    ///
    /// Parses the output of `git rev-list --left-right --count from...to`,
    /// which must be two runs of ASCII digits separated by a single tab.
    ///
    /// # Panics
    ///
    /// Panics if `from` or `to` isn't a commit hash.
    ///
    /// # Errors
    ///
    /// Fails if the command fails or if its output does not have the
    /// expected form.
    pub fn get_counts(&self, from: &str, to: &str)
        -> Result<(u64, u64), GitError>
    {
        // if called on a cache repo, `from` may no longer exist. the FetchRepo
        // requirement makes sure `from` has not been garbage-collected.
        assert!(self.is_commit_hash(from));
        assert!(self.is_commit_hash(to));
        let range = format!("{}...{}", from, to);
        let mut output = self.cmd(|args| {
            args.arg("rev-list")
                .arg("--left-right")
                .arg("--count")
                .arg(&range)
                .arg("--");
        })?;
        // perf: Vec::default doesn't allocate.
        let stdout = String::from_utf8(std::mem::take(&mut output.stdout));
        let counts = stdout.as_ref().ok().and_then(|text| {
            let (removed, added) = text.trim().split_once('\t')?;
            let only_digits = |s: &str| s.chars().all(|c| c.is_ascii_digit());
            if !only_digits(removed) || !only_digits(added) {
                return None;
            }
            Some((removed.parse::<u64>().ok()?, added.parse::<u64>().ok()?))
        });
        if let Some(counts) = counts {
            return Ok(counts);
        }
        // Unexpected output: put stdout back so the error can show it.
        output.stdout = match stdout {
            Ok(text) => text.into_bytes(),
            Err(err) => err.into_bytes(),
        };
        let command = vec![
            OsString::from("git"),
            OsString::from("rev-list"),
            OsString::from("--left-right"),
            OsString::from("--count"),
            OsString::from(range),
            OsString::from("--"),
        ];
        Err(GitError::new(output, command))
    }
}

/// Generic Git operations.
impl<T: RepoKind> Git<T> {
    /// Checks if the given branch is a valid branch.
    ///
    /// Runs `git check-ref-format --branch` and accepts the result only if
    /// the printed name is exactly `branch`, optionally followed by a line
    /// ending.
    ///
    /// Note: "HEAD" is **not** a branch.
    ///
    /// # Panics
    ///
    /// Panics if `branch` starts with `-`.
    ///
    /// # Errors
    ///
    /// Fails if the command fails or prints anything other than `branch`.
    pub fn check_branch(&self, branch: &str) -> Result<(), GitError> {
        assert!(!branch.starts_with('-'));
        let mut output = self.cmd(|args| {
            args.arg("check-ref-format");
            args.arg("--branch");
            args.arg(branch);
        })?;
        // perf: Vec::default doesn't allocate.
        let stdout = String::from_utf8(std::mem::take(&mut output.stdout));
        let accepted = match &stdout {
            Ok(text) => matches!(
                text.strip_prefix(branch),
                Some("") | Some("\n") | Some("\r\n")
            ),
            Err(_) => false,
        };
        if accepted {
            return Ok(());
        }
        // Unexpected output: put stdout back so the error can show it.
        output.stdout = match stdout {
            Ok(text) => text.into_bytes(),
            Err(err) => err.into_bytes(),
        };
        let v = vec![
            OsString::from("git"),
            "check-ref-format".into(),
            "--branch".into(),
            branch.into(),
        ];
        Err(GitError::new(output, v))
    }

    /// Returns the commit hash at the given target.
    ///
    /// Runs `git show <target> -s --format=format:%H --` and returns the
    /// output with surrounding whitespace removed.
    ///
    /// # Panics
    ///
    /// Panics if `target` starts with `-`.
    ///
    /// # Errors
    ///
    /// Fails if the command fails, or if its trimmed output is not a commit
    /// hash.
    pub fn get_hash(&self, target: &str)
        -> Result<String, GitError>
    {
        assert!(!target.starts_with('-'));
        let mut output = self.cmd(|args| {
            args.arg("show");
            args.arg(target);
            args.arg("-s");
            args.arg("--format=format:%H");
            args.arg("--");
        })?;
        // perf: Vec::default doesn't allocate.
        let stdout = String::from_utf8(std::mem::take(&mut output.stdout));
        output.stdout = match stdout {
            Ok(text) => {
                // Return exactly the slice that was validated. Truncating
                // the untrimmed string to the trimmed length would keep any
                // leading whitespace and cut off the end of the hash.
                let hash = text.trim();
                if self.is_commit_hash(hash) {
                    return Ok(hash.to_owned());
                }
                text.into_bytes()
            },
            Err(err) => err.into_bytes(),
        };
        let v = vec![
            OsString::from("git"),
            "show".into(),
            target.into(),
            "-s".into(),
            "--format=format:%H".into(),
            "--".into(),
        ];
        Err(GitError::new(output, v))
    }

    /// Returns the commit message for the given target.
    ///
    /// Runs `git show <target> -s --format=format:%B --` and returns its
    /// output unmodified.
    ///
    /// # Panics
    ///
    /// Panics if `target` starts with `-`.
    ///
    /// # Errors
    ///
    /// Fails if the command fails or if its output is not valid UTF-8.
    pub fn get_message(&self, target: &str)
        -> Result<String, GitError>
    {
        assert!(!target.starts_with('-'));
        let mut output = self.cmd(|args| {
            args.arg("show");
            args.arg(target);
            args.arg("-s");
            args.arg("--format=format:%B");
            args.arg("--");
        })?;
        // perf: Vec::default doesn't allocate.
        let stdout = String::from_utf8(std::mem::take(&mut output.stdout));
        output.stdout = match stdout {
            Ok(message) => return Ok(message),
            Err(err) => err.into_bytes(),
        };
        let v = vec![
            OsString::from("git"),
            "show".into(),
            target.into(),
            "-s".into(),
            "--format=format:%B".into(),
            "--".into(),
        ];
        Err(GitError::new(output, v))
    }
}

/// Private operations on a git cache repo.
impl Git<CacheRepo> {
    /// Fetches branch `from_branch` from work repo `from` into branch `branch`.
    ///
    /// The fetch used is a force-fetch.
    ///
    /// # Panics
    ///
    /// Panics if the two repos disagree on `sha256`, if `from_branch` isn't
    /// a cache branch, or if `branch` isn't a fetch head.
    fn fetch_work(&mut self, from: &Git<FetchRepo>, branch: &str, from_branch: &str)
        -> Result<(), GitError>
    {
        assert_eq!(self.sha256, from.sha256);
        assert!(NamePurpose::CacheBranch.is_fit(from_branch));
        assert!(NamePurpose::CacheFetchHead.is_fit(branch));
        let refspec = format!("+{}:{}", from_branch, branch);
        self.cmd(|args| {
            args.arg("fetch").arg(&from.path).arg(&refspec);
        })?;
        Ok(())
    }

    /// Moves branch `old_name` over branch `new_name`, replacing it.
    ///
    /// Runs `git branch -M old_name new_name`.
    ///
    /// # Panics
    ///
    /// Panics if `new_name` isn't a cache branch,
    /// or if `old_name` isn't a fetch head.
    fn replace(&mut self, old_name: &str, new_name: &str)
        -> Result<(), GitError>
    {
        assert!(NamePurpose::CacheBranch.is_fit(new_name));
        assert!(NamePurpose::CacheFetchHead.is_fit(old_name));
        self.cmd(|args| {
            args.arg("branch").arg("-M").arg(old_name).arg(new_name);
        })?;
        Ok(())
    }

    /// Makes a shared clone of this local repo into the given work repo.
    ///
    /// Equivalent to `git clone --bare --shared`, which is very dangerous!
    ///
    /// # Panics
    ///
    /// Panics if the two repos disagree on `sha256`.
    fn fork(&self, into: &mut Git<FetchRepo>) -> Result<(), GitError> {
        // both repos must use the same hash format
        assert_eq!(self.sha256, into.sha256);
        into.cmd_clone_from(&self.path, |args| {
            args.arg("--shared");
        })?;
        Ok(())
    }
}

/// Private operations on a git fetch repo.
impl Git<FetchRepo> {
    /// Deletes work branch `branch` using `git branch -D`.
    ///
    /// # Panics
    ///
    /// Panics if the branch isn't a work branch.
    fn rm_branch(&mut self, branch: &str) -> Result<(), GitError> {
        assert!(NamePurpose::WorkBranch.is_fit(branch));
        self.cmd(|args| {
            args.arg("branch").arg("-D").arg(branch);
        })?;
        Ok(())
    }

    /// Deletes this repo by removing its directory tree.
    ///
    /// No git command is involved; a failure is reported against a synthetic
    /// `rm -rf <path>` command line.
    fn delete(self) -> Result<(), GitError> {
        match fs::remove_dir_all(&self.path) {
            Ok(()) => Ok(()),
            Err(err) => {
                let command = vec![
                    OsString::from("(synthetic)"),
                    OsString::from("rm"),
                    OsString::from("-rf"),
                    self.path.as_os_str().to_os_string(),
                ];
                Err(GitError::new(err, command))
            }
        }
    }
}

/// Helpers.
impl<T: RepoKind> Git<T> {
    /// Returns true if the final component of this repo's path satisfies the
    /// naming rule of the repo kind.
    ///
    /// # Panics
    ///
    /// Panics if the path has no final component or if that component is not
    /// valid UTF-8.
    fn is_path_valid(&self) -> bool {
        let name = self.path.file_name().unwrap();
        let name = name.to_str().unwrap();
        T::is_repo_path_valid(name, self.sha256)
    }

    /// Returns true if the string is a commit hash, in the format selected
    /// by `sha256`.
    ///
    /// Does not check if the commit exists.
    fn is_commit_hash(&self, commit: &str) -> bool {
        let purpose = if self.sha256 {
            NamePurpose::Commit64
        } else {
            NamePurpose::Commit
        };
        purpose.is_fit(commit)
    }
}

/// Raw commands on a git repo.
impl<T: RepoKind> Git<T> {
    /// Runs `git init --bare <extra args> <path>` for this repo.
    ///
    /// `f` may add extra arguments between `--bare` and the path.
    fn cmd_init(&self, f: impl FnOnce(&mut Args)) -> Result<Output, GitError> {
        self.cmd_common(|args| {
            args.arg("init");
            args.arg("--bare");
            f(&mut *args);
            args.arg(&self.path);
        })
    }

    /// Runs `git clone --bare <extra args> <from> <path>` to clone into this
    /// repo.
    ///
    /// `f` may add extra arguments between `--bare` and the source.
    fn cmd_clone_from(
        &self,
        from: impl AsRef<OsStr>,
        f: impl FnOnce(&mut Args)
    ) -> Result<Output, GitError> {
        self.cmd_common(|args| {
            args.arg("clone");
            args.arg("--bare");
            f(&mut *args);
            args.arg(from);
            args.arg(&self.path);
        })
    }

    /// Runs `git -C <path> --bare <args from f>` on this repo.
    ///
    /// Note: Doesn't work for git init and git clone operations. Use
    /// [`Self::cmd_init`] and [`Self::cmd_clone_from`] instead.
    fn cmd(&self, f: impl FnOnce(&mut Args)) -> Result<Output, GitError> {
        self.cmd_common(|args| {
            args.arg("-C");
            args.arg(&self.path);
            args.arg("--bare");
            f(&mut *args);
        })
    }

    /// Common handling of raw commands.
    ///
    /// Builds `"git" + f()`, runs it, and captures its output. Failing to
    /// run the command and a non-success exit status are both turned into a
    /// GitError carrying the full command line.
    fn cmd_common(
        &self,
        f: impl FnOnce(&mut Args)
    ) -> Result<Output, GitError> {
        let mut tracked = Args::new_cmd("git");
        f(&mut tracked);
        // run the command and make nicer Error
        let Args { inner: mut command, args } = tracked;
        match command.output() {
            Err(err) => Err(GitError::new(err, args)),
            Ok(output) if output.status.success() => Ok(output),
            Ok(output) => Err(GitError::new(output, args)),
        }
    }

}