// This file is part of GAnarchy - decentralized development hub
// Copyright (C) 2021 Soni L.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//! This module provides some abstractions over git.
//!
//! Having this module allows easily(-ish) replacing the git backend, for
//! example from calling the git CLI directly to using a git library.
use std::collections::BTreeSet;
use std::error;
use std::ffi::{OsStr, OsString};
use std::fmt;
use std::fs;
use std::io;
use std::path::Path;
use std::path::PathBuf;
//use std::process;
use std::process::{Command, Output};
use impl_trait::impl_trait;
use crate::util::NamePurpose;
use crate::marker::Initializer;
#[cfg(test)]
mod tests;
/// Represents a local git repo.
///
/// A value is either a main repo built by `Git::at_path` or one of the
/// temporary work repos built by `Git::with_work_repos`.
pub struct Git {
/// Filesystem location of the repo. Passed to git (`-C`, `init`, `clone`)
/// and removed from disk by `delete`.
path: PathBuf,
/// Branches fetched into this repo by `fetch_source` that still have to be
/// merged back. `Some` for repos created by `with_work_repos`, `None` for
/// repos created by `at_path`.
pending_branches: Option<BTreeSet<String>>,
/// Selects the `*64` variants of the `NamePurpose` checks used for repo
/// names and commit hashes. `at_path` currently always sets this to `false`.
sha256: bool,
}
/// Error returned by operations on a git repo.
///
/// Carries the underlying cause together with the command line that is
/// reported when the error is displayed.
#[derive(Debug)]
pub struct GitError {
/// What went wrong: an I/O failure or the captured output of the command.
inner: GitErrorInner,
/// The command line to report, program name first. For `delete` this is a
/// synthetic description rather than a command that was actually run.
command: Vec<OsString>,
}
/// The cause stored inside a `GitError`.
#[derive(Debug)]
enum GitErrorInner {
/// An I/O operation failed (running the command, or removing a directory).
IoError(io::Error),
/// The command ran, but exited unsuccessfully or printed something the
/// caller could not accept; holds the captured output.
Output(Output),
}
/// Helper for tracking args to a Command.
///
/// `Command` is built up normally while a copy of every argument is kept on
/// the side, so the full command line can be attached to a `GitError`.
struct Args {
/// The command being built.
inner: Command,
/// The program name followed by every argument added so far.
args: Vec<OsString>,
}
impl_trait! {
    impl Args {
        /// Starts tracking a fresh invocation of the program `cmd`.
        ///
        /// The program name becomes the first entry of the recorded args.
        pub fn new_cmd<S: AsRef<OsStr>>(cmd: S) -> Self {
            let program: &OsStr = cmd.as_ref();
            let recorded = vec![program.to_os_string()];
            let inner = Command::new(program);
            Self { inner, args: recorded }
        }

        /// Appends one argument, both to the Command and to the record.
        pub fn arg<S: AsRef<OsStr>>(&mut self, arg: S) -> &mut Self {
            let value: &OsStr = arg.as_ref();
            self.inner.arg(value);
            self.args.push(value.to_os_string());
            self
        }

        // /// Adds multiple args to the Command.
        // pub fn args<I, S>(&mut self, args: I) -> &mut Self
        // where I: IntoIterator<Item=S>, S: AsRef<OsStr> {
        //     for arg in args {
        //         self.arg(arg);
        //     }
        //     self
        // }
        // impl trait Into<Result<Output, GitError>> {
        //     fn into(self) -> Result<Output, GitError> {
        //         todo!()
        //     }
        // }
    }
}
/// RAII transaction guard for merging forked repos in with_work_repos.
///
/// Field `0` is the main repo, field `1` the work repos forked from it.
/// Dropping the guard deletes any work repos it still holds, unless the
/// thread is panicking.
struct Merger<'a>(&'a mut Git, Vec<Git>);
impl From<io::Error> for GitErrorInner {
fn from(e: io::Error) -> Self {
Self::IoError(e)
}
}
impl From<Output> for GitErrorInner {
fn from(e: Output) -> Self {
Self::Output(e)
}
}
impl_trait! {
    impl GitError {
        /// Builds a GitError from its cause and the command line to report.
        fn new(inner: impl Into<GitErrorInner>, cmd: Vec<OsString>) -> Self {
            let inner = inner.into();
            Self { inner, command: cmd }
        }

        impl trait fmt::Display {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                // Header: the command line, with a placeholder standing in
                // for any word that isn't valid UTF-8.
                write!(f, "Error running")?;
                self.command
                    .iter()
                    .map(|word| word.to_str().unwrap_or("[not UTF-8]"))
                    .try_for_each(|word| write!(f, " {}", word))?;
                // Tail: either the I/O cause or both captured streams.
                match &self.inner {
                    GitErrorInner::IoError(cause) => {
                        write!(f, ", caused by: {}", cause)
                    }
                    GitErrorInner::Output(output) => {
                        let (out, err) = (
                            std::str::from_utf8(&output.stdout)
                                .unwrap_or("[not UTF-8]"),
                            std::str::from_utf8(&output.stderr)
                                .unwrap_or("[not UTF-8]"),
                        );
                        write!(f, "\nstdout:\n{}", out)?;
                        write!(f, "\nstderr:\n{}", err)
                    }
                }
            }
        }

        impl trait error::Error {
            fn source(&self) -> Option<&(dyn error::Error + 'static)> {
                // Only the I/O variant has an underlying error to expose.
                if let GitErrorInner::IoError(cause) = &self.inner {
                    Some(cause)
                } else {
                    None
                }
            }
        }
    }
}
impl_trait! {
    impl<'a> Merger<'a> {
        /// Borrows the main repo immutably.
        fn main(&self) -> &Git {
            let main_repo: &Git = &*self.0;
            main_repo
        }

        /// Moves every pending branch of every work repo into the main repo,
        /// deleting each work repo once its branches have been moved.
        ///
        /// # Panics
        ///
        /// Panics if the same branch is pending in more than one work repo.
        fn merge(mut self) -> Result<(), GitError> {
            // Conflicts are detected up front, before anything is touched.
            {
                let mut seen = BTreeSet::<&String>::new();
                let all_pending = self.1.iter().flat_map(|work| {
                    work.pending_branches.as_ref().unwrap().iter()
                });
                for branch in all_pending {
                    if !seen.insert(branch) {
                        panic!("Branch {} is in conflict!", branch);
                    }
                }
            }
            // NOTE(review): an early `?` return below appears to leave the
            // not-yet-deleted work repos on disk, since they have already
            // been taken out of the guard - confirm this is intended.
            let work_repos = std::mem::take(&mut self.1);
            for mut repo in work_repos {
                // TODO clean up
                let file_name =
                    repo.path.file_name().unwrap().to_str().unwrap();
                let repo_id = file_name
                    .strip_prefix("ganarchy-fetch-").unwrap()
                    .strip_suffix(".git").unwrap()
                    .to_owned();
                let pending = repo.pending_branches.take().unwrap();
                for branch in pending {
                    let fetch_head = format!("{}-{}", branch, repo_id);
                    // Step 1: collect the work branch into a fetch head of
                    // the main repo.
                    self.0.fetch_work(&repo, &fetch_head, &branch)?;
                    // Step 2: the objects are safe now, so drop the work
                    // branch to free up disk.
                    repo.rm_branch(&branch)?;
                    // Step 3: move the fetch head over the main branch and
                    // hope nothing errors.
                    self.0.replace(&fetch_head, &branch)?;
                }
                repo.delete()?;
            }
            Ok(())
        }

        /// Gives read access to the work repos.
        impl trait std::ops::Deref {
            type Target = Vec<Git>;
            fn deref(&self) -> &Vec<Git> {
                let Merger(_, work_repos) = self;
                work_repos
            }
        }

        /// Gives write access to the work repos.
        impl trait std::ops::DerefMut {
            fn deref_mut(&mut self) -> &mut Vec<Git> {
                let Merger(_, work_repos) = self;
                work_repos
            }
        }

        /// Deletes whatever work repos are still held, unless panicking.
        impl trait Drop {
            fn drop(&mut self) {
                if std::thread::panicking() {
                    return;
                }
                for repo in self.1.drain(..) {
                    repo.delete().unwrap();
                }
            }
        }
    }
}
/// Initializer operations on the `Git` struct.
impl Git {
/// Creates a new instance of the `Git` struct, with the path as given.
pub fn at_path<T: AsRef<Path>>(_: Initializer, path: T) -> Option<Git> {
let path = path.as_ref();
let filename = path.file_name()?.to_str()?;
// TODO SHA-2
NamePurpose::CacheRepo.is_fit(filename).then(|| Git {
path: path.into(),
pending_branches: None,
sha256: false,
})
}
}
/// Operations on a git repo.
///
/// # Race conditions
///
/// These operate on the filesystem. Calling them from multiple threads
/// can result in data corruption.
impl Git {
/// Creates the given number of work repos, and calls the closure to run
/// operations on them.
///
/// The operations can be done on the individual repos, and they'll be
/// merged into the main repo as this function returns.
///
/// If the callback fails, the work repos will be deleted. If the function
/// succeeds, the work repos will be merged back into the main repo.
///
/// # Panics
///
/// Panics if a merge conflict is detected. Specifically, if two work repos
/// modify the same work branch. Also panics if this isn't a cache repo.
///
/// # "Poisoning"
///
/// If this method unwinds, the underlying git repos, if any, will not be
/// deleted. Instead, future calls to this method will return a GitError.
pub fn with_work_repos<F, R>(&mut self, count: usize, f: F)
-> Result<R, GitError>
where F: FnOnce(&mut [Git]) -> Result<R, GitError> {
assert!(self.is_cache_repo());
// create some Git structs
let mut work_repos = Vec::with_capacity(count);
for id in 0..count {
let mut new_path = self.path.clone();
new_path.set_file_name(format!("ganarchy-fetch-{}.git", id));
let git = Git {
path: new_path,
pending_branches: Some(Default::default()),
sha256: self.sha256,
};
assert!(git.is_work_repo());
work_repos.push(git);
}
// create the on-disk stuff
let merger = Merger(self, Vec::new());
let mut merger = work_repos.into_iter()
.try_fold(merger, |mut m, mut r| {
m.main().fork(&mut r)?;
m.push(r);
Ok(m)
})?;
let result = f(&mut *merger)?;
// merge the on-disk stuff
merger.merge().and(Ok(result))
}
/// Force-fetches `from_ref` of the source `from` into the local branch
/// `branch`, and records `branch` as pending for the later merge.
///
/// # Panics
///
/// Panics if called on a non-work repo, if `from` starts with `-`, if
/// `from_ref` starts with `-`, or if `branch` isn't a work branch.
pub fn fetch_source(&mut self, from: &str, branch: &str, from_ref: &str)
    -> Result<(), GitError>
{
    assert!(self.is_work_repo());
    assert!(!from.starts_with("-"));
    assert!(!from_ref.starts_with("-"));
    assert!(NamePurpose::WorkBranch.is_fit(branch));
    // The leading `+` is what makes this a force-fetch.
    let refspec = format!("+{}:{}", from_ref, branch);
    self.cmd(|args| {
        args.arg("fetch").arg(from).arg(&refspec);
    })?;
    let pending = self.pending_branches.as_mut().unwrap();
    pending.insert(branch.to_owned());
    Ok(())
}
/// Runs `git init --bare` for this repo.
///
/// # Panics
///
/// Panics if called on a non-cache repo.
pub fn ensure_exists(&mut self) -> Result<(), GitError> {
    assert!(self.is_cache_repo());
    // The command's output carries nothing of interest on success.
    self.cmd_init(|_| {}).map(drop)
}
/// Succeeds if `commit` is part of the history of `branch`, as decided by
/// `git merge-base --is-ancestor`; any other outcome is a GitError.
///
/// # Panics
///
/// Panics if this isn't a cache repo, if `branch` isn't a cache branch, or
/// if `commit` isn't shaped like a commit hash.
pub fn check_history(&self, branch: &str, commit: &str)
    -> Result<(), GitError>
{
    assert!(self.is_cache_repo());
    assert!(NamePurpose::CacheBranch.is_fit(branch));
    assert!(self.is_commit_hash(commit));
    let branch_ref = format!("refs/heads/{}", branch);
    self.cmd(|args| {
        args.arg("merge-base")
            .arg("--is-ancestor")
            .arg(commit)
            .arg(&branch_ref);
    })
    .map(drop)
}
/// Succeeds if `branch` is a valid branch name.
///
/// The name is accepted when `git check-ref-format --branch` succeeds and
/// prints it back unchanged, optionally followed by a line ending.
///
/// Note: "HEAD" is **not** a branch.
///
/// # Panics
///
/// Panics if `branch` starts with `-`.
pub fn check_branch(&self, branch: &str) -> Result<(), GitError> {
    assert!(!branch.starts_with("-"));
    let mut output = self.cmd(|args| {
        args.arg("check-ref-format").arg("--branch").arg(branch);
    })?;
    // Borrow the buffer out of `output` so it can be put back untouched
    // for the error report (an empty Vec doesn't allocate).
    let raw = std::mem::take(&mut output.stdout);
    output.stdout = match String::from_utf8(raw) {
        Ok(text) => {
            let rest = text.strip_prefix(branch);
            if matches!(rest, Some("") | Some("\n") | Some("\r\n")) {
                return Ok(());
            }
            text.into_bytes()
        }
        Err(invalid) => invalid.into_bytes(),
    };
    let command: Vec<OsString> = ["git", "check-ref-format", "--branch", branch]
        .iter()
        .map(|part| OsString::from(*part))
        .collect();
    Err(GitError::new(output, command))
}
/// Counts the commits that separate `from` and `to`.
///
/// Returns `(removed, added)`: the two numbers printed by
/// `git rev-list --left-right --count from...to`, in that order.
///
/// # Panics
///
/// Panics if called on a non-work repo, or if `from` or `to` isn't shaped
/// like a commit hash.
pub fn get_counts(&self, from: &str, to: &str)
    -> Result<(u64, u64), GitError>
{
    // if called on a cache repo, `from` may no longer exist.
    // this check makes sure `from` has not been garbage-collected.
    assert!(self.is_work_repo());
    assert!(self.is_commit_hash(from));
    assert!(self.is_commit_hash(to));
    let range = format!("{}...{}", from, to);
    let mut output = self.cmd(|args| {
        args.arg("rev-list")
            .arg("--left-right")
            .arg("--count")
            .arg(&range)
            .arg("--");
    })?;
    // Taking the buffer is cheap: an empty Vec doesn't allocate.
    let raw = std::mem::take(&mut output.stdout);
    let stdout = String::from_utf8(raw);
    // Accept exactly `<digits>\t<digits>` (surrounding whitespace aside).
    // The explicit digit check rejects things `parse` alone would let
    // through, such as a leading `+`.
    let counts = stdout.as_ref().ok().and_then(|text| {
        let (removed, added) = text.trim().split_once('\t')?;
        let all_digits = |s: &str| s.chars().all(|c| c.is_ascii_digit());
        if !(all_digits(removed) && all_digits(added)) {
            return None;
        }
        Some((removed.parse::<u64>().ok()?, added.parse::<u64>().ok()?))
    });
    if let Some(pair) = counts {
        return Ok(pair);
    }
    // Unusable output: put the bytes back and report them.
    output.stdout = match stdout {
        Ok(text) => text.into_bytes(),
        Err(invalid) => invalid.into_bytes(),
    };
    let command: Vec<OsString> = vec![
        "git".into(),
        "rev-list".into(),
        "--left-right".into(),
        "--count".into(),
        range.into(),
        "--".into(),
    ];
    Err(GitError::new(output, command))
}
/// Returns the commit hash at the given target.
///
/// Runs `git show <target> -s --format=format:%H --` and returns its
/// output with surrounding whitespace removed, provided that the result is
/// shaped like a commit hash. Anything else is reported as a GitError
/// carrying the captured output.
///
/// # Panics
///
/// Panics if `target` starts with `-`.
pub fn get_hash(&self, target: &str)
    -> Result<String, GitError>
{
    assert!(!target.starts_with("-"));
    let mut output = self.cmd(|args| {
        args.arg("show");
        args.arg(target);
        args.arg("-s");
        args.arg("--format=format:%H");
        args.arg("--");
    })?;
    // perf: Vec::default doesn't allocate.
    let stdout = std::mem::take(&mut output.stdout);
    output.stdout = match String::from_utf8(stdout) {
        Ok(text) => {
            // Return exactly the slice that was validated. Truncating the
            // original string to the trimmed length would keep any leading
            // whitespace and cut off the end of the hash instead.
            let hash = text.trim();
            if self.is_commit_hash(hash) {
                return Ok(hash.to_owned());
            }
            text.into_bytes()
        }
        Err(e) => e.into_bytes(),
    };
    let v = vec![
        OsString::from("git"),
        "show".into(),
        target.into(),
        "-s".into(),
        "--format=format:%H".into(),
        "--".into(),
    ];
    Err(GitError::new(output, v))
}
/// Returns the commit message at the given target.
///
/// The message is the raw output of
/// `git show <target> -s --format=format:%B --`; output that isn't valid
/// UTF-8 is reported as a GitError.
///
/// # Panics
///
/// Panics if `target` starts with `-`.
pub fn get_message(&self, target: &str)
    -> Result<String, GitError>
{
    assert!(!target.starts_with("-"));
    let mut output = self.cmd(|args| {
        args.arg("show")
            .arg(target)
            .arg("-s")
            .arg("--format=format:%B")
            .arg("--");
    })?;
    // Taking the buffer is cheap: an empty Vec doesn't allocate.
    let raw = std::mem::take(&mut output.stdout);
    match String::from_utf8(raw) {
        Ok(message) => Ok(message),
        Err(invalid) => {
            // Put the bytes back so the error shows what git printed.
            output.stdout = invalid.into_bytes();
            let command: Vec<OsString> = vec![
                "git".into(),
                "show".into(),
                target.into(),
                "-s".into(),
                "--format=format:%B".into(),
                "--".into(),
            ];
            Err(GitError::new(output, command))
        }
    }
}
}
/// Private operations on a git repo.
impl Git {
/// Force-fetches `from_branch` of the work repo `from` into the local
/// branch `branch`.
///
/// # Panics
///
/// Panics if the two repos disagree on `sha256`, if this isn't a cache
/// repo, if `from` isn't a work repo, if `branch` isn't a fetch head or if
/// `from_branch` isn't a cache branch.
fn fetch_work(&mut self, from: &Git, branch: &str, from_branch: &str)
    -> Result<(), GitError>
{
    assert_eq!(self.sha256, from.sha256);
    assert!(self.is_cache_repo());
    assert!(from.is_work_repo());
    assert!(NamePurpose::CacheBranch.is_fit(from_branch));
    assert!(NamePurpose::CacheFetchHead.is_fit(branch));
    // The leading `+` is what makes this a force-fetch.
    let refspec = format!("+{}:{}", from_branch, branch);
    self.cmd(|args| {
        args.arg("fetch").arg(&from.path).arg(&refspec);
    })
    .map(drop)
}
/// Moves branch `old_name` over branch `new_name` with `git branch -M`,
/// so that `new_name` ends up replaced by what `old_name` was.
///
/// # Panics
///
/// Panics if this isn't a cache repo, if `old_name` isn't a fetch head,
/// or if `new_name` isn't a cache branch.
fn replace(&mut self, old_name: &str, new_name: &str)
    -> Result<(), GitError>
{
    assert!(self.is_cache_repo());
    assert!(NamePurpose::CacheBranch.is_fit(new_name));
    assert!(NamePurpose::CacheFetchHead.is_fit(old_name));
    self.cmd(|args| {
        args.arg("branch").arg("-M").arg(old_name).arg(new_name);
    })
    .map(drop)
}
/// Removes the work branch `branch` with `git branch -D`.
///
/// # Panics
///
/// Panics if this isn't a work repo or if the branch isn't a work branch.
fn rm_branch(&mut self, branch: &str) -> Result<(), GitError> {
    assert!(self.is_work_repo());
    assert!(NamePurpose::WorkBranch.is_fit(branch));
    self.cmd(|args| {
        args.arg("branch").arg("-D").arg(branch);
    })
    .map(drop)
}
/// Makes a shared clone of this local repo into the given work repo.
///
/// Equivalent to `git clone --bare --shared`, which is very dangerous!
///
/// # Panics
///
/// Panics if the two repos disagree on `sha256`, if this repo isn't a
/// cache repo, and/or if the given repo isn't a work repo.
fn fork(&self, into: &mut Git) -> Result<(), GitError> {
    assert_eq!(self.sha256, into.sha256);
    assert!(self.is_cache_repo());
    assert!(into.is_work_repo());
    // The clone command is issued on behalf of the destination repo, with
    // this repo as its source.
    let source = &self.path;
    into.cmd_clone_from(source, |args| {
        args.arg("--shared");
    })
    .map(drop)
}
/// Removes this repo's directory from disk, consuming the handle.
///
/// A failure is reported as a GitError whose command line is a synthetic
/// `rm -rf <path>`.
///
/// # Panics
///
/// Panics if called on a non-work repo.
fn delete(self) -> Result<(), GitError> {
    assert!(self.is_work_repo());
    match fs::remove_dir_all(&self.path) {
        Ok(()) => Ok(()),
        Err(cause) => {
            // No command was actually run; describe the operation instead.
            let args: Vec<OsString> = vec![
                "(synthetic)".into(),
                "rm".into(),
                "-rf".into(),
                self.path.into_os_string(),
            ];
            Err(GitError::new(cause, args))
        }
    }
}
}
/// Helpers.
impl Git {
    /// Tells whether this repo's file name is a fit for a cache repo.
    ///
    /// # Panics
    ///
    /// Panics if the path has no file name or the name isn't valid UTF-8.
    fn is_cache_repo(&self) -> bool {
        let name = self.path.file_name().unwrap().to_str().unwrap();
        match self.sha256 {
            true => NamePurpose::CacheRepo64.is_fit(name),
            false => NamePurpose::CacheRepo.is_fit(name),
        }
    }

    /// Tells whether this repo's file name is a fit for a work repo.
    ///
    /// # Panics
    ///
    /// Panics if the path has no file name or the name isn't valid UTF-8.
    fn is_work_repo(&self) -> bool {
        let name = self.path.file_name().unwrap().to_str().unwrap();
        match self.sha256 {
            true => NamePurpose::WorkRepo64.is_fit(name),
            false => NamePurpose::WorkRepo.is_fit(name),
        }
    }

    /// Tells whether `commit` is shaped like a commit hash for this repo.
    ///
    /// Does not check if the commit exists.
    fn is_commit_hash(&self, commit: &str) -> bool {
        match self.sha256 {
            true => NamePurpose::Commit64.is_fit(commit),
            false => NamePurpose::Commit.is_fit(commit),
        }
    }
}
/// Raw commands on a git repo.
impl Git {
/// Runs `git init --bare [extra args] <path>` for this repo.
///
/// `f` may add extra args; they are placed before the path. `--bare` is
/// always passed.
fn cmd_init(&self, f: impl FnOnce(&mut Args)) -> Result<Output, GitError> {
    let target = &self.path;
    self.cmd_common(|cmd| {
        cmd.arg("init");
        cmd.arg("--bare");
        f(&mut *cmd);
        cmd.arg(target);
    })
}
/// Runs `git clone --bare [extra args] <from> <path>` for this repo.
///
/// `f` may add extra args; they are placed before the source and the
/// destination. `--bare` is always passed.
fn cmd_clone_from(
    &self,
    from: impl AsRef<OsStr>,
    f: impl FnOnce(&mut Args)
) -> Result<Output, GitError> {
    let target = &self.path;
    self.cmd_common(|cmd| {
        cmd.arg("clone");
        cmd.arg("--bare");
        f(&mut *cmd);
        cmd.arg(from);
        cmd.arg(target);
    })
}
/// Runs `git -C <path> --bare [args from f]` against this repo.
///
/// Note: Doesn't work for git init and git clone operations. Use
/// `cmd_init` and `cmd_clone_from` instead.
///
/// `--bare` is always passed.
fn cmd(&self, f: impl FnOnce(&mut Args)) -> Result<Output, GitError> {
    let repo_dir = &self.path;
    self.cmd_common(|cmd| {
        cmd.arg("-C");
        cmd.arg(repo_dir);
        cmd.arg("--bare");
        f(&mut *cmd);
    })
}
/// Shared tail of all raw commands: runs `git` with the args added by `f`.
///
/// Returns the captured output if the command could be run and exited
/// successfully. Otherwise returns a GitError carrying the full command
/// line plus either the I/O error or the captured output.
fn cmd_common(
    &self,
    f: impl FnOnce(&mut Args)
) -> Result<Output, GitError> {
    let mut builder = Args::new_cmd("git");
    f(&mut builder);
    // Split the builder into the runnable command and its recorded args.
    let Args { inner: mut command, args } = builder;
    match command.output() {
        Ok(output) if output.status.success() => Ok(output),
        Ok(output) => Err(GitError::new(output, args)),
        Err(cause) => Err(GitError::new(cause, args)),
    }
}
}