// This file is part of GAnarchy - decentralized development hub // Copyright (C) 2021 Soni L. // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU Affero General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Affero General Public License for more details. // // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . //! This module provides some abstractions over git. //! //! Having this module allows easily(-ish) replacing the git backend, for //! example from calling the git CLI directly to using a git library. use std::collections::BTreeSet; use std::error; use std::ffi::{OsStr, OsString}; use std::fmt; use std::fs; use std::io; use std::path::Path; use std::path::PathBuf; //use std::process; use std::process::{Command, Output}; use impl_trait::impl_trait; use crate::util::NamePurpose; use crate::marker::Initializer; #[cfg(test)] mod tests; mod sealed { pub trait Sealed { fn is_repo_path_valid(filename: &str, sha256: bool) -> bool; } } /// A repository kind. pub trait RepoKind: sealed::Sealed { } /// A permanent repository used to cache remote objects. pub struct CacheRepo { _non_exhaustive: (), } impl sealed::Sealed for CacheRepo { fn is_repo_path_valid(filename: &str, sha256: bool) -> bool { if sha256 { NamePurpose::CacheRepo64.is_fit(filename) } else { NamePurpose::CacheRepo.is_fit(filename) } } } impl RepoKind for CacheRepo { } /// A temporary repository used to fetch remote objects. pub struct FetchRepo { pending_branches: BTreeSet, } impl sealed::Sealed for FetchRepo { fn is_repo_path_valid(filename: &str, sha256: bool) -> bool { if sha256 { NamePurpose::WorkRepo64.is_fit(filename) } else { NamePurpose::WorkRepo.is_fit(filename) } } } impl RepoKind for FetchRepo { } /// A local git repository. pub struct Git { path: PathBuf, sha256: bool, inner: T, } /// Error returned by operations on a git repo. #[derive(Debug)] pub struct GitError { inner: GitErrorInner, command: Vec, } #[derive(Debug)] enum GitErrorInner { IoError(io::Error), Output(Output), } /// Helper for tracking args to a Command. struct Args { inner: Command, args: Vec, } impl_trait! { impl Args { /// Creates a new Args for the given command. pub fn new_cmd>(cmd: S) -> Self { let cmd = cmd.as_ref(); Self { inner: Command::new(cmd), args: vec![cmd.into()], } } /// Adds a single arg to the Command. pub fn arg>(&mut self, arg: S) -> &mut Self { let arg = arg.as_ref(); self.inner.arg(arg); self.args.push(arg.into()); self } // /// Adds multiple args to the Command. // pub fn args(&mut self, args: I) -> &mut Self // where I: IntoIterator, S: AsRef { // for arg in args { // self.arg(arg); // } // self // } // impl trait Into> { // fn into(self) -> Result { // todo!() // } // } } } /// RAII transaction guard for merging forked repos in with_work_repos. struct Merger<'a>(&'a mut Git, Vec>); impl From for GitErrorInner { fn from(e: io::Error) -> Self { Self::IoError(e) } } impl From for GitErrorInner { fn from(e: Output) -> Self { Self::Output(e) } } impl_trait! { impl GitError { /// Creates a new GitError for the given command. fn new(inner: impl Into, cmd: Vec) -> Self { Self { inner: inner.into(), command: cmd, } } impl trait fmt::Display { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "Error running")?; for part in &self.command { let part = part.to_str().unwrap_or("[not UTF-8]"); write!(f, " {}", part)?; } match &self.inner { GitErrorInner::IoError(e) => { write!(f, ", caused by: {}", e) }, GitErrorInner::Output(e) => { let out = std::str::from_utf8(&e.stdout); let out = out.unwrap_or("[not UTF-8]"); let err = std::str::from_utf8(&e.stderr); let err = err.unwrap_or("[not UTF-8]"); write!(f, "\nstdout:\n{}", out)?; write!(f, "\nstderr:\n{}", err) }, } } } impl trait error::Error { fn source(&self) -> Option<&(dyn error::Error + 'static)> { match self.inner { GitErrorInner::IoError(ref error) => { Some(error) } GitErrorInner::Output(_) => None, } } } } } impl_trait! { impl<'a> Merger<'a> { /// Returns a shared, immutable reference to the main repo. fn main(&self) -> &Git { &*self.0 } /// Merges the work repos back into the main repo. /// /// # Panics /// /// Panics if there are branches in conflict. fn merge(mut self) -> Result<(), GitError> { // check for conflicts first! let mut branches = BTreeSet::<&String>::new(); for work in &*self { for branch in &work.inner.pending_branches { if !branches.insert(branch) { panic!("Branch {} is in conflict!", branch); } } } drop(branches); for mut repo in std::mem::take(&mut self.1) { // TODO clean up let repo_id = repo.path.file_name().unwrap().to_str().unwrap() .strip_prefix("ganarchy-fetch-").unwrap() .strip_suffix(".git").unwrap() .to_owned(); for branch in std::mem::take(&mut repo.inner.pending_branches) { let len = branch.len(); let fetch_head = branch + "-" + &repo_id; let branch = &fetch_head[..len]; // First collect the work branch into a fetch head self.0.fetch_work(&repo, &fetch_head, branch)?; // If that succeeds, delete the work branch to free up disk repo.rm_branch(branch)?; // We have all the objects in the main repo and we probably // have enough disk, so just replace the fetch head into // the main branch and hope nothing errors. self.0.replace(&fetch_head, branch)?; } repo.delete()?; } Ok(()) } /// Accesses the work repos. impl trait std::ops::Deref { type Target = Vec>; fn deref(&self) -> &Vec> { &self.1 } } /// Accesses the work repos. impl trait std::ops::DerefMut { fn deref_mut(&mut self) -> &mut Vec> { &mut self.1 } } /// Cleans up (deletes) the work repos, if not panicking. impl trait Drop { fn drop(&mut self) { if !std::thread::panicking() { for repo in std::mem::take(&mut self.1) { repo.delete().unwrap(); } } } } } } /// Initializer operations on the `Git` struct. impl Git { /// Creates a new instance of the `Git` struct, with the path as given. pub fn at_path>(_: Initializer, path: T) -> Option> { let path = path.as_ref(); // using `?` for side-effects. let _ = path.file_name()?.to_str()?; // TODO SHA-2 Some(Git { path: path.into(), sha256: false, inner: CacheRepo { _non_exhaustive: (), }, }).filter(Self::is_path_valid) } } /// Operations on a cache repo. impl Git { /// Creates the given number of work repos, and calls the closure to run /// operations on them. /// /// The operations can be done on the individual repos, and they'll be /// merged into the main repo as this function returns. /// /// If the callback fails, the work repos will be deleted. If the function /// succeeds, the work repos will be merged back into the main repo. /// /// # Panics /// /// Panics if a merge conflict is detected. Specifically, if two work repos /// modify the same work branch. /// /// # "Poisoning" /// /// If this method unwinds, the underlying git repos, if any, will not be /// deleted. Instead, future calls to this method will return a GitError. pub fn with_work_repos(&mut self, count: usize, f: F) -> Result where F: FnOnce(&mut [Git]) -> Result { // create some Git structs let mut work_repos = Vec::with_capacity(count); for id in 0..count { let mut new_path = self.path.clone(); new_path.set_file_name(format!("ganarchy-fetch-{}.git", id)); let git = Git { path: new_path, inner: FetchRepo { pending_branches: Default::default(), }, sha256: self.sha256, }; assert!(git.is_path_valid()); work_repos.push(git); } // create the on-disk stuff let merger = Merger(self, Vec::new()); let mut merger = work_repos.into_iter() .try_fold(merger, |mut m, mut r| { m.main().fork(&mut r)?; m.push(r); Ok(m) })?; let result = f(&mut *merger)?; // merge the on-disk stuff merger.merge().and(Ok(result)) } /// Initializes this repo. pub fn ensure_exists(&mut self) -> Result<(), GitError> { let _output = self.cmd_init(|_| {})?; Ok(()) } /// Checks if a given commit is present in the given branch's history. /// /// # Panics /// /// Panics if this isn't a cache branch or if commit isn't /// a commit. pub fn check_history(&self, branch: &str, commit: &str) -> Result<(), GitError> { assert!(NamePurpose::CacheBranch.is_fit(branch)); assert!(self.is_commit_hash(commit)); let _output = self.cmd(|args| { args.arg("merge-base"); args.arg("--is-ancestor"); args.arg(commit); args.arg(format!("refs/heads/{}", branch)); })?; Ok(()) } } /// Operations on a fetch repo. impl Git { /// Fetches branch `from_ref` from source `from` into branch `branch`. /// /// The fetch used is a force-fetch. /// /// # Panics /// /// Panics if `from` starts with `-`, if /// `branch` isn't a cache branch, or if `from_ref` starts with `-`. pub fn fetch_source(&mut self, from: &str, branch: &str, from_ref: &str) -> Result<(), GitError> { assert!(!from.starts_with("-")); assert!(!from_ref.starts_with("-")); assert!(NamePurpose::WorkBranch.is_fit(branch)); let _output = self.cmd(|args| { args.arg("fetch"); args.arg(from); args.arg(format!("+{}:{}", from_ref, branch)); })?; self.inner.pending_branches.insert(branch.into()); Ok(()) } /// Returns the number of commits removed and the number of added between /// from and to, respectively. pub fn get_counts(&self, from: &str, to: &str) -> Result<(u64, u64), GitError> { // if called on a cache repo, `from` may no longer exist. the FetchRepo // requirement makes sure `from` has not been garbage-collected. assert!(self.is_commit_hash(from)); assert!(self.is_commit_hash(to)); let mut output = self.cmd(|args| { args.arg("rev-list"); args.arg("--left-right"); args.arg("--count"); args.arg(format!("{}...{}", from, to)); args.arg("--"); })?; // perf: Vec::default doesn't allocate. let stdout = std::mem::take(&mut output.stdout); let stdout = String::from_utf8(stdout); match stdout.as_ref().ok().map(|x| x.trim()).filter(|x| { x.trim_start_matches(|x| { char::is_ascii_digit(&x) }).trim_end_matches(|x| { char::is_ascii_digit(&x) }) == "\t" }).and_then(|x| { let (y, z) = x.split_once("\t")?; Some((y.parse::().ok()?, z.parse::().ok()?)) }) { Some(v) => return Ok(v), None => (), } output.stdout = match stdout { Ok(e) => e.into_bytes(), Err(e) => e.into_bytes(), }; let v = vec![ OsString::from("git"), "rev-list".into(), "--left-right".into(), "--count".into(), format!("{}...{}", from, to).into(), "--".into(), ]; Err(GitError::new(output, v)) } } /// Generic Git operations. impl Git { /// Checks if the given branch is a valid branch. /// /// Note: "HEAD" is **not** a branch. /// /// # Panics /// /// Panics if `branch` starts with `-`. pub fn check_branch(&self, branch: &str) -> Result<(), GitError> { assert!(!branch.starts_with("-")); let mut output = self.cmd(|args| { args.arg("check-ref-format"); args.arg("--branch"); args.arg(branch); })?; // perf: Vec::default doesn't allocate. let stdout = std::mem::take(&mut output.stdout); let stdout = String::from_utf8(stdout); match stdout.as_ref().map(|x| x.strip_prefix(branch)) { Ok(Some("")) | Ok(Some("\n")) | Ok(Some("\r\n")) => { return Ok(()) }, _ => (), } output.stdout = match stdout { Ok(e) => e.into_bytes(), Err(e) => e.into_bytes(), }; let v = vec![ OsString::from("git"), "check-ref-format".into(), "--branch".into(), branch.into(), ]; Err(GitError::new(output, v)) } /// Returns the commit hash at the given target. /// /// # Panics /// /// Panics if `target` starts with `-`. pub fn get_hash(&self, target: &str) -> Result { assert!(!target.starts_with("-")); let mut output = self.cmd(|args| { args.arg("show"); args.arg(target); args.arg("-s"); args.arg("--format=format:%H"); args.arg("--"); })?; // perf: Vec::default doesn't allocate. let stdout = std::mem::take(&mut output.stdout); let stdout = String::from_utf8(stdout); output.stdout = match stdout { Ok(mut h) if self.is_commit_hash(h.trim()) => { h.truncate(h.trim().len()); return Ok(h) }, Ok(e) => e.into_bytes(), Err(e) => e.into_bytes(), }; let v = vec![ OsString::from("git"), "show".into(), target.into(), "-s".into(), "--format=format:%H".into(), "--".into(), ]; Err(GitError::new(output, v)) } /// Returns the commit message for the given target. /// /// # Panics /// /// Panics if `target` starts with `-`. pub fn get_message(&self, target: &str) -> Result { assert!(!target.starts_with("-")); let mut output = self.cmd(|args| { args.arg("show"); args.arg(target); args.arg("-s"); args.arg("--format=format:%B"); args.arg("--"); })?; // perf: Vec::default doesn't allocate. let stdout = std::mem::take(&mut output.stdout); let stdout = String::from_utf8(stdout); output.stdout = match stdout { Ok(e) => return Ok(e), Err(e) => e.into_bytes(), }; let v = vec![ OsString::from("git"), "show".into(), target.into(), "-s".into(), "--format=format:%B".into(), "--".into(), ]; Err(GitError::new(output, v)) } } /// Private operations on a git cache repo. impl Git { /// Fetches branch `from_branch` from work repo `from` into branch `branch`. /// /// The fetch used is a force-fetch. /// /// # Panics /// /// Panics if /// `branch` isn't a fetch head or if `from_branch` isn't a cache branch. fn fetch_work(&mut self, from: &Git, branch: &str, from_branch: &str) -> Result<(), GitError> { assert_eq!(self.sha256, from.sha256); assert!(NamePurpose::CacheBranch.is_fit(from_branch)); assert!(NamePurpose::CacheFetchHead.is_fit(branch)); let _output = self.cmd(|args| { args.arg("fetch"); args.arg(&from.path); args.arg(format!("+{}:{}", from_branch, branch)); })?; Ok(()) } /// Replaces branch `new_name` with branch `old_name`. /// /// # Panics /// /// Panics if `old_name` isn't a fetch head, /// or if `new_name` isn't a cache branch. fn replace(&mut self, old_name: &str, new_name: &str) -> Result<(), GitError> { assert!(NamePurpose::CacheBranch.is_fit(new_name)); assert!(NamePurpose::CacheFetchHead.is_fit(old_name)); let _output = self.cmd(|args| { args.arg("branch"); args.arg("-M"); args.arg(old_name).arg(new_name); })?; Ok(()) } /// Makes a shared clone of this local repo into the given work repo. /// /// Equivalent to `git clone --bare --shared`, which is very dangerous! fn fork(&self, into: &mut Git) -> Result<(), GitError> { // check that this is a cache repo assert_eq!(self.sha256, into.sha256); let _output = into.cmd_clone_from(&self.path, |args| { args.arg("--shared"); })?; Ok(()) } } /// Private operations on a git fetch repo. impl Git { /// Deletes work branch `branch`. /// /// # Panics /// /// Panics if the branch isn't a work branch. fn rm_branch(&mut self, branch: &str) -> Result<(), GitError> { assert!(NamePurpose::WorkBranch.is_fit(branch)); let _output = self.cmd(|args| { args.arg("branch"); args.arg("-D").arg(branch); })?; Ok(()) } /// Deletes this repo. fn delete(self) -> Result<(), GitError> { fs::remove_dir_all(&self.path).map_err(|e| { let args = vec![ "(synthetic)".into(), "rm".into(), "-rf".into(), OsString::from(&self.path) ]; GitError::new(e, args) }) } } /// Helpers. impl Git { /// Returns true if this repo's path is valid. fn is_path_valid(&self) -> bool { let filename = self.path.file_name().unwrap().to_str(); T::is_repo_path_valid(filename.unwrap(), self.sha256) } /// Returns true if the string is a commit hash. /// /// Does not check if the commit exists. fn is_commit_hash(&self, commit: &str) -> bool { if self.sha256 { NamePurpose::Commit64.is_fit(commit) } else { NamePurpose::Commit.is_fit(commit) } } } /// Raw commands on a git repo. impl Git { /// Runs a command for initializing this git repo. /// /// Always uses `--bare`. fn cmd_init(&self, f: impl FnOnce(&mut Args)) -> Result { self.cmd_common(|cmd| { cmd.arg("init").arg("--bare"); f(&mut *cmd); cmd.arg(&self.path); }) } /// Runs a command for cloning into this git repo. /// /// Always uses `--bare`. fn cmd_clone_from( &self, from: impl AsRef, f: impl FnOnce(&mut Args) ) -> Result { self.cmd_common(|cmd| { cmd.arg("clone").arg("--bare"); f(&mut *cmd); cmd.arg(from).arg(&self.path); }) } /// Runs a command for operating on this git repo. /// /// Note: Doesn't work for git init and git clone operations. Use /// [`cmd_init`] and [`cmd_clone_from`] instead. /// /// Always uses `--bare`. fn cmd(&self, f: impl FnOnce(&mut Args)) -> Result { self.cmd_common(|cmd| { cmd.arg("-C").arg(&self.path); cmd.arg("--bare"); f(&mut *cmd); }) } /// Common handling of raw commands. /// /// `"git" + f()` and error handling. fn cmd_common( &self, f: impl FnOnce(&mut Args) ) -> Result { let mut cmd = Args::new_cmd("git"); f(&mut cmd); // run the command and make nicer Error let Args { inner: mut cmd, args } = cmd; let mut args = Some(args); cmd.output().map_err(|e| { GitError::new(e, args.take().unwrap()) }).and_then(|output| { if output.status.success() { Ok(output) } else { Err(GitError::new(output, args.take().unwrap())) } }) } }