From a67b0812659d1481f4c5be77ce2cf448b0b37b8c Mon Sep 17 00:00:00 2001 From: SoniEx2 Date: Mon, 14 Jun 2021 22:49:07 -0300 Subject: Start rewriting in Rust --- src/git.rs | 728 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 728 insertions(+) create mode 100644 src/git.rs (limited to 'src/git.rs') diff --git a/src/git.rs b/src/git.rs new file mode 100644 index 0000000..df9614a --- /dev/null +++ b/src/git.rs @@ -0,0 +1,728 @@ +// This file is part of GAnarchy - decentralized development hub +// Copyright (C) 2021 Soni L. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +//! This module provides some abstractions over git. +//! +//! Having this module allows easily(-ish) replacing the git backend, for +//! example from calling the git CLI directly to using a git library. + +use std::collections::BTreeSet; +use std::error; +use std::ffi::{OsStr, OsString}; +use std::fmt; +use std::fs; +use std::io; +use std::path::Path; +use std::path::PathBuf; +//use std::process; +use std::process::{Command, Output}; + +use impl_trait::impl_trait; + +use crate::util::NamePurpose; +use crate::marker::Initializer; + +#[cfg(test)] +mod tests; + +/// Represents a local git repo. +pub struct Git { + path: PathBuf, + pending_branches: Option>, + sha256: bool, +} + +/// Error returned by operations on a git repo. +#[derive(Debug)] +pub struct GitError { + inner: GitErrorInner, + command: Vec, +} + +#[derive(Debug)] +enum GitErrorInner { + IoError(io::Error), + Output(Output), +} + +/// Helper for tracking args to a Command. +struct Args { + inner: Command, + args: Vec, +} + +impl_trait! { + impl Args { + /// Creates a new Args for the given command. + pub fn new_cmd>(cmd: S) -> Self { + let cmd = cmd.as_ref(); + Self { + inner: Command::new(cmd), + args: vec![cmd.into()], + } + } + + /// Adds a single arg to the Command. + pub fn arg>(&mut self, arg: S) -> &mut Self { + let arg = arg.as_ref(); + self.inner.arg(arg); + self.args.push(arg.into()); + self + } + + // /// Adds multiple args to the Command. + // pub fn args(&mut self, args: I) -> &mut Self + // where I: IntoIterator, S: AsRef { + // for arg in args { + // self.arg(arg); + // } + // self + // } + + // impl trait Into> { + // fn into(self) -> Result { + // todo!() + // } + // } + } +} + +/// RAII transaction guard for merging forked repos in with_work_repos. +struct Merger<'a>(&'a mut Git, Vec); + +impl From for GitErrorInner { + fn from(e: io::Error) -> Self { + Self::IoError(e) + } +} + +impl From for GitErrorInner { + fn from(e: Output) -> Self { + Self::Output(e) + } +} + +impl_trait! { + impl GitError { + /// Creates a new GitError for the given command. + fn new(inner: impl Into, cmd: Vec) -> Self { + Self { + inner: inner.into(), + command: cmd, + } + } + + impl trait fmt::Display { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Error running")?; + for part in &self.command { + let part = part.to_str().unwrap_or("[not UTF-8]"); + write!(f, " {}", part)?; + } + match &self.inner { + GitErrorInner::IoError(e) => { + write!(f, ", caused by: {}", e) + }, + GitErrorInner::Output(e) => { + let out = std::str::from_utf8(&e.stdout); + let out = out.unwrap_or("[not UTF-8]"); + let err = std::str::from_utf8(&e.stderr); + let err = err.unwrap_or("[not UTF-8]"); + write!(f, "\nstdout:\n{}", out)?; + write!(f, "\nstderr:\n{}", err) + }, + } + } + } + + impl trait error::Error { + fn source(&self) -> Option<&(dyn error::Error + 'static)> { + match self.inner { + GitErrorInner::IoError(ref error) => { + Some(error) + } + GitErrorInner::Output(_) => None, + } + } + } + } +} + +impl_trait! { + impl<'a> Merger<'a> { + /// Returns a shared, immutable reference to the main repo. + fn main(&self) -> &Git { + &*self.0 + } + + /// Merges the work repos back into the main repo. + /// + /// # Panics + /// + /// Panics if there are branches in conflict. + fn merge(mut self) -> Result<(), GitError> { + // check for conflicts first! + let mut branches = BTreeSet::<&String>::new(); + for work in &*self { + for branch in work.pending_branches.as_ref().unwrap() { + if !branches.insert(branch) { + panic!("Branch {} is in conflict!", branch); + } + } + } + drop(branches); + + for mut repo in std::mem::take(&mut self.1) { + // TODO clean up + let repo_id = repo.path.file_name().unwrap().to_str().unwrap() + .strip_prefix("ganarchy-fetch-").unwrap() + .strip_suffix(".git").unwrap() + .to_owned(); + let pending = repo.pending_branches.take().unwrap(); + for branch in pending { + let len = branch.len(); + let fetch_head = branch + "-" + &repo_id; + let branch = &fetch_head[..len]; + // First collect the work branch into a fetch head + self.0.fetch_work(&repo, &fetch_head, branch)?; + // If that succeeds, delete the work branch to free up disk + repo.rm_branch(branch)?; + // We have all the objects in the main repo and we probably + // have enough disk, so just replace the fetch head into + // the main branch and hope nothing errors. + self.0.replace(&fetch_head, branch)?; + } + repo.delete()?; + } + Ok(()) + } + + /// Accesses the work repos. + impl trait std::ops::Deref { + type Target = Vec; + + fn deref(&self) -> &Vec { + &self.1 + } + } + + /// Accesses the work repos. + impl trait std::ops::DerefMut { + fn deref_mut(&mut self) -> &mut Vec { + &mut self.1 + } + } + + /// Cleans up (deletes) the work repos, if not panicking. + impl trait Drop { + fn drop(&mut self) { + if !std::thread::panicking() { + for repo in std::mem::take(&mut self.1) { + repo.delete().unwrap(); + } + } + } + } + } +} + +/// Initializer operations on the `Git` struct. +impl Git { + /// Creates a new instance of the `Git` struct, with the path as given. + pub fn at_path>(_: Initializer, path: T) -> Option { + let path = path.as_ref(); + let filename = path.file_name()?.to_str()?; + // TODO SHA-2 + NamePurpose::CacheRepo.is_fit(filename).then(|| Git { + path: path.into(), + pending_branches: None, + sha256: false, + }) + } +} + +/// Operations on a git repo. +/// +/// # Race conditions +/// +/// These operate on the filesystem. Calling them from multiple threads +/// can result in data corruption. +impl Git { + /// Creates the given number of work repos, and calls the closure to run + /// operations on them. + /// + /// The operations can be done on the individual repos, and they'll be + /// merged into the main repo as this function returns. + /// + /// If the callback fails, the work repos will be deleted. If the function + /// succeeds, the work repos will be merged back into the main repo. + /// + /// # Panics + /// + /// Panics if a merge conflict is detected. Specifically, if two work repos + /// modify the same work branch. Also panics if this isn't a cache repo. + /// + /// # "Poisoning" + /// + /// If this method unwinds, the underlying git repos, if any, will not be + /// deleted. Instead, future calls to this method will return a GitError. + pub fn with_work_repos(&mut self, count: usize, f: F) + -> Result + where F: FnOnce(&mut [Git]) -> Result { + assert!(self.is_cache_repo()); + // create some Git structs + let mut work_repos = Vec::with_capacity(count); + for id in 0..count { + let mut new_path = self.path.clone(); + new_path.set_file_name(format!("ganarchy-fetch-{}.git", id)); + let git = Git { + path: new_path, + pending_branches: Some(Default::default()), + sha256: self.sha256, + }; + assert!(git.is_work_repo()); + work_repos.push(git); + } + // create the on-disk stuff + let merger = Merger(self, Vec::new()); + let mut merger = work_repos.into_iter() + .try_fold(merger, |mut m, mut r| { + m.main().fork(&mut r)?; + m.push(r); + Ok(m) + })?; + let result = f(&mut *merger)?; + // merge the on-disk stuff + merger.merge().and(Ok(result)) + } + + /// Fetches branch `from_ref` from source `from` into branch `branch`. + /// + /// The fetch used is a force-fetch. + /// + /// # Panics + /// + /// Panics if called on a non-work repo, if `from` starts with `-`, if + /// `branch` isn't a cache branch, or if `from_ref` starts with `-`. + pub fn fetch_source(&mut self, from: &str, branch: &str, from_ref: &str) + -> Result<(), GitError> + { + assert!(self.is_work_repo()); + assert!(!from.starts_with("-")); + assert!(!from_ref.starts_with("-")); + assert!(NamePurpose::WorkBranch.is_fit(branch)); + let _output = self.cmd(|args| { + args.arg("fetch"); + args.arg(from); + args.arg(format!("+{}:{}", from_ref, branch)); + })?; + self.pending_branches.as_mut().unwrap().insert(branch.into()); + Ok(()) + } + + /// Initializes this repo. + /// + /// # Panics + /// + /// Panics if called on a non-cache repo. + pub fn ensure_exists(&mut self) -> Result<(), GitError> { + assert!(self.is_cache_repo()); + let _output = self.cmd_init(|_| {})?; + Ok(()) + } + + /// Checks if a given commit is present in the given branch's history. + /// + /// # Panics + /// + /// Panics if this isn't a cache branch on a cache repo or if commit isn't + /// a commit. + pub fn check_history(&self, branch: &str, commit: &str) + -> Result<(), GitError> + { + assert!(self.is_cache_repo()); + assert!(NamePurpose::CacheBranch.is_fit(branch)); + assert!(self.is_commit_hash(commit)); + let _output = self.cmd(|args| { + args.arg("merge-base"); + args.arg("--is-ancestor"); + args.arg(commit); + args.arg(format!("refs/heads/{}", branch)); + })?; + Ok(()) + } + + /// Checks if the given branch is a valid branch. + /// + /// Note: "HEAD" is **not** a branch. + /// + /// # Panics + /// + /// Panics if `branch` starts with `-`. + pub fn check_branch(&self, branch: &str) -> Result<(), GitError> { + assert!(!branch.starts_with("-")); + let mut output = self.cmd(|args| { + args.arg("check-ref-format"); + args.arg("--branch"); + args.arg(branch); + })?; + // perf: Vec::default doesn't allocate. + let stdout = std::mem::take(&mut output.stdout); + let stdout = String::from_utf8(stdout); + match stdout.as_ref().map(|x| x.strip_prefix(branch)) { + Ok(Some("")) | Ok(Some("\n")) | Ok(Some("\r\n")) => { + return Ok(()) + }, + _ => (), + } + output.stdout = match stdout { + Ok(e) => e.into_bytes(), + Err(e) => e.into_bytes(), + }; + let v = vec![ + OsString::from("git"), + "check-ref-format".into(), + "--branch".into(), + branch.into(), + ]; + Err(GitError::new(output, v)) + } + + /// Returns the number of commits removed and the number of added between + /// from and to, respectively. + /// + /// # Panics + /// + /// Panics if called on a non-work repo. + pub fn get_counts(&self, from: &str, to: &str) + -> Result<(u64, u64), GitError> + { + // if called on a cache repo, `from` may no longer exist. + // this check makes sure `from` has not been garbage-collected. + assert!(self.is_work_repo()); + assert!(self.is_commit_hash(from)); + assert!(self.is_commit_hash(to)); + let mut output = self.cmd(|args| { + args.arg("rev-list"); + args.arg("--left-right"); + args.arg("--count"); + args.arg(format!("{}...{}", from, to)); + args.arg("--"); + })?; + // perf: Vec::default doesn't allocate. + let stdout = std::mem::take(&mut output.stdout); + let stdout = String::from_utf8(stdout); + match stdout.as_ref().ok().map(|x| x.trim()).filter(|x| { + x.trim_start_matches(|x| { + char::is_ascii_digit(&x) + }).trim_end_matches(|x| { + char::is_ascii_digit(&x) + }) == "\t" + }).and_then(|x| { + let (y, z) = x.split_once("\t")?; + Some((y.parse::().ok()?, z.parse::().ok()?)) + }) { + Some(v) => return Ok(v), + None => (), + } + output.stdout = match stdout { + Ok(e) => e.into_bytes(), + Err(e) => e.into_bytes(), + }; + let v = vec![ + OsString::from("git"), + "rev-list".into(), + "--left-right".into(), + "--count".into(), + format!("{}...{}", from, to).into(), + "--".into(), + ]; + Err(GitError::new(output, v)) + } + + /// Returns the commit hash at the given target. + /// + /// # Panics + /// + /// Panics if `target` starts with `-`. + pub fn get_hash(&self, target: &str) + -> Result + { + assert!(!target.starts_with("-")); + let mut output = self.cmd(|args| { + args.arg("show"); + args.arg(target); + args.arg("-s"); + args.arg("--format=format:%H"); + args.arg("--"); + })?; + // perf: Vec::default doesn't allocate. + let stdout = std::mem::take(&mut output.stdout); + let stdout = String::from_utf8(stdout); + output.stdout = match stdout { + Ok(mut h) if self.is_commit_hash(h.trim()) => { + h.truncate(h.trim().len()); + return Ok(h) + }, + Ok(e) => e.into_bytes(), + Err(e) => e.into_bytes(), + }; + let v = vec![ + OsString::from("git"), + "show".into(), + target.into(), + "-s".into(), + "--format=format:%H".into(), + "--".into(), + ]; + Err(GitError::new(output, v)) + } + + /// Returns the commit message for the given target. + /// + /// # Panics + /// + /// Panics if `target` starts with `-`. + pub fn get_message(&self, target: &str) + -> Result + { + assert!(!target.starts_with("-")); + let mut output = self.cmd(|args| { + args.arg("show"); + args.arg(target); + args.arg("-s"); + args.arg("--format=format:%B"); + args.arg("--"); + })?; + // perf: Vec::default doesn't allocate. + let stdout = std::mem::take(&mut output.stdout); + let stdout = String::from_utf8(stdout); + output.stdout = match stdout { + Ok(e) => return Ok(e), + Err(e) => e.into_bytes(), + }; + let v = vec![ + OsString::from("git"), + "show".into(), + target.into(), + "-s".into(), + "--format=format:%B".into(), + "--".into(), + ]; + Err(GitError::new(output, v)) + } +} + +/// Private operations on a git repo. +impl Git { + /// Fetches branch `from_branch` from work repo `from` into branch `branch`. + /// + /// The fetch used is a force-fetch. + /// + /// # Panics + /// + /// Panics if this isn't a cache repo, if `from` isn't a work repo, if + /// `branch` isn't a fetch head or if `from_branch` isn't a cache branch. + fn fetch_work(&mut self, from: &Git, branch: &str, from_branch: &str) + -> Result<(), GitError> + { + assert_eq!(self.sha256, from.sha256); + assert!(self.is_cache_repo()); + assert!(from.is_work_repo()); + assert!(NamePurpose::CacheBranch.is_fit(from_branch)); + assert!(NamePurpose::CacheFetchHead.is_fit(branch)); + let _output = self.cmd(|args| { + args.arg("fetch"); + args.arg(&from.path); + args.arg(format!("+{}:{}", from_branch, branch)); + })?; + Ok(()) + } + + /// Replaces branch `new_name` with branch `old_name`. + /// + /// # Panics + /// + /// Panics if this isn't a cache repo, if `old_name` isn't a fetch head, + /// or if `new_name` isn't a cache branch. + fn replace(&mut self, old_name: &str, new_name: &str) + -> Result<(), GitError> + { + assert!(self.is_cache_repo()); + assert!(NamePurpose::CacheBranch.is_fit(new_name)); + assert!(NamePurpose::CacheFetchHead.is_fit(old_name)); + let _output = self.cmd(|args| { + args.arg("branch"); + args.arg("-M"); + args.arg(old_name).arg(new_name); + })?; + Ok(()) + } + + /// Deletes work branch `branch`. + /// + /// # Panics + /// + /// Panics if the branch isn't a work branch or if this isn't a work + /// repo. + fn rm_branch(&mut self, branch: &str) -> Result<(), GitError> { + assert!(self.is_work_repo()); + assert!(NamePurpose::WorkBranch.is_fit(branch)); + let _output = self.cmd(|args| { + args.arg("branch"); + args.arg("-D").arg(branch); + })?; + Ok(()) + } + + /// Makes a shared clone of this lcoal repo into the given work repo. + /// + /// Equivalent to `git clone --bare --shared`, which is very dangerous! + /// + /// # Panics + /// + /// Panics if this repo isn't a cache repo, and/or if the given repo isn't + /// a work repo. + fn fork(&self, into: &mut Git) -> Result<(), GitError> { + // check that this is a cache repo + assert_eq!(self.sha256, into.sha256); + assert!(self.is_cache_repo()); + assert!(into.is_work_repo()); + let _output = into.cmd_clone_from(&self.path, |args| { + args.arg("--shared"); + })?; + Ok(()) + } + + /// Deletes this repo. + /// + /// # Panics + /// + /// Panics if called on a non-work repo. + fn delete(self) -> Result<(), GitError> { + assert!(self.is_work_repo()); + fs::remove_dir_all(&self.path).map_err(|e| { + let args = vec![ + "(synthetic)".into(), + "rm".into(), + "-rf".into(), + OsString::from(&self.path) + ]; + GitError::new(e, args) + }) + } +} + +/// Helpers. +impl Git { + /// Returns true if this is a cache repo. + fn is_cache_repo(&self) -> bool { + let filename = self.path.file_name().unwrap().to_str(); + if self.sha256 { + NamePurpose::CacheRepo64.is_fit(filename.unwrap()) + } else { + NamePurpose::CacheRepo.is_fit(filename.unwrap()) + } + } + + /// Returns true if this is a work repo. + fn is_work_repo(&self) -> bool { + let filename = self.path.file_name().unwrap().to_str(); + if self.sha256 { + NamePurpose::WorkRepo64.is_fit(filename.unwrap()) + } else { + NamePurpose::WorkRepo.is_fit(filename.unwrap()) + } + } + + /// Returns true if the string is a commit hash. + /// + /// Does not check if the commit exists. + fn is_commit_hash(&self, commit: &str) -> bool { + if self.sha256 { + NamePurpose::Commit64.is_fit(commit) + } else { + NamePurpose::Commit.is_fit(commit) + } + } +} + +/// Raw commands on a git repo. +impl Git { + /// Runs a command for initializing this git repo. + /// + /// Always uses `--bare`. + fn cmd_init(&self, f: impl FnOnce(&mut Args)) -> Result { + self.cmd_common(|cmd| { + cmd.arg("init").arg("--bare"); + f(&mut *cmd); + cmd.arg(&self.path); + }) + } + + /// Runs a command for cloning into this git repo. + /// + /// Always uses `--bare`. + fn cmd_clone_from( + &self, + from: impl AsRef, + f: impl FnOnce(&mut Args) + ) -> Result { + self.cmd_common(|cmd| { + cmd.arg("clone").arg("--bare"); + f(&mut *cmd); + cmd.arg(from).arg(&self.path); + }) + } + + /// Runs a command for operating on this git repo. + /// + /// Note: Doesn't work for git init and git clone operations. Use + /// [`cmd_init`] and [`cmd_clone_from`] instead. + /// + /// Always uses `--bare`. + fn cmd(&self, f: impl FnOnce(&mut Args)) -> Result { + self.cmd_common(|cmd| { + cmd.arg("-C").arg(&self.path); + cmd.arg("--bare"); + f(&mut *cmd); + }) + } + + /// Common handling of raw commands. + /// + /// `"git" + f()` and error handling. + fn cmd_common( + &self, + f: impl FnOnce(&mut Args) + ) -> Result { + let mut cmd = Args::new_cmd("git"); + f(&mut cmd); + // run the command and make nicer Error + let Args { inner: mut cmd, args } = cmd; + let mut args = Some(args); + cmd.output().map_err(|e| { + GitError::new(e, args.take().unwrap()) + }).and_then(|output| { + if output.status.success() { + Ok(output) + } else { + Err(GitError::new(output, args.take().unwrap())) + } + }) + } + +} -- cgit 1.4.1