#!/usr/bin/env python

# HTMLGDump - dumps a git repo to html (and symlinks)
# Copyright (C) 2021 Soni L.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

# tl;dr: install this as a git hook (post-receive)
# then configure your webserver and stuff

import dataclasses
import os
import os.path
import pathlib
import shutil
import subprocess
import sys

from urllib.parse import quote

import pygit2

from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import get_lexer_for_filename
import pygments.util


@dataclasses.dataclass
class GitChange:
    """One ref update, as read from a line of the hook's standard input.

    The three init fields are the space-separated parts of the line, in
    order. ``deleting`` is derived from ``new_value``: it is true when
    ``new_value`` is an all-zero id of length 40 or 64, which the rest of the
    script treats as "this ref is being removed".
    """
    old_value: str
    new_value: str
    ref_name: str
    deleting: bool = dataclasses.field(init=False)

    def __post_init__(self):
        self.deleting = self.new_value in ("0" * 40, "0" * 64)


def get_relative(path, target):
    """Makes target relative to path, without filesystem operations.

    Returns the relative path (a str) that leads from ``path`` to ``target``,
    suitable as the target of a symlink created inside ``path``.
    """
    return os.path.relpath(target, start=path)


CACHE_HOME = os.environ.get('XDG_CACHE_HOME', '')
if not CACHE_HOME:
    # expanduser() uses HOME when it is set and otherwise looks the home
    # directory up, instead of raising KeyError like os.environ['HOME'].
    CACHE_HOME = os.path.join(os.path.expanduser('~'), '.cache')
CACHE_HOME = CACHE_HOME + "/htmlgdump"

# post-receive runs on $GIT_DIR
repo = pygit2.Repository(os.getcwd())

try:
    name = pathlib.Path.cwd().relative_to(repo.config["htmlgdump.base"])
except (KeyError, ValueError):
    # KeyError: htmlgdump.base is not configured for this repo.
    # ValueError: the repo does not live under the configured base.
    # Either way there is nothing to do; exit quietly.
    sys.exit()

# one "<old> <new> <ref>" line per updated ref; blank lines are ignored
changes = [
    GitChange(*line.rstrip("\n").split(" ", 2))
    for line in sys.stdin
    if line.strip()
]

gen_dir = pathlib.Path(CACHE_HOME) / name / "gen"
gen_dir.mkdir(parents=True, exist_ok=True)

# commits whose pages still have to be generated
todocommits = set()

print("updating refs")

# build changed refs
for change in changes:
    path = gen_dir / change.ref_name
    if change.deleting:
        try:
            shutil.rmtree(path)
        except FileNotFoundError:
            pass
        continue
    path.mkdir(parents=True, exist_ok=True)
    index = path / "index.html"
    link = path / "tree"
    # The pushed object is not necessarily a commit (an annotated tag yields
    # a tag object), so peel it down to the commit before using tree_id.
    commit = repo[change.new_value].peel(pygit2.Commit)
    tree = gen_dir / "trees" / str(commit.tree_id)
    # "w": the ref moved, so any previous page is overwritten
    with index.open("w", encoding="utf-8") as f:
        # TODO
        f.write("refview tree")
    todocommits.add(commit)
    linktarget = get_relative(path, tree)
    # replace the link left behind by the ref's previous value, if any
    link.unlink(missing_ok=True)
    link.symlink_to(linktarget, target_is_directory=True)

print("generating refs")

# create missing refs
for ref_name in repo.references:
    ref = repo.references.get(ref_name)
    path = gen_dir / ref.name
    path.mkdir(parents=True, exist_ok=True)
    index = path / "index.html"
    link = path / "tree"
    commit = ref.peel(pygit2.Commit)
    tree = gen_dir / "trees" / str(commit.tree_id)
    try:
        # "x" fails if the page exists, which doubles as the "already
        # generated" marker
        f = index.open("x", encoding="utf-8")
    except FileExistsError:
        # check if we've already visited this commit
        continue
    with f:
        # TODO
        f.write("refview tree")
    todocommits.add(commit)
    linktarget = get_relative(path, tree)
    link.symlink_to(linktarget, target_is_directory=True)

# trees whose pages still have to be generated
todotrees = set()

print("generating commits")

# build commits
while todocommits:
    c = todocommits.pop()
    path = gen_dir / "commits" / str(c.id)
    path.mkdir(parents=True, exist_ok=True)
    index = path / "index.html"
    link = path / "tree"
    tree = gen_dir / "trees" / str(c.tree_id)
    try:
        f = index.open("x", encoding="utf-8")
    except FileExistsError:
        # check if we've already visited this commit
        continue
    with f:
        # TODO
        f.write("commitview tree")
    todotrees.add(c.tree)
    # walk the history: parents are queued and skipped above once generated
    todocommits.update(c.parents)
    linktarget = get_relative(path, tree)
    link.symlink_to(linktarget, target_is_directory=True)

# a dict /!\
# maps blobs to some metadata
# FIXME this can get quite expensive with larger repos, and might even run out
# of RAM.
todoblobs = {}
# NOTE(review): nothing visible in this section inserts into todoblobs, so
# the blob loop below never runs as written - confirm where it is populated.

print("generating trees")

# build trees
while todotrees:
    t = todotrees.pop()
    path = gen_dir / "trees" / str(t.id)
    path.mkdir(parents=True, exist_ok=True)
    index = path / "index.html"
    try:
        # "x" fails if the page exists, which doubles as the "already
        # generated" marker
        f = index.open("x", encoding="utf-8")
    except FileExistsError:
        # check if we've already visited this tree
        continue
    with f:
        f.write("tree")

print("generating blobs")

# build blobs
while todoblobs:
    (b, meta) = todoblobs.popitem()
    path = gen_dir / "blobs" / str(b.id)
    path.mkdir(parents=True, exist_ok=True)
    index = path / "index.html"
    try:
        # Blob text is decoded as UTF-8 below, so write the page as UTF-8 too
        # instead of whatever the hook's locale happens to be.
        f = index.open("x", encoding="utf-8")
    except FileExistsError:
        # check if we've already visited this blob
        continue
    with f:
        f.write("blob")
        f.write("view raw")
        # Only decoding and highlighting are guarded; a failure while writing
        # must not be swallowed and leave a truncated page behind.
        highlighted = None
        try:
            text = b.data.decode("utf-8", errors="strict")
            # presumably each meta entry has a file name at index 1 - verify
            # against whatever fills todoblobs
            lexer_names = {
                get_lexer_for_filename(entry[1]).name for entry in meta
            }
            if len(lexer_names) == 1:
                # every name this blob is known under agrees on one lexer
                lex = get_lexer_for_filename(meta[0][1])
                highlighted = highlight(text, lex, HtmlFormatter())
            else:
                # TODO maybe just write `text` (html escaped)?
                pass
        except UnicodeError:
            # not valid UTF-8: no inline view, only the raw file
            pass
        except pygments.util.ClassNotFound:
            # no lexer matches the file name: no inline view
            pass
        if highlighted is not None:
            f.write(highlighted)
        f.write("")
    raw = path / "raw.bin"
    with raw.open("wb") as f:
        f.write(b)

# create index.html
path = gen_dir / "index.html"
with path.open("w", encoding="utf-8") as f:
    f.write("index")

print("copying to output")

# CANNOT use shutil.copytree - it is broken.
# also need to be aware of copying into a directory, so we just always make it
# a directory.
# Output directory: "browse" under the current working directory.
browse = pathlib.Path.cwd().joinpath("browse")
browse.mkdir(exist_ok=True, parents=True)

# Copy every top-level entry of gen_dir into the output directory.
# -R recurses; -P copies the symlinks themselves rather than their targets.
copy_command = ["cp", "-R", "-P"]
copy_command.extend(gen_dir.glob("*"))
copy_command.append(browse)
subprocess.run(copy_command, check=True)

# Example layout of the generated directory:
#
# └── gen
#     ├── blobs
#     │   └── e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
#     │       ├── index.html
#     │       └── raw.bin
#     ├── commits
#     │   ├── 21177a2933b1a9d21d8437159405c5bc68b4d32e
#     │   │   ├── index.html
#     │   │   └── tree -> ../../trees/1663be45d5f6b9f092c4b98d44cf7992b427172f
#     │   └── 3ea9318f6271ece3c7560f18d0b22f50bd3cefe5
#     │       ├── index.html
#     │       └── tree -> ../../trees/17d6338b3a3dc189bdc3bea8481fe5f32fd388c8
#     ├── refs
#     │   └── heads
#     │       └── default
#     │           ├── index.html
#     │           └── tree -> ../../../trees/1663be45d5f6b9f092c4b98d44cf7992b427172f
#     └── trees
#         ├── 1663be45d5f6b9f092c4b98d44cf7992b427172f
#         │   ├── bar -> ../../blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
#         │   ├── baz -> ../29ba47b07d262ad717095f2d94ec771194c4c083
#         │   ├── deleteme -> ../../blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
#         │   ├── foo -> ../../blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
#         │   └── index.html
#         ├── 17d6338b3a3dc189bdc3bea8481fe5f32fd388c8
#         │   ├── bar -> ../../blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
#         │   ├── baz -> ../29ba47b07d262ad717095f2d94ec771194c4c083
#         │   ├── foo -> ../../blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
#         │   └── index.html
#         └── 29ba47b07d262ad717095f2d94ec771194c4c083
#             ├── index.html
#             └── qux -> ../../blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391