#!/usr/bin/env python

# HTMLGDump - dumps a git repo to html (and symlinks)
# Copyright (c) 2021 Soni L.
#
# Permission is hereby granted, free of charge, to any person ("You") obtaining
# a copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# This license shall be void if You bring a copyright lawsuit, related or
# unrelated to the Software, against any of the copyright holders.
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# tl;dr: install this as a git hook (post-receive)
# then configure your webserver and stuff

import dataclasses
import os
import os.path
import pathlib
import shutil
import subprocess
import sys
from urllib.parse import quote

import pygit2
from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import get_lexer_for_filename
from pygments.lexers import guess_lexer
from pygments.lexers import guess_lexer_for_filename
import pygments.util


@dataclasses.dataclass
class GitChange:
    """One ref update, parsed from a line of the hook's standard input.

    Each input line is split on spaces into ``old_value``, ``new_value`` and
    ``ref_name`` (in that order).  ``deleting`` is derived, not passed in.
    """

    old_value: str
    new_value: str
    ref_name: str
    # True when new_value is the all-zero id (40 or 64 hex digits).
    deleting: bool = dataclasses.field(init=False)

    def __post_init__(self):
        self.deleting = self.new_value in ("0" * 40, "0" * 64)


def get_relative(path, target):
    """Makes target relative to path, without filesystem operations."""
    return os.path.relpath(target, start=path)


def find_lexer(text, meta):
    """Attempts to find a lexer for the given text/meta.

    ``meta`` is a sequence of entries whose item at index 1 is used as a
    filename (presumably one entry per place the blob appears - verify
    against the code that fills ``todoblobs``).  Returns a pygments lexer,
    or None if nothing matched.
    """
    # TODO this can probably be improved
    # try exact lexers based on filename
    # this is by far the fastest, but may lead to incorrect results sometimes.
    try:
        # only trust the filename if every known name agrees on the lexer
        if len({get_lexer_for_filename(f[1]).name for f in meta}) == 1:
            return get_lexer_for_filename(meta[0][1])
    except pygments.util.ClassNotFound:
        pass
    # try lexers based on filename and content
    try:
        if len({guess_lexer_for_filename(f[1], text).name for f in meta}) == 1:
            return guess_lexer_for_filename(meta[0][1], text)
    except pygments.util.ClassNotFound:
        pass
    # try lexers based only on content
    try:
        return guess_lexer(text)
    except pygments.util.ClassNotFound:
        pass
    return None


def check_soupault_version(soupault):
    """Checks if the given soupault command provides the correct version.

    Runs ``<soupault> --version`` and inspects the first output line.
    Returns normally for 4.x.y and for 3.x.y from 3.1.1 onwards; otherwise
    prints a hint and exits with a non-zero status.

    Raises:
        subprocess.CalledProcessError: if the command itself fails.
    """
    # e.g. soupault 3.1.1
    # versions up to 3.1.0 have a major security flaw which makes them
    # unsuitable for use with this program
    output = subprocess.run(
        [soupault, "--version"],
        stdout=subprocess.PIPE,
        check=True,
    ).stdout.splitlines()
    # empty output is treated like an unsupported version, not a crash
    version = output[0].decode() if output else ""
    # support for soupault 4.x.y
    if version.startswith("soupault 4."):
        return
    # semver doesn't allow leading 0 on any numeric fields, so matching the
    # "3.1.0" prefix is safe
    too_old = version.startswith(("soupault 3.0.", "soupault 3.1.0"))
    if too_old or not version.startswith("soupault 3."):
        print("please use soupault 3.1.1 or newer")
        sys.exit(1)


def find_soupault_config(dirs):
    """Opens the first readable ``soupault.toml`` found in ``dirs``.

    Directories are tried in order.  Returns the open file object; if none
    can be opened, prints every path that was tried and exits with a
    non-zero status.
    """
    candidates = [pathlib.Path(d) / "soupault.toml" for d in dirs]
    for path in candidates:
        try:
            f = path.open()
        except OSError:
            continue
        print(f"using {path} as soupault config")
        return f
    print("couldn't find soupault config. tried paths:")
    for path in candidates:
        print(path)
    sys.exit(1)


CACHE_HOME = os.environ.get('XDG_CACHE_HOME', '')
if not CACHE_HOME:
    CACHE_HOME = os.environ['HOME'] + '/.cache'
CACHE_HOME = CACHE_HOME + "/htmlgdump"

CONFIG_HOME = os.environ.get('XDG_CONFIG_HOME', '')
if not CONFIG_HOME:
    CONFIG_HOME = os.environ['HOME'] + '/.config'
CONFIG_HOME = CONFIG_HOME + "/htmlgdump"

CONFIG_DIRS = os.environ.get('XDG_CONFIG_DIRS', '')
if not CONFIG_DIRS:
    CONFIG_DIRS = '/etc/xdg'
# TODO check if this is correct
CONFIG_DIRS = [config_dir + "/htmlgdump" for config_dir in CONFIG_DIRS.split(':')]

soupault_config = find_soupault_config([CONFIG_HOME] + CONFIG_DIRS)

# post-receive runs on $GIT_DIR
repo = pygit2.Repository(os.getcwd())

try:
    name = pathlib.Path.cwd().relative_to(repo.config["htmlgdump.base"])
except (KeyError, ValueError):
    print("please set htmlgdump.base")
    sys.exit(1)

# the soupault command is optionally overridden through git config
soupault = "soupault"
try:
    soupault = repo.config["htmlgdump.soupault"]
except (KeyError, ValueError):
    pass

check_soupault_version(soupault)

# one GitChange per stdin line: "<old> <new> <ref>"
changes = [GitChange(*line.rstrip("\n").split(" ", 2)) for line in sys.stdin]

gen_dir = pathlib.Path(CACHE_HOME) / name / "gen"
gen_dir.mkdir(parents=True, exist_ok=True)
build_dir = pathlib.Path(CACHE_HOME) / name / "build"
build_dir.mkdir(parents=True, exist_ok=True)

todocommits = set()

print("updating refs")

# build changed refs
for change in changes:
    path = gen_dir / change.ref_name
    linkpath = build_dir / change.ref_name
    if change.deleting:
        # remove each directory independently, so a missing gen dir does not
        # leave a stale build dir (and its symlink) behind
        for stale in (path, linkpath):
            try:
                shutil.rmtree(stale)
            except FileNotFoundError:
                pass
    else:
        path.mkdir(parents=True, exist_ok=True)
        linkpath.mkdir(parents=True, exist_ok=True)
        index = path / "index.html"
        link = linkpath / "tree"
        # peel to a commit, like the "create missing refs" loop below does,
        # so refs that point at annotated tags also resolve to a tree
        commit = repo[change.new_value].peel(pygit2.Commit)
        tree = gen_dir / "trees" / str(commit.tree_id)
        with index.open("w") as f:
            # TODO
            f.write("refview tree")
        todocommits.add(commit)
        linktarget = get_relative(path, tree)
        # the ref may have pointed elsewhere before; replace the old link
        link.unlink(missing_ok=True)
        link.symlink_to(linktarget, target_is_directory=True)

print("generating refs")

# create missing refs
for ref_name in repo.references:
    ref = repo.references.get(ref_name)
    path = gen_dir / ref.name
    linkpath = build_dir / ref.name
    path.mkdir(parents=True, exist_ok=True)
    linkpath.mkdir(parents=True, exist_ok=True)
    index = path / "index.html"
    link = linkpath / "tree"
    commit = ref.peel(pygit2.Commit)
    tree = gen_dir / "trees" / str(commit.tree_id)
    try:
        # "x" fails if the file exists, which doubles as the visited check
        f = index.open("x")
    except FileExistsError:
        # this ref has already been generated
        continue
    with f:
        # TODO
        f.write("refview tree")
    todocommits.add(commit)
    linktarget = get_relative(path, tree)
    link.symlink_to(linktarget, target_is_directory=True)

todotrees = set()

print("generating commits")

# build commits
while todocommits:
    commit = todocommits.pop()
    path = gen_dir / "commits" / str(commit.id)
    linkpath = build_dir / "commits" / str(commit.id)
    path.mkdir(parents=True, exist_ok=True)
    linkpath.mkdir(parents=True, exist_ok=True)
    index = path / "index.html"
    link = linkpath / "tree"
    tree = gen_dir / "trees" / str(commit.tree_id)
    try:
        f = index.open("x")
    except FileExistsError:
        # check if we've already visited this commit
        continue
    with f:
        # TODO
        f.write("commitview tree")
    todotrees.add(commit.tree)
    # walk the history: parents get generated too
    todocommits.update(commit.parents)
    linktarget = get_relative(path, tree)
    link.symlink_to(linktarget, target_is_directory=True)

# a dict /!\
# maps blobs to some metadata
# FIXME this can get quite expensive with larger repos, and might even run out
# of RAM.
# NOTE(review): nothing visible in this file adds entries to todoblobs (or
# walks tree entries), so the blob loop below never runs as written - confirm
# whether the tree-entry handling was lost.
todoblobs = {}

print("generating trees")

# build trees
while todotrees:
    t = todotrees.pop()
    path = gen_dir / "trees" / str(t.id)
    linkpath = build_dir / "trees" / str(t.id)
    path.mkdir(parents=True, exist_ok=True)
    linkpath.mkdir(parents=True, exist_ok=True)
    index = path / "index.html"
    try:
        f = index.open("x")
    except FileExistsError:
        # check if we've already visited this tree
        continue
    with f:
        f.write("tree")

print("generating blobs")

# build blobs
while todoblobs:
    (blob, meta) = todoblobs.popitem()
    path = gen_dir / "blobs" / str(blob.id)
    rawpath = build_dir / "blobs" / str(blob.id)
    path.mkdir(parents=True, exist_ok=True)
    rawpath.mkdir(parents=True, exist_ok=True)
    index = path / "index.html"
    try:
        f = index.open("x")
    except FileExistsError:
        # check if we've already visited this blob
        continue
    with f:
        f.write("blob")
        f.write("view raw")
        try:
            # only blobs that are valid UTF-8 get a highlighted view
            text = blob.data.decode("utf-8", errors="strict")
            lex = find_lexer(text, meta)
            if lex is not None:
                f.write(highlight(text, lex, HtmlFormatter()))
            else:
                # TODO maybe just write `text` (html escaped)?
                pass
        except UnicodeError:
            pass
        f.write("")
    # the raw bytes go straight to the build dir
    raw = rawpath / "raw.bin"
    raw.write_bytes(blob.data)

# create index.html
path = gen_dir / "index.html"
with path.open("w") as f:
    f.write("index")

print("running soupault")

# run soupault on it. note that soupault currently follows symlinks, but we
# workaround it.
# the config is handed over as an inherited file descriptor; close our copy
# once soupault has finished.
# NOTE(review): cwd is always CONFIG_HOME, even when the config was found in
# one of CONFIG_DIRS - confirm that directory is guaranteed to exist.
with soupault_config:
    subprocess.run(
        [
            soupault,
            "--site-dir",
            gen_dir,
            "--build-dir",
            build_dir,
        ],
        cwd=pathlib.Path(CONFIG_HOME),
        env={
            **os.environ,
            'SOUPAULT_CONFIG': '/dev/fd/{}'.format(soupault_config.fileno())
        },
        check=True,
        pass_fds=[soupault_config.fileno()]
    )

print("copying to output")

# CANNOT use shutil.copytree - it is broken.
# also need to be aware of copying into a directory, so we just always make it
# a directory.
browse = pathlib.Path.cwd() / "browse"
browse.mkdir(parents=True, exist_ok=True)
subprocess.run(["cp", "-R", "-P", *build_dir.glob("*"), browse], check=True)

# example layout:
# └── gen
#     ├── blobs
#     │   └── e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
#     │       ├── index.html
#     │       └── raw.bin
#     ├── commits
#     │   ├── 21177a2933b1a9d21d8437159405c5bc68b4d32e
#     │   │   ├── index.html
#     │   │   └── tree -> ../../trees/1663be45d5f6b9f092c4b98d44cf7992b427172f
#     │   └── 3ea9318f6271ece3c7560f18d0b22f50bd3cefe5
#     │       ├── index.html
#     │       └── tree -> ../../trees/17d6338b3a3dc189bdc3bea8481fe5f32fd388c8
#     ├── refs
#     │   └── heads
#     │       └── default
#     │           ├── index.html
#     │           └── tree -> ../../../trees/1663be45d5f6b9f092c4b98d44cf7992b427172f
#     └── trees
#         ├── 1663be45d5f6b9f092c4b98d44cf7992b427172f
#         │   ├── bar -> ../../blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
#         │   ├── baz -> ../29ba47b07d262ad717095f2d94ec771194c4c083
#         │   ├── deleteme -> ../../blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
#         │   ├── foo -> ../../blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
#         │   └── index.html
#         ├── 17d6338b3a3dc189bdc3bea8481fe5f32fd388c8
#         │   ├── bar -> ../../blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
#         │   ├── baz -> ../29ba47b07d262ad717095f2d94ec771194c4c083
#         │   ├── foo -> ../../blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
#         │   └── index.html
#         └── 29ba47b07d262ad717095f2d94ec771194c4c083
#             ├── index.html
#             └── qux -> ../../blobs/e69de29bb2d1d6434b8b29ae775ad8c2e48c5391