summary refs log tree commit diff stats
path: root/git-hooks/post-receive
diff options
context:
space:
mode:
Diffstat (limited to 'git-hooks/post-receive')
-rwxr-xr-x git-hooks/post-receive 46
1 files changed, 36 insertions, 10 deletions
diff --git a/git-hooks/post-receive b/git-hooks/post-receive
index e21caef..53b5cab 100755
--- a/git-hooks/post-receive
+++ b/git-hooks/post-receive
@@ -33,6 +33,8 @@ import pygit2
 from pygments import highlight
 from pygments.formatters import HtmlFormatter
 from pygments.lexers import get_lexer_for_filename
+from pygments.lexers import guess_lexer
+from pygments.lexers import guess_lexer_for_filename
 import pygments.util
 
 @dataclasses.dataclass
@@ -49,6 +51,32 @@ def get_relative(path, target):
     """Makes target relative to path, without filesystem operations."""
     return os.path.relpath(target, start=path)
 
+def find_lexer(text, meta):
+    """Returns a lexer for the given text/meta, or None if no lexer is found."""
+    # TODO this can probably be improved
+    # try exact lexers based on filename; only used when every meta entry's name (f[1]) maps to the same lexer.
+    # this is by far the fastest, but may lead to incorrect results sometimes.
+    try:
+        if len(set(get_lexer_for_filename(f[1]).name for f in meta)) == 1:
+            lex = get_lexer_for_filename(meta[0][1])
+            return lex
+    except pygments.util.ClassNotFound:
+        pass  # fall through to the next strategy
+    # try lexers based on filename and content, again requiring all meta entries to agree on one lexer
+    try:
+        if len(set(guess_lexer_for_filename(f[1], text).name for f in meta)) == 1:
+            lex = guess_lexer_for_filename(meta[0][1], text)
+            return lex
+    except pygments.util.ClassNotFound:
+        pass  # fall through to the next strategy
+    # try lexers based only on content, as a last resort
+    try:
+        lex = guess_lexer(text)
+        return lex
+    except pygments.util.ClassNotFound:
+        pass
+    return None
+
 CACHE_HOME = os.environ.get('XDG_CACHE_HOME', '')
 if not CACHE_HOME:
     CACHE_HOME = os.environ['HOME'] + '/.cache'
@@ -86,7 +114,7 @@ for c in changes:
         tree = gen_dir / "trees" / str(repo[c.new_value].tree_id)
         with index.open("w") as f:
             # TODO
-            f.write("<html><head><title>ref</title><body><a href=\"./tree\">view tree</a></body></html>")
+            f.write("<!DOCTYPE html><html><head><meta charset=\"utf-8\"><title>ref</title><body><a href=\"./tree\">view tree</a></body></html>")
         todocommits.add(repo[c.new_value])
         linktarget = get_relative(path, tree)
         link.unlink(missing_ok=True)
@@ -109,7 +137,7 @@ for ref in repo.references:
         continue
     with f:
         # TODO
-        f.write("<html><head><title>ref</title><body><a href=\"./tree\">view tree</a></body></html>")
+        f.write("<!DOCTYPE html><html><head><meta charset=\"utf-8\"><title>ref</title><body><a href=\"./tree\">view tree</a></body></html>")
     todocommits.add(ref.peel(pygit2.Commit))
     linktarget = get_relative(path, tree)
     link.symlink_to(linktarget, target_is_directory=True)
@@ -133,7 +161,7 @@ while todocommits:
         continue
     with f:
         # TODO
-        f.write("<html><head><title>commit</title><body><a href=\"./tree\">view tree</a></body></html>")
+        f.write("<!DOCTYPE html><html><head><meta charset=\"utf-8\"><title>commit</title><body><a href=\"./tree\">view tree</a></body></html>")
     todotrees.add(c.tree)
     todocommits.update(c.parents)
     linktarget = get_relative(path, tree)
@@ -159,7 +187,7 @@ while todotrees:
         # check if we've already visited this tree
         continue
     with f:
-        f.write("<html><head><title>tree</title><body><ul>")
+        f.write("<!DOCTYPE html><html><head><meta charset=\"utf-8\"><title>tree</title><body><ul>")
         for obj in t:
             linkname = obj.name
             # a git repo can contain any file, including index.html among
@@ -208,20 +236,18 @@ while todoblobs:
         # check if we've already visited this tree
         continue
     with f:
-        f.write("<html><head><title>blob</title><body>")
+        f.write("<!DOCTYPE html><html><head><meta charset=\"utf-8\"><title>blob</title><body>")
         f.write("<a href=\"./raw.bin\">view raw</a>")
         try:
             text = b.data.decode("utf-8", errors="strict")
-            if len(set(get_lexer_for_filename(f[1]).name for f in meta)) == 1:
-                lex = get_lexer_for_filename(meta[0][1])
+            lex = find_lexer(text, meta)
+            if lex is not None:
                 f.write(highlight(text, lex, HtmlFormatter()))
             else:
                 # TODO maybe just write `text` (html escaped)?
                 pass
         except UnicodeError:
             pass
-        except pygments.util.ClassNotFound:
-            pass
         f.write("</body></html>")
     raw = path / "raw.bin"
     with raw.open("wb") as f:
@@ -230,7 +256,7 @@ while todoblobs:
 # create index.html
 path = gen_dir / "index.html"
 with path.open("w") as f:
-    f.write("<html><head><title>index</title><body><ul>")
+    f.write("<!DOCTYPE html><html><head><meta charset=\"utf-8\"><title>index</title><body><ul>")
     if not repo.head_is_unborn:
         ref = repo.head
         quoted = quote(ref.name, safe='/')