summary refs log tree commit diff stats
path: root/luatokens.lua
diff options
context:
space:
mode:
author    SoniEx2 <endermoneymod@gmail.com>  2019-04-03 17:08:29 -0300
committer SoniEx2 <endermoneymod@gmail.com>  2019-04-03 17:08:29 -0300
commit  5a4b41bd47d999619b0b51052ae99157ac491a01 (patch)
tree    c40faf4b4bcba14ef879b985206bed34d61a2dde /luatokens.lua
parent  d03d77d28b812244be66763356f24659da769f05 (diff)
Attempted lua tokenizer didn't work
Publishing anyway because someone might be able to learn from my failure
Diffstat (limited to 'luatokens.lua')
-rw-r--r--  luatokens.lua  225
1 files changed, 225 insertions, 0 deletions
diff --git a/luatokens.lua b/luatokens.lua
new file mode 100644
index 0000000..81cbc11
--- /dev/null
+++ b/luatokens.lua
@@ -0,0 +1,225 @@
+-- Lua tokens
+
+-- we need some stuff from here
+local parser = require "parser"
+local selfify = parser.selfify
+
+-- "dummies"
+local TK_STRING = {}
+
+local tokens = {}
+
+tokens.base = {
+    [" "] = "whitespace",
+    ["\n"] = "newline",
+    ["\r"] = "newline",
+    ["\v"] = "whitespace",
+    ["\t"] = "whitespace",
+    ["\f"] = "whitespace",
+    ["0"] = "digit",
+    ["1"] = "digit",
+    ["2"] = "digit",
+    ["3"] = "digit",
+    ["4"] = "digit",
+    ["5"] = "digit",
+    ["6"] = "digit",
+    ["7"] = "digit",
+    ["8"] = "digit",
+    ["9"] = "digit",
+    ["a"] = "hexdigit",
+    ["b"] = "hexdigit",
+    ["c"] = "hexdigit",
+    ["d"] = "hexdigit",
+    ["e"] = "hexdigit",
+    ["f"] = "hexdigit",
+    ["A"] = "hexdigit",
+    ["B"] = "hexdigit",
+    ["C"] = "hexdigit",
+    ["D"] = "hexdigit",
+    ["E"] = "hexdigit",
+    ["F"] = "hexdigit",
+    ["g"] = "alpha",
+    ["h"] = "alpha",
+    ["i"] = "alpha",
+    ["j"] = "alpha",
+    ["k"] = "alpha",
+    ["l"] = "alpha",
+    ["m"] = "alpha",
+    ["n"] = "alpha",
+    ["o"] = "alpha",
+    ["p"] = "alpha",
+    ["q"] = "alpha",
+    ["r"] = "alpha",
+    ["s"] = "alpha",
+    ["t"] = "alpha",
+    ["u"] = "alpha",
+    ["v"] = "alpha",
+    ["w"] = "alpha",
+    ["x"] = "alpha",
+    ["y"] = "alpha",
+    ["z"] = "alpha",
+    ["G"] = "alpha",
+    ["H"] = "alpha",
+    ["I"] = "alpha",
+    ["J"] = "alpha",
+    ["K"] = "alpha",
+    ["L"] = "alpha",
+    ["M"] = "alpha",
+    ["N"] = "alpha",
+    ["O"] = "alpha",
+    ["P"] = "alpha",
+    ["Q"] = "alpha",
+    ["R"] = "alpha",
+    ["S"] = "alpha",
+    ["T"] = "alpha",
+    ["U"] = "alpha",
+    ["V"] = "alpha",
+    ["W"] = "alpha",
+    ["X"] = "alpha",
+    ["Y"] = "alpha",
+    ["Z"] = "alpha",
+}
+
+local function linecount(state, token, rule)
+    if token == "\n" or token == "\r" then
+        state.line = (state.line or 1) + 1
+    end
+end
+
+do local tstring = selfify({})
+    tokens.string = tstring
+    tstring.tokens = tokens
+    do local tsescapes = setmetatable({
+            ["'"] = "insertraw",
+            ['"'] = "insertraw",
+            ['\\'] = "insertraw",
+            ["a"] = "insertmap",
+            ["b"] = "insertmap",
+            ["f"] = "insertmap",
+            ["n"] = "insertmap",
+            ["r"] = "insertmap",
+            ["t"] = "insertmap",
+            ["v"] = "insertmap",
+            ["z"] = "skipwhitespace",
+            ["u"] = "unicode",
+            ["x"] = "hex",
+            ["\n"] = setmetatable({["\r"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
+            ["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
+            [1] = linecount,
+            [2] = print
+        }, {__index = tokens.base})
+        tokens.string.escapes = tsescapes
+        tsescapes.string = tokens.string
+
+        function tsescapes.insertraw(state, token)
+            state[#state+1] = token
+            return "string"
+        end
+
+        do
+            local map = { ["a"] = "\a", ["b"] = "\b", ["f"] = "\f", ["n"] = "\n", ["r"] = "\r", ["t"] = "\t", ["v"] = "\v" }
+            function tsescapes.insertmap(state, token)
+                state[#state+1] = map[token]
+                return "string"
+            end
+        end
+
+        function tsescapes.digit(state, token)
+            local digit = string.find("1234567890", token, 1, true)
+            local num = state.in_digit
+            if digit then
+                num = (num or 0) * 10 + digit % 10
+                state.c = (state.c or 0) + 1
+                if state.c < 3 then
+                    state.in_digit = num
+                    return "digitc"
+                end
+            end
+            if num > 255 then
+                return nil
+            end
+            state[#state+1] = string.char(num)
+            state.in_digit = nil
+            state.c = nil
+            return "string"
+        end
+        tsescapes.digitc = setmetatable(selfify({[""] = tsescapes.digit, digitc = "self", string = tstring}), {__index=tstring})
+
+        tsescapes.hex = setmetatable(selfify({string = tokens.string}), {__index=tokens.base})
+        function tsescapes.hex.hexdigit(state, token)
+            local digit = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
+            assert(digit, "this should never be called for non-hex-digits")
+            local num = state.in_hex
+            if num then
+                num = num * 16 + digit % 16
+                state[#state+1] = string.char(num)
+                state.in_hex = nil
+                return "string"
+            else
+                state.in_hex = digit % 16
+                return "self"
+            end
+        end
+
+        do local tseskipwhitespace = selfify({
+                string = tokens.string,
+                whitespace = "self",
+                [""] = "string",
+                [1] = parser.insert_fallback,
+                [2] = linecount,
+            })
+            local tbase = tokens.base
+            local tbasemap = {whitespace = "whitespace", newline = "whitespace"}
+            setmetatable(tseskipwhitespace, {__index = function(t, k) return tbasemap[tbase[k]] or tstring[k] end})
+            tsescapes.skipwhitespace =  tseskipwhitespace
+        end
+    end
+
+    tstring['\\'] = "escapes"
+
+    tstring['"'] = "close"
+    tstring["'"] = "close"
+
+    tstring['\n'] = false
+    tstring['\r'] = false
+
+    tstring[""] = "self"
+
+    tstring[1] = parser.insert_fallback
+
+    function tstring.close(state, token)
+        if state.in_string == token then
+            local i = state.string_start
+            state.in_string = nil
+            state.string_start = nil
+            state[i+1] = table.concat(state, '', i+1)
+            for j=i+2, #state do
+                state[j]=nil
+            end
+            return "tokens"
+        else
+            state[#state+1] = token
+            return "self"
+        end
+    end
+end
+
+tokens["'"] = "string_open"
+tokens['"'] = "string_open"
+
+setmetatable(tokens, {__index=whitespace})
+
+function tokens.string_open(state, token)
+    if not state.in_string then
+        state[#state+1] = TK_STRING
+        state.in_string = token
+        state.string_start = #state
+        return "string"
+    end
+    assert("this shouldn't happen")
+end
+
+return {
+    tokens = tokens,
+    TK_STRING = TK_STRING,
+}