-- (web-viewer residue, not part of the program: "go up view raw 27107")
--[[
    This file is part of Cratera Compiler
    Copyright (C) 2019  Soni L.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
--]]

--[[
    This software is based on Lua 5.1 and Lua 5.3

    Lua 5.1 license:

/******************************************************************************
* Copyright (C) 1994-2012 Lua.org, PUC-Rio.  All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
******************************************************************************/

    Lua 5.3 license:

/******************************************************************************
* Copyright (C) 1994-2018 Lua.org, PUC-Rio.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
******************************************************************************/
--]]

-- we need some stuff from here
local parser = require "cratera.parser"
local selfify = parser.selfify
local EOF = parser.EOF
local COLLECT = parser.COLLECT
local collect_fallback = parser.collect_fallback

-- "dummies"
-- Token-kind sentinels.  Every TK_* value is a distinct empty table, so token
-- kinds are compared by identity and cannot be confused with the plain strings
-- and numbers that are also pushed into the token stream.
-- The list follows the RESERVED enum of the Lua 5.3 lexer:
-- see http://www.lua.org/source/5.3/llex.h.html#RESERVED
-- NOTE: the name list and the value list below must stay the same length
-- (37 names, 37 tables); a missing `{}` would silently leave a token nil.
-- keywords
local TK_AND, TK_BREAK,
    TK_DO, TK_ELSE, TK_ELSEIF, TK_END, TK_FALSE, TK_FOR, TK_FUNCTION,
    TK_GOTO, TK_IF, TK_IN, TK_LOCAL, TK_NIL, TK_NOT, TK_OR, TK_REPEAT,
    TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE,
    -- operators
    TK_IDIV, TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE,
    TK_SHL, TK_SHR,
    -- misc
    TK_DBCOLON, TK_EOS,
    -- values/constants
    TK_FLT, TK_INT, TK_NAME, TK_STRING =
    {}, {},
    {}, {}, {}, {}, {}, {}, {},
    {}, {}, {}, {}, {}, {}, {}, {},
    {}, {}, {}, {}, {},
    {}, {}, {}, {}, {}, {}, {},
    {}, {},
    {}, {},
    {}, {}, {}, {}

-- Reserved words.
--   keywords:         source spelling -> TK_* sentinel
--   reverse_keywords: TK_* sentinel   -> source spelling
-- Both maps are filled by the same helper so they cannot drift apart.
local keywords, reverse_keywords = {}, {}
do
    local function reserve(word, tok)
        keywords[word] = tok
        reverse_keywords[tok] = word
    end

    reserve("and", TK_AND)
    reserve("break", TK_BREAK)
    reserve("do", TK_DO)
    reserve("else", TK_ELSE)
    reserve("elseif", TK_ELSEIF)
    reserve("end", TK_END)
    reserve("false", TK_FALSE)
    reserve("for", TK_FOR)
    reserve("function", TK_FUNCTION)
    reserve("goto", TK_GOTO)
    reserve("if", TK_IF)
    reserve("in", TK_IN)
    reserve("local", TK_LOCAL)
    reserve("nil", TK_NIL)
    reserve("not", TK_NOT)
    reserve("or", TK_OR)
    reserve("repeat", TK_REPEAT)
    reserve("return", TK_RETURN)
    reserve("then", TK_THEN)
    reserve("true", TK_TRUE)
    reserve("until", TK_UNTIL)
    reserve("while", TK_WHILE)
end

-- Root state of the tokenizer.
local defs = selfify({})

-- defs.base maps single characters to a character-class name ("whitespace",
-- "newline", "digit", "hexdigit", "alpha").  The letters a-f / A-F are
-- classified as "hexdigit" rather than "alpha"; states that do not care about
-- the difference remap "hexdigit" themselves.
do
    local base = {}

    -- Assign `class` to every single character of `chars`.
    local function classify(chars, class)
        for ch in chars:gmatch(".") do
            base[ch] = class
        end
    end

    classify(" \v\t\f", "whitespace")
    classify("\n\r", "newline")
    classify("0123456789", "digit")
    classify("abcdefABCDEF", "hexdigit")
    classify("ghijklmnopqrstuvwxyz", "alpha")
    classify("GHIJKLMNOPQRSTUVWXYZ", "alpha")

    defs.base = base
end

--- Hook that advances the line counter kept in `state.line`.
-- A missing counter is treated as line 1, so the first call yields 2.
-- `token` and `rule` are accepted for the hook signature but not used.
local function countline(state, token, rule)
    local current = state.line
    if not current then
        current = 1
    end
    state.line = current + 1
end

--- Install newline states on `t` so that "\n", "\r", "\n\r" and "\r\n" are each
-- handled as one line break.
-- @param t        state table that receives the "\n" and "\r" entries
-- @param hookn    hook slot (integer key) in which `countline` is stored
-- @param fallback table every other lookup is delegated to (defaults to `t`)
-- @return `t`
local function mknewline(t, hookn, fallback)
    local target = fallback or t

    -- State entered after one newline character; `partner` is the other
    -- newline character, which leads to a state carrying no line hook of its
    -- own so the pair is counted only once.
    local function after_newline(partner)
        local after_pair = setmetatable({}, {__index = target})
        local first = {[hookn] = countline, [partner] = after_pair}
        return setmetatable(first, {__index = target})
    end

    t["\n"] = after_newline("\r")
    t["\r"] = after_newline("\n")
    return t
end

-- Quoted-string states ('...' and "...").
--   tstring   : state while inside the string body
--   tsescapes : state entered after a backslash
-- Characters are accumulated with collect_fallback and flushed into the token
-- stream by tstring.close.
-- NOTE(review): integer keys such as [1]/[2]/[-1] look like hooks run by
-- cratera.parser around each transition, and a transition that returns nil
-- looks like "reject this input" -- confirm against the parser.
do local tstring = selfify({})
    defs.string = tstring
    tstring.defs = defs
    do local tsescapes = setmetatable(mknewline({
            ["'"] = "insertraw",
            ['"'] = "insertraw",
            ['\\'] = "insertraw",
            ["a"] = "insertmap",
            ["b"] = "insertmap",
            ["f"] = "insertmap",
            ["n"] = "insertmap",
            ["r"] = "insertmap",
            ["t"] = "insertmap",
            ["v"] = "insertmap",
            ["z"] = "skipwhitespace",
            ["u"] = "unicode",
            ["x"] = "hex",
            -- a backslash followed by a line break inserts a single "\n"
            [1] = function(state, token, rule) if token == "\r" or token == "\n" then collect_fallback(state, "\n") end end,
        }, 1, tstring), {__index = defs.base})
        defs.string.escapes = tsescapes
        tsescapes.string = defs.string

        -- \' \" \\ : the escaped character stands for itself.
        function tsescapes.insertraw(state, token)
            collect_fallback(state, token)
            return "string"
        end

        -- \a \b \f \n \r \t \v : single-letter control escapes.
        do
            local map = { ["a"] = "\a", ["b"] = "\b", ["f"] = "\f", ["n"] = "\n", ["r"] = "\r", ["t"] = "\t", ["v"] = "\v" }
            function tsescapes.insertmap(state, token)
                collect_fallback(state, map[token])
                return "string"
            end
        end

        -- \ddd : decimal escape of up to three digits.
        -- state.in_digit holds the value so far, state.c the number of digits seen.
        -- Also installed as the FALLBACK of `digitc`, so it can be called with a
        -- non-digit token; in that case the pending byte is emitted, followed by
        -- the token itself.
        function tsescapes.digit(state, token)
            -- position in "1234567890" modulo 10 is the digit value ("0" -> 10 -> 0)
            local digit = string.find("1234567890", token, 1, true)
            local num = state.in_digit
            if digit then
                num = (num or 0) * 10 + digit % 10
                state.c = (state.c or 0) + 1
                if state.c < 3 then
                    state.in_digit = num
                    return "digitc"
                end
            end
            if num > 255 then
                -- value does not fit in one byte: reject
                return nil
            end
            collect_fallback(state, string.char(num))
            state.in_digit = nil
            state.c = nil
            if not digit then
                collect_fallback(state, token)
            end
            return "string"
        end
        -- State between the digits of a decimal escape.
        tsescapes.digitc = setmetatable(selfify({[parser.FALLBACK] = tsescapes.digit, string = tstring}, "digitc"), {__index=tstring})
        tsescapes.digitc[1]=function(state, token, rule)
            if rule == nil then
                collect_fallback(state, string.char(state.in_digit))
                state.in_digit = nil
                state.c = nil
            end
        end

        -- \xXX : exactly two hexadecimal digits.
        tsescapes.hex = setmetatable(selfify({string = defs.string, digit = "hexdigit"}), {__index=defs.base})
        function tsescapes.hex.hexdigit(state, token)
            -- position modulo 16 is the digit value for both letter cases
            local digit = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
            assert(digit, "this should never be called for non-hex-digits")
            local num = state.in_hex
            if num then
                num = num * 16 + digit % 16
                collect_fallback(state, string.char(num))
                state.in_hex = nil
                return "string"
            else
                state.in_hex = digit % 16
                return "self"
            end
        end

        -- \u{XXX} : code point written as UTF-8.
        do local tseunicode = {}
            tseunicode["{"] = "hex"
            do local tseuhex = setmetatable(selfify({digit = "hexdigit", string=tstring}), {__index=defs.base})
                tseunicode.hex = tseuhex
                function tseuhex.hexdigit(state, token)
                    local digit = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
                    assert(digit, "this should never be called for non-hex-digits")
                    state.in_hex = (state.in_hex or 0) * 16 + digit % 16
                    -- values above 2^31-1 return nothing (rejected)
                    if state.in_hex <= 2147483647 then
                        return "self"
                    end
                end
                tseuhex["}"] = function(state, token)
                    local num = state.in_hex
                    state.in_hex = nil
                    if not num then
                        -- "\u{}" with no hex digits: reject the escape instead of
                        -- raising a nil-comparison error on the next line (same
                        -- nil return as the out-of-range decimal escape).
                        return nil
                    end
                    if num < 128 then
                        collect_fallback(state, string.char(num))
                        return "string"
                    end
                    local bytes = ""
                    -- peel off 6 bits at a time into continuation bytes (10xxxxxx)
                    while num > 63 do
                        local v = num % 64
                        bytes = string.char(128 + v) .. bytes -- yeah ik, not the most efficient
                        num = (num - v) / 64
                    end
                    -- the lead byte has 6 - #bytes payload bits; if the remainder
                    -- does not fit, spill it into one more continuation byte
                    if num >= 2^6/(2^#bytes) then
                        local v = num % 64
                        bytes = string.char(128 + v) .. bytes
                        num = (num - v) / 64
                    end
                    do
                        -- lead byte: one high marker bit per continuation byte, plus
                        -- the remaining payload
                        local v = 0
                        for i=1,#bytes do
                            v = v + 128 / 2^i
                        end
                        v = v + num
                        assert(v < 126)
                        bytes = string.char(128 + v) .. bytes
                    end
                    collect_fallback(state, bytes)
                    return "string"
                end
            end
            tsescapes.unicode = tseunicode
        end

        -- \z : skip the following run of whitespace (including line breaks).
        do local tseskipwhitespace = selfify(mknewline({
                string = defs.string,
                whitespace = "self",
                [parser.FALLBACK] = "string",
                [1] = collect_fallback,
            }, 2))
            local tbase = defs.base
            local tbasemap = {whitespace = "whitespace"}
            -- only the "whitespace" class is taken from defs.base; every other
            -- lookup is answered by the string state
            setmetatable(tseskipwhitespace, {__index = function(t, k) return tbasemap[tbase[k]] or tstring[k] end})
            tsescapes.skipwhitespace =  tseskipwhitespace
        end
    end

    tstring['\\'] = "escapes"

    tstring['"'] = "close"
    tstring["'"] = "close"

    -- an unescaped line break inside a quoted string is not allowed
    tstring['\n'] = false
    tstring['\r'] = false

    tstring[parser.FALLBACK] = "self"

    tstring[1] = collect_fallback

    -- A quote ends the string only if it matches the opening quote recorded in
    -- state.in_string; otherwise it is ordinary string content.
    function tstring.close(state, token)
        if state.in_string == token then
            state.in_string = nil
            state[#state+1] = table.concat(state[COLLECT])
            state[COLLECT] = nil
            return "defs"
        else
            collect_fallback(state, token)
            return "self"
        end
    end
end

-- Long-bracket string states ([[...]], [=[...]=], ...).
--   tlongstring         : state right after the opening bracket; it differs from
--                         the body state only by the newline entries installed
--                         by mknewline at the bottom of this block
--   tllongstring_proper : body of the long string
--   tllmaybe_end        : after a "]" in the body, counting "=" signs
-- state.longstring_count is the level of the opening bracket (set by
-- defs.maybe_longstring); state.longstring_close counts "=" seen since the last "]".
-- NOTE(review): integer keys ([1], [2], [-1]) look like hooks run by
-- cratera.parser around each transition -- confirm against the parser.
do local tlongstring = {}
    defs.longstring = tlongstring
    do local tllongstring_proper = selfify({[parser.FALLBACK] = "self", ["]"] = function(state, token) state.longstring_close = 0 return "maybe_end" end})
        tllongstring_proper[1] = false -- placeholder for newline handling
        tllongstring_proper[2] = collect_fallback

        do local tllmaybe_end = selfify({defs = defs}, "maybe_end")
            tllongstring_proper.maybe_end = tllmaybe_end
            tllmaybe_end.longstring_proper = tllongstring_proper
            -- another "=" of a candidate closing bracket
            tllmaybe_end["="] = function(state, token)
                state.longstring_close = state.longstring_close + 1
                return "maybe_end"
            end
            tllmaybe_end["]"] = function(state, token)
                if state.longstring_close == state.longstring_count then
                    -- matching closing bracket: emit TK_STRING followed by the contents
                    state.longstring_close = nil
                    state.longstring_count = nil
                    local pos = #state
                    state[pos+1] = TK_STRING
                    state[pos+2] = table.concat(state[COLLECT])
                    state[COLLECT] = nil
                    return "defs"
                else
                    -- wrong level: the previous "]" and its "=" run were content;
                    -- this "]" starts a new candidate
                    collect_fallback(state, "]")
                    collect_fallback(state, ("="):rep(state.longstring_close))
                    state.longstring_close = 0
                    return "maybe_end"
                end
            end
            tllmaybe_end[parser.FALLBACK] = "longstring_proper"
            tllmaybe_end[1] = collect_fallback
            tllmaybe_end[-1] = function(state, token, rule)
                if not rule then
                    -- any other character: the pending "]" and "=" run were content
                    collect_fallback(state, "]")
                    collect_fallback(state, ("="):rep(state.longstring_close))
                    state.longstring_close = nil
                end
            end
        end

        tlongstring.longstring_proper = tllongstring_proper
        -- newline entries exist only on the opening state; everything else is
        -- delegated to the body state
        mknewline(tlongstring, 1, tllongstring_proper)
        setmetatable(tlongstring, {__index=tllongstring_proper})
    end
end

-- String openers: quotes start a quoted string, "[" may start a long string.
defs["'"] = "string_open"
defs['"'] = "string_open"
defs["["] = "maybe_longstring"

-- State after a "[": decides between a long-bracket string ("[[" or "[=...=[")
-- and a plain "[" token.
-- state.longstring_count records the bracket level (number of "=" signs).
defs.maybe_longstring = setmetatable({
    defs = defs,
    ['['] = "longstring_open",
    ['='] = "longstring_open",
    -- counting further "=" signs of the opening bracket
    longstring_count = selfify({
        ["="] = function(state, token)
            state.longstring_count = state.longstring_count + 1
            return "self"
        end,
        ["["] = function(state, token)
            state[COLLECT] = {coalesce=63} -- TODO tweak this for CPU/memory tradeoff?
            return "longstring"
        end,
        longstring = defs.longstring
    }),
    longstring_open = function(state, token)
        if token == "=" then
            -- First "=" of the opening bracket: the level is exactly 1.
            -- (The previous `state.longstring_count or 0 + 1` parsed as
            -- `state.longstring_count or 1` and therefore kept any stale count
            -- instead of restarting it.)
            state.longstring_count = 1
            return "longstring_count"
        elseif token == "[" then
            state.longstring_count = 0
            state[COLLECT] = {coalesce=63} -- TODO tweak this for CPU/memory tradeoff?
            return "longstring"
        end
    end,
    -- not a long bracket after all: emit the "[" as an ordinary token
    [-1] = function(state, token, rule)
        if rule ~= "longstring_open" then
            state[#state+1] = "["
        end
    end
}, {__index=defs})

-- these are needed for proper line counts
-- (the two commented-out assignments are the hand-written form of what
-- mknewline builds, minus the countline hook it stores in slot 1)
--defs["\n"] = setmetatable({["\r"] = setmetatable({}, {__index=defs})}, {__index=defs})
--defs["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=defs})}, {__index=defs})
mknewline(defs, 1)

-- thankfully comments are easy
-- States:
--   tmaybe_comment        : after one "-" (either a comment or a minus token)
--   tmcomment             : right after "--"
--   comment_proper        : body of a line comment, left at a line break
--   maybe_longcomment     : after "--[", deciding whether a long bracket opens
--   tllongcomment_proper  : body of a long comment
--   tllmaybe_end          : after a "]" inside a long comment
-- state.longcomment_count is the level of the opening bracket and
-- state.longcomment_close the number of "=" seen since the last "]".
-- NOTE(review): integer keys ([-1]) look like hooks run by cratera.parser
-- before each transition -- confirm against the parser.
defs["-"] = "maybe_comment"
do local tmaybe_comment = setmetatable({["-"] = "comment"}, {__index=defs})
    defs.maybe_comment = tmaybe_comment
    tmaybe_comment[parser.EOZ] = "self" -- defs
    -- not followed by a second "-": the first one was a minus token
    tmaybe_comment[-1] = function(state, token, rule)
        if rule ~= "comment" then
            state[#state+1] = "-"
        end
    end
    do local tmcomment = {comment_proper = selfify({})}
        tmaybe_comment.comment = tmcomment
        tmcomment[parser.FALLBACK] = "comment_proper"
        tmcomment["["] = "maybe_longcomment"
        -- a line break ends a line comment and returns to defs
        mknewline(tmcomment, 1, defs)
        mknewline(tmcomment.comment_proper, 1, defs)
        tmcomment.comment_proper[parser.FALLBACK] = "self"
        do local tllongcomment_proper = selfify({[parser.FALLBACK] = "self", ["]"] = function(state, token) state.longcomment_close = 0 return "maybe_end" end})
            tmcomment.longcomment = tllongcomment_proper
            do local tllmaybe_end = selfify({defs = defs}, "maybe_end")
                tllongcomment_proper.maybe_end = tllmaybe_end
                tllmaybe_end.longcomment_proper = tllongcomment_proper
                tllmaybe_end["="] = function(state, token)
                    state.longcomment_close = state.longcomment_close + 1
                    return "maybe_end"
                end
                tllmaybe_end["]"] = function(state, token)
                    if state.longcomment_close == state.longcomment_count then
                        -- matching closing bracket: the comment is over
                        state.longcomment_close = nil
                        state.longcomment_count = nil
                        return "defs"
                    else
                        -- wrong level: this "]" starts a new candidate
                        state.longcomment_close = 0
                        return "maybe_end"
                    end
                end
                tllmaybe_end[parser.FALLBACK] = "longcomment_proper"
                tllmaybe_end[-1] = function(state, token, rule)
                    if not rule then
                        state.longcomment_close = nil
                    end
                end
            end

            -- line breaks inside a long comment stay inside it
            mknewline(tllongcomment_proper, 1, tllongcomment_proper)
        end

        tmcomment.maybe_longcomment = setmetatable({
            comment = tmcomment,
            ['['] = "longcomment_open",
            ['='] = "longcomment_open",
            longcomment_count = setmetatable(selfify({
                ["="] = function(state, token)
                    state.longcomment_count = state.longcomment_count + 1
                    return "longcomment_count"
                end,
                ["["] = "longcomment",
                longcomment = tmcomment.longcomment,
            }, "longcomment_count"), {__index=tmcomment}),
            longcomment_open = function(state, token)
                if token == "=" then
                    -- First "=" of the opening bracket: the level is exactly 1.
                    -- The previous `state.longcomment_count or 0 + 1` parsed as
                    -- `state.longcomment_count or 1`, so a count left behind by
                    -- an aborted opener such as "--[==x" (which falls back to a
                    -- line comment) leaked into the next long comment and its
                    -- closing bracket was never recognised.
                    state.longcomment_count = 1
                    return "longcomment_count"
                elseif token == "[" then
                    state.longcomment_count = 0
                    return "longcomment"
                end
            end,
        }, {__index=tmcomment})
    end
end

local STATE = parser.STATE

-- Two-character operators.
-- Calling defs.multitokens(first, second1, result1, second2, result2, ...)
-- installs a state under defs[first] in which each `secondN` character emits
-- `resultN`; any other following character emits `first` on its own.
do
    local multitokens = {}

    multitokens[parser.EOZ] = "self"

    -- the next character does not complete an operator: emit the first
    -- character by itself
    multitokens[-1] = function(state, token, rule)
        local current = state[STATE]
        if not current.multitoken[token] then
            state[#state+1] = state[STATE].first
        end
    end

    -- the next character completes an operator: emit the combined token
    function multitokens.second(state, token)
        local combined = state[STATE].multitoken[token]
        state[#state+1] = combined
        return "self" -- actually goes into defs
    end

    -- __call handler: register the (second, result) pairs for `first`.
    -- Registration stops at the first missing `second`.
    local function register(t, first, ...)
        local entry = setmetatable({
            first = first,
            multitoken = {}
        }, {__index=t})
        defs[first] = entry
        for i = 1, select("#", ...), 2 do
            local second, result = select(i, ...)
            if not second then
                break
            end
            entry[second] = "second"
            entry.multitoken[second] = result
        end
    end

    defs.multitokens = setmetatable(multitokens, {__index=defs, __call=register})
end

defs.multitokens("=", "=", TK_EQ)
defs.multitokens("/", "/", TK_IDIV)
defs.multitokens("<", "<", TK_SHL, "=", TK_LE)
defs.multitokens(">", ">", TK_SHR, "=", TK_GE)
defs.multitokens("~", "=", TK_NE)
defs.multitokens(":", ":", TK_DBCOLON)

-- State after a ".": a field access ".", a concatenation "..", a vararg "...",
-- or the start of a float such as ".5".
defs["."] = setmetatable({
    -- Neither another "." nor a digit follows: emit a plain "." token.
    [-1] = function(state, token, rule)
        if token ~= "." then
            if rule ~= "digit" then
                state[#state+1] = "."
            end
        end
    end,
    -- ".<digit>": start a decimal fraction.
    -- The digit itself has to be collected here; previously only "." was stored,
    -- so the first fractional digit was lost (".5" collected just ".").
    -- TK_FLT is not pushed here either: the number states push their `numtype`
    -- together with the value when the numeral ends, so pushing it here as well
    -- produced the type token twice.
    digit = function(state, token, rule)
        state[COLLECT] = {".", coalesce=31}
        collect_fallback(state, token)
        return "in_decimal"
    end,
    -- State after "..".
    ["."] = setmetatable({
        -- no third ".": it was a concatenation operator
        [-1] = function(state, token, rule)
            if token ~= "." then
                state[#state+1] = TK_CONCAT
            end
        end,
        ["."] = function(state, token)
            state[#state+1] = TK_DOTS
            return "self" -- actually goes into defs
        end,
    }, {__index=defs})
}, {__index=defs})

--- First digit of a numeral: start collecting and pick the follow-up state.
-- A leading "0" goes to "in_zero" (which additionally accepts x/X), any other
-- digit to "in_integer".
function defs.digit(state, token)
    state[COLLECT] = {token, coalesce=31}
    return token == "0" and "in_zero" or "in_integer"
end

-- State while reading the integer part of a decimal numeral.  The other number
-- states inherit from this one and override individual entries.
do
    local in_integer = {}

    -- a letter directly after the digits is not allowed
    in_integer.hexdigit = "alpha"
    in_integer.alpha = false
    in_integer['e'] = "exp"
    in_integer['E'] = "exp"
    in_integer[parser.EOZ] = "self" -- defs
    -- token kind pushed in front of the value when the numeral ends
    in_integer.numtype = TK_INT

    function in_integer.exp(state, token)
        collect_fallback(state, token)
        return "in_exp"
    end

    in_integer['.'] = function(state, token)
        collect_fallback(state, token)
        return "in_decimal"
    end

    function in_integer.digit(state, token)
        collect_fallback(state, token)
        return "in_digit"
    end

    -- rules after which the numeral is still being read
    local continues = {digit = true, hexdigit = true, into_hex = true, exp = true}

    -- When the numeral ends, push its kind and its value.
    in_integer[-1] = function(state, token, rule)
        -- TODO figure out best order for these checks
        if token == "." or continues[rule] then
            return
        end
        local text = table.concat(state[COLLECT])
        state[#state+1] = state[STATE].numtype
        state[#state+1] = tonumber(text) -- TODO maybe not the best option
        state[COLLECT] = nil
    end

    defs.in_integer = setmetatable(selfify(in_integer, "in_digit"), {__index=defs})
end

-- Remaining number states.  Each one inherits (through __index) from the state
-- named in its metatable and overrides only what differs.
do
    -- Build a transition that records the current character and moves to `target`.
    local function collect_then(target)
        return function(state, token)
            collect_fallback(state, token)
            return target
        end
    end

    -- after a leading "0": additionally accepts the hexadecimal prefix
    local in_zero = {}
    in_zero['x'] = "into_hex"
    in_zero['X'] = "into_hex"
    in_zero.into_hex = collect_then("in_hex")
    defs.in_zero = setmetatable(in_zero, {__index=defs.in_integer})

    -- after the decimal point: a second "." is rejected, the result is a float
    local in_decimal = {}
    in_decimal['.'] = false
    in_decimal.numtype = TK_FLT
    defs.in_decimal = setmetatable(selfify(in_decimal, "in_digit"), {__index=defs.in_integer})

    -- digits of the exponent: a second exponent marker is rejected
    local in_expnum = {}
    in_expnum.exp = false
    defs.in_expnum = setmetatable(selfify(in_expnum, "in_digit"), {__index=defs.in_decimal})

    -- after the exponent sign: a digit must follow
    local in_subexp = {}
    in_subexp.in_expnum = defs.in_expnum
    in_subexp.digit = collect_then("in_expnum")
    defs.in_subexp = setmetatable(in_subexp, {__index=defs.base})

    -- right after the exponent marker: optional sign, then digits
    local in_exp = {}
    in_exp.in_subexp = defs.in_subexp
    in_exp["+"] = "sign"
    in_exp["-"] = "sign"
    in_exp.sign = collect_then("in_subexp")
    defs.in_exp = setmetatable(in_exp, {__index=defs.in_subexp})

    -- hexadecimal digits: a-f count as digits (including e/E), the exponent
    -- marker is p/P, and "in_decimal" is redirected to the hex fraction state
    local in_hex = {}
    in_hex.in_decimal = "in_hex_fraction"
    in_hex.hexdigit = 'digit'
    in_hex['e'] = 'hexdigit'
    in_hex['E'] = 'hexdigit'
    in_hex['p'] = 'exp'
    in_hex['P'] = 'exp'
    defs.in_hex = setmetatable(selfify(in_hex, "in_digit"), {__index=defs.in_integer})

    -- fraction of a hexadecimal float
    local in_hex_fraction = {}
    in_hex_fraction['.'] = false
    in_hex_fraction.numtype = TK_FLT
    defs.in_hex_fraction = setmetatable(selfify(in_hex_fraction, "in_digit"), {__index=defs.in_hex})
end

--- Emit a one-character token as-is and stay in the current state.
function defs.simpletoken(state, token)
    local slot = #state + 1
    state[slot] = token
    return "self"
end

-- Every character listed here is a complete token on its own.
do
    local simple = "+*%^#&|(){}];,"
    for i = 1, #simple do
        defs[simple:sub(i, i)] = "simpletoken"
    end
end

-- Whitespace is skipped; a-f/A-F and "_" start identifiers like any other letter.
defs.whitespace = "self"
defs.hexdigit = "alpha"
defs["_"] = "alpha"

-- State while reading an identifier or keyword.
do
    local in_alpha = {
        digit = "in_alpha",
        hexdigit = "in_alpha",
        alpha = "in_alpha",
        _ = "in_alpha",
        [parser.EOZ] = "self",
    }
    defs.in_alpha = setmetatable(selfify(in_alpha, "in_alpha"), {__index=defs})
end

--- First character of an identifier: start collecting it.
function defs.alpha(state, token)
    state[COLLECT] = {coalesce=15} -- TODO tweak this for CPU/memory tradeoff?
    collect_fallback(state, token)
    return "in_alpha"
end

-- Either extend the word with the current character or, when the word has
-- ended, emit it as a keyword token or as TK_NAME followed by its text.
defs.in_alpha[-1] = function(state, token, rule)
    local continues = rule == "alpha" or rule == "digit" or rule == "hexdigit" or token == "_"
    if continues then
        collect_fallback(state, token)
        return
    end

    local word = table.concat(state[COLLECT])
    state[COLLECT] = nil

    local reserved = keywords[word]
    if reserved then
        state[#state+1] = reserved
        return
    end

    local top = #state
    state[top+1] = TK_NAME
    state[top+2] = word
end

-- Characters without an explicit entry in defs are looked up in defs.base,
-- which yields their character class ("digit", "alpha", "whitespace", ...).
setmetatable(defs, {__index=defs.base})

--- Opening quote of a quoted string.
-- Pushes TK_STRING, starts collecting the contents and remembers the quote
-- character in state.in_string so that only the same quote closes the string.
-- @return "string", the name of the string-body state
-- Raises an error if a string is already open.  The previous
-- `assert("this shouldn't happen")` could never fire, because a non-empty
-- string is truthy and assert only fails on false or nil.
function defs.string_open(state, token)
    if state.in_string then
        error("this shouldn't happen")
    end
    state[#state+1] = TK_STRING
    state[COLLECT] = {coalesce=63} -- TODO tweak this for CPU/memory tradeoff?
    state.in_string = token
    return "string"
end

-- Exported token tables.
--   tokens: full name ("TK_AND")         -> sentinel
--   TK:     name without prefix ("AND")  -> sentinel
-- Every sentinel also gets a metatable so that tostring() prints its full name.
local tokens = {}
-- keywords
tokens.TK_AND, tokens.TK_BREAK = TK_AND, TK_BREAK
tokens.TK_DO, tokens.TK_ELSE, tokens.TK_ELSEIF = TK_DO, TK_ELSE, TK_ELSEIF
tokens.TK_END, tokens.TK_FALSE, tokens.TK_FOR = TK_END, TK_FALSE, TK_FOR
tokens.TK_FUNCTION, tokens.TK_GOTO, tokens.TK_IF = TK_FUNCTION, TK_GOTO, TK_IF
tokens.TK_IN, tokens.TK_LOCAL, tokens.TK_NIL = TK_IN, TK_LOCAL, TK_NIL
tokens.TK_NOT, tokens.TK_OR, tokens.TK_REPEAT = TK_NOT, TK_OR, TK_REPEAT
tokens.TK_RETURN, tokens.TK_THEN, tokens.TK_TRUE = TK_RETURN, TK_THEN, TK_TRUE
tokens.TK_UNTIL, tokens.TK_WHILE = TK_UNTIL, TK_WHILE
-- operators
tokens.TK_IDIV, tokens.TK_CONCAT, tokens.TK_DOTS = TK_IDIV, TK_CONCAT, TK_DOTS
tokens.TK_EQ, tokens.TK_GE, tokens.TK_LE, tokens.TK_NE = TK_EQ, TK_GE, TK_LE, TK_NE
tokens.TK_SHL, tokens.TK_SHR = TK_SHL, TK_SHR
-- misc
tokens.TK_DBCOLON, tokens.TK_EOS = TK_DBCOLON, TK_EOS
-- values/constants
tokens.TK_FLT, tokens.TK_INT, tokens.TK_NAME, tokens.TK_STRING = TK_FLT, TK_INT, TK_NAME, TK_STRING

local TK = {}
do
    -- shared by all sentinels: the name is read back from the sentinel's own metatable
    local function token_tostring(self)
        return getmetatable(self).__name
    end

    for name, sentinel in pairs(tokens) do
        setmetatable(sentinel, {__name=name, __tostring=token_tostring})
        -- drop the 3-character "TK_" prefix
        TK[name:sub(4)] = sentinel
    end
end

-- Printable form of every non-keyword token kind (keywords are covered by
-- reverse_keywords).
local reverse_tokens = {}
-- operators
reverse_tokens[TK_IDIV] = "//"
reverse_tokens[TK_CONCAT] = ".."
reverse_tokens[TK_DOTS] = "..."
reverse_tokens[TK_EQ] = "=="
reverse_tokens[TK_GE] = ">="
reverse_tokens[TK_LE] = "<="
reverse_tokens[TK_NE] = "~="
reverse_tokens[TK_SHL] = "<<"
reverse_tokens[TK_SHR] = ">>"
-- misc
reverse_tokens[TK_DBCOLON] = "::"
reverse_tokens[TK_EOS] = "<eof>"
-- values/constants
reverse_tokens[TK_FLT] = "<float>"
reverse_tokens[TK_INT] = "<integer>"
reverse_tokens[TK_NAME] = "<identifier>"
reverse_tokens[TK_STRING] = "<string>"

-- Module interface.
local exports = {}
exports.defs = defs
exports.tokens = tokens
exports.TK = TK
exports.reverse_keywords = reverse_keywords
exports.reverse_tokens = reverse_tokens
return exports