summary refs log blame commit diff stats
path: root/luatokens.lua
blob: 226a81a5520040d6201c9747623d653c3c91f525 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
           



                               


                                                

            
















                                                                      
 
               
 
             





































































                          








                                                                                                                    


                              

                         
                                                 












                                     



                                                                                                                            

                                       

                                                  
                                          





                                                                                                                            
                                                   

















                                                                   
                                                     

                                


                                              

                           







                                                                                                                         
 
                                                                                                              





                                                                                         
                                                         







                                         













































                                                                                                               
                                                        
                                     

                                    
                                       


                                                                                                                                                            
                                   
                                                        














                                                                                                                   
                                 


                                        
                                 

                                                          
                         
            
                                          




                         

                                  





                              





























                                                                                        
 
                                       
 
                                       

                                   
                                                                                  
                               





                                   










                                                                                                                                                     
 
-- Lua defs

-- we need some stuff from here
local parser = require "parser"
local selfify = parser.selfify
local EOF = parser.EOF
local COLLECT = parser.COLLECT
local collect_fallback = parser.collect_fallback

-- "dummies"
-- see http://www.lua.org/source/5.3/llex.h.html#RESERVED
-- Every token is a distinct empty table, so tokens compare by identity only.

-- reserved words
local TK_AND, TK_BREAK = {}, {}
local TK_DO, TK_ELSE, TK_ELSEIF, TK_END = {}, {}, {}, {}
local TK_FALSE, TK_FOR, TK_FUNCTION = {}, {}, {}
local TK_GOTO, TK_IF, TK_IN, TK_LOCAL = {}, {}, {}, {}
local TK_NIL, TK_NOT, TK_OR, TK_REPEAT = {}, {}, {}, {}
local TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE = {}, {}, {}, {}, {}
-- other terminal symbols
local TK_IDIV, TK_CONCAT, TK_DOTS = {}, {}, {}
local TK_EQ, TK_GE, TK_LE, TK_NE = {}, {}, {}, {}
local TK_SHL, TK_SHR = {}, {}
local TK_DBCOLON, TK_EOS = {}, {}
local TK_FLT, TK_INT, TK_NAME, TK_STRING = {}, {}, {}, {}

local defs = {}

-- defs.base maps a single character to the name of its character class:
-- "whitespace", "newline", "digit", "hexdigit" (a-f / A-F only) or "alpha"
-- (the remaining ASCII letters). Characters not listed here have no entry.
do
    local base = {}

    -- Assign `class` to every single-character key taken from `chars`.
    local function classify(chars, class)
        for i = 1, #chars do
            base[chars:sub(i, i)] = class
        end
    end

    classify(" \v\t\f", "whitespace")
    classify("\n\r", "newline")
    classify("0123456789", "digit")
    classify("abcdefABCDEF", "hexdigit")
    classify("ghijklmnopqrstuvwxyz", "alpha")
    classify("GHIJKLMNOPQRSTUVWXYZ", "alpha")

    defs.base = base
end

-- Hook that advances the line counter kept in state.line.
-- An unset counter is treated as line 1, so the first call leaves it at 2.
-- `token` and `rule` are accepted but not used.
local function countline(state, token, rule)
    local current = state.line
    if not current then
        current = 1
    end
    state.line = current + 1
end

-- Installs "\n" and "\r" entries on `t` and returns `t`.
-- Each entry is a table that:
--   * carries `countline` at index `hookn`,
--   * holds a nested entry for the *other* newline character (so "\r\n" and
--     "\n\r" pairs are each reachable as one two-step path),
--   * falls back to `fallback` (default: `t` itself) for every other key.
local function mknewline(t, hookn, fallback)
    local target = fallback or t

    -- Build the entry for one newline character; `other` is its counterpart.
    local function entry(other)
        local second = setmetatable({}, {__index = target})
        return setmetatable({[hookn] = countline, [other] = second}, {__index = target})
    end

    t["\n"] = entry("\r")
    t["\r"] = entry("\n")
    return t
end

-- Short (quoted) string handling.
-- tstring is the table used while inside a quoted string; it is published as
-- defs.string and points back to the top-level table through tstring.defs.
do local tstring = selfify({})
    defs.string = tstring
    tstring.defs = defs
    -- tsescapes is the table used right after a backslash. Keys are the
    -- character following the backslash, values name the handler entry to use.
    -- Characters without an explicit entry resolve through defs.base to their
    -- class name (e.g. a decimal digit resolves to "digit").
    -- mknewline adds "\n"/"\r" entries carrying countline at index 1 and
    -- falling back to tstring.
    do local tsescapes = setmetatable(mknewline({
            -- quote characters and backslash are inserted as-is
            ["'"] = "insertraw",
            ['"'] = "insertraw",
            ['\\'] = "insertraw",
            -- single-letter escapes translated through a lookup table
            ["a"] = "insertmap",
            ["b"] = "insertmap",
            ["f"] = "insertmap",
            ["n"] = "insertmap",
            ["r"] = "insertmap",
            ["t"] = "insertmap",
            ["v"] = "insertmap",
            -- escapes that need further input
            ["z"] = "skipwhitespace",
            ["u"] = "unicode",
            ["x"] = "hex",
            -- superseded by the mknewline() call wrapping this table:
            --["\n"] = setmetatable({[1] = countline, ["\r"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
            --["\r"] = setmetatable({[1] = countline, ["\n"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
            -- Entry [1]: for a "\r" or "\n" token, collects a single "\n".
            -- NOTE(review): presumably invoked by the parser as a hook for
            -- each token handled in this table -- confirm in parser.lua.
            [1] = function(state, token, rule) if token == "\r" or token == "\n" then collect_fallback(state, "\n") end end,
        }, 1, tstring), {__index = defs.base})
        -- cross-links: string -> escapes and escapes -> string
        defs.string.escapes = tsescapes
        tsescapes.string = defs.string

        -- Handler for escapes whose value is the escaped character itself
        -- (the quote characters and the backslash): collects `token`
        -- unchanged and names "string" as the next entry.
        tsescapes.insertraw = function(state, token)
            collect_fallback(state, token)
            return "string"
        end

        do
            -- Escape letter -> the control character it stands for.
            local control_chars = {
                a = "\a",
                b = "\b",
                f = "\f",
                n = "\n",
                r = "\r",
                t = "\t",
                v = "\v",
            }
            -- Handler for single-letter escapes: collects the mapped control
            -- character and names "string" as the next entry.
            tsescapes.insertmap = function(state, token)
                local replacement = control_chars[token]
                collect_fallback(state, replacement)
                return "string"
            end
        end

        -- Decimal escape handler (\d, \dd, \ddd).
        -- The value accumulated so far lives in state.in_digit and the number
        -- of digits consumed in state.c.
        -- Returns:
        --   "digitc" while fewer than three digits have been consumed,
        --   "string" once the byte has been collected (a non-digit `token`
        --            that ended the escape is collected right after it),
        --   nil      when the accumulated value is greater than 255.
        function tsescapes.digit(state, token)
            local pos = string.find("1234567890", token, 1, true)
            local is_digit = pos ~= nil
            local value = state.in_digit

            if is_digit then
                -- "0" is at position 10, hence the modulo to get the digit value
                value = (value or 0) * 10 + pos % 10
                local consumed = (state.c or 0) + 1
                state.c = consumed
                if consumed < 3 then
                    state.in_digit = value
                    return "digitc"
                end
            end

            if value > 255 then
                return nil
            end

            collect_fallback(state, string.char(value))
            state.in_digit = nil
            state.c = nil
            if not is_digit then
                collect_fallback(state, token)
            end
            return "string"
        end
        -- digitc is the table named by tsescapes.digit while a decimal escape
        -- is still being accumulated. Its "" entry is tsescapes.digit; keys it
        -- does not define itself are looked up in tstring.
        tsescapes.digitc = setmetatable(selfify({[""] = tsescapes.digit, string = tstring}, "digitc"), {__index=tstring})
        -- Entry [1]: when `rule` is nil, collects the byte accumulated in
        -- state.in_digit and clears the decimal-escape bookkeeping.
        -- NOTE(review): when this runs relative to the "" entry, and what a
        -- nil `rule` means exactly, is decided by parser.lua -- confirm there.
        tsescapes.digitc[1]=function(state, token, rule)
            if rule == nil then
                collect_fallback(state, string.char(state.in_digit))
                state.in_digit = nil
                state.c = nil
            end
        end

        -- "\xXX" escape: exactly two hexadecimal digits form one byte.
        -- Decimal digits (class "digit" in defs.base) are routed to the same
        -- handler as the letters a-f / A-F (class "hexdigit").
        tsescapes.hex = setmetatable(selfify({string = defs.string, digit = "hexdigit"}), {__index=defs.base})

        -- First digit: remembered in state.in_hex, returns "self".
        -- Second digit: combined with the first, collected as a single byte,
        -- state.in_hex cleared, returns "string".
        tsescapes.hex.hexdigit = function(state, token)
            local pos = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
            assert(pos, "this should never be called for non-hex-digits")
            -- position modulo 16 is the digit's value for both letter cases
            local nibble = pos % 16
            local high = state.in_hex
            if not high then
                state.in_hex = nibble
                return "self"
            end
            collect_fallback(state, string.char(high * 16 + nibble))
            state.in_hex = nil
            return "string"
        end

        -- "\u{XXX}" escape: hexadecimal code point, emitted as (extended,
        -- up to six byte) UTF-8. Values up to 2147483647 (2^31 - 1) are accepted.
        do local tseunicode = {}
            -- the only thing accepted right after "\u" is the opening brace
            tseunicode["{"] = "hex"
            do local tseuhex = setmetatable(selfify({digit = "hexdigit", string=tstring}), {__index=defs.base})
                tseunicode.hex = tseuhex
                -- Accumulates one more hex digit into state.in_hex.
                -- Returns "self" while the value still fits in 31 bits and
                -- nothing (nil) once it does not.
                function tseuhex.hexdigit(state, token)
                    local digit = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
                    assert(digit, "this should never be called for non-hex-digits")
                    state.in_hex = (state.in_hex or 0) * 16 + digit % 16
                    if state.in_hex <= 2147483647 then
                        return "self"
                    end
                end
                -- Closing brace: encodes the accumulated code point, collects
                -- the bytes and returns "string".
                -- Returns nil when no digit was seen ("\u{}"); without this
                -- guard the comparison below would raise on a nil value.
                tseuhex["}"] = function(state, token)
                    local num = state.in_hex
                    if num == nil then
                        -- nil is what the other handlers in this file return
                        -- for invalid input (value too large, see above)
                        return nil
                    end
                    state.in_hex = nil
                    if num < 128 then
                        -- ASCII range: one byte, no encoding needed
                        collect_fallback(state, string.char(num))
                        return "string"
                    end
                    local bytes = ""
                    -- peel off continuation bytes (10xxxxxx), low 6 bits first,
                    -- until at most 6 bits remain
                    while num > 63 do
                        local v = num % 64
                        bytes = string.char(128 + v) .. bytes -- yeah ik, not the most efficient
                        num = (num - v) / 64
                    end
                    -- the lead byte has fewer payload bits the longer the
                    -- sequence is; if the remainder does not fit, it needs
                    -- one more continuation byte
                    if num >= 2^6/(2^#bytes) then
                        local v = num % 64
                        bytes = string.char(128 + v) .. bytes
                        num = (num - v) / 64
                    end
                    do
                        -- lead byte: one leading 1-bit per byte of the
                        -- sequence, followed by the remaining payload bits
                        local v = 0
                        for i=1,#bytes do
                            v = v + 128 / 2^i
                        end
                        v = v + num
                        assert(v < 126)
                        bytes = string.char(128 + v) .. bytes
                    end
                    collect_fallback(state, bytes)
                    return "string"
                end
            end
            tsescapes.unicode = tseunicode
        end

        -- "\z" escape (mapped to "skipwhitespace" in the escapes table).
        -- Whitespace-class characters stay in this table ("self"); anything
        -- without its own entry takes the "" entry, which names "string".
        -- mknewline is called without a fallback, so the "\n"/"\r" entries it
        -- adds fall back to this very table and carry countline at index 2
        -- (index 1 is already taken by collect_fallback).
        do local tseskipwhitespace = selfify(mknewline({
                string = defs.string,
                whitespace = "self",
                [""] = "string",
                [1] = collect_fallback,
            }, 2))
            -- superseded by the mknewline() call above:
            --tseskipwhitespace["\n"] = setmetatable({[2] = countline, ["\r"] = setmetatable({}, {__index=tseskipwhitespace})}, {__index=tseskipwhitespace})
            --tseskipwhitespace["\r"] = setmetatable({[2] = countline, ["\n"] = setmetatable({}, {__index=tseskipwhitespace})}, {__index=tseskipwhitespace})
            local tbase = defs.base
            -- only the "whitespace" class is taken from defs.base; every
            -- other missing key is looked up in tstring instead
            local tbasemap = {whitespace = "whitespace"}
            setmetatable(tseskipwhitespace, {__index = function(t, k) return tbasemap[tbase[k]] or tstring[k] end})
            tsescapes.skipwhitespace =  tseskipwhitespace
        end
    end

    -- Entries of the in-string table, keyed by the character read.

    -- a backslash starts an escape sequence
    tstring["\\"] = "escapes"

    -- either quote character goes through the close handler
    for _, quote in ipairs({'"', "'"}) do
        tstring[quote] = "close"
    end

    -- raw line breaks are explicitly mapped to false
    -- NOTE(review): presumably how parser.lua is told to reject them -- confirm
    for _, eol in ipairs({"\n", "\r"}) do
        tstring[eol] = false
    end

    -- default entry for every other character
    tstring[""] = "self"

    -- entry [1] is collect_fallback
    tstring[1] = collect_fallback

    -- Handler for a quote character seen inside a string.
    -- A quote different from the one that opened the string
    -- (state.in_string) is ordinary content: it is collected and "self" is
    -- returned. The matching quote ends the string: the collected pieces are
    -- concatenated and appended to `state`, the string bookkeeping is cleared
    -- and "defs" is returned.
    function tstring.close(state, token)
        if state.in_string ~= token then
            collect_fallback(state, token)
            return "self"
        end
        state.in_string = nil
        state[#state+1] = table.concat(state[COLLECT])
        state[COLLECT] = nil
        return "defs"
    end
end

-- Placeholder table for long-bracket strings; it has no entries of its own yet.
-- TODO
defs.longstring = selfify({})

-- Quote characters open a short string; "[" may be the start of a long string.
defs["'"] = "string_open"
defs['"'] = "string_open"
defs["["] = "maybe_longstring"

-- Table used right after a "[". A following "[" or "=" continues a long-string
-- opener; every other key is looked up in defs.
defs.maybe_longstring = setmetatable({
    defs = defs,
    ['['] = "longstring_open",
    ['='] = "longstring_open",
    -- Table used after the first "=": every further "=" bumps the level.
    longstring_count = selfify({
        ["="] = function(state, token)
            state.longstring_count = state.longstring_count + 1
            return "self"
        end,
        longstring = defs.longstring
    }),
    -- "=": start counting the level and return "longstring_count".
    -- "[": return "longstring" directly.
    longstring_open = function(state, token)
        if token == "=" then
            -- Parenthesized on purpose: "+" binds tighter than "or", so the
            -- unparenthesized form `count or 0 + 1` means `count or 1` and
            -- never increments an existing count.
            state.longstring_count = (state.longstring_count or 0) + 1
            return "longstring_count"
        elseif token == "[" then
            return "longstring"
        end
    end,
    -- Entry [-1]: unless the rule is "longstring_open", the "[" that led here
    -- was not a long-string opener, so it is appended to `state` as a plain "[".
    [-1] = function(state, token, rule)
        if rule ~= "longstring_open" then
            state[#state+1] = "["
        end
    end
}, {__index=defs})

-- Needed for proper line counts: give defs its "\n"/"\r" entries (countline at
-- index 1, falling back to defs itself), then let every key defs does not
-- define resolve to the character's class name in defs.base.
-- mknewline returns the table it was given, so the two steps chain.
setmetatable(mknewline(defs, 1), {__index = defs.base})

-- Handler for an opening quote character.
-- Appends TK_STRING to `state`, starts a fresh collection buffer in
-- state[COLLECT], remembers the opening quote in state.in_string (so the
-- matching quote can be recognized later) and returns "string".
-- Raises an error if a string is already open: this handler must never be
-- reached in that situation.
function defs.string_open(state, token)
    if state.in_string then
        -- The message used to be passed to assert() as its only argument,
        -- which always succeeds because a string is truthy; raise explicitly.
        error("this shouldn't happen")
    end
    state[#state+1] = TK_STRING
    state[COLLECT] = {coalesce=50} -- TODO tweak this for CPU/memory tradeoff?
    state.in_string = token
    return "string"
end

-- Module result: the table `defs` plus every token sentinel, keyed by its own name.
local tokens = {
    -- reserved words
    TK_AND = TK_AND,
    TK_BREAK = TK_BREAK,
    TK_DO = TK_DO,
    TK_ELSE = TK_ELSE,
    TK_ELSEIF = TK_ELSEIF,
    TK_END = TK_END,
    TK_FALSE = TK_FALSE,
    TK_FOR = TK_FOR,
    TK_FUNCTION = TK_FUNCTION,
    TK_GOTO = TK_GOTO,
    TK_IF = TK_IF,
    TK_IN = TK_IN,
    TK_LOCAL = TK_LOCAL,
    TK_NIL = TK_NIL,
    TK_NOT = TK_NOT,
    TK_OR = TK_OR,
    TK_REPEAT = TK_REPEAT,
    TK_RETURN = TK_RETURN,
    TK_THEN = TK_THEN,
    TK_TRUE = TK_TRUE,
    TK_UNTIL = TK_UNTIL,
    TK_WHILE = TK_WHILE,
    -- other terminal symbols
    TK_IDIV = TK_IDIV,
    TK_CONCAT = TK_CONCAT,
    TK_DOTS = TK_DOTS,
    TK_EQ = TK_EQ,
    TK_GE = TK_GE,
    TK_LE = TK_LE,
    TK_NE = TK_NE,
    TK_SHL = TK_SHL,
    TK_SHR = TK_SHR,
    TK_DBCOLON = TK_DBCOLON,
    TK_EOS = TK_EOS,
    TK_FLT = TK_FLT,
    TK_INT = TK_INT,
    TK_NAME = TK_NAME,
    TK_STRING = TK_STRING,
}

return {
    defs = defs,
    tokens = tokens,
}