summary refs log blame commit diff stats
path: root/luatokens.lua
blob: 2ac2cc31661ad5abb030dd84b51d2d4257c29025 (plain) (tree)
1
2
3
4
5
6
7
8




                               


                                                






































































































                                                                                                     




                                                  
                                          





                                                                                                                            
                                                   




                                              
                               












                                                                   
                                                     





                                                                                                                                
                                                                                                                  





                                                                                         
                                                         











                                              
                                       


















                                                                                                                   
                                 


                                        
                                 

                                                          









                                   
                     





                                          
                           
                               








                                   
-- Lua tokens

-- we need some stuff from here
local parser = require "parser"
local selfify = parser.selfify
local EOF = parser.EOF
local COLLECT = parser.COLLECT
local collect_fallback = parser.collect_fallback

-- "dummies"
-- Sentinel token-type marker for string literals: pushed into the output
-- token stream by string_open and compared by identity, never inspected.
local TK_STRING = {}

-- Root state table of the tokenizer state machine (also exported below).
local tokens = {}

-- Shared character-class map, used as an __index fallback by several states:
-- maps each ASCII character to "whitespace", "newline", "digit", "hexdigit"
-- or "alpha".  Built programmatically; the resulting table is identical to
-- writing every entry out by hand.
tokens.base = (function()
    local base = {}
    -- Assign `name` as the class of every character in `chars`.
    local function class(chars, name)
        for c in chars:gmatch(".") do
            base[c] = name
        end
    end
    class(" \v\t\f", "whitespace")
    class("\n\r", "newline")
    class("0123456789", "digit")
    class("abcdefABCDEF", "hexdigit")
    class("ghijklmnopqrstuvwxyz", "alpha")
    class("GHIJKLMNOPQRSTUVWXYZ", "alpha")
    return base
end)()

-- Per-token hook: advance the (1-based) line counter whenever the consumed
-- token is a newline character.
local function linecount(state, token, rule)
    local is_newline = token == "\n" or token == "\r"
    if is_newline then
        local current = state.line or 1
        state.line = current + 1
    end
end

-- Tokenizer states for short string literals.
--
-- `tstring` is the inside-a-string state: by default each character is
-- appended verbatim to the collect buffer (via the [1] = collect_fallback
-- hook); a backslash diverts to the escape state, and a quote character may
-- close the string.  The finished string is assembled from state[COLLECT]
-- by tstring.close.
do local tstring = selfify({})
    tokens.string = tstring
    tstring.tokens = tokens
    -- State entered after a '\' inside a string: maps the escape character to
    -- the name of its handler (looked up in this same table).  The "\n" and
    -- "\r" entries implement the backslash-newline escape, each tolerating
    -- one following character of the opposite kind (CRLF / LFCR) before
    -- falling back to the string state through their __index chains.
    do local tsescapes = setmetatable({
            ["'"] = "insertraw",
            ['"'] = "insertraw",
            ['\\'] = "insertraw",
            ["a"] = "insertmap",
            ["b"] = "insertmap",
            ["f"] = "insertmap",
            ["n"] = "insertmap",
            ["r"] = "insertmap",
            ["t"] = "insertmap",
            ["v"] = "insertmap",
            ["z"] = "skipwhitespace",
            -- NOTE(review): no "unicode" handler is defined in this chunk;
            -- presumably \u escapes are handled elsewhere — TODO confirm.
            ["u"] = "unicode",
            ["x"] = "hex",
            ["\n"] = setmetatable({["\r"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
            ["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
            [1] = linecount, -- keep the line counter accurate across escaped newlines
        }, {__index = tokens.base})
        tokens.string.escapes = tsescapes
        tsescapes.string = tokens.string

        -- \', \" and \\: insert the escaped character itself.
        function tsescapes.insertraw(state, token)
            collect_fallback(state, token)
            return "string"
        end

        do
            -- \a \b \f \n \r \t \v: insert the mapped control character.
            local map = { ["a"] = "\a", ["b"] = "\b", ["f"] = "\f", ["n"] = "\n", ["r"] = "\r", ["t"] = "\t", ["v"] = "\v" }
            function tsescapes.insertmap(state, token)
                collect_fallback(state, map[token])
                return "string"
            end
        end

        -- Decimal escape \d / \dd / \ddd (byte value 0-255).
        -- Up to three digits are accumulated in state.in_digit, counted by
        -- state.c; the position of the digit in "1234567890" mod 10 is its
        -- numeric value ("0" is found at index 10 -> 0).
        -- Returns nil (tokenization failure) if the value exceeds 255.
        function tsescapes.digit(state, token)
            local digit = string.find("1234567890", token, 1, true)
            local num = state.in_digit
            if digit then
                num = (num or 0) * 10 + digit % 10
                state.c = (state.c or 0) + 1
                if state.c < 3 then
                    state.in_digit = num
                    return "digitc"
                end
            end
            if num > 255 then
                return nil
            end
            collect_fallback(state, string.char(num))
            state.in_digit = nil
            state.c = nil
            return "string"
        end
        -- Continuation state after one or two decimal digits: another digit
        -- re-enters tsescapes.digit via the "" fallback; anything else
        -- behaves as the plain string state (through __index = tstring).
        tsescapes.digitc = setmetatable(selfify({[""] = tsescapes.digit, digitc = "self", string = tstring}), {__index=tstring})

        -- Hex escape \xXX: exactly two hexadecimal digits.
        tsescapes.hex = setmetatable(selfify({string = tokens.string, digit = "hexdigit"}), {__index=tokens.base})
        function tsescapes.hex.hexdigit(state, token)
            -- Position in this 32-char string mod 16 is the digit's value
            -- ("0" first matches at index 16 -> 0, "a" at index 26 -> 10).
            local digit = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
            assert(digit, "this should never be called for non-hex-digits")
            local num = state.in_hex
            if num then
                -- Second digit: emit the byte, return to the string state.
                num = num * 16 + digit % 16
                collect_fallback(state, string.char(num))
                state.in_hex = nil
                return "string"
            else
                -- First digit: remember it and wait for the second.
                state.in_hex = digit % 16
                return "self"
            end
        end

        -- \z: skip any run of whitespace (newlines included, still counted
        -- by linecount) before resuming the string state.
        do local tseskipwhitespace = selfify({
                string = tokens.string,
                whitespace = "self",
                [""] = "string",
                [1] = collect_fallback,
                [2] = linecount,
            })
            local tbase = tokens.base
            local tbasemap = {whitespace = "whitespace", newline = "whitespace"}
            setmetatable(tseskipwhitespace, {__index = function(t, k) return tbasemap[tbase[k]] or tstring[k] end})
            tsescapes.skipwhitespace =  tseskipwhitespace
        end
    end

    tstring['\\'] = "escapes"

    tstring['"'] = "close"
    tstring["'"] = "close"

    -- Raw (unescaped) newlines are not permitted inside short strings.
    tstring['\n'] = false
    tstring['\r'] = false

    tstring[""] = "self"

    tstring[1] = collect_fallback

    -- A quote character: closes the string only when it matches the quote
    -- that opened it (tracked in state.in_string); concatenates the collect
    -- buffer into the finished string value and returns to the root state.
    -- A non-matching quote is ordinary string content.
    function tstring.close(state, token)
        if state.in_string == token then
            state.in_string = nil
            state[#state+1] = table.concat(state[COLLECT])
            state[COLLECT] = nil
            return "tokens"
        else
            -- Fixed: previously `state[#state+1] = token`, which appended the
            -- quote to the token output stream instead of the string's
            -- contents; literal characters belong in the collect buffer.
            collect_fallback(state, token)
            return "self"
        end
    end
end

-- Quote characters start a string literal from the root state.
tokens["'"] = "string_open"
tokens['"'] = "string_open"
tokens[1] = linecount -- per-token hook: keep state.line up to date

-- NOTE(review): `whitespace` is not defined anywhere in this file, so this
-- installs a metatable with __index = nil (i.e. no fallback at all).
-- Possibly `{__index=tokens.base}` was intended — TODO confirm.
setmetatable(tokens, {__index=whitespace})

-- Begin a string literal from the root state: push the TK_STRING sentinel
-- into the token stream, start a fresh collect buffer for the string's
-- contents, and remember the opening quote so the matching close quote can
-- be recognized.  Returns the name of the string state.
function tokens.string_open(state, token)
    if not state.in_string then
        state[#state+1] = TK_STRING
        state[COLLECT] = {}
        state.in_string = token
        return "string"
    end
    -- Unreachable if the state machine is wired correctly: a quote seen
    -- while already inside a string is handled by the string state.
    -- Fixed: was `assert("this shouldn't happen")`, which never fails
    -- because a non-empty string is truthy.
    error("this shouldn't happen")
end

-- Public module interface: the root tokenizer state table and the string
-- token sentinel.
local exports = {
    TK_STRING = TK_STRING,
    tokens = tokens,
}
return exports