summary refs log blame commit diff stats
path: root/src/cratera/compiler.lua
blob: ab4e2bfca1fed6ea269c50d1412826bc3e69ed7f (plain) (tree)
1
2
3
    
                                         
                                     



















                                                                               
                                       

                              
                                             
















                                                                                              


                                                                                                                                                

























                                                                                                                 

                                           
                                  

                                       
                              

























                                                                                                                            
                                                                                              











                                                       
                                                                                                                
























                                                                                                          

                                            




                                                                            

                                                                          
                                                                   
                                          



















                                                                             

                                                   
                                   








                                                              

                                                                       

                                        




                                                             









                                                              

                                                                       





                                                       






                                                             










                                                                 

                                                                       































                                                                       







                                                                     












                                                       
                                     







































                                                                                                          














                                                                              

                                                                       




                                                           
                                                                                     







                                                                               
                                                               










                                                                    









                                                                 
                                         
                                                                    
                                                                           








                                                              



                                                                             
                                                 



                                       

















                                                                         


                                                   

                                                                                



















                                                                         
                                                               



                                





                                                                                            





                                    
--[[
    This file is part of Cratera Compiler
    Copyright (C) 2019, 2024  Soni L.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
--]]

-- slow af but works

-- Runtime feature probe; must run before anything else in this file.
-- On Lua 5.2+ a local named _ENV becomes the environment for free names, so
-- `hasenv` resolves to true. On Lua 5.1 (and LuaJIT) _ENV is an ordinary
-- local, `hasenv` is an unset global (nil), and is51 ends up true.
local is51
do
    local _ENV = {hasenv = true}
    is51 = not hasenv
end

local parser = require "cratera.parser"
local selfify = parser.selfify
local STATE = parser.STATE
local luatokens = require "cratera.luatokens"
local reverse_keywords, reverse_tokens = luatokens.reverse_keywords, luatokens.reverse_tokens
local TK = luatokens.TK
local error, assert, ipairs, tostring, type = error, assert, ipairs, tostring, type
-- try to avoid making too many locals because Lua has a limit to how many locals you can have
local math = {huge=math.huge, floor=math.floor}
local string = {format=string.format, byte=string.byte, gsub=string.gsub}
local table = {insert=table.insert, remove=table.remove}

-- unique sentinel tables, compared by identity only
-- SELF is the marker handed to selfify()
local SELF = {}
-- remaining markers: FINISH names the output state; the other three are
-- placed inside the results buffer while rewriting
local FINISH, START_OF_STMT, END_OF_STMT, END_OF_CRATERA = {}, {}, {}, {}

-- implementation of myobj:[mytrait].myfunction(...)
-- TODO benchmark under luajit?
local CRATERA_FUNCTION = "_CRATERA_INVOKE_TRAIT"
-- Lua source for the helper; it is the first element of every results buffer
local CRATERA_INIT = string.format(
    "local function %s(myobj, mytrait, myfunction, ...) return myobj[mytrait][myfunction](myobj, ...) end ",
    CRATERA_FUNCTION
)

-- Entries of the results buffer that are immediately followed by a payload
-- slot (a value rather than a token): the name/number/string text, or the
-- START_OF_STMT position stored after END_OF_STMT.
local EXTRA_DATA = {
    [TK.NAME] = true,
    [TK.INT] = true,
    [TK.FLT] = true,
    [TK.STRING] = true,
    [END_OF_STMT] = true,
}

-- Returns true when results[i] is the token `tk` and is not merely the
-- payload of the preceding entry (e.g. a string literal whose text happens
-- to equal a token). Returns false otherwise.
-- (note: when tk is one of the TK.* values, a plain results[i] == tk is
-- generally safe, and faster.)
local function is_tk(results, i, tk)
    if EXTRA_DATA[results[i-1]] then
        -- slot i holds extra data for the entry before it, not a token
        return false
    end
    return results[i] == tk
end

-- Walks backwards from index i over '\n' tokens and returns the index of the
-- first entry that is not a newline token (i itself if it is not one).
local function ignore_newlines(results, i)
    local pos = i
    while true do
        if not is_tk(results, pos, '\n') then
            return pos
        end
        pos = pos - 1
    end
end

-- -- --

local defs = selfify({}, SELF)

-- Output state: each invocation pops one entry (plus its payload, if any)
-- off the end of state.results and appends its source text to `state`.
-- Returns SELF while entries remain, and an empty table once the buffer is
-- exhausted.
local finish = selfify({}, SELF)
finish[parser.EOZ] = function(state, token)
    local results = state.results
    local tk = table.remove(results)
    local piece
    if tk == TK.FLT or tk == TK.INT or tk == TK.NAME then
        -- the payload that follows is emitted as-is
        piece = table.remove(results)
    elseif tk == TK.STRING then
        -- lua tends to use a backslash and a newline but we already do newline processing,
        -- so we need to replace the escaped newline ("\\\n") with a newline escape ("\\n").
        -- additionally lua 5.1 doesn't handle control characters other than '\0' and '\r' so we need to escape them as well
        local quoted = string.format('%q', table.remove(results))
        quoted = string.gsub(quoted, '\n', 'n')
        if is51 then
            local function escape_control(c)
                return string.format("\\%03d", string.byte(c))
            end
            quoted = string.gsub(quoted, "%c", escape_control)
        end
        piece = quoted
    elseif type(tk) == "string" then
        -- plain string entries are already source text
        piece = tk
    elseif tk then
        -- START_OF_STMT and END_OF_STMT are neither keywords nor tokens; this should error in that case.
        piece = assert(reverse_keywords[tk] or reverse_tokens[tk])
    else
        -- nothing left to emit
        return {}
    end
    table.insert(state, piece)
    return SELF
end
defs[FINISH] = finish

-- End-of-input handler for the collecting state: reverses state.results in
-- place (so the FINISH state can pop entries from the end in source order)
-- and returns FINISH. With no results buffer at all it returns an empty
-- table instead.
defs[parser.EOZ] = function(state, token)
    local results = state.results
    if not results then
        -- empty, don't waste time processing unnecessary things
        return {}
    end
    -- flip results around, swapping from both ends towards the middle
    local lo, hi = 1, results.n
    while lo < hi do
        results[lo], results[hi] = results[hi], results[lo]
        lo, hi = lo + 1, hi - 1
    end
    return FINISH
end
-- Token handler registered under parser.FALLBACK.
-- (presumably invoked by cratera.parser for every token that has no
-- dedicated handler -- confirm against the parser)
--
-- Appends `token` to state.results (the buffer is created on first use and
-- seeded with CRATERA_INIT) and rewrites cratera trait calls in place:
--   stmt ':' Name '.' Name args
--   stmt ':' '[' exp ']' '.' Name args
-- become a call to CRATERA_FUNCTION taking stmt, the trait, the function
-- name (as a string) and the original arguments.
--
-- Bookkeeping used while rewriting:
--   results.n     number of entries in results
--   results.skip  truthy when the next token is the payload of the previous
--                 one (name/number/string text) and must not be interpreted
--   START_OF_STMT / END_OF_STMT  bracket the expression before a ':'; the
--                 slot after END_OF_STMT stores the START_OF_STMT index
--   END_OF_CRATERA  marks a rewritten call whose closing ')' is still pending
--   state.oldline / state.linenumber  used to emit '\n' entries
--
-- Returns SELF. Raises an error on unbalanced brackets or when a table /
-- string call argument is not preceded by something that can be called.
defs[parser.FALLBACK] = function(state, token)
    local results = state.results or (function() state.results = {CRATERA_INIT, n=1} return state.results end)()

    do -- handle newlines. this allows error messages to correctly map between lua and cratera
        local oldline = state.oldline or 1
        local linenumber = state.linenumber or 1
        if linenumber > oldline then
            local count = linenumber-oldline
            local len = (results.n or 0)
            for i=1, count do
                results[len+i] = '\n'
            end
            results.n = len + count
            if EXTRA_DATA[results[len]] then -- we're in the middle of a token with extra data. fix it up.
                -- move the pending token marker behind the newlines so it
                -- stays adjacent to its payload
                results[len], results[results.n] = results[results.n], results[len]
            end
        end
        state.oldline = state.linenumber
    end

    results.n = (results.n or 0) + 1
    results[results.n] = token
    if not results.skip then -- don't process string literals as tokens
        if token == ':' then
            -- figure out whether we're in funcname
            local i = results.n - 1 -- skip the ':'
            local find_statement = true
            i = ignore_newlines(results, i)
            while results[i-1] == TK.NAME do
                i = ignore_newlines(results, i-2) + 2
                if is_tk(results, i-2, '.') then
                    -- keep going
                    i = i - 3
                elseif results[i-2] == TK.FUNCTION then -- we're in funcname
                    -- funcname is of the form Name {'.' Name} ':'
                    -- TODO how can we parse 'function Foo:[Bar].Baz()'...
                    -- oh we think we need a real compiler for that
                    find_statement = false
                    break
                else
                    -- found start of statement
                    find_statement = false
                    -- mark start
                    i = i - 1
                    table.insert(results, i, START_OF_STMT)
                    results.n = results.n + 1
                    -- no need to fix existing END_OF_STMT because this code
                    -- only detects patterns of the form Name {'.' Name} ':',
                    -- which do not contain subexpressions.
                    -- mark end
                    table.insert(results, results.n + 1, END_OF_STMT)
                    table.insert(results, results.n + 2, i)
                    results.n = results.n + 2
                    break
                end
            end
            if find_statement then
                -- general case: walk backwards over a whole prefixexp
                while true do
                    i = ignore_newlines(results, i)
                    if is_tk(results, i, ')') then
                        local j = i
                        -- (prefixexp) or (funcargs)
                        -- find matching '('
                        local depth = 1
                        repeat
                            i = i - 1
                            if is_tk(results, i, '(') then
                                depth = depth - 1
                            elseif is_tk(results, i, ')') then
                                depth = depth + 1
                            elseif not results[i] then
                                error("syntax error (unbalanced '()')")
                            end
                        until depth == 0
                        if results[i - 1] == TK.FUNCTION then
                            i = j
                            -- found start
                            break
                        end
                    elseif is_tk(results, i, ']') then
                        -- [indexing]
                        -- find matching '['
                        local depth = 1
                        repeat
                            i = i - 1
                            if is_tk(results, i, '[') then
                                depth = depth - 1
                            elseif is_tk(results, i, ']') then
                                depth = depth + 1
                            elseif not results[i] then
                                error("syntax error (unbalanced '[]')")
                            end
                        until depth == 0
                    elseif results[i-1] == TK.NAME then
                        -- Name or '.' Name
                        i = i - 2
                        i = ignore_newlines(results, i)
                        if not is_tk(results, i, '.') then
                            if results[i] == TK.FUNCTION then
                                i = i + 1
                            else
                                -- found start of statement
                                break
                            end
                        end
                    elseif is_tk(results, i, '}') then
                        -- prefixexp '{' table '}'
                        -- find matching '{'
                        -- (this loop must match braces and bounds-check the
                        -- moving index newi; i does not change in here)
                        local newi = i
                        local depth = 1
                        repeat
                            newi = newi - 1
                            if is_tk(results, newi, '{') then
                                depth = depth - 1
                            elseif is_tk(results, newi, '}') then
                                depth = depth + 1
                            elseif not results[newi] then
                                error("syntax error (unbalanced '{}')")
                            end
                        until depth == 0
                        local checki = ignore_newlines(results, newi-1)
                        -- do I need these checks?
                        if is_tk(results, checki, ']') or
                            is_tk(results, checki, '}') or
                            is_tk(results, checki, ')') or
                            results[checki-1] == TK.NAME or
                            results[checki-1] == TK.STRING then
                            i = newi
                        else
                            -- syntax error?
                            error("syntax error")
                        end
                    elseif results[i-1] == TK.STRING then
                        -- prefixexp "string"
                        -- prefixexp 'string'
                        -- prefixexp [[string]]
                        local newi = i-1
                        local checki = ignore_newlines(results, newi-1)
                        -- do I need these checks?
                        if is_tk(results, checki, ']') or
                            is_tk(results, checki, '}') or
                            is_tk(results, checki, ')') or
                            results[checki-1] == TK.NAME or
                            results[checki-1] == TK.STRING then
                            i = newi
                        else
                            -- syntax error?
                            error("syntax error")
                        end
                    else
                        if results[i] == TK.FUNCTION then
                            -- 'function' Name (funcargs)
                            -- 'function' Name '.' Name (funcargs)
                            -- etc
                            repeat
                                i = i + 1
                            until results[i] == ')' or not results[i]
                        end
                        -- found start of statement
                        break
                    end
                    i = i - 1
                end
                -- mark start
                i = i + 1
                table.insert(results, i, START_OF_STMT)
                results.n = results.n + 1
                -- fix existing END_OF_STMT
                -- (their stored START_OF_STMT index moved up by one because
                -- of the insertion above)
                for k=i, #results do
                    if results[k] == END_OF_STMT then
                        local v = results[k+1]
                        assert(v > i)
                        if v > i then -- this should always be true?
                            results[k+1] = v + 1
                        end
                    end
                end
                -- mark end
                table.insert(results, results.n + 1, END_OF_STMT)
                table.insert(results, results.n + 2, i)
                results.n = results.n + 2
            end
        elseif token == '(' or token == '{' or token == TK.STRING then
            local i = results.n - 1 -- skip the '(' / '{' / TK_STRING
            i = ignore_newlines(results, i)
            -- possible patterns:
            -- ':' Name '(' -- plain Lua thing, ignore
            -- ':' Name '.' Name '(' -- cratera string traits
            -- ':' '[' exp ']' '.' Name '(' -- cratera object traits
            -- ':' '[' exp ']' '(' -- supported in lua 5.3 cratera patch but no reason to support it here.
            if results[i-1] == TK.NAME then
                local tk_myfunction = i-1
                -- maybe cratera
                i = ignore_newlines(results, i-2)
                if results[i-1] == END_OF_STMT then
                    -- lua, but we need to fix it up
                    -- we could just replace them with dummies, but
                    local pos = results[i]
                    table.remove(results, i) -- remove END_OF_STMT's value
                    table.remove(results, i-1) -- remove END_OF_STMT
                    table.remove(results, pos) -- remove START_OF_STMT
                    results.n = results.n - 3 -- adjust length
                    assert(results[i-3] == ':')
                elseif is_tk(results, i, '.') then
                    -- maybe cratera
                    local tk_dot = i
                    local inject_cratera = false
                    i = ignore_newlines(results, i-1)
                    if results[i-1] == TK.NAME then
                        local tk_mytrait = i-1
                        i = ignore_newlines(results, i-2)
                        if results[i-1] == END_OF_STMT then
                            -- definitely cratera (stmt ':' Name '.' Name '(')
                            -- convert into '(' stmt ',' String ',' String
                            -- convert names into strings
                            results[tk_mytrait] = TK.STRING
                            inject_cratera = true
                        end -- else not cratera
                    elseif is_tk(results, i, ']') then
                        local tk_right = i
                        local depth = 1
                        repeat
                            i = i - 1
                            if is_tk(results, i, '[') then
                                depth = depth - 1
                            elseif is_tk(results, i, ']') then
                                depth = depth + 1
                            elseif not results[i] then
                                error("syntax error (unbalanced '[]')")
                            end
                        until depth == 0
                        local tk_left = i
                        i = ignore_newlines(results, i-1)
                        if results[i-1] == END_OF_STMT then
                            -- definitely cratera (stmt ':' '[' exp ']' '.' Name '(')
                            -- convert into '(' stmt ',' '(' exp ')' ',' String
                            -- replace '[' and ']'
                            results[tk_right] = ')'
                            results[tk_left] = '('
                            inject_cratera = true
                        end -- else not cratera
                    end
                    if inject_cratera then
                        --assert(token == '(', "unimplemented")
                        -- convert name into string
                        results[tk_myfunction] = TK.STRING
                        -- replace '.' with ','
                        results[tk_dot] = ','
                        -- here results[i-1] is END_OF_STMT and results[i]
                        -- is the START_OF_STMT index stored after it
                        local pos = results[i]
                        -- remove END_OF_STMT
                        table.remove(results, i-1)
                        table.remove(results, i-1)
                        results.n = results.n - 2
                        -- replace ':' with ','
                        results[ignore_newlines(results, i-2)] = ','
                        -- replace START_OF_STMT with '('
                        results[pos] = '('
                        if token == '(' then
                            -- replace '(' with ','
                            results[results.n] = ','
                        else
                            -- insert ',' before argument
                            table.insert(results, results.n, ',')
                            results.n = results.n + 1
                        end
                        -- inject cratera
                        table.insert(results, pos, CRATERA_FUNCTION)
                        -- check for potential prefixexp and correct for it
                        --if is_tk(results, pos-1, ']') or
                        --    is_tk(results, pos-1, '}') or
                        --    is_tk(results, pos-1, ')') or
                        --    results[pos-2] == TK.NAME or
                        --    results[pos-2] == TK.STRING then
                        --    table.insert(results, pos, ' ')
                        --    results.n = results.n + 1
                        --end
                        results.n = results.n + 1
                        -- tag it so we know to insert a ')' to close our '('
                        -- and to handle '(' ')' (no argument) calls
                        -- we add the tag before TK.STRING/'{'/','
                        table.insert(results, results.n, END_OF_CRATERA)
                        results.n = results.n + 1
                    end
                end -- else not cratera
            end
        elseif token == '}' then
            local i = results.n -- we'll be subtracting anyway, see below
            local depth = 1
            repeat
                i = i - 1
                if is_tk(results, i, '{') then
                    depth = depth - 1
                elseif is_tk(results, i, '}') then
                    depth = depth + 1
                elseif not results[i] then
                    error("syntax error (unbalanced '{}')")
                end
            until depth == 0
            assert(is_tk(results, i, '{'))
            if results[i-1] == END_OF_CRATERA then
                -- need to add ')' to close our '('
                -- (removing the tag shifts everything down by one, so slot
                -- results.n is free again and the length stays the same)
                table.remove(results, i-1)
                results[results.n] = ')'
            end
        elseif token == ')' then
            local i = results.n - 1 -- skip the ')'
            i = ignore_newlines(results, i)
            if results[i] == ',' and results[i-1] == END_OF_CRATERA then
                -- '(' CRATERA_FUNCTION ')' '(' something END_OF_CRATERA ',' ')'
                -- need to fix it up into
                -- '(' CRATERA_FUNCTION ')' '(' something ')'
                table.remove(results, i-1)
                table.remove(results, i-1)
                results.n = results.n - 2
            else
                -- still might need to remove an END_OF_CRATERA somewhere
                i = i + 1
                local depth = 1
                repeat
                    i = i - 1
                    if is_tk(results, i, '(') then
                        depth = depth - 1
                    elseif is_tk(results, i, ')') then
                        depth = depth + 1
                    elseif results[i] == END_OF_CRATERA then
                        table.remove(results, i)
                        results.n = results.n - 1
                        break
                    elseif not results[i] then
                        error("syntax error (unbalanced '()')")
                    end
                until depth == 0
            end
        end
    else -- we skipped a string literal
        if results[results.n-1] == TK.STRING and results[results.n-2] == END_OF_CRATERA then
            -- need to add ')' to close our '('
            -- (same trick as for '}': remove the tag, reuse the freed slot)
            table.remove(results, results.n-2)
            results[results.n] = ')'
        end
    end
    -- remember whether the next token is this token's payload
    results.skip = EXTRA_DATA[token]
    return SELF
end

-- module value: a table exposing the `defs` table built above
return {
    defs = defs,
}