summary refs log tree commit diff stats
path: root/dirtycompiler.lua
diff options
context:
space:
mode:
author SoniEx2 <endermoneymod@gmail.com> 2019-07-30 21:12:16 -0300
committer SoniEx2 <endermoneymod@gmail.com> 2019-07-30 21:12:16 -0300
commit 4b365cdab1296bc88509c6f8610318adefb0ef0e (patch)
tree 528741562b21cc0d81d7c71aca3eaee5e8433934 /dirtycompiler.lua
parent af3acfbb80bca7447af9fe0d4a34cf860163b218 (diff)
It... kinda works?
Diffstat (limited to 'dirtycompiler.lua')
-rw-r--r-- dirtycompiler.lua 432
1 files changed, 432 insertions, 0 deletions
diff --git a/dirtycompiler.lua b/dirtycompiler.lua
new file mode 100644
index 0000000..ba499b3
--- /dev/null
+++ b/dirtycompiler.lua
@@ -0,0 +1,432 @@
+--[[
+    This file is part of cratera.lua - pure-Lua Cratera-to-Lua transpiler
+    Copyright (C) 2019  Soni L.
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+--]]
+
+-- slow af but works
+
+-- need to run this first
+-- Version probe: on Lua 5.2+ the free name `hasenv` is looked up in the local
+-- `_ENV` table declared just before it (so is51 is false); on Lua 5.1 `_ENV`
+-- is an ordinary local and `hasenv` is a (normally unset) global, so is51 is true.
+local is51 = (function() local _ENV = {hasenv = true} return not hasenv end)()
+
+local parser = require "parser"
+local selfify = parser.selfify
+local STATE = parser.STATE
+local luatokens = require "luatokens"
+local reverse_keywords, reverse_tokens = luatokens.reverse_keywords, luatokens.reverse_tokens
+local TK = luatokens.TK
+local error, assert, ipairs, tostring, type = error, assert, ipairs, tostring, type
+-- try to avoid making too many locals because Lua has a limit to how many locals you can have
+-- NOTE: these tables shadow the standard libraries for the rest of the file,
+-- so every library field used below must be listed here explicitly.
+-- `mininteger` is needed by the TK.INT emitter; it is nil on Lua 5.1/5.2,
+-- where the comparison against it can then never succeed.
+local math = {huge=math.huge, floor=math.floor, mininteger=math.mininteger}
+local string = {format=string.format, byte=string.byte, gsub=string.gsub}
+local table = {insert=table.insert, remove=table.remove}
+
+-- marker for use with selfify()
+local SELF = {}
+-- other markers
+-- FINISH: key of the output state in `defs`
+local FINISH = {}
+-- START_OF_STMT / END_OF_STMT: bracket the prefix expression in front of a ':'
+-- (END_OF_STMT is followed in the token list by the index of its START_OF_STMT)
+local START_OF_STMT = {}
+local END_OF_STMT = {}
+-- END_OF_CRATERA: temporary tag appended after an injected cratera call opener
+local END_OF_CRATERA = {}
+
+-- implementation of myobj:[mytrait].myfunction(...)
+local CRATERA_FUNCTION = "function(myobj, mytrait, myfunction, ...) return myobj[mytrait][myfunction](myobj, ...) end"
+
+-- tokens that are followed by one extra data element in the token list
+local EXTRA_DATA = {[TK.NAME] = true, [TK.INT] = true, [TK.FLT] = true, [TK.STRING] = true, [END_OF_STMT] = true}
+
+-- Tell whether results[i] is the token `tk` (and not the payload of a
+-- preceding token that carries extra data, e.g. the text of a string literal).
+--   results: token list
+--   i:       index to test
+--   tk:      token to compare against
+-- Returns a boolean.
+local function is_tk(results, i, tk)
+    -- (note: it's generally safe, and faster, to do results[i] == tk,
+    -- if tk is one of the values in the TK.* table.)
+    if EXTRA_DATA[results[i-1]] then
+        -- results[i] is data belonging to results[i-1], never a token
+        return false
+    end
+    return results[i] == tk
+end
+
+-- Walk backwards from index i over consecutive '\n' tokens.
+-- Returns the index of the first element (going backwards) that is not a
+-- newline token; returns i itself when results[i] is not a newline token.
+local function ignore_newlines(results, i)
+    local pos = i
+    while true do
+        if not is_tk(results, pos, '\n') then
+            return pos
+        end
+        pos = pos - 1 -- skip this newline
+    end
+end
+
+-- -- --
+
+-- Collecting state: its handlers gather tokens into state.results.
+local defs = selfify({}, SELF)
+
+-- Output state: each call pops one token off the end of state.results (the
+-- list has been reversed beforehand, so this is source order) and appends its
+-- Lua source text to the array part of `state`.
+-- Returns SELF while there was a token to emit, or a fresh empty table once
+-- the list is exhausted.
+local finish = selfify({}, SELF)
+finish[parser.EOZ] = function(state, token)
+    local results = state.results
+    local tk = table.remove(results)
+    -- NOTE(review): looks like a leftover debug trace (one line per emitted token); kept as-is.
+    print((tk == START_OF_STMT and "START_OF_STMT") or (tk == END_OF_STMT and "END_OF_STMT") or tostring(tk))
+    if tk == TK.FLT then
+        -- Emit the float as an exact fraction ((num/den)/extra) where num is
+        -- integer-valued and den/extra are powers of two.
+        local value = table.remove(results)
+        -- den and extra are kept as floats: with integer arithmetic (Lua 5.3+)
+        -- den would wrap around instead of ever reaching math.huge.
+        local extra, num, den = 1.0, value, 1.0
+        assert(value == value and value >= 0, "NYI") -- the tokenizer should never output NaNs or negative values
+        if value == math.huge then -- the tokenizer *can* output math.huge tho
+            num, den = 1, 0
+        else
+            while num ~= math.floor(num) do
+                num = num * 2 -- always safe (I think)
+                local oldden = den
+                den = den * 2
+                if den == math.huge then -- subnormals or something?
+                    -- den overflowed: move the accumulated power of two into
+                    -- `extra` and restart den at the factor just applied
+                    extra = oldden
+                    den = 2
+                end
+            end
+        end
+        -- %.0f rather than %d: num/den/extra are integer-valued but may lie far
+        -- outside the integer range, where %d raises (5.3+) or truncates (5.1).
+        table.insert(state, string.format('((%.0f/%.0f)/%.0f)', num, den, extra))
+    elseif tk == TK.INT then
+        local v = table.remove(results)
+        -- relies on the file-local `math` table exposing `mininteger`
+        if v == math.mininteger then
+            -- corner case ( https://github.com/lua/lua/commit/707b0ba6e2dbfd58cf1167dae0e17975904b18aa )
+            table.insert(state, string.format('0x%x', v))
+        else
+            table.insert(state, string.format('(%d)', v)) -- may be negative (overflow)
+        end
+    elseif tk == TK.STRING then
+        -- lua tends to use a backslash and a newline but we already do newline processing,
+        -- so we need to replace the escaped newline ("\\\n") with a newline escape ("\\n").
+        -- additionally lua 5.1 doesn't handle control characters other than '\0' and '\r' so we need to escape them as well
+        local fmted = string.format('%q', table.remove(results))
+        fmted = string.gsub(fmted, '\n', 'n')
+        if is51 then
+            fmted = string.gsub(fmted, "%c", function(c) return string.format("\\%03d", string.byte(c)) end)
+        end
+        table.insert(state, fmted)
+    elseif tk == TK.NAME then
+        -- the identifier text follows the TK.NAME marker
+        table.insert(state, table.remove(results))
+    elseif type(tk) == "string" then
+        -- string-valued tokens (and injected source snippets) are emitted verbatim
+        table.insert(state, tk)
+    elseif tk then
+        -- START_OF_STMT and END_OF_STMT are neither keywords nor tokens; this should error in that case.
+        table.insert(state, assert(reverse_keywords[tk] or reverse_tokens[tk]))
+    else
+        -- nothing left to emit
+        return {}
+    end
+    return SELF
+end
+defs[FINISH] = finish
+
+-- End of input for the collecting state: reverse state.results in place (so
+-- the output state can pop tokens off the end in source order) and hand over
+-- to the FINISH state.
+defs[parser.EOZ] = function(state, token)
+    local results = state.results
+    if not results then
+        -- no token was ever collected (e.g. empty or comment-only input):
+        -- give the output state an empty list instead of indexing nil
+        results = {n = 0}
+        state.results = results
+    end
+    -- flip results around
+    local lo, hi = 1, results.n
+    while lo < hi do
+        results[lo], results[hi] = results[hi], results[lo]
+        lo, hi = lo + 1, hi - 1
+    end
+    return FINISH
+end
+-- Token collector. Called for every token (and for the data element that
+-- follows NAME/INT/FLT/STRING tokens); appends it to state.results and rewrites
+-- the list in place when it recognises cratera method-call syntax:
+--   * on ':'  - walks backwards over the preceding prefix expression, inserts
+--               START_OF_STMT in front of it and appends END_OF_STMT followed
+--               by the index of that START_OF_STMT (skipped inside
+--               `function a.b:c` names);
+--   * on '(' / '{' / TK.STRING - decides between a plain Lua method call (the
+--               markers are removed again) and a cratera trait call, which is
+--               rewritten into
+--               '(' CRATERA_FUNCTION ')' '(' stmt ',' trait ',' "name" ',' ...;
+--   * on ')'  - removes the END_OF_CRATERA tag (and the dangling ',' for
+--               calls without arguments).
+-- results.n is the list length maintained by hand; results.skip is set while
+-- the next element is token data rather than a token.
+-- Returns SELF.
+defs[parser.FALLBACK] = function(state, token)
+    local results = state.results or (function() state.results = {} return state.results end)()
+
+    do -- handle newlines. this allows error messages to correctly map between lua and cratera
+        local oldline = state.oldline or 1
+        local linenumber = state.linenumber or 1
+        if linenumber > oldline then
+            -- emit one '\n' token per source line advanced since the last call
+            local count = linenumber-oldline
+            local len = (results.n or 0)
+            for i=1, count do
+                results[len+i] = '\n'
+            end
+            results.n = len + count
+            if EXTRA_DATA[results[len]] then -- we're in the middle of a token with extra data. fix it up.
+                results[len], results[results.n] = results[results.n], results[len]
+            end
+        end
+        state.oldline = state.linenumber
+    end
+
+    results.n = (results.n or 0) + 1
+    results[results.n] = token
+    if not results.skip then -- don't process string literals as tokens
+        if token == ':' then
+            -- figure out whether we're in funcname
+            local i = results.n - 1 -- skip the ':'
+            local find_statement = true
+            --while is_tk(results, i, '\n') do -- ignore newlines
+            --    i = i - 1
+            --end
+            i = ignore_newlines(results, i)
+            -- fast path: Name {'.' Name} ':' (possibly a function name)
+            while results[i-1] == TK.NAME do
+                --while is_tk(results, i-2, '\n') do -- ignore newlines
+                --    i = i - 1
+                --end
+                i = ignore_newlines(results, i-2) + 2
+                if is_tk(results, i-2, '.') then
+                    -- keep going
+                    i = i - 3
+                elseif results[i-2] == TK.FUNCTION then -- we're in funcname
+                    find_statement = false -- don't even bother with : handling
+                    break
+                else
+                    -- found start of statement
+                    find_statement = false
+                    -- mark start
+                    i = i - 1
+                    table.insert(results, i, START_OF_STMT)
+                    results.n = results.n + 1
+                    -- no need to fix existing END_OF_STMT because this code
+                    -- only detects patterns of the form Name {'.' Name} ':',
+                    -- which do not contain subexpressions.
+                    -- mark end
+                    table.insert(results, results.n + 1, END_OF_STMT)
+                    table.insert(results, results.n + 2, i)
+                    results.n = results.n + 2
+                    break
+                end
+            end
+            if find_statement then
+                -- general path: walk backwards over an arbitrary prefix expression
+                while true do
+                    --while is_tk(results, i, '\n') do -- ignore newlines
+                    --    i = i - 1
+                    --end
+                    i = ignore_newlines(results, i)
+                    if is_tk(results, i, ')') then
+                        -- (prefixexp) or (funcargs)
+                        -- find matching '('
+                        local depth = 1
+                        repeat
+                            i = i - 1
+                            if is_tk(results, i, '(') then
+                                depth = depth - 1
+                            elseif is_tk(results, i, ')') then
+                                depth = depth + 1
+                            end
+                        until depth == 0
+                    elseif is_tk(results, i, ']') then
+                        -- [indexing]
+                        -- find matching '['
+                        local depth = 1
+                        repeat
+                            i = i - 1
+                            if is_tk(results, i, '[') then
+                                depth = depth - 1
+                            elseif is_tk(results, i, ']') then
+                                depth = depth + 1
+                            end
+                        until depth == 0
+                    elseif results[i-1] == TK.NAME then
+                        -- Name or '.' Name
+                        i = i - 2
+                        i = ignore_newlines(results, i)
+                        if is_tk(results, i, '.') then
+                            -- skip '.'
+                            i = i - 1
+                        else
+                            -- found start of statement
+                            break
+                        end
+                    elseif is_tk(results, i, '}') then
+                        -- prefixexp '{' table '}'
+                        -- find matching '{'
+                        local newi = i
+                        local depth = 1
+                        repeat
+                            newi = newi - 1
+                            if is_tk(results, newi, '{') then
+                                depth = depth - 1
+                            elseif is_tk(results, newi, '}') then
+                                depth = depth + 1
+                            end
+                        until depth == 0
+                        local checki = ignore_newlines(results, newi-1)
+                        -- do I need these checks?
+                        if is_tk(results, checki, ']') or
+                            is_tk(results, checki, '}') or
+                            is_tk(results, checki, ')') or
+                            results[checki-1] == TK.NAME or
+                            results[checki-1] == TK.STRING then
+                            i = newi
+                        else
+                            -- syntax error?
+                            error("syntax error")
+                        end
+                    elseif results[i-1] == TK.STRING then
+                        -- prefixexp "string"
+                        -- prefixexp 'string'
+                        -- prefixexp [[string]]
+                        local newi = i-1
+                        local checki = ignore_newlines(results, newi-1)
+                        -- do I need these checks?
+                        if is_tk(results, checki, ']') or
+                            is_tk(results, checki, '}') or
+                            is_tk(results, checki, ')') or
+                            results[checki-1] == TK.NAME or
+                            results[checki-1] == TK.STRING then
+                            i = newi
+                        else
+                            -- syntax error?
+                            error("syntax error")
+                        end
+                    else
+                        -- found start of statement
+                        break
+                    end
+                    i = i - 1
+                end
+                -- mark start
+                i = i + 1
+                table.insert(results, i, START_OF_STMT)
+                results.n = results.n + 1
+                -- fix existing END_OF_STMT
+                -- (their stored START_OF_STMT indices shifted by the insert above)
+                for k=i, #results do
+                    if results[k] == END_OF_STMT then
+                        local v = results[k+1]
+                        if v > i then -- this should always be true?
+                            results[k+1] = v + 1
+                        end
+                    end
+                end
+                -- mark end
+                table.insert(results, results.n + 1, END_OF_STMT)
+                table.insert(results, results.n + 2, i)
+                results.n = results.n + 2
+            end
+        elseif token == '(' or token == '{' or token == TK.STRING then
+            local i = results.n - 1 -- skip the '(' / '{' / TK_STRING
+            i = ignore_newlines(results, i)
+            -- possible patterns:
+            -- ':' Name '(' -- plain Lua thing, ignore
+            -- ':' Name '.' Name '(' -- cratera string traits
+            -- ':' '[' exp ']' '.' Name '(' -- cratera object traits
+            -- ':' '[' exp ']' '(' -- supported in lua 5.3 cratera patch but no reason to support it here.
+            if results[i-1] == TK.NAME then
+                local tk_myfunction = i-1
+                -- maybe cratera
+                i = ignore_newlines(results, i-2)
+                if results[i-1] == END_OF_STMT then
+                    -- lua, but we need to fix it up
+                    -- we could just replace them with dummies, but
+                    local pos = results[i]
+                    table.remove(results, i) -- remove END_OF_STMT's value
+                    table.remove(results, i-1) -- remove END_OF_STMT
+                    table.remove(results, pos) -- remove START_OF_STMT
+                    results.n = results.n - 3 -- adjust length
+                    assert(results[i-3] == ':')
+                elseif is_tk(results, i, '.') then
+                    -- maybe cratera
+                    local tk_dot = i
+                    local inject_cratera = false
+                    i = ignore_newlines(results, i-1)
+                    if results[i-1] == TK.NAME then
+                        local tk_mytrait = i-1
+                        i = ignore_newlines(results, i-2)
+                        if results[i-1] == END_OF_STMT then
+                            assert(token == '(', "unimplemented")
+                            -- definitely cratera (stmt ':' Name '.' Name '(')
+                            -- convert into '(' stmt ',' String ',' String
+                            -- convert names into strings
+                            results[tk_mytrait] = TK.STRING
+                            inject_cratera = true
+                        end -- else not cratera
+                    elseif is_tk(results, i, ']') then
+                        local tk_right = i
+                        -- find matching '['
+                        local depth = 1
+                        repeat
+                            i = i - 1
+                            if is_tk(results, i, '[') then
+                                depth = depth - 1
+                            elseif is_tk(results, i, ']') then
+                                depth = depth + 1
+                            end
+                        until depth == 0
+                        local tk_left = i
+                        i = ignore_newlines(results, i-1)
+                        if results[i-1] == END_OF_STMT then
+                            assert(token == '(', "unimplemented")
+                            -- definitely cratera (':' '[' exp ']' '.' Name '(')
+                            -- convert into '(' stmt ',' '(' exp ')' ',' String
+                            -- replace '[' and ']'
+                            results[tk_right] = ')'
+                            results[tk_left] = '('
+                            inject_cratera = true
+                        end -- else not cratera
+                    end
+                    if inject_cratera then
+                        -- here results[i-1] is END_OF_STMT and results[i] its START_OF_STMT index
+                        -- convert name into string
+                        results[tk_myfunction] = TK.STRING
+                        -- replace '.' with ','
+                        results[tk_dot] = ','
+                        local pos = results[i]
+                        -- remove END_OF_STMT
+                        table.remove(results, i-1)
+                        table.remove(results, i-1)
+                        results.n = results.n - 2
+                        -- replace ':' with ','
+                        results[ignore_newlines(results, i-2)] = ','
+                        -- replace START_OF_STMT with '(', and '(' with ','
+                        results[pos], results[results.n] = '(', ','
+                        -- inject cratera
+                        -- (inserted in reverse so the list reads '(' CRATERA_FUNCTION ')')
+                        table.insert(results, pos, ')')
+                        table.insert(results, pos, CRATERA_FUNCTION)
+                        table.insert(results, pos, '(')
+                        -- check for potential prefixexp and correct for it
+                        -- (a ';' keeps the injected '(' from being parsed as a call on the previous expression)
+                        if is_tk(results, pos-1, ']') or
+                            is_tk(results, pos-1, '}') or
+                            is_tk(results, pos-1, ')') or
+                            results[pos-2] == TK.NAME or
+                            results[pos-2] == TK.STRING then
+                            table.insert(results, pos, ';')
+                            results.n = results.n + 1
+                        end
+                        results.n = results.n + 3
+                        -- tag it for '(' ')' (no argument) calls
+                        results.n = results.n + 1
+                        results[results.n] = END_OF_CRATERA
+                    end
+                end -- else not cratera
+            end
+        elseif token == '}' then
+            -- TODO unimplemented
+        elseif token == ')' then
+            local i = results.n - 1 -- skip the ')'
+            i = ignore_newlines(results, i)
+            if results[i] == END_OF_CRATERA then
+                -- '(' CRATERA_FUNCTION ')' '(' something ',' END_OF_CRATERA ')'
+                -- need to fix it up into 
+                -- '(' CRATERA_FUNCTION ')' '(' something ')'
+                table.remove(results, i-1)
+                table.remove(results, i-1)
+                results.n = results.n - 2
+            else
+                -- still might need to remove an END_OF_CRATERA somewhere
+                i = i + 1
+                local depth = 1
+                repeat
+                    i = i - 1
+                    if is_tk(results, i, '(') then
+                        depth = depth - 1
+                    elseif is_tk(results, i, ')') then
+                        depth = depth + 1
+                    elseif results[i] == END_OF_CRATERA then
+                        table.remove(results, i)
+                        results.n = results.n - 1
+                        break
+                    elseif not results[i] then
+                        -- ran off the start of the list without finding the '('
+                        error("syntax error")
+                    end
+                until depth == 0
+            end
+        end
+    end
+    -- if this token carries extra data, the next call receives that data, not a token
+    results.skip = EXTRA_DATA[token]
+    return SELF
+end
+
+-- module export: `defs` is the state table holding the EOZ, FALLBACK and FINISH entries defined above
+return {defs = defs}