-- cratera/compiler.lua
-- Recovered from commit 4a818684e2bc23c1ba09dec6cb74127e8e0e3f95
-- ("Cleaning up", SoniEx2, Wed, 31 Jul 2019 22:57:53 -0300).
--[[
    This file is part of Cratera Compiler
    Copyright (C) 2019  Soni L.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
--]]

-- slow af but works

-- need to run this first
-- (on Lua 5.1 `_ENV` is just an ordinary local, so `hasenv` resolves to an
-- unset global and the result is true; on 5.2+ it resolves through the local
-- _ENV table and the result is false.)
local is51 = (function() local _ENV = {hasenv = true} return not hasenv end)()

local parser = require "cratera.parser"
local selfify = parser.selfify
local STATE = parser.STATE
local luatokens = require "cratera.luatokens"
local reverse_keywords, reverse_tokens = luatokens.reverse_keywords, luatokens.reverse_tokens
local TK = luatokens.TK
local error, assert, ipairs, tostring, type = error, assert, ipairs, tostring, type
-- try to avoid making too many locals because Lua has a limit to how many locals you can have
-- NOTE: `mininteger` has to be captured here as well; this table shadows the
-- global `math`, so anything not copied is nil inside this file.  It is nil
-- on Lua 5.1/5.2, in which case the INT corner case below simply never fires.
local math = {huge=math.huge, floor=math.floor, mininteger=math.mininteger}
local string = {format=string.format, byte=string.byte, gsub=string.gsub}
local table = {insert=table.insert, remove=table.remove}

-- marker for use with selfify()
local SELF = {}
-- other markers
local FINISH = {}
local START_OF_STMT = {}
local END_OF_STMT = {}
local END_OF_CRATERA = {}

-- implementation of myobj:[mytrait].myfunction(...)
local CRATERA_FUNCTION = "function(myobj, mytrait, myfunction, ...) return myobj[mytrait][myfunction](myobj, ...) end"

-- tokens that are followed by one extra slot of payload in `results`
-- (the name, the number, the string contents, or START_OF_STMT's index).
local EXTRA_DATA = {[TK.NAME] = true, [TK.INT] = true, [TK.FLT] = true, [TK.STRING] = true, [END_OF_STMT] = true}

--- Checks whether results[i] is the token `tk` and not the payload of a
-- preceding EXTRA_DATA token.
local function is_tk(results, i, tk)
  -- needed to prevent accidentally processing string literals as tokens
  -- (note: it's generally safe, and faster, to do results[i] == tk,
  -- if tk is one of the values in the TK.* table.)
  return not EXTRA_DATA[results[i-1]] and results[i] == tk
end

--- Walks backwards from index `i` over '\n' tokens.
-- Returns the index of the first non-newline slot at or before `i`.
local function ignore_newlines(results, i)
  -- skip '\n' and return the new i
  while is_tk(results, i, '\n') do -- ignore newlines
    i = i - 1
  end
  return i
end

-- -- --

local defs = selfify({}, SELF)

local finish = selfify({}, SELF)

--- Emitter step: pops one token (plus its payload, if any) off the end of
-- state.results and appends the corresponding Lua source text to the array
-- part of `state`.  Returns SELF while tokens remain and an empty table once
-- `results` is exhausted (presumably a state with no transitions, which ends
-- the run -- confirm against cratera.parser).
finish[parser.EOZ] = function(state, token)
  local results = state.results
  local tk = table.remove(results)
  if tk == TK.FLT then
    local value = table.remove(results)
    local extra, num, den = 1, value, 1
    assert(value == value and value >= 0, "NYI") -- the tokenizer should never output NaNs or negative values
    if value == math.huge then -- the tokenizer *can* output math.huge tho
      num, den = 1, 0
    else
      -- scale by powers of two until the numerator is integral
      while num ~= math.floor(num) do
        num = num * 2 -- always safe (I think)
        local oldden = den
        den = den * 2
        if den == math.huge then -- subnormals or something?
          extra = oldden
          den = 2
        end
      end
    end
    -- NOTE(review): on Lua 5.3, '%d' raises for floats without an exact
    -- integer representation (e.g. very large literals) -- confirm whether
    -- the tokenizer can produce such values.
    table.insert(state, string.format('((%d/%d)/%d)', num, den, extra))
  elseif tk == TK.INT then
    local v = table.remove(results)
    if v == math.mininteger then
      -- corner case ( https://github.com/lua/lua/commit/707b0ba6e2dbfd58cf1167dae0e17975904b18aa )
      table.insert(state, string.format('0x%x', v))
    else
      table.insert(state, string.format('(%d)', v)) -- may be negative (overflow)
    end
  elseif tk == TK.STRING then
    -- lua tends to use a backslash and a newline but we already do newline processing,
    -- so we need to replace the escaped newline ("\\\n") with a newline escape ("\\n").
    -- additionally lua 5.1 doesn't handle control characters other than '\0' and '\r' so we need to escape them as well
    local fmted = string.format('%q', table.remove(results))
    fmted = string.gsub(fmted, '\n', 'n')
    if is51 then
      fmted = string.gsub(fmted, "%c", function(c) return string.format("\\%03d", string.byte(c)) end)
    end
    table.insert(state, fmted)
  elseif tk == TK.NAME then
    table.insert(state, table.remove(results))
  elseif type(tk) == "string" then
    table.insert(state, tk)
  elseif tk then
    -- START_OF_STMT and END_OF_STMT are neither keywords nor tokens; this should error in that case.
    table.insert(state, assert(reverse_keywords[tk] or reverse_tokens[tk]))
  else
    return {}
  end
  return SELF
end
defs[FINISH] = finish

--- End of input: reverses state.results in place (so the emitter above can
-- pop tokens in source order from the end) and hands over to FINISH.
defs[parser.EOZ] = function(state, token)
  local results = state.results
  if not results then return {} end -- empty, don't waste time processing unnecessary things
  -- flip results around
  local len = results.n
  for i=1, len do
    local j = len-i+1
    if i >= j then
      break
    end
    results[i], results[j] = results[j], results[i]
  end
  return FINISH
end

--- Per-token handler.  Appends `token` to state.results (length tracked in
-- results.n), inserting '\n' tokens when state.linenumber advanced, and
-- rewrites the buffered tokens in place when a cratera trait call
-- (stmt ':' Name '.' Name args, or stmt ':' '[' exp ']' '.' Name args)
-- is recognised.  Always returns SELF.
defs[parser.FALLBACK] = function(state, token)
  local results = state.results or (function() state.results = {} return state.results end)()

  do -- handle newlines. this allows error messages to correctly map between lua and cratera
    local oldline = state.oldline or 1
    local linenumber = state.linenumber or 1
    if linenumber > oldline then
      local count = linenumber-oldline
      local len = (results.n or 0)
      for i=1, count do
        results[len+i] = '\n'
      end
      results.n = len + count
      if EXTRA_DATA[results[len]] then -- we're in the middle of a token with extra data. fix it up.
        results[len], results[results.n] = results[results.n], results[len]
      end
    end
    state.oldline = state.linenumber
  end

  results.n = (results.n or 0) + 1
  results[results.n] = token
  if not results.skip then -- don't process string literals as tokens
    if token == ':' then
      -- figure out whether we're in funcname
      local i = results.n - 1 -- skip the ':'
      local find_statement = true
      i = ignore_newlines(results, i)
      while results[i-1] == TK.NAME do
        i = ignore_newlines(results, i-2) + 2
        if is_tk(results, i-2, '.') then
          -- keep going
          i = i - 3
        elseif results[i-2] == TK.FUNCTION then -- we're in funcname
          find_statement = false -- don't even bother with : handling
          break
        else
          -- found start of statement
          find_statement = false
          -- mark start
          i = i - 1
          table.insert(results, i, START_OF_STMT)
          results.n = results.n + 1
          -- no need to fix existing END_OF_STMT because this code
          -- only detects patterns of the form Name {'.' Name} ':',
          -- which do not contain subexpressions.
          -- mark end
          table.insert(results, results.n + 1, END_OF_STMT)
          table.insert(results, results.n + 2, i)
          results.n = results.n + 2
          break
        end
      end
      if find_statement then
        while true do
          i = ignore_newlines(results, i)
          if is_tk(results, i, ')') then
            -- (prefixexp) or (funcargs)
            -- find matching '('
            local depth = 1
            repeat
              i = i - 1
              if is_tk(results, i, '(') then
                depth = depth - 1
              elseif is_tk(results, i, ')') then
                depth = depth + 1
              elseif not results[i] then
                error("syntax error (unbalanced '()')")
              end
            until depth == 0
          elseif is_tk(results, i, ']') then
            -- [indexing]
            -- find matching '['
            local depth = 1
            repeat
              i = i - 1
              if is_tk(results, i, '[') then
                depth = depth - 1
              elseif is_tk(results, i, ']') then
                depth = depth + 1
              elseif not results[i] then
                error("syntax error (unbalanced '[]')")
              end
            until depth == 0
          elseif results[i-1] == TK.NAME then
            -- Name or '.' Name
            i = i - 2
            i = ignore_newlines(results, i)
            if is_tk(results, i, '.') then
              -- skip '.'
              i = i - 1
            else
              -- found start of statement
              break
            end
          elseif is_tk(results, i, '}') then
            -- prefixexp '{' table '}'
            -- find matching '{' (scan with newi so i is only committed
            -- once the prefix check below passes)
            local newi = i
            local depth = 1
            repeat
              newi = newi - 1
              if is_tk(results, newi, '{') then
                depth = depth - 1
              elseif is_tk(results, newi, '}') then
                depth = depth + 1
              elseif not results[newi] then
                error("syntax error (unbalanced '{}')")
              end
            until depth == 0
            local checki = ignore_newlines(results, newi-1)
            -- do I need these checks?
            if is_tk(results, checki, ']') or
               is_tk(results, checki, '}') or
               is_tk(results, checki, ')') or
               results[checki-1] == TK.NAME or
               results[checki-1] == TK.STRING then
              i = newi
            else
              -- syntax error?
              error("syntax error")
            end
          elseif results[i-1] == TK.STRING then
            -- prefixexp "string"
            -- prefixexp 'string'
            -- prefixexp [[string]]
            local newi = i-1
            local checki = ignore_newlines(results, newi-1)
            -- do I need these checks?
            if is_tk(results, checki, ']') or
               is_tk(results, checki, '}') or
               is_tk(results, checki, ')') or
               results[checki-1] == TK.NAME or
               results[checki-1] == TK.STRING then
              i = newi
            else
              -- syntax error?
              error("syntax error")
            end
          else
            -- found start of statement
            break
          end
          i = i - 1
        end
        -- mark start
        i = i + 1
        table.insert(results, i, START_OF_STMT)
        results.n = results.n + 1
        -- fix existing END_OF_STMT
        -- (their payload is the index of the matching START_OF_STMT, which
        -- shifted by one if it lies after the slot we just inserted)
        for k=i, #results do
          if results[k] == END_OF_STMT then
            local v = results[k+1]
            if v > i then -- this should always be true?
              results[k+1] = v + 1
            end
          end
        end
        -- mark end
        table.insert(results, results.n + 1, END_OF_STMT)
        table.insert(results, results.n + 2, i)
        results.n = results.n + 2
      end
    elseif token == '(' or token == '{' or token == TK.STRING then
      local i = results.n - 1 -- skip the '(' / '{' / TK_STRING
      i = ignore_newlines(results, i)
      -- possible patterns:
      -- ':' Name '(' -- plain Lua thing, ignore
      -- ':' Name '.' Name '(' -- cratera string traits
      -- ':' '[' exp ']' '.' Name '(' -- cratera object traits
      -- ':' '[' exp ']' '(' -- supported in lua 5.3 cratera patch but no reason to support it here.
      if results[i-1] == TK.NAME then
        local tk_myfunction = i-1
        -- maybe cratera
        i = ignore_newlines(results, i-2)
        if results[i-1] == END_OF_STMT then
          -- lua, but we need to fix it up
          -- we could just replace them with dummies, but
          local pos = results[i]
          table.remove(results, i) -- remove END_OF_STMT's value
          table.remove(results, i-1) -- remove END_OF_STMT
          table.remove(results, pos) -- remove START_OF_STMT
          results.n = results.n - 3 -- adjust length
          assert(results[i-3] == ':')
        elseif is_tk(results, i, '.') then
          -- maybe cratera
          local tk_dot = i
          local inject_cratera = false
          i = ignore_newlines(results, i-1)
          if results[i-1] == TK.NAME then
            local tk_mytrait = i-1
            i = ignore_newlines(results, i-2)
            if results[i-1] == END_OF_STMT then
              -- definitely cratera (stmt ':' Name '.' Name '(')
              -- convert into '(' stmt ',' String ',' String
              -- convert names into strings
              results[tk_mytrait] = TK.STRING
              inject_cratera = true
            end -- else not cratera
          elseif is_tk(results, i, ']') then
            local tk_right = i
            local depth = 1
            repeat
              i = i - 1
              if is_tk(results, i, '[') then
                depth = depth - 1
              elseif is_tk(results, i, ']') then
                depth = depth + 1
              elseif not results[i] then
                error("syntax error (unbalanced '[]')")
              end
            until depth == 0
            local tk_left = i
            i = ignore_newlines(results, i-1)
            if results[i-1] == END_OF_STMT then
              -- definitely cratera (stmt ':' '[' exp ']' '.' Name '(')
              -- convert into '(' stmt ',' '(' exp ')' ',' String
              -- replace '[' and ']'
              results[tk_right] = ')'
              results[tk_left] = '('
              inject_cratera = true
            end -- else not cratera
          end
          if inject_cratera then
            --assert(token == '(', "unimplemented")
            -- convert name into string
            results[tk_myfunction] = TK.STRING
            -- replace '.' with ','
            results[tk_dot] = ','
            local pos = results[i]
            -- remove END_OF_STMT
            table.remove(results, i-1)
            table.remove(results, i-1)
            results.n = results.n - 2
            -- replace ':' with ','
            results[ignore_newlines(results, i-2)] = ','
            -- replace START_OF_STMT with '('
            results[pos] = '('
            if token == '(' then
              -- replace '(' with ','
              results[results.n] = ','
            else
              -- insert ',' before argument
              table.insert(results, results.n, ',')
              results.n = results.n + 1
            end
            -- inject cratera
            table.insert(results, pos, ')')
            table.insert(results, pos, CRATERA_FUNCTION)
            table.insert(results, pos, '(')
            -- check for potential prefixexp and correct for it
            if is_tk(results, pos-1, ']') or
               is_tk(results, pos-1, '}') or
               is_tk(results, pos-1, ')') or
               results[pos-2] == TK.NAME or
               results[pos-2] == TK.STRING then
              table.insert(results, pos, ';')
              results.n = results.n + 1
            end
            results.n = results.n + 3
            -- tag it so we know to insert a ')' to close our '('
            -- and to handle '(' ')' (no argument) calls
            -- we add the tag before TK.STRING/'{'/','
            table.insert(results, results.n, END_OF_CRATERA)
            results.n = results.n + 1
          end
        end -- else not cratera
      end
    elseif token == '}' then
      local i = results.n -- we'll be subtracting anyway, see below
      local depth = 1
      repeat
        i = i - 1
        if is_tk(results, i, '{') then
          depth = depth - 1
        elseif is_tk(results, i, '}') then
          depth = depth + 1
        elseif not results[i] then
          error("syntax error (unbalanced '{}')")
        end
      until depth == 0
      assert(is_tk(results, i, '{'))
      if results[i-1] == END_OF_CRATERA then
        -- need to add ')' to close our '('
        -- (removing the tag shifts '}' down one slot, so writing at
        -- results.n appends the ')' without changing the length)
        table.remove(results, i-1)
        results[results.n] = ')'
      end
    elseif token == ')' then
      local i = results.n - 1 -- skip the ')'
      i = ignore_newlines(results, i)
      if results[i] == ',' and results[i-1] == END_OF_CRATERA then
        -- '(' CRATERA_FUNCTION ')' '(' something END_OF_CRATERA ',' ')'
        -- need to fix it up into
        -- '(' CRATERA_FUNCTION ')' '(' something ')'
        table.remove(results, i-1)
        table.remove(results, i-1)
        results.n = results.n - 2
      else
        -- still might need to remove an END_OF_CRATERA somewhere
        i = i + 1
        local depth = 1
        repeat
          i = i - 1
          if is_tk(results, i, '(') then
            depth = depth - 1
          elseif is_tk(results, i, ')') then
            depth = depth + 1
          elseif results[i] == END_OF_CRATERA then
            table.remove(results, i)
            results.n = results.n - 1
            break
          elseif not results[i] then
            error("syntax error (unbalanced '()')")
          end
        until depth == 0
      end
    end
  else -- we skipped a string literal
    if results[results.n-1] == TK.STRING and results[results.n-2] == END_OF_CRATERA then
      -- need to add ')' to close our '('
      table.remove(results, results.n-2)
      results[results.n] = ')'
    end
  end
  -- the next slot is payload (not a token) if this token carries extra data
  results.skip = EXTRA_DATA[token]
  return SELF
end

return {defs = defs}