--[[
This file is part of Cratera Compiler
Copyright (C) 2019, 2024 Soni L.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
--]]
-- slow af but works
-- need to run this first
-- Detects a Lua 5.1-style runtime: there `_ENV` is just an ordinary local, so
-- the free name `hasenv` below is looked up as a global (nil) and the probe
-- yields true. On runtimes where `_ENV` is the environment upvalue, `hasenv`
-- resolves to the table field (true) and the probe yields false.
local is51
do
  local function env_is_plain_local()
    local _ENV = {hasenv = true}
    return not hasenv
  end
  is51 = env_is_plain_local()
end
local parser = require "cratera.parser"
local selfify = parser.selfify
local STATE = parser.STATE
local luatokens = require "cratera.luatokens"
local reverse_keywords, reverse_tokens = luatokens.reverse_keywords, luatokens.reverse_tokens
local TK = luatokens.TK
local error, assert, ipairs, tostring, type = error, assert, ipairs, tostring, type
-- try to avoid making too many locals because Lua has a limit to how many locals you can have
-- NOTE: these tables shadow the global libraries for the rest of the file, so
-- every library field used further down has to be listed here explicitly.
-- `mininteger` is read by the integer emitter; it is nil on runtimes without
-- an integer subtype, which simply disables that corner case there.
local math = {huge=math.huge, floor=math.floor, mininteger=math.mininteger}
local string = {format=string.format, byte=string.byte, gsub=string.gsub}
local table = {insert=table.insert, remove=table.remove}
-- marker for use with selfify()
local SELF = {}
-- other markers; each one is a distinct empty table that is only ever
-- compared by identity
local FINISH, START_OF_STMT, END_OF_STMT, END_OF_CRATERA = {}, {}, {}, {}
-- implementation of myobj:[mytrait].myfunction(...)
-- TODO benchmark under luajit?
local CRATERA_FUNCTION = "_CRATERA_INVOKE_TRAIT"
-- Lua source for the helper; it is stored as the first entry of every
-- results list so the generated code can call CRATERA_FUNCTION.
local CRATERA_INIT = "local function "..CRATERA_FUNCTION.."(myobj, mytrait, myfunction, ...) return myobj[mytrait][myfunction](myobj, ...) end "
-- entries of the results list that are immediately followed by a payload
-- value (name, number, string contents, or a stored list index)
local EXTRA_DATA = {
  [TK.NAME] = true,
  [TK.INT] = true,
  [TK.FLT] = true,
  [TK.STRING] = true,
  [END_OF_STMT] = true,
}
-- Returns true when results[i] is the token `tk` itself, false otherwise.
-- An entry that directly follows a key of EXTRA_DATA is that entry's payload
-- (e.g. the contents of a string literal), never a token, so it is rejected
-- even if it happens to compare equal to `tk`.
-- (note: it's generally safe, and faster, to do results[i] == tk,
-- if tk is one of the values in the TK.* table.)
local function is_tk(results, i, tk)
  if EXTRA_DATA[results[i-1]] then
    return false
  end
  return results[i] == tk
end
-- Walks backwards from index `i` over consecutive '\n' tokens and returns the
-- index of the first entry that is not a newline token.
local function ignore_newlines(results, i)
  local pos = i
  while true do
    if not is_tk(results, pos, '\n') then
      return pos
    end
    pos = pos - 1
  end
end
-- -- --
-- State tables handed to the parser. `defs` is the table exported at the end
-- of this file; `finish` is registered under defs[FINISH] and is the state
-- returned by defs[parser.EOZ] once the token list has been reversed.
-- NOTE(review): selfify() lives in cratera.parser; presumably it makes the
-- SELF marker returned by handlers resolve back to the table itself -- confirm.
local defs = selfify({}, SELF)
local finish = selfify({}, SELF)
-- Emits one token per invocation: pops the last entry of state.results (the
-- list has been reversed beforehand, so this walks the tokens in source
-- order), converts it to Lua source text and appends that text to `state`.
-- Tokens listed in EXTRA_DATA are followed by a payload, which is popped too.
-- @param state  parser state; reads state.results, appends strings to state
-- @param token  unused
-- @return SELF after emitting a piece of text, or a fresh empty table once
--         state.results is exhausted
-- Raises (via assert) on a NaN/negative float payload and on an entry that is
-- neither a string nor a known keyword/token (e.g. a leftover marker).
finish[parser.EOZ] = function(state, token)
  local results = state.results
  local tk = table.remove(results)
  if tk == TK.FLT then
    local value = table.remove(results)
    -- The float is written as ((num/den)/extra) with whole-number operands.
    -- den/extra must be floats: with integer 1 as the seed, runtimes that
    -- have an integer subtype would do wrapping integer arithmetic in
    -- `den * 2`, reach 0 after 64 doublings and never hit math.huge.
    local extra, num, den = 1.0, value, 1.0
    assert(value == value and value >= 0, "NYI") -- the tokenizer should never output NaNs or negative values
    if value == math.huge then -- the tokenizer *can* output math.huge tho
      num, den = 1.0, 0.0
    else
      while num ~= math.floor(num) do
        num = num * 2 -- always safe (I think)
        local oldden = den
        den = den * 2
        if den == math.huge then -- subnormals or something?
          -- den overflowed: park the largest finite power of two in `extra`
          -- and restart den at 2, so that den * extra is the real denominator
          extra = oldden
          den = 2.0
        end
      end
    end
    -- '%d' rejects (or mangles, depending on the Lua version) whole floats
    -- outside the integer range, such as 1e300 or a 2^1023 denominator, so
    -- print with '%.0f' instead. The text is identical to '%d' for values
    -- that fit. Values >= 2^63 get a '.0' suffix so that the literal is
    -- always read back as a float rather than as an overflowing integer.
    local function whole(v)
      local text = string.format('%.0f', v)
      if v >= 2^63 then
        text = text .. '.0'
      end
      return text
    end
    table.insert(state, '((' .. whole(num) .. '/' .. whole(den) .. ')/' .. whole(extra) .. ')')
  elseif tk == TK.INT then
    local v = table.remove(results)
    if v == math.mininteger then
      -- corner case ( https://github.com/lua/lua/commit/707b0ba6e2dbfd58cf1167dae0e17975904b18aa )
      table.insert(state, string.format('0x%x', v))
    else
      table.insert(state, string.format('(%d)', v)) -- may be negative (overflow)
    end
  elseif tk == TK.STRING then
    -- lua tends to use a backslash and a newline but we already do newline processing,
    -- so we need to replace the escaped newline ("\\\n") with a newline escape ("\\n").
    -- additionally lua 5.1 doesn't handle control characters other than '\0' and '\r' so we need to escape them as well
    local fmted = string.format('%q', table.remove(results))
    fmted = string.gsub(fmted, '\n', 'n')
    if is51 then
      fmted = string.gsub(fmted, "%c", function(c) return string.format("\\%03d", string.byte(c)) end)
    end
    table.insert(state, fmted)
  elseif tk == TK.NAME then
    table.insert(state, table.remove(results))
  elseif type(tk) == "string" then
    -- single-character tokens, '\n' entries and injected source text are
    -- already strings and are emitted verbatim
    table.insert(state, tk)
  elseif tk then
    -- START_OF_STMT and END_OF_STMT are neither keywords nor tokens; this should error in that case.
    table.insert(state, assert(reverse_keywords[tk] or reverse_tokens[tk]))
  else
    -- nothing left to emit
    return {}
  end
  return SELF
end
defs[FINISH] = finish
-- End-of-input handler for the collecting state: reverses state.results in
-- place (using its explicit length field `n`) so that the finish state can
-- pop tokens off the end in source order, then hands over to FINISH.
-- Returns a fresh empty table instead when no token was ever collected.
defs[parser.EOZ] = function(state, token)
  local results = state.results
  if not results then
    -- empty, don't waste time processing unnecessary things
    return {}
  end
  -- flip results around: swap the ends and walk inwards until they meet
  local lo, hi = 1, results.n
  while lo < hi do
    results[lo], results[hi] = results[hi], results[lo]
    lo = lo + 1
    hi = hi - 1
  end
  return FINISH
end
-- Handler for every token (and every token payload) that has no dedicated
-- entry in `defs`.
--
-- It appends `token` to state.results (created on first use and seeded with
-- CRATERA_INIT), first adding one '\n' entry per line that state.linenumber
-- advanced since the previous call. results.n tracks the list length.
-- Unless the value is the payload of the previous token (results.skip), it
-- then rewrites the tail of the list:
--   ':'              find the start of the expression to the left of the ':'
--                    and bracket it with START_OF_STMT ... END_OF_STMT <index
--                    of START_OF_STMT>; skipped inside `function a.b:c`
--   '(' '{' STRING   if the marked statement is followed by Name '.' Name or
--                    '[' exp ']' '.' Name, turn it into
--                    CRATERA_FUNCTION '(' stmt ',' trait ',' "name" ',' and
--                    leave an END_OF_CRATERA tag; for a plain stmt ':' Name
--                    call just remove the markers again
--   '}' ')' and string payloads: resolve a pending END_OF_CRATERA tag by
--                    closing the injected '(' or dropping the spare ','
--
-- @param state  parser state; reads linenumber, reads/writes results, oldline
-- @param token  the token or payload being appended
-- @return SELF
-- Raises "syntax error ..." when a closing bracket has no matching opener or
-- a table/string call argument is not preceded by a prefix expression.
defs[parser.FALLBACK] = function(state, token)
  local results = state.results or (function() state.results = {CRATERA_INIT, n=1} return state.results end)()
  do -- handle newlines. this allows error messages to correctly map between lua and cratera
    local oldline = state.oldline or 1
    local linenumber = state.linenumber or 1
    if linenumber > oldline then
      local count = linenumber-oldline
      local len = (results.n or 0)
      for i=1, count do
        results[len+i] = '\n'
      end
      results.n = len + count
      if EXTRA_DATA[results[len]] then -- we're in the middle of a token with extra data. fix it up.
        -- move the token behind the newlines so it stays adjacent to the
        -- payload that is appended below
        results[len], results[results.n] = results[results.n], results[len]
      end
    end
    state.oldline = state.linenumber
  end
  results.n = (results.n or 0) + 1
  results[results.n] = token
  if not results.skip then -- don't process string literals as tokens
    if token == ':' then
      -- figure out whether we're in funcname
      local i = results.n - 1 -- skip the ':'
      local find_statement = true
      i = ignore_newlines(results, i)
      while results[i-1] == TK.NAME do
        i = ignore_newlines(results, i-2) + 2
        if is_tk(results, i-2, '.') then
          -- keep going
          i = i - 3
        elseif results[i-2] == TK.FUNCTION then -- we're in funcname
          -- funcname is of the form Name {'.' Name} ':'
          -- TODO how can we parse 'function Foo:[Bar].Baz()'...
          find_statement = false
          break
        else
          -- found start of statement
          find_statement = false
          -- mark start
          i = i - 1
          table.insert(results, i, START_OF_STMT)
          results.n = results.n + 1
          -- no need to fix existing END_OF_STMT because this code
          -- only detects patterns of the form Name {'.' Name} ':',
          -- which do not contain subexpressions.
          -- mark end
          table.insert(results, results.n + 1, END_OF_STMT)
          table.insert(results, results.n + 2, i)
          results.n = results.n + 2
          break
        end
      end
      if find_statement then
        while true do
          i = ignore_newlines(results, i)
          if is_tk(results, i, ')') then
            -- (prefixexp) or (funcargs)
            -- find matching '('
            local depth = 1
            repeat
              i = i - 1
              if is_tk(results, i, '(') then
                depth = depth - 1
              elseif is_tk(results, i, ')') then
                depth = depth + 1
              elseif not results[i] then
                error("syntax error (unbalanced '()')")
              end
            until depth == 0
          elseif is_tk(results, i, ']') then
            -- [indexing]
            -- find matching '['
            local depth = 1
            repeat
              i = i - 1
              if is_tk(results, i, '[') then
                depth = depth - 1
              elseif is_tk(results, i, ']') then
                depth = depth + 1
              elseif not results[i] then
                error("syntax error (unbalanced '[]')")
              end
            until depth == 0
          elseif results[i-1] == TK.NAME then
            -- Name or '.' Name
            i = i - 2
            i = ignore_newlines(results, i)
            if is_tk(results, i, '.') then
              -- skip '.'
              i = i - 1
            else
              -- found start of statement
              break
            end
          elseif is_tk(results, i, '}') then
            -- prefixexp '{' table '}'
            -- find matching '{'. The scan runs on newi, so both the brace
            -- tests and the ran-off-the-front check must look at newi:
            -- results[i] is the '}' itself and is never nil.
            local newi = i
            local depth = 1
            repeat
              newi = newi - 1
              if is_tk(results, newi, '{') then
                depth = depth - 1
              elseif is_tk(results, newi, '}') then
                depth = depth + 1
              elseif not results[newi] then
                error("syntax error (unbalanced '{}')")
              end
            until depth == 0
            local checki = ignore_newlines(results, newi-1)
            -- do I need these checks?
            if is_tk(results, checki, ']') or
               is_tk(results, checki, '}') or
               is_tk(results, checki, ')') or
               results[checki-1] == TK.NAME or
               results[checki-1] == TK.STRING then
              i = newi
            else
              -- syntax error?
              error("syntax error")
            end
          elseif results[i-1] == TK.STRING then
            -- prefixexp "string"
            -- prefixexp 'string'
            -- prefixexp [[string]]
            local newi = i-1
            local checki = ignore_newlines(results, newi-1)
            -- do I need these checks?
            if is_tk(results, checki, ']') or
               is_tk(results, checki, '}') or
               is_tk(results, checki, ')') or
               results[checki-1] == TK.NAME or
               results[checki-1] == TK.STRING then
              i = newi
            else
              -- syntax error?
              error("syntax error")
            end
          else
            -- found start of statement
            break
          end
          i = i - 1
        end
        -- mark start
        i = i + 1
        table.insert(results, i, START_OF_STMT)
        results.n = results.n + 1
        -- fix existing END_OF_STMT
        -- (the insertion above shifted everything from i onwards by one, so
        -- stored START_OF_STMT indices beyond i are stale)
        for k=i, #results do
          if results[k] == END_OF_STMT then
            local v = results[k+1]
            if v > i then -- this should always be true?
              results[k+1] = v + 1
            end
          end
        end
        -- mark end
        table.insert(results, results.n + 1, END_OF_STMT)
        table.insert(results, results.n + 2, i)
        results.n = results.n + 2
      end
    elseif token == '(' or token == '{' or token == TK.STRING then
      local i = results.n - 1 -- skip the '(' / '{' / TK_STRING
      i = ignore_newlines(results, i)
      -- possible patterns:
      -- ':' Name '(' -- plain Lua thing, ignore
      -- ':' Name '.' Name '(' -- cratera string traits
      -- ':' '[' exp ']' '.' Name '(' -- cratera object traits
      -- ':' '[' exp ']' '(' -- supported in lua 5.3 cratera patch but no reason to support it here.
      if results[i-1] == TK.NAME then
        local tk_myfunction = i-1
        -- maybe cratera
        i = ignore_newlines(results, i-2)
        if results[i-1] == END_OF_STMT then
          -- lua, but we need to fix it up
          -- we could just replace them with dummies, but
          local pos = results[i]
          table.remove(results, i) -- remove END_OF_STMT's value
          table.remove(results, i-1) -- remove END_OF_STMT
          table.remove(results, pos) -- remove START_OF_STMT
          results.n = results.n - 3 -- adjust length
          assert(results[i-3] == ':')
        elseif is_tk(results, i, '.') then
          -- maybe cratera
          local tk_dot = i
          local inject_cratera = false
          i = ignore_newlines(results, i-1)
          if results[i-1] == TK.NAME then
            local tk_mytrait = i-1
            i = ignore_newlines(results, i-2)
            if results[i-1] == END_OF_STMT then
              -- definitely cratera (stmt ':' Name '.' Name '(')
              -- convert into '(' stmt ',' String ',' String
              -- convert names into strings
              results[tk_mytrait] = TK.STRING
              inject_cratera = true
            end -- else not cratera
          elseif is_tk(results, i, ']') then
            local tk_right = i
            local depth = 1
            repeat
              i = i - 1
              if is_tk(results, i, '[') then
                depth = depth - 1
              elseif is_tk(results, i, ']') then
                depth = depth + 1
              elseif not results[i] then
                error("syntax error (unbalanced '[]')")
              end
            until depth == 0
            local tk_left = i
            i = ignore_newlines(results, i-1)
            if results[i-1] == END_OF_STMT then
              -- definitely cratera (stmt ':' '[' exp ']' '.' Name '(')
              -- convert into '(' stmt ',' '(' exp ')' ',' String
              -- replace '[' and ']'
              results[tk_right] = ')'
              results[tk_left] = '('
              inject_cratera = true
            end -- else not cratera
          end
          if inject_cratera then
            -- here results[i-1] is END_OF_STMT and results[i] is the index
            -- of its START_OF_STMT
            --assert(token == '(', "unimplemented")
            -- convert name into string
            results[tk_myfunction] = TK.STRING
            -- replace '.' with ','
            results[tk_dot] = ','
            local pos = results[i]
            -- remove END_OF_STMT
            table.remove(results, i-1)
            table.remove(results, i-1)
            results.n = results.n - 2
            -- replace ':' with ','
            results[ignore_newlines(results, i-2)] = ','
            -- replace START_OF_STMT with '('
            results[pos] = '('
            if token == '(' then
              -- replace '(' with ','
              results[results.n] = ','
            else
              -- insert ',' before argument
              table.insert(results, results.n, ',')
              results.n = results.n + 1
            end
            -- inject cratera
            table.insert(results, pos, CRATERA_FUNCTION)
            -- check for potential prefixexp and correct for it
            --if is_tk(results, pos-1, ']') or
            --   is_tk(results, pos-1, '}') or
            --   is_tk(results, pos-1, ')') or
            --   results[pos-2] == TK.NAME or
            --   results[pos-2] == TK.STRING then
            --  table.insert(results, pos, ' ')
            --  results.n = results.n + 1
            --end
            results.n = results.n + 1
            -- tag it so we know to insert a ')' to close our '('
            -- and to handle '(' ')' (no argument) calls
            -- we add the tag before TK.STRING/'{'/','
            table.insert(results, results.n, END_OF_CRATERA)
            results.n = results.n + 1
          end
        end -- else not cratera
      end
    elseif token == '}' then
      local i = results.n -- we'll be subtracting anyway, see below
      local depth = 1
      repeat
        i = i - 1
        if is_tk(results, i, '{') then
          depth = depth - 1
        elseif is_tk(results, i, '}') then
          depth = depth + 1
        elseif not results[i] then
          error("syntax error (unbalanced '{}')")
        end
      until depth == 0
      assert(is_tk(results, i, '{'))
      if results[i-1] == END_OF_CRATERA then
        -- need to add ')' to close our '('
        -- (removing the tag shifts the list down by one, so writing at
        -- results.n appends the ')' and the length stays the same)
        table.remove(results, i-1)
        results[results.n] = ')'
      end
    elseif token == ')' then
      local i = results.n - 1 -- skip the ')'
      i = ignore_newlines(results, i)
      if results[i] == ',' and results[i-1] == END_OF_CRATERA then
        -- '(' CRATERA_FUNCTION ')' '(' something END_OF_CRATERA ',' ')'
        -- need to fix it up into
        -- '(' CRATERA_FUNCTION ')' '(' something ')'
        table.remove(results, i-1)
        table.remove(results, i-1)
        results.n = results.n - 2
      else
        -- still might need to remove an END_OF_CRATERA somewhere
        i = i + 1
        local depth = 1
        repeat
          i = i - 1
          if is_tk(results, i, '(') then
            depth = depth - 1
          elseif is_tk(results, i, ')') then
            depth = depth + 1
          elseif results[i] == END_OF_CRATERA then
            table.remove(results, i)
            results.n = results.n - 1
            break
          elseif not results[i] then
            error("syntax error (unbalanced '()')")
          end
        until depth == 0
      end
    end
  else -- we skipped a string literal
    if results[results.n-1] == TK.STRING and results[results.n-2] == END_OF_CRATERA then
      -- need to add ')' to close our '('
      table.remove(results, results.n-2)
      results[results.n] = ')'
    end
  end
  -- remember whether the next value handed to us is this token's payload
  results.skip = EXTRA_DATA[token]
  return SELF
end
-- module export: the state table `defs` is the only thing exposed
return {defs = defs}