summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/bin/cratera.lua21
-rw-r--r--src/bin/printtokens.lua67
-rw-r--r--src/cratera/compiler.lua463
-rw-r--r--src/cratera/init.lua83
-rw-r--r--src/cratera/loader.lua19
-rw-r--r--src/cratera/luatokens.lua769
-rw-r--r--src/cratera/parser.lua202
7 files changed, 1624 insertions, 0 deletions
diff --git a/src/bin/cratera.lua b/src/bin/cratera.lua
new file mode 100644
index 0000000..fa94e96
--- /dev/null
+++ b/src/bin/cratera.lua
@@ -0,0 +1,21 @@
+--[[
+    Cratera Interpreter
+    Copyright (C) 2024  Soni L.
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+--]]
+
+local cratera = require "cratera"
+
+-- placeholder entry point: the interpreter CLI has not been written yet
+error("not implemented")
diff --git a/src/bin/printtokens.lua b/src/bin/printtokens.lua
new file mode 100644
index 0000000..49827da
--- /dev/null
+++ b/src/bin/printtokens.lua
@@ -0,0 +1,67 @@
+--[[
+    This file is part of Cratera Compiler
+    Copyright (C) 2019  Soni L.
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+--]]
+
+-- this isn't supposed to be installed or anything it's just a basic CLI to
+-- print a lua or cratera file as a token stream.
+
+local parser = require "cratera.parser"
+
+-- CLI argument rules: a selfify'd rule table; each rule returns "self" to
+-- stay in the same state and keep consuming arguments.
+local defs = parser.selfify({})
+-- '-' selects stdin as the input; only one input may be given.
+defs['-'] = function(state, token)
+    if state.filename then
+        error("Must specify only one filename")
+    end
+    state.filename = true
+    state.file = io.stdin
+    return "self"
+end
+-- any other argument is treated as a filename; open failures are stored in
+-- state.err and reported later, at end of arguments.
+defs[parser.FALLBACK] = function(state, token)
+    if state.filename then
+        error("Must specify only one filename")
+    end
+    state.filename = token
+    state.file, state.err = io.open(state.filename, "r")
+    return "self"
+end
+-- end of arguments: fail unless an input file was successfully selected.
+defs[parser.EOZ] = function(state, token)
+    if not state.file then
+        error((state.filename and (state.err or "") or "No file specified") )
+    end
+    return {}
+end
+-- NOTE(review): defs[-1] appears to be a per-token hook (it also receives the
+-- matched rule) — confirm against cratera.parser. It rejects '-'-prefixed
+-- arguments that no explicit rule handled.
+defs[-1] = function(state, token, rule)
+    if token ~= parser.EOZ and token:sub(1,1) == "-" and not rule then
+        error("Unknown option: " .. token)
+    end
+end
+-- '--' ends option parsing: from here on only FALLBACK (filename) and EOZ apply.
+defs['--'] = parser.selfify({[parser.FALLBACK] = defs[parser.FALLBACK], [parser.EOZ] = defs[parser.EOZ]})
+
+-- parse the CLI arguments, then tokenize the selected file and dump the
+-- resulting token stream, one token per line.
+local state = parser.parse(defs, arg)
+local luatokens = require "cratera.luatokens"
+local file = state.file
+local tokens = luatokens.defs
+local state, err, etoken, estate = parser.parse(tokens, function() return file:read(8192) end)
+if state then
+    for i,v in ipairs(state) do
+        -- map token-type markers back to their source spelling; literal
+        -- payloads (names, strings, numbers) pass through unchanged
+        v = luatokens.reverse_keywords[v] or luatokens.reverse_tokens[v] or v
+        print(i, v) -- TODO formatting
+    end
+else
+    -- report the tokenizer's diagnostics instead of discarding err/etoken
+    print("Parse error", err, etoken)
+end
diff --git a/src/cratera/compiler.lua b/src/cratera/compiler.lua
new file mode 100644
index 0000000..2f4a998
--- /dev/null
+++ b/src/cratera/compiler.lua
@@ -0,0 +1,463 @@
+--[[
+    This file is part of Cratera Compiler
+    Copyright (C) 2019  Soni L.
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+--]]
+
+-- slow af but works
+
+-- need to run this first: on Lua 5.1 a function's _ENV is just a local, so
+-- `hasenv` resolves to the real global environment (nil) there.
+local is51 = (function() local _ENV = {hasenv = true} return not hasenv end)()
+
+local parser = require "cratera.parser"
+local selfify = parser.selfify
+local STATE = parser.STATE
+local luatokens = require "cratera.luatokens"
+local reverse_keywords, reverse_tokens = luatokens.reverse_keywords, luatokens.reverse_tokens
+local TK = luatokens.TK
+local error, assert, ipairs, tostring, type = error, assert, ipairs, tostring, type
+-- try to avoid making too many locals because Lua has a limit to how many locals you can have
+-- mininteger must be carried over: the TK.INT corner case below compares
+-- against math.mininteger, which this shadowing table previously dropped
+-- (it is simply nil on 5.1/5.2, where the comparison can never match).
+local math = {huge=math.huge, floor=math.floor, mininteger=math.mininteger}
+local string = {format=string.format, byte=string.byte, gsub=string.gsub}
+local table = {insert=table.insert, remove=table.remove}
+
+-- marker for use with selfify()
+local SELF = {}
+-- other markers
+local FINISH = {}
+local START_OF_STMT = {}
+local END_OF_STMT = {}
+local END_OF_CRATERA = {}
+
+-- implementation of myobj:[mytrait].myfunction(...)
+local CRATERA_FUNCTION = "function(myobj, mytrait, myfunction, ...) return myobj[mytrait][myfunction](myobj, ...) end"
+
+-- token types that are followed by a payload value in the results stream
+local EXTRA_DATA = {[TK.NAME] = true, [TK.INT] = true, [TK.FLT] = true, [TK.STRING] = true, [END_OF_STMT] = true}
+
+local function is_tk(results, i, tk)
+    -- true when results[i] is the actual token tk rather than a payload that
+    -- merely compares equal to it: payloads always directly follow their
+    -- EXTRA_DATA type marker, so a marker at i-1 means i is not a token.
+    -- (note: it's generally safe, and faster, to do results[i] == tk,
+    -- if tk is one of the values in the TK.* table.)
+    if EXTRA_DATA[results[i-1]] then
+        return false
+    end
+    return results[i] == tk
+end
+
+local function ignore_newlines(results, i)
+    -- walk leftwards over '\n' pseudo-tokens; returns the first index whose
+    -- entry is not a newline
+    local j = i
+    while true do
+        if not is_tk(results, j, '\n') then
+            return j
+        end
+        j = j - 1
+    end
+end
+
+-- -- --
+
+local defs = selfify({}, SELF)
+
+-- FINISH state: by the time we get here defs[parser.EOZ] has reversed
+-- state.results, so table.remove() pops tokens in original source order.
+-- Each pop appends that token's Lua source spelling to `state`.
+local finish = selfify({}, SELF)
+finish[parser.EOZ] = function(state, token)
+    local results = state.results
+    local tk = table.remove(results)
+    if tk == TK.FLT then
+        -- re-emit the float exactly: decompose the value into num/den/extra
+        -- powers of two so the emitted '((num/den)/extra)' reconstructs the
+        -- same double without relying on decimal round-tripping
+        local token = table.remove(results)
+        local extra, num, den = 1, token, 1
+        assert(token == token and token >= 0, "NYI") -- the tokenizer should never output NaNs or negative values
+        if token == math.huge then -- the tokenizer *can* output math.huge tho
+            num, den = 1, 0
+        else
+            while num ~= math.floor(num) do
+                num = num * 2 -- always safe (I think)
+                local oldden = den
+                den = den * 2
+                if den == math.huge then -- subnormals or something?
+                    extra = oldden
+                    den = 2
+                end
+            end
+        end
+        table.insert(state, string.format('((%d/%d)/%d)', num, den, extra))
+    elseif tk == TK.INT then
+        local v = table.remove(results)
+        if v == math.mininteger then
+            -- corner case ( https://github.com/lua/lua/commit/707b0ba6e2dbfd58cf1167dae0e17975904b18aa )
+            table.insert(state, string.format('0x%x', v))
+        else
+            table.insert(state, string.format('(%d)', v)) -- may be negative (overflow)
+        end
+    elseif tk == TK.STRING then
+        -- lua tends to use a backslash and a newline but we already do newline processing,
+        -- so we need to replace the escaped newline ("\\\n") with a newline escape ("\\n").
+        -- additionally lua 5.1 doesn't handle control characters other than '\0' and '\r' so we need to escape them as well
+        local fmted = string.format('%q', table.remove(results))
+        fmted = string.gsub(fmted, '\n', 'n')
+        if is51 then
+            fmted = string.gsub(fmted, "%c", function(c) return string.format("\\%03d", string.byte(c)) end)
+        end
+        table.insert(state, fmted)
+    elseif tk == TK.NAME then
+        -- a name's payload (its spelling) is the next popped element
+        table.insert(state, table.remove(results))
+    elseif type(tk) == "string" then
+        -- raw source fragments ('\n', injected punctuation, CRATERA_FUNCTION)
+        table.insert(state, tk)
+    elseif tk then
+        -- START_OF_STMT and END_OF_STMT are neither keywords nor tokens; this should error in that case.
+        table.insert(state, assert(reverse_keywords[tk] or reverse_tokens[tk]))
+    else
+        -- results exhausted: leave the FINISH state
+        return {}
+    end
+    return SELF
+end
+defs[FINISH] = finish
+
+-- end of input: reverse state.results in place (so the FINISH state can pop
+-- tokens with table.remove in source order), then hand off to FINISH.
+defs[parser.EOZ] = function(state, token)
+    local results = state.results
+    if not results then return {} end -- empty, don't waste time processing unnecessary things
+    local lo, hi = 1, results.n
+    while lo < hi do
+        results[lo], results[hi] = results[hi], results[lo]
+        lo, hi = lo + 1, hi - 1
+    end
+    return FINISH
+end
+-- main pass: accumulate each incoming token into state.results while
+-- rewriting Cratera trait-call syntax (stmt ':' Name '.' Name '(' and
+-- stmt ':' '[' exp ']' '.' Name '(') into plain Lua calls through
+-- CRATERA_FUNCTION. Newlines are stored as '\n' pseudo-tokens so that line
+-- numbers survive into the generated Lua.
+defs[parser.FALLBACK] = function(state, token)
+    local results = state.results or (function() state.results = {} return state.results end)()
+
+    do -- handle newlines. this allows error messages to correctly map between lua and cratera
+        local oldline = state.oldline or 1
+        local linenumber = state.linenumber or 1
+        if linenumber > oldline then
+            local count = linenumber-oldline
+            local len = (results.n or 0)
+            for i=1, count do
+                results[len+i] = '\n'
+            end
+            results.n = len + count
+            if EXTRA_DATA[results[len]] then -- we're in the middle of a token with extra data. fix it up.
+                results[len], results[results.n] = results[results.n], results[len]
+            end
+        end
+        state.oldline = state.linenumber
+    end
+
+    results.n = (results.n or 0) + 1
+    results[results.n] = token
+    if not results.skip then -- don't process string literals as tokens
+        if token == ':' then
+            -- figure out whether we're in funcname
+            local i = results.n - 1 -- skip the ':'
+            local find_statement = true
+            i = ignore_newlines(results, i)
+            while results[i-1] == TK.NAME do
+                i = ignore_newlines(results, i-2) + 2
+                if is_tk(results, i-2, '.') then
+                    -- keep going
+                    i = i - 3
+                elseif results[i-2] == TK.FUNCTION then -- we're in funcname
+                    find_statement = false -- don't even bother with : handling
+                    break
+                else
+                    -- found start of statement
+                    find_statement = false
+                    -- mark start
+                    i = i - 1
+                    table.insert(results, i, START_OF_STMT)
+                    results.n = results.n + 1
+                    -- no need to fix existing END_OF_STMT because this code
+                    -- only detects patterns of the form Name {'.' Name} ':',
+                    -- which do not contain subexpressions.
+                    -- mark end
+                    table.insert(results, results.n + 1, END_OF_STMT)
+                    table.insert(results, results.n + 2, i)
+                    results.n = results.n + 2
+                    break
+                end
+            end
+            if find_statement then
+                -- scan backwards over one whole prefixexp to find where the
+                -- statement containing this ':' begins
+                while true do
+                    i = ignore_newlines(results, i)
+                    if is_tk(results, i, ')') then
+                        -- (prefixexp) or (funcargs)
+                        -- find matching '('
+                        local depth = 1
+                        repeat
+                            i = i - 1
+                            if is_tk(results, i, '(') then
+                                depth = depth - 1
+                            elseif is_tk(results, i, ')') then
+                                depth = depth + 1
+                            elseif not results[i] then
+                                error("syntax error (unbalanced '()')")
+                            end
+                        until depth == 0
+                    elseif is_tk(results, i, ']') then
+                        -- [indexing]
+                        -- find matching '['
+                        local depth = 1
+                        repeat
+                            i = i - 1
+                            if is_tk(results, i, '[') then
+                                depth = depth - 1
+                            elseif is_tk(results, i, ']') then
+                                depth = depth + 1
+                            elseif not results[i] then
+                                error("syntax error (unbalanced '[]')")
+                            end
+                        until depth == 0
+                    elseif results[i-1] == TK.NAME then
+                        -- Name or '.' Name
+                        i = i - 2
+                        i = ignore_newlines(results, i)
+                        if is_tk(results, i, '.') then
+                            -- skip '.'
+                            i = i - 1
+                        else
+                            -- found start of statement
+                            break
+                        end
+                    elseif is_tk(results, i, '}') then
+                        -- prefixexp '{' table '}'
+                        -- find matching '{' (fixed: this scan previously
+                        -- counted '['/']' and nil-checked results[i], so it
+                        -- could never terminate on unbalanced braces)
+                        local newi = i
+                        local depth = 1
+                        repeat
+                            newi = newi - 1
+                            if is_tk(results, newi, '{') then
+                                depth = depth - 1
+                            elseif is_tk(results, newi, '}') then
+                                depth = depth + 1
+                            elseif not results[newi] then
+                                error("syntax error (unbalanced '{}')")
+                            end
+                        until depth == 0
+                        local checki = ignore_newlines(results, newi-1)
+                        -- do I need these checks?
+                        if is_tk(results, checki, ']') or
+                            is_tk(results, checki, '}') or
+                            is_tk(results, checki, ')') or
+                            results[checki-1] == TK.NAME or
+                            results[checki-1] == TK.STRING then
+                            i = newi
+                        else
+                            -- syntax error?
+                            error("syntax error")
+                        end
+                    elseif results[i-1] == TK.STRING then
+                        -- prefixexp "string"
+                        -- prefixexp 'string'
+                        -- prefixexp [[string]]
+                        local newi = i-1
+                        local checki = ignore_newlines(results, newi-1)
+                        -- do I need these checks?
+                        if is_tk(results, checki, ']') or
+                            is_tk(results, checki, '}') or
+                            is_tk(results, checki, ')') or
+                            results[checki-1] == TK.NAME or
+                            results[checki-1] == TK.STRING then
+                            i = newi
+                        else
+                            -- syntax error?
+                            error("syntax error")
+                        end
+                    else
+                        -- found start of statement
+                        break
+                    end
+                    i = i - 1
+                end
+                -- mark start
+                i = i + 1
+                table.insert(results, i, START_OF_STMT)
+                results.n = results.n + 1
+                -- fix existing END_OF_STMT
+                for k=i, #results do
+                    if results[k] == END_OF_STMT then
+                        local v = results[k+1]
+                        if v > i then -- this should always be true?
+                            results[k+1] = v + 1
+                        end
+                    end
+                end
+                -- mark end
+                table.insert(results, results.n + 1, END_OF_STMT)
+                table.insert(results, results.n + 2, i)
+                results.n = results.n + 2
+            end
+        elseif token == '(' or token == '{' or token == TK.STRING then
+            local i = results.n - 1 -- skip the '(' / '{' / TK_STRING
+            i = ignore_newlines(results, i)
+            -- possible patterns:
+            -- ':' Name '(' -- plain Lua thing, ignore
+            -- ':' Name '.' Name '(' -- cratera string traits
+            -- ':' '[' exp ']' '.' Name '(' -- cratera object traits
+            -- ':' '[' exp ']' '(' -- supported in lua 5.3 cratera patch but no reason to support it here.
+            if results[i-1] == TK.NAME then
+                local tk_myfunction = i-1
+                -- maybe cratera
+                i = ignore_newlines(results, i-2)
+                if results[i-1] == END_OF_STMT then
+                    -- lua, but we need to fix it up
+                    -- we could just replace them with dummies, but
+                    local pos = results[i]
+                    table.remove(results, i) -- remove END_OF_STMT's value
+                    table.remove(results, i-1) -- remove END_OF_STMT
+                    table.remove(results, pos) -- remove START_OF_STMT
+                    results.n = results.n - 3 -- adjust length
+                    assert(results[i-3] == ':')
+                elseif is_tk(results, i, '.') then
+                    -- maybe cratera
+                    local tk_dot = i
+                    local inject_cratera = false
+                    i = ignore_newlines(results, i-1)
+                    if results[i-1] == TK.NAME then
+                        local tk_mytrait = i-1
+                        i = ignore_newlines(results, i-2)
+                        if results[i-1] == END_OF_STMT then
+                            -- definitely cratera (stmt ':' Name '.' Name '(')
+                            -- convert into '(' stmt ',' String ',' String
+                            -- convert names into strings
+                            results[tk_mytrait] = TK.STRING
+                            inject_cratera = true
+                        end -- else not cratera
+                    elseif is_tk(results, i, ']') then
+                        local tk_right = i
+                        local depth = 1
+                        repeat
+                            i = i - 1
+                            if is_tk(results, i, '[') then
+                                depth = depth - 1
+                            elseif is_tk(results, i, ']') then
+                                depth = depth + 1
+                            elseif not results[i] then
+                                error("syntax error (unbalanced '[]')")
+                            end
+                        until depth == 0
+                        local tk_left = i
+                        i = ignore_newlines(results, i-1)
+                        if results[i-1] == END_OF_STMT then
+                            -- definitely cratera (stmt ':' '[' exp ']' '.' Name '(')
+                            -- convert into '(' stmt ',' '(' exp ')' ',' String
+                            -- replace '[' and ']'
+                            results[tk_right] = ')'
+                            results[tk_left] = '('
+                            inject_cratera = true
+                        end -- else not cratera
+                    end
+                    if inject_cratera then
+                        --assert(token == '(', "unimplemented")
+                        -- convert name into string
+                        results[tk_myfunction] = TK.STRING
+                        -- replace '.' with ','
+                        results[tk_dot] = ','
+                        local pos = results[i]
+                        -- remove END_OF_STMT
+                        table.remove(results, i-1)
+                        table.remove(results, i-1)
+                        results.n = results.n - 2
+                        -- replace ':' with ','
+                        results[ignore_newlines(results, i-2)] = ','
+                        -- replace START_OF_STMT with '('
+                        results[pos] = '('
+                        if token == '(' then
+                            -- replace '(' with ','
+                            results[results.n] = ','
+                        else
+                            -- insert ',' before argument
+                            table.insert(results, results.n, ',')
+                            results.n = results.n + 1
+                        end
+                        -- inject cratera
+                        table.insert(results, pos, ')')
+                        table.insert(results, pos, CRATERA_FUNCTION)
+                        table.insert(results, pos, '(')
+                        -- check for potential prefixexp and correct for it
+                        if is_tk(results, pos-1, ']') or
+                            is_tk(results, pos-1, '}') or
+                            is_tk(results, pos-1, ')') or
+                            results[pos-2] == TK.NAME or
+                            results[pos-2] == TK.STRING then
+                            table.insert(results, pos, ';')
+                            results.n = results.n + 1
+                        end
+                        results.n = results.n + 3
+                        -- tag it so we know to insert a ')' to close our '('
+                        -- and to handle '(' ')' (no argument) calls
+                        -- we add the tag before TK.STRING/'{'/','
+                        table.insert(results, results.n, END_OF_CRATERA)
+                        results.n = results.n + 1
+                    end
+                end -- else not cratera
+            end
+        elseif token == '}' then
+            local i = results.n -- we'll be subtracting anyway, see below
+            local depth = 1
+            repeat
+                i = i - 1
+                if is_tk(results, i, '{') then
+                    depth = depth - 1
+                elseif is_tk(results, i, '}') then
+                    depth = depth + 1
+                elseif not results[i] then
+                    error("syntax error (unbalanced '{}')")
+                end
+            until depth == 0
+            assert(is_tk(results, i, '{'))
+            if results[i-1] == END_OF_CRATERA then
+                -- need to add ')' to close our '('
+                table.remove(results, i-1)
+                results[results.n] = ')'
+            end
+        elseif token == ')' then
+            local i = results.n - 1 -- skip the ')'
+            i = ignore_newlines(results, i)
+            if results[i] == ',' and results[i-1] == END_OF_CRATERA then
+                -- '(' CRATERA_FUNCTION ')' '(' something END_OF_CRATERA ',' ')'
+                -- need to fix it up into
+                -- '(' CRATERA_FUNCTION ')' '(' something ')'
+                table.remove(results, i-1)
+                table.remove(results, i-1)
+                results.n = results.n - 2
+            else
+                -- still might need to remove an END_OF_CRATERA somewhere
+                i = i + 1
+                local depth = 1
+                repeat
+                    i = i - 1
+                    if is_tk(results, i, '(') then
+                        depth = depth - 1
+                    elseif is_tk(results, i, ')') then
+                        depth = depth + 1
+                    elseif results[i] == END_OF_CRATERA then
+                        table.remove(results, i)
+                        results.n = results.n - 1
+                        break
+                    elseif not results[i] then
+                        error("syntax error (unbalanced '()')")
+                    end
+                until depth == 0
+            end
+        end
+    else -- we skipped a string literal
+        if results[results.n-1] == TK.STRING and results[results.n-2] == END_OF_CRATERA then
+            -- need to add ')' to close our '('
+            table.remove(results, results.n-2)
+            results[results.n] = ')'
+        end
+    end
+    -- if this token carries a payload, the next value we receive is that
+    -- payload and must not be interpreted as a token
+    results.skip = EXTRA_DATA[token]
+    return SELF
+end
+
+return {defs = defs}
diff --git a/src/cratera/init.lua b/src/cratera/init.lua
new file mode 100644
index 0000000..bac3f53
--- /dev/null
+++ b/src/cratera/init.lua
@@ -0,0 +1,83 @@
+--[[
+    Cratera Compiler - pure-Lua Cratera-to-Lua transpiler
+    Copyright (C) 2019, 2024  Soni L.
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+--]]
+
+-- This code is highly experimental and not very good
+
+local parser = require "cratera.parser"
+local luatokens = require "cratera.luatokens"
+local compiler = require "cratera.compiler"
+
+-- first byte of a precompiled Lua chunk, obtained portably by dumping a
+-- trivial function instead of hard-coding one version's signature byte
+local LUA_SIGNATURE = string.dump(function() end):sub(1,1)
+
+-- Load a Cratera chunk. `reader` is a source string or a reader function as
+-- accepted by Lua's load(); extra arguments are forwarded to load()
+-- (chunkname etc.). Precompiled Lua bytecode is passed straight through.
+local function cratera_load(reader, ...)
+    if type(reader) == "string" and reader:sub(1,1) == LUA_SIGNATURE then
+        -- bytecode
+        return (loadstring or load)(reader, ...) -- loadstring on Lua 5.1
+    end
+    -- stage 1: stream the input through the Lua/Cratera tokenizer
+    local f, s, i = parser.stream(luatokens.defs, reader)
+    if type(s[parser.DATA]) == "string" and s[parser.DATA]:sub(1,1) == LUA_SIGNATURE then
+        -- bytecode (reader-function case): the first piece was already pulled
+        -- into s[parser.DATA], so replay it once, then the closure rebinds fn
+        -- to the original reader for all subsequent pieces
+        local function fn()
+            fn = reader
+            return s[parser.DATA]
+        end
+        return (load)(function() return fn() end, ...)
+    end
+    local nl = 1
+    local otherstate = {} -- needed to match linenumbers
+    -- stage 2: feed batches of tokens into the compiler state machine,
+    -- forwarding the tokenizer's current line so '\n' padding stays in sync
+    local f, s, i = parser.stream(compiler.defs, function()
+        local tokens
+        repeat
+            local pos, state, transemsg, etoken, estate = f(s, i)
+            otherstate.linenumber = state.line
+            i = pos
+            if not i then return nil end
+            if not state then error(transemsg) end
+            -- move this batch of tokens out of the (reused) tokenizer state
+            tokens = {}
+            for i,v in ipairs(state) do
+                state[i] = nil
+                tokens[i] = v
+            end
+        until #tokens > 0 or not transemsg
+        return tokens
+    end, otherstate)
+    -- stage 3: join compiled fragments into source-text chunks for load().
+    -- tokens starts with "" so table.concat emits a leading space, which
+    -- separates this chunk from the previous one; the outer fn (run only on
+    -- the very first call, then replaced by the inner one) strips that
+    -- leading space from the first chunk.
+    local function fn()
+        function fn()
+            local tokens
+            repeat
+                local pos, state, transemsg, etoken, estate, est = f(s, i)
+                i = pos
+                if not i then return nil end
+                if not state then error(transemsg .. " " .. tostring(etoken)) end
+                tokens = {""}
+                for i,v in ipairs(state) do
+                    state[i] = nil
+                    tokens[i+1] = v
+                end
+            until #tokens > 1 or not transemsg
+            return table.concat(tokens, " ")
+        end
+        local ret = fn()
+        -- NOTE(review): if the very first chunk is nil (empty input) this
+        -- sub() would error — confirm upstream always yields at least one chunk
+        return string.sub(ret, 2)
+    end
+    return load(function()
+        return fn()
+    end, ...)
+end
+
+return {load = cratera_load}
diff --git a/src/cratera/loader.lua b/src/cratera/loader.lua
new file mode 100644
index 0000000..02697ac
--- /dev/null
+++ b/src/cratera/loader.lua
@@ -0,0 +1,19 @@
+--[[
+    The Cratera Loader
+    Copyright (C) 2024  Soni L.
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+--]]
+
+-- TODO
diff --git a/src/cratera/luatokens.lua b/src/cratera/luatokens.lua
new file mode 100644
index 0000000..90d0c61
--- /dev/null
+++ b/src/cratera/luatokens.lua
@@ -0,0 +1,769 @@
+--[[
+    This file is part of Cratera Compiler
+    Copyright (C) 2019  Soni L.
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+--]]
+
+--[[
+    This software is based on Lua 5.1 and Lua 5.3
+
+    Lua 5.1 license:
+
+/******************************************************************************
+* Copyright (C) 1994-2012 Lua.org, PUC-Rio.  All rights reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining
+* a copy of this software and associated documentation files (the
+* "Software"), to deal in the Software without restriction, including
+* without limitation the rights to use, copy, modify, merge, publish,
+* distribute, sublicense, and/or sell copies of the Software, and to
+* permit persons to whom the Software is furnished to do so, subject to
+* the following conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+******************************************************************************/
+
+    Lua 5.3 license:
+
+/******************************************************************************
+* Copyright (C) 1994-2018 Lua.org, PUC-Rio.
+*
+* Permission is hereby granted, free of charge, to any person obtaining
+* a copy of this software and associated documentation files (the
+* "Software"), to deal in the Software without restriction, including
+* without limitation the rights to use, copy, modify, merge, publish,
+* distribute, sublicense, and/or sell copies of the Software, and to
+* permit persons to whom the Software is furnished to do so, subject to
+* the following conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+******************************************************************************/
+--]]
+
+-- we need some stuff from here
+local parser = require "cratera.parser"
+local selfify = parser.selfify
+local EOF = parser.EOF
+local COLLECT = parser.COLLECT
+local collect_fallback = parser.collect_fallback
+
+-- "dummies"
+-- see http://www.lua.org/source/5.3/llex.h.html#RESERVED
+-- keywords
+-- Each TK_* is a unique empty table used purely as an identity/sentinel
+-- value; printable names (__name/__tostring) are attached near the end of
+-- this file. Comparing tokens is therefore a cheap reference comparison.
+local TK_AND, TK_BREAK,
+    TK_DO, TK_ELSE, TK_ELSEIF, TK_END, TK_FALSE, TK_FOR, TK_FUNCTION,
+    TK_GOTO, TK_IF, TK_IN, TK_LOCAL, TK_NIL, TK_NOT, TK_OR, TK_REPEAT,
+    TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE,
+    -- operators
+    TK_IDIV, TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE,
+    TK_SHL, TK_SHR,
+    -- misc
+    TK_DBCOLON, TK_EOS,
+    -- values/constants
+    TK_FLT, TK_INT, TK_NAME, TK_STRING =
+    {}, {},
+    {}, {}, {}, {}, {}, {}, {},
+    {}, {}, {}, {}, {}, {}, {}, {},
+    {}, {}, {}, {}, {},
+    {}, {}, {}, {}, {}, {}, {},
+    {}, {},
+    {}, {},
+    {}, {}, {}, {}
+
+-- Map from reserved-word spelling to its TK_* sentinel (see llex.h in Lua).
+local keywords = {
+    ["and"] = TK_AND,
+    ["break"] = TK_BREAK,
+    ["do"] = TK_DO,
+    ["else"] = TK_ELSE,
+    ["elseif"] = TK_ELSEIF,
+    ["end"] = TK_END,
+    ["false"] = TK_FALSE,
+    ["for"] = TK_FOR,
+    ["function"] = TK_FUNCTION,
+    ["goto"] = TK_GOTO,
+    ["if"] = TK_IF,
+    ["in"] = TK_IN,
+    ["local"] = TK_LOCAL,
+    ["nil"] = TK_NIL,
+    ["not"] = TK_NOT,
+    ["or"] = TK_OR,
+    ["repeat"] = TK_REPEAT,
+    ["return"] = TK_RETURN,
+    ["then"] = TK_THEN,
+    ["true"] = TK_TRUE,
+    ["until"] = TK_UNTIL,
+    ["while"] = TK_WHILE,
+}
+
+-- Inverse map: TK_* sentinel back to its source spelling.
+local reverse_keywords = {}
+for k,v in pairs(keywords) do
+    reverse_keywords[v] = k
+end
+
+local defs = selfify({})
+
+-- Character-class table: maps every recognized input byte to the name of
+-- the rule that handles it ("newline", "whitespace", "digit", "hexdigit"
+-- or "alpha"). Built with loops instead of one literal entry per byte;
+-- the resulting key/value pairs are identical to an explicit listing.
+defs.base = {}
+do
+    local classes = {
+        ["\n\r"] = "newline",
+        [" \v\t\f"] = "whitespace",
+        ["0123456789"] = "digit",
+        ["abcdefABCDEF"] = "hexdigit",
+        ["ghijklmnopqrstuvwxyzGHIJKLMNOPQRSTUVWXYZ"] = "alpha",
+    }
+    for chars, class in pairs(classes) do
+        for c in string.gmatch(chars, ".") do
+            defs.base[c] = class
+        end
+    end
+end
+
+-- Line-counting hook: bumps the lexer's current line number (1-based).
+local function countline(state, token, rule)
+    local line = state.line or 1
+    state.line = line + 1
+end
+
+-- Install newline transitions on t: "\n" and "\r" each count one line via
+-- hook slot `hookn`, and an immediately following opposite character
+-- ("\r\n" or "\n\r") is absorbed without counting a second line. All other
+-- lookups fall through to `fallback` (default: t itself). Returns t.
+local function mknewline(t, hookn, fallback)
+    fallback = fallback or t
+    local crlf = {["\n"] = "\r", ["\r"] = "\n"}
+    for first, second in pairs(crlf) do
+        local skip_pair = setmetatable({}, {__index=fallback})
+        t[first] = setmetatable({[hookn] = countline, [second] = skip_pair}, {__index=fallback})
+    end
+    return t
+end
+
+-- Short-string ("..." / '...') tokenizer states. Characters are gathered
+-- into state[COLLECT] until the matching quote closes the string.
+do local tstring = selfify({})
+    defs.string = tstring
+    tstring.defs = defs
+    -- Escape-sequence state, entered after a backslash inside a string.
+    do local tsescapes = setmetatable(mknewline({
+            ["'"] = "insertraw",
+            ['"'] = "insertraw",
+            ['\\'] = "insertraw",
+            ["a"] = "insertmap",
+            ["b"] = "insertmap",
+            ["f"] = "insertmap",
+            ["n"] = "insertmap",
+            ["r"] = "insertmap",
+            ["t"] = "insertmap",
+            ["v"] = "insertmap",
+            ["z"] = "skipwhitespace",
+            ["u"] = "unicode",
+            ["x"] = "hex",
+            --["\n"] = setmetatable({[1] = countline, ["\r"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
+            --["\r"] = setmetatable({[1] = countline, ["\n"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
+            [1] = function(state, token, rule) if token == "\r" or token == "\n" then collect_fallback(state, "\n") end end,
+        }, 1, tstring), {__index = defs.base})
+        defs.string.escapes = tsescapes
+        tsescapes.string = defs.string
+
+        -- \' \" \\ : the escaped character stands for itself.
+        function tsescapes.insertraw(state, token)
+            collect_fallback(state, token)
+            return "string"
+        end
+
+        do
+            local map = { ["a"] = "\a", ["b"] = "\b", ["f"] = "\f", ["n"] = "\n", ["r"] = "\r", ["t"] = "\t", ["v"] = "\v" }
+            -- \a \b \f \n \r \t \v : mapped to their control characters.
+            function tsescapes.insertmap(state, token)
+                collect_fallback(state, map[token])
+                return "string"
+            end
+        end
+
+        -- \ddd : up to three decimal digits; the value must fit in a byte
+        -- (returning nil on num > 255 aborts the tokenizer with an error).
+        function tsescapes.digit(state, token)
+            local digit = string.find("1234567890", token, 1, true)
+            local num = state.in_digit
+            if digit then
+                num = (num or 0) * 10 + digit % 10
+                state.c = (state.c or 0) + 1
+                if state.c < 3 then
+                    state.in_digit = num
+                    return "digitc"
+                end
+            end
+            if num > 255 then
+                return nil
+            end
+            collect_fallback(state, string.char(num))
+            state.in_digit = nil
+            state.c = nil
+            if not digit then
+                collect_fallback(state, token)
+            end
+            return "string"
+        end
+        tsescapes.digitc = setmetatable(selfify({[parser.FALLBACK] = tsescapes.digit, string = tstring}, "digitc"), {__index=tstring})
+        tsescapes.digitc[1]=function(state, token, rule)
+            -- no rule matched: flush the pending \ddd value before fallback
+            if rule == nil then
+                collect_fallback(state, string.char(state.in_digit))
+                state.in_digit = nil
+                state.c = nil
+            end
+        end
+
+        -- \xXX : exactly two hexadecimal digits.
+        tsescapes.hex = setmetatable(selfify({string = defs.string, digit = "hexdigit"}), {__index=defs.base})
+        function tsescapes.hex.hexdigit(state, token)
+            local digit = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
+            assert(digit, "this should never be called for non-hex-digits")
+            local num = state.in_hex
+            if num then
+                num = num * 16 + digit % 16
+                collect_fallback(state, string.char(num))
+                state.in_hex = nil
+                return "string"
+            else
+                state.in_hex = digit % 16
+                return "self"
+            end
+        end
+
+        -- \u{XXX} : hexadecimal codepoint, emitted as UTF-8.
+        do local tseunicode = {}
+            tseunicode["{"] = "hex"
+            do local tseuhex = setmetatable(selfify({digit = "hexdigit", string=tstring}), {__index=defs.base})
+                tseunicode.hex = tseuhex
+                function tseuhex.hexdigit(state, token)
+                    local digit = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
+                    assert(digit, "this should never be called for non-hex-digits")
+                    state.in_hex = (state.in_hex or 0) * 16 + digit % 16
+                    -- reject codepoints above 2^31-1 (dead-ends the state)
+                    if state.in_hex <= 2147483647 then
+                        return "self"
+                    end
+                end
+                tseuhex["}"] = function(state, token)
+                    local num = state.in_hex
+                    state.in_hex = nil
+                    if num < 128 then
+                        collect_fallback(state, string.char(num))
+                        return "string"
+                    end
+                    -- emit continuation bytes from the low 6-bit groups
+                    local bytes = ""
+                    while num > 63 do
+                        local v = num % 64
+                        bytes = string.char(128 + v) .. bytes -- yeah ik, not the most efficient
+                        num = (num - v) / 64
+                    end
+                    if num >= 2^6/(2^#bytes) then
+                        local v = num % 64
+                        bytes = string.char(128 + v) .. bytes
+                        num = (num - v) / 64
+                    end
+                    -- build the lead byte: one high bit per trailing byte
+                    do
+                        local v = 0
+                        for i=1,#bytes do
+                            v = v + 128 / 2^i
+                        end
+                        v = v + num
+                        assert(v < 126)
+                        bytes = string.char(128 + v) .. bytes
+                    end
+                    collect_fallback(state, bytes)
+                    return "string"
+                end
+            end
+            tsescapes.unicode = tseunicode
+        end
+
+        -- \z : skip following whitespace (including newlines) in the source.
+        do local tseskipwhitespace = selfify(mknewline({
+                string = defs.string,
+                whitespace = "self",
+                [parser.FALLBACK] = "string",
+                [1] = collect_fallback,
+            }, 2))
+            --tseskipwhitespace["\n"] = setmetatable({[2] = countline, ["\r"] = setmetatable({}, {__index=tseskipwhitespace})}, {__index=tseskipwhitespace})
+            --tseskipwhitespace["\r"] = setmetatable({[2] = countline, ["\n"] = setmetatable({}, {__index=tseskipwhitespace})}, {__index=tseskipwhitespace})
+            local tbase = defs.base
+            local tbasemap = {whitespace = "whitespace"}
+            setmetatable(tseskipwhitespace, {__index = function(t, k) return tbasemap[tbase[k]] or tstring[k] end})
+            tsescapes.skipwhitespace =  tseskipwhitespace
+        end
+    end
+
+    tstring['\\'] = "escapes"
+
+    tstring['"'] = "close"
+    tstring["'"] = "close"
+
+    -- raw newlines are illegal inside short strings (dead-ends the state)
+    tstring['\n'] = false
+    tstring['\r'] = false
+
+    tstring[parser.FALLBACK] = "self"
+
+    tstring[1] = collect_fallback
+
+    -- Quote character: closes the string only if it matches the opener;
+    -- the other quote kind is collected as a literal character.
+    function tstring.close(state, token)
+        if state.in_string == token then
+            state.in_string = nil
+            state[#state+1] = table.concat(state[COLLECT])
+            state[COLLECT] = nil
+            return "defs"
+        else
+            collect_fallback(state, token)
+            return "self"
+        end
+    end
+end
+
+-- Long-string ([[...]], [=[...]=], ...) tokenizer states.
+do local tlongstring = {}
+    defs.longstring = tlongstring
+    do local tllongstring_proper = selfify({[parser.FALLBACK] = "self", ["]"] = function(state, token) state.longstring_close = 0 return "maybe_end" end})
+        tllongstring_proper[1] = false -- placeholder for newline handling
+        tllongstring_proper[2] = collect_fallback
+
+        -- After "]": count "="s and check whether they match the opener's
+        -- level (state.longstring_count) before a closing "]".
+        do local tllmaybe_end = selfify({defs = defs}, "maybe_end")
+            tllongstring_proper.maybe_end = tllmaybe_end
+            tllmaybe_end.longstring_proper = tllongstring_proper
+            tllmaybe_end["="] = function(state, token)
+                state.longstring_close = state.longstring_close + 1
+                return "maybe_end"
+            end
+            tllmaybe_end["]"] = function(state, token)
+                if state.longstring_close == state.longstring_count then
+                    state.longstring_close = nil
+                    state.longstring_count = nil
+                    local pos = #state
+                    state[pos+1] = TK_STRING
+                    state[pos+2] = table.concat(state[COLLECT])
+                    state[COLLECT] = nil
+                    return "defs"
+                else
+                    -- not a real closer: flush the "]===" we swallowed
+                    collect_fallback(state, "]")
+                    collect_fallback(state, ("="):rep(state.longstring_close))
+                    state.longstring_close = 0
+                    return "maybe_end"
+                end
+            end
+            tllmaybe_end[parser.FALLBACK] = "longstring_proper"
+            tllmaybe_end[1] = collect_fallback
+            tllmaybe_end[-1] = function(state, token, rule)
+                if not rule then
+                    collect_fallback(state, "]")
+                    collect_fallback(state, ("="):rep(state.longstring_close))
+                    state.longstring_close = nil
+                end
+            end
+        end
+
+        tlongstring.longstring_proper = tllongstring_proper
+        mknewline(tlongstring, 1, tllongstring_proper)
+        setmetatable(tlongstring, {__index=tllongstring_proper})
+    end
+end
+
+defs["'"] = "string_open"
+defs['"'] = "string_open"
+defs["["] = "maybe_longstring"
+-- "[" is ambiguous: it may open a long string ("[[", "[=[", ...) or be a
+-- plain bracket. This state decides which; the [-1] hook emits the bare
+-- "[" token when no long string follows.
+defs.maybe_longstring = setmetatable({
+    defs = defs,
+    ['['] = "longstring_open",
+    ['='] = "longstring_open",
+    longstring_count = selfify({
+        ["="] = function(state, token)
+            state.longstring_count = state.longstring_count + 1
+            return "self"
+        end,
+        ["["] = function(state, token)
+            state[COLLECT] = {coalesce=63} -- TODO tweak this for CPU/memory tradeoff?
+            return "longstring"
+        end,
+        longstring = defs.longstring
+    }),
+    longstring_open = function(state, token)
+        if token == "=" then
+            -- FIX: was `state.longstring_count or 0 + 1`, which parses as
+            -- `x or (0 + 1)` and so never added 1 to an existing count.
+            state.longstring_count = (state.longstring_count or 0) + 1
+            return "longstring_count"
+        elseif token == "[" then
+            state.longstring_count = 0
+            state[COLLECT] = {coalesce=63} -- TODO tweak this for CPU/memory tradeoff?
+            return "longstring"
+        end
+    end,
+    [-1] = function(state, token, rule)
+        if rule ~= "longstring_open" then
+            state[#state+1] = "["
+        end
+    end
+}, {__index=defs})
+
+-- these are needed for proper line counts
+--defs["\n"] = setmetatable({["\r"] = setmetatable({}, {__index=defs})}, {__index=defs})
+--defs["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=defs})}, {__index=defs})
+mknewline(defs, 1)
+
+-- thankfully comments are easy
+-- "--" opens a comment; "--[[" (with optional "="s) opens a long comment.
+defs["-"] = "maybe_comment"
+do local tmaybe_comment = setmetatable({["-"] = "comment"}, {__index=defs})
+    defs.maybe_comment = tmaybe_comment
+    tmaybe_comment[parser.EOZ] = "self" -- defs
+    tmaybe_comment[-1] = function(state, token, rule)
+        -- lone "-" is the minus operator, not a comment opener
+        if rule ~= "comment" then
+            state[#state+1] = "-"
+        end
+    end
+    do local tmcomment = {comment_proper = selfify({})}
+        tmaybe_comment.comment = tmcomment
+        tmcomment[parser.FALLBACK] = "comment_proper"
+        tmcomment["["] = "maybe_longcomment"
+        mknewline(tmcomment, 1, defs)
+        mknewline(tmcomment.comment_proper, 1, defs)
+        tmcomment.comment_proper[parser.FALLBACK] = "self"
+        -- long comments mirror long strings, but discard their contents
+        do local tllongcomment_proper = selfify({[parser.FALLBACK] = "self", ["]"] = function(state, token) state.longcomment_close = 0 return "maybe_end" end})
+            tmcomment.longcomment = tllongcomment_proper
+            do local tllmaybe_end = selfify({defs = defs}, "maybe_end")
+                tllongcomment_proper.maybe_end = tllmaybe_end
+                tllmaybe_end.longcomment_proper = tllongcomment_proper
+                tllmaybe_end["="] = function(state, token)
+                    state.longcomment_close = state.longcomment_close + 1
+                    return "maybe_end"
+                end
+                tllmaybe_end["]"] = function(state, token)
+                    if state.longcomment_close == state.longcomment_count then
+                        state.longcomment_close = nil
+                        state.longcomment_count = nil
+                        return "defs"
+                    else
+                        state.longcomment_close = 0
+                        return "maybe_end"
+                    end
+                end
+                tllmaybe_end[parser.FALLBACK] = "longcomment_proper"
+                tllmaybe_end[-1] = function(state, token, rule)
+                    if not rule then
+                        state.longcomment_close = nil
+                    end
+                end
+            end
+
+            mknewline(tllongcomment_proper, 1, tllongcomment_proper)
+        end
+
+        tmcomment.maybe_longcomment = setmetatable({
+            comment = tmcomment,
+            ['['] = "longcomment_open",
+            ['='] = "longcomment_open",
+            longcomment_count = setmetatable(selfify({
+                ["="] = function(state, token)
+                    state.longcomment_count = state.longcomment_count + 1
+                    return "longcomment_count"
+                end,
+                ["["] = "longcomment",
+                longcomment = tmcomment.longcomment,
+            }, "longcomment_count"), {__index=tmcomment}),
+            longcomment_open = function(state, token)
+                if token == "=" then
+                    -- FIX: was `state.longcomment_count or 0 + 1`, which
+                    -- parses as `x or (0 + 1)` and never incremented.
+                    state.longcomment_count = (state.longcomment_count or 0) + 1
+                    return "longcomment_count"
+                elseif token == "[" then
+                    state.longcomment_count = 0
+                    return "longcomment"
+                end
+            end,
+        }, {__index=tmcomment})
+    end
+end
+
+local STATE = parser.STATE
+
+-- Factory for two-character operator tokens. Calling
+-- defs.multitokens(first, second1, tok1, second2, tok2, ...) registers a
+-- state for `first` that emits tokN when followed by secondN, or emits
+-- the bare `first` character otherwise (via the [-1] hook).
+defs.multitokens = setmetatable({
+    [parser.EOZ] = "self",
+    [-1] = function(state, token, rule)
+        if not state[STATE].multitoken[token] then
+            state[#state+1] = state[STATE].first
+        end
+    end,
+    second = function(state, token)
+        state[#state+1] = state[STATE].multitoken[token]
+        return "self" -- actually goes into defs
+    end
+}, {
+    __index=defs,
+    __call=function(t, first, ...)
+        -- consume (second, result) pairs from the varargs, tail-recursively
+        local function helper(t, second, result, ...)
+            if not second then return end
+            t[second] = "second"
+            t.multitoken[second] = result
+            return helper(t, ...)
+        end
+        defs[first] = setmetatable({
+            first = first,
+            multitoken = {}
+        }, {__index=t})
+        return helper(defs[first], ...)
+    end
+})
+
+defs.multitokens("=", "=", TK_EQ)
+defs.multitokens("/", "/", TK_IDIV)
+defs.multitokens("<", "<", TK_SHL, "=", TK_LE)
+defs.multitokens(">", ">", TK_SHR, "=", TK_GE)
+defs.multitokens("~", "=", TK_NE)
+defs.multitokens(":", ":", TK_DBCOLON)
+
+-- "." is three-way ambiguous: field access ".", concat "..", varargs "...",
+-- and it may also start a float literal like ".5".
+defs["."] = setmetatable({
+    [-1] = function(state, token, rule)
+        if token ~= "." then
+            if rule ~= "digit" then
+                state[#state+1] = "."
+            end
+        end
+    end,
+    -- ".<digit>": a fractional float literal with no integer part
+    digit = function(state, token, rule)
+        state[#state+1] = TK_FLT
+        state[COLLECT] = {".", coalesce=31}
+        return "in_decimal"
+    end,
+    ["."] = setmetatable({
+        [-1] = function(state, token, rule)
+            if token ~= "." then
+                state[#state+1] = TK_CONCAT
+            end
+        end,
+        ["."] = function(state, token)
+            state[#state+1] = TK_DOTS
+            return "self" -- actually goes into defs
+        end,
+    }, {__index=defs})
+}, {__index=defs})
+
+-- First digit of a numeric literal: start collecting characters and pick
+-- the zero-prefixed state (possible "0x" hex) or the plain integer state.
+function defs.digit(state, token)
+    state[COLLECT] = {token, coalesce=31}
+    return token == "0" and "in_zero" or "in_integer"
+end
+
+-- Inside an integer literal; also the base state the other number states
+-- inherit from. The [-1] hook emits numtype + the parsed value when a
+-- non-numeric character ends the literal.
+defs.in_integer = setmetatable(selfify({
+    hexdigit = "alpha",
+    alpha = false,
+    ['e'] = "exp",
+    ['E'] = "exp",
+    [parser.EOZ] = "self", -- defs
+    exp = function(state, token)
+        collect_fallback(state, token)
+        return "in_exp"
+    end,
+    ['.'] = function(state, token)
+        collect_fallback(state, token)
+        return "in_decimal"
+    end,
+    digit = function(state, token)
+        collect_fallback(state, token)
+        return "in_digit"
+    end,
+    [-1] = function(state, token, rule)
+        -- TODO figure out best order for these checks
+        if rule == "digit" or token == "." or rule == "hexdigit" or rule == "into_hex" or rule == "exp" then return end
+        state[#state+1] = state[STATE].numtype
+        state[#state+1] = tonumber(table.concat(state[COLLECT])) -- TODO maybe not the best option
+        state[COLLECT] = nil
+    end,
+    numtype = TK_INT
+}, "in_digit"), {__index=defs})
+
+-- After a leading "0": may become a hexadecimal literal via "x"/"X".
+defs.in_zero = setmetatable({
+    ['x'] = "into_hex",
+    ['X'] = "into_hex",
+    into_hex = function(state, token)
+        collect_fallback(state, token)
+        return "in_hex"
+    end,
+}, {__index=defs.in_integer})
+
+-- After the decimal point: a second "." is illegal; numtype becomes float.
+defs.in_decimal = setmetatable(selfify({
+    ['.'] = false,
+    numtype = TK_FLT
+}, "in_digit"), {__index=defs.in_integer})
+
+-- Digits of an exponent: a second "e"/"E" is illegal.
+defs.in_expnum = setmetatable(selfify({
+    exp = false,
+}, "in_digit"), {__index=defs.in_decimal})
+
+-- Just after "e"/"E" (sign already consumed, if any): a digit is required.
+defs.in_subexp = setmetatable({
+    in_expnum = defs.in_expnum,
+    digit = function(state, token)
+        collect_fallback(state, token)
+        return "in_expnum"
+    end,
+}, {__index=defs.base})
+
+-- Just after "e"/"E": optionally consume one "+"/"-" sign.
+defs.in_exp = setmetatable({
+    in_subexp = defs.in_subexp,
+    ["+"] = "sign",
+    ["-"] = "sign",
+    sign = function(state, token)
+        collect_fallback(state, token)
+        return "in_subexp"
+    end,
+}, {__index=defs.in_subexp})
+
+-- Inside a hex literal: a-f count as digits and "p"/"P" opens the exponent.
+defs.in_hex = setmetatable(selfify({
+    in_decimal = "in_hex_fraction",
+    hexdigit = 'digit',
+    ['e'] = 'hexdigit',
+    ['E'] = 'hexdigit',
+    ['p'] = 'exp',
+    ['P'] = 'exp',
+}, "in_digit"), {__index=defs.in_integer})
+
+-- Fractional part of a hex float.
+defs.in_hex_fraction = setmetatable(selfify({
+    ['.'] = false,
+    numtype = TK_FLT
+}, "in_digit"), {__index=defs.in_hex})
+
+-- Single-character tokens are emitted verbatim as themselves.
+function defs.simpletoken(state, token)
+    local n = #state
+    state[n + 1] = token
+    return "self"
+end
+
+for token in ("+*%^#&|(){}];,"):gmatch(".") do
+    defs[token] = "simpletoken"
+end
+
+defs.whitespace = "self"
+defs.hexdigit = "alpha"
+defs["_"] = "alpha"
+-- Inside an identifier/keyword: letters, digits and "_" keep collecting.
+defs.in_alpha = setmetatable(selfify({digit = "in_alpha", hexdigit = "in_alpha", alpha = "in_alpha", _ = "in_alpha", [parser.EOZ] = "self"}, "in_alpha"), {__index=defs})
+function defs.alpha(state, token)
+    state[COLLECT] = {coalesce=15} -- TODO tweak this for CPU/memory tradeoff?
+    collect_fallback(state, token)
+    return "in_alpha"
+end
+-- End of an identifier: emit the matching keyword token, or TK_NAME + text.
+defs.in_alpha[-1] = function(state, token, rule)
+    if rule == "alpha" or rule == "digit" or rule == "hexdigit" or token == "_" then
+        collect_fallback(state, token)
+    else
+        local key = table.concat(state[COLLECT])
+        state[COLLECT] = nil
+        local keyword = keywords[key]
+        if keyword then
+            state[#state+1] = keyword
+        else
+            local pos = #state
+            state[pos+1] = TK_NAME
+            state[pos+2] = key
+        end
+    end
+end
+
+setmetatable(defs, {__index=defs.base})
+
+-- Rule for an opening quote (' or ") at top level: record which quote
+-- opened the string, emit TK_STRING, and start collecting the contents.
+function defs.string_open(state, token)
+    if not state.in_string then
+        state[#state+1] = TK_STRING
+        state[COLLECT] = {coalesce=63} -- TODO tweak this for CPU/memory tradeoff?
+        state.in_string = token
+        return "string"
+    end
+    -- FIX: was `assert("this shouldn't happen")`, which always passes
+    -- because a non-false string is truthy; raise the error properly.
+    error("this shouldn't happen")
+end
+
+-- Export table: token name -> sentinel (and give each sentinel a printable
+-- name via its metatable so debugging output is readable).
+local tokens = {
+    TK_AND = TK_AND, TK_BREAK = TK_BREAK,
+    TK_DO = TK_DO, TK_ELSE = TK_ELSE, TK_ELSEIF = TK_ELSEIF, TK_END = TK_END, TK_FALSE = TK_FALSE, TK_FOR = TK_FOR, TK_FUNCTION = TK_FUNCTION,
+    TK_GOTO = TK_GOTO, TK_IF = TK_IF, TK_IN = TK_IN, TK_LOCAL = TK_LOCAL, TK_NIL = TK_NIL, TK_NOT = TK_NOT, TK_OR = TK_OR, TK_REPEAT = TK_REPEAT,
+    TK_RETURN = TK_RETURN, TK_THEN = TK_THEN, TK_TRUE = TK_TRUE, TK_UNTIL = TK_UNTIL, TK_WHILE = TK_WHILE,
+    TK_IDIV = TK_IDIV, TK_CONCAT = TK_CONCAT, TK_DOTS = TK_DOTS, TK_EQ = TK_EQ, TK_GE = TK_GE, TK_LE = TK_LE, TK_NE = TK_NE,
+    TK_SHL = TK_SHL, TK_SHR = TK_SHR,
+    TK_DBCOLON = TK_DBCOLON, TK_EOS = TK_EOS,
+    TK_FLT = TK_FLT, TK_INT = TK_INT, TK_NAME = TK_NAME, TK_STRING = TK_STRING
+}
+-- TK mirrors `tokens` without the "TK_" prefix (e.g. TK.AND).
+local TK = {}
+for k,v in pairs(tokens) do
+    setmetatable(v, {__name=k, __tostring=function(self) return getmetatable(self).__name end})
+    TK[k:sub(4)] = v
+end
+
+return {
+    defs = defs,
+    tokens = tokens,
+    TK = TK,
+    reverse_keywords = reverse_keywords,
+    reverse_tokens = {
+        [TK_IDIV] = "//", [TK_CONCAT] = "..", [TK_DOTS] = "...", [TK_EQ] = "==", [TK_GE] = ">=", [TK_LE] = "<=", [TK_NE] = "~=",
+        [TK_SHL] = "<<", [TK_SHR] = ">>",
+        [TK_DBCOLON] = "::", [TK_EOS] = "<eof>",
+        [TK_FLT] = "<float>", [TK_INT] = "<integer>", [TK_NAME] = "<identifier>", [TK_STRING] = "<string>"
+    },
+}
diff --git a/src/cratera/parser.lua b/src/cratera/parser.lua
new file mode 100644
index 0000000..ade568c
--- /dev/null
+++ b/src/cratera/parser.lua
@@ -0,0 +1,202 @@
+--[[
+    This file is part of Cratera Compiler
+    Copyright (C) 2019  Soni L.
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+--]]
+
+-- __tostring helper shared by the sentinel keys below: prints the __name
+-- stored in the sentinel's metatable.
+local function ts(self) return getmetatable(self).__name end
+
+-- Unique sentinel tables used as keys inside the shared parser `state`
+-- table; each gets a __name/__tostring so debug output is readable.
+-- key for STATE
+local STATE = setmetatable({}, {__name="STATE", __tostring=ts})
+-- key for DATA
+local DATA = setmetatable({}, {__name="DATA", __tostring=ts})
+-- key for GENERATOR
+local GEN = setmetatable({}, {__name="GEN", __tostring=ts})
+-- key for DATA OFFSET
+local OFFDATA = setmetatable({}, {__name="OFFDATA", __tostring=ts})
+-- key for End of Stream
+local EOZ = setmetatable({}, {__name="EOZ", __tostring=ts})
+-- key for number rules (prevent conflict with hooks)
+local NUMBER = setmetatable({}, {__name="NUMBER", __tostring=ts})
+-- key for fallback rules (prevent conflict with empty string)
+local FALLBACK = setmetatable({}, {__name="FALLBACK", __tostring=ts})
+
+-- Precompute a one-character string for every byte value so the string
+-- iterator can map bytes to tokens without allocating per character.
+local optimize_lookups = {}
+for i=0, 255 do
+    optimize_lookups[i] = string.char(i)
+end
+
+-- Localize hot globals (globals are table lookups; locals are registers).
+local type, tostring, string_byte
+    = type, tostring, string.byte
+
+-- Core transition step: feed one token to the current state table.
+-- Runs pre-hooks (negative integer keys of the state table), resolves the
+-- transition (following string aliases, calling function rules, chasing
+-- table indirections), runs post-hooks (positive integer keys), and stores
+-- the resulting state table back into state[STATE].
+-- Returns (in_pos, state[, transition]) normally, or
+-- (in_pos - 1, nil, "unexpected token", token, state, st) when no
+-- transition exists for the token.
+local function get_next_common(state, in_pos, token)
+    -- note: must preserve "token" - do not call recursively with a different token
+    local transition, retry
+    local st = state[STATE]
+    if st then
+        local rule = st[token]
+        -- EOZ with no explicit rule means a clean end of stream
+        if not rule and token == EOZ then
+            return in_pos, state
+        end
+        -- numeric tokens are routed through the dedicated NUMBER rule
+        -- (keeps token numbers from colliding with hook indices)
+        if type(token) == "number" then
+            rule = st[NUMBER]
+        end
+        do -- pre-hooks
+            local pos = -1
+            local hook = st[pos]
+            while hook ~= nil do
+                if hook then
+                    hook(state, token, rule)
+                end
+                pos = pos - 1
+                hook = st[pos]
+            end
+        end
+        transition = rule
+        if transition == nil then
+            transition = st[FALLBACK]
+        end
+        -- Resolve the transition to a final value: strings name sibling
+        -- entries of st, functions compute the transition (and may request
+        -- the token be re-consumed by setting `retry`), and tables that are
+        -- themselves keys of st are chased one more step.
+        local recheck = true
+        while recheck do
+            recheck = false
+            local tytrans = type(transition)
+            if tytrans == "string" then
+                transition = st[transition]
+                recheck = true
+            elseif tytrans == "function" then
+                transition, retry = transition(state, token)
+                recheck = true
+            elseif tytrans == "table" and st[transition] ~= nil then
+                transition = st[transition]
+                recheck = true
+            end
+        end
+        do -- post-hooks CANNOT USE ipairs HERE BECAUSE Lua 5.1/5.2
+            local pos = 1
+            local hook = st[pos]
+            while hook ~= nil do
+                if hook then
+                    hook(state, token, rule)
+                end
+                pos = pos + 1
+                hook = st[pos]
+            end
+        end
+        state[STATE] = transition -- may be nil or false
+    end
+    -- must NOT use elseif here - the above may set state to nil or false!
+    if not state[STATE] then
+        -- unexpected token. stream consumer may attempt to recover,
+        -- but we do this mostly to differentiate it from "end of stream" condition.
+        return in_pos - 1, nil, "unexpected token", token, state, st
+    end
+    -- a function rule asked for this token to be re-read in the new state
+    if retry then in_pos = in_pos - 1 end
+    return in_pos, state, transition -- TODO is this what we should be returning?
+end
+
+-- Iterator step for table-based token streams: pull the next token out of
+-- the current state[DATA] chunk and feed it to get_next_common. When the
+-- chunk is exhausted, ask state[GEN] for the next chunk and retry.
+local function get_next_table(state, in_pos)
+    if state[DATA] == nil or #state[DATA] == 0 then
+        if state[STATE] == nil then
+            return in_pos, state
+        else
+            -- no more data but the machine still has a state: signal EOZ
+            return get_next_common(state, in_pos, EOZ)
+        end
+    end
+    in_pos = in_pos + 1
+    -- OFFDATA is the absolute position where the current chunk started
+    local token = state[DATA][in_pos - state[OFFDATA]]
+    if token == nil then
+        state[OFFDATA] = in_pos - 1
+        state[DATA] = state[GEN]()
+        return get_next_table(state, state[OFFDATA])
+    end
+    return get_next_common(state, in_pos, token)
+end
+
+-- Iterator step for string-based streams: like get_next_table, but each
+-- token is one byte of state[DATA], mapped through the precomputed
+-- optimize_lookups table to a single-character string.
+local function get_next_string(state, in_pos)
+    if state[DATA] == nil or #state[DATA] == 0 then
+        if state[STATE] == nil then
+            return in_pos, state
+        else
+            -- no more data but the machine still has a state: signal EOZ
+            return get_next_common(state, in_pos, EOZ)
+        end
+    end
+    in_pos = in_pos + 1
+    -- OFFDATA is the absolute position where the current chunk started
+    local token = optimize_lookups[string_byte(state[DATA], in_pos - state[OFFDATA], in_pos - state[OFFDATA])]
+    if token == nil then
+        state[OFFDATA] = in_pos - 1
+        state[DATA] = state[GEN]()
+        return get_next_string(state, state[OFFDATA])
+    end
+    return get_next_common(state, in_pos, token)
+end
+
+-- Build a token stream usable with a generic for:
+--   for pos, state, ... in stream(defs, data) do ... end
+-- `defs` is the initial state table; `data` is a string, a table of
+-- tokens, or a generator function returning successive chunks of either.
+-- An optional `state` table may be passed to preload parser state.
+local function stream(defs, data, state)
+    local state = state or {}
+    local fn
+    state[STATE] = defs
+    if type(data) == "function" then
+        state[DATA] = data()
+        state[GEN] = data
+    else
+        state[DATA] = data
+        state[GEN] = function() end -- dummy generator: yields no further chunks
+    end
+    -- choose the per-token iterator from the first chunk's type
+    fn = type(state[DATA]) == "table" and get_next_table or get_next_string
+    state[OFFDATA] = 0
+    return fn, state, state[OFFDATA]
+end
+
+-- Drive the stream to completion.
+-- Returns the final state on (possible) success, or
+-- nil, error message, offending token, error state on a parse error.
+-- Returns nothing if the stream ends without reaching either condition.
+local function parse(defs, data, state)
+    for pos, state, transemsg, etoken, estate in stream(defs, data, state) do
+        if not state then
+            -- parse error
+            return nil, transemsg, etoken, estate
+        elseif not transemsg then
+            -- parse success (maybe) - caller needs to check state[STATE] against what it considers a successful state
+            return state
+        end
+    end
+end
+
+-- not used by any of the above but useful for others
+
+-- sentinel key for token-collection buffers (see collect_fallback below)
+local COLLECT = {}
+
+-- Module exports for the generic parser engine.
+return {
+    STATE = STATE,
+    DATA = DATA,
+    COLLECT = COLLECT,
+    EOZ = EOZ,
+    FALLBACK = FALLBACK,
+    NUMBER = NUMBER,
+    stream = stream,
+    parse = parse,
+    -- common utility function
+    -- selfify(t[, id]) stores t inside itself under key `id` (default
+    -- "self"), letting a state table name itself as a transition target.
+    selfify = function(t, id)
+        t[id or "self"] = t
+        return t
+    end,
+    -- common hook
+    -- collect_fallback appends tokens that matched no rule to the buffer
+    -- at state[COLLECT]; once the buffer holds t.coalesce entries it is
+    -- concatenated into a single string to bound memory usage.
+    collect_fallback = function(state, token, rule)
+        if not rule then
+            local t = state[COLLECT]
+            t[#t+1] = token
+            if t.coalesce and #t >= t.coalesce then
+                t[1] = table.concat(t)
+                for i=2, #t do t[i] = nil end
+            end
+        end
+    end,
+}