diff options
author | SoniEx2 <endermoneymod@gmail.com> | 2024-05-27 00:11:26 -0300 |
---|---|---|
committer | SoniEx2 <endermoneymod@gmail.com> | 2024-05-27 00:11:26 -0300 |
commit | e62ec5ac36188cb12411a8c720daebce77ecf645 (patch) | |
tree | b1a2ce8e95ffc6e92ee31c1e271b0fcafe8a0b2f /src/cratera/luatokens.lua | |
parent | 9dea1c26b487ae723d99ba1e5e5887b09aec87dd (diff) |
Set up a "build system"
Diffstat (limited to 'src/cratera/luatokens.lua')
-rw-r--r-- | src/cratera/luatokens.lua | 769 |
1 files changed, 769 insertions, 0 deletions
--[[
    This file is part of Cratera Compiler
    Copyright (C) 2019  Soni L.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
--]]

--[[
    This software is based on Lua 5.1 and Lua 5.3

    Lua 5.1 license:

/******************************************************************************
* Copyright (C) 1994-2012 Lua.org, PUC-Rio.  All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
******************************************************************************/

    Lua 5.3 license:

/******************************************************************************
* Copyright (C) 1994-2018 Lua.org, PUC-Rio.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
******************************************************************************/
--]]

-- Lua tokenizer definitions for the Cratera parser.
--
-- The tokenizer is a table-driven state machine: `defs` is the root state,
-- string keys map an input character (or a character class from `defs.base`)
-- to a rule, and a rule is a state name, a function(state, token[, rule])
-- returning a state name, or `false`.
-- NOTE(review): integer keys ([1], [2], [-1]) look like hooks that
-- cratera.parser invokes with (state, token, rule); their exact ordering is
-- defined in cratera.parser, not here -- confirm there before relying on it.

-- we need some stuff from here
local parser = require "cratera.parser"
local selfify = parser.selfify
local EOF = parser.EOF
local COLLECT = parser.COLLECT
local collect_fallback = parser.collect_fallback

-- "dummies"
-- see http://www.lua.org/source/5.3/llex.h.html#RESERVED
-- Each token kind is a unique empty table, compared by identity.
-- keywords
local TK_AND, TK_BREAK,
    TK_DO, TK_ELSE, TK_ELSEIF, TK_END, TK_FALSE, TK_FOR, TK_FUNCTION,
    TK_GOTO, TK_IF, TK_IN, TK_LOCAL, TK_NIL, TK_NOT, TK_OR, TK_REPEAT,
    TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE,
    -- operators
    TK_IDIV, TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE,
    TK_SHL, TK_SHR,
    -- misc
    TK_DBCOLON, TK_EOS,
    -- values/constants
    TK_FLT, TK_INT, TK_NAME, TK_STRING =
    {}, {},
    {}, {}, {}, {}, {}, {}, {},
    {}, {}, {}, {}, {}, {}, {}, {},
    {}, {}, {}, {}, {},
    {}, {}, {}, {}, {}, {}, {},
    {}, {},
    {}, {},
    {}, {}, {}, {}

-- Reserved word (source text) -> token dummy.
local keywords = {
  ["and"] = TK_AND,
  ["break"] = TK_BREAK,
  ["do"] = TK_DO,
  ["else"] = TK_ELSE,
  ["elseif"] = TK_ELSEIF,
  ["end"] = TK_END,
  ["false"] = TK_FALSE,
  ["for"] = TK_FOR,
  ["function"] = TK_FUNCTION,
  ["goto"] = TK_GOTO,
  ["if"] = TK_IF,
  ["in"] = TK_IN,
  ["local"] = TK_LOCAL,
  ["nil"] = TK_NIL,
  ["not"] = TK_NOT,
  ["or"] = TK_OR,
  ["repeat"] = TK_REPEAT,
  ["return"] = TK_RETURN,
  ["then"] = TK_THEN,
  ["true"] = TK_TRUE,
  ["until"] = TK_UNTIL,
  ["while"] = TK_WHILE,
}

-- Token dummy -> reserved word (inverse of `keywords`).
local reverse_keywords = {}
for k,v in pairs(keywords) do
  reverse_keywords[v] = k
end

local defs = selfify({})

-- Character -> character class. Note that a-f/A-F are classed "hexdigit",
-- not "alpha"; states that want identifiers remap hexdigit to alpha.
defs.base = {
  [" "] = "whitespace",
  ["\n"] = "newline",
  ["\r"] = "newline",
  ["\v"] = "whitespace",
  ["\t"] = "whitespace",
  ["\f"] = "whitespace",
  ["0"] = "digit",
  ["1"] = "digit",
  ["2"] = "digit",
  ["3"] = "digit",
  ["4"] = "digit",
  ["5"] = "digit",
  ["6"] = "digit",
  ["7"] = "digit",
  ["8"] = "digit",
  ["9"] = "digit",
  ["a"] = "hexdigit",
  ["b"] = "hexdigit",
  ["c"] = "hexdigit",
  ["d"] = "hexdigit",
  ["e"] = "hexdigit",
  ["f"] = "hexdigit",
  ["A"] = "hexdigit",
  ["B"] = "hexdigit",
  ["C"] = "hexdigit",
  ["D"] = "hexdigit",
  ["E"] = "hexdigit",
  ["F"] = "hexdigit",
  ["g"] = "alpha",
  ["h"] = "alpha",
  ["i"] = "alpha",
  ["j"] = "alpha",
  ["k"] = "alpha",
  ["l"] = "alpha",
  ["m"] = "alpha",
  ["n"] = "alpha",
  ["o"] = "alpha",
  ["p"] = "alpha",
  ["q"] = "alpha",
  ["r"] = "alpha",
  ["s"] = "alpha",
  ["t"] = "alpha",
  ["u"] = "alpha",
  ["v"] = "alpha",
  ["w"] = "alpha",
  ["x"] = "alpha",
  ["y"] = "alpha",
  ["z"] = "alpha",
  ["G"] = "alpha",
  ["H"] = "alpha",
  ["I"] = "alpha",
  ["J"] = "alpha",
  ["K"] = "alpha",
  ["L"] = "alpha",
  ["M"] = "alpha",
  ["N"] = "alpha",
  ["O"] = "alpha",
  ["P"] = "alpha",
  ["Q"] = "alpha",
  ["R"] = "alpha",
  ["S"] = "alpha",
  ["T"] = "alpha",
  ["U"] = "alpha",
  ["V"] = "alpha",
  ["W"] = "alpha",
  ["X"] = "alpha",
  ["Y"] = "alpha",
  ["Z"] = "alpha",
}

-- Hook: bump the line counter kept in state.line (starts counting from 1).
local function countline(state, token, rule)
  state.line = (state.line or 1) + 1
end

-- Install "\n" and "\r" transitions on `t`.
-- Each target state carries `countline` under key `hookn`, accepts the
-- complementary newline character ("\r" after "\n" and vice versa) as a
-- transition into a nested state without its own countline entry, and
-- resolves every other key through `fallback` (defaults to `t`).
-- Returns `t`.
local function mknewline(t, hookn, fallback)
  fallback = fallback or t
  t["\n"] = setmetatable({[hookn] = countline, ["\r"] = setmetatable({}, {__index=fallback})}, {__index=fallback})
  t["\r"] = setmetatable({[hookn] = countline, ["\n"] = setmetatable({}, {__index=fallback})}, {__index=fallback})
  return t
end

-- Quoted ("short") strings and their escape sequences.
do local tstring = selfify({})
  defs.string = tstring
  tstring.defs = defs
  do local tsescapes = setmetatable(mknewline({
      ["'"] = "insertraw",
      ['"'] = "insertraw",
      ['\\'] = "insertraw",
      ["a"] = "insertmap",
      ["b"] = "insertmap",
      ["f"] = "insertmap",
      ["n"] = "insertmap",
      ["r"] = "insertmap",
      ["t"] = "insertmap",
      ["v"] = "insertmap",
      ["z"] = "skipwhitespace",
      ["u"] = "unicode",
      ["x"] = "hex",
      --["\n"] = setmetatable({[1] = countline, ["\r"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
      --["\r"] = setmetatable({[1] = countline, ["\n"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
      -- an escaped line break is collected as a single "\n"
      [1] = function(state, token, rule) if token == "\r" or token == "\n" then collect_fallback(state, "\n") end end,
    }, 1, tstring), {__index = defs.base})
    defs.string.escapes = tsescapes
    tsescapes.string = defs.string

    -- \' \" \\ : collect the escaped character itself.
    function tsescapes.insertraw(state, token)
      collect_fallback(state, token)
      return "string"
    end

    do
      local map = { ["a"] = "\a", ["b"] = "\b", ["f"] = "\f", ["n"] = "\n", ["r"] = "\r", ["t"] = "\t", ["v"] = "\v" }
      -- \a \b \f \n \r \t \v : collect the control character they name.
      function tsescapes.insertmap(state, token)
        collect_fallback(state, map[token])
        return "string"
      end
    end

    -- \ddd decimal escape: accumulates up to three digits in state.in_digit
    -- (digit count in state.c). Also installed as the FALLBACK of `digitc`,
    -- so it may be called with a non-digit token, which ends the escape and
    -- is collected after the escaped byte. Returns nil when the value
    -- exceeds 255.
    function tsescapes.digit(state, token)
      local digit = string.find("1234567890", token, 1, true)
      local num = state.in_digit
      if digit then
        -- "0" is found at position 10, hence the % 10
        num = (num or 0) * 10 + digit % 10
        state.c = (state.c or 0) + 1
        if state.c < 3 then
          state.in_digit = num
          return "digitc"
        end
      end
      if num > 255 then
        return nil
      end
      collect_fallback(state, string.char(num))
      state.in_digit = nil
      state.c = nil
      if not digit then
        collect_fallback(state, token)
      end
      return "string"
    end
    tsescapes.digitc = setmetatable(selfify({[parser.FALLBACK] = tsescapes.digit, string = tstring}, "digitc"), {__index=tstring})
    tsescapes.digitc[1]=function(state, token, rule)
      if rule == nil then
        collect_fallback(state, string.char(state.in_digit))
        state.in_digit = nil
        state.c = nil
      end
    end

    -- \xXX hex escape: exactly two hex digits, accumulated in state.in_hex.
    tsescapes.hex = setmetatable(selfify({string = defs.string, digit = "hexdigit"}), {__index=defs.base})
    function tsescapes.hex.hexdigit(state, token)
      -- position % 16 is the digit value ("0" sits at position 16, lowercase
      -- letters are only found in the second half of the lookup string)
      local digit = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
      assert(digit, "this should never be called for non-hex-digits")
      local num = state.in_hex
      if num then
        num = num * 16 + digit % 16
        collect_fallback(state, string.char(num))
        state.in_hex = nil
        return "string"
      else
        state.in_hex = digit % 16
        return "self"
      end
    end

    -- \u{XXX} escape: hex digits accumulate in state.in_hex, "}" emits the
    -- UTF-8 encoding of the code point.
    do local tseunicode = {}
      tseunicode["{"] = "hex"
      do local tseuhex = setmetatable(selfify({digit = "hexdigit", string=tstring}), {__index=defs.base})
        tseunicode.hex = tseuhex
        function tseuhex.hexdigit(state, token)
          local digit = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
          assert(digit, "this should never be called for non-hex-digits")
          state.in_hex = (state.in_hex or 0) * 16 + digit % 16
          -- values above 2^31-1 yield no rule (implicit nil return)
          if state.in_hex <= 2147483647 then
            return "self"
          end
        end
        tseuhex["}"] = function(state, token)
          local num = state.in_hex
          state.in_hex = nil
          if num == nil then
            -- "\u{}" with no digits: previously `num < 128` raised
            -- "attempt to compare nil with number". Yield no rule instead,
            -- like the other invalid-escape paths (e.g. \ddd > 255).
            return nil
          end
          if num < 128 then
            collect_fallback(state, string.char(num))
            return "string"
          end
          -- emit continuation bytes (10xxxxxx), least significant first
          local bytes = ""
          while num > 63 do
            local v = num % 64
            bytes = string.char(128 + v) .. bytes -- yeah ik, not the most efficient
            num = (num - v) / 64
          end
          -- one more continuation byte if the remainder does not fit in the
          -- payload bits of the lead byte for the current length
          if num >= 2^6/(2^#bytes) then
            local v = num % 64
            bytes = string.char(128 + v) .. bytes
            num = (num - v) / 64
          end
          do
            -- lead byte: one high bit per continuation byte, plus remainder
            local v = 0
            for i=1,#bytes do
              v = v + 128 / 2^i
            end
            v = v + num
            assert(v < 126)
            bytes = string.char(128 + v) .. bytes
          end
          collect_fallback(state, bytes)
          return "string"
        end
      end
      tsescapes.unicode = tseunicode
    end

    -- \z escape: skip following whitespace (including line breaks).
    do local tseskipwhitespace = selfify(mknewline({
        string = defs.string,
        whitespace = "self",
        [parser.FALLBACK] = "string",
        [1] = collect_fallback,
      }, 2))
      --tseskipwhitespace["\n"] = setmetatable({[2] = countline, ["\r"] = setmetatable({}, {__index=tseskipwhitespace})}, {__index=tseskipwhitespace})
      --tseskipwhitespace["\r"] = setmetatable({[2] = countline, ["\n"] = setmetatable({}, {__index=tseskipwhitespace})}, {__index=tseskipwhitespace})
      local tbase = defs.base
      local tbasemap = {whitespace = "whitespace"}
      -- only the "whitespace" class is taken from defs.base; every other key
      -- is looked up in the string state
      setmetatable(tseskipwhitespace, {__index = function(t, k) return tbasemap[tbase[k]] or tstring[k] end})
      tsescapes.skipwhitespace = tseskipwhitespace
    end
  end

  tstring['\\'] = "escapes"

  tstring['"'] = "close"
  tstring["'"] = "close"

  -- raw line breaks are not allowed inside a quoted string
  tstring['\n'] = false
  tstring['\r'] = false

  tstring[parser.FALLBACK] = "self"

  tstring[1] = collect_fallback

  -- A quote character: closes the string only if it matches the opening
  -- quote stored in state.in_string; otherwise it is ordinary content.
  function tstring.close(state, token)
    if state.in_string == token then
      state.in_string = nil
      state[#state+1] = table.concat(state[COLLECT])
      state[COLLECT] = nil
      return "defs"
    else
      collect_fallback(state, token)
      return "self"
    end
  end
end

-- Long strings: [[ ... ]], [=[ ... ]=], ...
-- state.longstring_count is the number of "=" in the opener,
-- state.longstring_close the number seen so far in a candidate closer.
do local tlongstring = {}
  defs.longstring = tlongstring
  do local tllongstring_proper = selfify({[parser.FALLBACK] = "self", ["]"] = function(state, token) state.longstring_close = 0 return "maybe_end" end})
    tllongstring_proper[1] = false -- placeholder for newline handling
    tllongstring_proper[2] = collect_fallback

    do local tllmaybe_end = selfify({defs = defs}, "maybe_end")
      tllongstring_proper.maybe_end = tllmaybe_end
      tllmaybe_end.longstring_proper = tllongstring_proper
      tllmaybe_end["="] = function(state, token)
        state.longstring_close = state.longstring_close + 1
        return "maybe_end"
      end
      tllmaybe_end["]"] = function(state, token)
        if state.longstring_close == state.longstring_count then
          state.longstring_close = nil
          state.longstring_count = nil
          local pos = #state
          state[pos+1] = TK_STRING
          state[pos+2] = table.concat(state[COLLECT])
          state[COLLECT] = nil
          return "defs"
        else
          -- not the closer: the pending "]" and "="s are content, and this
          -- "]" may itself start a closer
          collect_fallback(state, "]")
          collect_fallback(state, ("="):rep(state.longstring_close))
          state.longstring_close = 0
          return "maybe_end"
        end
      end
      tllmaybe_end[parser.FALLBACK] = "longstring_proper"
      tllmaybe_end[1] = collect_fallback
      tllmaybe_end[-1] = function(state, token, rule)
        if not rule then
          collect_fallback(state, "]")
          collect_fallback(state, ("="):rep(state.longstring_close))
          state.longstring_close = nil
        end
      end
    end

    tlongstring.longstring_proper = tllongstring_proper
    mknewline(tlongstring, 1, tllongstring_proper)
    setmetatable(tlongstring, {__index=tllongstring_proper})
  end
end

defs["'"] = "string_open"
defs['"'] = "string_open"
defs["["] = "maybe_longstring"
defs.maybe_longstring = setmetatable({
  defs = defs,
  ['['] = "longstring_open",
  ['='] = "longstring_open",
  longstring_count = selfify({
    ["="] = function(state, token)
      state.longstring_count = state.longstring_count + 1
      return "self"
    end,
    ["["] = function(state, token)
      state[COLLECT] = {coalesce=63} -- TODO tweak this for CPU/memory tradeoff?
      return "longstring"
    end,
    longstring = defs.longstring
  }),
  longstring_open = function(state, token)
    if token == "=" then
      -- First "=" after "[". This used to read
      -- `state.longstring_count or 0 + 1`, which parses as
      -- `state.longstring_count or (0 + 1)` and therefore kept any stale
      -- count instead of restarting at 1.
      state.longstring_count = 1
      return "longstring_count"
    elseif token == "[" then
      state.longstring_count = 0
      state[COLLECT] = {coalesce=63} -- TODO tweak this for CPU/memory tradeoff?
      return "longstring"
    end
  end,
  [-1] = function(state, token, rule)
    -- a lone "[" that does not open a long string is an ordinary token
    if rule ~= "longstring_open" then
      state[#state+1] = "["
    end
  end
}, {__index=defs})

-- these are needed for proper line counts
--defs["\n"] = setmetatable({["\r"] = setmetatable({}, {__index=defs})}, {__index=defs})
--defs["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=defs})}, {__index=defs})
mknewline(defs, 1)

-- thankfully comments are easy
defs["-"] = "maybe_comment"
do local tmaybe_comment = setmetatable({["-"] = "comment"}, {__index=defs})
  defs.maybe_comment = tmaybe_comment
  tmaybe_comment[parser.EOZ] = "self" -- defs
  tmaybe_comment[-1] = function(state, token, rule)
    -- a single "-" not followed by another "-" is the minus token
    if rule ~= "comment" then
      state[#state+1] = "-"
    end
  end
  do local tmcomment = {comment_proper = selfify({})}
    tmaybe_comment.comment = tmcomment
    tmcomment[parser.FALLBACK] = "comment_proper"
    tmcomment["["] = "maybe_longcomment"
    -- a line break ends a line comment and returns to defs
    mknewline(tmcomment, 1, defs)
    mknewline(tmcomment.comment_proper, 1, defs)
    tmcomment.comment_proper[parser.FALLBACK] = "self"
    -- Long comments mirror long strings but collect nothing;
    -- state.longcomment_count / state.longcomment_close play the same roles.
    do local tllongcomment_proper = selfify({[parser.FALLBACK] = "self", ["]"] = function(state, token) state.longcomment_close = 0 return "maybe_end" end})
      tmcomment.longcomment = tllongcomment_proper
      do local tllmaybe_end = selfify({defs = defs}, "maybe_end")
        tllongcomment_proper.maybe_end = tllmaybe_end
        tllmaybe_end.longcomment_proper = tllongcomment_proper
        tllmaybe_end["="] = function(state, token)
          state.longcomment_close = state.longcomment_close + 1
          return "maybe_end"
        end
        tllmaybe_end["]"] = function(state, token)
          if state.longcomment_close == state.longcomment_count then
            state.longcomment_close = nil
            state.longcomment_count = nil
            return "defs"
          else
            state.longcomment_close = 0
            return "maybe_end"
          end
        end
        tllmaybe_end[parser.FALLBACK] = "longcomment_proper"
        tllmaybe_end[-1] = function(state, token, rule)
          if not rule then
            state.longcomment_close = nil
          end
        end
      end

      mknewline(tllongcomment_proper, 1, tllongcomment_proper)
    end

    tmcomment.maybe_longcomment = setmetatable({
      comment = tmcomment,
      ['['] = "longcomment_open",
      ['='] = "longcomment_open",
      longcomment_count = setmetatable(selfify({
        ["="] = function(state, token)
          state.longcomment_count = state.longcomment_count + 1
          return "longcomment_count"
        end,
        ["["] = "longcomment",
        longcomment = tmcomment.longcomment,
      }, "longcomment_count"), {__index=tmcomment}),
      longcomment_open = function(state, token)
        if token == "=" then
          -- First "=" after "--[". The old `state.longcomment_count or 0 + 1`
          -- parsed as `state.longcomment_count or 1`, so a count left over
          -- from an opener that fell back to a line comment (e.g. "--[==x")
          -- leaked into the next long comment and its closer never matched.
          state.longcomment_count = 1
          return "longcomment_count"
        elseif token == "[" then
          state.longcomment_count = 0
          return "longcomment"
        end
      end,
    }, {__index=tmcomment})
  end
end

local STATE = parser.STATE

-- Two-character operators. Calling
--   defs.multitokens(first, second1, result1, second2, result2, ...)
-- installs a state under defs[first] that emits resultN when secondN follows
-- and otherwise emits `first` on its own.
defs.multitokens = setmetatable({
  [parser.EOZ] = "self",
  [-1] = function(state, token, rule)
    if not state[STATE].multitoken[token] then
      state[#state+1] = state[STATE].first
    end
  end,
  second = function(state, token)
    state[#state+1] = state[STATE].multitoken[token]
    return "self" -- actually goes into defs
  end
}, {
  __index=defs,
  __call=function(t, first, ...)
    local function helper(t, second, result, ...)
      if not second then return end
      t[second] = "second"
      t.multitoken[second] = result
      return helper(t, ...)
    end
    defs[first] = setmetatable({
      first = first,
      multitoken = {}
    }, {__index=t})
    return helper(defs[first], ...)
  end
})

defs.multitokens("=", "=", TK_EQ)
defs.multitokens("/", "/", TK_IDIV)
defs.multitokens("<", "<", TK_SHL, "=", TK_LE)
defs.multitokens(">", ">", TK_SHR, "=", TK_GE)
defs.multitokens("~", "=", TK_NE)
defs.multitokens(":", ":", TK_DBCOLON)

-- ".", "..", "..." and floats that start with a dot (".5")
defs["."] = setmetatable({
  [-1] = function(state, token, rule)
    if token ~= "." then
      if rule ~= "digit" then
        state[#state+1] = "."
      end
    end
  end,
  digit = function(state, token, rule)
    state[#state+1] = TK_FLT
    state[COLLECT] = {".", coalesce=31}
    return "in_decimal"
  end,
  ["."] = setmetatable({
    [-1] = function(state, token, rule)
      if token ~= "." then
        state[#state+1] = TK_CONCAT
      end
    end,
    ["."] = function(state, token)
      state[#state+1] = TK_DOTS
      return "self" -- actually goes into defs
    end,
  }, {__index=defs})
}, {__index=defs})

-- Numbers: the digits are collected as text and converted with tonumber()
-- when the literal ends (see the [-1] entry of in_integer).
function defs.digit(state, token)
  state[COLLECT] = {token, coalesce=31}
  if token == "0" then
    return "in_zero"
  else
    return "in_integer"
  end
end

defs.in_integer = setmetatable(selfify({
  hexdigit = "alpha",
  alpha = false,
  ['e'] = "exp",
  ['E'] = "exp",
  [parser.EOZ] = "self", -- defs
  exp = function(state, token)
    collect_fallback(state, token)
    return "in_exp"
  end,
  ['.'] = function(state, token)
    collect_fallback(state, token)
    return "in_decimal"
  end,
  digit = function(state, token)
    collect_fallback(state, token)
    return "in_digit"
  end,
  [-1] = function(state, token, rule)
    -- TODO figure out best order for these checks
    if rule == "digit" or token == "." or rule == "hexdigit" or rule == "into_hex" or rule == "exp" then return end
    state[#state+1] = state[STATE].numtype
    state[#state+1] = tonumber(table.concat(state[COLLECT])) -- TODO maybe not the best option
    state[COLLECT] = nil
  end,
  numtype = TK_INT
}, "in_digit"), {__index=defs})

defs.in_zero = setmetatable({
  ['x'] = "into_hex",
  ['X'] = "into_hex",
  into_hex = function(state, token)
    collect_fallback(state, token)
    return "in_hex"
  end,
}, {__index=defs.in_integer})

defs.in_decimal = setmetatable(selfify({
  ['.'] = false,
  numtype = TK_FLT
}, "in_digit"), {__index=defs.in_integer})

defs.in_expnum = setmetatable(selfify({
  exp = false,
}, "in_digit"), {__index=defs.in_decimal})

defs.in_subexp = setmetatable({
  in_expnum = defs.in_expnum,
  digit = function(state, token)
    collect_fallback(state, token)
    return "in_expnum"
  end,
}, {__index=defs.base})

defs.in_exp = setmetatable({
  in_subexp = defs.in_subexp,
  ["+"] = "sign",
  ["-"] = "sign",
  sign = function(state, token)
    collect_fallback(state, token)
    return "in_subexp"
  end,
}, {__index=defs.in_subexp})

defs.in_hex = setmetatable(selfify({
  in_decimal = "in_hex_fraction",
  hexdigit = 'digit',
  ['e'] = 'hexdigit',
  ['E'] = 'hexdigit',
  ['p'] = 'exp',
  ['P'] = 'exp',
}, "in_digit"), {__index=defs.in_integer})

defs.in_hex_fraction = setmetatable(selfify({
  ['.'] = false,
  numtype = TK_FLT
}, "in_digit"), {__index=defs.in_hex})

-- Single-character tokens are emitted as the character itself.
function defs.simpletoken(state, token)
  state[#state+1] = token
  return "self"
end

for token in string.gmatch("+*%^#&|(){}];,", ".") do
  defs[token] = "simpletoken"
end

-- Identifiers and keywords.
defs.whitespace = "self"
defs.hexdigit = "alpha"
defs["_"] = "alpha"
defs.in_alpha = setmetatable(selfify({digit = "in_alpha", hexdigit = "in_alpha", alpha = "in_alpha", _ = "in_alpha", [parser.EOZ] = "self"}, "in_alpha"), {__index=defs})
function defs.alpha(state, token)
  state[COLLECT] = {coalesce=15} -- TODO tweak this for CPU/memory tradeoff?
  collect_fallback(state, token)
  return "in_alpha"
end
defs.in_alpha[-1] = function(state, token, rule)
  if rule == "alpha" or rule == "digit" or rule == "hexdigit" or token == "_" then
    collect_fallback(state, token)
  else
    -- the word ended: emit either the keyword dummy or TK_NAME + its text
    local key = table.concat(state[COLLECT])
    state[COLLECT] = nil
    local keyword = keywords[key]
    if keyword then
      state[#state+1] = keyword
    else
      local pos = #state
      state[pos+1] = TK_NAME
      state[pos+2] = key
    end
  end
end

setmetatable(defs, {__index=defs.base})

-- Opening quote: emit TK_STRING, start collecting, and remember which quote
-- character must close the string.
function defs.string_open(state, token)
  if not state.in_string then
    state[#state+1] = TK_STRING
    state[COLLECT] = {coalesce=63} -- TODO tweak this for CPU/memory tradeoff?
    state.in_string = token
    return "string"
  end
  -- Was `assert("this shouldn't happen")`, which can never fail because a
  -- non-empty string is truthy; raise explicitly instead.
  error("this shouldn't happen")
end

local tokens = {
  TK_AND = TK_AND, TK_BREAK = TK_BREAK,
  TK_DO = TK_DO, TK_ELSE = TK_ELSE, TK_ELSEIF = TK_ELSEIF, TK_END = TK_END, TK_FALSE = TK_FALSE, TK_FOR = TK_FOR, TK_FUNCTION = TK_FUNCTION,
  TK_GOTO = TK_GOTO, TK_IF = TK_IF, TK_IN = TK_IN, TK_LOCAL = TK_LOCAL, TK_NIL = TK_NIL, TK_NOT = TK_NOT, TK_OR = TK_OR, TK_REPEAT = TK_REPEAT,
  TK_RETURN = TK_RETURN, TK_THEN = TK_THEN, TK_TRUE = TK_TRUE, TK_UNTIL = TK_UNTIL, TK_WHILE = TK_WHILE,
  TK_IDIV = TK_IDIV, TK_CONCAT = TK_CONCAT, TK_DOTS = TK_DOTS, TK_EQ = TK_EQ, TK_GE = TK_GE, TK_LE = TK_LE, TK_NE = TK_NE,
  TK_SHL = TK_SHL, TK_SHR = TK_SHR,
  TK_DBCOLON = TK_DBCOLON, TK_EOS = TK_EOS,
  TK_FLT = TK_FLT, TK_INT = TK_INT, TK_NAME = TK_NAME, TK_STRING = TK_STRING
}
-- TK maps the names without the "TK_" prefix to the same dummies; each dummy
-- also gets a __tostring that yields its full "TK_*" name.
local TK = {}
for k,v in pairs(tokens) do
  setmetatable(v, {__name=k, __tostring=function(self) return getmetatable(self).__name end})
  TK[k:sub(4)] = v
end

return {
  defs = defs,
  tokens = tokens,
  TK = TK,
  reverse_keywords = reverse_keywords,
  reverse_tokens = {
    [TK_IDIV] = "//", [TK_CONCAT] = "..", [TK_DOTS] = "...", [TK_EQ] = "==", [TK_GE] = ">=", [TK_LE] = "<=", [TK_NE] = "~=",
    [TK_SHL] = "<<", [TK_SHR] = ">>",
    [TK_DBCOLON] = "::", [TK_EOS] = "<eof>",
    [TK_FLT] = "<float>", [TK_INT] = "<integer>", [TK_NAME] = "<identifier>", [TK_STRING] = "<string>"
  },
}