From f56e1bd7e7f9a8d0a55146edba4e7c2ee071487a Mon Sep 17 00:00:00 2001 From: SoniEx2 Date: Mon, 8 Apr 2019 13:57:28 -0300 Subject: Add copyright notices, everything almost works Numbers and long comments aren't working --- luatokens.lua | 238 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- test.lua | 96 ++++++++++++++++++++++- 2 files changed, 329 insertions(+), 5 deletions(-) diff --git a/luatokens.lua b/luatokens.lua index 7bf9f68..406fba5 100644 --- a/luatokens.lua +++ b/luatokens.lua @@ -1,4 +1,74 @@ --- Lua defs +--[[ + luatokens.lua - pure-Lua Lua tokenizer + Copyright (C) 2019 Soni L. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. +--]] + +--[[ + This software is based on Lua 5.1 and Lua 5.3 + + Lua 5.1 license: + +/****************************************************************************** +* Copyright (C) 1994-2012 Lua.org, PUC-Rio. All rights reserved. 
+* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to deal in the Software without restriction, including +* without limitation the rights to use, copy, modify, merge, publish, +* distribute, sublicense, and/or sell copies of the Software, and to +* permit persons to whom the Software is furnished to do so, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +******************************************************************************/ + + Lua 5.3 license: + +/****************************************************************************** +* Copyright (C) 1994-2018 Lua.org, PUC-Rio. +* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to deal in the Software without restriction, including +* without limitation the rights to use, copy, modify, merge, publish, +* distribute, sublicense, and/or sell copies of the Software, and to +* permit persons to whom the Software is furnished to do so, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. 
+* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +******************************************************************************/ +--]] -- we need some stuff from here local parser = require "parser" @@ -54,6 +124,11 @@ local keywords = { ["while"] = TK_WHILE, } +local reverse_keywords = {} +for k,v in pairs(keywords) do + reverse_keywords[v] = k +end + local defs = selfify({}) defs.base = { @@ -315,6 +390,7 @@ do local tlongstring = {} do local tllmaybe_end = selfify({defs = defs}, "maybe_end") tllongstring_proper.maybe_end = tllmaybe_end + tllmaybe_end.longstring_proper = tllongstring_proper tllmaybe_end["="] = function(state, token) state.longstring_close = state.longstring_close + 1 return "maybe_end" @@ -335,6 +411,8 @@ do local tlongstring = {} return "maybe_end" end end + tllmaybe_end[""] = "longstring_proper" + tllmaybe_end[1] = collect_fallback tllmaybe_end[-1] = function(state, token, rule) if not rule then collect_fallback(state, "]") @@ -390,6 +468,157 @@ defs.maybe_longstring = setmetatable({ --defs["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=defs})}, {__index=defs}) mknewline(defs, 1) +-- thankfully comments are easy +defs["-"] = "maybe_comment" +do local tmaybe_comment = setmetatable({["-"] = "comment"}, {__index=defs}) + defs.maybe_comment = tmaybe_comment + tmaybe_comment[-1] = function(state, token, rule) + if rule ~= "comment" then + state[#state+1] = "-" + end + end + do local tmcomment = {comment_proper = selfify({})} + tmaybe_comment.comment = tmcomment + tmcomment[""] = "comment_proper" + 
tmcomment["["] = "maybe_longcomment" + mknewline(tmcomment, 1, defs) + mknewline(tmcomment.comment_proper, 1, defs) + tmcomment.comment_proper[""] = "self" + do local tlongcomment = {} + tmcomment.longcomment = tlongcomment + do local tllongcomment_proper = selfify({[""] = "self", ["]"] = function(state, token) state.longcomment_close = 0 return "maybe_end" end}) + do local tllmaybe_end = selfify({comment = tcomment}, "maybe_end") + tllongcomment_proper.maybe_end = tllmaybe_end + tllmaybe_end = tllongcomment_proper + tllmaybe_end["="] = function(state, token) + state.longcomment_close = state.longcomment_close + 1 + return "maybe_end" + end + tllmaybe_end["]"] = function(state, token) + if state.longcomment_close == state.longcomment_count then + state.longcomment_close = nil + state.longcomment_count = nil + return "defs" + else + state.longcomment_close = 0 + return "maybe_end" + end + end + tllmaybe_end[""] = "longcomment_proper" + tllmaybe_end[-1] = function(state, token, rule) + if not rule then + state.longcomment_close = nil + end + end + end + + tlongcomment.longcomment_proper = tllongcomment_proper + mknewline(tlongcomment, 1, tllongcomment_proper) + setmetatable(tlongcomment, {__index=tllongcomment_proper}) + end + end + + tmcomment.maybe_longcomment = setmetatable({ + comment = tmcomment, + ['['] = "longcomment_open", + ['='] = "longcomment_open", + longcomment_count = setmetatable(selfify({ + ["="] = function(state, token) + state.longcomment_count = state.longcomment_count + 1 + return "longcomment_count" + end, + ["["] = "longcomment", + longcomment = tmcomment.longcomment, + }, "longcomment_count"), {__index=tmcomment}), + longcomment_open = function(state, token) + if token == "=" then + state.longcomment_count = state.longcomment_count or 0 + 1 + return "longcomment_count" + elseif token == "[" then + state.longcomment_count = 0 + return "longstring" + end + end, + }, {__index=tmcomment}) + end +end + +local STATE = parser.STATE + +defs.multitokens 
= setmetatable({ + [-1] = function(state, token, rule) + if not state[STATE].multitoken[token] then + state[#state+1] = state[STATE].first + end + end, + second = function(state, token) + state[#state+1] = state[STATE].multitoken[token] + return "self" -- actually goes into defs + end +}, { + __index=defs, + __call=function(t, first, ...) + local function helper(t, second, result, ...) + if not second then return end + t[second] = "second" + t.multitoken[second] = result + return helper(t, ...) + end + defs[first] = setmetatable({ + first = first, + multitoken = {} + }, {__index=t}) + return helper(defs[first], ...) + end +}) + +defs.multitokens("=", "=", TK_EQ) +defs.multitokens("/", "/", TK_IDIV) +defs.multitokens("<", "<", TK_SHL, "=", TK_LE) +defs.multitokens(">", ">", TK_SHR, "=", TK_GE) +defs.multitokens("~", "=", TK_NE) +defs.multitokens(":", ":", TK_DBCOLON) + +defs["."] = setmetatable({ + [-1] = function(state, token, rule) + if token ~= "." then + if rule ~= "digit" then + state[#state+1] = "." + else + error("NYI") -- TODO digit handling + end + end + end, + ["."] = setmetatable({ + [-1] = function(state, token, rule) + if token ~= "." 
then + state[#state+1] = TK_CONCAT + end + end, + ["."] = function(state, token) + state[#state+1] = TK_DOTS + return "self" -- actually goes into defs + end + }, {__index=defs}) +}, {__index=defs}) + +function defs.digit(state, token) + -- TODO +end + +defs.in_digit = { + -- TODO +} + +function defs.simpletoken(state, token) + state[#state+1] = token + return "self" +end + +for token in string.gmatch("+*%^#&|(){}];,", ".") do + defs[token] = "simpletoken" +end + defs.whitespace = "self" defs.hexdigit = "alpha" defs["_"] = "alpha" @@ -440,4 +669,11 @@ return { TK_DBCOLON = TK_DBCOLON, TK_EOS = TK_EOS, TK_FLT = TK_FLT, TK_INT = TK_INT, TK_NAME = TK_NAME, TK_STRING = TK_STRING }, + reverse_keywords = reverse_keywords, + reverse_tokens = { + [TK_IDIV] = "//", [TK_CONCAT] = "..", [TK_DOTS] = "...", [TK_EQ] = "==", [TK_GE] = ">=", [TK_LE] = "<=", [TK_NE] = "~=", + [TK_SHL] = "<<", [TK_SHR] = ">>", + [TK_DBCOLON] = "::", [TK_EOS] = "", + [TK_FLT] = "", [TK_INT] = "", [TK_NAME] = "", [TK_STRING] = "" + }, } diff --git a/test.lua b/test.lua index f9648eb..a8a830d 100644 --- a/test.lua +++ b/test.lua @@ -90,6 +90,7 @@ do -- lua tokens else assert(state[1] == luatokens.tokens.TK_STRING) assert(state[2] == "hello world") + assert(state[3] == nil) assert(state.line == 1 or not state.line) end end -- lua tokens @@ -110,6 +111,7 @@ do -- more lua tokens else assert(state[1] == luatokens.tokens.TK_STRING) assert(state[2] == "\7\8\12\10\13\9\11\92\34\39\65\65\10") + assert(state[3] == nil) assert(state.line == 2) end end -- lua tokens @@ -129,6 +131,7 @@ do -- even more lua tokens else assert(state[1] == luatokens.tokens.TK_STRING) assert(state[2] == "A") + assert(state[3] == nil) assert(state.line == 1 or not state.line) end end -- lua tokens @@ -168,6 +171,7 @@ do -- even more lua tokens assert(table.remove(state, 1) == "\252\132\128\128\128\128") assert(table.remove(state, 1) == luatokens.tokens.TK_STRING) assert(table.remove(state, 1) == "\253\191\191\191\191\191") + 
assert(table.remove(state, 1) == nil) assert(state.line == 1 or not state.line) end end -- lua tokens @@ -188,6 +192,7 @@ do -- simple lua tokens assert(table.remove(state, 1) == "[") assert(table.remove(state, 1) == luatokens.tokens.TK_STRING) assert(table.remove(state, 1) == "") + assert(table.remove(state, 1) == nil) assert(state.line == 1 or not state.line) end end -- lua tokens @@ -207,6 +212,7 @@ do -- simple long string else assert(table.remove(state, 1) == luatokens.tokens.TK_STRING) assert(table.remove(state, 1) == "") + assert(table.remove(state, 1) == nil) assert(state.line == 1 or not state.line) end end -- long string @@ -226,6 +232,7 @@ do -- long string with depth 1 else assert(table.remove(state, 1) == luatokens.tokens.TK_STRING) assert(table.remove(state, 1) == "") + assert(table.remove(state, 1) == nil) assert(state.line == 1 or not state.line) end end -- long string @@ -245,6 +252,7 @@ do -- long string with "nested" long string else assert(table.remove(state, 1) == luatokens.tokens.TK_STRING) assert(table.remove(state, 1) == "[[]]") + assert(table.remove(state, 1) == nil) assert(state.line == 1 or not state.line) end end -- long string @@ -252,7 +260,7 @@ end -- long string do -- long string edge cases local luatokens = require "luatokens" local tokens = luatokens.defs - local state, err, etoken, estate = parser.parse(tokens, "[==[]=]==][==[]]==]") + local state, err, etoken, estate = parser.parse(tokens, "[==[]=]==][==[]]==][=[] ]=]") local case = case() if not state then print(case, "---- IN TOKENS ----") @@ -266,6 +274,9 @@ do -- long string edge cases assert(table.remove(state, 1) == "]=") assert(table.remove(state, 1) == luatokens.tokens.TK_STRING) assert(table.remove(state, 1) == "]") + assert(table.remove(state, 1) == luatokens.tokens.TK_STRING) + assert(table.remove(state, 1) == "] ") + assert(table.remove(state, 1) == nil) assert(state.line == 1 or not state.line) end end -- long string @@ -309,20 +320,97 @@ do -- keywords 
assert(table.remove(state, 1) == luatokens.tokens.TK_TRUE) assert(table.remove(state, 1) == luatokens.tokens.TK_UNTIL) assert(table.remove(state, 1) == luatokens.tokens.TK_WHILE) + assert(table.remove(state, 1) == nil) assert(state.line == 4) end end -- keywords -do -- FUCK +do -- "other tokens" + local luatokens = require "luatokens" + local tokens = luatokens.defs + local state, err, etoken, estate = parser.parse(tokens, [[ + + - * / % ^ # + & ~ | << >> // + == ~= <= >= < > = + ( ) { } [ ] :: + ; : , . .. ...]]) + local case = case() + if not state then + print(case, "---- IN TOKENS ----") + print(case, err, etoken) + for i,v in pairs(estate) do + print(case, i, v) + end + print(case, "---- OUT TOKENS ----") + else + assert(table.remove(state, 1) == "+") + assert(table.remove(state, 1) == "-") + assert(table.remove(state, 1) == "*") + assert(table.remove(state, 1) == "/") + assert(table.remove(state, 1) == "%") + assert(table.remove(state, 1) == "^") + assert(table.remove(state, 1) == "#") + assert(table.remove(state, 1) == "&") + assert(table.remove(state, 1) == "~") + assert(table.remove(state, 1) == "|") + assert(table.remove(state, 1) == luatokens.tokens.TK_SHL) + assert(table.remove(state, 1) == luatokens.tokens.TK_SHR) + assert(table.remove(state, 1) == luatokens.tokens.TK_IDIV) + assert(table.remove(state, 1) == luatokens.tokens.TK_EQ) + assert(table.remove(state, 1) == luatokens.tokens.TK_NE) + assert(table.remove(state, 1) == luatokens.tokens.TK_LE) + assert(table.remove(state, 1) == luatokens.tokens.TK_GE) + assert(table.remove(state, 1) == "<") + assert(table.remove(state, 1) == ">") + assert(table.remove(state, 1) == "=") + assert(table.remove(state, 1) == "(") + assert(table.remove(state, 1) == ")") + assert(table.remove(state, 1) == "{") + assert(table.remove(state, 1) == "}") + assert(table.remove(state, 1) == "[") + assert(table.remove(state, 1) == "]") + assert(table.remove(state, 1) == luatokens.tokens.TK_DBCOLON) + assert(table.remove(state, 1) 
== ";") + assert(table.remove(state, 1) == ":") + assert(table.remove(state, 1) == ",") + assert(table.remove(state, 1) == ".") + assert(table.remove(state, 1) == luatokens.tokens.TK_CONCAT) + assert(table.remove(state, 1) == luatokens.tokens.TK_DOTS) + assert(table.remove(state, 1) == nil) + assert(state.line == 5) + end +end -- "other tokens" + +do -- long comments + local luatokens = require "luatokens" + local tokens = luatokens.defs + local state, err, etoken, estate = parser.parse(tokens, [==[--[[ + --]]]==]) + local case = case() + if not state then + print(case, "---- IN TOKENS ----") + print(case, err, etoken) + for i,v in pairs(estate) do + print(case, i, v) + end + print(case, "---- OUT TOKENS ----") + else + assert(table.remove(state, 1) == nil) + assert(state.line == 2) + end +end -- long comments + +while false do -- FUCK local luatokens = require "luatokens" - local luatokens_file = io.open("./luatokens.lua", "r"):read((_VERSION == "Lua 5.1" or _VERSION == "Lua 5.2") and "*a" or "a") + local luatokens_file = io.open("./luatokens.lua", "r") local tokens = luatokens.defs - local state, err, etoken, estate = parser.parse(tokens, luatokens_file) + local state, err, etoken, estate = parser.parse(tokens, function() return luatokens_file:read(8192) end) local case = case() if not state then print(case, "---- IN TOKENS ----") print(case, err, etoken) for i,v in pairs(estate) do + v = luatokens.reverse_keywords[v] or luatokens.reverse_tokens[v] or v print(case, i, v) end print(case, "---- OUT TOKENS ----") -- cgit 1.4.1