summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--luatokens.lua238
-rw-r--r--test.lua96
2 files changed, 329 insertions, 5 deletions
diff --git a/luatokens.lua b/luatokens.lua
index 7bf9f68..406fba5 100644
--- a/luatokens.lua
+++ b/luatokens.lua
@@ -1,4 +1,74 @@
--- Lua defs
+--[[
+    luatokens.lua - pure-Lua Lua tokenizer
+    Copyright (C) 2019  Soni L.
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+--]]
+
+--[[
+    This software is based on Lua 5.1 and Lua 5.3
+
+    Lua 5.1 license:
+
+/******************************************************************************
+* Copyright (C) 1994-2012 Lua.org, PUC-Rio.  All rights reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining
+* a copy of this software and associated documentation files (the
+* "Software"), to deal in the Software without restriction, including
+* without limitation the rights to use, copy, modify, merge, publish,
+* distribute, sublicense, and/or sell copies of the Software, and to
+* permit persons to whom the Software is furnished to do so, subject to
+* the following conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+******************************************************************************/
+
+    Lua 5.3 license:
+
+/******************************************************************************
+* Copyright (C) 1994-2018 Lua.org, PUC-Rio.
+*
+* Permission is hereby granted, free of charge, to any person obtaining
+* a copy of this software and associated documentation files (the
+* "Software"), to deal in the Software without restriction, including
+* without limitation the rights to use, copy, modify, merge, publish,
+* distribute, sublicense, and/or sell copies of the Software, and to
+* permit persons to whom the Software is furnished to do so, subject to
+* the following conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+******************************************************************************/
+--]]
 
 -- we need some stuff from here
 local parser = require "parser"
@@ -54,6 +124,11 @@ local keywords = {
     ["while"] = TK_WHILE,
 }
 
+local reverse_keywords = {}
+for k,v in pairs(keywords) do
+    reverse_keywords[v] = k
+end
+
 local defs = selfify({})
 
 defs.base = {
@@ -315,6 +390,7 @@ do local tlongstring = {}
 
         do local tllmaybe_end = selfify({defs = defs}, "maybe_end")
             tllongstring_proper.maybe_end = tllmaybe_end
+            tllmaybe_end.longstring_proper = tllongstring_proper
             tllmaybe_end["="] = function(state, token)
                 state.longstring_close = state.longstring_close + 1
                 return "maybe_end"
@@ -335,6 +411,8 @@ do local tlongstring = {}
                     return "maybe_end"
                 end
             end
+            tllmaybe_end[""] = "longstring_proper"
+            tllmaybe_end[1] = collect_fallback
             tllmaybe_end[-1] = function(state, token, rule)
                 if not rule then
                     collect_fallback(state, "]")
@@ -390,6 +468,157 @@ defs.maybe_longstring = setmetatable({
 --defs["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=defs})}, {__index=defs})
 mknewline(defs, 1)
 
+-- thankfully comments are easy
+defs["-"] = "maybe_comment"
+do local tmaybe_comment = setmetatable({["-"] = "comment"}, {__index=defs})
+    defs.maybe_comment = tmaybe_comment
+    tmaybe_comment[-1] = function(state, token, rule)
+        if rule ~= "comment" then
+            state[#state+1] = "-"
+        end
+    end
+    do local tmcomment = {comment_proper = selfify({})}
+        tmaybe_comment.comment = tmcomment
+        tmcomment[""] = "comment_proper"
+        tmcomment["["] = "maybe_longcomment"
+        mknewline(tmcomment, 1, defs)
+        mknewline(tmcomment.comment_proper, 1, defs)
+        tmcomment.comment_proper[""] = "self"
+        do local tlongcomment = {}
+            tmcomment.longcomment = tlongcomment
+            do local tllongcomment_proper = selfify({[""] = "self", ["]"] = function(state, token) state.longcomment_close = 0 return "maybe_end" end})
+                do local tllmaybe_end = selfify({defs = defs}, "maybe_end")
+                    tllongcomment_proper.maybe_end = tllmaybe_end
+                    tllmaybe_end.longcomment_proper = tllongcomment_proper
+                    tllmaybe_end["="] = function(state, token)
+                        state.longcomment_close = state.longcomment_close + 1
+                        return "maybe_end"
+                    end
+                    tllmaybe_end["]"] = function(state, token)
+                        if state.longcomment_close == state.longcomment_count then
+                            state.longcomment_close = nil
+                            state.longcomment_count = nil
+                            return "defs"
+                        else
+                            state.longcomment_close = 0
+                            return "maybe_end"
+                        end
+                    end
+                    tllmaybe_end[""] = "longcomment_proper"
+                    tllmaybe_end[-1] = function(state, token, rule)
+                        if not rule then
+                            state.longcomment_close = nil
+                        end
+                    end
+                end
+
+                tlongcomment.longcomment_proper = tllongcomment_proper
+                mknewline(tlongcomment, 1, tllongcomment_proper)
+                setmetatable(tlongcomment, {__index=tllongcomment_proper})
+            end
+        end
+
+        tmcomment.maybe_longcomment = setmetatable({
+            comment = tmcomment,
+            ['['] = "longcomment_open",
+            ['='] = "longcomment_open",
+            longcomment_count = setmetatable(selfify({
+                ["="] = function(state, token)
+                    state.longcomment_count = state.longcomment_count + 1
+                    return "longcomment_count"
+                end,
+                ["["] = "longcomment",
+                longcomment = tmcomment.longcomment,
+            }, "longcomment_count"), {__index=tmcomment}),
+            longcomment_open = function(state, token)
+                if token == "=" then
+                    state.longcomment_count = (state.longcomment_count or 0) + 1
+                    return "longcomment_count"
+                elseif token == "[" then
+                    state.longcomment_count = 0
+                    return "longstring"
+                end
+            end,
+        }, {__index=tmcomment})
+    end
+end
+
+local STATE = parser.STATE
+
+defs.multitokens = setmetatable({
+    [-1] = function(state, token, rule)
+        if not state[STATE].multitoken[token] then
+            state[#state+1] = state[STATE].first
+        end
+    end,
+    second = function(state, token)
+        state[#state+1] = state[STATE].multitoken[token]
+        return "self" -- actually goes into defs
+    end
+}, {
+    __index=defs,
+    __call=function(t, first, ...)
+        local function helper(t, second, result, ...)
+            if not second then return end
+            t[second] = "second"
+            t.multitoken[second] = result
+            return helper(t, ...)
+        end
+        defs[first] = setmetatable({
+            first = first,
+            multitoken = {}
+        }, {__index=t})
+        return helper(defs[first], ...)
+    end
+})
+
+defs.multitokens("=", "=", TK_EQ)
+defs.multitokens("/", "/", TK_IDIV)
+defs.multitokens("<", "<", TK_SHL, "=", TK_LE)
+defs.multitokens(">", ">", TK_SHR, "=", TK_GE)
+defs.multitokens("~", "=", TK_NE)
+defs.multitokens(":", ":", TK_DBCOLON)
+
+defs["."] = setmetatable({
+    [-1] = function(state, token, rule)
+        if token ~= "." then
+            if rule ~= "digit" then
+                state[#state+1] = "."
+            else
+                error("NYI") -- TODO digit handling
+            end
+        end
+    end,
+    ["."] = setmetatable({
+        [-1] = function(state, token, rule)
+            if token ~= "." then
+                state[#state+1] = TK_CONCAT
+            end
+        end,
+        ["."] = function(state, token)
+            state[#state+1] = TK_DOTS
+            return "self" -- actually goes into defs
+        end
+    }, {__index=defs})
+}, {__index=defs})
+
+function defs.digit(state, token)
+    -- TODO
+end
+
+defs.in_digit = {
+    -- TODO
+}
+
+function defs.simpletoken(state, token)
+    state[#state+1] = token
+    return "self"
+end
+
+for token in string.gmatch("+*%^#&|(){}];,", ".") do
+    defs[token] = "simpletoken"
+end
+
 defs.whitespace = "self"
 defs.hexdigit = "alpha"
 defs["_"] = "alpha"
@@ -440,4 +669,11 @@ return {
         TK_DBCOLON = TK_DBCOLON, TK_EOS = TK_EOS,
         TK_FLT = TK_FLT, TK_INT = TK_INT, TK_NAME = TK_NAME, TK_STRING = TK_STRING
     },
+    reverse_keywords = reverse_keywords,
+    reverse_tokens = {
+        [TK_IDIV] = "//", [TK_CONCAT] = "..", [TK_DOTS] = "...", [TK_EQ] = "==", [TK_GE] = ">=", [TK_LE] = "<=", [TK_NE] = "~=",
+        [TK_SHL] = "<<", [TK_SHR] = ">>",
+        [TK_DBCOLON] = "::", [TK_EOS] = "<eof>",
+        [TK_FLT] = "<float>", [TK_INT] = "<integer>", [TK_NAME] = "<identifier>", [TK_STRING] = "<string>"
+    },
 }
diff --git a/test.lua b/test.lua
index f9648eb..a8a830d 100644
--- a/test.lua
+++ b/test.lua
@@ -90,6 +90,7 @@ do -- lua tokens
     else
         assert(state[1] == luatokens.tokens.TK_STRING)
         assert(state[2] == "hello world")
+        assert(state[3] == nil)
         assert(state.line == 1 or not state.line)
     end
 end -- lua tokens
@@ -110,6 +111,7 @@ do -- more lua tokens
     else
         assert(state[1] == luatokens.tokens.TK_STRING)
         assert(state[2] == "\7\8\12\10\13\9\11\92\34\39\65\65\10")
+        assert(state[3] == nil)
         assert(state.line == 2)
     end
 end -- lua tokens
@@ -129,6 +131,7 @@ do -- even more lua tokens
     else
         assert(state[1] == luatokens.tokens.TK_STRING)
         assert(state[2] == "A")
+        assert(state[3] == nil)
         assert(state.line == 1 or not state.line)
     end
 end -- lua tokens
@@ -168,6 +171,7 @@ do -- even more lua tokens
         assert(table.remove(state, 1) == "\252\132\128\128\128\128")
         assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
         assert(table.remove(state, 1) == "\253\191\191\191\191\191")
+        assert(table.remove(state, 1) == nil)
         assert(state.line == 1 or not state.line)
     end
 end -- lua tokens
@@ -188,6 +192,7 @@ do -- simple lua tokens
         assert(table.remove(state, 1) == "[")
         assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
         assert(table.remove(state, 1) == "")
+        assert(table.remove(state, 1) == nil)
         assert(state.line == 1 or not state.line)
     end
 end -- lua tokens
@@ -207,6 +212,7 @@ do -- simple long string
     else
         assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
         assert(table.remove(state, 1) == "")
+        assert(table.remove(state, 1) == nil)
         assert(state.line == 1 or not state.line)
     end
 end -- long string
@@ -226,6 +232,7 @@ do -- long string with depth 1
     else
         assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
         assert(table.remove(state, 1) == "")
+        assert(table.remove(state, 1) == nil)
         assert(state.line == 1 or not state.line)
     end
 end -- long string
@@ -245,6 +252,7 @@ do -- long string with "nested" long string
     else
         assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
         assert(table.remove(state, 1) == "[[]]")
+        assert(table.remove(state, 1) == nil)
         assert(state.line == 1 or not state.line)
     end
 end -- long string
@@ -252,7 +260,7 @@ end -- long string
 do -- long string edge cases
     local luatokens = require "luatokens"
     local tokens = luatokens.defs
-    local state, err, etoken, estate = parser.parse(tokens, "[==[]=]==][==[]]==]")
+    local state, err, etoken, estate = parser.parse(tokens, "[==[]=]==][==[]]==][=[] ]=]")
     local case = case()
     if not state then
         print(case, "---- IN  TOKENS ----")
@@ -266,6 +274,9 @@ do -- long string edge cases
         assert(table.remove(state, 1) == "]=")
         assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
         assert(table.remove(state, 1) == "]")
+        assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+        assert(table.remove(state, 1) == "] ")
+        assert(table.remove(state, 1) == nil)
         assert(state.line == 1 or not state.line)
     end
 end -- long string
@@ -309,20 +320,97 @@ do -- keywords
         assert(table.remove(state, 1) == luatokens.tokens.TK_TRUE)
         assert(table.remove(state, 1) == luatokens.tokens.TK_UNTIL)
         assert(table.remove(state, 1) == luatokens.tokens.TK_WHILE)
+        assert(table.remove(state, 1) == nil)
         assert(state.line == 4)
     end
 end -- keywords
 
-do -- FUCK
+do -- "other tokens"
+    local luatokens = require "luatokens"
+    local tokens = luatokens.defs
+    local state, err, etoken, estate = parser.parse(tokens, [[
+     +     -     *     /     %     ^     #
+     &     ~     |     <<    >>    //
+     ==    ~=    <=    >=    <     >     =
+     (     )     {     }     [     ]     ::
+     ;     :     ,     .     ..    ...]])
+    local case = case()
+    if not state then
+        print(case, "---- IN  TOKENS ----")
+        print(case, err, etoken)
+        for i,v in pairs(estate) do
+            print(case, i, v)
+        end
+        print(case, "---- OUT TOKENS ----")
+    else
+        assert(table.remove(state, 1) == "+")
+        assert(table.remove(state, 1) == "-")
+        assert(table.remove(state, 1) == "*")
+        assert(table.remove(state, 1) == "/")
+        assert(table.remove(state, 1) == "%")
+        assert(table.remove(state, 1) == "^")
+        assert(table.remove(state, 1) == "#")
+        assert(table.remove(state, 1) == "&")
+        assert(table.remove(state, 1) == "~")
+        assert(table.remove(state, 1) == "|")
+        assert(table.remove(state, 1) == luatokens.tokens.TK_SHL)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_SHR)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_IDIV)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_EQ)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_NE)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_LE)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_GE)
+        assert(table.remove(state, 1) == "<")
+        assert(table.remove(state, 1) == ">")
+        assert(table.remove(state, 1) == "=")
+        assert(table.remove(state, 1) == "(")
+        assert(table.remove(state, 1) == ")")
+        assert(table.remove(state, 1) == "{")
+        assert(table.remove(state, 1) == "}")
+        assert(table.remove(state, 1) == "[")
+        assert(table.remove(state, 1) == "]")
+        assert(table.remove(state, 1) == luatokens.tokens.TK_DBCOLON)
+        assert(table.remove(state, 1) == ";")
+        assert(table.remove(state, 1) == ":")
+        assert(table.remove(state, 1) == ",")
+        assert(table.remove(state, 1) == ".")
+        assert(table.remove(state, 1) == luatokens.tokens.TK_CONCAT)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_DOTS)
+        assert(table.remove(state, 1) == nil)
+        assert(state.line == 5)
+    end
+end -- "other tokens"
+
+do -- long comments
+    local luatokens = require "luatokens"
+    local tokens = luatokens.defs
+    local state, err, etoken, estate = parser.parse(tokens, [==[--[[
+    --]]]==])
+    local case = case()
+    if not state then
+        print(case, "---- IN  TOKENS ----")
+        print(case, err, etoken)
+        for i,v in pairs(estate) do
+            print(case, i, v)
+        end
+        print(case, "---- OUT TOKENS ----")
+    else
+        assert(table.remove(state, 1) == nil)
+        assert(state.line == 2)
+    end
+end -- long comments
+
+while false do -- FUCK
     local luatokens = require "luatokens"
-    local luatokens_file = io.open("./luatokens.lua", "r"):read((_VERSION == "Lua 5.1" or _VERSION == "Lua 5.2") and "*a" or "a")
+    local luatokens_file = io.open("./luatokens.lua", "r")
     local tokens = luatokens.defs
-    local state, err, etoken, estate = parser.parse(tokens, luatokens_file)
+    local state, err, etoken, estate = parser.parse(tokens, function() return luatokens_file:read(8192) end)
     local case = case()
     if not state then
         print(case, "---- IN  TOKENS ----")
         print(case, err, etoken)
         for i,v in pairs(estate) do
+            v = luatokens.reverse_keywords[v] or luatokens.reverse_tokens[v] or v
             print(case, i, v)
         end
         print(case, "---- OUT TOKENS ----")