-rw-r--r--  luatokens.lua  105
-rw-r--r--  parser.lua      20
-rw-r--r--  test.lua       102
3 files changed, 217 insertions(+), 10 deletions(-)
diff --git a/luatokens.lua b/luatokens.lua
index 226a81a..7bf9f68 100644
--- a/luatokens.lua
+++ b/luatokens.lua
@@ -9,12 +9,15 @@ local collect_fallback = parser.collect_fallback
 
 -- "dummies"
 -- see http://www.lua.org/source/5.3/llex.h.html#RESERVED
+-- keywords
 local TK_AND, TK_BREAK,
     TK_DO, TK_ELSE, TK_ELSEIF, TK_END, TK_FALSE, TK_FOR, TK_FUNCTION,
     TK_GOTO, TK_IF, TK_IN, TK_LOCAL, TK_NIL, TK_NOT, TK_OR, TK_REPEAT,
     TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE,
+    -- operators
     TK_IDIV, TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE,
     TK_SHL, TK_SHR,
+    -- misc
     TK_DBCOLON, TK_EOS,
     TK_FLT, TK_INT, TK_NAME, TK_STRING =
     {}, {},
@@ -26,7 +29,32 @@ local TK_AND, TK_BREAK,
     {}, {},
     {}, {}, {}, {}
 
-local defs = {}
+local keywords = {
+    ["and"] = TK_AND,
+    ["break"] = TK_BREAK,
+    ["do"] = TK_DO,
+    ["else"] = TK_ELSE,
+    ["elseif"] = TK_ELSEIF,
+    ["end"] = TK_END,
+    ["false"] = TK_FALSE,
+    ["for"] = TK_FOR,
+    ["function"] = TK_FUNCTION,
+    ["goto"] = TK_GOTO,
+    ["if"] = TK_IF,
+    ["in"] = TK_IN,
+    ["local"] = TK_LOCAL,
+    ["nil"] = TK_NIL,
+    ["not"] = TK_NOT,
+    ["or"] = TK_OR,
+    ["repeat"] = TK_REPEAT,
+    ["return"] = TK_RETURN,
+    ["then"] = TK_THEN,
+    ["true"] = TK_TRUE,
+    ["until"] = TK_UNTIL,
+    ["while"] = TK_WHILE,
+}
+
+local defs = selfify({})
 
 defs.base = {
     [" "] = "whitespace",
@@ -280,9 +308,46 @@ do local tstring = selfify({})
     end
 end
 
-do local tlongstring = selfify({})
+do local tlongstring = {}
     defs.longstring = tlongstring
-    -- TODO
+    do local tllongstring_proper = selfify({[""] = "self", ["]"] = function(state, token) state.longstring_close = 0 return "maybe_end" end})
+        tllongstring_proper[1] = collect_fallback
+
+        do local tllmaybe_end = selfify({defs = defs}, "maybe_end")
+            tllongstring_proper.maybe_end = tllmaybe_end
+            tllmaybe_end["="] = function(state, token)
+                state.longstring_close = state.longstring_close + 1
+                return "maybe_end"
+            end
+            tllmaybe_end["]"] = function(state, token)
+                if state.longstring_close == state.longstring_count then
+                    state.longstring_close = nil
+                    state.longstring_count = nil
+                    local pos = #state
+                    state[pos+1] = TK_STRING
+                    state[pos+2] = table.concat(state[COLLECT])
+                    state[COLLECT] = nil
+                    return "defs"
+                else
+                    collect_fallback(state, "]")
+                    collect_fallback(state, ("="):rep(state.longstring_close))
+                    state.longstring_close = 0
+                    return "maybe_end"
+                end
+            end
+            tllmaybe_end[-1] = function(state, token, rule)
+                if not rule then
+                    collect_fallback(state, "]")
+                    collect_fallback(state, ("="):rep(state.longstring_close))
+                    state.longstring_close = nil
+                end
+            end
+        end
+
+        tlongstring.longstring_proper = tllongstring_proper
+        mknewline(tlongstring, 1, tllongstring_proper)
+        setmetatable(tlongstring, {__index=tllongstring_proper})
+    end
 end
 
 defs["'"] = "string_open"
@@ -297,6 +362,10 @@ defs.maybe_longstring = setmetatable({
             state.longstring_count = state.longstring_count + 1
             return "self"
         end,
+        ["["] = function(state, token)
+            state[COLLECT] = {coalesce=63} -- TODO tweak this for CPU/memory tradeoff?
+            return "longstring"
+        end,
         longstring = defs.longstring
     }),
     longstring_open = function(state, token)
@@ -304,6 +373,8 @@ defs.maybe_longstring = setmetatable({
             state.longstring_count = state.longstring_count or 0 + 1
             return "longstring_count"
         elseif token == "[" then
+            state.longstring_count = 0
+            state[COLLECT] = {coalesce=63} -- TODO tweak this for CPU/memory tradeoff?
             return "longstring"
         end
     end,
@@ -319,12 +390,38 @@ defs.maybe_longstring = setmetatable({
 --defs["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=defs})}, {__index=defs})
 mknewline(defs, 1)
 
+defs.whitespace = "self"
+defs.hexdigit = "alpha"
+defs["_"] = "alpha"
+defs.in_alpha = setmetatable(selfify({digit = "in_alpha", hexdigit = "in_alpha", alpha = "in_alpha", _ = "in_alpha", [parser.EOZ] = "self"}, "in_alpha"), {__index=defs})
+function defs.alpha(state, token)
+    state[COLLECT] = {coalesce=15} -- TODO tweak this for CPU/memory tradeoff?
+    collect_fallback(state, token)
+    return "in_alpha"
+end
+defs.in_alpha[-1] = function(state, token, rule)
+    if rule == "alpha" or rule == "digit" or rule == "hexdigit" or token == "_" then
+        collect_fallback(state, token)
+    else
+        local key = table.concat(state[COLLECT])
+        state[COLLECT] = nil
+        local keyword = keywords[key]
+        if keyword then
+            state[#state+1] = keyword
+        else
+            local pos = #state
+            state[pos+1] = TK_NAME
+            state[pos+2] = key
+        end
+    end
+end
+
 setmetatable(defs, {__index=defs.base})
 
 function defs.string_open(state, token)
     if not state.in_string then
         state[#state+1] = TK_STRING
-        state[COLLECT] = {coalesce=50} -- TODO tweak this for CPU/memory tradeoff?
+        state[COLLECT] = {coalesce=63} -- TODO tweak this for CPU/memory tradeoff?
         state.in_string = token
         return "string"
     end
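
With the identifier and keyword rules wired into defs, the lexer is driven exactly as the new test cases below drive it. A usage sketch in that style, assuming the layout the tests rely on (rule table at luatokens.defs, sentinels at luatokens.tokens):

    -- Usage sketch, mirroring the tests added below; assumes luatokens.defs
    -- and luatokens.tokens as exported by this module.
    local parser    = require "parser"
    local luatokens = require "luatokens"

    local state, err = parser.parse(luatokens.defs, "do return end")
    if state then
        assert(state[1] == luatokens.tokens.TK_DO)
        assert(state[2] == luatokens.tokens.TK_RETURN)
        assert(state[3] == luatokens.tokens.TK_END)
    end
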
diff --git a/parser.lua b/parser.lua
index bfa7dd3..4f4e166 100644
--- a/parser.lua
+++ b/parser.lua
@@ -24,6 +24,8 @@ local DATA = {}
 local GEN = {}
 -- key for DATA OFFSET
 local OFFDATA = {}
+-- key for End of Stream
+local EOZ = {}
 
 local optimize_lookups = {}
 for i=0, 255 do
@@ -39,6 +41,9 @@ local function get_next_common(state, in_pos, token)
     if state[STATE] then
         local st = state[STATE]
         local rule = st[token]
+        if not rule and token == EOZ then
+            return in_pos, state
+        end
         do -- pre-hooks
             local pos = -1
             local hook = st[pos]
@@ -83,7 +88,9 @@ local function get_next_common(state, in_pos, token)
 end
 
 local function get_next_table(state, in_pos)
-    if state[DATA] == nil or #state[DATA] == 0 then return in_pos, state end -- TODO end-of-stream handling
+    if state[DATA] == nil or #state[DATA] == 0 then
+        return get_next_common(state, in_pos, EOZ)
+    end
     in_pos = in_pos + 1
     local token = state[DATA][in_pos - state[OFFDATA]]
     if token == nil then
@@ -95,7 +102,13 @@ local function get_next_table(state, in_pos)
 end
 
 local function get_next_string(state, in_pos)
-    if state[DATA] == nil or #state[DATA] == 0 then return in_pos, state end -- TODO end-of-stream handling
+    if state[DATA] == nil or #state[DATA] == 0 then
+        if state[STATE] == nil then
+            return in_pos, state
+        else
+            return get_next_common(state, in_pos, EOZ)
+        end
+    end
     in_pos = in_pos + 1
     local token = optimize_lookups[string.byte(state[DATA], in_pos - state[OFFDATA], in_pos - state[OFFDATA])]
     if token == nil then
@@ -142,6 +155,7 @@ local COLLECT = {}
 return {
     STATE = STATE,
     COLLECT = COLLECT,
+    EOZ = EOZ,
     stream = stream,
     parse = parse,
     -- common utility function
@@ -154,7 +168,7 @@ return {
         if not rule then
             local t = state[COLLECT]
             t[#t+1] = token
-            if t.coalesce and #t > t.coalesce then
+            if t.coalesce and #t >= t.coalesce then
                 t[1] = table.concat(t)
                 for i=2, #t do t[i] = nil end
             end
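
parser.lua now exposes an EOZ token so rule tables can react to end of stream, and the collect buffer folds itself once it reaches `coalesce` entries rather than one entry past that. A standalone sketch of that folding behavior (append is a hypothetical stand-in for the module's collect_fallback):

    -- Standalone sketch of the coalesce behavior: append pieces, and once the
    -- buffer holds `coalesce` entries (>=, not >), fold them into one string
    -- so the table stays small.
    local function append(t, piece)
        t[#t+1] = piece
        if t.coalesce and #t >= t.coalesce then
            t[1] = table.concat(t)
            for i = 2, #t do t[i] = nil end
        end
    end

    local buf = {coalesce = 3}
    append(buf, "a"); append(buf, "b"); append(buf, "c")
    assert(#buf == 1 and buf[1] == "abc")   -- folded on the 3rd piece, not the 4th
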
diff --git a/test.lua b/test.lua
index 8672903..ef0a586 100644
--- a/test.lua
+++ b/test.lua
@@ -24,6 +24,14 @@ local function case()
     return caseno
 end
 
+do -- basic check
+    local case = case()
+    local defs = {}
+    local count = 0
+    local state, err = parser.parse(defs, function() assert(count == 0, "should be called only once"); count = count + 1 return nil end)
+    assert(state)
+end -- basic check
+
 do -- trim left spaces
     local defs = {}
     defs.self = defs
@@ -82,6 +90,7 @@ do -- lua tokens
     else
         assert(state[1] == luatokens.tokens.TK_STRING)
         assert(state[2] == "hello world")
+        assert(state.line == 1 or not state.line)
     end
 end -- lua tokens
 
@@ -101,6 +110,7 @@ do -- more lua tokens
     else
         assert(state[1] == luatokens.tokens.TK_STRING)
         assert(state[2] == "\7\8\12\10\13\9\11\92\34\39\65\65\10")
+        assert(state.line == 2)
     end
 end -- lua tokens
 
@@ -119,6 +129,7 @@ do -- even more lua tokens
     else
         assert(state[1] == luatokens.tokens.TK_STRING)
         assert(state[2] == "A")
+        assert(state.line == 1 or not state.line)
     end
 end -- lua tokens
 
@@ -157,6 +168,7 @@ do -- even more lua tokens
         assert(table.remove(state, 1) == "\252\132\128\128\128\128")
         assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
         assert(table.remove(state, 1) == "\253\191\191\191\191\191")
+        assert(state.line == 1 or not state.line)
     end
 end -- lua tokens
 
@@ -176,6 +188,7 @@ do -- simple lua tokens
         assert(table.remove(state, 1) == "[")
         assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
         assert(table.remove(state, 1) == "")
+        assert(state.line == 1 or not state.line)
     end
 end -- lua tokens
 
@@ -194,8 +207,9 @@ do -- simple long string
     else
         assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
         assert(table.remove(state, 1) == "")
+        assert(state.line == 1 or not state.line)
     end
-end -- lua tokens
+end -- long string
 
 do -- long string with depth 1
     local luatokens = require "luatokens"
@@ -212,8 +226,9 @@ do -- long string with depth 1
     else
         assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
         assert(table.remove(state, 1) == "")
+        assert(state.line == 1 or not state.line)
     end
-end -- lua tokens
+end -- long string
 
 do -- long string with "nested" long string
     local luatokens = require "luatokens"
@@ -230,5 +245,86 @@ do -- long string with "nested" long string
     else
         assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
         assert(table.remove(state, 1) == "[[]]")
+        assert(state.line == 1 or not state.line)
     end
-end -- lua tokens
+end -- long string
+
+do -- long string edge cases
+    local luatokens = require "luatokens"
+    local tokens = luatokens.defs
+    local state, err, etoken, estate = parser.parse(tokens, "[==[]=]==][==[]]==]")
+    local case = case()
+    if not state then
+        print(case, "---- IN  TOKENS ----")
+        print(case, err, etoken)
+        for i,v in pairs(estate) do
+            print(case, i, v)
+        end
+        print(case, "---- OUT TOKENS ----")
+    else
+        assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+        assert(table.remove(state, 1) == "]=")
+        assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+        assert(table.remove(state, 1) == "]")
+        assert(state.line == 1 or not state.line)
+    end
+end -- long string
+
+do -- keywords
+    local luatokens = require "luatokens"
+    local tokens = luatokens.defs
+    local state, err, etoken, estate = parser.parse(tokens, [[
+     and       break     do        else      elseif    end
+     false     for       function  goto      if        in
+     local     nil       not       or        repeat    return
+     then      true      until     while]])
+    local case = case()
+    if not state then
+        print(case, "---- IN  TOKENS ----")
+        print(case, err, etoken)
+        for i,v in pairs(estate) do
+            print(case, i, v)
+        end
+        print(case, "---- OUT TOKENS ----")
+    else
+        assert(table.remove(state, 1) == luatokens.tokens.TK_AND)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_BREAK)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_DO)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_ELSE)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_ELSEIF)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_END)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_FALSE)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_FOR)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_FUNCTION)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_GOTO)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_IF)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_IN)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_LOCAL)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_NIL)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_NOT)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_OR)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_REPEAT)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_RETURN)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_THEN)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_TRUE)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_UNTIL)
+        assert(table.remove(state, 1) == luatokens.tokens.TK_WHILE)
+        assert(state.line == 4)
+    end
+end -- keywords
+
+do -- self-lexing smoke test
+    local luatokens = require "luatokens"
+    local luatokens_file = io.open("./luatokens.lua", "r"):read((_VERSION == "5.1" or _VERSION == "5.2") and "*a" or "a")
+    local tokens = luatokens.defs
+    local state, err, etoken, estate = parser.parse(tokens, luatokens_file)
+    local case = case()
+    if not state then
+        print(case, "---- IN  TOKENS ----")
+        print(case, err, etoken)
+        for i,v in pairs(estate) do
+            print(case, i, v)
+        end
+        print(case, "---- OUT TOKENS ----")
+    end
+end -- self-lexing smoke test
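
The last case lexes this file's own source and only asserts that parsing succeeds. The EOZ handling added in parser.lua is also what lets the keyword case above end its input without trailing whitespace: a name or keyword that runs up to end of stream is still flushed. A further check in the same style, offered as a sketch rather than part of the committed suite:

    -- Sketch in the style of the cases above: a trailing name with nothing
    -- after it is still tokenized, thanks to the EOZ end-of-stream flush.
    local parser    = require "parser"
    local luatokens = require "luatokens"

    local state = parser.parse(luatokens.defs, "until last_word")
    if state then
        assert(state[1] == luatokens.tokens.TK_UNTIL)
        assert(state[2] == luatokens.tokens.TK_NAME)
        assert(state[3] == "last_word")
    end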