summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- luatokens.lua 72
-rw-r--r-- parser.lua 15
-rw-r--r-- test.lua 73
3 files changed, 141 insertions, 19 deletions
diff --git a/luatokens.lua b/luatokens.lua
index 58a7d09..226a81a 100644
--- a/luatokens.lua
+++ b/luatokens.lua
@@ -99,17 +99,21 @@ defs.base = {
     ["Z"] = "alpha",
 }
 
-local function linecount(state, token, rule)
-    -- TODO fix
-    if token == "\n" or token == "\r" then
-        state.line = (state.line or 1) + 1
-    end
+local function countline(state, token, rule) -- hook: advances the line counter; token and rule are unused
+    state.line = (state.line or 1) + 1 -- an unset counter is treated as line 1, so the first call yields 2
+end
+
+local function mknewline(t, hookn, fallback) -- installs "\n" and "\r" entries on t in place and returns t
+    fallback = fallback or t -- keys missing from the new entries resolve through __index to fallback (t itself by default)
+    t["\n"] = setmetatable({[hookn] = countline, ["\r"] = setmetatable({}, {__index=fallback})}, {__index=fallback}) -- countline sits at index hookn; the nested "\r" table is empty (presumably so "\n\r" is counted once -- confirm)
+    t["\r"] = setmetatable({[hookn] = countline, ["\n"] = setmetatable({}, {__index=fallback})}, {__index=fallback}) -- mirror image for "\r" followed by "\n"
+    return t
 end
 
 do local tstring = selfify({})
     defs.string = tstring
     tstring.defs = defs
-    do local tsescapes = setmetatable({
+    do local tsescapes = setmetatable(mknewline({
             ["'"] = "insertraw",
             ['"'] = "insertraw",
             ['\\'] = "insertraw",
@@ -123,11 +127,10 @@ do local tstring = selfify({})
             ["z"] = "skipwhitespace",
             ["u"] = "unicode",
             ["x"] = "hex",
-            ["\n"] = setmetatable({["\r"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
-            ["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
-            [1] = linecount,
-            [2] = function(state, token, rule) if token == "\r" or token == "\n" then collect_fallback(state, "\n") end end,
-        }, {__index = defs.base})
+            --["\n"] = setmetatable({[1] = countline, ["\r"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
+            --["\r"] = setmetatable({[1] = countline, ["\n"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
+            [1] = function(state, token, rule) if token == "\r" or token == "\n" then collect_fallback(state, "\n") end end,
+        }, 1, tstring), {__index = defs.base})
         defs.string.escapes = tsescapes
         tsescapes.string = defs.string
 
@@ -237,15 +240,16 @@ do local tstring = selfify({})
             tsescapes.unicode = tseunicode
         end
 
-        do local tseskipwhitespace = selfify({
+        do local tseskipwhitespace = selfify(mknewline({
                 string = defs.string,
                 whitespace = "self",
                 [""] = "string",
                 [1] = collect_fallback,
-                [2] = linecount,
-            })
+            }, 2))
+            --tseskipwhitespace["\n"] = setmetatable({[2] = countline, ["\r"] = setmetatable({}, {__index=tseskipwhitespace})}, {__index=tseskipwhitespace})
+            --tseskipwhitespace["\r"] = setmetatable({[2] = countline, ["\n"] = setmetatable({}, {__index=tseskipwhitespace})}, {__index=tseskipwhitespace})
             local tbase = defs.base
-            local tbasemap = {whitespace = "whitespace", newline = "whitespace"}
+            local tbasemap = {whitespace = "whitespace"}
             setmetatable(tseskipwhitespace, {__index = function(t, k) return tbasemap[tbase[k]] or tstring[k] end})
             tsescapes.skipwhitespace =  tseskipwhitespace
         end
@@ -276,21 +280,51 @@ do local tstring = selfify({})
     end
 end
 
-do local tlongstring = {}
+do local tlongstring = selfify({})
+    defs.longstring = tlongstring
     -- TODO
 end
 
 defs["'"] = "string_open"
 defs['"'] = "string_open"
 defs["["] = "maybe_longstring"
-defs[1] = linecount
+defs.maybe_longstring = setmetatable({ -- rule table named by defs["["]; keys not listed here fall back to defs
+    defs = defs,
+    ['['] = "longstring_open",
+    ['='] = "longstring_open",
+    longstring_count = selfify({ -- tallies each further "=" of the opening bracket
+        ["="] = function(state, token)
+            state.longstring_count = state.longstring_count + 1
+            return "self"
+        end,
+        longstring = defs.longstring
+    }),
+    longstring_open = function(state, token) -- handles the token right after "[": either "=" or a second "["
+        if token == "=" then
+            state.longstring_count = 1 -- first "=" of this opener; the old `x or 0 + 1` parsed as `x or (0 + 1)` and kept a stale count
+            return "longstring_count"
+        elseif token == "[" then
+            return "longstring"
+        end
+    end,
+    [-1] = function(state, token, rule) -- stored at index -1, which the parser runs as a pre-hook
+        if rule ~= "longstring_open" then
+            state[#state+1] = "[" -- next token does not continue a long bracket: emit the pending "[" as a plain token
+        end
+    end
+}, {__index=defs})
+
+-- these are needed for proper line counts
+--defs["\n"] = setmetatable({["\r"] = setmetatable({}, {__index=defs})}, {__index=defs})
+--defs["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=defs})}, {__index=defs})
+mknewline(defs, 1)
 
-setmetatable(defs, {__index=whitespace})
+setmetatable(defs, {__index=defs.base})
 
 function defs.string_open(state, token)
     if not state.in_string then
         state[#state+1] = TK_STRING
-        state[COLLECT] = {}
+        state[COLLECT] = {coalesce=50} -- TODO tweak this for CPU/memory tradeoff?
         state.in_string = token
         return "string"
     end
diff --git a/parser.lua b/parser.lua
index 0cd2853..bfa7dd3 100644
--- a/parser.lua
+++ b/parser.lua
@@ -39,6 +39,17 @@ local function get_next_common(state, in_pos, token)
     if state[STATE] then
         local st = state[STATE]
         local rule = st[token]
+        do -- pre-hooks: entries at st[-1], st[-2], ... are called before the transition is resolved
+            local pos = -1
+            local hook = st[pos]
+            while hook ~= nil do -- the first nil index ends the scan
+                if hook then -- a false entry is skipped without ending the scan
+                    hook(state, token, rule) -- rule is st[token] and may be nil here
+                end
+                pos = pos - 1
+                hook = st[pos]
+            end
+        end
         transition = rule
         if transition == nil then
             transition = st[""]
@@ -143,6 +154,10 @@ return {
         if not rule then
             local t = state[COLLECT]
             t[#t+1] = token
+            if t.coalesce and #t > t.coalesce then -- more than t.coalesce pieces buffered: merge them into one
+                t[1] = table.concat(t) -- the joined text takes the first slot
+                for i=2, #t do t[i] = nil end -- clear the remaining pieces, now contained in t[1]
+            end
         end
     end,
 }
diff --git a/test.lua b/test.lua
index 283b566..8672903 100644
--- a/test.lua
+++ b/test.lua
@@ -159,3 +159,76 @@ do -- even more lua tokens
         assert(table.remove(state, 1) == "\253\191\191\191\191\191")
     end
 end -- lua tokens
+
+do -- simple lua tokens
+    local luatokens = require "luatokens"
+    local tokens = luatokens.defs
+    local state, err, etoken, estate = parser.parse(tokens, [[[""]]) -- input is the three characters [ " "
+    local case = case() -- shadows the outer case with the value it returns
+    if not state then -- NOTE(review): a parse failure is only printed, nothing asserts here
+        print(case, "---- IN  TOKENS ----")
+        print(case, err, etoken)
+        for i,v in pairs(estate) do
+            print(case, i, v)
+        end
+        print(case, "---- OUT TOKENS ----")
+    else
+        assert(table.remove(state, 1) == "[") -- a lone "[" comes out as a plain token
+        assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+        assert(table.remove(state, 1) == "") -- contents of the empty quoted string
+    end
+end -- simple lua tokens
+
+do -- simple long string
+    local luatokens = require "luatokens"
+    local tokens = luatokens.defs
+    local state, err, etoken, estate = parser.parse(tokens, [=[[[]]]=]) -- input is an empty level-0 long string
+    local case = case() -- shadows the outer case with the value it returns
+    if not state then -- NOTE(review): a parse failure is only printed, nothing asserts here
+        print(case, "---- IN  TOKENS ----")
+        print(case, err, etoken)
+        for i,v in pairs(estate) do
+            print(case, i, v)
+        end
+        print(case, "---- OUT TOKENS ----")
+    else
+        assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+        assert(table.remove(state, 1) == "") -- the long string has no contents
+    end
+end -- simple long string
+
+do -- long string with depth 1
+    local luatokens = require "luatokens"
+    local tokens = luatokens.defs
+    local state, err, etoken, estate = parser.parse(tokens, [==[[=[]=]]==]) -- input is an empty level-1 long string
+    local case = case() -- shadows the outer case with the value it returns
+    if not state then -- NOTE(review): a parse failure is only printed, nothing asserts here
+        print(case, "---- IN  TOKENS ----")
+        print(case, err, etoken)
+        for i,v in pairs(estate) do
+            print(case, i, v)
+        end
+        print(case, "---- OUT TOKENS ----")
+    else
+        assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+        assert(table.remove(state, 1) == "") -- the long string has no contents
+    end
+end -- long string with depth 1
+
+do -- long string with "nested" long string
+    local luatokens = require "luatokens"
+    local tokens = luatokens.defs
+    local state, err, etoken, estate = parser.parse(tokens, [==[[=[[[]]]=]]==]) -- level-1 long string whose body is a level-0 bracket pair
+    local case = case() -- shadows the outer case with the value it returns
+    if not state then -- NOTE(review): a parse failure is only printed, nothing asserts here
+        print(case, "---- IN  TOKENS ----")
+        print(case, err, etoken)
+        for i,v in pairs(estate) do
+            print(case, i, v)
+        end
+        print(case, "---- OUT TOKENS ----")
+    else
+        assert(table.remove(state, 1) == luatokens.tokens.TK_STRING)
+        assert(table.remove(state, 1) == "[[]]") -- the inner brackets are plain text, not a nested string
+    end
+end -- long string with "nested" long string