summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- luatokens.lua 30
-rw-r--r-- parser.lua 35
-rw-r--r-- test.lua 6
3 files changed, 43 insertions, 28 deletions
diff --git a/luatokens.lua b/luatokens.lua
index 81cbc11..2ac2cc3 100644
--- a/luatokens.lua
+++ b/luatokens.lua
@@ -3,6 +3,9 @@
 -- we need some stuff from here
 local parser = require "parser"
 local selfify = parser.selfify
+local EOF = parser.EOF
+local COLLECT = parser.COLLECT
+local collect_fallback = parser.collect_fallback
 
 -- "dummies"
 local TK_STRING = {}
@@ -106,25 +109,25 @@ do local tstring = selfify({})
             ["\n"] = setmetatable({["\r"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
             ["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
             [1] = linecount,
-            [2] = print
         }, {__index = tokens.base})
         tokens.string.escapes = tsescapes
         tsescapes.string = tokens.string
 
         function tsescapes.insertraw(state, token)
-            state[#state+1] = token
+            collect_fallback(state, token)
             return "string"
         end
 
         do
             local map = { ["a"] = "\a", ["b"] = "\b", ["f"] = "\f", ["n"] = "\n", ["r"] = "\r", ["t"] = "\t", ["v"] = "\v" }
             function tsescapes.insertmap(state, token)
-                state[#state+1] = map[token]
+                collect_fallback(state, map[token])
                 return "string"
             end
         end
 
         function tsescapes.digit(state, token)
+            print(state, token)
             local digit = string.find("1234567890", token, 1, true)
             local num = state.in_digit
             if digit then
@@ -138,21 +141,21 @@ do local tstring = selfify({})
             if num > 255 then
                 return nil
             end
-            state[#state+1] = string.char(num)
+            collect_fallback(state, string.char(num))
             state.in_digit = nil
             state.c = nil
             return "string"
         end
         tsescapes.digitc = setmetatable(selfify({[""] = tsescapes.digit, digitc = "self", string = tstring}), {__index=tstring})
 
-        tsescapes.hex = setmetatable(selfify({string = tokens.string}), {__index=tokens.base})
+        tsescapes.hex = setmetatable(selfify({string = tokens.string, digit = "hexdigit"}), {__index=tokens.base})
         function tsescapes.hex.hexdigit(state, token)
             local digit = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
             assert(digit, "this should never be called for non-hex-digits")
             local num = state.in_hex
             if num then
                 num = num * 16 + digit % 16
-                state[#state+1] = string.char(num)
+                collect_fallback(state, string.char(num))
                 state.in_hex = nil
                 return "string"
             else
@@ -165,7 +168,7 @@ do local tstring = selfify({})
                 string = tokens.string,
                 whitespace = "self",
                 [""] = "string",
-                [1] = parser.insert_fallback,
+                [1] = collect_fallback,
                 [2] = linecount,
             })
             local tbase = tokens.base
@@ -185,17 +188,13 @@ do local tstring = selfify({})
 
     tstring[""] = "self"
 
-    tstring[1] = parser.insert_fallback
+    tstring[1] = collect_fallback
 
     function tstring.close(state, token)
         if state.in_string == token then
-            local i = state.string_start
             state.in_string = nil
-            state.string_start = nil
-            state[i+1] = table.concat(state, '', i+1)
-            for j=i+2, #state do
-                state[j]=nil
-            end
+            state[#state+1] = table.concat(state[COLLECT])
+            state[COLLECT] = nil
             return "tokens"
         else
             state[#state+1] = token
@@ -206,14 +205,15 @@ end
 
 tokens["'"] = "string_open"
 tokens['"'] = "string_open"
+tokens[1] = linecount
 
 setmetatable(tokens, {__index=whitespace})
 
 function tokens.string_open(state, token)
     if not state.in_string then
         state[#state+1] = TK_STRING
+        state[COLLECT] = {}
         state.in_string = token
-        state.string_start = #state
         return "string"
     end
     assert("this shouldn't happen")
diff --git a/parser.lua b/parser.lua
index ece8a8f..ff8378c 100644
--- a/parser.lua
+++ b/parser.lua
@@ -25,6 +25,11 @@ local GEN = {}
 -- key for DATA OFFSET
 local OFFDATA = {}
 
+local optimize_lookups = {}
+for i=0, 255 do
+    optimize_lookups[i] = string.char(i)
+end
+
 local type, tostring
     = type, tostring
 
@@ -81,7 +86,7 @@ end
 local function get_next_string(state, in_pos)
     if state[DATA] == nil or #state[DATA] == 0 then return in_pos, state end
     in_pos = in_pos + 1
-    local token = state[DATA]:sub(in_pos - state[OFFDATA], in_pos - state[OFFDATA])
+    local token = optimize_lookups[string.byte(state[DATA], in_pos - state[OFFDATA], in_pos - state[OFFDATA])] or ""
     if token == "" then
         state[OFFDATA] = in_pos - 1
         state[DATA] = state[GEN]()
@@ -119,21 +124,25 @@ local function parse(defs, data)
     end
 end
 
--- utility function that's quite common
-local function selfify(t)
-    t.self = t
-    return t
-end
--- common hook
-local function insert_fallback(state, token, rule)
-    if not rule then
-        state[#state+1] = token
-    end
-end
+-- not used by any of the above but useful for others
+
+local COLLECT = {}
 
 return {
     STATE = STATE,
+    COLLECT = COLLECT,
     stream = stream,
     parse = parse,
-    selfify = selfify,
+    -- common utility function
+    selfify = function(t)
+        t.self = t
+        return t
+    end,
+    -- common hook
+    collect_fallback = function(state, token, rule)
+        if not rule then
+            local t = state[COLLECT]
+            t[#t+1] = token
+        end
+    end,
 }
diff --git a/test.lua b/test.lua
index 1290c97..076d1e8 100644
--- a/test.lua
+++ b/test.lua
@@ -100,6 +100,12 @@ do -- more lua tokens
     else
         for i,v in ipairs(state) do
             print(case, i, v)
+            if v == luatokens.TK_STRING then
+                in_string = true
+            elseif in_string then
+                print(case, v:gsub(".", function(v) return "\\"..string.byte(v) end))
+                in_string = false
+            end
         end
     end
     print(case, "---- OUT TOKENS ----")