diff options
-rw-r--r-- | luatokens.lua | 30 | ||||
-rw-r--r-- | parser.lua | 35 | ||||
-rw-r--r-- | test.lua | 6 |
3 files changed, 43 insertions, 28 deletions
diff --git a/luatokens.lua b/luatokens.lua index 81cbc11..2ac2cc3 100644 --- a/luatokens.lua +++ b/luatokens.lua @@ -3,6 +3,9 @@ -- we need some stuff from here local parser = require "parser" local selfify = parser.selfify +local EOF = parser.EOF +local COLLECT = parser.COLLECT +local collect_fallback = parser.collect_fallback -- "dummies" local TK_STRING = {} @@ -106,25 +109,25 @@ do local tstring = selfify({}) ["\n"] = setmetatable({["\r"] = setmetatable({}, {__index=tstring})}, {__index=tstring}), ["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=tstring})}, {__index=tstring}), [1] = linecount, - [2] = print }, {__index = tokens.base}) tokens.string.escapes = tsescapes tsescapes.string = tokens.string function tsescapes.insertraw(state, token) - state[#state+1] = token + collect_fallback(state, token) return "string" end do local map = { ["a"] = "\a", ["b"] = "\b", ["f"] = "\f", ["n"] = "\n", ["r"] = "\r", ["t"] = "\t", ["v"] = "\v" } function tsescapes.insertmap(state, token) - state[#state+1] = map[token] + collect_fallback(state, map[token]) return "string" end end function tsescapes.digit(state, token) + print(state, token) local digit = string.find("1234567890", token, 1, true) local num = state.in_digit if digit then @@ -138,21 +141,21 @@ do local tstring = selfify({}) if num > 255 then return nil end - state[#state+1] = string.char(num) + collect_fallback(state, string.char(num)) state.in_digit = nil state.c = nil return "string" end tsescapes.digitc = setmetatable(selfify({[""] = tsescapes.digit, digitc = "self", string = tstring}), {__index=tstring}) - tsescapes.hex = setmetatable(selfify({string = tokens.string}), {__index=tokens.base}) + tsescapes.hex = setmetatable(selfify({string = tokens.string, digit = "hexdigit"}), {__index=tokens.base}) function tsescapes.hex.hexdigit(state, token) local digit = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true) assert(digit, "this should never be called for non-hex-digits") local num = state.in_hex if num then num = num * 16 + digit % 16 - state[#state+1] = string.char(num) + collect_fallback(state, string.char(num)) state.in_hex = nil return "string" else @@ -165,7 +168,7 @@ do local tstring = selfify({}) string = tokens.string, whitespace = "self", [""] = "string", - [1] = parser.insert_fallback, + [1] = collect_fallback, [2] = linecount, }) local tbase = tokens.base @@ -185,17 +188,13 @@ do local tstring = selfify({}) tstring[""] = "self" - tstring[1] = parser.insert_fallback + tstring[1] = collect_fallback function tstring.close(state, token) if state.in_string == token then - local i = state.string_start state.in_string = nil - state.string_start = nil - state[i+1] = table.concat(state, '', i+1) - for j=i+2, #state do - state[j]=nil - end + state[#state+1] = table.concat(state[COLLECT]) + state[COLLECT] = nil return "tokens" else state[#state+1] = token @@ -206,14 +205,15 @@ end tokens["'"] = "string_open" tokens['"'] = "string_open" +tokens[1] = linecount setmetatable(tokens, {__index=whitespace}) function tokens.string_open(state, token) if not state.in_string then state[#state+1] = TK_STRING + state[COLLECT] = {} state.in_string = token - state.string_start = #state return "string" end assert("this shouldn't happen") diff --git a/parser.lua b/parser.lua index ece8a8f..ff8378c 100644 --- a/parser.lua +++ b/parser.lua @@ -25,6 +25,11 @@ local GEN = {} -- key for DATA OFFSET local OFFDATA = {} +local optimize_lookups = {} +for i=0, 255 do + optimize_lookups[i] = string.char(i) +end + local type, tostring = type, tostring @@ -81,7 +86,7 @@ end local function get_next_string(state, in_pos) if state[DATA] == nil or #state[DATA] == 0 then return in_pos, state end in_pos = in_pos + 1 - local token = state[DATA]:sub(in_pos - state[OFFDATA], in_pos - state[OFFDATA]) + local token = optimize_lookups[string.byte(state[DATA], in_pos - state[OFFDATA], in_pos - state[OFFDATA])] or "" if token == "" then state[OFFDATA] = in_pos - 1 state[DATA] = state[GEN]() @@ -119,21 +124,25 @@ local function parse(defs, data) end end --- utility function that's quite common -local function selfify(t) - t.self = t - return t -end --- common hook -local function insert_fallback(state, token, rule) - if not rule then - state[#state+1] = token - end -end +-- not used by any of the above but useful for others + +local COLLECT = {} return { STATE = STATE, + COLLECT = COLLECT, stream = stream, parse = parse, - selfify = selfify, + -- common utility function + selfify = function(t) + t.self = t + return t + end, + -- common hook + collect_fallback = function(state, token, rule) + if not rule then + local t = state[COLLECT] + t[#t+1] = token + end + end, } diff --git a/test.lua b/test.lua index 1290c97..076d1e8 100644 --- a/test.lua +++ b/test.lua @@ -100,6 +100,12 @@ do -- more lua tokens else for i,v in ipairs(state) do print(case, i, v) + if v == luatokens.TK_STRING then + in_string = true + elseif in_string then + print(case, v:gsub(".", function(v) return "\\"..string.byte(v) end)) + in_string = false + end end end print(case, "---- OUT TOKENS ----") |