summary refs log tree commit diff stats
path: root/parser.lua
diff options
context:
space:
mode:
Diffstat (limited to 'parser.lua')
-rw-r--r--parser.lua51
1 files changed, 33 insertions, 18 deletions
diff --git a/parser.lua b/parser.lua
index 7410571..34bfce2 100644
--- a/parser.lua
+++ b/parser.lua
@@ -16,34 +16,43 @@
     along with this program.  If not, see <https://www.gnu.org/licenses/>.
 --]]
 
+local function ts(self) return getmetatable(self).__name end
+
 -- key for STATE
-local STATE = {}
+local STATE = setmetatable({}, {__name="STATE", __tostring=ts})
 -- key for DATA
-local DATA = {}
+local DATA = setmetatable({}, {__name="DATA", __tostring=ts})
 -- key for GENERATOR
-local GEN = {}
+local GEN = setmetatable({}, {__name="GEN", __tostring=ts})
 -- key for DATA OFFSET
-local OFFDATA = {}
+local OFFDATA = setmetatable({}, {__name="OFFDATA", __tostring=ts})
 -- key for End of Stream
-local EOZ = {}
+local EOZ = setmetatable({}, {__name="EOZ", __tostring=ts})
+-- key for rules matching numeric tokens (keeps them from colliding with the numeric hook slots)
+local NUMBER = setmetatable({}, {__name="NUMBER", __tostring=ts})
+-- key for fallback rules (replaces the former "" key, so it cannot collide with an empty-string token)
+local FALLBACK = setmetatable({}, {__name="FALLBACK", __tostring=ts})
 
 local optimize_lookups = {}
 for i=0, 255 do
     optimize_lookups[i] = string.char(i)
 end
 
-local type, tostring
-    = type, tostring
+local type, tostring, string_byte
+    = type, tostring, string.byte
 
 local function get_next_common(state, in_pos, token)
     -- note: must preserve "token" - do not call recursively with a different token
-    local transition
-    if state[STATE] then
-        local st = state[STATE]
+    local transition, retry
+    local st = state[STATE]
+    if st then
         local rule = st[token]
         if not rule and token == EOZ then
             return in_pos, state
         end
+        if type(token) == "number" then
+            rule = st[NUMBER]
+        end
         do -- pre-hooks
             local pos = -1
             local hook = st[pos]
@@ -57,7 +66,7 @@ local function get_next_common(state, in_pos, token)
         end
         transition = rule
         if transition == nil then
-            transition = st[""]
+            transition = st[FALLBACK]
         end
         local recheck = true
         while recheck do
@@ -67,7 +76,10 @@ local function get_next_common(state, in_pos, token)
                 transition = st[transition]
                 recheck = true
             elseif tytrans == "function" then
-                transition = transition(state, token)
+                transition, retry = transition(state, token)
+                recheck = true
+            elseif tytrans == "table" and st[transition] ~= nil then
+                transition = st[transition]
                 recheck = true
             end
         end
@@ -88,8 +100,9 @@ local function get_next_common(state, in_pos, token)
     if not state[STATE] then
         -- unexpected token. stream consumer may attempt to recover,
         -- but we do this mostly to differentiate it from "end of stream" condition.
-        return in_pos - 1, nil, "unexpected token", token, state
+        return in_pos - 1, nil, "unexpected token", token, state, st
     end
+    if retry then in_pos = in_pos - 1 end
     return in_pos, state, transition -- TODO is this what we should be returning?
 end
 
@@ -120,7 +133,7 @@ local function get_next_string(state, in_pos)
         end
     end
     in_pos = in_pos + 1
-    local token = optimize_lookups[string.byte(state[DATA], in_pos - state[OFFDATA], in_pos - state[OFFDATA])]
+    local token = optimize_lookups[string_byte(state[DATA], in_pos - state[OFFDATA], in_pos - state[OFFDATA])]
     if token == nil then
         state[OFFDATA] = in_pos - 1
         state[DATA] = state[GEN]()
@@ -129,8 +142,8 @@ local function get_next_string(state, in_pos)
     return get_next_common(state, in_pos, token)
 end
 
-local function stream(defs, data)
-    local state = {}
+local function stream(defs, data, state)
+    local state = state or {}
     local fn
     state[STATE] = defs
     if type(data) == "function" then
@@ -145,8 +158,8 @@ local function stream(defs, data)
     return fn, state, state[OFFDATA]
 end
 
-local function parse(defs, data)
-    for pos, state, transemsg, etoken, estate in stream(defs, data) do
+local function parse(defs, data, state)
+    for pos, state, transemsg, etoken, estate in stream(defs, data, state) do
         if not state then
             -- parse error
             return nil, transemsg, etoken, estate
@@ -165,6 +178,8 @@ return {
     STATE = STATE,
     COLLECT = COLLECT,
     EOZ = EOZ,
+    FALLBACK = FALLBACK,
+    NUMBER = NUMBER,
     stream = stream,
     parse = parse,
     -- common utility function