--[[
parser.lua - table based parsing
Copyright (C) 2019 Soni L.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
--]]
-- Private keys into the parser state table. Each one is a fresh table, and
-- tables compare by identity, so these keys cannot clash with any string or
-- numeric key stored on the same state table.
-- state[STATE]: the current state definition (nil/false after an unexpected token)
local STATE = {}
-- state[DATA]: the chunk of input currently being consumed
local DATA = {}
-- state[GEN]: function called to fetch the next chunk of input
local GEN = {}
-- state[OFFDATA]: stream position just before the first token of state[DATA]
local OFFDATA = {}

-- Byte value (0..255) -> one-character string, built once up front so the
-- string reader can turn a byte into a token with a plain table lookup.
local optimize_lookups = {}
for byte = 0, 255 do
    optimize_lookups[byte] = string.char(byte)
end

-- Cache builtins as locals.
local type, tostring = type, tostring
-- Feed one token to the state machine and move it to its next state.
-- Called by the string and table readers once they have extracted a token.
--
-- state:  parser state table (as built by stream)
-- in_pos: stream position of `token`
-- token:  the token to process
--
-- Returns `in_pos, state, transition` when a next state was found, or
-- `in_pos - 1, nil, "unexpected token", token, state` when there is none.
local function get_next_common(state, in_pos, token)
    -- note: must preserve "token" - do not call recursively with a different token
    local current = state[STATE]
    local transition
    if current then
        -- `rule` is the direct lookup only; hooks receive it unmodified, so
        -- they can tell when the "" fallback entry was used instead.
        local rule = current[token]
        if rule == nil then
            transition = current[""]
        else
            transition = rule
        end

        -- Resolve indirections until something that is neither a string nor
        -- a function is left: a string names another entry of the current
        -- state definition, a function is called to compute the transition.
        while true do
            local kind = type(transition)
            if kind == "string" then
                transition = current[transition]
            elseif kind == "function" then
                transition = transition(state, token)
            else
                break
            end
        end

        -- Hooks live in the array part of the state definition.
        for _, hook in ipairs(current) do
            if hook then -- a false entry disables a hook without ending the list
                hook(state, token, rule)
            end
        end

        state[STATE] = transition -- may be nil or false
    end

    -- Deliberately a separate check rather than an else branch: the code
    -- above may just have stored nil or false into state[STATE].
    if not state[STATE] then
        -- unexpected token. stream consumer may attempt to recover,
        -- but we do this mostly to differentiate it from "end of stream" condition.
        return in_pos - 1, nil, "unexpected token", token, state
    end
    return in_pos, state, transition -- TODO is this what we should be returning?
end
-- Iterator step for input delivered as tables of tokens.
--
-- Returns just `in_pos, state` (no third value) when the current chunk is
-- nil or empty; otherwise returns whatever get_next_common returns for the
-- next token. When the current chunk runs out, the next one is fetched from
-- state[GEN] and reading resumes at the same stream position.
local function get_next_table(state, in_pos)
    while true do
        local chunk = state[DATA]
        if chunk == nil or #chunk == 0 then
            return in_pos, state
        end

        local pos = in_pos + 1
        -- state[OFFDATA] converts the stream position into a chunk index
        local token = chunk[pos - state[OFFDATA]]
        if token ~= nil then
            return get_next_common(state, pos, token)
        end

        -- Ran past the end of this chunk: record where the new chunk starts,
        -- fetch it, and go around again without advancing in_pos.
        state[OFFDATA] = in_pos
        state[DATA] = state[GEN]()
    end
end
-- Iterator step for input delivered as strings; each byte is one token,
-- passed on as a one-character string.
--
-- Returns just `in_pos, state` (no third value) when the current chunk is
-- nil or empty; otherwise returns whatever get_next_common returns for the
-- next token. When the current chunk runs out, the next one is fetched from
-- state[GEN] and reading resumes at the same stream position.
local function get_next_string(state, in_pos)
    while true do
        local chunk = state[DATA]
        if chunk == nil or #chunk == 0 then
            return in_pos, state
        end

        local pos = in_pos + 1
        -- state[OFFDATA] converts the stream position into a byte index
        local index = pos - state[OFFDATA]
        -- string.byte yields nothing past the end of the chunk, so the
        -- lookup comes back nil exactly when the chunk is exhausted
        local token = optimize_lookups[string.byte(chunk, index, index)]
        if token then
            return get_next_common(state, pos, token)
        end

        -- Ran past the end of this chunk: record where the new chunk starts,
        -- fetch it, and go around again without advancing in_pos.
        state[OFFDATA] = in_pos
        state[DATA] = state[GEN]()
    end
end
-- Build a token stream over `data`, driven by the state definitions `defs`.
--
-- defs: initial state definition
-- data: either a string holding the whole input, or a function that returns
--       one chunk of input per call
--
-- Returns the triple expected by a generic `for`: step function, parser
-- state table, initial position (0).
local function stream(defs, data)
    local state = {
        [STATE] = defs,
        [OFFDATA] = 0,
    }
    local step

    if type(data) == "string" then
        -- The whole input is a single chunk; the generator has nothing more
        -- to give.
        state[DATA] = data
        state[GEN] = function() end
        step = get_next_string
    else
        -- Pull the first chunk right away: its type decides which reader is
        -- used for the whole stream.
        local first = data()
        state[DATA] = first
        state[GEN] = data
        if type(first) == "string" then
            step = get_next_string
        else
            step = get_next_table
        end
    end

    return step, state, state[OFFDATA]
end
-- Run the state machine described by `defs` over `data` until the stream
-- reports either an error or the end of the input.
--
-- Returns `nil, message, token, state` on a parse error. Otherwise returns
-- the parser state table; the caller still has to compare state[STATE]
-- against whatever it considers a successful final state.
local function parse(defs, data)
    for _, state, transemsg, etoken, estate in stream(defs, data) do
        if not state then
            -- parse error
            return nil, transemsg, etoken, estate
        end
        if not transemsg then
            -- the step produced no transition: nothing left to read
            return state
        end
    end
end
-- Key under which collect_fallback expects a list on the parser state.
-- Nothing above uses it; it is exported for code built on this module.
local COLLECT = {}

-- common utility function
-- Stores a reference to `t` in its own `self` field and returns `t`.
local function selfify(t)
    t.self = t
    return t
end

-- common hook
-- Appends `token` to the list at state[COLLECT] whenever `rule` is nil or
-- false. The list must already exist on the state.
local function collect_fallback(state, token, rule)
    if rule then
        return
    end
    local collected = state[COLLECT]
    collected[#collected + 1] = token
end

return {
    STATE = STATE,
    COLLECT = COLLECT,
    stream = stream,
    parse = parse,
    selfify = selfify,
    collect_fallback = collect_fallback,
}