summary refs log blame commit diff stats
path: root/parser.lua
blob: beb944ebcac8551287429f90ea954dd4b1837e02 (plain) (tree)


























                                                                               




                                        





                                                                                   





                                 





                                            
                                           





                                                     





                                                            
       

                                                                          





















                                                                                    
                                                                                                                    




































                                                                                                                      


                                                     
 

                  
                      

                    
                              

                             








                                                   
 
--[[
    parser.lua - table based parsing
    Copyright (C) 2019  Soni L.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
--]]

-- The four tables below are never filled in: each one is used only as a
-- unique key into the per-stream state table.

-- state[STATE]: the state (definition) table currently in effect;
-- nil/false once an unexpected token has been seen
local STATE = {}
-- state[DATA]: the chunk of input currently being consumed
local DATA = {}
-- state[GEN]: function called to fetch the next chunk of input
local GEN = {}
-- state[OFFDATA]: number of tokens that came before the current chunk
local OFFDATA = {}

-- byte value (0..255) -> one-character string, built once at load time so
-- string input can be tokenized with a table lookup
local optimize_lookups = {}
do
    local char = string.char
    for code = 0, 255 do
        optimize_lookups[code] = char(code)
    end
end

-- local aliases of the builtins
local type, tostring = type, tostring

-- Feeds one token to the state machine and advances state[STATE].
--
-- The token is looked up in the current state table; when that lookup yields
-- nil the "" entry is used as the fallback. The result is then resolved:
--   * a string is treated as another key of the same state table
--   * a function is called as f(state, token) and its result is used instead
-- and this repeats until the result is neither a string nor a function.
-- Afterwards every truthy entry of the state table's array part is called as
-- hook(state, token, rule), where rule is the raw lookup result (before the
-- fallback and before any resolution).
--
-- Returns in_pos, state, transition on success, or
-- in_pos - 1, nil, "unexpected token", token, state when there is no state to
-- move to.
--
-- note: "token" must be preserved - do not call recursively with a different token
local function get_next_common(state, in_pos, token)
    local transition
    local current = state[STATE]
    if current then
        local rule = current[token]
        transition = rule
        if transition == nil then
            transition = current[""]
        end
        -- keep resolving aliases (strings) and callbacks (functions)
        while true do
            local kind = type(transition)
            if kind == "string" then
                transition = current[transition]
            elseif kind == "function" then
                transition = transition(state, token)
            else
                break
            end
        end
        -- hooks run while state[STATE] still refers to the old state table
        for _, hook in ipairs(current) do
            if hook then -- a false entry disables a hook without ending the list
                hook(state, token, rule)
            end
        end
        state[STATE] = transition -- may be nil or false
    end
    -- deliberately a separate check: the block above may have just stored nil or false
    if not state[STATE] then
        -- unexpected token. the stream consumer may attempt to recover; the
        -- extra return values tell this apart from the "end of stream" condition.
        return in_pos - 1, nil, "unexpected token", token, state
    end
    return in_pos, state, transition -- TODO is this what we should be returning?
end

-- Iterator step for input delivered as tables of tokens.
--
-- Reads the token that follows stream position in_pos and hands it to
-- get_next_common. When the current chunk has no entry at that index, the
-- next chunk is requested from state[GEN] and the read is retried at the same
-- position. A nil or empty chunk means end of input: in_pos and state are
-- returned with no further values.
local function get_next_table(state, in_pos)
    while true do
        local chunk = state[DATA]
        if chunk == nil or #chunk == 0 then
            return in_pos, state
        end
        local next_pos = in_pos + 1
        -- state[OFFDATA] converts the stream position into an index of this chunk
        local token = chunk[next_pos - state[OFFDATA]]
        if token ~= nil then
            return get_next_common(state, next_pos, token)
        end
        -- chunk exhausted: everything up to in_pos now lies before the new chunk
        state[OFFDATA] = in_pos
        state[DATA] = state[GEN]()
    end
end

-- Iterator step for input delivered as strings; each byte is one token.
--
-- Reads the byte that follows stream position in_pos, turns it into a
-- one-character string via optimize_lookups and hands it to get_next_common.
-- When the current chunk has no byte at that index, the next chunk is
-- requested from state[GEN] and the read is retried at the same position.
-- A nil or empty chunk means end of input: in_pos and state are returned with
-- no further values.
local function get_next_string(state, in_pos)
    while true do
        local chunk = state[DATA]
        if chunk == nil or #chunk == 0 then
            return in_pos, state
        end
        local next_pos = in_pos + 1
        -- state[OFFDATA] converts the stream position into an index of this chunk
        local index = next_pos - state[OFFDATA]
        local token = optimize_lookups[string.byte(chunk, index, index)]
        if token then
            return get_next_common(state, next_pos, token)
        end
        -- no byte at that index: everything up to in_pos now lies before the new chunk
        state[OFFDATA] = in_pos
        state[DATA] = state[GEN]()
    end
end

-- Builds the iterator triplet for a generic "for" over a token stream.
--
-- defs: the initial state table.
-- data: either a string holding the whole input, or a function that returns
--       the next chunk of input each time it is called (it is called once
--       here to obtain the first chunk).
--
-- Returns the step function, a fresh state table and the start position 0.
-- The step function is picked from the type of the first chunk: the string
-- stepper for a string, the table stepper for anything else.
local function stream(defs, data)
    local first_chunk, generator
    if type(data) == "string" then
        first_chunk = data
        -- the string is the only chunk, so "fetch more" yields nothing
        generator = function() end
    else
        first_chunk = data()
        generator = data
    end

    local step
    if type(first_chunk) == "string" then
        step = get_next_string
    else
        step = get_next_table
    end

    local state = {
        [STATE] = defs,
        [DATA] = first_chunk,
        [GEN] = generator,
        [OFFDATA] = 0,
    }
    return step, state, state[OFFDATA]
end

-- Runs stream(defs, data) until it either fails or runs out of input.
--
-- Returns nil, message, token, state when a step reports an error (a step
-- that yields no state). Returns the state table as soon as a step yields a
-- state without a transition, which is how the steppers signal end of input;
-- the caller still has to check state[STATE] against what it considers a
-- successful final state.
local function parse(defs, data)
    for _, current, trans_or_msg, bad_token, failed_state in stream(defs, data) do
        if current then
            if not trans_or_msg then
                -- parse success (maybe)
                return current
            end
        else
            -- parse error
            return nil, trans_or_msg, bad_token, failed_state
        end
    end
end

-- Nothing above this point uses the following; it is exported for users of
-- the module.

-- unique key under which collect_fallback expects a list in the state table
local COLLECT = {}

-- common utility function: stores t inside itself under id (default "self")
-- and returns t
local function selfify(t, id)
    if not id then
        id = "self"
    end
    t[id] = t
    return t
end

-- common hook: appends the token to state[COLLECT] whenever the token had no
-- rule of its own in the current state table
local function collect_fallback(state, token, rule)
    if rule then
        return
    end
    local collected = state[COLLECT]
    collected[#collected + 1] = token
end

return {
    STATE = STATE,
    COLLECT = COLLECT,
    stream = stream,
    parse = parse,
    selfify = selfify,
    collect_fallback = collect_fallback,
}