summary refs log blame commit diff stats
path: root/src/cratera/parser.lua
blob: ade568ca9be07cb007d53bb73da1c9aa875dcd48 (plain) (tree)
1
2
    
                                         















                                                                               

                                                            
                
                                                               
               
                                                             
                    
                                                           
                      
                                                                   
                        




                                                                     
 




                                        

                                 


                                                                                   


                           
                              


                                         


                                       










                                            

                                 
                                     





                                            
                                           

                                             



                                                                    


                              








                                                                   


                                                        
       

                                                                          

                                                                                    
                                                                    
       
                                         



                                                                                 
                                                   




                                                      
       










                                                      






                                                      
                       
                                                                                                              
                        






                                                     

                                        

                       
                                    

                            


                                   
       
                                                                           



                                    

                                                                             









                                                                                                                      


                                                     
 

                  
                
                      
              

                        

                    
                              

                             






                                                   
                                                   


                                             

           
 
--[[
    This file is part of Cratera Compiler
    Copyright (C) 2019  Soni L.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
--]]

-- Shared __tostring handler: yields the __name field stored in the
-- metatable of the value being converted.
local function ts(self)
    local meta = getmetatable(self)
    return meta.__name
end

-- Builds a unique, empty table to be used as a key. Its metatable carries
-- the given name and the shared `ts` __tostring handler, so tostring(key)
-- yields that name.
local function new_key(name)
    return setmetatable({}, {__name = name, __tostring = ts})
end

-- current rule table (the parser state proper)
local STATE = new_key("STATE")
-- current chunk of input data
local DATA = new_key("DATA")
-- generator that supplies further chunks of input data
local GEN = new_key("GEN")
-- position offset of the current data chunk
local OFFDATA = new_key("OFFDATA")
-- End of Stream token
local EOZ = new_key("EOZ")
-- rule key for numeric tokens (prevents conflicts with hooks, which live at integer keys)
local NUMBER = new_key("NUMBER")
-- rule key for fallback rules (prevents conflicts with the empty string)
local FALLBACK = new_key("FALLBACK")

-- Maps every byte value (0..255) to the corresponding one-character string.
local optimize_lookups = {}
do
    local char = string.char
    for byte = 0, 255 do
        optimize_lookups[byte] = char(byte)
    end
end

-- Local aliases for builtins used by the functions below.
local type = type
local tostring = tostring
local string_byte = string.byte

-- Calls the hooks stored in `st` at integer keys first, first+step,
-- first+2*step, ... and stops at the first key whose value is nil.
-- An entry of `false` is skipped but does not end the walk.
-- (ipairs is deliberately not used: Lua 5.1/5.2.)
local function run_hooks(st, first, step, state, token, rule)
    local idx = first
    local hook = st[idx]
    while hook ~= nil do
        if hook then
            hook(state, token, rule)
        end
        idx = idx + step
        hook = st[idx]
    end
end

-- Feeds one token to the current rule table (state[STATE]) and stores the
-- resulting transition back into state[STATE].
--
-- Returns one of:
--   in_pos, state
--       token is EOZ and the rule table has no (truthy) entry for it;
--   in_pos - 1, nil, "unexpected token", token, state, st
--       there is no current rule table, or the transition resolved to
--       nil/false (st is the rule table that was in effect);
--   in_pos, state, transition
--       otherwise; in_pos is decremented by one first when a rule function
--       returned a truthy second value ("retry").
local function get_next_common(state, in_pos, token)
    -- note: must preserve "token" - do not call recursively with a different token
    local transition, retry
    local st = state[STATE]
    if st then
        local rule = st[token]
        if token == EOZ and not rule then
            return in_pos, state
        end
        if type(token) == "number" then
            -- numeric tokens always go through the NUMBER rule
            rule = st[NUMBER]
        end

        -- pre-hooks live at keys -1, -2, ...
        run_hooks(st, -1, -1, state, token, rule)

        transition = rule
        if transition == nil then
            transition = st[FALLBACK]
        end

        -- Keep resolving until the transition is neither an alias nor a
        -- function: strings (and tables that are keys of st) are looked up
        -- in st, functions are called to obtain the next candidate.
        while true do
            local kind = type(transition)
            if kind == "function" then
                transition, retry = transition(state, token)
            elseif kind == "string"
                or (kind == "table" and st[transition] ~= nil) then
                transition = st[transition]
            else
                break
            end
        end

        -- post-hooks live at keys 1, 2, ...
        run_hooks(st, 1, 1, state, token, rule)

        state[STATE] = transition -- may be nil or false
    end
    -- deliberately a separate check: the block above may have stored nil or false
    if not state[STATE] then
        -- unexpected token. stream consumer may attempt to recover,
        -- but we do this mostly to differentiate it from "end of stream" condition.
        return in_pos - 1, nil, "unexpected token", token, state, st
    end
    if retry then
        in_pos = in_pos - 1
    end
    return in_pos, state, transition -- TODO is this what we should be returning?
end

-- Iterator step for table (token sequence) input.
-- Advances to position in_pos + 1, fetches the token stored there in the
-- current chunk and hands it to get_next_common. When the chunk is used up,
-- the next one is requested from state[GEN]; a nil or empty chunk is treated
-- as end of stream (EOZ).
local function get_next_table(state, in_pos)
    local chunk = state[DATA]
    if chunk == nil or #chunk == 0 then
        if state[STATE] ~= nil then
            return get_next_common(state, in_pos, EOZ)
        end
        return in_pos, state
    end

    local next_pos = in_pos + 1
    local token = chunk[next_pos - state[OFFDATA]]
    if token ~= nil then
        return get_next_common(state, next_pos, token)
    end

    -- current chunk exhausted: record where the new chunk starts, fetch it
    -- and try again from that position
    state[OFFDATA] = next_pos - 1
    state[DATA] = state[GEN]()
    return get_next_table(state, state[OFFDATA])
end

-- Iterator step for string input.
-- Advances to position in_pos + 1, turns the byte found there in the current
-- chunk into a one-character token (via optimize_lookups) and hands it to
-- get_next_common. When the chunk is used up, the next one is requested from
-- state[GEN]; a nil or empty chunk is treated as end of stream (EOZ).
local function get_next_string(state, in_pos)
    local chunk = state[DATA]
    if chunk == nil or #chunk == 0 then
        if state[STATE] ~= nil then
            return get_next_common(state, in_pos, EOZ)
        end
        return in_pos, state
    end

    local next_pos = in_pos + 1
    local idx = next_pos - state[OFFDATA]
    -- string_byte yields no value past the end of the chunk, which makes
    -- the lookup (and therefore the token) nil
    local token = optimize_lookups[string_byte(chunk, idx, idx)]
    if token ~= nil then
        return get_next_common(state, next_pos, token)
    end

    -- current chunk exhausted: record where the new chunk starts, fetch it
    -- and try again from that position
    state[OFFDATA] = next_pos - 1
    state[DATA] = state[GEN]()
    return get_next_string(state, state[OFFDATA])
end

-- Prepares `state` (a fresh table when none is given) for parsing `data`
-- with the rule table `defs`, and returns the triple expected by a generic
-- `for`: iterator function, state table, initial position.
--
-- `data` may be a function, in which case it is called once right away for
-- the first chunk and again each time a chunk runs out, or the input itself.
-- The table iterator is chosen when the first chunk is a table; otherwise
-- the string iterator is used.
local function stream(defs, data, state)
    state = state or {}
    state[STATE] = defs

    if type(data) == "function" then
        state[DATA] = data()
        state[GEN] = data
    else
        state[DATA] = data
        -- nothing more to fetch once the single chunk is consumed
        state[GEN] = function() end
    end

    local step
    if type(state[DATA]) == "table" then
        step = get_next_table
    else
        step = get_next_string
    end

    state[OFFDATA] = 0
    return step, state, state[OFFDATA]
end

-- Drives stream(defs, data, state) until it either fails or stops producing
-- transitions.
-- Returns nil, message, offending token, state table on a parse error.
-- Otherwise returns the state table; the caller still has to compare
-- state[STATE] with whatever it considers a successful final state.
local function parse(defs, data, state)
    for _, current, trans_or_msg, etoken, estate in stream(defs, data, state) do
        if not current then
            -- parse error
            return nil, trans_or_msg, etoken, estate
        end
        if not trans_or_msg then
            -- parse success (maybe)
            return current
        end
    end
end

-- not used by any of the above but useful for others

-- Key under which collect_fallback looks up its token buffer in the state.
local COLLECT = {}

-- common utility function:
-- stores `t` in itself under key `id` ("self" when id is nil/false) and
-- returns `t`.
local function selfify(t, id)
    local key = id or "self"
    t[key] = t
    return t
end

-- common hook:
-- when no rule matched the token, appends the token to the buffer found at
-- state[COLLECT]. If the buffer has a `coalesce` field and its length has
-- reached that value, the buffer is collapsed into a single concatenated
-- entry at index 1.
local function collect_fallback(state, token, rule)
    if rule then
        return
    end
    local buf = state[COLLECT]
    buf[#buf + 1] = token
    if buf.coalesce and #buf >= buf.coalesce then
        buf[1] = table.concat(buf)
        for i = 2, #buf do
            buf[i] = nil
        end
    end
end

return {
    STATE = STATE,
    DATA = DATA,
    COLLECT = COLLECT,
    EOZ = EOZ,
    FALLBACK = FALLBACK,
    NUMBER = NUMBER,
    stream = stream,
    parse = parse,
    selfify = selfify,
    collect_fallback = collect_fallback,
}