summary refs log tree commit diff stats
path: root/parser.lua
blob: 4f4e166ab430e4076139609634609a62918f3f38 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
--[[
    parser.lua - table based parsing
    Copyright (C) 2019  Soni L.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
--]]

-- Sentinel keys. Each one is a fresh empty table, so it is only equal to
-- itself and can be used as a field name on the stream state object.

-- STATE: field holding the current state table (the active rule set)
local STATE = {}
-- DATA: field holding the chunk of input currently being read
local DATA = {}
-- GEN: field holding the function called to obtain the next chunk
local GEN = {}
-- OFFDATA: field holding the stream position just before the current chunk,
-- i.e. (stream position - OFFDATA) is the index inside the chunk
local OFFDATA = {}
-- EOZ: token handed to the state machine at End of Stream
local EOZ = {}

-- byte value (0..255) -> one-character string, filled in once at load time
-- and used by the string reader to turn string.byte() results into tokens
local optimize_lookups = {}
for byte = 0, 255 do
    optimize_lookups[byte] = string.char(byte)
end

-- keep local references to these globals
local type, tostring = type, tostring

-- Feeds a single token to the state machine stored in state[STATE].
--
--   state  - the stream state table
--   in_pos - stream position associated with `token`
--   token  - the token to process (EOZ at end of stream)
--
-- Returns one of:
--   in_pos, state, transition
--       a transition was resolved and is now the current state
--   in_pos, state
--       token is EOZ and the current state has no (truthy) rule for it
--   in_pos - 1, nil, "unexpected token", token, state
--       there is no current state to continue from, either because the
--       resolved transition was nil/false or because none was set on entry
local function get_next_common(state, in_pos, token)
    -- note: must preserve "token" - do not call recursively with a different token
    local transition
    local rules = state[STATE]
    if rules then
        local rule = rules[token]
        if not rule and token == EOZ then
            -- end of stream and nothing registered for it: no hooks, no transition
            return in_pos, state
        end

        -- pre-hooks are stored at -1, -2, ... and the walk ends at the first
        -- nil slot; a false slot is skipped without ending the walk
        local slot = -1
        while true do
            local pre_hook = rules[slot]
            if pre_hook == nil then
                break
            end
            if pre_hook then
                pre_hook(state, token, rule)
            end
            slot = slot - 1
        end

        -- no entry at all for this token: fall back to the "" rule
        -- (an explicit false entry is kept as-is)
        transition = rule
        if transition == nil then
            transition = rules[""]
        end

        -- keep resolving until the transition is neither a string (alias for
        -- another key of the current state) nor a function (called to
        -- compute the transition)
        while true do
            local kind = type(transition)
            if kind == "string" then
                transition = rules[transition]
            elseif kind == "function" then
                transition = transition(state, token)
            else
                break
            end
        end

        -- post-hooks are the array part of the current state
        for _, post_hook in ipairs(rules) do
            if post_hook then -- allow overriding/disabling hooks
                post_hook(state, token, rule)
            end
        end

        state[STATE] = transition -- may be nil or false
    end

    -- deliberately a separate test rather than an else branch:
    -- the code above may have just stored nil or false in state[STATE]
    if not state[STATE] then
        -- unexpected token. stream consumer may attempt to recover,
        -- but we do this mostly to differentiate it from "end of stream" condition.
        return in_pos - 1, nil, "unexpected token", token, state
    end
    return in_pos, state, transition -- TODO is this what we should be returning?
end

-- Iterator step used when the input chunks are tables of tokens.
--
--   state  - the stream state table
--   in_pos - stream position of the previously consumed token
--
-- Reads the token following in_pos from the current chunk and passes it to
-- get_next_common. When the current chunk has no token at that index, the
-- next chunk is requested from state[GEN]. A nil or zero-length chunk is
-- treated as end of stream and EOZ is passed to get_next_common instead.
-- Returns whatever get_next_common returns.
local function get_next_table(state, in_pos)
    while true do
        local chunk = state[DATA]
        if chunk == nil or #chunk == 0 then
            return get_next_common(state, in_pos, EOZ)
        end

        local token = chunk[in_pos + 1 - state[OFFDATA]]
        if token ~= nil then
            return get_next_common(state, in_pos + 1, token)
        end

        -- ran past the end of this chunk: remember where the next chunk
        -- starts, fetch it, and try again from the same position
        state[OFFDATA] = in_pos
        state[DATA] = state[GEN]()
    end
end

-- Iterator step used when the input chunks are strings; every byte of the
-- input becomes a one-character string token.
--
--   state  - the stream state table
--   in_pos - stream position of the previously consumed token
--
-- Reads the byte following in_pos from the current chunk and passes the
-- matching one-character string to get_next_common. When the current chunk
-- has no byte at that index, the next chunk is requested from state[GEN].
-- On a nil or empty chunk: if state[STATE] is nil this returns
-- in_pos, state directly, otherwise EOZ is passed to get_next_common.
local function get_next_string(state, in_pos)
    while true do
        local chunk = state[DATA]
        if chunk == nil or #chunk == 0 then
            if state[STATE] == nil then
                return in_pos, state
            end
            return get_next_common(state, in_pos, EOZ)
        end

        -- string.byte yields no value past the end of the chunk,
        -- which makes the lookup below come back nil
        local index = in_pos + 1 - state[OFFDATA]
        local token = optimize_lookups[string.byte(chunk, index, index)]
        if token ~= nil then
            return get_next_common(state, in_pos + 1, token)
        end

        -- ran past the end of this chunk: remember where the next chunk
        -- starts, fetch it, and try again from the same position
        state[OFFDATA] = in_pos
        state[DATA] = state[GEN]()
    end
end

-- Builds the iterator triple (step function, state, initial position) for
-- use in a generic `for` loop.
--
--   defs - the initial state table, stored in state[STATE]
--   data - either a string holding the whole input, or a function that is
--          called (with no arguments) each time another chunk is needed
--
-- With a function, the first chunk is fetched immediately and its type
-- selects the reader: a string chunk selects the string reader, anything
-- else selects the table reader.
local function stream(defs, data)
    local state = {
        [STATE] = defs,
        [OFFDATA] = 0,
    }
    local step

    if type(data) == "string" then
        -- single chunk; the generator never produces a second one
        state[DATA] = data
        state[GEN] = function() end
        step = get_next_string
    else
        local first_chunk = data()
        state[DATA] = first_chunk
        state[GEN] = data
        if type(first_chunk) == "string" then
            step = get_next_string
        else
            step = get_next_table
        end
    end

    return step, state, state[OFFDATA]
end

-- Runs stream(defs, data) until it either fails or stops producing
-- transitions.
--
-- Returns the stream state table once a step yields no transition; the
-- caller still has to compare state[STATE] with what it considers a
-- successful final state.
-- Returns nil, message, token, state when a step reports an error
-- (the step's second result is nil/false).
local function parse(defs, data)
    for _, state, transemsg, etoken, estate in stream(defs, data) do
        if state then
            if not transemsg then
                -- parse success (maybe) - see the note above about state[STATE]
                return state
            end
        else
            -- parse error
            return nil, transemsg, etoken, estate
        end
    end
end

-- not used by any of the above but useful for others

-- key under which collect_fallback looks up its collection table
local COLLECT = {}

-- common utility function:
-- stores `t` inside itself under key `id` ("self" when id is nil or false)
-- and returns `t`
local function selfify(t, id)
    local key = id or "self"
    t[key] = t
    return t
end

-- common hook:
-- when no rule matched the token (rule is nil or false), appends the token
-- to state[COLLECT]. If that table has a `coalesce` field and now holds at
-- least that many entries, the entries are concatenated into a single
-- string kept at index 1.
local function collect_fallback(state, token, rule)
    if rule then
        return
    end
    local collected = state[COLLECT]
    collected[#collected + 1] = token
    if collected.coalesce and #collected >= collected.coalesce then
        collected[1] = table.concat(collected)
        -- drop everything after the merged entry
        for i = #collected, 2, -1 do
            collected[i] = nil
        end
    end
end

return {
    STATE = STATE,
    COLLECT = COLLECT,
    EOZ = EOZ,
    stream = stream,
    parse = parse,
    selfify = selfify,
    collect_fallback = collect_fallback,
}