summary refs log tree commit diff stats
path: root/cratera/parser.lua
blob: ade568ca9be07cb007d53bb73da1c9aa875dcd48 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
--[[
    This file is part of Cratera Compiler
    Copyright (C) 2019  Soni L.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
--]]

-- Shared __tostring handler for the sentinel keys below: returns the
-- `__name` field stored in the metatable of `self`.
local function ts(self)
    local meta = getmetatable(self)
    return meta.__name
end

-- Creates a fresh empty table to be used as a unique key. Every key gets its
-- own metatable carrying `name` and the shared __tostring handler.
local function sentinel(name)
    return setmetatable({}, {__name = name, __tostring = ts})
end

local STATE = sentinel("STATE")       -- key for STATE
local DATA = sentinel("DATA")         -- key for DATA
local GEN = sentinel("GEN")           -- key for GENERATOR
local OFFDATA = sentinel("OFFDATA")   -- key for DATA OFFSET
local EOZ = sentinel("EOZ")           -- key for End of Stream
-- key for number rules (prevent conflict with hooks)
local NUMBER = sentinel("NUMBER")
-- key for fallback rules (prevent conflict with empty string)
local FALLBACK = sentinel("FALLBACK")

-- Localized builtins used by the functions below.
local type, tostring, string_byte
    = type, tostring, string.byte

-- Maps every byte value (0..255) to the corresponding one-character string,
-- so the string iterator can turn a byte code into a token with a table read.
local optimize_lookups = {}
do
    local char = string.char
    for byte = 0, 255 do
        optimize_lookups[byte] = char(byte)
    end
end

-- Core transition step shared by the table and string iterators.
--
-- Feeds one token to the rule table stored in `state[STATE]` and replaces
-- `state[STATE]` with the resolved transition.
--
-- Parameters:
--   state  - parser state table; `state[STATE]` holds the current rule table
--   in_pos - position associated with `token` by the calling iterator
--   token  - the token to process (may be the EOZ sentinel)
--
-- Returns one of:
--   in_pos, state
--       EOZ arrived and the current rule table has no rule for it
--       (returned before any hook runs)
--   in_pos, state, transition
--       token accepted; in_pos is decremented by one when a rule function
--       returned a truthy second value ("retry")
--   in_pos - 1, nil, "unexpected token", token, state, st
--       no usable transition; `st` is the rule table that was current on entry
local function get_next_common(state, in_pos, token)
    -- note: must preserve "token" - do not call recursively with a different token
    local transition, retry
    local st = state[STATE]
    if st then
        -- direct lookup of the token in the current rule table
        local rule = st[token]
        if not rule and token == EOZ then
            -- end of stream with no EOZ rule: hand the state back untouched
            return in_pos, state
        end
        if type(token) == "number" then
            -- integer keys of the rule table are hook slots (see the hook
            -- loops below), so every numeric token shares the NUMBER rule
            rule = st[NUMBER]
        end
        do -- pre-hooks
            -- pre-hooks live at st[-1], st[-2], ...; the walk stops at the
            -- first nil slot, while a `false` slot is skipped without
            -- ending the walk
            local pos = -1
            local hook = st[pos]
            while hook ~= nil do
                if hook then
                    hook(state, token, rule)
                end
                pos = pos - 1
                hook = st[pos]
            end
        end
        transition = rule
        if transition == nil then
            -- only a missing (nil) rule falls back; an explicit `false` rule
            -- is kept and ends up as "unexpected token" below
            transition = st[FALLBACK]
        end
        -- resolve indirections until the transition is none of:
        -- a string, a function, or a table that is itself a key of `st`
        local recheck = true
        while recheck do
            recheck = false
            local tytrans = type(transition)
            if tytrans == "string" then
                -- string: alias for another entry of the same rule table
                transition = st[transition]
                recheck = true
            elseif tytrans == "function" then
                -- function: computes the transition; a truthy second result
                -- requests that in_pos be stepped back before returning
                transition, retry = transition(state, token)
                recheck = true
            elseif tytrans == "table" and st[transition] ~= nil then
                -- table that is also a key of `st`: follow it as an alias
                transition = st[transition]
                recheck = true
            end
        end
        do -- post-hooks CANNOT USE ipairs HERE BECAUSE Lua 5.1/5.2
            -- post-hooks live at st[1], st[2], ...; same walking rules as the
            -- pre-hooks. Hooks receive the looked-up `rule`, not the
            -- resolved transition.
            local pos = 1
            local hook = st[pos]
            while hook ~= nil do
                if hook then
                    hook(state, token, rule)
                end
                pos = pos + 1
                hook = st[pos]
            end
        end
        state[STATE] = transition -- may be nil or false
    end
    -- must NOT use elseif here - the above may set state to nil or false!
    if not state[STATE] then
        -- unexpected token. stream consumer may attempt to recover,
        -- but we do this mostly to differentiate it from "end of stream" condition.
        return in_pos - 1, nil, "unexpected token", token, state, st
    end
    -- retry: report the previous position so the returned position does not
    -- advance past this token
    if retry then in_pos = in_pos - 1 end
    return in_pos, state, transition -- TODO is this what we should be returning?
end

-- Iterator step for table-shaped input: the token at the position after
-- `in_pos` is read from the current chunk `state[DATA]` (indexed relative to
-- `state[OFFDATA]`) and passed to get_next_common. When the chunk is
-- exhausted, the next chunk is requested from `state[GEN]`; a nil or empty
-- chunk is treated as end of stream.
local function get_next_table(state, in_pos)
    local chunk = state[DATA]
    if chunk == nil or #chunk == 0 then
        -- no more input: signal EOZ unless there is no current rule table
        if state[STATE] ~= nil then
            return get_next_common(state, in_pos, EOZ)
        end
        return in_pos, state
    end
    local next_pos = in_pos + 1
    local token = chunk[next_pos - state[OFFDATA]]
    if token ~= nil then
        return get_next_common(state, next_pos, token)
    end
    -- chunk exhausted: remember how many tokens came before the new chunk,
    -- fetch it, and retry from the same position
    state[OFFDATA] = next_pos - 1
    state[DATA] = state[GEN]()
    return get_next_table(state, state[OFFDATA])
end

-- Iterator step for string-shaped input: the byte at the position after
-- `in_pos` is read from the current chunk `state[DATA]` (indexed relative to
-- `state[OFFDATA]`), converted to a one-character string and passed to
-- get_next_common. When the chunk is exhausted, the next chunk is requested
-- from `state[GEN]`; a nil or empty chunk is treated as end of stream.
local function get_next_string(state, in_pos)
    local chunk = state[DATA]
    if chunk == nil or #chunk == 0 then
        -- no more input: signal EOZ unless there is no current rule table
        if state[STATE] ~= nil then
            return get_next_common(state, in_pos, EOZ)
        end
        return in_pos, state
    end
    local next_pos = in_pos + 1
    local index = next_pos - state[OFFDATA]
    -- string_byte yields no value past the end of the chunk, which leaves
    -- `code` (and therefore `token`) nil
    local code = string_byte(chunk, index, index)
    local token = optimize_lookups[code]
    if token ~= nil then
        return get_next_common(state, next_pos, token)
    end
    -- chunk exhausted: remember how many bytes came before the new chunk,
    -- fetch it, and retry from the same position
    state[OFFDATA] = next_pos - 1
    state[DATA] = state[GEN]()
    return get_next_string(state, state[OFFDATA])
end

-- Prepares `state` (a new table when none is given) for iterating over
-- `data` with the rule table `defs`, and returns the triple expected by a
-- generic `for`: step function, state, initial position.
--
-- `data` may be a function, in which case it is called once here for the
-- first chunk and again whenever a chunk runs out; any other value is used
-- as the one and only chunk. The step function is picked from the type of
-- the first chunk: tables use get_next_table, everything else
-- get_next_string.
local function stream(defs, data, state)
    state = state or {}
    state[STATE] = defs
    if type(data) ~= "function" then
        state[DATA] = data
        -- a generator that yields nothing, so the stream ends after `data`
        state[GEN] = function() end
    else
        state[DATA] = data()
        state[GEN] = data
    end
    local step
    if type(state[DATA]) == "table" then
        step = get_next_table
    else
        step = get_next_string
    end
    state[OFFDATA] = 0
    return step, state, state[OFFDATA]
end

-- Runs the stream built from (defs, data, state) until it stops producing
-- transitions.
--
-- Returns the state table once a step reports no transition; the caller
-- still has to compare state[STATE] with what it considers a successful
-- final state. On a parse error returns nil followed by the error message,
-- the offending token and the state table.
local function parse(defs, data, state)
    for _, current, info, bad_token, bad_state in stream(defs, data, state) do
        if not current then
            -- parse error
            return nil, info, bad_token, bad_state
        end
        if not info then
            -- parse success (maybe)
            return current
        end
    end
end

-- not used by any of the above but useful for others

-- key for the collect buffer read by the collect_fallback hook
-- (state[COLLECT]). Carries a name and __tostring handler like the other
-- sentinel keys, so it prints as "COLLECT" instead of an anonymous table
-- address.
local COLLECT = setmetatable({}, {__name="COLLECT", __tostring=ts})

-- common utility function: stores `t` inside itself under the key `id`
-- (default "self") and returns `t`.
local function selfify(t, id)
    local key = id or "self"
    t[key] = t
    return t
end

-- common hook: when the token had no rule, appends it to the buffer kept in
-- state[COLLECT]. If the buffer has a `coalesce` threshold and has reached
-- it, the buffer is concatenated into its first slot and the rest cleared.
local function collect_fallback(state, token, rule)
    if rule then
        return
    end
    local buffer = state[COLLECT]
    buffer[#buffer + 1] = token
    local threshold = buffer.coalesce
    if threshold and #buffer >= threshold then
        buffer[1] = table.concat(buffer)
        for i = #buffer, 2, -1 do
            buffer[i] = nil
        end
    end
end

return {
    -- sentinel keys
    STATE = STATE,
    DATA = DATA,
    COLLECT = COLLECT,
    EOZ = EOZ,
    FALLBACK = FALLBACK,
    NUMBER = NUMBER,
    -- entry points
    stream = stream,
    parse = parse,
    -- helpers
    selfify = selfify,
    collect_fallback = collect_fallback,
}