summary refs log tree commit diff stats
path: root/parser.lua
blob: 479d80aea18a8eebde93828a0a2fe20cca4d7ebc (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
--[[
    parser.lua - table based parsing
    Copyright (C) 2019  Soni L.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
--]]

-- key for STATE: slot holding the current transition table (also exported by this module)
local STATE = {}
-- key for DATA: slot holding the chunk of input currently being read (a string or a table)
local DATA = {}
-- key for GENERATOR: slot holding the function called to fetch the next chunk of input
local GEN = {}
-- key for DATA OFFSET: slot holding the stream position at which the current chunk begins
local OFFDATA = {}

-- local aliases of globals (NOTE: tostring is not referenced in the code visible here)
local type, tostring
    = type, tostring

-- Advance the state machine by one token.
--
-- Looks `token` up in the current transition table (state[STATE]), falling
-- back to the "" entry when that lookup is falsy. A string result is treated
-- as another key into the transition table and a function result is called
-- with (state, token); both are resolved repeatedly until neither applies.
-- The final value is stored back into state[STATE].
--
-- Returns `in_pos, state, transition` on success, or
-- `in_pos - 1, nil, "unexpected token", token, state` when no state remains.
local function get_next_common(state, in_pos, token)
    -- note: must preserve "token" - do not call recursively with a different token
    local transition
    if state[STATE] ~= nil then
        transition = state[STATE][token] or state[STATE][""]
        while true do
            local kind = type(transition)
            if kind == "string" then
                -- alias: follow it within the (current) transition table
                transition = state[STATE][transition]
            elseif kind == "function" then
                -- hook: let it decide what comes next
                transition = transition(state, token)
            else
                break
            end
        end
        state[STATE] = transition -- may be nil
    end
    -- deliberately a fresh check rather than an else branch:
    -- the block above may have just set the state to nil
    if state[STATE] ~= nil then
        return in_pos, state, transition -- TODO is this what we should be returning?
    end
    -- unexpected token. the stream consumer may attempt to recover; the extra
    -- return values mostly serve to tell this apart from "end of stream".
    return in_pos - 1, nil, "unexpected token", token, state
end

-- Iterator step for input delivered as tables of tokens.
--
-- When the current chunk is nil or has length 0, returns `in_pos, state`
-- without advancing. Otherwise reads the token at the next position; if the
-- current chunk has no token there, the next chunk is fetched from
-- state[GEN] and the read is retried from the same position.
local function get_next_table(state, in_pos)
    local chunk = state[DATA]
    if chunk == nil or #chunk == 0 then
        return in_pos, state
    end
    local next_pos = in_pos + 1
    -- positions are stream-wide; subtract the offset to index into this chunk
    local token = chunk[next_pos - state[OFFDATA]]
    if token ~= nil then
        return get_next_common(state, next_pos, token)
    end
    -- chunk exhausted: remember where the new chunk starts, then load it
    state[OFFDATA] = next_pos - 1
    state[DATA] = state[GEN]()
    return get_next_table(state, state[OFFDATA])
end

-- Iterator step for input delivered as strings; each token is one
-- single-position substring of the current chunk.
--
-- When the current chunk is nil or has length 0, returns `in_pos, state`
-- without advancing. Otherwise reads the token at the next position; if that
-- lies past the end of the chunk, the next chunk is fetched from state[GEN]
-- and the read is retried from the same position.
local function get_next_string(state, in_pos)
    local chunk = state[DATA]
    if chunk == nil or #chunk == 0 then
        return in_pos, state
    end
    local next_pos = in_pos + 1
    -- positions are stream-wide; subtract the offset to index into this chunk
    local idx = next_pos - state[OFFDATA]
    local token = chunk:sub(idx, idx)
    if token ~= "" then
        return get_next_common(state, next_pos, token)
    end
    -- chunk exhausted: remember where the new chunk starts, then load it
    state[OFFDATA] = next_pos - 1
    state[DATA] = state[GEN]()
    return get_next_string(state, state[OFFDATA])
end

-- Build an iterator over `data` driven by the transition table `defs`.
--
-- `data` is either a string (the whole input) or a function that returns
-- successive chunks; the type of the first chunk decides whether the string
-- or the table stepper is used.
--
-- Returns `step, state, 0`, the triple expected by a generic `for`.
local function stream(defs, data)
    local state = {
        [STATE] = defs,
        [OFFDATA] = 0,
    }
    local step
    if type(data) == "string" then
        state[DATA] = data
        -- a plain string has no further chunks: the generator yields nothing
        state[GEN] = function() end
        step = get_next_string
    else
        local first_chunk = data()
        state[DATA] = first_chunk
        state[GEN] = data
        if type(first_chunk) == "string" then
            step = get_next_string
        else
            step = get_next_table
        end
    end
    return step, state, state[OFFDATA]
end

-- Run the state machine described by `defs` over all of `data`.
--
-- Returns `nil, message, token, state` when a token is rejected. Otherwise
-- returns the state table once the stream stops producing transitions; the
-- caller still has to compare state[STATE] against whatever it considers a
-- successful final state.
local function parse(defs, data)
    for _, state, transemsg, etoken, estate in stream(defs, data) do
        if not state then
            -- parse error
            return nil, transemsg, etoken, estate
        end
        if not transemsg then
            -- parse success (maybe)
            return state
        end
    end
end

-- Public interface: the STATE key (for inspecting a state table's current
-- state) plus the two entry points.
local exports = {}
exports.STATE = STATE
exports.stream = stream
exports.parse = parse
return exports