summary refs log tree commit diff stats
path: root/src/cratera/parser.lua
diff options
context:
space:
mode:
Diffstat (limited to 'src/cratera/parser.lua')
-rw-r--r-- src/cratera/parser.lua 202
1 files changed, 202 insertions, 0 deletions
diff --git a/src/cratera/parser.lua b/src/cratera/parser.lua
new file mode 100644
index 0000000..ade568c
--- /dev/null
+++ b/src/cratera/parser.lua
@@ -0,0 +1,202 @@
+--[[
+    This file is part of Cratera Compiler
+    Copyright (C) 2019  Soni L.
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+--]]
+
+-- __tostring handler: yields the __name field stored in the metatable of
+-- the value being converted.
+local function ts(self)
+    local mt = getmetatable(self)
+    return mt.__name
+end
+
+-- Creates a fresh empty table to be used as a unique key. Its metatable
+-- carries the given name and uses `ts` as __tostring, so tostring(key)
+-- yields that name.
+local function new_key(name)
+    return setmetatable({}, {__name=name, __tostring=ts})
+end
+
+local STATE = new_key("STATE")       -- key for STATE
+local DATA = new_key("DATA")         -- key for DATA
+local GEN = new_key("GEN")           -- key for GENERATOR
+local OFFDATA = new_key("OFFDATA")   -- key for DATA OFFSET
+local EOZ = new_key("EOZ")           -- key for End of Stream
+-- key for number rules (prevent conflict with hooks)
+local NUMBER = new_key("NUMBER")
+-- key for fallback rules (prevent conflict with empty string)
+local FALLBACK = new_key("FALLBACK")
+
+-- Lookup table from a byte value (0..255) to the one-character string for
+-- that byte, filled once at load time.
+local optimize_lookups = {}
+do
+    local char = string.char
+    for byte = 0, 255 do
+        optimize_lookups[byte] = char(byte)
+    end
+end
+
+-- Local aliases for builtins used by the functions below.
+local type = type
+local tostring = tostring
+local string_byte = string.byte
+
+-- Walks the hooks stored in `st` at indices first, first + step, ... and
+-- calls each as hook(state, token, rule). The walk ends at the first nil
+-- slot; a `false` slot is skipped without ending the walk.
+-- ipairs is deliberately not used here (the original note cites
+-- Lua 5.1/5.2 as the reason).
+local function run_hooks(st, first, step, state, token, rule)
+    local idx = first
+    while true do
+        local hook = st[idx]
+        if hook == nil then
+            return
+        end
+        if hook then
+            hook(state, token, rule)
+        end
+        idx = idx + step
+    end
+end
+
+-- Feeds one token to the rule table currently stored in state[STATE] and
+-- stores the resulting transition back into state[STATE].
+--
+-- Returns one of:
+--   in_pos, state                 - token is EOZ and the table has no rule for it
+--   in_pos - 1, nil, "unexpected token", token, state, st
+--                                 - state[STATE] ended up nil or false
+--   pos, state, transition        - otherwise; pos is in_pos - 1 when the last
+--                                   rule function called returned a true
+--                                   second value ("retry"), else in_pos
+local function get_next_common(state, in_pos, token)
+    -- note: must preserve "token" - do not call recursively with a different token
+    local st = state[STATE]
+    local transition, retry
+    if st then
+        local rule = st[token]
+        if not rule and token == EOZ then
+            return in_pos, state
+        end
+        -- numeric tokens always go through the NUMBER rule, because integer
+        -- indices of the rule table are taken by the hooks
+        if type(token) == "number" then
+            rule = st[NUMBER]
+        end
+
+        -- pre-hooks live at -1, -2, ...
+        run_hooks(st, -1, -1, state, token, rule)
+
+        transition = rule
+        if transition == nil then
+            transition = st[FALLBACK]
+        end
+        -- keep resolving until the transition is neither an alias inside `st`
+        -- (string or table key) nor a function to call
+        while true do
+            local kind = type(transition)
+            if kind == "string" then
+                transition = st[transition]
+            elseif kind == "function" then
+                transition, retry = transition(state, token)
+            elseif kind == "table" and st[transition] ~= nil then
+                transition = st[transition]
+            else
+                break
+            end
+        end
+
+        -- post-hooks live at 1, 2, ...
+        run_hooks(st, 1, 1, state, token, rule)
+
+        state[STATE] = transition -- may be nil or false
+    end
+    -- must NOT be folded into the branch above - it may have set the state
+    -- to nil or false!
+    if not state[STATE] then
+        -- unexpected token. stream consumer may attempt to recover,
+        -- but we do this mostly to differentiate it from "end of stream" condition.
+        return in_pos - 1, nil, "unexpected token", token, state, st
+    end
+    if retry then
+        return in_pos - 1, state, transition
+    end
+    return in_pos, state, transition -- TODO is this what we should be returning?
+end
+
+-- Iterator step for table chunks: reads the token after `in_pos` from
+-- state[DATA] and hands it to get_next_common.
+-- When the current chunk is nil or empty, EOZ is fed instead (or
+-- `in_pos, state` is returned if state[STATE] is nil). When the chunk has
+-- no entry at the requested index, the next chunk is fetched from
+-- state[GEN] and the step is retried.
+local function get_next_table(state, in_pos)
+    local chunk = state[DATA]
+    if chunk == nil or #chunk == 0 then
+        if state[STATE] ~= nil then
+            return get_next_common(state, in_pos, EOZ)
+        end
+        return in_pos, state
+    end
+    local next_pos = in_pos + 1
+    -- state[OFFDATA] is the stream position at which this chunk starts
+    local token = chunk[next_pos - state[OFFDATA]]
+    if token ~= nil then
+        return get_next_common(state, next_pos, token)
+    end
+    -- ran off the end of this chunk: record the new offset, pull the next
+    -- chunk and try again from that offset
+    state[OFFDATA] = next_pos - 1
+    state[DATA] = state[GEN]()
+    return get_next_table(state, state[OFFDATA])
+end
+
+-- Iterator step for string chunks: reads the byte after `in_pos` from
+-- state[DATA], turns it into a one-character string through
+-- optimize_lookups and hands it to get_next_common.
+-- When the current chunk is nil or empty, EOZ is fed instead (or
+-- `in_pos, state` is returned if state[STATE] is nil). When the requested
+-- index has no byte, the next chunk is fetched from state[GEN] and the step
+-- is retried.
+local function get_next_string(state, in_pos)
+    local chunk = state[DATA]
+    if chunk == nil or #chunk == 0 then
+        if state[STATE] ~= nil then
+            return get_next_common(state, in_pos, EOZ)
+        end
+        return in_pos, state
+    end
+    local next_pos = in_pos + 1
+    -- state[OFFDATA] is the stream position at which this chunk starts
+    local idx = next_pos - state[OFFDATA]
+    local token = optimize_lookups[string_byte(chunk, idx, idx)]
+    if token ~= nil then
+        return get_next_common(state, next_pos, token)
+    end
+    -- ran off the end of this chunk: record the new offset, pull the next
+    -- chunk and try again from that offset
+    state[OFFDATA] = next_pos - 1
+    state[DATA] = state[GEN]()
+    return get_next_string(state, state[OFFDATA])
+end
+
+-- Prepares `state` (a new table when none is given) for running `defs`
+-- over `data` and returns the triple expected by a generic `for`:
+-- step function, state table, initial position.
+--
+-- `data` may be a function, in which case it is called once for the first
+-- chunk and kept as the generator for later chunks; any other value is
+-- used as the only chunk and the generator becomes a function returning
+-- nothing. The step function is chosen from the type of the first chunk.
+local function stream(defs, data, state)
+    local st = state or {}
+    st[STATE] = defs
+
+    local first_chunk, generator
+    if type(data) == "function" then
+        first_chunk = data()
+        generator = data
+    else
+        first_chunk = data
+        generator = function() end
+    end
+    st[DATA] = first_chunk
+    st[GEN] = generator
+
+    local step
+    if type(st[DATA]) == "table" then
+        step = get_next_table
+    else
+        step = get_next_string
+    end
+
+    st[OFFDATA] = 0
+    return step, st, st[OFFDATA]
+end
+
+-- Drives stream(defs, data, state) until it either fails or stops
+-- producing a transition.
+-- Returns the state table on (possible) success, or
+-- nil, message, offending token, state on a parse error.
+local function parse(defs, data, state)
+    for _, cur, transemsg, etoken, estate in stream(defs, data, state) do
+        if cur then
+            if not transemsg then
+                -- parse success (maybe) - caller needs to check
+                -- state[STATE] against what it considers a successful state
+                return cur
+            end
+        else
+            -- parse error
+            return nil, transemsg, etoken, estate
+        end
+    end
+end
+
+-- not used by any of the above but useful for others
+
+local COLLECT = {}
+
+-- common utility function: stores `t` inside itself under `id` (or under
+-- "self" when `id` is nil or false) and returns `t`.
+local function selfify(t, id)
+    local key = id or "self"
+    t[key] = t
+    return t
+end
+
+-- common hook: when the token matched no rule, appends it to the table in
+-- state[COLLECT]. If that table has a `coalesce` field and its length has
+-- reached it, the entries are concatenated into slot 1 and the remaining
+-- slots are cleared.
+local function collect_fallback(state, token, rule)
+    if rule then
+        return
+    end
+    local buf = state[COLLECT]
+    buf[#buf+1] = token
+    local limit = buf.coalesce
+    if limit and #buf >= limit then
+        buf[1] = table.concat(buf)
+        for i = 2, #buf do
+            buf[i] = nil
+        end
+    end
+end
+
+return {
+    STATE = STATE,
+    DATA = DATA,
+    COLLECT = COLLECT,
+    EOZ = EOZ,
+    FALLBACK = FALLBACK,
+    NUMBER = NUMBER,
+    stream = stream,
+    parse = parse,
+    selfify = selfify,
+    collect_fallback = collect_fallback,
+}