summary refs log tree commit diff stats
path: root/luatokens.lua
diff options
context:
space:
mode:
Diffstat (limited to 'luatokens.lua')
-rw-r--r--  luatokens.lua  117
1 file changed, 97 insertions(+), 20 deletions(-)
diff --git a/luatokens.lua b/luatokens.lua
index 28944c1..58a7d09 100644
--- a/luatokens.lua
+++ b/luatokens.lua
@@ -1,4 +1,4 @@
--- Lua tokens
+-- Lua defs
 
 -- we need some stuff from here
 local parser = require "parser"
@@ -8,11 +8,27 @@ local COLLECT = parser.COLLECT
 local collect_fallback = parser.collect_fallback
 
 -- "dummies"
-local TK_STRING = {}
+-- see http://www.lua.org/source/5.3/llex.h.html#RESERVED
+local TK_AND, TK_BREAK,
+    TK_DO, TK_ELSE, TK_ELSEIF, TK_END, TK_FALSE, TK_FOR, TK_FUNCTION,
+    TK_GOTO, TK_IF, TK_IN, TK_LOCAL, TK_NIL, TK_NOT, TK_OR, TK_REPEAT,
+    TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE,
+    TK_IDIV, TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE,
+    TK_SHL, TK_SHR,
+    TK_DBCOLON, TK_EOS,
+    TK_FLT, TK_INT, TK_NAME, TK_STRING =
+    {}, {},
+    {}, {}, {}, {}, {}, {}, {},
+    {}, {}, {}, {}, {}, {}, {}, {},
+    {}, {}, {}, {}, {},
+    {}, {}, {}, {}, {}, {}, {},
+    {}, {},
+    {}, {},
+    {}, {}, {}, {}
 
-local tokens = {}
+local defs = {}
 
-tokens.base = {
+defs.base = {
     [" "] = "whitespace",
     ["\n"] = "newline",
     ["\r"] = "newline",
@@ -84,14 +100,15 @@ tokens.base = {
 }
 
 local function linecount(state, token, rule)
+    -- TODO fix
     if token == "\n" or token == "\r" then
         state.line = (state.line or 1) + 1
     end
 end
 
 do local tstring = selfify({})
-    tokens.string = tstring
-    tstring.tokens = tokens
+    defs.string = tstring
+    tstring.defs = defs
     do local tsescapes = setmetatable({
             ["'"] = "insertraw",
             ['"'] = "insertraw",
@@ -110,9 +127,9 @@ do local tstring = selfify({})
             ["\r"] = setmetatable({["\n"] = setmetatable({}, {__index=tstring})}, {__index=tstring}),
             [1] = linecount,
             [2] = function(state, token, rule) if token == "\r" or token == "\n" then collect_fallback(state, "\n") end end,
-        }, {__index = tokens.base})
-        tokens.string.escapes = tsescapes
-        tsescapes.string = tokens.string
+        }, {__index = defs.base})
+        defs.string.escapes = tsescapes
+        tsescapes.string = defs.string
 
         function tsescapes.insertraw(state, token)
             collect_fallback(state, token)
@@ -158,7 +175,7 @@ do local tstring = selfify({})
             end
         end
 
-        tsescapes.hex = setmetatable(selfify({string = tokens.string, digit = "hexdigit"}), {__index=tokens.base})
+        tsescapes.hex = setmetatable(selfify({string = defs.string, digit = "hexdigit"}), {__index=defs.base})
         function tsescapes.hex.hexdigit(state, token)
             local digit = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
             assert(digit, "this should never be called for non-hex-digits")
@@ -174,14 +191,60 @@ do local tstring = selfify({})
             end
         end
 
+        do local tseunicode = {}
+            tseunicode["{"] = "hex"
+            do local tseuhex = setmetatable(selfify({digit = "hexdigit", string=tstring}), {__index=defs.base})
+                tseunicode.hex = tseuhex
+                function tseuhex.hexdigit(state, token)
+                    local digit = string.find("123456789ABCDEF0123456789abcdef0", token, 1, true)
+                    assert(digit, "this should never be called for non-hex-digits")
+                    state.in_hex = (state.in_hex or 0) * 16 + digit % 16
+                    if state.in_hex <= 2147483647 then
+                        return "self"
+                    end
+                end
+                tseuhex["}"] = function(state, token)
+                    local num = state.in_hex
+                    state.in_hex = nil
+                    if num < 128 then
+                        collect_fallback(state, string.char(num))
+                        return "string"
+                    end
+                    local bytes = ""
+                    while num > 63 do
+                        local v = num % 64
+                        bytes = string.char(128 + v) .. bytes -- yeah ik, not the most efficient
+                        num = (num - v) / 64
+                    end
+                    if num >= 2^6/(2^#bytes) then
+                        local v = num % 64
+                        bytes = string.char(128 + v) .. bytes
+                        num = (num - v) / 64
+                    end
+                    do
+                        local v = 0
+                        for i=1,#bytes do
+                            v = v + 128 / 2^i
+                        end
+                        v = v + num
+                        assert(v < 126)
+                        bytes = string.char(128 + v) .. bytes
+                    end
+                    collect_fallback(state, bytes)
+                    return "string"
+                end
+            end
+            tsescapes.unicode = tseunicode
+        end
+
         do local tseskipwhitespace = selfify({
-                string = tokens.string,
+                string = defs.string,
                 whitespace = "self",
                 [""] = "string",
                 [1] = collect_fallback,
                 [2] = linecount,
             })
-            local tbase = tokens.base
+            local tbase = defs.base
             local tbasemap = {whitespace = "whitespace", newline = "whitespace"}
             setmetatable(tseskipwhitespace, {__index = function(t, k) return tbasemap[tbase[k]] or tstring[k] end})
             tsescapes.skipwhitespace =  tseskipwhitespace
@@ -205,7 +268,7 @@ do local tstring = selfify({})
             state.in_string = nil
             state[#state+1] = table.concat(state[COLLECT])
             state[COLLECT] = nil
-            return "tokens"
+            return "defs"
         else
             collect_fallback(state, token)
             return "self"
@@ -213,13 +276,18 @@ do local tstring = selfify({})
     end
 end
 
-tokens["'"] = "string_open"
-tokens['"'] = "string_open"
-tokens[1] = linecount
+do local tlongstring = {}
+    -- TODO
+end
+
+defs["'"] = "string_open"
+defs['"'] = "string_open"
+defs["["] = "maybe_longstring"
+defs[1] = linecount
 
-setmetatable(tokens, {__index=whitespace})
+setmetatable(defs, {__index=whitespace})
 
-function tokens.string_open(state, token)
+function defs.string_open(state, token)
     if not state.in_string then
         state[#state+1] = TK_STRING
         state[COLLECT] = {}
@@ -230,6 +298,15 @@ function tokens.string_open(state, token)
 end
 
 return {
-    tokens = tokens,
-    TK_STRING = TK_STRING,
+    defs = defs,
+    tokens = {
+        TK_AND = TK_AND, TK_BREAK = TK_BREAK,
+        TK_DO = TK_DO, TK_ELSE = TK_ELSE, TK_ELSEIF = TK_ELSEIF, TK_END = TK_END, TK_FALSE = TK_FALSE, TK_FOR = TK_FOR, TK_FUNCTION = TK_FUNCTION,
+        TK_GOTO = TK_GOTO, TK_IF = TK_IF, TK_IN = TK_IN, TK_LOCAL = TK_LOCAL, TK_NIL = TK_NIL, TK_NOT = TK_NOT, TK_OR = TK_OR, TK_REPEAT = TK_REPEAT,
+        TK_RETURN = TK_RETURN, TK_THEN = TK_THEN, TK_TRUE = TK_TRUE, TK_UNTIL = TK_UNTIL, TK_WHILE = TK_WHILE,
+        TK_IDIV = TK_IDIV, TK_CONCAT = TK_CONCAT, TK_DOTS = TK_DOTS, TK_EQ = TK_EQ, TK_GE = TK_GE, TK_LE = TK_LE, TK_NE = TK_NE,
+        TK_SHL = TK_SHL, TK_SHR = TK_SHR,
+        TK_DBCOLON = TK_DBCOLON, TK_EOS = TK_EOS,
+        TK_FLT = TK_FLT, TK_INT = TK_INT, TK_NAME = TK_NAME, TK_STRING = TK_STRING
+    },
 }