summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rwxr-xr-xbuild.sh2
-rw-r--r--src/cratera.cratera.d/lib.cratera53
-rw-r--r--src/cratera.cratera.d/lib.cratera.d/bucket.cratera371
-rw-r--r--test/bucketmap-tests.cratera35
-rw-r--r--test/defaultbucket.cratera22
5 files changed, 483 insertions, 0 deletions
diff --git a/build.sh b/build.sh
index 717b2d1..5efef89 100755
--- a/build.sh
+++ b/build.sh
@@ -68,6 +68,8 @@ do_test() {
 		test_wrapper 'printf '\''_PROMPT=setmetatable({}, {__tostring=error})\n'\'' | out/bin/cratera -i 2>&1 | grep '\''overflow\|handling'\'
 		test_wrapper 'out/bin/cratera test/line-numbers.cratera'
 		test_wrapper 'out/bin/cratera test/strings.cratera'
+		test_wrapper 'out/bin/cratera test/defaultbucket.cratera'
+		test_wrapper 'out/bin/cratera test/bucketmap-tests.cratera'
 	) || exit 2
 }
 
diff --git a/src/cratera.cratera.d/lib.cratera b/src/cratera.cratera.d/lib.cratera
index 69ca476..7e04c83 100644
--- a/src/cratera.cratera.d/lib.cratera
+++ b/src/cratera.cratera.d/lib.cratera
@@ -16,12 +16,16 @@
     along with this program.  If not, see <https://www.gnu.org/licenses/>.
 --]]
 
+-- (post-)bootstrap bits
+
 local cratera_env = _G
+-- the "old" loader
 local cratera_loader_base = cratera_env.load
 
 -- only use setfenv if lua load doesn't support env argument
 local setfenv = (lua.loadstring or lua.load)("return setfenv", nil, nil, {})()
 
+-- replace the loader
 function cratera_env.load(chunk, chunkname, mode, env)
     if env == nil then
         env = cratera_env
@@ -41,3 +45,52 @@ end
 -- cratera opts not to support dostring/dofile.
 cratera_env.dostring = false
 cratera_env.dofile = false
+
+-- "proper" stdlib from now on
+
+local setmetatable = setmetatable
+
+-- traits aren't (currently) special, they're just unique tables that can be
+-- used as keys
+function cratera_env.mktrait()
+    return {}
+end
+
+-- all structs implement the Struct trait
+cratera_env.Struct = mktrait()
+-- the Struct trait has no members of its own but just points at the struct
+-- type itself.
+
+-- (should be upvalue, not global)
+local Struct = cratera_env.Struct
+-- structs are a little special
+function cratera_env.mkstruct(constructor, meta)
+    local struct = {}
+    struct[Struct] = struct
+    meta = meta or {}
+    meta.__index = struct
+    setmetatable(struct, {__call=function(...)
+        return setmetatable(constructor(...), meta)
+    end})
+    return struct
+end
+
+cratera_env.Iter = mktrait()
+
+function Iter:iter()
+    return [[
+Returns an iterator function, a state, and an initial value, for use in a
+regular generic `for` loop.
+
+# Examples
+
+```cratera
+local bucketlib = require "cratera.lib.bucket"
+
+local map = bucketlib.BucketMap()
+
+for k,v in map:[Iter].iter() do
+end
+```
+]]
+end
diff --git a/src/cratera.cratera.d/lib.cratera.d/bucket.cratera b/src/cratera.cratera.d/lib.cratera.d/bucket.cratera
new file mode 100644
index 0000000..8aa074b
--- /dev/null
+++ b/src/cratera.cratera.d/lib.cratera.d/bucket.cratera
@@ -0,0 +1,371 @@
+--[[
+    Cratera Library - (User-defined) Hash Map
+    Copyright (C) 2024  Soni L.
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+--]]
+
+-- it'll be funny if this implementation turns out to also be similar to Lua's
+-- own
+
+local error = error
+local Iter = Iter
+local mktrait = mktrait
+local mkstruct = mkstruct
+local Struct = Struct
+local frexp = math.frexp
+local tointeger = math.tointeger or function(x) return x end
+local sbyte = string.byte
+local ssub = string.sub
+-- helper function that preserves "integerness" of val
+-- val is expected to be an integer, lim is expected to be an integer power of two
+local imod = (lua.loadstring or lua.load) "local val, lim = ...; return val & (lim - 1)" or function(val, lim) return val % lim end
+local _G=_G
+local _ENV = nil
+frexp = frexp or function(n, mul)
+    mul = mul or 1
+    -- naive implementation, replace later
+    if n < 0 then return frexp(-n, -1) end
+    if n == 0 then return 0, 0 end
+    local e = 0
+    while n >= 1 do
+        n = n * 0.5
+        e = e + 1
+    end
+    while n < 0.5 do
+        n = n * 2
+        e = e - 1
+    end
+    return n*mul, e
+end
+
+-- this is the object-to-hash interface
+local Bucketable = mktrait()
+
+function Bucketable:bucket(bucket)
+    return [[
+Updates the bucket with the contents of this object.
+]]
+end
+
+-- this is the "hasher" interface
+local Bucket = mktrait()
+
+function Bucket:put_number(n)
+    return [[
+Puts the number in this bucket.
+]]
+end
+
+function Bucket:put_string(s)
+    return [[
+Puts the string in this bucket.
+]]
+end
+
+function Bucket:put_object(o)
+    return [[
+Puts the object, which must implement `Bucketable`, in this bucket.
+]]
+end
+
+function Bucket:finish()
+    return [[
+Returns an integral value, traditionally from 0 to 2147483647, representing the contents of this bucket.
+]]
+end
+
+local bitwidth
+do
+    local c = 2147483647
+    if c + 1 < c then
+        bitwidth = 30
+    end
+    if c + 1 == c then
+        bitwidth = 23
+    end
+    if not bitwidth then
+        c = 9223372036854775807
+        if c + 1 < c then
+            bitwidth = 62
+        end
+        if c + 1 == c then
+            bitwidth = 52
+        end
+    end
+    if not bitwidth then
+        error("unsupported")
+    end
+end
+local wraparound = tointeger(2^bitwidth)
+
+local DefaultBucket = mkstruct(function(_struct)
+    return {_state = 0}
+end)
+
+-- FIXME clean up once `function foo.bar:[baz].qux(...)` is supported by
+-- cratera compiler
+local DefaultBucketBucket = {}
+DefaultBucket[Bucket] = DefaultBucketBucket
+
+local function shift_state(state)
+    -- a simple way to do this is to do it like java does strings
+    -- N.B. unfortunately we might not have integer math and floats lose
+    -- precision on the low end, so we must split this out
+    -- x * 31 == x * 32 - x
+    local tmp = imod(state * 32, wraparound)
+    tmp = imod(tmp - state, wraparound)
+    state = tmp
+    return state
+end
+
+function DefaultBucketBucket:put_string(s)
+    self:[Bucket].put_number(#s)
+    local state = self._state
+    for i=1,#s do
+        state = shift_state(state)
+        -- conveniently we do not need imod() here
+        state = state + sbyte(ssub(s,i,i))
+    end
+    self._state = state
+end
+
+function DefaultBucketBucket:put_number(n)
+    local state = self._state
+    state = shift_state(state)
+    if n == 0 then
+        return
+    end
+    local sign = 0
+    if n < 0 then
+        if n == -n then
+            state = imod(state + (wraparound - 1), wraparound)
+            self._state = state
+            return
+        end
+        n = -n
+        sign = 1
+    end
+    if n % 1 == 0 and n <= (2*wraparound-1) then
+        n = tointeger(n) -- ensure it is an integer
+        n = imod(n, wraparound) -- ensure it fits
+        state = imod(state + n, wraparound)
+        self._state = state
+        return
+    end
+    local m, e = frexp(n)
+    _G.print(m, e)
+    m = 2 * m - 1 -- within [0, 1), contains 52/23 bits
+    _G.print(m * wraparound, e)
+    m = tointeger(m * wraparound) -- within [0, wraparound)
+    e = (e - 1) * 2 + sign -- significantly smaller than wraparound
+    _G.print(m, e)
+    local tmp = imod(e*2 + m, wraparound)
+    state = imod(state + tmp, wraparound)
+    self._state = state
+end
+
+function DefaultBucketBucket:put_object(o)
+    o:[Bucketable].bucket(self)
+end
+
+function DefaultBucketBucket:finish()
+    return self._state
+end
+
+-- this is the actual hashmap
+local BucketingMap = mkstruct(function(_struct, bucket_factory)
+    return {
+        _bucket_factory = bucket_factory or DefaultBucket,
+        _size = 0,
+        _capacity = 0,
+        _buckets = {},
+        -- can be changed at runtime
+        load_factor = 0.5,
+    }
+end, {
+    __eq=function(a, b)
+        -- are types equal?
+        if a[Struct] ~= b[Struct] then
+            return false
+        end
+        -- are elements equal?
+        for k, v in a:[Iter].iter() do
+            if b:get(k) ~= v then
+                return false
+            end
+        end
+        return true
+    end
+})
+
+do local BucketingMapIter = {}
+    BucketingMap[Iter] = BucketingMapIter
+    local function iterator(state)
+        local bucket_pos = state[3] + 1
+        local buckets = state[1]
+        local bucket = buckets[bucket_pos]
+        while not bucket do
+            if bucket_pos > state[2] then
+                return nil
+            end
+            bucket_pos = bucket_pos + 1
+            bucket = buckets[bucket_pos]
+        end
+        return bucket[2], bucket[3]
+    end
+    function BucketingMapIter:iter()
+        return iterator, {self._buckets, self._capacity, 0}
+    end
+end
+
+-- finds the (possibly free) bucket for a given key
+local function find_bucket(buckets, capacity, hash, key)
+    local i = (hash % capacity) + 1
+    local overflow = false
+    while true do
+        local bucket = buckets[i]
+        if not bucket then
+            return i
+        end
+        if bucket[1] == hash and bucket[2] == key then
+            return i
+        end
+        i = i + 1
+        if i > capacity then
+            if overflow then
+                return nil
+            end
+            overflow = true
+            i = 1
+        end
+    end
+end
+
+local function put(self, key, value, update_key)
+    local hasher = self._bucket_factory()
+    key:[Bucketable].bucket(hasher)
+    local hash = hasher:[Bucket].finish()
+    if self._size >= self._capacity * self.load_factor then
+        local old_capacity = self._capacity
+        local new_capacity = old_capacity == 0 and 1 or old_capacity * 2
+        local old_buckets = self._buckets
+        local new_buckets = {}
+        for i=1,new_capacity do
+            new_buckets[i] = false
+        end
+        for i=1,old_capacity do
+            local old_bucket = old_buckets[i]
+            if old_bucket then
+                local new_bucket_pos = find_bucket(new_buckets, new_capacity, old_bucket[1], old_bucket[2])
+                new_buckets[new_bucket_pos] = old_bucket
+            end
+        end
+        self._buckets = new_buckets
+        self._capacity = new_capacity
+    end
+    local buckets = self._buckets
+    local bucket_pos = find_bucket(buckets, self._capacity, hash, key)
+    if not bucket_pos then
+        error("no space for key")
+    end
+    local bucket = buckets[bucket_pos]
+    if not bucket then
+        self._size = self._size + 1
+        buckets[bucket_pos] = {hash, key, value}
+        return nil
+    else
+        local old_value = bucket[3]
+        bucket[3] = value
+        if update_key then
+            bucket[2] = key
+        end
+        return old_value
+    end
+end
+
+function BucketingMap:put(key, value)
+    put(self, key, value, false)
+end
+
+function BucketingMap:has_key(key)
+    local hasher = self._bucket_factory()
+    key:[Bucketable].bucket(hasher)
+    local hash = hasher:[Bucket].finish()
+    local buckets = self._buckets
+    local bucket_pos = find_bucket(buckets, self._capacity, hash, key)
+    local bucket = buckets[bucket_pos] -- buckets[nil] is always nil
+    if not bucket then
+        return false
+    else
+        return true
+    end
+end
+
+function BucketingMap:get(key, value)
+    local hasher = self._bucket_factory()
+    key:[Bucketable].bucket(hasher)
+    local hash = hasher:[Bucket].finish()
+    local buckets = self._buckets
+    local bucket_pos = find_bucket(buckets, self._capacity, hash, key)
+    local bucket = buckets[bucket_pos] -- buckets[nil] is always nil
+    if not bucket then
+        return nil
+    else
+        return bucket[3]
+    end
+end
+
+function BucketingMap:remove(key)
+    local hasher = self._bucket_factory()
+    key:[Bucketable].bucket(hasher)
+    local hash = hasher:[Bucket].finish()
+    local buckets = self._buckets
+    local bucket_pos = find_bucket(buckets, self._capacity, hash, key)
+    local bucket = buckets[bucket_pos]
+    if not bucket then
+        return nil
+    end
+    local value = bucket[3]
+    self._size = self._size + 1
+    -- just rehash lol
+    local capacity = self._capacity
+    local new_buckets = {}
+    for i=1,capacity do
+        new_buckets[i] = false
+    end
+    for i=1,bucket_pos-1 do
+        local old_bucket = buckets[i]
+        if old_bucket then
+            local new_bucket_pos = find_bucket(new_buckets, capacity, old_bucket[1], old_bucket[2])
+            new_buckets[new_bucket_pos] = old_bucket
+        end
+    end
+    for i=bucket_pos+1,capacity do
+        local old_bucket = buckets[i]
+        if old_bucket then
+            local new_bucket_pos = find_bucket(new_buckets, capacity, old_bucket[1], old_bucket[2])
+            new_buckets[new_bucket_pos] = old_bucket
+        end
+    end
+    self._buckets = new_buckets
+    return value
+end
+
+return {
+    BucketingMap = BucketingMap,
+    Bucket = Bucket,
+    Bucketable = Bucketable,
+    DefaultBucket = DefaultBucket,
+}
diff --git a/test/bucketmap-tests.cratera b/test/bucketmap-tests.cratera
new file mode 100644
index 0000000..b2e0aaa
--- /dev/null
+++ b/test/bucketmap-tests.cratera
@@ -0,0 +1,35 @@
+local bucketlib = require "cratera.lib.bucket"
+
+local Bucketable = bucketlib.Bucketable
+local Bucket = bucketlib.Bucket
+local Struct = Struct
+
+local Point = mkstruct(function(_struct, x, y)
+    return {x, y}
+end, {
+    __eq = function(a, b)
+        return a[Struct] == b[Struct] and a[1] == b[1] and a[2] == b[2]
+    end
+})
+
+Point[Bucketable] = {}
+-- FIXME we really need that 'function Point:[Bucketable].bucket(bucket)' syntax...
+local pointbucket = Point[Bucketable]
+
+function pointbucket:bucket(bucket)
+    bucket:[Bucket].put_number(self[1])
+    bucket:[Bucket].put_number(self[2])
+end
+
+local map = bucketlib.BucketingMap()
+
+map:put(Point(0, 1), "hello")
+map:put(Point(2, 3), "cratera")
+map:put(Point(4, 5), "world")
+
+assert(map._buckets[2])
+assert(map._buckets[3])
+assert(map._buckets[4])
+assert(map:get(Point(0, 1)) == "hello")
+assert(map:get(Point(2, 3)) == "cratera")
+assert(map:get(Point(4, 5)) == "world")
diff --git a/test/defaultbucket.cratera b/test/defaultbucket.cratera
new file mode 100644
index 0000000..fd7281e
--- /dev/null
+++ b/test/defaultbucket.cratera
@@ -0,0 +1,22 @@
+local bucketlib = require "cratera.lib.bucket"
+
+local Bucket = bucketlib.Bucket
+local DefaultBucket = bucketlib.DefaultBucket
+
+local function hash_n(n)
+    local bucket = DefaultBucket()
+    bucket:[Bucket].put_number(n)
+    return bucket:[Bucket].finish()
+end
+
+local function hash_s(n)
+    local bucket = DefaultBucket()
+    bucket:[Bucket].put_string(n)
+    return bucket:[Bucket].finish()
+end
+
+assert(hash_n(1) == 1)
+assert(hash_n(2) == 2)
+assert(hash_n(-1) == 1)
+assert(hash_s("") == 0)
+assert(hash_s("h") == string.byte("h")+31)