summary refs log tree commit diff stats
path: root/src/cratera.cratera.d/lib.cratera.d/bucket.cratera
diff options
context:
space:
mode:
Diffstat (limited to 'src/cratera.cratera.d/lib.cratera.d/bucket.cratera')
-rw-r--r--src/cratera.cratera.d/lib.cratera.d/bucket.cratera371
1 files changed, 371 insertions, 0 deletions
diff --git a/src/cratera.cratera.d/lib.cratera.d/bucket.cratera b/src/cratera.cratera.d/lib.cratera.d/bucket.cratera
new file mode 100644
index 0000000..8aa074b
--- /dev/null
+++ b/src/cratera.cratera.d/lib.cratera.d/bucket.cratera
@@ -0,0 +1,371 @@
+--[[
+    Cratera Library - (User-defined) Hash Map
+    Copyright (C) 2024  Soni L.
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+--]]
+
+-- it'll be funny if this implementation turns out to also be similar to Lua's
+-- own
+
+local error = error
+local Iter = Iter
+local mktrait = mktrait
+local mkstruct = mkstruct
+local Struct = Struct
+local frexp = math.frexp
+local tointeger = math.tointeger or function(x) return x end
+local sbyte = string.byte
+local ssub = string.sub
+-- helper function that preserves "integerness" of val
+-- val is expected to be an integer, lim is expected to be an integer power of two
+local imod = (lua.loadstring or lua.load) "local val, lim = ...; return val & (lim - 1)" or function(val, lim) return val % lim end
+local _G=_G
+local _ENV = nil
+frexp = frexp or function(n, mul)
+    mul = mul or 1
+    -- naive implementation, replace later
+    if n < 0 then return frexp(-n, -1) end
+    if n == 0 then return 0, 0 end
+    local e = 0
+    while n >= 1 do
+        n = n * 0.5
+        e = e + 1
+    end
+    while n < 0.5 do
+        n = n * 2
+        e = e - 1
+    end
+    return n*mul, e
+end
+
+-- this is the object-to-hash interface
+local Bucketable = mktrait()
+
+function Bucketable:bucket(bucket)
+    return [[
+Updates the bucket with the contents of this object.
+]]
+end
+
+-- this is the "hasher" interface
+local Bucket = mktrait()
+
+function Bucket:put_number(n)
+    return [[
+Puts the number in this bucket.
+]]
+end
+
+function Bucket:put_string(s)
+    return [[
+Puts the string in this bucket.
+]]
+end
+
+function Bucket:put_object(o)
+    return [[
+Puts the object, which must implement `Bucketable`, in this bucket.
+]]
+end
+
+function Bucket:finish()
+    return [[
+Returns an integral value, traditionally from 0 to 2147483647, representing the contents of this bucket.
+]]
+end
+
+local bitwidth
+do
+    local c = 2147483647
+    if c + 1 < c then
+        bitwidth = 30
+    end
+    if c + 1 == c then
+        bitwidth = 23
+    end
+    if not bitwidth then
+        c = 9223372036854775807
+        if c + 1 < c then
+            bitwidth = 62
+        end
+        if c + 1 == c then
+            bitwidth = 52
+        end
+    end
+    if not bitwidth then
+        error("unsupported")
+    end
+end
+local wraparound = tointeger(2^bitwidth)
+
+local DefaultBucket = mkstruct(function(_struct)
+    return {_state = 0}
+end)
+
+-- FIXME clean up once `function foo.bar:[baz].qux(...)` is supported by
+-- cratera compiler
+local DefaultBucketBucket = {}
+DefaultBucket[Bucket] = DefaultBucketBucket
+
+local function shift_state(state)
+    -- a simple way to do this is to do it like java does strings
+    -- N.B. unfortunately we might not have integer math and floats lose
+    -- precision on the low end, so we must split this out
+    -- x * 31 == x * 32 - x
+    local tmp = imod(state * 32, wraparound)
+    tmp = imod(tmp - state, wraparound)
+    state = tmp
+    return state
+end
+
+function DefaultBucketBucket:put_string(s)
+    self:[Bucket].put_number(#s)
+    local state = self._state
+    for i=1,#s do
+        state = shift_state(state)
+        -- conveniently we do not need imod() here
+        state = state + sbyte(ssub(s,i,i))
+    end
+    self._state = state
+end
+
+function DefaultBucketBucket:put_number(n)
+    local state = self._state
+    state = shift_state(state)
+    if n == 0 then
+        return
+    end
+    local sign = 0
+    if n < 0 then
+        if n == -n then
+            state = imod(state + (wraparound - 1), wraparound)
+            self._state = state
+            return
+        end
+        n = -n
+        sign = 1
+    end
+    if n % 1 == 0 and n <= (2*wraparound-1) then
+        n = tointeger(n) -- ensure it is an integer
+        n = imod(n, wraparound) -- ensure it fits
+        state = imod(state + n, wraparound)
+        self._state = state
+        return
+    end
+    local m, e = frexp(n)
+    _G.print(m, e)
+    m = 2 * m - 1 -- within [0, 1), contains 52/23 bits
+    _G.print(m * wraparound, e)
+    m = tointeger(m * wraparound) -- within [0, wraparound)
+    e = (e - 1) * 2 + sign -- significantly smaller than wraparound
+    _G.print(m, e)
+    local tmp = imod(e*2 + m, wraparound)
+    state = imod(state + tmp, wraparound)
+    self._state = state
+end
+
+function DefaultBucketBucket:put_object(o)
+    o:[Bucketable].bucket(self)
+end
+
+function DefaultBucketBucket:finish()
+    return self._state
+end
+
+-- this is the actual hashmap
+local BucketingMap = mkstruct(function(_struct, bucket_factory)
+    return {
+        _bucket_factory = bucket_factory or DefaultBucket,
+        _size = 0,
+        _capacity = 0,
+        _buckets = {},
+        -- can be changed at runtime
+        load_factor = 0.5,
+    }
+end, {
+    __eq=function(a, b)
+        -- are types equal?
+        if a[Struct] ~= b[Struct] then
+            return false
+        end
+        -- are elements equal?
+        for k, v in a:[Iter].iter() do
+            if b:get(k) ~= v then
+                return false
+            end
+        end
+        return true
+    end
+})
+
+do local BucketingMapIter = {}
+    BucketingMap[Iter] = BucketingMapIter
+    local function iterator(state)
+        local bucket_pos = state[3] + 1
+        local buckets = state[1]
+        local bucket = buckets[bucket_pos]
+        while not bucket do
+            if bucket_pos > state[2] then
+                return nil
+            end
+            bucket_pos = bucket_pos + 1
+            bucket = buckets[bucket_pos]
+        end
+        return bucket[2], bucket[3]
+    end
+    function BucketingMapIter:iter()
+        return iterator, {self._buckets, self._capacity, 0}
+    end
+end
+
+-- finds the (possibly free) bucket for a given key
+local function find_bucket(buckets, capacity, hash, key)
+    local i = (hash % capacity) + 1
+    local overflow = false
+    while true do
+        local bucket = buckets[i]
+        if not bucket then
+            return i
+        end
+        if bucket[1] == hash and bucket[2] == key then
+            return i
+        end
+        i = i + 1
+        if i > capacity then
+            if overflow then
+                return nil
+            end
+            overflow = true
+            i = 1
+        end
+    end
+end
+
+local function put(self, key, value, update_key)
+    local hasher = self._bucket_factory()
+    key:[Bucketable].bucket(hasher)
+    local hash = hasher:[Bucket].finish()
+    if self._size >= self._capacity * self.load_factor then
+        local old_capacity = self._capacity
+        local new_capacity = old_capacity == 0 and 1 or old_capacity * 2
+        local old_buckets = self._buckets
+        local new_buckets = {}
+        for i=1,new_capacity do
+            new_buckets[i] = false
+        end
+        for i=1,old_capacity do
+            local old_bucket = old_buckets[i]
+            if old_bucket then
+                local new_bucket_pos = find_bucket(new_buckets, new_capacity, old_bucket[1], old_bucket[2])
+                new_buckets[new_bucket_pos] = old_bucket
+            end
+        end
+        self._buckets = new_buckets
+        self._capacity = new_capacity
+    end
+    local buckets = self._buckets
+    local bucket_pos = find_bucket(buckets, self._capacity, hash, key)
+    if not bucket_pos then
+        error("no space for key")
+    end
+    local bucket = buckets[bucket_pos]
+    if not bucket then
+        self._size = self._size + 1
+        buckets[bucket_pos] = {hash, key, value}
+        return nil
+    else
+        local old_value = bucket[3]
+        bucket[3] = value
+        if update_key then
+            bucket[2] = key
+        end
+        return old_value
+    end
+end
+
+function BucketingMap:put(key, value)
+    put(self, key, value, false)
+end
+
+function BucketingMap:has_key(key)
+    local hasher = self._bucket_factory()
+    key:[Bucketable].bucket(hasher)
+    local hash = hasher:[Bucket].finish()
+    local buckets = self._buckets
+    local bucket_pos = find_bucket(buckets, self._capacity, hash, key)
+    local bucket = buckets[bucket_pos] -- buckets[nil] is always nil
+    if not bucket then
+        return false
+    else
+        return true
+    end
+end
+
+function BucketingMap:get(key, value)
+    local hasher = self._bucket_factory()
+    key:[Bucketable].bucket(hasher)
+    local hash = hasher:[Bucket].finish()
+    local buckets = self._buckets
+    local bucket_pos = find_bucket(buckets, self._capacity, hash, key)
+    local bucket = buckets[bucket_pos] -- buckets[nil] is always nil
+    if not bucket then
+        return nil
+    else
+        return bucket[3]
+    end
+end
+
+function BucketingMap:remove(key)
+    local hasher = self._bucket_factory()
+    key:[Bucketable].bucket(hasher)
+    local hash = hasher:[Bucket].finish()
+    local buckets = self._buckets
+    local bucket_pos = find_bucket(buckets, self._capacity, hash, key)
+    local bucket = buckets[bucket_pos]
+    if not bucket then
+        return nil
+    end
+    local value = bucket[3]
+    self._size = self._size + 1
+    -- just rehash lol
+    local capacity = self._capacity
+    local new_buckets = {}
+    for i=1,capacity do
+        new_buckets[i] = false
+    end
+    for i=1,bucket_pos-1 do
+        local old_bucket = buckets[i]
+        if old_bucket then
+            local new_bucket_pos = find_bucket(new_buckets, capacity, old_bucket[1], old_bucket[2])
+            new_buckets[new_bucket_pos] = old_bucket
+        end
+    end
+    for i=bucket_pos+1,capacity do
+        local old_bucket = buckets[i]
+        if old_bucket then
+            local new_bucket_pos = find_bucket(new_buckets, capacity, old_bucket[1], old_bucket[2])
+            new_buckets[new_bucket_pos] = old_bucket
+        end
+    end
+    self._buckets = new_buckets
+    return value
+end
+
+return {
+    BucketingMap = BucketingMap,
+    Bucket = Bucket,
+    Bucketable = Bucketable,
+    DefaultBucket = DefaultBucket,
+}