1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
|
--[[
Cratera Library - (User-defined) Hash Map
Copyright (C) 2024 Soni L.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
--]]
-- it'll be funny if this implementation turns out to also be similar to Lua's
-- own
local error = error
local Iter = Iter
local mktrait = mktrait
local mkstruct = mkstruct
local Struct = Struct
local frexp = math.frexp
local tointeger = math.tointeger or function(x) return x end
local sbyte = string.byte
local ssub = string.sub
-- helper function that preserves "integerness" of val
-- val is expected to be an integer, lim is expected to be an integer power of two
local imod = (lua.loadstring or lua.load) "local val, lim = ...; return val & (lim - 1)" or function(val, lim) return val % lim end
local _G=_G
local _ENV = nil
frexp = frexp or function(n, mul)
mul = mul or 1
-- naive implementation, replace later
if n < 0 then return frexp(-n, -1) end
if n == 0 then return 0, 0 end
local e = 0
while n >= 1 do
n = n * 0.5
e = e + 1
end
while n < 0.5 do
n = n * 2
e = e - 1
end
return n*mul, e
end
-- this is the object-to-hash interface
local Bucketable = mktrait()
function Bucketable:bucket(bucket)
return [[
Updates the bucket with the contents of this object.
]]
end
-- this is the "hasher" interface
local Bucket = mktrait()
function Bucket:put_number(n)
return [[
Puts the number in this bucket.
]]
end
function Bucket:put_string(s)
return [[
Puts the string in this bucket.
]]
end
function Bucket:put_object(o)
return [[
Puts the object, which must implement `Bucketable`, in this bucket.
]]
end
function Bucket:finish()
return [[
Returns an integral value, traditionally from 0 to 2147483647, representing the contents of this bucket.
]]
end
local bitwidth
do
local c = 2147483647
if c + 1 < c then
bitwidth = 30
end
if c + 1 == c then
bitwidth = 23
end
if not bitwidth then
c = 9223372036854775807
if c + 1 < c then
bitwidth = 62
end
if c + 1 == c then
bitwidth = 52
end
end
if not bitwidth then
error("unsupported")
end
end
local wraparound = tointeger(2^bitwidth)
local DefaultBucket = mkstruct(function(_struct)
return {_state = 0}
end)
-- FIXME clean up once `function foo.bar:[baz].qux(...)` is supported by
-- cratera compiler
local DefaultBucketBucket = {}
DefaultBucket[Bucket] = DefaultBucketBucket
local function shift_state(state)
-- a simple way to do this is to do it like java does strings
-- N.B. unfortunately we might not have integer math and floats lose
-- precision on the low end, so we must split this out
-- x * 31 == x * 32 - x
local tmp = imod(state * 32, wraparound)
tmp = imod(tmp - state, wraparound)
state = tmp
return state
end
function DefaultBucketBucket:put_string(s)
self:[Bucket].put_number(#s)
local state = self._state
for i=1,#s do
state = shift_state(state)
-- conveniently we do not need imod() here
state = state + sbyte(ssub(s,i,i))
end
self._state = state
end
function DefaultBucketBucket:put_number(n)
local state = self._state
state = shift_state(state)
if n == 0 then
return
end
local sign = 0
if n < 0 then
if n == -n then
state = imod(state + (wraparound - 1), wraparound)
self._state = state
return
end
n = -n
sign = 1
end
if n % 1 == 0 and n <= (2*wraparound-1) then
n = tointeger(n) -- ensure it is an integer
n = imod(n, wraparound) -- ensure it fits
state = imod(state + n, wraparound)
self._state = state
return
end
local m, e = frexp(n)
_G.print(m, e)
m = 2 * m - 1 -- within [0, 1), contains 52/23 bits
_G.print(m * wraparound, e)
m = tointeger(m * wraparound) -- within [0, wraparound)
e = (e - 1) * 2 + sign -- significantly smaller than wraparound
_G.print(m, e)
local tmp = imod(e*2 + m, wraparound)
state = imod(state + tmp, wraparound)
self._state = state
end
function DefaultBucketBucket:put_object(o)
o:[Bucketable].bucket(self)
end
function DefaultBucketBucket:finish()
return self._state
end
-- this is the actual hashmap
local BucketingMap = mkstruct(function(_struct, bucket_factory)
return {
_bucket_factory = bucket_factory or DefaultBucket,
_size = 0,
_capacity = 0,
_buckets = {},
-- can be changed at runtime
load_factor = 0.5,
}
end, {
__eq=function(a, b)
-- are types equal?
if a[Struct] ~= b[Struct] then
return false
end
-- are elements equal?
for k, v in a:[Iter].iter() do
if b:get(k) ~= v then
return false
end
end
return true
end
})
do local BucketingMapIter = {}
BucketingMap[Iter] = BucketingMapIter
local function iterator(state)
local bucket_pos = state[3] + 1
local buckets = state[1]
local bucket = buckets[bucket_pos]
while not bucket do
if bucket_pos > state[2] then
return nil
end
bucket_pos = bucket_pos + 1
bucket = buckets[bucket_pos]
end
return bucket[2], bucket[3]
end
function BucketingMapIter:iter()
return iterator, {self._buckets, self._capacity, 0}
end
end
-- finds the (possibly free) bucket for a given key
local function find_bucket(buckets, capacity, hash, key)
local i = (hash % capacity) + 1
local overflow = false
while true do
local bucket = buckets[i]
if not bucket then
return i
end
if bucket[1] == hash and bucket[2] == key then
return i
end
i = i + 1
if i > capacity then
if overflow then
return nil
end
overflow = true
i = 1
end
end
end
local function put(self, key, value, update_key)
local hasher = self._bucket_factory()
key:[Bucketable].bucket(hasher)
local hash = hasher:[Bucket].finish()
if self._size >= self._capacity * self.load_factor then
local old_capacity = self._capacity
local new_capacity = old_capacity == 0 and 1 or old_capacity * 2
local old_buckets = self._buckets
local new_buckets = {}
for i=1,new_capacity do
new_buckets[i] = false
end
for i=1,old_capacity do
local old_bucket = old_buckets[i]
if old_bucket then
local new_bucket_pos = find_bucket(new_buckets, new_capacity, old_bucket[1], old_bucket[2])
new_buckets[new_bucket_pos] = old_bucket
end
end
self._buckets = new_buckets
self._capacity = new_capacity
end
local buckets = self._buckets
local bucket_pos = find_bucket(buckets, self._capacity, hash, key)
if not bucket_pos then
error("no space for key")
end
local bucket = buckets[bucket_pos]
if not bucket then
self._size = self._size + 1
buckets[bucket_pos] = {hash, key, value}
return nil
else
local old_value = bucket[3]
bucket[3] = value
if update_key then
bucket[2] = key
end
return old_value
end
end
function BucketingMap:put(key, value)
put(self, key, value, false)
end
function BucketingMap:has_key(key)
local hasher = self._bucket_factory()
key:[Bucketable].bucket(hasher)
local hash = hasher:[Bucket].finish()
local buckets = self._buckets
local bucket_pos = find_bucket(buckets, self._capacity, hash, key)
local bucket = buckets[bucket_pos] -- buckets[nil] is always nil
if not bucket then
return false
else
return true
end
end
function BucketingMap:get(key, value)
local hasher = self._bucket_factory()
key:[Bucketable].bucket(hasher)
local hash = hasher:[Bucket].finish()
local buckets = self._buckets
local bucket_pos = find_bucket(buckets, self._capacity, hash, key)
local bucket = buckets[bucket_pos] -- buckets[nil] is always nil
if not bucket then
return nil
else
return bucket[3]
end
end
function BucketingMap:remove(key)
local hasher = self._bucket_factory()
key:[Bucketable].bucket(hasher)
local hash = hasher:[Bucket].finish()
local buckets = self._buckets
local bucket_pos = find_bucket(buckets, self._capacity, hash, key)
local bucket = buckets[bucket_pos]
if not bucket then
return nil
end
local value = bucket[3]
self._size = self._size + 1
-- just rehash lol
local capacity = self._capacity
local new_buckets = {}
for i=1,capacity do
new_buckets[i] = false
end
for i=1,bucket_pos-1 do
local old_bucket = buckets[i]
if old_bucket then
local new_bucket_pos = find_bucket(new_buckets, capacity, old_bucket[1], old_bucket[2])
new_buckets[new_bucket_pos] = old_bucket
end
end
for i=bucket_pos+1,capacity do
local old_bucket = buckets[i]
if old_bucket then
local new_bucket_pos = find_bucket(new_buckets, capacity, old_bucket[1], old_bucket[2])
new_buckets[new_bucket_pos] = old_bucket
end
end
self._buckets = new_buckets
return value
end
return {
BucketingMap = BucketingMap,
Bucket = Bucket,
Bucketable = Bucketable,
DefaultBucket = DefaultBucket,
}
|