15
15
log = logging .getLogger (__name__ )
16
16
17
17
18
def byte_length(bit_length):
    """Return the number of whole bytes needed to hold ``bit_length`` bits.

    The bit count is rounded up to the next multiple of eight before the
    floor division, so any partial trailing byte counts as a full byte.
    """
    # Adding 7 before floor-dividing by 8 rounds up to the next whole byte.
    padded_bits = bit_length + 7
    return padded_bits // 8
20
+
21
+
18
22
class InvertedLogicException (Exception ):
19
23
def __init__ (self , * , depth , exclude_count , include_len ):
20
24
self .message = (
@@ -54,12 +58,14 @@ def __init__(
54
58
nHashFuncs ,
55
59
level ,
56
60
hashAlg = fileformats .HashAlgorithm .MURMUR3 ,
61
+ hashOffset = 0 ,
57
62
salt = None ,
58
63
):
59
64
self .nHashFuncs = nHashFuncs
60
65
self .size = size
61
66
self .level = level
62
67
self .hashAlg = fileformats .HashAlgorithm (hashAlg )
68
+ self .hashOffset = hashOffset
63
69
self .salt = salt
64
70
65
71
self .bitarray = bitarray .bitarray (self .size , endian = "little" )
@@ -91,14 +97,33 @@ def hash(self, *, hash_no, key):
91
97
m = hashlib .sha256 ()
92
98
if self .salt :
93
99
m .update (self .salt )
94
- m .update (fileformats .bloomer_sha256_hash_struct .pack (hash_no , self .level ))
100
+ m .update (
101
+ fileformats .bloomer_sha256_hash_struct .pack (hash_no , self .level )
102
+ )
95
103
m .update (key )
96
104
h = (
97
105
int .from_bytes (m .digest ()[:4 ], byteorder = "little" , signed = False )
98
106
% self .size
99
107
)
100
108
return h
101
109
110
+ if self .hashAlg == fileformats .HashAlgorithm .SHA256CTR :
111
+ b = []
112
+ bytes_needed = byte_length (self .size .bit_length ())
113
+ offset = self .hashOffset + hash_no * bytes_needed
114
+ while len (b ) < bytes_needed :
115
+ m = hashlib .sha256 ()
116
+ m .update (fileformats .bloomer_sha256ctr_hash_struct .pack (offset // 32 ))
117
+ m .update (self .salt )
118
+ m .update (key )
119
+ digest = m .digest ()
120
+ i = offset % 32
121
+ x = digest [i : i + bytes_needed - len (b )]
122
+ b .extend (x )
123
+ offset += len (x )
124
+ h = int .from_bytes (b , byteorder = "little" , signed = False ) % self .size
125
+ return h
126
+
102
127
raise Exception (f"Unknown hash algorithm: { self .hashAlg } " )
103
128
104
129
def add (self , key ):
@@ -136,13 +161,19 @@ def filter_with_characteristics(
136
161
elements ,
137
162
falsePositiveRate ,
138
163
hashAlg = fileformats .HashAlgorithm .MURMUR3 ,
164
+ hashOffset = 0 ,
139
165
salt = None ,
140
166
level = 1 ,
141
167
):
142
168
nHashFuncs = Bloomer .calc_n_hashes (falsePositiveRate )
143
169
size = Bloomer .calc_size (nHashFuncs , elements , falsePositiveRate )
144
170
return Bloomer (
145
- size = size , nHashFuncs = nHashFuncs , level = level , hashAlg = hashAlg , salt = salt
171
+ size = size ,
172
+ nHashFuncs = nHashFuncs ,
173
+ level = level ,
174
+ hashAlg = hashAlg ,
175
+ hashOffset = hashOffset ,
176
+ salt = salt ,
146
177
)
147
178
148
179
@classmethod
@@ -161,7 +192,7 @@ def calc_size(cls, nHashFuncs, elements, falsePositiveRate):
161
192
min_bits = math .ceil (1.44 * elements * math .log2 (1 / falsePositiveRate ))
162
193
assert min_bits > 0 , "Always must have a positive number of bits"
163
194
# Ensure the result is divisible by 8 for full bytes
164
- return 8 * math . ceil (min_bits / 8 )
195
+ return 8 * byte_length (min_bits )
165
196
166
197
@classmethod
167
198
def from_buf (cls , buf , salt = None ):
@@ -206,10 +237,10 @@ def __init__(
206
237
invertedLogic = None ,
207
238
):
208
239
"""
209
- Construct a FilterCascade.
210
- error_rates: If not supplied, defaults will be calculated
211
- invertedLogic: If not supplied (or left as None), it will be auto-
212
- detected.
240
+ Construct a FilterCascade.
241
+ error_rates: If not supplied, defaults will be calculated
242
+ invertedLogic: If not supplied (or left as None), it will be auto-
243
+ detected.
213
244
"""
214
245
self .filters = filters or []
215
246
self .growth_factor = growth_factor
@@ -250,10 +281,10 @@ def set_crlite_error_rates(self, *, include_len, exclude_len):
250
281
251
282
def initialize (self , * , include , exclude ):
252
283
"""
253
- Arg "exclude" is potentially larger than main memory, so it should
254
- be assumed to be passed as a lazy-loading iterator. If it isn't,
255
- that's fine. The "include" arg must fit in memory and should be
256
- assumed to be a set.
284
+ Arg "exclude" is potentially larger than main memory, so it should
285
+ be assumed to be passed as a lazy-loading iterator. If it isn't,
286
+ that's fine. The "include" arg must fit in memory and should be
287
+ assumed to be a set.
257
288
"""
258
289
try :
259
290
iter (exclude )
@@ -286,6 +317,13 @@ def initialize(self, *, include, exclude):
286
317
er = self .error_rates [depth - 1 ]
287
318
288
319
if depth > len (self .filters ):
320
+ if len (self .filters ) == 0 :
321
+ hashOffset = 0
322
+ else :
323
+ prev = self .filters [- 1 ]
324
+ hashOffset = prev .hashOffset + prev .nHashFuncs * byte_length (
325
+ prev .size .bit_length ()
326
+ )
289
327
self .filters .append (
290
328
Bloomer .filter_with_characteristics (
291
329
elements = max (
@@ -296,10 +334,15 @@ def initialize(self, *, include, exclude):
296
334
falsePositiveRate = er ,
297
335
level = depth ,
298
336
hashAlg = self .defaultHashAlg ,
337
+ hashOffset = hashOffset ,
299
338
)
300
339
)
301
340
else :
302
341
# Filter already created for this layer. Check size and resize if needed.
342
+ prev = self .filters [depth - 1 ]
343
+ hashOffset = prev .hashOffset + prev .nHashFuncs * byte_length (
344
+ prev .size .bit_length ()
345
+ )
303
346
required_size = Bloomer .calc_size (
304
347
self .filters [depth - 1 ].nHashFuncs , include_len , er
305
348
)
@@ -310,6 +353,7 @@ def initialize(self, *, include, exclude):
310
353
falsePositiveRate = er ,
311
354
level = depth ,
312
355
hashAlg = self .defaultHashAlg ,
356
+ hashOffset = hashOffset ,
313
357
)
314
358
log .info (
315
359
f"Resized filter at { depth } -depth layer to { self .filters [depth - 1 ].size } "
0 commit comments