1515log = logging .getLogger (__name__ )
1616
1717
def byte_length(bit_length):
    """Return the number of whole bytes needed to hold ``bit_length`` bits.

    The result is rounded up: 0 bits need 0 bytes, 1 through 8 bits need
    1 byte, 9 bits need 2 bytes, and so on.
    """
    # Adding 7 before the floor division rounds up to the next full byte.
    return (bit_length + 7) // 8
20+
21+
1822class InvertedLogicException (Exception ):
1923 def __init__ (self , * , depth , exclude_count , include_len ):
2024 self .message = (
@@ -54,12 +58,14 @@ def __init__(
5458 nHashFuncs ,
5559 level ,
5660 hashAlg = fileformats .HashAlgorithm .MURMUR3 ,
61+ hashOffset = 0 ,
5762 salt = None ,
5863 ):
5964 self .nHashFuncs = nHashFuncs
6065 self .size = size
6166 self .level = level
6267 self .hashAlg = fileformats .HashAlgorithm (hashAlg )
68+ self .hashOffset = hashOffset
6369 self .salt = salt
6470
6571 self .bitarray = bitarray .bitarray (self .size , endian = "little" )
@@ -99,6 +105,23 @@ def hash(self, *, hash_no, key):
99105 )
100106 return h
101107
108+ if self .hashAlg == fileformats .HashAlgorithm .SHA256CTR :
109+ b = []
110+ bytes_needed = byte_length (self .size .bit_length ())
111+ offset = self .hashOffset + hash_no * bytes_needed
112+ while len (b ) < bytes_needed :
113+ m = hashlib .sha256 ()
114+ m .update (fileformats .bloomer_sha256ctr_hash_struct .pack (offset // 32 ))
115+ m .update (self .salt )
116+ m .update (key )
117+ digest = m .digest ()
118+ i = offset % 32
119+ x = digest [i : i + bytes_needed - len (b )]
120+ b .extend (x )
121+ offset += len (x )
122+ h = int .from_bytes (b , byteorder = "little" , signed = False ) % self .size
123+ return h
124+
102125 raise Exception (f"Unknown hash algorithm: { self .hashAlg } " )
103126
104127 def add (self , key ):
@@ -136,13 +159,19 @@ def filter_with_characteristics(
136159 elements ,
137160 falsePositiveRate ,
138161 hashAlg = fileformats .HashAlgorithm .MURMUR3 ,
162+ hashOffset = 0 ,
139163 salt = None ,
140164 level = 1 ,
141165 ):
142166 nHashFuncs = Bloomer .calc_n_hashes (falsePositiveRate )
143167 size = Bloomer .calc_size (nHashFuncs , elements , falsePositiveRate )
144168 return Bloomer (
145- size = size , nHashFuncs = nHashFuncs , level = level , hashAlg = hashAlg , salt = salt
169+ size = size ,
170+ nHashFuncs = nHashFuncs ,
171+ level = level ,
172+ hashAlg = hashAlg ,
173+ hashOffset = hashOffset ,
174+ salt = salt ,
146175 )
147176
148177 @classmethod
@@ -161,7 +190,7 @@ def calc_size(cls, nHashFuncs, elements, falsePositiveRate):
161190 min_bits = math .ceil (1.44 * elements * math .log2 (1 / falsePositiveRate ))
162191 assert min_bits > 0 , "Always must have a positive number of bits"
163192 # Ensure the result is divisible by 8 for full bytes
164- return 8 * math . ceil (min_bits / 8 )
193+ return 8 * byte_length (min_bits )
165194
166195 @classmethod
167196 def from_buf (cls , buf , salt = None ):
@@ -206,10 +235,10 @@ def __init__(
206235 invertedLogic = None ,
207236 ):
208237 """
209- Construct a FilterCascade.
210- error_rates: If not supplied, defaults will be calculated
211- invertedLogic: If not supplied (or left as None), it will be auto-
212- detected.
238+ Construct a FilterCascade.
239+ error_rates: If not supplied, defaults will be calculated
240+ invertedLogic: If not supplied (or left as None), it will be auto-
241+ detected.
213242 """
214243 self .filters = filters or []
215244 self .growth_factor = growth_factor
@@ -250,10 +279,10 @@ def set_crlite_error_rates(self, *, include_len, exclude_len):
250279
251280 def initialize (self , * , include , exclude ):
252281 """
253- Arg "exclude" is potentially larger than main memory, so it should
254- be assumed to be passed as a lazy-loading iterator. If it isn't,
255- that's fine. The "include" arg must fit in memory and should be
256- assumed to be a set.
282+ Arg "exclude" is potentially larger than main memory, so it should
283+ be assumed to be passed as a lazy-loading iterator. If it isn't,
284+ that's fine. The "include" arg must fit in memory and should be
285+ assumed to be a set.
257286 """
258287 try :
259288 iter (exclude )
@@ -286,6 +315,13 @@ def initialize(self, *, include, exclude):
286315 er = self .error_rates [depth - 1 ]
287316
288317 if depth > len (self .filters ):
318+ if len (self .filters ) == 0 :
319+ hashOffset = 0
320+ else :
321+ prev = self .filters [- 1 ]
322+ hashOffset = prev .hashOffset + prev .nHashFuncs * byte_length (
323+ prev .size .bit_length ()
324+ )
289325 self .filters .append (
290326 Bloomer .filter_with_characteristics (
291327 elements = max (
@@ -296,10 +332,15 @@ def initialize(self, *, include, exclude):
296332 falsePositiveRate = er ,
297333 level = depth ,
298334 hashAlg = self .defaultHashAlg ,
335+ hashOffset = hashOffset ,
299336 )
300337 )
301338 else :
302339 # Filter already created for this layer. Check size and resize if needed.
340+ prev = self .filters [depth - 1 ]
341+ hashOffset = prev .hashOffset + prev .nHashFuncs * byte_length (
342+ prev .size .bit_length ()
343+ )
303344 required_size = Bloomer .calc_size (
304345 self .filters [depth - 1 ].nHashFuncs , include_len , er
305346 )
@@ -310,6 +351,7 @@ def initialize(self, *, include, exclude):
310351 falsePositiveRate = er ,
311352 level = depth ,
312353 hashAlg = self .defaultHashAlg ,
354+ hashOffset = hashOffset ,
313355 )
314356 log .info (
315357 f"Resized filter at { depth } -depth layer to { self .filters [depth - 1 ].size } "
0 commit comments