Skip to content

Commit d3b0802

Browse files
committed
bugfix: cleaner and more correct ranges
1 parent 8087cbb commit d3b0802

File tree

2 files changed

+5
-16
lines changed

2 files changed

+5
-16
lines changed

v03_pipeline/lib/misc/clickhouse.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import ast
22
import functools
33
import hashlib
4+
import math
45
import os
56
import time
67
from collections.abc import Callable
@@ -13,7 +14,6 @@
1314
from v03_pipeline.lib.core import DatasetType, ReferenceGenome
1415
from v03_pipeline.lib.core.environment import Env
1516
from v03_pipeline.lib.logger import get_logger
16-
from v03_pipeline.lib.misc.math import split_ranges
1717
from v03_pipeline.lib.misc.retry import retry
1818
from v03_pipeline.lib.paths import (
1919
new_entries_parquet_path,
@@ -877,7 +877,9 @@ def rebuild_gt_stats(
877877
SELECT max(key) FROM {table_name_builder.dst_table(ClickHouseTable.GT_STATS)}
878878
""",
879879
)[0][0]
880-
for range_start, range_end in split_ranges(max_key):
880+
step = math.ceil(max_key / 5)
881+
for range_start in range(0, max_key, step):
882+
range_end = min(range_start + step, max_key + 1)
881883
logged_query(
882884
f"""
883885
INSERT INTO {
@@ -886,7 +888,7 @@ def rebuild_gt_stats(
886888
{
887889
select_statement.replace(
888890
'GROUP BY project_guid',
889-
'WHERE key >= %(range_start)s AND key <= %(range_end)s GROUP BY project_guid',
891+
'WHERE key >= %(range_start)s AND key < %(range_end)s GROUP BY project_guid',
890892
)
891893
}
892894
""",

v03_pipeline/lib/misc/math.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,6 @@
11
import math
22
from collections.abc import Generator
33

4-
5-
def split_ranges(max_value: int, n: int = 5) -> Generator[tuple[int, int], None, None]:
    """Yield at most ``n`` inclusive ``(start, end)`` ranges covering ``0..max_value``.

    The ranges are contiguous and non-overlapping: every integer in
    ``0..max_value`` falls in exactly one of them, so the bounds can be used
    directly in an inclusive ``start <= key <= end`` filter without any key
    being visited twice.

    Args:
        max_value: Largest value (inclusive) the ranges must cover.
        n: Maximum number of ranges to produce; must be at least 1.

    Yields:
        ``(start, end)`` tuples with both bounds inclusive, in ascending order.
        When ``max_value < n`` a single ``(0, max_value)`` tuple is yielded.

    Raises:
        ValueError: If ``n`` is less than 1.
    """
    if n < 1:
        # A zero ``n`` would divide by zero and a negative one would never
        # terminate, so reject both explicitly.
        msg = f'n must be a positive integer, got {n}'
        raise ValueError(msg)
    if max_value < n:
        # Too few values to be worth splitting.
        yield (0, max_value)
        return
    # Size the step from the number of integers in 0..max_value (inclusive),
    # rounding up so that no more than ``n`` ranges are produced.
    total = max_value + 1
    step = math.ceil(total / n)
    for start in range(0, total, step):
        # ``end`` is inclusive; the next range starts at ``end + 1``, so
        # adjacent ranges never share a boundary value.
        yield (start, min(start + step, total) - 1)
15-
16-
174
def constrain(
185
number: int | float,
196
lower_bound: int | float,

0 commit comments

Comments
 (0)