Improve slice eval #446

Merged (27 commits, Aug 8, 2025)

Commits
98430bc
Improving step handling
Jul 23, 2025
24a26a8
Add fancy index test and improve slice handling for reductions
lshaw8317 Jul 23, 2025
896ccdf
Can't use cache for non-unit steps
lshaw8317 Jul 23, 2025
4b95955
Improving step handling
Jul 23, 2025
016ce34
Add fancy index test and improve slice handling for reductions
lshaw8317 Jul 23, 2025
c31cdac
Can't use cache for non-unit steps
lshaw8317 Jul 23, 2025
e2503e3
Give a try to latest pyodide 0.28.0
FrancescAlted Jul 25, 2025
f38a28d
Use better a restriction in numpy version
FrancescAlted Jul 25, 2025
4eecb02
No way; for now we can just use numpy 2.02 for pyodide
FrancescAlted Jul 25, 2025
62d46f9
Remove disk arrays in tests
Aug 4, 2025
1c73924
New commit to use new version of pyodide
Aug 4, 2025
98c3693
Apply pre-commit fixes
pre-commit-ci[bot] Aug 4, 2025
369f20a
Force pyodide version
Aug 5, 2025
22bd5e3
Force python 3.13
Aug 5, 2025
bac2309
Revert "Force pyodide version"
Aug 5, 2025
28d5e41
Revert "Force python 3.13"
Aug 5, 2025
b7b2e21
Force pyodide version
Aug 5, 2025
412dff0
Force numpy version
Aug 5, 2025
14f7d67
Only build WASM wheels for python 3.13, force pyodide version
Aug 6, 2025
ecb87f0
Only build WASM wheels for python 3.13, force pyodide version
Aug 6, 2025
6128a38
Force contiguous when an evaluation does not happen and handle specia…
Aug 6, 2025
c3af99b
Correct error with building py 3.11, 3.12 wasm wheels
Aug 6, 2025
5a1de4c
Simplify logic a little
Aug 6, 2025
80b7b24
Edit to ensure slice_fast_path doesn't load too-large array
Aug 8, 2025
83f88da
Merge
lshaw8317 Aug 8, 2025
1f9c10d
Add bench
lshaw8317 Aug 8, 2025
db2d38f
Fixed failing test
lshaw8317 Aug 8, 2025
2 changes: 1 addition & 1 deletion .github/workflows/wasm.yml
@@ -46,7 +46,7 @@ jobs:
run: pip install cibuildwheel

- name: Build wheels
# Testing is automaticall made by cibuildwheel
# Testing is automatically made by cibuildwheel
run: cibuildwheel --platform pyodide

- name: Upload wheels
49 changes: 49 additions & 0 deletions bench/ndarray/slice-expr-step.py
@@ -0,0 +1,49 @@
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <[email protected]>
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# Benchmark for computing a slice with non-unit steps of an expression in an ND array.

import blosc2
import numpy as np
import matplotlib.pyplot as plt
from memory_profiler import profile, memory_usage

N = 50_000
LARGE_SLICE = False
ndim = 2
shape = (N, ) * ndim
a = blosc2.linspace(start=0, stop=np.prod(shape), num=np.prod(shape), dtype=np.float64, shape=shape)
_slice = (slice(0, N, 2),) if LARGE_SLICE else (slice(0, N, N//4),)
expr = 2 * a ** 2

@profile
def _slice_():
res1 = expr.slice(_slice)
print(f'Result of slice occupies {res1.schunk.cbytes / 1024**2:.2f} MiB')
return res1

@profile
def _gitem():
res2 = expr[_slice]
print(f'Result of _getitem_ occupies {np.prod(res2.shape) * res2.itemsize / 1024**2:.2f} MiB')
return res2

interval = 0.001
offset = 0
for f in [_slice_, _gitem]:
mem = memory_usage((f,), interval=interval)
times = offset + interval * np.arange(len(mem))
offset = times[-1]
plt.plot(times, mem)

plt.xlabel('Time (s)')
plt.ylabel('Memory usage (MiB)')
lab = 'LARGE' if LARGE_SLICE else 'SMALL'
plt.title(f'{lab} slice w/steps, Linux Blosc2 {blosc2.__version__}')
plt.legend([f'expr.slice({_slice})', f'expr[{_slice}]'])
plt.savefig(f'sliceexpr_{lab}_Blosc{blosc2.__version__.replace(".", "_")}.png', format="png")
2 changes: 1 addition & 1 deletion doc/getting_started/tutorials/04.reductions.ipynb
@@ -308,7 +308,7 @@
"source": [
"# Manual chunking\n",
"chunks = (200, 200, 100)\n",
"#blocks = (100, 50, 50) # optional, but can help performance\n",
"# blocks = (100, 50, 50) # optional, but can help performance\n",
"meas = measure_blosc2(chunks, blocks=None)\n",
"plot_meas(meas_np, meas, chunks)"
]
111 changes: 62 additions & 49 deletions src/blosc2/lazyexpr.py
@@ -1412,26 +1412,19 @@ def slices_eval( # noqa: C901
# keep orig_slice
_slice = _slice.raw
orig_slice = _slice
full_slice = () # by default the full_slice is the whole array
final_slice = () # by default the final_slice is the whole array

# Compute the shape and chunks of the output array, including broadcasting
shape = compute_broadcast_shape(operands.values())
if out is None:
if _slice != ():
# Check whether _slice contains an integer, or any step that is not None or 1
if any(
(isinstance(s, int)) or (isinstance(s, slice) and s.step not in (None, 1)) for s in _slice
):
if any((isinstance(s, int)) for s in _slice):
need_final_slice = True
_slice = tuple(slice(i, i + 1) if isinstance(i, int) else i for i in _slice)
full_slice = tuple(
slice(s.start or 0, s.stop or shape[i], 1) for i, s in enumerate(_slice)
) # get rid of non-unit steps
_slice = tuple(slice(i, i + 1, 1) if isinstance(i, int) else i for i in _slice)
# shape_slice in general not equal to final shape:
# dummy dims (due to ints) or non-unit steps will be dealt with by taking final_slice
shape_slice = ndindex.ndindex(full_slice).newshape(shape)
final_slice = ndindex.ndindex(orig_slice).as_subindex(full_slice).raw
# dummy dims (due to ints) will be dealt with by taking final_slice
shape_slice = ndindex.ndindex(_slice).newshape(shape)
mask_slice = np.bool([isinstance(i, int) for i in orig_slice])
else:
# # out should always have shape of full array
# if shape is not None and shape != out.shape:
@@ -1476,23 +1469,21 @@
for nchunk, chunk_slice in enumerate(intersecting_chunks):
# get intersection of chunk and target
if _slice != ():
cslice = tuple(
slice(max(s1.start, s2.start), min(s1.stop, s2.stop))
for s1, s2 in zip(chunk_slice.raw, _slice, strict=True)
)
cslice = step_handler(chunk_slice.raw, _slice)
else:
cslice = chunk_slice.raw

cslice_shape = tuple(s.stop - s.start for s in cslice)
len_chunk = math.prod(cslice_shape)
# get local index of part of out that is to be updated
cslice_subidx = (
ndindex.ndindex(cslice).as_subindex(full_slice).raw
) # in the case full_slice=(), just gives cslice
ndindex.ndindex(cslice).as_subindex(_slice).raw
) # in the case _slice=(), just gives cslice

# Get the starts and stops for the slice
starts = [s.start if s.start is not None else 0 for s in cslice]
stops = [s.stop if s.stop is not None else sh for s, sh in zip(cslice, cslice_shape, strict=True)]
unit_steps = np.all([s.step == 1 for s in cslice])

# Get the slice of each operand
for key, value in operands.items():
@@ -1512,6 +1503,7 @@
key in chunk_operands
and cslice_shape == chunk_operands[key].shape
and isinstance(value, blosc2.NDArray)
and unit_steps
):
value.get_slice_numpy(chunk_operands[key], (starts, stops))
continue
@@ -1565,6 +1557,9 @@
result = x[result]
else:
raise ValueError("The where condition must be a tuple with one or two elements")
# Enforce contiguity of result (necessary to fill the out array)
# but avoid copy if already contiguous
result = np.require(result, requirements="C")

if out is None:
shape_ = shape_slice if shape_slice is not None else shape
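
A note on the np.require call added above: for an array that is already C-contiguous it returns the input unchanged, and it copies only when the layout actually violates the requirement, so enforcing contiguity here is cheap. A minimal standalone sketch (plain NumPy, not part of the patch):

import numpy as np

a = np.arange(12).reshape(3, 4)
view = a[:, ::2]  # non-contiguous strided view

assert np.require(a, requirements="C") is a  # no copy made
fixed = np.require(view, requirements="C")   # C-contiguous copy
assert fixed.flags["C_CONTIGUOUS"] and not view.flags["C_CONTIGUOUS"]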
@@ -1622,15 +1617,13 @@
out.resize((lenout,))

else: # Need to take final_slice since filled up array according to slice_ for each chunk
if final_slice != ():
if need_final_slice: # only called if out was None
if isinstance(out, np.ndarray):
if need_final_slice: # only called if out was None
out = out[final_slice]
out = np.squeeze(out, np.where(mask_slice)[0])
elif isinstance(out, blosc2.NDArray):
# It *seems* better to choose an automatic chunks and blocks for the output array
# out = out.slice(_slice, chunks=out.chunks, blocks=out.blocks)
if need_final_slice: # only called if out was None
out = out.slice(final_slice)
out = out.squeeze(mask_slice)
else:
raise ValueError("The output array is not a NumPy array or a NDArray")

Expand Down Expand Up @@ -1743,17 +1736,22 @@ def infer_reduction_dtype(dtype, operation):
raise ValueError(f"Unsupported operation: {operation}")


def step_handler(s1start, s2start, s1stop, s2stop, s2step):
# assume s1step = 1
newstart = max(s1start, s2start)
newstop = min(s1stop, s2stop)
rem = (newstart - s2start) % s2step
if rem != 0: # only pass through here if s2step is not 1
newstart += s2step - rem
# true_stop = start + n*step + 1 -> stop = start + n * step + 1 + residual
# so n = (stop - start - 1) // step
newstop = newstart + (newstop - newstart - 1) // s2step * s2step + 1
return slice(newstart, newstop, s2step)
def step_handler(cslice, _slice):
out = ()
for s1, s2 in zip(cslice, _slice, strict=True):
s1start, s1stop = s1.start, s1.stop
s2start, s2stop, s2step = s2.start, s2.stop, s2.step
# assume s1step = 1
newstart = max(s1start, s2start)
newstop = min(s1stop, s2stop)
rem = (newstart - s2start) % s2step
if rem != 0: # only pass through here if s2step is not 1
newstart += s2step - rem
# true_stop = start + n*step + 1 -> stop = start + n * step + 1 + residual
# so n = (stop - start - 1) // step
newstop = newstart + (newstop - newstart - 1) // s2step * s2step + 1
out += (slice(newstart, newstop, s2step),)
return out
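
A standalone sketch of the intersection arithmetic (the helper is duplicated so the snippet runs on its own; the chunk slice is assumed to have unit step and concrete bounds): the start is snapped forward onto the target's stride grid, and the stop is trimmed so the last selected element lies inside both slices.

import numpy as np

def step_handler(cslice, _slice):  # same logic as above, standalone
    out = ()
    for s1, s2 in zip(cslice, _slice, strict=True):
        newstart = max(s1.start, s2.start)
        newstop = min(s1.stop, s2.stop)
        rem = (newstart - s2.start) % s2.step
        if rem != 0:  # snap start forward onto the stride grid
            newstart += s2.step - rem
        newstop = newstart + (newstop - newstart - 1) // s2.step * s2.step + 1
        out += (slice(newstart, newstop, s2.step),)
    return out

a = np.arange(100)
chunk = (slice(30, 60, 1),)  # unit-step chunk extent
target = (slice(5, 95, 7),)  # stepped target slice
inter = step_handler(chunk, target)  # -> (slice(33, 55, 7),)
# exactly the chunk elements that sit on the target's stride grid
assert list(a[inter[0]]) == [33, 40, 47, 54]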


def reduce_slices( # noqa: C901
@@ -1807,21 +1805,27 @@

_slice = _slice.raw
shape_slice = shape
full_slice = () # by default the full_slice is the whole array
mask_slice = np.bool([isinstance(i, int) for i in _slice])
if out is None and _slice != ():
_slice = tuple(slice(i, i + 1, 1) if isinstance(i, int) else i for i in _slice)
shape_slice = ndindex.ndindex(_slice).newshape(shape)
full_slice = _slice
# shape_slice in general not equal to final shape:
# dummy dims (due to ints) will be dealt with by taking final_slice

# after slicing, we reduce to calculate shape of output
if axis is None:
axis = tuple(range(len(shape_slice)))
elif not isinstance(axis, tuple):
axis = (axis,)
axis = tuple(a if a >= 0 else a + len(shape_slice) for a in axis)
axis = np.array([a if a >= 0 else a + len(shape_slice) for a in axis])
if np.any(mask_slice):
axis = tuple(axis + np.cumsum(mask_slice)[axis]) # axis now refers to new shape with dummy dims
reduce_args["axis"] = axis
if keepdims:
reduced_shape = tuple(1 if i in axis else s for i, s in enumerate(shape_slice))
else:
reduced_shape = tuple(s for i, s in enumerate(shape_slice) if i not in axis)
mask_slice = mask_slice[[i for i in range(len(mask_slice)) if i not in axis]]

if out is not None and reduced_shape != out.shape:
raise ValueError("Provided output shape does not match the reduced shape.")
@@ -1876,13 +1880,10 @@
# Check whether current cslice intersects with _slice
if cslice != () and _slice != ():
# get intersection of chunk and target
cslice = tuple(
step_handler(s1.start, s2.start, s1.stop, s2.stop, s2.step)
for s1, s2 in zip(cslice, _slice, strict=True)
)
cslice = step_handler(cslice, _slice)
chunks_ = tuple(s.stop - s.start for s in cslice)

if _slice == () and fast_path:
unit_steps = np.all([s.step == 1 for s in cslice])
if _slice == () and fast_path and unit_steps:
# Fast path
full_chunk = chunks_ == chunks
fill_chunk_operands(
@@ -1910,15 +1911,14 @@
key in chunk_operands
and chunks_ == chunk_operands[key].shape
and isinstance(value, blosc2.NDArray)
and unit_steps
):
value.get_slice_numpy(chunk_operands[key], (starts, stops))
continue
chunk_operands[key] = value[cslice]

# get local index of part of out that is to be updated
cslice_subidx = (
ndindex.ndindex(cslice).as_subindex(full_slice).raw
) # if full_slice is (), just gives cslice
cslice_subidx = ndindex.ndindex(cslice).as_subindex(_slice).raw # if _slice is (), just gives cslice
if keepdims:
reduced_slice = tuple(slice(None) if i in axis else sl for i, sl in enumerate(cslice_subidx))
else:
@@ -1938,8 +1938,8 @@

if where is None:
if expression == "o0":
# We don't have an actual expression, so avoid a copy
result = chunk_operands["o0"]
# We don't have an actual expression, so avoid a copy except to make contiguous
result = np.require(chunk_operands["o0"], requirements="C")
else:
result = ne_evaluate(expression, chunk_operands, **ne_args)
else:
@@ -1997,6 +1997,9 @@
dtype = np.float64
out = convert_none_out(dtype, reduce_op, reduced_shape)

final_mask = tuple(np.where(mask_slice)[0])
if np.any(mask_slice): # remove dummy dims
out = np.squeeze(out, axis=final_mask)
# Check if the output array needs to be converted into a blosc2.NDArray
if kwargs != {} and not np.isscalar(out):
out = blosc2.asarray(out, **kwargs)
@@ -2089,7 +2092,9 @@
# The fast path is possible under a few conditions
if getitem and (where is None or len(where) == 2) and not callable(expression):
# Compute the size of operands for the fast path
shape_operands = item.newshape(shape) # shape of slice
unit_steps = np.all([s.step == 1 for s in item.raw if isinstance(s, slice)])
# shape of slice, if non-unit steps have to decompress full array into memory
shape_operands = item.newshape(shape) if unit_steps else shape
_dtype = kwargs.get("dtype", np.float64)
size_operands = math.prod(shape_operands) * len(operands) * _dtype.itemsize
# Only take the fast path if the size of operands is relatively small
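
A hedged sketch of this size heuristic as a standalone function (operand_size_estimate and its signature are hypothetical, not the library's API): with non-unit steps the fast path would have to decompress the full array, so the estimate falls back to the full shape.

import math
import ndindex
import numpy as np

def operand_size_estimate(item, shape, n_operands, dtype=np.dtype("float64")):
    idx = ndindex.ndindex(item)
    slices = [s for s in idx.raw if isinstance(s, slice)]
    unit_steps = all(s.step in (None, 1) for s in slices)
    # Non-unit steps mean decompressing the full array into memory,
    # so base the estimate on the full shape in that case
    shape_operands = idx.newshape(shape) if unit_steps else shape
    return math.prod(shape_operands) * n_operands * dtype.itemsize

# arr[::2] on a (1000, 1000) float64 operand counts the full ~8 MB,
# not the ~4 MB the strided view would occupy
print(operand_size_estimate((slice(None, None, 2),), (1000, 1000), 1))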
@@ -3118,6 +3123,14 @@ def _new_expr(cls, expression, operands, guess, out=None, where=None, ne_args=No
new_expr.expression_tosave = expression
new_expr.operands = operands_
new_expr.operands_tosave = operands
elif isinstance(new_expr, blosc2.NDArray) and len(operands) == 1:
# passed either "a" or possibly "a[:10]"
expression_, operands_ = conserve_functions(
_expression, _operands, {"o0": list(operands.values())[0]} | local_vars
)
new_expr = cls(None)
new_expr.expression = expression_
new_expr.operands = operands_
else:
# An immediate evaluation happened (e.g. all operands are numpy arrays)
new_expr = cls(None)
2 changes: 2 additions & 0 deletions tests/ndarray/test_concatenate.py
@@ -99,3 +99,5 @@ def test_stack(shape, dtype, axis):
[ndarr1, ndarr2, ndarr3], axis=axis, cparams=cparams, urlpath="localfile.b2nd", mode="w"
)
np.testing.assert_almost_equal(result[:], nparray)
# Remove localfile
blosc2.remove_urlpath("localfile.b2nd")
30 changes: 23 additions & 7 deletions tests/ndarray/test_lazyexpr.py
@@ -1088,7 +1088,7 @@ def test_eval_getitem2():
np.testing.assert_allclose(expr[0], nres[0])
np.testing.assert_allclose(expr[1:, :7], nres[1:, :7])
np.testing.assert_allclose(expr[0:10:2], nres[0:10:2])
# This works, but it is not very efficient since it relies on blosc2.ndarray.slice for non-unit steps
# Now relies on the (inefficient) blosc2.ndarray.slice for non-unit steps, but only per chunk (not for the whole result)
np.testing.assert_allclose(expr.slice((slice(None, None, None), slice(0, 10, 2)))[:], nres[:, 0:10:2])

# Small test for broadcasting
Expand All @@ -1097,21 +1097,21 @@ def test_eval_getitem2():
np.testing.assert_allclose(expr[0], nres[0])
np.testing.assert_allclose(expr[1:, :7], nres[1:, :7])
np.testing.assert_allclose(expr[:, 0:10:2], nres[:, 0:10:2])
# This works, but it is not very efficient since it relies on blosc2.ndarray.slice for non-unit steps
# Now relies on the (inefficient) blosc2.ndarray.slice for non-unit steps, but only per chunk (not for the whole result)
np.testing.assert_allclose(expr.slice((slice(None, None, None), slice(0, 10, 2)))[:], nres[:, 0:10:2])


# Test lazyexpr's slice method
def test_eval_slice(array_fixture):
a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture
expr = blosc2.lazyexpr("a1 + a2 - (a3 * a4)", operands={"a1": a1, "a2": a2, "a3": a3, "a4": a4})
nres = ne_evaluate("na1 + na2 - (na3 * na4)")[:2]
res = expr.slice(slice(0, 2))
nres = ne_evaluate("na1 + na2 - (na3 * na4)")
res = expr.slice(slice(0, 8, 2))
assert isinstance(res, blosc2.ndarray.NDArray)
np.testing.assert_allclose(res[:], nres)
res = expr[:2]
np.testing.assert_allclose(res[:], nres[:8:2])
res = expr[:8:2]
assert isinstance(res, np.ndarray)
np.testing.assert_allclose(res, nres)
np.testing.assert_allclose(res, nres[:8:2])

# string lazy expressions automatically use .slice internally
expr1 = blosc2.lazyexpr("a1 * a2", operands={"a1": a1, "a2": a2})
@@ -1123,6 +1123,18 @@ def test_eval_slice(array_fixture):
np.testing.assert_allclose(res[()], nres)


def test_rebasing(array_fixture):
a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture
expr = blosc2.lazyexpr("a1 + a2 - (a3 * a4)", operands={"a1": a1, "a2": a2, "a3": a3, "a4": a4})
assert expr.expression == "(o0 + o1 - o2 * o3)"

expr = blosc2.lazyexpr("a1")
assert expr.expression == "o0"

expr = blosc2.lazyexpr("a1[:10]")
assert expr.expression == "o0.slice((slice(None, 10, None),))"


# Test get_chunk method
@pytest.mark.heavy
def test_get_chunk(array_fixture):
@@ -1457,6 +1469,10 @@ def test_chain_persistentexpressions():
myle4 = blosc2.open("expr4.b2nd")
assert (myle4[:] == le4[:]).all()

# Remove files
for f in ["expr1.b2nd", "expr2.b2nd", "expr3.b2nd", "expr4.b2nd", "a.b2nd", "b.b2nd", "c.b2nd"]:
blosc2.remove_urlpath(f)


@pytest.mark.parametrize(
"values",
3 changes: 3 additions & 0 deletions tests/ndarray/test_lazyexpr_fields.py
@@ -671,3 +671,6 @@ def test_fields_indexing():
gotitem = expr[0] # gives an error
np.testing.assert_array_equal(sliced[()], gotitem)
np.testing.assert_array_equal(gotitem, temp[0])

# Remove file
blosc2.remove_urlpath("sa-1M.b2nd")