From 98430bc2bad04fd700a48476f12d5b1618874cee Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Wed, 23 Jul 2025 10:13:50 +0200 Subject: [PATCH 01/26] Improving step handling --- src/blosc2/lazyexpr.py | 35 ++++++++++++++------------------ tests/ndarray/test_reductions.py | 14 +++++++++++++ 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index cf7e2976..0b258fa0 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -1404,26 +1404,19 @@ def slices_eval( # noqa: C901 # keep orig_slice _slice = _slice.raw orig_slice = _slice - full_slice = () # by default the full_slice is the whole array - final_slice = () # by default the final_slice is the whole array # Compute the shape and chunks of the output array, including broadcasting shape = compute_broadcast_shape(operands.values()) if out is None: if _slice != (): # Check whether _slice contains an integer, or any step that are not None or 1 - if any( - (isinstance(s, int)) or (isinstance(s, slice) and s.step not in (None, 1)) for s in _slice - ): + if any((isinstance(s, int)) for s in _slice): need_final_slice = True _slice = tuple(slice(i, i + 1) if isinstance(i, int) else i for i in _slice) - full_slice = tuple( - slice(s.start or 0, s.stop or shape[i], 1) for i, s in enumerate(_slice) - ) # get rid of non-unit steps # shape_slice in general not equal to final shape: - # dummy dims (due to ints) or non-unit steps will be dealt with by taking final_slice - shape_slice = ndindex.ndindex(full_slice).newshape(shape) - final_slice = ndindex.ndindex(orig_slice).as_subindex(full_slice).raw + # dummy dims (due to ints) will be dealt with by taking final_slice + shape_slice = ndindex.ndindex(_slice).newshape(shape) + mask_slice = np.bool([isinstance(i, int) for i in orig_slice]) else: # # out should always have shape of full array # if shape is not None and shape != out.shape: @@ -1469,7 +1462,7 @@ def slices_eval( # noqa: C901 # get intersection of chunk and target if _slice != (): cslice = tuple( - slice(max(s1.start, s2.start), min(s1.stop, s2.stop)) + step_handler(s1.start, s2.start, s1.stop, s2.stop, s2.step) for s1, s2 in zip(chunk_slice.raw, _slice, strict=True) ) else: @@ -1479,12 +1472,13 @@ def slices_eval( # noqa: C901 len_chunk = math.prod(cslice_shape) # get local index of part of out that is to be updated cslice_subidx = ( - ndindex.ndindex(cslice).as_subindex(full_slice).raw - ) # in the case full_slice=(), just gives cslice + ndindex.ndindex(cslice).as_subindex(_slice).raw + ) # in the case _slice=(), just gives cslice # Get the starts and stops for the slice starts = [s.start if s.start is not None else 0 for s in cslice] stops = [s.stop if s.stop is not None else sh for s, sh in zip(cslice, cslice_shape, strict=True)] + stepper = tuple(slice(None, None, s.step) for s in cslice) # Get the slice of each operand for key, value in operands.items(): @@ -1506,6 +1500,7 @@ def slices_eval( # noqa: C901 and isinstance(value, blosc2.NDArray) ): value.get_slice_numpy(chunk_operands[key], (starts, stops)) + chunk_operands[key] = chunk_operands[key][stepper] continue chunk_operands[key] = value[cslice] @@ -1614,15 +1609,13 @@ def slices_eval( # noqa: C901 out.resize((lenout,)) else: # Need to take final_slice since filled up array according to slice_ for each chunk - if final_slice != (): + if need_final_slice: # only called if out was None if isinstance(out, np.ndarray): - if need_final_slice: # only called if out was None - out = out[final_slice] + out = np.squeeze(out, np.where(mask_slice)) elif isinstance(out, blosc2.NDArray): # It *seems* better to choose an automatic chunks and blocks for the output array # out = out.slice(_slice, chunks=out.chunks, blocks=out.blocks) - if need_final_slice: # only called if out was None - out = out.slice(final_slice) + out = out.squeeze(mask_slice) else: raise ValueError("The output array is not a NumPy array or a NDArray") @@ -1873,7 +1866,7 @@ def reduce_slices( # noqa: C901 for s1, s2 in zip(cslice, _slice, strict=True) ) chunks_ = tuple(s.stop - s.start for s in cslice) - + fast_path = False if any((s.step != 1 or s.step is not None) for s in cslice) else fast_path if _slice == () and fast_path: # Fast path full_chunk = chunks_ == chunks @@ -1884,6 +1877,7 @@ def reduce_slices( # noqa: C901 # Get the starts and stops for the slice starts = [s.start if s.start is not None else 0 for s in cslice] stops = [s.stop if s.stop is not None else sh for s, sh in zip(cslice, chunks_, strict=True)] + stepper = tuple(slice(None, None, s.step) for s in cslice) # Get the slice of each operand for key, value in operands.items(): if np.isscalar(value): @@ -1904,6 +1898,7 @@ def reduce_slices( # noqa: C901 and isinstance(value, blosc2.NDArray) ): value.get_slice_numpy(chunk_operands[key], (starts, stops)) + chunk_operands[key] = chunk_operands[key][stepper] continue chunk_operands[key] = value[cslice] diff --git a/tests/ndarray/test_reductions.py b/tests/ndarray/test_reductions.py index fa39392b..ae28bb97 100644 --- a/tests/ndarray/test_reductions.py +++ b/tests/ndarray/test_reductions.py @@ -207,6 +207,20 @@ def test_reduce_slice(reduce_op): np.testing.assert_allclose(res, nres, atol=tol, rtol=tol) +# TODO: Test cache path for reductions with non-unit steps in slice +def test_reduceslice_cache(): + shape = (8, 12, 5) + na = np.linspace(0, 1, num=np.prod(shape)).reshape(shape) + a = blosc2.asarray(na, chunks=(2, 5, 1)) + tol = 1e-6 if na.dtype == np.float32 else 1e-15 + + # Test reductions with slices and strides + _slice = (slice(1, 2, 1), slice(1, 9, 2)) + res = blosc2.sum(2 * a + 0.2 * a, item=_slice, axis=1) + nres = na[_slice] * 2.2 + np.testing.assert_allclose(res, nres, atol=tol, rtol=tol) + + # Test fast path for reductions @pytest.mark.parametrize( ("chunks", "blocks"), From 24a26a8a1dfbf497a37425d1cb47616c35ec0b23 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Wed, 23 Jul 2025 13:30:57 +0200 Subject: [PATCH 02/26] Add fancy index test and improve slice handling for reductions --- src/blosc2/lazyexpr.py | 60 ++++++++++++++++++-------------- tests/ndarray/test_ndarray.py | 29 +++++++++++++++ tests/ndarray/test_reductions.py | 19 +++++----- 3 files changed, 70 insertions(+), 38 deletions(-) diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index 0b258fa0..9cdb9a87 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -1412,7 +1412,7 @@ def slices_eval( # noqa: C901 # Check whether _slice contains an integer, or any step that are not None or 1 if any((isinstance(s, int)) for s in _slice): need_final_slice = True - _slice = tuple(slice(i, i + 1) if isinstance(i, int) else i for i in _slice) + _slice = tuple(slice(i, i + 1, 1) if isinstance(i, int) else i for i in _slice) # shape_slice in general not equal to final shape: # dummy dims (due to ints) will be dealt with by taking final_slice shape_slice = ndindex.ndindex(_slice).newshape(shape) @@ -1461,10 +1461,7 @@ def slices_eval( # noqa: C901 for nchunk, chunk_slice in enumerate(intersecting_chunks): # get intersection of chunk and target if _slice != (): - cslice = tuple( - step_handler(s1.start, s2.start, s1.stop, s2.stop, s2.step) - for s1, s2 in zip(chunk_slice.raw, _slice, strict=True) - ) + cslice = step_handler(chunk_slice.raw, _slice) else: cslice = chunk_slice.raw @@ -1611,7 +1608,7 @@ def slices_eval( # noqa: C901 else: # Need to take final_slice since filled up array according to slice_ for each chunk if need_final_slice: # only called if out was None if isinstance(out, np.ndarray): - out = np.squeeze(out, np.where(mask_slice)) + out = np.squeeze(out, np.where(mask_slice)[0]) elif isinstance(out, blosc2.NDArray): # It *seems* better to choose an automatic chunks and blocks for the output array # out = out.slice(_slice, chunks=out.chunks, blocks=out.blocks) @@ -1728,17 +1725,22 @@ def infer_reduction_dtype(dtype, operation): raise ValueError(f"Unsupported operation: {operation}") -def step_handler(s1start, s2start, s1stop, s2stop, s2step): - # assume s1step = 1 - newstart = max(s1start, s2start) - newstop = min(s1stop, s2stop) - rem = (newstart - s2start) % s2step - if rem != 0: # only pass through here if s2step is not 1 - newstart += s2step - rem - # true_stop = start + n*step + 1 -> stop = start + n * step + 1 + residual - # so n = (stop - start - 1) // step - newstop = newstart + (newstop - newstart - 1) // s2step * s2step + 1 - return slice(newstart, newstop, s2step) +def step_handler(cslice, _slice): + out = () + for s1, s2 in zip(cslice, _slice, strict=True): + s1start, s1stop = s1.start, s1.stop + s2start, s2stop, s2step = s2.start, s2.stop, s2.step + # assume s1step = 1 + newstart = max(s1start, s2start) + newstop = min(s1stop, s2stop) + rem = (newstart - s2start) % s2step + if rem != 0: # only pass through here if s2step is not 1 + newstart += s2step - rem + # true_stop = start + n*step + 1 -> stop = start + n * step + 1 + residual + # so n = (stop - start - 1) // step + newstop = newstart + (newstop - newstart - 1) // s2step * s2step + 1 + out += (slice(newstart, newstop, s2step),) + return out def reduce_slices( # noqa: C901 @@ -1792,21 +1794,27 @@ def reduce_slices( # noqa: C901 _slice = _slice.raw shape_slice = shape - full_slice = () # by default the full_slice is the whole array + mask_slice = np.bool([isinstance(i, int) for i in _slice]) if out is None and _slice != (): + _slice = tuple(slice(i, i + 1, 1) if isinstance(i, int) else i for i in _slice) shape_slice = ndindex.ndindex(_slice).newshape(shape) - full_slice = _slice + # shape_slice in general not equal to final shape: + # dummy dims (due to ints) will be dealt with by taking final_slice # after slicing, we reduce to calculate shape of output if axis is None: axis = tuple(range(len(shape_slice))) elif not isinstance(axis, tuple): axis = (axis,) - axis = tuple(a if a >= 0 else a + len(shape_slice) for a in axis) + axis = np.array([a if a >= 0 else a + len(shape_slice) for a in axis]) + if np.any(mask_slice): + axis = tuple(axis + np.cumsum(mask_slice)[axis]) # axis now refers to new shape with dummy dims + reduce_args["axis"] = axis if keepdims: reduced_shape = tuple(1 if i in axis else s for i, s in enumerate(shape_slice)) else: reduced_shape = tuple(s for i, s in enumerate(shape_slice) if i not in axis) + mask_slice = mask_slice[[i for i in range(len(mask_slice)) if i not in axis]] if out is not None and reduced_shape != out.shape: raise ValueError("Provided output shape does not match the reduced shape.") @@ -1861,10 +1869,7 @@ def reduce_slices( # noqa: C901 # Check whether current cslice intersects with _slice if cslice != () and _slice != (): # get intersection of chunk and target - cslice = tuple( - step_handler(s1.start, s2.start, s1.stop, s2.stop, s2.step) - for s1, s2 in zip(cslice, _slice, strict=True) - ) + cslice = step_handler(cslice, _slice) chunks_ = tuple(s.stop - s.start for s in cslice) fast_path = False if any((s.step != 1 or s.step is not None) for s in cslice) else fast_path if _slice == () and fast_path: @@ -1903,9 +1908,7 @@ def reduce_slices( # noqa: C901 chunk_operands[key] = value[cslice] # get local index of part of out that is to be updated - cslice_subidx = ( - ndindex.ndindex(cslice).as_subindex(full_slice).raw - ) # if full_slice is (), just gives cslice + cslice_subidx = ndindex.ndindex(cslice).as_subindex(_slice).raw # if _slice is (), just gives cslice if keepdims: reduced_slice = tuple(slice(None) if i in axis else sl for i, sl in enumerate(cslice_subidx)) else: @@ -1984,6 +1987,9 @@ def reduce_slices( # noqa: C901 dtype = np.float64 out = convert_none_out(dtype, reduce_op, reduced_shape) + final_mask = tuple(np.where(mask_slice)[0]) + if np.any(mask_slice): # remove dummy dims + out = np.squeeze(out, axis=final_mask) # Check if the output array needs to be converted into a blosc2.NDArray if kwargs != {} and not np.isscalar(out): out = blosc2.asarray(out, **kwargs) diff --git a/tests/ndarray/test_ndarray.py b/tests/ndarray/test_ndarray.py index f583c5fc..d58aa0eb 100644 --- a/tests/ndarray/test_ndarray.py +++ b/tests/ndarray/test_ndarray.py @@ -358,3 +358,32 @@ def test_findex(): # b3 = arr[0, :, [0, 1]] # n3 = nparr[0, :, [0, 1]] # np.testing.assert_allclose(b3, n3) + + +@pytest.mark.parametrize( + "arr", + [ + np.random.default_rng().random((2, 1000, 10, 8, 3)).astype(np.float32), + blosc2.asarray(np.random.default_rng().random((2, 1000, 10, 8, 3)).astype(np.float32)), + ], +) +def test_strided_output(arr): + def fancy_strided_output(inputs, output_indices, stride=1): + b, t, *f = inputs.shape + oi = np.asarray(output_indices, dtype=np.int32) + + start = np.amax(output_indices) + win_starts = np.arange(start, t, stride, dtype=np.int32) + rel_idx = win_starts[:, None] - oi[None] + rel_idx[rel_idx < 0] = 0 + + w, o = rel_idx.shape + batch_idx = np.arange(b, dtype=np.int32)[:, None, None] + batch_idx = np.broadcast_to(batch_idx, (b, w, o)) + time_idx = np.broadcast_to(rel_idx, (b, w, o)) + + return inputs[batch_idx, time_idx] + + output_indices = [800, 74, 671, 132, 818] + out = fancy_strided_output(arr, output_indices, stride=16) + assert out.shape == (2, 12, 5, 10, 8, 3) diff --git a/tests/ndarray/test_reductions.py b/tests/ndarray/test_reductions.py index ae28bb97..10aaad3f 100644 --- a/tests/ndarray/test_reductions.py +++ b/tests/ndarray/test_reductions.py @@ -206,18 +206,15 @@ def test_reduce_slice(reduce_op): nres = getattr(na[_slice], reduce_op)(axis=1) np.testing.assert_allclose(res, nres, atol=tol, rtol=tol) + # Test reductions with ints + _slice = (0, slice(1, 9, 1)) + res = getattr(a, reduce_op)(item=_slice, axis=1) + nres = getattr(na[_slice], reduce_op)(axis=1) + np.testing.assert_allclose(res, nres, atol=tol, rtol=tol) -# TODO: Test cache path for reductions with non-unit steps in slice -def test_reduceslice_cache(): - shape = (8, 12, 5) - na = np.linspace(0, 1, num=np.prod(shape)).reshape(shape) - a = blosc2.asarray(na, chunks=(2, 5, 1)) - tol = 1e-6 if na.dtype == np.float32 else 1e-15 - - # Test reductions with slices and strides - _slice = (slice(1, 2, 1), slice(1, 9, 2)) - res = blosc2.sum(2 * a + 0.2 * a, item=_slice, axis=1) - nres = na[_slice] * 2.2 + _slice = (0, slice(1, 9, 2)) + res = getattr(a, reduce_op)(item=_slice, axis=1) + nres = getattr(na[_slice], reduce_op)(axis=1) np.testing.assert_allclose(res, nres, atol=tol, rtol=tol) From 896ccdffcf7889e218b02db0a222ae185d3db116 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Wed, 23 Jul 2025 13:45:18 +0200 Subject: [PATCH 03/26] Can't use cache for non-unit steps --- src/blosc2/lazyexpr.py | 11 +++++------ tests/ndarray/test_lazyexpr.py | 4 ++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index 9cdb9a87..6fb9e1a2 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -1475,7 +1475,7 @@ def slices_eval( # noqa: C901 # Get the starts and stops for the slice starts = [s.start if s.start is not None else 0 for s in cslice] stops = [s.stop if s.stop is not None else sh for s, sh in zip(cslice, cslice_shape, strict=True)] - stepper = tuple(slice(None, None, s.step) for s in cslice) + unit_steps = np.all([s.step == 1 for s in cslice]) # Get the slice of each operand for key, value in operands.items(): @@ -1495,9 +1495,9 @@ def slices_eval( # noqa: C901 key in chunk_operands and cslice_shape == chunk_operands[key].shape and isinstance(value, blosc2.NDArray) + and unit_steps ): value.get_slice_numpy(chunk_operands[key], (starts, stops)) - chunk_operands[key] = chunk_operands[key][stepper] continue chunk_operands[key] = value[cslice] @@ -1871,8 +1871,8 @@ def reduce_slices( # noqa: C901 # get intersection of chunk and target cslice = step_handler(cslice, _slice) chunks_ = tuple(s.stop - s.start for s in cslice) - fast_path = False if any((s.step != 1 or s.step is not None) for s in cslice) else fast_path - if _slice == () and fast_path: + unit_steps = np.all([s.step == 1 for s in cslice]) + if _slice == () and fast_path and unit_steps: # Fast path full_chunk = chunks_ == chunks fill_chunk_operands( @@ -1882,7 +1882,6 @@ def reduce_slices( # noqa: C901 # Get the starts and stops for the slice starts = [s.start if s.start is not None else 0 for s in cslice] stops = [s.stop if s.stop is not None else sh for s, sh in zip(cslice, chunks_, strict=True)] - stepper = tuple(slice(None, None, s.step) for s in cslice) # Get the slice of each operand for key, value in operands.items(): if np.isscalar(value): @@ -1901,9 +1900,9 @@ def reduce_slices( # noqa: C901 key in chunk_operands and chunks_ == chunk_operands[key].shape and isinstance(value, blosc2.NDArray) + and unit_steps ): value.get_slice_numpy(chunk_operands[key], (starts, stops)) - chunk_operands[key] = chunk_operands[key][stepper] continue chunk_operands[key] = value[cslice] diff --git a/tests/ndarray/test_lazyexpr.py b/tests/ndarray/test_lazyexpr.py index fb6e8ee0..4475df40 100644 --- a/tests/ndarray/test_lazyexpr.py +++ b/tests/ndarray/test_lazyexpr.py @@ -1088,7 +1088,7 @@ def test_eval_getitem2(): np.testing.assert_allclose(expr[0], nres[0]) np.testing.assert_allclose(expr[1:, :7], nres[1:, :7]) np.testing.assert_allclose(expr[0:10:2], nres[0:10:2]) - # This works, but it is not very efficient since it relies on blosc2.ndarray.slice for non-unit steps + # Now relies on inefficient blosc2.ndarray.slice for non-unit steps but only per chunk (not for whole result) np.testing.assert_allclose(expr.slice((slice(None, None, None), slice(0, 10, 2)))[:], nres[:, 0:10:2]) # Small test for broadcasting @@ -1097,7 +1097,7 @@ def test_eval_getitem2(): np.testing.assert_allclose(expr[0], nres[0]) np.testing.assert_allclose(expr[1:, :7], nres[1:, :7]) np.testing.assert_allclose(expr[:, 0:10:2], nres[:, 0:10:2]) - # This works, but it is not very efficient since it relies on blosc2.ndarray.slice for non-unit steps + # Now relies on inefficient blosc2.ndarray.slice for non-unit steps but only per chunk (not for whole result) np.testing.assert_allclose(expr.slice((slice(None, None, None), slice(0, 10, 2)))[:], nres[:, 0:10:2]) From 4b95955d04d02b008f4e598d874df06262225b4b Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Wed, 23 Jul 2025 10:13:50 +0200 Subject: [PATCH 04/26] Improving step handling --- src/blosc2/lazyexpr.py | 35 ++++++++++++++------------------ tests/ndarray/test_reductions.py | 14 +++++++++++++ 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index 872b06d9..bf5acc0b 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -1412,26 +1412,19 @@ def slices_eval( # noqa: C901 # keep orig_slice _slice = _slice.raw orig_slice = _slice - full_slice = () # by default the full_slice is the whole array - final_slice = () # by default the final_slice is the whole array # Compute the shape and chunks of the output array, including broadcasting shape = compute_broadcast_shape(operands.values()) if out is None: if _slice != (): # Check whether _slice contains an integer, or any step that are not None or 1 - if any( - (isinstance(s, int)) or (isinstance(s, slice) and s.step not in (None, 1)) for s in _slice - ): + if any((isinstance(s, int)) for s in _slice): need_final_slice = True _slice = tuple(slice(i, i + 1) if isinstance(i, int) else i for i in _slice) - full_slice = tuple( - slice(s.start or 0, s.stop or shape[i], 1) for i, s in enumerate(_slice) - ) # get rid of non-unit steps # shape_slice in general not equal to final shape: - # dummy dims (due to ints) or non-unit steps will be dealt with by taking final_slice - shape_slice = ndindex.ndindex(full_slice).newshape(shape) - final_slice = ndindex.ndindex(orig_slice).as_subindex(full_slice).raw + # dummy dims (due to ints) will be dealt with by taking final_slice + shape_slice = ndindex.ndindex(_slice).newshape(shape) + mask_slice = np.bool([isinstance(i, int) for i in orig_slice]) else: # # out should always have shape of full array # if shape is not None and shape != out.shape: @@ -1477,7 +1470,7 @@ def slices_eval( # noqa: C901 # get intersection of chunk and target if _slice != (): cslice = tuple( - slice(max(s1.start, s2.start), min(s1.stop, s2.stop)) + step_handler(s1.start, s2.start, s1.stop, s2.stop, s2.step) for s1, s2 in zip(chunk_slice.raw, _slice, strict=True) ) else: @@ -1487,12 +1480,13 @@ def slices_eval( # noqa: C901 len_chunk = math.prod(cslice_shape) # get local index of part of out that is to be updated cslice_subidx = ( - ndindex.ndindex(cslice).as_subindex(full_slice).raw - ) # in the case full_slice=(), just gives cslice + ndindex.ndindex(cslice).as_subindex(_slice).raw + ) # in the case _slice=(), just gives cslice # Get the starts and stops for the slice starts = [s.start if s.start is not None else 0 for s in cslice] stops = [s.stop if s.stop is not None else sh for s, sh in zip(cslice, cslice_shape, strict=True)] + stepper = tuple(slice(None, None, s.step) for s in cslice) # Get the slice of each operand for key, value in operands.items(): @@ -1514,6 +1508,7 @@ def slices_eval( # noqa: C901 and isinstance(value, blosc2.NDArray) ): value.get_slice_numpy(chunk_operands[key], (starts, stops)) + chunk_operands[key] = chunk_operands[key][stepper] continue chunk_operands[key] = value[cslice] @@ -1622,15 +1617,13 @@ def slices_eval( # noqa: C901 out.resize((lenout,)) else: # Need to take final_slice since filled up array according to slice_ for each chunk - if final_slice != (): + if need_final_slice: # only called if out was None if isinstance(out, np.ndarray): - if need_final_slice: # only called if out was None - out = out[final_slice] + out = np.squeeze(out, np.where(mask_slice)) elif isinstance(out, blosc2.NDArray): # It *seems* better to choose an automatic chunks and blocks for the output array # out = out.slice(_slice, chunks=out.chunks, blocks=out.blocks) - if need_final_slice: # only called if out was None - out = out.slice(final_slice) + out = out.squeeze(mask_slice) else: raise ValueError("The output array is not a NumPy array or a NDArray") @@ -1881,7 +1874,7 @@ def reduce_slices( # noqa: C901 for s1, s2 in zip(cslice, _slice, strict=True) ) chunks_ = tuple(s.stop - s.start for s in cslice) - + fast_path = False if any((s.step != 1 or s.step is not None) for s in cslice) else fast_path if _slice == () and fast_path: # Fast path full_chunk = chunks_ == chunks @@ -1892,6 +1885,7 @@ def reduce_slices( # noqa: C901 # Get the starts and stops for the slice starts = [s.start if s.start is not None else 0 for s in cslice] stops = [s.stop if s.stop is not None else sh for s, sh in zip(cslice, chunks_, strict=True)] + stepper = tuple(slice(None, None, s.step) for s in cslice) # Get the slice of each operand for key, value in operands.items(): if np.isscalar(value): @@ -1912,6 +1906,7 @@ def reduce_slices( # noqa: C901 and isinstance(value, blosc2.NDArray) ): value.get_slice_numpy(chunk_operands[key], (starts, stops)) + chunk_operands[key] = chunk_operands[key][stepper] continue chunk_operands[key] = value[cslice] diff --git a/tests/ndarray/test_reductions.py b/tests/ndarray/test_reductions.py index fa39392b..ae28bb97 100644 --- a/tests/ndarray/test_reductions.py +++ b/tests/ndarray/test_reductions.py @@ -207,6 +207,20 @@ def test_reduce_slice(reduce_op): np.testing.assert_allclose(res, nres, atol=tol, rtol=tol) +# TODO: Test cache path for reductions with non-unit steps in slice +def test_reduceslice_cache(): + shape = (8, 12, 5) + na = np.linspace(0, 1, num=np.prod(shape)).reshape(shape) + a = blosc2.asarray(na, chunks=(2, 5, 1)) + tol = 1e-6 if na.dtype == np.float32 else 1e-15 + + # Test reductions with slices and strides + _slice = (slice(1, 2, 1), slice(1, 9, 2)) + res = blosc2.sum(2 * a + 0.2 * a, item=_slice, axis=1) + nres = na[_slice] * 2.2 + np.testing.assert_allclose(res, nres, atol=tol, rtol=tol) + + # Test fast path for reductions @pytest.mark.parametrize( ("chunks", "blocks"), From 016ce34d26e589faa9b800d6dc3444a87b03843d Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Wed, 23 Jul 2025 13:30:57 +0200 Subject: [PATCH 05/26] Add fancy index test and improve slice handling for reductions --- src/blosc2/lazyexpr.py | 60 ++++++++++++++++++-------------- tests/ndarray/test_ndarray.py | 29 +++++++++++++++ tests/ndarray/test_reductions.py | 19 +++++----- 3 files changed, 70 insertions(+), 38 deletions(-) diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index bf5acc0b..2f0def15 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -1420,7 +1420,7 @@ def slices_eval( # noqa: C901 # Check whether _slice contains an integer, or any step that are not None or 1 if any((isinstance(s, int)) for s in _slice): need_final_slice = True - _slice = tuple(slice(i, i + 1) if isinstance(i, int) else i for i in _slice) + _slice = tuple(slice(i, i + 1, 1) if isinstance(i, int) else i for i in _slice) # shape_slice in general not equal to final shape: # dummy dims (due to ints) will be dealt with by taking final_slice shape_slice = ndindex.ndindex(_slice).newshape(shape) @@ -1469,10 +1469,7 @@ def slices_eval( # noqa: C901 for nchunk, chunk_slice in enumerate(intersecting_chunks): # get intersection of chunk and target if _slice != (): - cslice = tuple( - step_handler(s1.start, s2.start, s1.stop, s2.stop, s2.step) - for s1, s2 in zip(chunk_slice.raw, _slice, strict=True) - ) + cslice = step_handler(chunk_slice.raw, _slice) else: cslice = chunk_slice.raw @@ -1619,7 +1616,7 @@ def slices_eval( # noqa: C901 else: # Need to take final_slice since filled up array according to slice_ for each chunk if need_final_slice: # only called if out was None if isinstance(out, np.ndarray): - out = np.squeeze(out, np.where(mask_slice)) + out = np.squeeze(out, np.where(mask_slice)[0]) elif isinstance(out, blosc2.NDArray): # It *seems* better to choose an automatic chunks and blocks for the output array # out = out.slice(_slice, chunks=out.chunks, blocks=out.blocks) @@ -1736,17 +1733,22 @@ def infer_reduction_dtype(dtype, operation): raise ValueError(f"Unsupported operation: {operation}") -def step_handler(s1start, s2start, s1stop, s2stop, s2step): - # assume s1step = 1 - newstart = max(s1start, s2start) - newstop = min(s1stop, s2stop) - rem = (newstart - s2start) % s2step - if rem != 0: # only pass through here if s2step is not 1 - newstart += s2step - rem - # true_stop = start + n*step + 1 -> stop = start + n * step + 1 + residual - # so n = (stop - start - 1) // step - newstop = newstart + (newstop - newstart - 1) // s2step * s2step + 1 - return slice(newstart, newstop, s2step) +def step_handler(cslice, _slice): + out = () + for s1, s2 in zip(cslice, _slice, strict=True): + s1start, s1stop = s1.start, s1.stop + s2start, s2stop, s2step = s2.start, s2.stop, s2.step + # assume s1step = 1 + newstart = max(s1start, s2start) + newstop = min(s1stop, s2stop) + rem = (newstart - s2start) % s2step + if rem != 0: # only pass through here if s2step is not 1 + newstart += s2step - rem + # true_stop = start + n*step + 1 -> stop = start + n * step + 1 + residual + # so n = (stop - start - 1) // step + newstop = newstart + (newstop - newstart - 1) // s2step * s2step + 1 + out += (slice(newstart, newstop, s2step),) + return out def reduce_slices( # noqa: C901 @@ -1800,21 +1802,27 @@ def reduce_slices( # noqa: C901 _slice = _slice.raw shape_slice = shape - full_slice = () # by default the full_slice is the whole array + mask_slice = np.bool([isinstance(i, int) for i in _slice]) if out is None and _slice != (): + _slice = tuple(slice(i, i + 1, 1) if isinstance(i, int) else i for i in _slice) shape_slice = ndindex.ndindex(_slice).newshape(shape) - full_slice = _slice + # shape_slice in general not equal to final shape: + # dummy dims (due to ints) will be dealt with by taking final_slice # after slicing, we reduce to calculate shape of output if axis is None: axis = tuple(range(len(shape_slice))) elif not isinstance(axis, tuple): axis = (axis,) - axis = tuple(a if a >= 0 else a + len(shape_slice) for a in axis) + axis = np.array([a if a >= 0 else a + len(shape_slice) for a in axis]) + if np.any(mask_slice): + axis = tuple(axis + np.cumsum(mask_slice)[axis]) # axis now refers to new shape with dummy dims + reduce_args["axis"] = axis if keepdims: reduced_shape = tuple(1 if i in axis else s for i, s in enumerate(shape_slice)) else: reduced_shape = tuple(s for i, s in enumerate(shape_slice) if i not in axis) + mask_slice = mask_slice[[i for i in range(len(mask_slice)) if i not in axis]] if out is not None and reduced_shape != out.shape: raise ValueError("Provided output shape does not match the reduced shape.") @@ -1869,10 +1877,7 @@ def reduce_slices( # noqa: C901 # Check whether current cslice intersects with _slice if cslice != () and _slice != (): # get intersection of chunk and target - cslice = tuple( - step_handler(s1.start, s2.start, s1.stop, s2.stop, s2.step) - for s1, s2 in zip(cslice, _slice, strict=True) - ) + cslice = step_handler(cslice, _slice) chunks_ = tuple(s.stop - s.start for s in cslice) fast_path = False if any((s.step != 1 or s.step is not None) for s in cslice) else fast_path if _slice == () and fast_path: @@ -1911,9 +1916,7 @@ def reduce_slices( # noqa: C901 chunk_operands[key] = value[cslice] # get local index of part of out that is to be updated - cslice_subidx = ( - ndindex.ndindex(cslice).as_subindex(full_slice).raw - ) # if full_slice is (), just gives cslice + cslice_subidx = ndindex.ndindex(cslice).as_subindex(_slice).raw # if _slice is (), just gives cslice if keepdims: reduced_slice = tuple(slice(None) if i in axis else sl for i, sl in enumerate(cslice_subidx)) else: @@ -1992,6 +1995,9 @@ def reduce_slices( # noqa: C901 dtype = np.float64 out = convert_none_out(dtype, reduce_op, reduced_shape) + final_mask = tuple(np.where(mask_slice)[0]) + if np.any(mask_slice): # remove dummy dims + out = np.squeeze(out, axis=final_mask) # Check if the output array needs to be converted into a blosc2.NDArray if kwargs != {} and not np.isscalar(out): out = blosc2.asarray(out, **kwargs) diff --git a/tests/ndarray/test_ndarray.py b/tests/ndarray/test_ndarray.py index f583c5fc..d58aa0eb 100644 --- a/tests/ndarray/test_ndarray.py +++ b/tests/ndarray/test_ndarray.py @@ -358,3 +358,32 @@ def test_findex(): # b3 = arr[0, :, [0, 1]] # n3 = nparr[0, :, [0, 1]] # np.testing.assert_allclose(b3, n3) + + +@pytest.mark.parametrize( + "arr", + [ + np.random.default_rng().random((2, 1000, 10, 8, 3)).astype(np.float32), + blosc2.asarray(np.random.default_rng().random((2, 1000, 10, 8, 3)).astype(np.float32)), + ], +) +def test_strided_output(arr): + def fancy_strided_output(inputs, output_indices, stride=1): + b, t, *f = inputs.shape + oi = np.asarray(output_indices, dtype=np.int32) + + start = np.amax(output_indices) + win_starts = np.arange(start, t, stride, dtype=np.int32) + rel_idx = win_starts[:, None] - oi[None] + rel_idx[rel_idx < 0] = 0 + + w, o = rel_idx.shape + batch_idx = np.arange(b, dtype=np.int32)[:, None, None] + batch_idx = np.broadcast_to(batch_idx, (b, w, o)) + time_idx = np.broadcast_to(rel_idx, (b, w, o)) + + return inputs[batch_idx, time_idx] + + output_indices = [800, 74, 671, 132, 818] + out = fancy_strided_output(arr, output_indices, stride=16) + assert out.shape == (2, 12, 5, 10, 8, 3) diff --git a/tests/ndarray/test_reductions.py b/tests/ndarray/test_reductions.py index ae28bb97..10aaad3f 100644 --- a/tests/ndarray/test_reductions.py +++ b/tests/ndarray/test_reductions.py @@ -206,18 +206,15 @@ def test_reduce_slice(reduce_op): nres = getattr(na[_slice], reduce_op)(axis=1) np.testing.assert_allclose(res, nres, atol=tol, rtol=tol) + # Test reductions with ints + _slice = (0, slice(1, 9, 1)) + res = getattr(a, reduce_op)(item=_slice, axis=1) + nres = getattr(na[_slice], reduce_op)(axis=1) + np.testing.assert_allclose(res, nres, atol=tol, rtol=tol) -# TODO: Test cache path for reductions with non-unit steps in slice -def test_reduceslice_cache(): - shape = (8, 12, 5) - na = np.linspace(0, 1, num=np.prod(shape)).reshape(shape) - a = blosc2.asarray(na, chunks=(2, 5, 1)) - tol = 1e-6 if na.dtype == np.float32 else 1e-15 - - # Test reductions with slices and strides - _slice = (slice(1, 2, 1), slice(1, 9, 2)) - res = blosc2.sum(2 * a + 0.2 * a, item=_slice, axis=1) - nres = na[_slice] * 2.2 + _slice = (0, slice(1, 9, 2)) + res = getattr(a, reduce_op)(item=_slice, axis=1) + nres = getattr(na[_slice], reduce_op)(axis=1) np.testing.assert_allclose(res, nres, atol=tol, rtol=tol) From c31cdac29bf556161b78f34f4b1faf455079ccaa Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Wed, 23 Jul 2025 13:45:18 +0200 Subject: [PATCH 06/26] Can't use cache for non-unit steps --- src/blosc2/lazyexpr.py | 11 +++++------ tests/ndarray/test_lazyexpr.py | 4 ++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index 2f0def15..3ab07e1e 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -1483,7 +1483,7 @@ def slices_eval( # noqa: C901 # Get the starts and stops for the slice starts = [s.start if s.start is not None else 0 for s in cslice] stops = [s.stop if s.stop is not None else sh for s, sh in zip(cslice, cslice_shape, strict=True)] - stepper = tuple(slice(None, None, s.step) for s in cslice) + unit_steps = np.all([s.step == 1 for s in cslice]) # Get the slice of each operand for key, value in operands.items(): @@ -1503,9 +1503,9 @@ def slices_eval( # noqa: C901 key in chunk_operands and cslice_shape == chunk_operands[key].shape and isinstance(value, blosc2.NDArray) + and unit_steps ): value.get_slice_numpy(chunk_operands[key], (starts, stops)) - chunk_operands[key] = chunk_operands[key][stepper] continue chunk_operands[key] = value[cslice] @@ -1879,8 +1879,8 @@ def reduce_slices( # noqa: C901 # get intersection of chunk and target cslice = step_handler(cslice, _slice) chunks_ = tuple(s.stop - s.start for s in cslice) - fast_path = False if any((s.step != 1 or s.step is not None) for s in cslice) else fast_path - if _slice == () and fast_path: + unit_steps = np.all([s.step == 1 for s in cslice]) + if _slice == () and fast_path and unit_steps: # Fast path full_chunk = chunks_ == chunks fill_chunk_operands( @@ -1890,7 +1890,6 @@ def reduce_slices( # noqa: C901 # Get the starts and stops for the slice starts = [s.start if s.start is not None else 0 for s in cslice] stops = [s.stop if s.stop is not None else sh for s, sh in zip(cslice, chunks_, strict=True)] - stepper = tuple(slice(None, None, s.step) for s in cslice) # Get the slice of each operand for key, value in operands.items(): if np.isscalar(value): @@ -1909,9 +1908,9 @@ def reduce_slices( # noqa: C901 key in chunk_operands and chunks_ == chunk_operands[key].shape and isinstance(value, blosc2.NDArray) + and unit_steps ): value.get_slice_numpy(chunk_operands[key], (starts, stops)) - chunk_operands[key] = chunk_operands[key][stepper] continue chunk_operands[key] = value[cslice] diff --git a/tests/ndarray/test_lazyexpr.py b/tests/ndarray/test_lazyexpr.py index dba9da5e..f2c0714c 100644 --- a/tests/ndarray/test_lazyexpr.py +++ b/tests/ndarray/test_lazyexpr.py @@ -1088,7 +1088,7 @@ def test_eval_getitem2(): np.testing.assert_allclose(expr[0], nres[0]) np.testing.assert_allclose(expr[1:, :7], nres[1:, :7]) np.testing.assert_allclose(expr[0:10:2], nres[0:10:2]) - # This works, but it is not very efficient since it relies on blosc2.ndarray.slice for non-unit steps + # Now relies on inefficient blosc2.ndarray.slice for non-unit steps but only per chunk (not for whole result) np.testing.assert_allclose(expr.slice((slice(None, None, None), slice(0, 10, 2)))[:], nres[:, 0:10:2]) # Small test for broadcasting @@ -1097,7 +1097,7 @@ def test_eval_getitem2(): np.testing.assert_allclose(expr[0], nres[0]) np.testing.assert_allclose(expr[1:, :7], nres[1:, :7]) np.testing.assert_allclose(expr[:, 0:10:2], nres[:, 0:10:2]) - # This works, but it is not very efficient since it relies on blosc2.ndarray.slice for non-unit steps + # Now relies on inefficient blosc2.ndarray.slice for non-unit steps but only per chunk (not for whole result) np.testing.assert_allclose(expr.slice((slice(None, None, None), slice(0, 10, 2)))[:], nres[:, 0:10:2]) From e2503e30b89b0b0d8f45b843bfa0ef831d367644 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Fri, 25 Jul 2025 19:11:21 +0200 Subject: [PATCH 07/26] Give a try to latest pyodide 0.28.0 --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index cf18fd83..dad1ba68 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -128,3 +128,7 @@ ignore = [ [tool.ruff.lint.extend-per-file-ignores] "tests/**" = ["F841"] + +[tool.cibuildwheel.pyodide] +# Build Pyodide wheels using Pyodide version 0.28.0 +pyodide-version = "0.28.0" From f38a28de89bac23bf8d802373f36e82d27217d10 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Fri, 25 Jul 2025 19:16:10 +0200 Subject: [PATCH 08/26] Use better a restriction in numpy version --- pyproject.toml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index dad1ba68..5a6bf8c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ classifiers = [ requires-python = ">=3.10" # Follow guidelines from https://scientific-python.org/specs/spec-0000/ dependencies = [ - "numpy>=1.26", + "numpy>=2.2", #"numpy>=2", "ndindex", "msgpack", @@ -128,7 +128,3 @@ ignore = [ [tool.ruff.lint.extend-per-file-ignores] "tests/**" = ["F841"] - -[tool.cibuildwheel.pyodide] -# Build Pyodide wheels using Pyodide version 0.28.0 -pyodide-version = "0.28.0" From 4eecb0219867dd0225cca541e139f3c38c939ddd Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Fri, 25 Jul 2025 19:21:44 +0200 Subject: [PATCH 09/26] No way; for now we can just use numpy 2.02 for pyodide --- .github/workflows/wasm.yml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wasm.yml b/.github/workflows/wasm.yml index 6095e103..3227c797 100644 --- a/.github/workflows/wasm.yml +++ b/.github/workflows/wasm.yml @@ -46,7 +46,7 @@ jobs: run: pip install cibuildwheel - name: Build wheels - # Testing is automaticall made by cibuildwheel + # Testing is automatically made by cibuildwheel run: cibuildwheel --platform pyodide - name: Upload wheels diff --git a/pyproject.toml b/pyproject.toml index 5a6bf8c4..cf18fd83 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ classifiers = [ requires-python = ">=3.10" # Follow guidelines from https://scientific-python.org/specs/spec-0000/ dependencies = [ - "numpy>=2.2", + "numpy>=1.26", #"numpy>=2", "ndindex", "msgpack", From 62d46f9cca2e0a11bf35a67015503bdde82af047 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Mon, 4 Aug 2025 10:13:23 +0200 Subject: [PATCH 10/26] Remove disk arrays in tests --- tests/ndarray/test_concatenate.py | 2 ++ tests/ndarray/test_lazyexpr.py | 4 ++++ tests/ndarray/test_lazyexpr_fields.py | 3 +++ 3 files changed, 9 insertions(+) diff --git a/tests/ndarray/test_concatenate.py b/tests/ndarray/test_concatenate.py index 6a2f5d24..b5b432eb 100644 --- a/tests/ndarray/test_concatenate.py +++ b/tests/ndarray/test_concatenate.py @@ -99,3 +99,5 @@ def test_stack(shape, dtype, axis): [ndarr1, ndarr2, ndarr3], axis=axis, cparams=cparams, urlpath="localfile.b2nd", mode="w" ) np.testing.assert_almost_equal(result[:], nparray) + # Remove localfile + blosc2.remove_urlpath("localfile.b2nd") diff --git a/tests/ndarray/test_lazyexpr.py b/tests/ndarray/test_lazyexpr.py index f2c0714c..c9dbe7a5 100644 --- a/tests/ndarray/test_lazyexpr.py +++ b/tests/ndarray/test_lazyexpr.py @@ -1457,6 +1457,10 @@ def test_chain_persistentexpressions(): myle4 = blosc2.open("expr4.b2nd") assert (myle4[:] == le4[:]).all() + # Remove files + for f in ["expr1.b2nd", "expr2.b2nd", "expr3.b2nd", "expr4.b2nd", "a.b2nd", "b.b2nd", "c.b2nd"]: + blosc2.remove_urlpath(f) + @pytest.mark.parametrize( "values", diff --git a/tests/ndarray/test_lazyexpr_fields.py b/tests/ndarray/test_lazyexpr_fields.py index b9a9b51a..50be0076 100644 --- a/tests/ndarray/test_lazyexpr_fields.py +++ b/tests/ndarray/test_lazyexpr_fields.py @@ -671,3 +671,6 @@ def test_fields_indexing(): gotitem = expr[0] # gives an error np.testing.assert_array_equal(sliced[()], gotitem) np.testing.assert_array_equal(gotitem, temp[0]) + + # Remove file + blosc2.remove_urlpath("sa-1M.b2nd") From 1c7392425591c6aa565e148298fba04182a896f6 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Mon, 4 Aug 2025 12:06:33 +0200 Subject: [PATCH 11/26] New commit to use new version of pyodide --- tests/ndarray/test_lazyexpr.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/ndarray/test_lazyexpr.py b/tests/ndarray/test_lazyexpr.py index c9dbe7a5..a566c73c 100644 --- a/tests/ndarray/test_lazyexpr.py +++ b/tests/ndarray/test_lazyexpr.py @@ -1105,13 +1105,13 @@ def test_eval_getitem2(): def test_eval_slice(array_fixture): a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture expr = blosc2.lazyexpr("a1 + a2 - (a3 * a4)", operands={"a1": a1, "a2": a2, "a3": a3, "a4": a4}) - nres = ne_evaluate("na1 + na2 - (na3 * na4)")[:2] - res = expr.slice(slice(0, 2)) + nres = ne_evaluate("na1 + na2 - (na3 * na4)") + res = expr.slice(slice(0, 8, 2)) assert isinstance(res, blosc2.ndarray.NDArray) - np.testing.assert_allclose(res[:], nres) - res = expr[:2] + np.testing.assert_allclose(res[:], nres[:8:2]) + res = expr[:8:2] assert isinstance(res, np.ndarray) - np.testing.assert_allclose(res, nres) + np.testing.assert_allclose(res, nres[:8:2]) # string lazy expressions automatically use .slice internally expr1 = blosc2.lazyexpr("a1 * a2", operands={"a1": a1, "a2": a2}) From 98c36936fb88d00364f148ee47f2f38d52d8fc9f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 4 Aug 2025 11:08:05 +0000 Subject: [PATCH 12/26] Apply pre-commit fixes --- doc/getting_started/tutorials/04.reductions.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/getting_started/tutorials/04.reductions.ipynb b/doc/getting_started/tutorials/04.reductions.ipynb index 09721281..545084cd 100644 --- a/doc/getting_started/tutorials/04.reductions.ipynb +++ b/doc/getting_started/tutorials/04.reductions.ipynb @@ -308,7 +308,7 @@ "source": [ "# Manual chunking\n", "chunks = (200, 200, 100)\n", - "#blocks = (100, 50, 50) # optional, but can help performance\n", + "# blocks = (100, 50, 50) # optional, but can help performance\n", "meas = measure_blosc2(chunks, blocks=None)\n", "plot_meas(meas_np, meas, chunks)" ] From 369f20ae99dd4d984bad428ad0092f6ac0ffa563 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 5 Aug 2025 11:18:10 +0200 Subject: [PATCH 13/26] Force pyodide version --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index cf18fd83..132b4ef8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,6 +84,8 @@ documentation = "https://www.blosc.org/python-blosc2/python-blosc2.html" [tool.cibuildwheel] build-verbosity = 1 +[tool.cibuildwheel.pyodide] +pyodide-version = ">=0.28.0" # Skip unsupported python versions as well as 32-bit platforms, which are not supported anymore. skip = "*-manylinux_i686 cp*-win32 *_ppc64le *_s390x *musllinux*" test-requires = "pytest" From 22bd5e344d4a690aea97797459e8f9bb64718c29 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 5 Aug 2025 11:27:38 +0200 Subject: [PATCH 14/26] Force python 3.13 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 132b4ef8..4fdd280f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ classifiers = [ "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", ] -requires-python = ">=3.10" +requires-python = ">=3.13" # Follow guidelines from https://scientific-python.org/specs/spec-0000/ dependencies = [ "numpy>=1.26", From bac2309302f9daa5b2034e2876f9735b2ba2cdbc Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 5 Aug 2025 11:29:16 +0200 Subject: [PATCH 15/26] Revert "Force pyodide version" This reverts commit 369f20ae99dd4d984bad428ad0092f6ac0ffa563. --- pyproject.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4fdd280f..554b0943 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,8 +84,6 @@ documentation = "https://www.blosc.org/python-blosc2/python-blosc2.html" [tool.cibuildwheel] build-verbosity = 1 -[tool.cibuildwheel.pyodide] -pyodide-version = ">=0.28.0" # Skip unsupported python versions as well as 32-bit platforms, which are not supported anymore. skip = "*-manylinux_i686 cp*-win32 *_ppc64le *_s390x *musllinux*" test-requires = "pytest" From 28d5e415c831eaea0bdb34136e0543ab455aa2d3 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 5 Aug 2025 11:30:28 +0200 Subject: [PATCH 16/26] Revert "Force python 3.13" This reverts commit 22bd5e344d4a690aea97797459e8f9bb64718c29. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 554b0943..cf18fd83 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ classifiers = [ "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", ] -requires-python = ">=3.13" +requires-python = ">=3.10" # Follow guidelines from https://scientific-python.org/specs/spec-0000/ dependencies = [ "numpy>=1.26", From b7b2e21672ae94f41e65ac4e1bf4dca943eba133 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 5 Aug 2025 11:33:52 +0200 Subject: [PATCH 17/26] Force pyodide version --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index cf18fd83..cdf2d108 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,6 +84,10 @@ documentation = "https://www.blosc.org/python-blosc2/python-blosc2.html" [tool.cibuildwheel] build-verbosity = 1 +[tool.cibuildwheel.pyodide] +# Build Pyodide wheels using Pyodide version 0.28.0 +pyodide-version = "0.28.0" + # Skip unsupported python versions as well as 32-bit platforms, which are not supported anymore. skip = "*-manylinux_i686 cp*-win32 *_ppc64le *_s390x *musllinux*" test-requires = "pytest" From 412dff0d053ffc556c3cbd1f7def119ebdb61305 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 5 Aug 2025 11:56:58 +0200 Subject: [PATCH 18/26] Force numpy version --- pyproject.toml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index cdf2d108..5a6bf8c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ classifiers = [ requires-python = ">=3.10" # Follow guidelines from https://scientific-python.org/specs/spec-0000/ dependencies = [ - "numpy>=1.26", + "numpy>=2.2", #"numpy>=2", "ndindex", "msgpack", @@ -84,10 +84,6 @@ documentation = "https://www.blosc.org/python-blosc2/python-blosc2.html" [tool.cibuildwheel] build-verbosity = 1 -[tool.cibuildwheel.pyodide] -# Build Pyodide wheels using Pyodide version 0.28.0 -pyodide-version = "0.28.0" - # Skip unsupported python versions as well as 32-bit platforms, which are not supported anymore. skip = "*-manylinux_i686 cp*-win32 *_ppc64le *_s390x *musllinux*" test-requires = "pytest" From 14f7d67fc76ed85c640f1769c6742c748c7ae418 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Wed, 6 Aug 2025 09:01:14 +0200 Subject: [PATCH 19/26] Only build WASM wheels for python 3.13, force pyodide version --- .github/workflows/wasm.yml | 4 ++-- pyproject.toml | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/wasm.yml b/.github/workflows/wasm.yml index 3227c797..54e572fd 100644 --- a/.github/workflows/wasm.yml +++ b/.github/workflows/wasm.yml @@ -25,8 +25,8 @@ jobs: strategy: matrix: os: [ubuntu-latest] - cibw_build: ["cp3{11,12,13}-*"] - p_ver: ["3.11-3.13"] + cibw_build: ["cp3{13}-*"] + p_ver: ["3.13"] steps: - name: Checkout repo diff --git a/pyproject.toml b/pyproject.toml index 5a6bf8c4..cdf2d108 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ classifiers = [ requires-python = ">=3.10" # Follow guidelines from https://scientific-python.org/specs/spec-0000/ dependencies = [ - "numpy>=2.2", + "numpy>=1.26", #"numpy>=2", "ndindex", "msgpack", @@ -84,6 +84,10 @@ documentation = "https://www.blosc.org/python-blosc2/python-blosc2.html" [tool.cibuildwheel] build-verbosity = 1 +[tool.cibuildwheel.pyodide] +# Build Pyodide wheels using Pyodide version 0.28.0 +pyodide-version = "0.28.0" + # Skip unsupported python versions as well as 32-bit platforms, which are not supported anymore. skip = "*-manylinux_i686 cp*-win32 *_ppc64le *_s390x *musllinux*" test-requires = "pytest" From ecb87f0c5782c0f840425ee7ac83d0f5a27cfb96 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Wed, 6 Aug 2025 09:04:31 +0200 Subject: [PATCH 20/26] Only build WASM wheels for python 3.13, force pyodide version --- .github/workflows/wasm.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wasm.yml b/.github/workflows/wasm.yml index 54e572fd..0fee6778 100644 --- a/.github/workflows/wasm.yml +++ b/.github/workflows/wasm.yml @@ -25,7 +25,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - cibw_build: ["cp3{13}-*"] + cibw_build: ["cp313-*"] p_ver: ["3.13"] steps: From 6128a38fdcbab22c7cd5ac883657812e88be60f0 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Wed, 6 Aug 2025 10:24:37 +0200 Subject: [PATCH 21/26] Force contiguous when an evaluation does not happen and handle special case for rebasing --- pyproject.toml | 4 ---- src/blosc2/lazyexpr.py | 18 +++++++++++++++--- tests/ndarray/test_lazyexpr.py | 14 +++++++++++++- 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index cdf2d108..cf18fd83 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,10 +84,6 @@ documentation = "https://www.blosc.org/python-blosc2/python-blosc2.html" [tool.cibuildwheel] build-verbosity = 1 -[tool.cibuildwheel.pyodide] -# Build Pyodide wheels using Pyodide version 0.28.0 -pyodide-version = "0.28.0" - # Skip unsupported python versions as well as 32-bit platforms, which are not supported anymore. skip = "*-manylinux_i686 cp*-win32 *_ppc64le *_s390x *musllinux*" test-requires = "pytest" diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index 3ab07e1e..49c61134 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -1522,7 +1522,11 @@ def slices_eval( # noqa: C901 continue if where is None: - result = ne_evaluate(expression, chunk_operands, **ne_args) + if expression == "o0": + # We don't have an actual expression, so just make sure contiguous + result = np.require(chunk_operands["o0"], requirements="C") + else: + result = ne_evaluate(expression, chunk_operands, **ne_args) else: # Apply the where condition (in result) if len(where) == 2: @@ -1935,8 +1939,8 @@ def reduce_slices( # noqa: C901 if where is None: if expression == "o0": - # We don't have an actual expression, so avoid a copy - result = chunk_operands["o0"] + # We don't have an actual expression, so avoid a copy except if have to make contiguous + result = np.require(chunk_operands["o0"], requirements="C") else: result = ne_evaluate(expression, chunk_operands, **ne_args) else: @@ -3118,6 +3122,14 @@ def _new_expr(cls, expression, operands, guess, out=None, where=None, ne_args=No new_expr.expression_tosave = expression new_expr.operands = operands_ new_expr.operands_tosave = operands + elif isinstance(new_expr, blosc2.NDArray) and len(operands) == 1: + # passed either "a" or possible "a[:10]" + expression_, operands_ = conserve_functions( + _expression, _operands, {"o0": list(operands.values())[0]} | local_vars + ) + new_expr = cls(None) + new_expr.expression = expression_ + new_expr.operands = operands_ else: # An immediate evaluation happened (e.g. all operands are numpy arrays) new_expr = cls(None) diff --git a/tests/ndarray/test_lazyexpr.py b/tests/ndarray/test_lazyexpr.py index a566c73c..88610dac 100644 --- a/tests/ndarray/test_lazyexpr.py +++ b/tests/ndarray/test_lazyexpr.py @@ -1089,7 +1089,7 @@ def test_eval_getitem2(): np.testing.assert_allclose(expr[1:, :7], nres[1:, :7]) np.testing.assert_allclose(expr[0:10:2], nres[0:10:2]) # Now relies on inefficient blosc2.ndarray.slice for non-unit steps but only per chunk (not for whole result) - np.testing.assert_allclose(expr.slice((slice(None, None, None), slice(0, 10, 2)))[:], nres[:, 0:10:2]) + np.testing.assert_allclose(expr.slice((slice(1, 2, 1), slice(0, 10, 2)))[:], nres[1:2, 0:10:2]) # Small test for broadcasting expr = test_arr + test_arr.slice(1) @@ -1123,6 +1123,18 @@ def test_eval_slice(array_fixture): np.testing.assert_allclose(res[()], nres) +def test_rebasing(array_fixture): + a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture + expr = blosc2.lazyexpr("a1 + a2 - (a3 * a4)", operands={"a1": a1, "a2": a2, "a3": a3, "a4": a4}) + assert expr.expression == "(o0 + o1 - o2 * o3)" + + expr = blosc2.lazyexpr("a1") + assert expr.expression == "o0" + + expr = blosc2.lazyexpr("a1[:10]") + assert expr.expression == "o0.slice((slice(None, 10, None),))" + + # Test get_chunk method @pytest.mark.heavy def test_get_chunk(array_fixture): From c3af99b422ca20d245e834b94fd38b3b187fef47 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Wed, 6 Aug 2025 10:30:17 +0200 Subject: [PATCH 22/26] Correct error with building py 3.11, 3.12 wasm wheels --- .github/workflows/wasm.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wasm.yml b/.github/workflows/wasm.yml index 0fee6778..3227c797 100644 --- a/.github/workflows/wasm.yml +++ b/.github/workflows/wasm.yml @@ -25,8 +25,8 @@ jobs: strategy: matrix: os: [ubuntu-latest] - cibw_build: ["cp313-*"] - p_ver: ["3.13"] + cibw_build: ["cp3{11,12,13}-*"] + p_ver: ["3.11-3.13"] steps: - name: Checkout repo From 5a1de4ce9af822020f77ee24a5e553a84c6b63d7 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Wed, 6 Aug 2025 11:03:44 +0200 Subject: [PATCH 23/26] Simplify logic a little --- src/blosc2/lazyexpr.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index 49c61134..d6e8df43 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -1522,11 +1522,7 @@ def slices_eval( # noqa: C901 continue if where is None: - if expression == "o0": - # We don't have an actual expression, so just make sure contiguous - result = np.require(chunk_operands["o0"], requirements="C") - else: - result = ne_evaluate(expression, chunk_operands, **ne_args) + result = ne_evaluate(expression, chunk_operands, **ne_args) else: # Apply the where condition (in result) if len(where) == 2: @@ -1561,6 +1557,9 @@ def slices_eval( # noqa: C901 result = x[result] else: raise ValueError("The where condition must be a tuple with one or two elements") + # Enforce contiguity of result (necessary to fill the out array) + # but avoid copy if already contiguous + result = np.require(result, requirements="C") if out is None: shape_ = shape_slice if shape_slice is not None else shape @@ -1939,7 +1938,7 @@ def reduce_slices( # noqa: C901 if where is None: if expression == "o0": - # We don't have an actual expression, so avoid a copy except if have to make contiguous + # We don't have an actual expression, so avoid a copy except to make contiguous result = np.require(chunk_operands["o0"], requirements="C") else: result = ne_evaluate(expression, chunk_operands, **ne_args) From 80b7b24c894b676e7f0b6fbc2b6cc80c6114190e Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Fri, 8 Aug 2025 10:08:40 +0200 Subject: [PATCH 24/26] Edit to ensure slice_fast_path doesn't load too-large array --- src/blosc2/lazyexpr.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index d6e8df43..a979f476 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -2092,7 +2092,9 @@ def chunked_eval( # noqa: C901 # The fast path is possible under a few conditions if getitem and (where is None or len(where) == 2) and not callable(expression): # Compute the size of operands for the fast path - shape_operands = item.newshape(shape) # shape of slice + unit_steps = np.all([s.step == 1 for s in item.raw]) + # shape of slice, if non-unit steps have to decompress full array into memory + shape_operands = item.newshape(shape) if unit_steps else shape _dtype = kwargs.get("dtype", np.float64) size_operands = math.prod(shape_operands) * len(operands) * _dtype.itemsize # Only take the fast path if the size of operands is relatively small From 1f9c10d9ae8edc43791b35405492a0f3540e6ff4 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Fri, 8 Aug 2025 10:37:56 +0200 Subject: [PATCH 25/26] Add bench --- bench/ndarray/slice-expr-step.py | 49 ++++++++++++++++++++++++++++++++ src/blosc2/lazyexpr.py | 2 +- 2 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 bench/ndarray/slice-expr-step.py diff --git a/bench/ndarray/slice-expr-step.py b/bench/ndarray/slice-expr-step.py new file mode 100644 index 00000000..01f75dca --- /dev/null +++ b/bench/ndarray/slice-expr-step.py @@ -0,0 +1,49 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# This source code is licensed under a BSD-style license (found in the +# LICENSE file in the root directory of this source tree) +####################################################################### + +# Benchmark for computing a slice with non-unit steps of a expression in a ND array. + +import blosc2 +import numpy as np +import matplotlib.pyplot as plt +from memory_profiler import profile, memory_usage + +N = 50_000 +LARGE_SLICE = False +ndim = 2 +shape = (N, ) * ndim +a = blosc2.linspace(start=0, stop=np.prod(shape), num=np.prod(shape), dtype=np.float64, shape=shape) +_slice = (slice(0, N, 2),) if LARGE_SLICE else (slice(0, N, N//4),) +expr = 2 * a ** 2 + +@profile +def _slice_(): + res1 = expr.slice(_slice) + print(f'Result of slice occupies {res1.schunk.cbytes / 1024**2:.2f} MiB') + return res1 + +@profile +def _gitem(): + res2 = expr[_slice] + print(f'Result of _getitem_ occupies {np.prod(res2.shape) * res2.itemsize / 1024**2:.2f} MiB') + return res2 + +interval = 0.001 +offset = 0 +for f in [_slice_, _gitem]: + mem = memory_usage((f,), interval=interval) + times = offset + interval * np.arange(len(mem)) + offset = times[-1] + plt.plot(times, mem) + +plt.xlabel('Time (s)') +plt.ylabel('Memory usage (MiB)') +lab = 'LARGE' if LARGE_SLICE else 'SMALL' +plt.title(f'{lab} slice w/steps, Linux Blosc2 {blosc2.__version__}') +plt.legend([f'expr.slice({_slice}', f'expr[{_slice}]']) +plt.savefig(f'sliceexpr_{lab}_Blosc{blosc2.__version__.replace('.','_')}.png', format="png") diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index a979f476..5d838244 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -2092,7 +2092,7 @@ def chunked_eval( # noqa: C901 # The fast path is possible under a few conditions if getitem and (where is None or len(where) == 2) and not callable(expression): # Compute the size of operands for the fast path - unit_steps = np.all([s.step == 1 for s in item.raw]) + unit_steps = np.all([s.step == 1 for s in item.raw if isinstance(s, slice)]) # shape of slice, if non-unit steps have to decompress full array into memory shape_operands = item.newshape(shape) if unit_steps else shape _dtype = kwargs.get("dtype", np.float64) From db2d38f839b4b53db2c686cc87dc6c9145dc1a2f Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Fri, 8 Aug 2025 10:46:56 +0200 Subject: [PATCH 26/26] Fixed failing test --- tests/ndarray/test_lazyexpr.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/ndarray/test_lazyexpr.py b/tests/ndarray/test_lazyexpr.py index 05fc2957..614c604b 100644 --- a/tests/ndarray/test_lazyexpr.py +++ b/tests/ndarray/test_lazyexpr.py @@ -1089,9 +1089,7 @@ def test_eval_getitem2(): np.testing.assert_allclose(expr[1:, :7], nres[1:, :7]) np.testing.assert_allclose(expr[0:10:2], nres[0:10:2]) # Now relies on inefficient blosc2.ndarray.slice for non-unit steps but only per chunk (not for whole result) - np.testing.assert_allclose(expr.slice((slice(1, 2, 1), slice(0, 10, 2)))[:], nres[1:2, 0:10:2]) - # Now relies on inefficient blosc2.ndarray.slice for non-unit steps but only per chunk (not for whole result) - np.testing.assert_allclose(expr.slice((slice(1, 2, 1), slice(0, 10, 2)))[:], nres[:, 0:10:2]) + np.testing.assert_allclose(expr.slice((slice(None, None, None), slice(0, 10, 2)))[:], nres[:, 0:10:2]) # Small test for broadcasting expr = test_arr + test_arr.slice(1) @@ -1100,8 +1098,7 @@ def test_eval_getitem2(): np.testing.assert_allclose(expr[1:, :7], nres[1:, :7]) np.testing.assert_allclose(expr[:, 0:10:2], nres[:, 0:10:2]) # Now relies on inefficient blosc2.ndarray.slice for non-unit steps but only per chunk (not for whole result) - # Now relies on inefficient blosc2.ndarray.slice for non-unit steps but only per chunk (not for whole result) - np.testing.assert_allclose(expr.slice((slice(1, 2, 1), slice(0, 10, 2)))[:], nres[:, 0:10:2]) + np.testing.assert_allclose(expr.slice((slice(None, None, None), slice(0, 10, 2)))[:], nres[:, 0:10:2]) # Test lazyexpr's slice method