From 98430bc2bad04fd700a48476f12d5b1618874cee Mon Sep 17 00:00:00 2001
From: Luke Shaw <luke.shaw@ironarray.io>
Date: Wed, 23 Jul 2025 10:13:50 +0200
Subject: [PATCH 01/26] Improving step handling

---
 src/blosc2/lazyexpr.py           | 35 ++++++++++++++------------------
 tests/ndarray/test_reductions.py | 14 +++++++++++++
 2 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py
index cf7e2976..0b258fa0 100644
--- a/src/blosc2/lazyexpr.py
+++ b/src/blosc2/lazyexpr.py
@@ -1404,26 +1404,19 @@ def slices_eval(  # noqa: C901
     # keep orig_slice
     _slice = _slice.raw
     orig_slice = _slice
-    full_slice = ()  # by default the full_slice is the whole array
-    final_slice = ()  # by default the final_slice is the whole array
 
     # Compute the shape and chunks of the output array, including broadcasting
     shape = compute_broadcast_shape(operands.values())
     if out is None:
         if _slice != ():
             # Check whether _slice contains an integer, or any step that are not None or 1
-            if any(
-                (isinstance(s, int)) or (isinstance(s, slice) and s.step not in (None, 1)) for s in _slice
-            ):
+            if any((isinstance(s, int)) for s in _slice):
                 need_final_slice = True
             _slice = tuple(slice(i, i + 1) if isinstance(i, int) else i for i in _slice)
-            full_slice = tuple(
-                slice(s.start or 0, s.stop or shape[i], 1) for i, s in enumerate(_slice)
-            )  # get rid of non-unit steps
             # shape_slice in general not equal to final shape:
-            # dummy dims (due to ints) or non-unit steps will be dealt with by taking final_slice
-            shape_slice = ndindex.ndindex(full_slice).newshape(shape)
-            final_slice = ndindex.ndindex(orig_slice).as_subindex(full_slice).raw
+            # dummy dims (due to ints) will be dealt with by taking final_slice
+            shape_slice = ndindex.ndindex(_slice).newshape(shape)
+            mask_slice = np.bool([isinstance(i, int) for i in orig_slice])
     else:
         # # out should always have shape of full array
         # if shape is not None and shape != out.shape:
@@ -1469,7 +1462,7 @@ def slices_eval(  # noqa: C901
         # get intersection of chunk and target
         if _slice != ():
             cslice = tuple(
-                slice(max(s1.start, s2.start), min(s1.stop, s2.stop))
+                step_handler(s1.start, s2.start, s1.stop, s2.stop, s2.step)
                 for s1, s2 in zip(chunk_slice.raw, _slice, strict=True)
             )
         else:
@@ -1479,12 +1472,13 @@ def slices_eval(  # noqa: C901
         len_chunk = math.prod(cslice_shape)
         # get local index of part of out that is to be updated
         cslice_subidx = (
-            ndindex.ndindex(cslice).as_subindex(full_slice).raw
-        )  # in the case full_slice=(), just gives cslice
+            ndindex.ndindex(cslice).as_subindex(_slice).raw
+        )  # in the case _slice=(), just gives cslice
 
         # Get the starts and stops for the slice
         starts = [s.start if s.start is not None else 0 for s in cslice]
         stops = [s.stop if s.stop is not None else sh for s, sh in zip(cslice, cslice_shape, strict=True)]
+        stepper = tuple(slice(None, None, s.step) for s in cslice)
 
         # Get the slice of each operand
         for key, value in operands.items():
@@ -1506,6 +1500,7 @@ def slices_eval(  # noqa: C901
                 and isinstance(value, blosc2.NDArray)
             ):
                 value.get_slice_numpy(chunk_operands[key], (starts, stops))
+                chunk_operands[key] = chunk_operands[key][stepper]
                 continue
 
             chunk_operands[key] = value[cslice]
@@ -1614,15 +1609,13 @@ def slices_eval(  # noqa: C901
             out.resize((lenout,))
 
     else:  # Need to take final_slice since filled up array according to slice_ for each chunk
-        if final_slice != ():
+        if need_final_slice:  # only called if out was None
             if isinstance(out, np.ndarray):
-                if need_final_slice:  # only called if out was None
-                    out = out[final_slice]
+                out = np.squeeze(out, np.where(mask_slice))
             elif isinstance(out, blosc2.NDArray):
                 # It *seems* better to choose an automatic chunks and blocks for the output array
                 # out = out.slice(_slice, chunks=out.chunks, blocks=out.blocks)
-                if need_final_slice:  # only called if out was None
-                    out = out.slice(final_slice)
+                out = out.squeeze(mask_slice)
             else:
                 raise ValueError("The output array is not a NumPy array or a NDArray")
 
@@ -1873,7 +1866,7 @@ def reduce_slices(  # noqa: C901
                 for s1, s2 in zip(cslice, _slice, strict=True)
             )
         chunks_ = tuple(s.stop - s.start for s in cslice)
-
+        fast_path = False if any((s.step != 1 or s.step is not None) for s in cslice) else fast_path
         if _slice == () and fast_path:
             # Fast path
             full_chunk = chunks_ == chunks
@@ -1884,6 +1877,7 @@ def reduce_slices(  # noqa: C901
             # Get the starts and stops for the slice
             starts = [s.start if s.start is not None else 0 for s in cslice]
             stops = [s.stop if s.stop is not None else sh for s, sh in zip(cslice, chunks_, strict=True)]
+            stepper = tuple(slice(None, None, s.step) for s in cslice)
             # Get the slice of each operand
             for key, value in operands.items():
                 if np.isscalar(value):
@@ -1904,6 +1898,7 @@ def reduce_slices(  # noqa: C901
                     and isinstance(value, blosc2.NDArray)
                 ):
                     value.get_slice_numpy(chunk_operands[key], (starts, stops))
+                    chunk_operands[key] = chunk_operands[key][stepper]
                     continue
                 chunk_operands[key] = value[cslice]
 
diff --git a/tests/ndarray/test_reductions.py b/tests/ndarray/test_reductions.py
index fa39392b..ae28bb97 100644
--- a/tests/ndarray/test_reductions.py
+++ b/tests/ndarray/test_reductions.py
@@ -207,6 +207,20 @@ def test_reduce_slice(reduce_op):
     np.testing.assert_allclose(res, nres, atol=tol, rtol=tol)
 
 
+# TODO: Test cache path for reductions with non-unit steps in slice
+def test_reduceslice_cache():
+    shape = (8, 12, 5)
+    na = np.linspace(0, 1, num=np.prod(shape)).reshape(shape)
+    a = blosc2.asarray(na, chunks=(2, 5, 1))
+    tol = 1e-6 if na.dtype == np.float32 else 1e-15
+
+    # Test reductions with slices and strides
+    _slice = (slice(1, 2, 1), slice(1, 9, 2))
+    res = blosc2.sum(2 * a + 0.2 * a, item=_slice, axis=1)
+    nres = na[_slice] * 2.2
+    np.testing.assert_allclose(res, nres, atol=tol, rtol=tol)
+
+
 # Test fast path for reductions
 @pytest.mark.parametrize(
     ("chunks", "blocks"),

From 24a26a8a1dfbf497a37425d1cb47616c35ec0b23 Mon Sep 17 00:00:00 2001
From: Luke Shaw <lshaw8317@gmail.com>
Date: Wed, 23 Jul 2025 13:30:57 +0200
Subject: [PATCH 02/26] Add fancy index test and improve slice handling for
 reductions

---
 src/blosc2/lazyexpr.py           | 60 ++++++++++++++++++--------------
 tests/ndarray/test_ndarray.py    | 29 +++++++++++++++
 tests/ndarray/test_reductions.py | 19 +++++-----
 3 files changed, 70 insertions(+), 38 deletions(-)

diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py
index 0b258fa0..9cdb9a87 100644
--- a/src/blosc2/lazyexpr.py
+++ b/src/blosc2/lazyexpr.py
@@ -1412,7 +1412,7 @@ def slices_eval(  # noqa: C901
             # Check whether _slice contains an integer, or any step that are not None or 1
             if any((isinstance(s, int)) for s in _slice):
                 need_final_slice = True
-            _slice = tuple(slice(i, i + 1) if isinstance(i, int) else i for i in _slice)
+            _slice = tuple(slice(i, i + 1, 1) if isinstance(i, int) else i for i in _slice)
             # shape_slice in general not equal to final shape:
             # dummy dims (due to ints) will be dealt with by taking final_slice
             shape_slice = ndindex.ndindex(_slice).newshape(shape)
@@ -1461,10 +1461,7 @@ def slices_eval(  # noqa: C901
     for nchunk, chunk_slice in enumerate(intersecting_chunks):
         # get intersection of chunk and target
         if _slice != ():
-            cslice = tuple(
-                step_handler(s1.start, s2.start, s1.stop, s2.stop, s2.step)
-                for s1, s2 in zip(chunk_slice.raw, _slice, strict=True)
-            )
+            cslice = step_handler(chunk_slice.raw, _slice)
         else:
             cslice = chunk_slice.raw
 
@@ -1611,7 +1608,7 @@ def slices_eval(  # noqa: C901
     else:  # Need to take final_slice since filled up array according to slice_ for each chunk
         if need_final_slice:  # only called if out was None
             if isinstance(out, np.ndarray):
-                out = np.squeeze(out, np.where(mask_slice))
+                out = np.squeeze(out, np.where(mask_slice)[0])
             elif isinstance(out, blosc2.NDArray):
                 # It *seems* better to choose an automatic chunks and blocks for the output array
                 # out = out.slice(_slice, chunks=out.chunks, blocks=out.blocks)
@@ -1728,17 +1725,22 @@ def infer_reduction_dtype(dtype, operation):
         raise ValueError(f"Unsupported operation: {operation}")
 
 
-def step_handler(s1start, s2start, s1stop, s2stop, s2step):
-    # assume s1step = 1
-    newstart = max(s1start, s2start)
-    newstop = min(s1stop, s2stop)
-    rem = (newstart - s2start) % s2step
-    if rem != 0:  # only pass through here if s2step is not 1
-        newstart += s2step - rem
-        # true_stop = start + n*step + 1 -> stop = start + n * step + 1 + residual
-        # so n = (stop - start - 1) // step
-        newstop = newstart + (newstop - newstart - 1) // s2step * s2step + 1
-    return slice(newstart, newstop, s2step)
+def step_handler(cslice, _slice):
+    out = ()
+    for s1, s2 in zip(cslice, _slice, strict=True):
+        s1start, s1stop = s1.start, s1.stop
+        s2start, s2stop, s2step = s2.start, s2.stop, s2.step
+        # assume s1step = 1
+        newstart = max(s1start, s2start)
+        newstop = min(s1stop, s2stop)
+        rem = (newstart - s2start) % s2step
+        if rem != 0:  # only pass through here if s2step is not 1
+            newstart += s2step - rem
+            # true_stop = start + n*step + 1 -> stop = start + n * step + 1 + residual
+            # so n = (stop - start - 1) // step
+            newstop = newstart + (newstop - newstart - 1) // s2step * s2step + 1
+        out += (slice(newstart, newstop, s2step),)
+    return out
 
 
 def reduce_slices(  # noqa: C901
@@ -1792,21 +1794,27 @@ def reduce_slices(  # noqa: C901
 
     _slice = _slice.raw
     shape_slice = shape
-    full_slice = ()  # by default the full_slice is the whole array
+    mask_slice = np.bool([isinstance(i, int) for i in _slice])
     if out is None and _slice != ():
+        _slice = tuple(slice(i, i + 1, 1) if isinstance(i, int) else i for i in _slice)
         shape_slice = ndindex.ndindex(_slice).newshape(shape)
-        full_slice = _slice
+        # shape_slice in general not equal to final shape:
+        # dummy dims (due to ints) will be dealt with by taking final_slice
 
     # after slicing, we reduce to calculate shape of output
     if axis is None:
         axis = tuple(range(len(shape_slice)))
     elif not isinstance(axis, tuple):
         axis = (axis,)
-    axis = tuple(a if a >= 0 else a + len(shape_slice) for a in axis)
+    axis = np.array([a if a >= 0 else a + len(shape_slice) for a in axis])
+    if np.any(mask_slice):
+        axis = tuple(axis + np.cumsum(mask_slice)[axis])  # axis now refers to new shape with dummy dims
+        reduce_args["axis"] = axis
     if keepdims:
         reduced_shape = tuple(1 if i in axis else s for i, s in enumerate(shape_slice))
     else:
         reduced_shape = tuple(s for i, s in enumerate(shape_slice) if i not in axis)
+        mask_slice = mask_slice[[i for i in range(len(mask_slice)) if i not in axis]]
 
     if out is not None and reduced_shape != out.shape:
         raise ValueError("Provided output shape does not match the reduced shape.")
@@ -1861,10 +1869,7 @@ def reduce_slices(  # noqa: C901
         # Check whether current cslice intersects with _slice
         if cslice != () and _slice != ():
             # get intersection of chunk and target
-            cslice = tuple(
-                step_handler(s1.start, s2.start, s1.stop, s2.stop, s2.step)
-                for s1, s2 in zip(cslice, _slice, strict=True)
-            )
+            cslice = step_handler(cslice, _slice)
         chunks_ = tuple(s.stop - s.start for s in cslice)
         fast_path = False if any((s.step != 1 or s.step is not None) for s in cslice) else fast_path
         if _slice == () and fast_path:
@@ -1903,9 +1908,7 @@ def reduce_slices(  # noqa: C901
                 chunk_operands[key] = value[cslice]
 
         # get local index of part of out that is to be updated
-        cslice_subidx = (
-            ndindex.ndindex(cslice).as_subindex(full_slice).raw
-        )  # if full_slice is (), just gives cslice
+        cslice_subidx = ndindex.ndindex(cslice).as_subindex(_slice).raw  # if _slice is (), just gives cslice
         if keepdims:
             reduced_slice = tuple(slice(None) if i in axis else sl for i, sl in enumerate(cslice_subidx))
         else:
@@ -1984,6 +1987,9 @@ def reduce_slices(  # noqa: C901
             dtype = np.float64
         out = convert_none_out(dtype, reduce_op, reduced_shape)
 
+    final_mask = tuple(np.where(mask_slice)[0])
+    if np.any(mask_slice):  # remove dummy dims
+        out = np.squeeze(out, axis=final_mask)
     # Check if the output array needs to be converted into a blosc2.NDArray
     if kwargs != {} and not np.isscalar(out):
         out = blosc2.asarray(out, **kwargs)
diff --git a/tests/ndarray/test_ndarray.py b/tests/ndarray/test_ndarray.py
index f583c5fc..d58aa0eb 100644
--- a/tests/ndarray/test_ndarray.py
+++ b/tests/ndarray/test_ndarray.py
@@ -358,3 +358,32 @@ def test_findex():
     # b3 = arr[0, :, [0, 1]]
     # n3 = nparr[0, :, [0, 1]]
     # np.testing.assert_allclose(b3, n3)
+
+
+@pytest.mark.parametrize(
+    "arr",
+    [
+        np.random.default_rng().random((2, 1000, 10, 8, 3)).astype(np.float32),
+        blosc2.asarray(np.random.default_rng().random((2, 1000, 10, 8, 3)).astype(np.float32)),
+    ],
+)
+def test_strided_output(arr):
+    def fancy_strided_output(inputs, output_indices, stride=1):
+        b, t, *f = inputs.shape
+        oi = np.asarray(output_indices, dtype=np.int32)
+
+        start = np.amax(output_indices)
+        win_starts = np.arange(start, t, stride, dtype=np.int32)
+        rel_idx = win_starts[:, None] - oi[None]
+        rel_idx[rel_idx < 0] = 0
+
+        w, o = rel_idx.shape
+        batch_idx = np.arange(b, dtype=np.int32)[:, None, None]
+        batch_idx = np.broadcast_to(batch_idx, (b, w, o))
+        time_idx = np.broadcast_to(rel_idx, (b, w, o))
+
+        return inputs[batch_idx, time_idx]
+
+    output_indices = [800, 74, 671, 132, 818]
+    out = fancy_strided_output(arr, output_indices, stride=16)
+    assert out.shape == (2, 12, 5, 10, 8, 3)
diff --git a/tests/ndarray/test_reductions.py b/tests/ndarray/test_reductions.py
index ae28bb97..10aaad3f 100644
--- a/tests/ndarray/test_reductions.py
+++ b/tests/ndarray/test_reductions.py
@@ -206,18 +206,15 @@ def test_reduce_slice(reduce_op):
     nres = getattr(na[_slice], reduce_op)(axis=1)
     np.testing.assert_allclose(res, nres, atol=tol, rtol=tol)
 
+    # Test reductions with ints
+    _slice = (0, slice(1, 9, 1))
+    res = getattr(a, reduce_op)(item=_slice, axis=1)
+    nres = getattr(na[_slice], reduce_op)(axis=1)
+    np.testing.assert_allclose(res, nres, atol=tol, rtol=tol)
 
-# TODO: Test cache path for reductions with non-unit steps in slice
-def test_reduceslice_cache():
-    shape = (8, 12, 5)
-    na = np.linspace(0, 1, num=np.prod(shape)).reshape(shape)
-    a = blosc2.asarray(na, chunks=(2, 5, 1))
-    tol = 1e-6 if na.dtype == np.float32 else 1e-15
-
-    # Test reductions with slices and strides
-    _slice = (slice(1, 2, 1), slice(1, 9, 2))
-    res = blosc2.sum(2 * a + 0.2 * a, item=_slice, axis=1)
-    nres = na[_slice] * 2.2
+    _slice = (0, slice(1, 9, 2))
+    res = getattr(a, reduce_op)(item=_slice, axis=1)
+    nres = getattr(na[_slice], reduce_op)(axis=1)
     np.testing.assert_allclose(res, nres, atol=tol, rtol=tol)
 
 

From 896ccdffcf7889e218b02db0a222ae185d3db116 Mon Sep 17 00:00:00 2001
From: Luke Shaw <lshaw8317@gmail.com>
Date: Wed, 23 Jul 2025 13:45:18 +0200
Subject: [PATCH 03/26] Can't use cache for non-unit steps

---
 src/blosc2/lazyexpr.py         | 11 +++++------
 tests/ndarray/test_lazyexpr.py |  4 ++--
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py
index 9cdb9a87..6fb9e1a2 100644
--- a/src/blosc2/lazyexpr.py
+++ b/src/blosc2/lazyexpr.py
@@ -1475,7 +1475,7 @@ def slices_eval(  # noqa: C901
         # Get the starts and stops for the slice
         starts = [s.start if s.start is not None else 0 for s in cslice]
         stops = [s.stop if s.stop is not None else sh for s, sh in zip(cslice, cslice_shape, strict=True)]
-        stepper = tuple(slice(None, None, s.step) for s in cslice)
+        unit_steps = np.all([s.step == 1 for s in cslice])
 
         # Get the slice of each operand
         for key, value in operands.items():
@@ -1495,9 +1495,9 @@ def slices_eval(  # noqa: C901
                 key in chunk_operands
                 and cslice_shape == chunk_operands[key].shape
                 and isinstance(value, blosc2.NDArray)
+                and unit_steps
             ):
                 value.get_slice_numpy(chunk_operands[key], (starts, stops))
-                chunk_operands[key] = chunk_operands[key][stepper]
                 continue
 
             chunk_operands[key] = value[cslice]
@@ -1871,8 +1871,8 @@ def reduce_slices(  # noqa: C901
             # get intersection of chunk and target
             cslice = step_handler(cslice, _slice)
         chunks_ = tuple(s.stop - s.start for s in cslice)
-        fast_path = False if any((s.step != 1 or s.step is not None) for s in cslice) else fast_path
-        if _slice == () and fast_path:
+        unit_steps = np.all([s.step == 1 for s in cslice])
+        if _slice == () and fast_path and unit_steps:
             # Fast path
             full_chunk = chunks_ == chunks
             fill_chunk_operands(
@@ -1882,7 +1882,6 @@ def reduce_slices(  # noqa: C901
             # Get the starts and stops for the slice
             starts = [s.start if s.start is not None else 0 for s in cslice]
             stops = [s.stop if s.stop is not None else sh for s, sh in zip(cslice, chunks_, strict=True)]
-            stepper = tuple(slice(None, None, s.step) for s in cslice)
             # Get the slice of each operand
             for key, value in operands.items():
                 if np.isscalar(value):
@@ -1901,9 +1900,9 @@ def reduce_slices(  # noqa: C901
                     key in chunk_operands
                     and chunks_ == chunk_operands[key].shape
                     and isinstance(value, blosc2.NDArray)
+                    and unit_steps
                 ):
                     value.get_slice_numpy(chunk_operands[key], (starts, stops))
-                    chunk_operands[key] = chunk_operands[key][stepper]
                     continue
                 chunk_operands[key] = value[cslice]
 
diff --git a/tests/ndarray/test_lazyexpr.py b/tests/ndarray/test_lazyexpr.py
index fb6e8ee0..4475df40 100644
--- a/tests/ndarray/test_lazyexpr.py
+++ b/tests/ndarray/test_lazyexpr.py
@@ -1088,7 +1088,7 @@ def test_eval_getitem2():
     np.testing.assert_allclose(expr[0], nres[0])
     np.testing.assert_allclose(expr[1:, :7], nres[1:, :7])
     np.testing.assert_allclose(expr[0:10:2], nres[0:10:2])
-    # This works, but it is not very efficient since it relies on blosc2.ndarray.slice for non-unit steps
+    # Now relies on inefficient blosc2.ndarray.slice for non-unit steps but only per chunk (not for whole result)
     np.testing.assert_allclose(expr.slice((slice(None, None, None), slice(0, 10, 2)))[:], nres[:, 0:10:2])
 
     # Small test for broadcasting
@@ -1097,7 +1097,7 @@ def test_eval_getitem2():
     np.testing.assert_allclose(expr[0], nres[0])
     np.testing.assert_allclose(expr[1:, :7], nres[1:, :7])
     np.testing.assert_allclose(expr[:, 0:10:2], nres[:, 0:10:2])
-    # This works, but it is not very efficient since it relies on blosc2.ndarray.slice for non-unit steps
+    # Now relies on inefficient blosc2.ndarray.slice for non-unit steps but only per chunk (not for whole result)
     np.testing.assert_allclose(expr.slice((slice(None, None, None), slice(0, 10, 2)))[:], nres[:, 0:10:2])
 
 

From 4b95955d04d02b008f4e598d874df06262225b4b Mon Sep 17 00:00:00 2001
From: Luke Shaw <luke.shaw@ironarray.io>
Date: Wed, 23 Jul 2025 10:13:50 +0200
Subject: [PATCH 04/26] Improving step handling

---
 src/blosc2/lazyexpr.py           | 35 ++++++++++++++------------------
 tests/ndarray/test_reductions.py | 14 +++++++++++++
 2 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py
index 872b06d9..bf5acc0b 100644
--- a/src/blosc2/lazyexpr.py
+++ b/src/blosc2/lazyexpr.py
@@ -1412,26 +1412,19 @@ def slices_eval(  # noqa: C901
     # keep orig_slice
     _slice = _slice.raw
     orig_slice = _slice
-    full_slice = ()  # by default the full_slice is the whole array
-    final_slice = ()  # by default the final_slice is the whole array
 
     # Compute the shape and chunks of the output array, including broadcasting
     shape = compute_broadcast_shape(operands.values())
     if out is None:
         if _slice != ():
             # Check whether _slice contains an integer, or any step that are not None or 1
-            if any(
-                (isinstance(s, int)) or (isinstance(s, slice) and s.step not in (None, 1)) for s in _slice
-            ):
+            if any((isinstance(s, int)) for s in _slice):
                 need_final_slice = True
             _slice = tuple(slice(i, i + 1) if isinstance(i, int) else i for i in _slice)
-            full_slice = tuple(
-                slice(s.start or 0, s.stop or shape[i], 1) for i, s in enumerate(_slice)
-            )  # get rid of non-unit steps
             # shape_slice in general not equal to final shape:
-            # dummy dims (due to ints) or non-unit steps will be dealt with by taking final_slice
-            shape_slice = ndindex.ndindex(full_slice).newshape(shape)
-            final_slice = ndindex.ndindex(orig_slice).as_subindex(full_slice).raw
+            # dummy dims (due to ints) will be dealt with by taking final_slice
+            shape_slice = ndindex.ndindex(_slice).newshape(shape)
+            mask_slice = np.bool([isinstance(i, int) for i in orig_slice])
     else:
         # # out should always have shape of full array
         # if shape is not None and shape != out.shape:
@@ -1477,7 +1470,7 @@ def slices_eval(  # noqa: C901
         # get intersection of chunk and target
         if _slice != ():
             cslice = tuple(
-                slice(max(s1.start, s2.start), min(s1.stop, s2.stop))
+                step_handler(s1.start, s2.start, s1.stop, s2.stop, s2.step)
                 for s1, s2 in zip(chunk_slice.raw, _slice, strict=True)
             )
         else:
@@ -1487,12 +1480,13 @@ def slices_eval(  # noqa: C901
         len_chunk = math.prod(cslice_shape)
         # get local index of part of out that is to be updated
         cslice_subidx = (
-            ndindex.ndindex(cslice).as_subindex(full_slice).raw
-        )  # in the case full_slice=(), just gives cslice
+            ndindex.ndindex(cslice).as_subindex(_slice).raw
+        )  # in the case _slice=(), just gives cslice
 
         # Get the starts and stops for the slice
         starts = [s.start if s.start is not None else 0 for s in cslice]
         stops = [s.stop if s.stop is not None else sh for s, sh in zip(cslice, cslice_shape, strict=True)]
+        stepper = tuple(slice(None, None, s.step) for s in cslice)
 
         # Get the slice of each operand
         for key, value in operands.items():
@@ -1514,6 +1508,7 @@ def slices_eval(  # noqa: C901
                 and isinstance(value, blosc2.NDArray)
             ):
                 value.get_slice_numpy(chunk_operands[key], (starts, stops))
+                chunk_operands[key] = chunk_operands[key][stepper]
                 continue
 
             chunk_operands[key] = value[cslice]
@@ -1622,15 +1617,13 @@ def slices_eval(  # noqa: C901
             out.resize((lenout,))
 
     else:  # Need to take final_slice since filled up array according to slice_ for each chunk
-        if final_slice != ():
+        if need_final_slice:  # only called if out was None
             if isinstance(out, np.ndarray):
-                if need_final_slice:  # only called if out was None
-                    out = out[final_slice]
+                out = np.squeeze(out, np.where(mask_slice))
             elif isinstance(out, blosc2.NDArray):
                 # It *seems* better to choose an automatic chunks and blocks for the output array
                 # out = out.slice(_slice, chunks=out.chunks, blocks=out.blocks)
-                if need_final_slice:  # only called if out was None
-                    out = out.slice(final_slice)
+                out = out.squeeze(mask_slice)
             else:
                 raise ValueError("The output array is not a NumPy array or a NDArray")
 
@@ -1881,7 +1874,7 @@ def reduce_slices(  # noqa: C901
                 for s1, s2 in zip(cslice, _slice, strict=True)
             )
         chunks_ = tuple(s.stop - s.start for s in cslice)
-
+        fast_path = False if any((s.step != 1 or s.step is not None) for s in cslice) else fast_path
         if _slice == () and fast_path:
             # Fast path
             full_chunk = chunks_ == chunks
@@ -1892,6 +1885,7 @@ def reduce_slices(  # noqa: C901
             # Get the starts and stops for the slice
             starts = [s.start if s.start is not None else 0 for s in cslice]
             stops = [s.stop if s.stop is not None else sh for s, sh in zip(cslice, chunks_, strict=True)]
+            stepper = tuple(slice(None, None, s.step) for s in cslice)
             # Get the slice of each operand
             for key, value in operands.items():
                 if np.isscalar(value):
@@ -1912,6 +1906,7 @@ def reduce_slices(  # noqa: C901
                     and isinstance(value, blosc2.NDArray)
                 ):
                     value.get_slice_numpy(chunk_operands[key], (starts, stops))
+                    chunk_operands[key] = chunk_operands[key][stepper]
                     continue
                 chunk_operands[key] = value[cslice]
 
diff --git a/tests/ndarray/test_reductions.py b/tests/ndarray/test_reductions.py
index fa39392b..ae28bb97 100644
--- a/tests/ndarray/test_reductions.py
+++ b/tests/ndarray/test_reductions.py
@@ -207,6 +207,20 @@ def test_reduce_slice(reduce_op):
     np.testing.assert_allclose(res, nres, atol=tol, rtol=tol)
 
 
+# TODO: Test cache path for reductions with non-unit steps in slice
+def test_reduceslice_cache():
+    shape = (8, 12, 5)
+    na = np.linspace(0, 1, num=np.prod(shape)).reshape(shape)
+    a = blosc2.asarray(na, chunks=(2, 5, 1))
+    tol = 1e-6 if na.dtype == np.float32 else 1e-15
+
+    # Test reductions with slices and strides
+    _slice = (slice(1, 2, 1), slice(1, 9, 2))
+    res = blosc2.sum(2 * a + 0.2 * a, item=_slice, axis=1)
+    nres = na[_slice] * 2.2
+    np.testing.assert_allclose(res, nres, atol=tol, rtol=tol)
+
+
 # Test fast path for reductions
 @pytest.mark.parametrize(
     ("chunks", "blocks"),

From 016ce34d26e589faa9b800d6dc3444a87b03843d Mon Sep 17 00:00:00 2001
From: Luke Shaw <lshaw8317@gmail.com>
Date: Wed, 23 Jul 2025 13:30:57 +0200
Subject: [PATCH 05/26] Add fancy index test and improve slice handling for
 reductions

---
 src/blosc2/lazyexpr.py           | 60 ++++++++++++++++++--------------
 tests/ndarray/test_ndarray.py    | 29 +++++++++++++++
 tests/ndarray/test_reductions.py | 19 +++++-----
 3 files changed, 70 insertions(+), 38 deletions(-)

diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py
index bf5acc0b..2f0def15 100644
--- a/src/blosc2/lazyexpr.py
+++ b/src/blosc2/lazyexpr.py
@@ -1420,7 +1420,7 @@ def slices_eval(  # noqa: C901
             # Check whether _slice contains an integer, or any step that are not None or 1
             if any((isinstance(s, int)) for s in _slice):
                 need_final_slice = True
-            _slice = tuple(slice(i, i + 1) if isinstance(i, int) else i for i in _slice)
+            _slice = tuple(slice(i, i + 1, 1) if isinstance(i, int) else i for i in _slice)
             # shape_slice in general not equal to final shape:
             # dummy dims (due to ints) will be dealt with by taking final_slice
             shape_slice = ndindex.ndindex(_slice).newshape(shape)
@@ -1469,10 +1469,7 @@ def slices_eval(  # noqa: C901
     for nchunk, chunk_slice in enumerate(intersecting_chunks):
         # get intersection of chunk and target
         if _slice != ():
-            cslice = tuple(
-                step_handler(s1.start, s2.start, s1.stop, s2.stop, s2.step)
-                for s1, s2 in zip(chunk_slice.raw, _slice, strict=True)
-            )
+            cslice = step_handler(chunk_slice.raw, _slice)
         else:
             cslice = chunk_slice.raw
 
@@ -1619,7 +1616,7 @@ def slices_eval(  # noqa: C901
     else:  # Need to take final_slice since filled up array according to slice_ for each chunk
         if need_final_slice:  # only called if out was None
             if isinstance(out, np.ndarray):
-                out = np.squeeze(out, np.where(mask_slice))
+                out = np.squeeze(out, np.where(mask_slice)[0])
             elif isinstance(out, blosc2.NDArray):
                 # It *seems* better to choose an automatic chunks and blocks for the output array
                 # out = out.slice(_slice, chunks=out.chunks, blocks=out.blocks)
@@ -1736,17 +1733,22 @@ def infer_reduction_dtype(dtype, operation):
         raise ValueError(f"Unsupported operation: {operation}")
 
 
-def step_handler(s1start, s2start, s1stop, s2stop, s2step):
-    # assume s1step = 1
-    newstart = max(s1start, s2start)
-    newstop = min(s1stop, s2stop)
-    rem = (newstart - s2start) % s2step
-    if rem != 0:  # only pass through here if s2step is not 1
-        newstart += s2step - rem
-        # true_stop = start + n*step + 1 -> stop = start + n * step + 1 + residual
-        # so n = (stop - start - 1) // step
-        newstop = newstart + (newstop - newstart - 1) // s2step * s2step + 1
-    return slice(newstart, newstop, s2step)
+def step_handler(cslice, _slice):
+    out = ()
+    for s1, s2 in zip(cslice, _slice, strict=True):
+        s1start, s1stop = s1.start, s1.stop
+        s2start, s2stop, s2step = s2.start, s2.stop, s2.step
+        # assume s1step = 1
+        newstart = max(s1start, s2start)
+        newstop = min(s1stop, s2stop)
+        rem = (newstart - s2start) % s2step
+        if rem != 0:  # only pass through here if s2step is not 1
+            newstart += s2step - rem
+            # true_stop = start + n*step + 1 -> stop = start + n * step + 1 + residual
+            # so n = (stop - start - 1) // step
+            newstop = newstart + (newstop - newstart - 1) // s2step * s2step + 1
+        out += (slice(newstart, newstop, s2step),)
+    return out
 
 
 def reduce_slices(  # noqa: C901
@@ -1800,21 +1802,27 @@ def reduce_slices(  # noqa: C901
 
     _slice = _slice.raw
     shape_slice = shape
-    full_slice = ()  # by default the full_slice is the whole array
+    mask_slice = np.bool([isinstance(i, int) for i in _slice])
     if out is None and _slice != ():
+        _slice = tuple(slice(i, i + 1, 1) if isinstance(i, int) else i for i in _slice)
         shape_slice = ndindex.ndindex(_slice).newshape(shape)
-        full_slice = _slice
+        # shape_slice in general not equal to final shape:
+        # dummy dims (due to ints) will be dealt with by taking final_slice
 
     # after slicing, we reduce to calculate shape of output
     if axis is None:
         axis = tuple(range(len(shape_slice)))
     elif not isinstance(axis, tuple):
         axis = (axis,)
-    axis = tuple(a if a >= 0 else a + len(shape_slice) for a in axis)
+    axis = np.array([a if a >= 0 else a + len(shape_slice) for a in axis])
+    if np.any(mask_slice):
+        axis = tuple(axis + np.cumsum(mask_slice)[axis])  # axis now refers to new shape with dummy dims
+        reduce_args["axis"] = axis
     if keepdims:
         reduced_shape = tuple(1 if i in axis else s for i, s in enumerate(shape_slice))
     else:
         reduced_shape = tuple(s for i, s in enumerate(shape_slice) if i not in axis)
+        mask_slice = mask_slice[[i for i in range(len(mask_slice)) if i not in axis]]
 
     if out is not None and reduced_shape != out.shape:
         raise ValueError("Provided output shape does not match the reduced shape.")
@@ -1869,10 +1877,7 @@ def reduce_slices(  # noqa: C901
         # Check whether current cslice intersects with _slice
         if cslice != () and _slice != ():
             # get intersection of chunk and target
-            cslice = tuple(
-                step_handler(s1.start, s2.start, s1.stop, s2.stop, s2.step)
-                for s1, s2 in zip(cslice, _slice, strict=True)
-            )
+            cslice = step_handler(cslice, _slice)
         chunks_ = tuple(s.stop - s.start for s in cslice)
         fast_path = False if any((s.step != 1 or s.step is not None) for s in cslice) else fast_path
         if _slice == () and fast_path:
@@ -1911,9 +1916,7 @@ def reduce_slices(  # noqa: C901
                 chunk_operands[key] = value[cslice]
 
         # get local index of part of out that is to be updated
-        cslice_subidx = (
-            ndindex.ndindex(cslice).as_subindex(full_slice).raw
-        )  # if full_slice is (), just gives cslice
+        cslice_subidx = ndindex.ndindex(cslice).as_subindex(_slice).raw  # if _slice is (), just gives cslice
         if keepdims:
             reduced_slice = tuple(slice(None) if i in axis else sl for i, sl in enumerate(cslice_subidx))
         else:
@@ -1992,6 +1995,9 @@ def reduce_slices(  # noqa: C901
             dtype = np.float64
         out = convert_none_out(dtype, reduce_op, reduced_shape)
 
+    final_mask = tuple(np.where(mask_slice)[0])
+    if np.any(mask_slice):  # remove dummy dims
+        out = np.squeeze(out, axis=final_mask)
     # Check if the output array needs to be converted into a blosc2.NDArray
     if kwargs != {} and not np.isscalar(out):
         out = blosc2.asarray(out, **kwargs)
diff --git a/tests/ndarray/test_ndarray.py b/tests/ndarray/test_ndarray.py
index f583c5fc..d58aa0eb 100644
--- a/tests/ndarray/test_ndarray.py
+++ b/tests/ndarray/test_ndarray.py
@@ -358,3 +358,32 @@ def test_findex():
     # b3 = arr[0, :, [0, 1]]
     # n3 = nparr[0, :, [0, 1]]
     # np.testing.assert_allclose(b3, n3)
+
+
+@pytest.mark.parametrize(
+    "arr",
+    [
+        np.random.default_rng().random((2, 1000, 10, 8, 3)).astype(np.float32),
+        blosc2.asarray(np.random.default_rng().random((2, 1000, 10, 8, 3)).astype(np.float32)),
+    ],
+)
+def test_strided_output(arr):
+    def fancy_strided_output(inputs, output_indices, stride=1):
+        b, t, *f = inputs.shape
+        oi = np.asarray(output_indices, dtype=np.int32)
+
+        start = np.amax(output_indices)
+        win_starts = np.arange(start, t, stride, dtype=np.int32)
+        rel_idx = win_starts[:, None] - oi[None]
+        rel_idx[rel_idx < 0] = 0
+
+        w, o = rel_idx.shape
+        batch_idx = np.arange(b, dtype=np.int32)[:, None, None]
+        batch_idx = np.broadcast_to(batch_idx, (b, w, o))
+        time_idx = np.broadcast_to(rel_idx, (b, w, o))
+
+        return inputs[batch_idx, time_idx]
+
+    output_indices = [800, 74, 671, 132, 818]
+    out = fancy_strided_output(arr, output_indices, stride=16)
+    assert out.shape == (2, 12, 5, 10, 8, 3)
diff --git a/tests/ndarray/test_reductions.py b/tests/ndarray/test_reductions.py
index ae28bb97..10aaad3f 100644
--- a/tests/ndarray/test_reductions.py
+++ b/tests/ndarray/test_reductions.py
@@ -206,18 +206,15 @@ def test_reduce_slice(reduce_op):
     nres = getattr(na[_slice], reduce_op)(axis=1)
     np.testing.assert_allclose(res, nres, atol=tol, rtol=tol)
 
+    # Test reductions with ints
+    _slice = (0, slice(1, 9, 1))
+    res = getattr(a, reduce_op)(item=_slice, axis=1)
+    nres = getattr(na[_slice], reduce_op)(axis=1)
+    np.testing.assert_allclose(res, nres, atol=tol, rtol=tol)
 
-# TODO: Test cache path for reductions with non-unit steps in slice
-def test_reduceslice_cache():
-    shape = (8, 12, 5)
-    na = np.linspace(0, 1, num=np.prod(shape)).reshape(shape)
-    a = blosc2.asarray(na, chunks=(2, 5, 1))
-    tol = 1e-6 if na.dtype == np.float32 else 1e-15
-
-    # Test reductions with slices and strides
-    _slice = (slice(1, 2, 1), slice(1, 9, 2))
-    res = blosc2.sum(2 * a + 0.2 * a, item=_slice, axis=1)
-    nres = na[_slice] * 2.2
+    _slice = (0, slice(1, 9, 2))
+    res = getattr(a, reduce_op)(item=_slice, axis=1)
+    nres = getattr(na[_slice], reduce_op)(axis=1)
     np.testing.assert_allclose(res, nres, atol=tol, rtol=tol)
 
 

From c31cdac29bf556161b78f34f4b1faf455079ccaa Mon Sep 17 00:00:00 2001
From: Luke Shaw <lshaw8317@gmail.com>
Date: Wed, 23 Jul 2025 13:45:18 +0200
Subject: [PATCH 06/26] Can't use cache for non-unit steps

---
 src/blosc2/lazyexpr.py         | 11 +++++------
 tests/ndarray/test_lazyexpr.py |  4 ++--
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py
index 2f0def15..3ab07e1e 100644
--- a/src/blosc2/lazyexpr.py
+++ b/src/blosc2/lazyexpr.py
@@ -1483,7 +1483,7 @@ def slices_eval(  # noqa: C901
         # Get the starts and stops for the slice
         starts = [s.start if s.start is not None else 0 for s in cslice]
         stops = [s.stop if s.stop is not None else sh for s, sh in zip(cslice, cslice_shape, strict=True)]
-        stepper = tuple(slice(None, None, s.step) for s in cslice)
+        unit_steps = np.all([s.step == 1 for s in cslice])
 
         # Get the slice of each operand
         for key, value in operands.items():
@@ -1503,9 +1503,9 @@ def slices_eval(  # noqa: C901
                 key in chunk_operands
                 and cslice_shape == chunk_operands[key].shape
                 and isinstance(value, blosc2.NDArray)
+                and unit_steps
             ):
                 value.get_slice_numpy(chunk_operands[key], (starts, stops))
-                chunk_operands[key] = chunk_operands[key][stepper]
                 continue
 
             chunk_operands[key] = value[cslice]
@@ -1879,8 +1879,8 @@ def reduce_slices(  # noqa: C901
             # get intersection of chunk and target
             cslice = step_handler(cslice, _slice)
         chunks_ = tuple(s.stop - s.start for s in cslice)
-        fast_path = False if any((s.step != 1 or s.step is not None) for s in cslice) else fast_path
-        if _slice == () and fast_path:
+        unit_steps = np.all([s.step == 1 for s in cslice])
+        if _slice == () and fast_path and unit_steps:
             # Fast path
             full_chunk = chunks_ == chunks
             fill_chunk_operands(
@@ -1890,7 +1890,6 @@ def reduce_slices(  # noqa: C901
             # Get the starts and stops for the slice
             starts = [s.start if s.start is not None else 0 for s in cslice]
             stops = [s.stop if s.stop is not None else sh for s, sh in zip(cslice, chunks_, strict=True)]
-            stepper = tuple(slice(None, None, s.step) for s in cslice)
             # Get the slice of each operand
             for key, value in operands.items():
                 if np.isscalar(value):
@@ -1909,9 +1908,9 @@ def reduce_slices(  # noqa: C901
                     key in chunk_operands
                     and chunks_ == chunk_operands[key].shape
                     and isinstance(value, blosc2.NDArray)
+                    and unit_steps
                 ):
                     value.get_slice_numpy(chunk_operands[key], (starts, stops))
-                    chunk_operands[key] = chunk_operands[key][stepper]
                     continue
                 chunk_operands[key] = value[cslice]
 
diff --git a/tests/ndarray/test_lazyexpr.py b/tests/ndarray/test_lazyexpr.py
index dba9da5e..f2c0714c 100644
--- a/tests/ndarray/test_lazyexpr.py
+++ b/tests/ndarray/test_lazyexpr.py
@@ -1088,7 +1088,7 @@ def test_eval_getitem2():
     np.testing.assert_allclose(expr[0], nres[0])
     np.testing.assert_allclose(expr[1:, :7], nres[1:, :7])
     np.testing.assert_allclose(expr[0:10:2], nres[0:10:2])
-    # This works, but it is not very efficient since it relies on blosc2.ndarray.slice for non-unit steps
+    # Now relies on inefficient blosc2.ndarray.slice for non-unit steps but only per chunk (not for whole result)
     np.testing.assert_allclose(expr.slice((slice(None, None, None), slice(0, 10, 2)))[:], nres[:, 0:10:2])
 
     # Small test for broadcasting
@@ -1097,7 +1097,7 @@ def test_eval_getitem2():
     np.testing.assert_allclose(expr[0], nres[0])
     np.testing.assert_allclose(expr[1:, :7], nres[1:, :7])
     np.testing.assert_allclose(expr[:, 0:10:2], nres[:, 0:10:2])
-    # This works, but it is not very efficient since it relies on blosc2.ndarray.slice for non-unit steps
+    # Now relies on inefficient blosc2.ndarray.slice for non-unit steps but only per chunk (not for whole result)
     np.testing.assert_allclose(expr.slice((slice(None, None, None), slice(0, 10, 2)))[:], nres[:, 0:10:2])
 
 

From e2503e30b89b0b0d8f45b843bfa0ef831d367644 Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Fri, 25 Jul 2025 19:11:21 +0200
Subject: [PATCH 07/26] Give a try to latest pyodide 0.28.0

---
 pyproject.toml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index cf18fd83..dad1ba68 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -128,3 +128,7 @@ ignore = [
 
 [tool.ruff.lint.extend-per-file-ignores]
 "tests/**" = ["F841"]
+
+[tool.cibuildwheel.pyodide]
+# Build Pyodide wheels using Pyodide version 0.28.0
+pyodide-version = "0.28.0"

From f38a28de89bac23bf8d802373f36e82d27217d10 Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Fri, 25 Jul 2025 19:16:10 +0200
Subject: [PATCH 08/26] Use better a restriction in numpy version

---
 pyproject.toml | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index dad1ba68..5a6bf8c4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,7 +32,7 @@ classifiers = [
 requires-python = ">=3.10"
 # Follow guidelines from https://scientific-python.org/specs/spec-0000/
 dependencies = [
-    "numpy>=1.26",
+    "numpy>=2.2",
     #"numpy>=2",
     "ndindex",
     "msgpack",
@@ -128,7 +128,3 @@ ignore = [
 
 [tool.ruff.lint.extend-per-file-ignores]
 "tests/**" = ["F841"]
-
-[tool.cibuildwheel.pyodide]
-# Build Pyodide wheels using Pyodide version 0.28.0
-pyodide-version = "0.28.0"

From 4eecb0219867dd0225cca541e139f3c38c939ddd Mon Sep 17 00:00:00 2001
From: Francesc Alted <francesc@blosc.org>
Date: Fri, 25 Jul 2025 19:21:44 +0200
Subject: [PATCH 09/26] No way; for now we can just use numpy 2.02 for pyodide

---
 .github/workflows/wasm.yml | 2 +-
 pyproject.toml             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/wasm.yml b/.github/workflows/wasm.yml
index 6095e103..3227c797 100644
--- a/.github/workflows/wasm.yml
+++ b/.github/workflows/wasm.yml
@@ -46,7 +46,7 @@ jobs:
         run: pip install cibuildwheel
 
       - name: Build wheels
-        # Testing is automaticall made by cibuildwheel
+        # Testing is automatically made by cibuildwheel
         run: cibuildwheel --platform pyodide
 
       - name: Upload wheels
diff --git a/pyproject.toml b/pyproject.toml
index 5a6bf8c4..cf18fd83 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,7 +32,7 @@ classifiers = [
 requires-python = ">=3.10"
 # Follow guidelines from https://scientific-python.org/specs/spec-0000/
 dependencies = [
-    "numpy>=2.2",
+    "numpy>=1.26",
     #"numpy>=2",
     "ndindex",
     "msgpack",

From 62d46f9cca2e0a11bf35a67015503bdde82af047 Mon Sep 17 00:00:00 2001
From: Luke Shaw <luke.shaw@ironarray.io>
Date: Mon, 4 Aug 2025 10:13:23 +0200
Subject: [PATCH 10/26] Remove disk arrays in tests

---
 tests/ndarray/test_concatenate.py     | 2 ++
 tests/ndarray/test_lazyexpr.py        | 4 ++++
 tests/ndarray/test_lazyexpr_fields.py | 3 +++
 3 files changed, 9 insertions(+)

diff --git a/tests/ndarray/test_concatenate.py b/tests/ndarray/test_concatenate.py
index 6a2f5d24..b5b432eb 100644
--- a/tests/ndarray/test_concatenate.py
+++ b/tests/ndarray/test_concatenate.py
@@ -99,3 +99,5 @@ def test_stack(shape, dtype, axis):
         [ndarr1, ndarr2, ndarr3], axis=axis, cparams=cparams, urlpath="localfile.b2nd", mode="w"
     )
     np.testing.assert_almost_equal(result[:], nparray)
+    # Remove localfile
+    blosc2.remove_urlpath("localfile.b2nd")
diff --git a/tests/ndarray/test_lazyexpr.py b/tests/ndarray/test_lazyexpr.py
index f2c0714c..c9dbe7a5 100644
--- a/tests/ndarray/test_lazyexpr.py
+++ b/tests/ndarray/test_lazyexpr.py
@@ -1457,6 +1457,10 @@ def test_chain_persistentexpressions():
     myle4 = blosc2.open("expr4.b2nd")
     assert (myle4[:] == le4[:]).all()
 
+    # Remove files
+    for f in ["expr1.b2nd", "expr2.b2nd", "expr3.b2nd", "expr4.b2nd", "a.b2nd", "b.b2nd", "c.b2nd"]:
+        blosc2.remove_urlpath(f)
+
 
 @pytest.mark.parametrize(
     "values",
diff --git a/tests/ndarray/test_lazyexpr_fields.py b/tests/ndarray/test_lazyexpr_fields.py
index b9a9b51a..50be0076 100644
--- a/tests/ndarray/test_lazyexpr_fields.py
+++ b/tests/ndarray/test_lazyexpr_fields.py
@@ -671,3 +671,6 @@ def test_fields_indexing():
     gotitem = expr[0]  # gives an error
     np.testing.assert_array_equal(sliced[()], gotitem)
     np.testing.assert_array_equal(gotitem, temp[0])
+
+    # Remove file
+    blosc2.remove_urlpath("sa-1M.b2nd")

From 1c7392425591c6aa565e148298fba04182a896f6 Mon Sep 17 00:00:00 2001
From: Luke Shaw <luke.shaw@ironarray.io>
Date: Mon, 4 Aug 2025 12:06:33 +0200
Subject: [PATCH 11/26] New commit to use new version of pyodide

---
 tests/ndarray/test_lazyexpr.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/ndarray/test_lazyexpr.py b/tests/ndarray/test_lazyexpr.py
index c9dbe7a5..a566c73c 100644
--- a/tests/ndarray/test_lazyexpr.py
+++ b/tests/ndarray/test_lazyexpr.py
@@ -1105,13 +1105,13 @@ def test_eval_getitem2():
 def test_eval_slice(array_fixture):
     a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture
     expr = blosc2.lazyexpr("a1 + a2 - (a3 * a4)", operands={"a1": a1, "a2": a2, "a3": a3, "a4": a4})
-    nres = ne_evaluate("na1 + na2 - (na3 * na4)")[:2]
-    res = expr.slice(slice(0, 2))
+    nres = ne_evaluate("na1 + na2 - (na3 * na4)")
+    res = expr.slice(slice(0, 8, 2))
     assert isinstance(res, blosc2.ndarray.NDArray)
-    np.testing.assert_allclose(res[:], nres)
-    res = expr[:2]
+    np.testing.assert_allclose(res[:], nres[:8:2])
+    res = expr[:8:2]
     assert isinstance(res, np.ndarray)
-    np.testing.assert_allclose(res, nres)
+    np.testing.assert_allclose(res, nres[:8:2])
 
     # string lazy expressions automatically use .slice internally
     expr1 = blosc2.lazyexpr("a1 * a2", operands={"a1": a1, "a2": a2})

From 98c36936fb88d00364f148ee47f2f38d52d8fc9f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 4 Aug 2025 11:08:05 +0000
Subject: [PATCH 12/26] Apply pre-commit fixes

---
 doc/getting_started/tutorials/04.reductions.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/getting_started/tutorials/04.reductions.ipynb b/doc/getting_started/tutorials/04.reductions.ipynb
index 09721281..545084cd 100644
--- a/doc/getting_started/tutorials/04.reductions.ipynb
+++ b/doc/getting_started/tutorials/04.reductions.ipynb
@@ -308,7 +308,7 @@
    "source": [
     "# Manual chunking\n",
     "chunks = (200, 200, 100)\n",
-    "#blocks = (100, 50, 50)  # optional, but can help performance\n",
+    "# blocks = (100, 50, 50)  # optional, but can help performance\n",
     "meas = measure_blosc2(chunks, blocks=None)\n",
     "plot_meas(meas_np, meas, chunks)"
    ]

From 369f20ae99dd4d984bad428ad0092f6ac0ffa563 Mon Sep 17 00:00:00 2001
From: Luke Shaw <luke.shaw@ironarray.io>
Date: Tue, 5 Aug 2025 11:18:10 +0200
Subject: [PATCH 13/26] Force pyodide version

---
 pyproject.toml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index cf18fd83..132b4ef8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -84,6 +84,8 @@ documentation = "https://www.blosc.org/python-blosc2/python-blosc2.html"
 
 [tool.cibuildwheel]
 build-verbosity = 1
+[tool.cibuildwheel.pyodide]
+pyodide-version = ">=0.28.0"
 # Skip unsupported python versions as well as 32-bit platforms, which are not supported anymore.
 skip = "*-manylinux_i686 cp*-win32 *_ppc64le *_s390x *musllinux*"
 test-requires = "pytest"

From 22bd5e344d4a690aea97797459e8f9bb64718c29 Mon Sep 17 00:00:00 2001
From: Luke Shaw <luke.shaw@ironarray.io>
Date: Tue, 5 Aug 2025 11:27:38 +0200
Subject: [PATCH 14/26] Force python 3.13

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 132b4ef8..4fdd280f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,7 +29,7 @@ classifiers = [
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
 ]
-requires-python = ">=3.10"
+requires-python = ">=3.13"
 # Follow guidelines from https://scientific-python.org/specs/spec-0000/
 dependencies = [
     "numpy>=1.26",

From bac2309302f9daa5b2034e2876f9735b2ba2cdbc Mon Sep 17 00:00:00 2001
From: Luke Shaw <luke.shaw@ironarray.io>
Date: Tue, 5 Aug 2025 11:29:16 +0200
Subject: [PATCH 15/26] Revert "Force pyodide version"

This reverts commit 369f20ae99dd4d984bad428ad0092f6ac0ffa563.
---
 pyproject.toml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 4fdd280f..554b0943 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -84,8 +84,6 @@ documentation = "https://www.blosc.org/python-blosc2/python-blosc2.html"
 
 [tool.cibuildwheel]
 build-verbosity = 1
-[tool.cibuildwheel.pyodide]
-pyodide-version = ">=0.28.0"
 # Skip unsupported python versions as well as 32-bit platforms, which are not supported anymore.
 skip = "*-manylinux_i686 cp*-win32 *_ppc64le *_s390x *musllinux*"
 test-requires = "pytest"

From 28d5e415c831eaea0bdb34136e0543ab455aa2d3 Mon Sep 17 00:00:00 2001
From: Luke Shaw <luke.shaw@ironarray.io>
Date: Tue, 5 Aug 2025 11:30:28 +0200
Subject: [PATCH 16/26] Revert "Force python 3.13"

This reverts commit 22bd5e344d4a690aea97797459e8f9bb64718c29.
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 554b0943..cf18fd83 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,7 +29,7 @@ classifiers = [
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
 ]
-requires-python = ">=3.13"
+requires-python = ">=3.10"
 # Follow guidelines from https://scientific-python.org/specs/spec-0000/
 dependencies = [
     "numpy>=1.26",

From b7b2e21672ae94f41e65ac4e1bf4dca943eba133 Mon Sep 17 00:00:00 2001
From: Luke Shaw <luke.shaw@ironarray.io>
Date: Tue, 5 Aug 2025 11:33:52 +0200
Subject: [PATCH 17/26] Force pyodide version

---
 pyproject.toml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index cf18fd83..cdf2d108 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -84,6 +84,10 @@ documentation = "https://www.blosc.org/python-blosc2/python-blosc2.html"
 
 [tool.cibuildwheel]
 build-verbosity = 1
+[tool.cibuildwheel.pyodide]
+# Build Pyodide wheels using Pyodide version 0.28.0
+pyodide-version = "0.28.0"
+
 # Skip unsupported python versions as well as 32-bit platforms, which are not supported anymore.
 skip = "*-manylinux_i686 cp*-win32 *_ppc64le *_s390x *musllinux*"
 test-requires = "pytest"

From 412dff0d053ffc556c3cbd1f7def119ebdb61305 Mon Sep 17 00:00:00 2001
From: Luke Shaw <luke.shaw@ironarray.io>
Date: Tue, 5 Aug 2025 11:56:58 +0200
Subject: [PATCH 18/26] Force numpy version

---
 pyproject.toml | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index cdf2d108..5a6bf8c4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,7 +32,7 @@ classifiers = [
 requires-python = ">=3.10"
 # Follow guidelines from https://scientific-python.org/specs/spec-0000/
 dependencies = [
-    "numpy>=1.26",
+    "numpy>=2.2",
     #"numpy>=2",
     "ndindex",
     "msgpack",
@@ -84,10 +84,6 @@ documentation = "https://www.blosc.org/python-blosc2/python-blosc2.html"
 
 [tool.cibuildwheel]
 build-verbosity = 1
-[tool.cibuildwheel.pyodide]
-# Build Pyodide wheels using Pyodide version 0.28.0
-pyodide-version = "0.28.0"
-
 # Skip unsupported python versions as well as 32-bit platforms, which are not supported anymore.
 skip = "*-manylinux_i686 cp*-win32 *_ppc64le *_s390x *musllinux*"
 test-requires = "pytest"

From 14f7d67fc76ed85c640f1769c6742c748c7ae418 Mon Sep 17 00:00:00 2001
From: Luke Shaw <luke.shaw@ironarray.io>
Date: Wed, 6 Aug 2025 09:01:14 +0200
Subject: [PATCH 19/26] Only build WASM wheels for python 3.13, force pyodide
 version

---
 .github/workflows/wasm.yml | 4 ++--
 pyproject.toml             | 6 +++++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/wasm.yml b/.github/workflows/wasm.yml
index 3227c797..54e572fd 100644
--- a/.github/workflows/wasm.yml
+++ b/.github/workflows/wasm.yml
@@ -25,8 +25,8 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest]
-        cibw_build: ["cp3{11,12,13}-*"]
-        p_ver: ["3.11-3.13"]
+        cibw_build: ["cp3{13}-*"]
+        p_ver: ["3.13"]
 
     steps:
       - name: Checkout repo
diff --git a/pyproject.toml b/pyproject.toml
index 5a6bf8c4..cdf2d108 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,7 +32,7 @@ classifiers = [
 requires-python = ">=3.10"
 # Follow guidelines from https://scientific-python.org/specs/spec-0000/
 dependencies = [
-    "numpy>=2.2",
+    "numpy>=1.26",
     #"numpy>=2",
     "ndindex",
     "msgpack",
@@ -84,6 +84,10 @@ documentation = "https://www.blosc.org/python-blosc2/python-blosc2.html"
 
 [tool.cibuildwheel]
 build-verbosity = 1
+[tool.cibuildwheel.pyodide]
+# Build Pyodide wheels using Pyodide version 0.28.0
+pyodide-version = "0.28.0"
+
 # Skip unsupported python versions as well as 32-bit platforms, which are not supported anymore.
 skip = "*-manylinux_i686 cp*-win32 *_ppc64le *_s390x *musllinux*"
 test-requires = "pytest"

From ecb87f0c5782c0f840425ee7ac83d0f5a27cfb96 Mon Sep 17 00:00:00 2001
From: Luke Shaw <luke.shaw@ironarray.io>
Date: Wed, 6 Aug 2025 09:04:31 +0200
Subject: [PATCH 20/26] Only build WASM wheels for python 3.13, force pyodide
 version

---
 .github/workflows/wasm.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/wasm.yml b/.github/workflows/wasm.yml
index 54e572fd..0fee6778 100644
--- a/.github/workflows/wasm.yml
+++ b/.github/workflows/wasm.yml
@@ -25,7 +25,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest]
-        cibw_build: ["cp3{13}-*"]
+        cibw_build: ["cp313-*"]
         p_ver: ["3.13"]
 
     steps:

From 6128a38fdcbab22c7cd5ac883657812e88be60f0 Mon Sep 17 00:00:00 2001
From: Luke Shaw <luke.shaw@ironarray.io>
Date: Wed, 6 Aug 2025 10:24:37 +0200
Subject: [PATCH 21/26] Force contiguous when an evaluation does not happen and
 handle special case for rebasing

---
 pyproject.toml                 |  4 ----
 src/blosc2/lazyexpr.py         | 18 +++++++++++++++---
 tests/ndarray/test_lazyexpr.py | 14 +++++++++++++-
 3 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index cdf2d108..cf18fd83 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -84,10 +84,6 @@ documentation = "https://www.blosc.org/python-blosc2/python-blosc2.html"
 
 [tool.cibuildwheel]
 build-verbosity = 1
-[tool.cibuildwheel.pyodide]
-# Build Pyodide wheels using Pyodide version 0.28.0
-pyodide-version = "0.28.0"
-
 # Skip unsupported python versions as well as 32-bit platforms, which are not supported anymore.
 skip = "*-manylinux_i686 cp*-win32 *_ppc64le *_s390x *musllinux*"
 test-requires = "pytest"
diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py
index 3ab07e1e..49c61134 100644
--- a/src/blosc2/lazyexpr.py
+++ b/src/blosc2/lazyexpr.py
@@ -1522,7 +1522,11 @@ def slices_eval(  # noqa: C901
             continue
 
         if where is None:
-            result = ne_evaluate(expression, chunk_operands, **ne_args)
+            if expression == "o0":
+                # We don't have an actual expression, so just make sure contiguous
+                result = np.require(chunk_operands["o0"], requirements="C")
+            else:
+                result = ne_evaluate(expression, chunk_operands, **ne_args)
         else:
             # Apply the where condition (in result)
             if len(where) == 2:
@@ -1935,8 +1939,8 @@ def reduce_slices(  # noqa: C901
 
         if where is None:
             if expression == "o0":
-                # We don't have an actual expression, so avoid a copy
-                result = chunk_operands["o0"]
+                # We don't have an actual expression, so avoid a copy except if have to make contiguous
+                result = np.require(chunk_operands["o0"], requirements="C")
             else:
                 result = ne_evaluate(expression, chunk_operands, **ne_args)
         else:
@@ -3118,6 +3122,14 @@ def _new_expr(cls, expression, operands, guess, out=None, where=None, ne_args=No
                 new_expr.expression_tosave = expression
                 new_expr.operands = operands_
                 new_expr.operands_tosave = operands
+            elif isinstance(new_expr, blosc2.NDArray) and len(operands) == 1:
+                # passed either "a" or possible "a[:10]"
+                expression_, operands_ = conserve_functions(
+                    _expression, _operands, {"o0": list(operands.values())[0]} | local_vars
+                )
+                new_expr = cls(None)
+                new_expr.expression = expression_
+                new_expr.operands = operands_
             else:
                 # An immediate evaluation happened (e.g. all operands are numpy arrays)
                 new_expr = cls(None)
diff --git a/tests/ndarray/test_lazyexpr.py b/tests/ndarray/test_lazyexpr.py
index a566c73c..88610dac 100644
--- a/tests/ndarray/test_lazyexpr.py
+++ b/tests/ndarray/test_lazyexpr.py
@@ -1089,7 +1089,7 @@ def test_eval_getitem2():
     np.testing.assert_allclose(expr[1:, :7], nres[1:, :7])
     np.testing.assert_allclose(expr[0:10:2], nres[0:10:2])
     # Now relies on inefficient blosc2.ndarray.slice for non-unit steps but only per chunk (not for whole result)
-    np.testing.assert_allclose(expr.slice((slice(None, None, None), slice(0, 10, 2)))[:], nres[:, 0:10:2])
+    np.testing.assert_allclose(expr.slice((slice(1, 2, 1), slice(0, 10, 2)))[:], nres[1:2, 0:10:2])
 
     # Small test for broadcasting
     expr = test_arr + test_arr.slice(1)
@@ -1123,6 +1123,18 @@ def test_eval_slice(array_fixture):
     np.testing.assert_allclose(res[()], nres)
 
 
+def test_rebasing(array_fixture):
+    a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture
+    expr = blosc2.lazyexpr("a1 + a2 - (a3 * a4)", operands={"a1": a1, "a2": a2, "a3": a3, "a4": a4})
+    assert expr.expression == "(o0 + o1 - o2 * o3)"
+
+    expr = blosc2.lazyexpr("a1")
+    assert expr.expression == "o0"
+
+    expr = blosc2.lazyexpr("a1[:10]")
+    assert expr.expression == "o0.slice((slice(None, 10, None),))"
+
+
 # Test get_chunk method
 @pytest.mark.heavy
 def test_get_chunk(array_fixture):

From c3af99b422ca20d245e834b94fd38b3b187fef47 Mon Sep 17 00:00:00 2001
From: Luke Shaw <luke.shaw@ironarray.io>
Date: Wed, 6 Aug 2025 10:30:17 +0200
Subject: [PATCH 22/26] Correct error with building py 3.11, 3.12 wasm wheels

---
 .github/workflows/wasm.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/wasm.yml b/.github/workflows/wasm.yml
index 0fee6778..3227c797 100644
--- a/.github/workflows/wasm.yml
+++ b/.github/workflows/wasm.yml
@@ -25,8 +25,8 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest]
-        cibw_build: ["cp313-*"]
-        p_ver: ["3.13"]
+        cibw_build: ["cp3{11,12,13}-*"]
+        p_ver: ["3.11-3.13"]
 
     steps:
       - name: Checkout repo

From 5a1de4ce9af822020f77ee24a5e553a84c6b63d7 Mon Sep 17 00:00:00 2001
From: Luke Shaw <luke.shaw@ironarray.io>
Date: Wed, 6 Aug 2025 11:03:44 +0200
Subject: [PATCH 23/26] Simplify logic a little

---
 src/blosc2/lazyexpr.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py
index 49c61134..d6e8df43 100644
--- a/src/blosc2/lazyexpr.py
+++ b/src/blosc2/lazyexpr.py
@@ -1522,11 +1522,7 @@ def slices_eval(  # noqa: C901
             continue
 
         if where is None:
-            if expression == "o0":
-                # We don't have an actual expression, so just make sure contiguous
-                result = np.require(chunk_operands["o0"], requirements="C")
-            else:
-                result = ne_evaluate(expression, chunk_operands, **ne_args)
+            result = ne_evaluate(expression, chunk_operands, **ne_args)
         else:
             # Apply the where condition (in result)
             if len(where) == 2:
@@ -1561,6 +1557,9 @@ def slices_eval(  # noqa: C901
                     result = x[result]
             else:
                 raise ValueError("The where condition must be a tuple with one or two elements")
+        # Enforce contiguity of result (necessary to fill the out array)
+        # but avoid copy if already contiguous
+        result = np.require(result, requirements="C")
 
         if out is None:
             shape_ = shape_slice if shape_slice is not None else shape
@@ -1939,7 +1938,7 @@ def reduce_slices(  # noqa: C901
 
         if where is None:
             if expression == "o0":
-                # We don't have an actual expression, so avoid a copy except if have to make contiguous
+                # We don't have an actual expression, so avoid a copy except to make contiguous
                 result = np.require(chunk_operands["o0"], requirements="C")
             else:
                 result = ne_evaluate(expression, chunk_operands, **ne_args)

From 80b7b24c894b676e7f0b6fbc2b6cc80c6114190e Mon Sep 17 00:00:00 2001
From: Luke Shaw <luke.shaw@ironarray.io>
Date: Fri, 8 Aug 2025 10:08:40 +0200
Subject: [PATCH 24/26] Edit to ensure slice_fast_path doesn't load too-large
 array

---
 src/blosc2/lazyexpr.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py
index d6e8df43..a979f476 100644
--- a/src/blosc2/lazyexpr.py
+++ b/src/blosc2/lazyexpr.py
@@ -2092,7 +2092,9 @@ def chunked_eval(  # noqa: C901
             # The fast path is possible under a few conditions
             if getitem and (where is None or len(where) == 2) and not callable(expression):
                 # Compute the size of operands for the fast path
-                shape_operands = item.newshape(shape)  # shape of slice
+                unit_steps = np.all([s.step == 1 for s in item.raw])
+                # shape of slice, if non-unit steps have to decompress full array into memory
+                shape_operands = item.newshape(shape) if unit_steps else shape
                 _dtype = kwargs.get("dtype", np.float64)
                 size_operands = math.prod(shape_operands) * len(operands) * _dtype.itemsize
                 # Only take the fast path if the size of operands is relatively small

From 1f9c10d9ae8edc43791b35405492a0f3540e6ff4 Mon Sep 17 00:00:00 2001
From: Luke Shaw <lshaw8317@gmail.com>
Date: Fri, 8 Aug 2025 10:37:56 +0200
Subject: [PATCH 25/26] Add bench

---
 bench/ndarray/slice-expr-step.py | 49 ++++++++++++++++++++++++++++++++
 src/blosc2/lazyexpr.py           |  2 +-
 2 files changed, 50 insertions(+), 1 deletion(-)
 create mode 100644 bench/ndarray/slice-expr-step.py

diff --git a/bench/ndarray/slice-expr-step.py b/bench/ndarray/slice-expr-step.py
new file mode 100644
index 00000000..01f75dca
--- /dev/null
+++ b/bench/ndarray/slice-expr-step.py
@@ -0,0 +1,49 @@
+#######################################################################
+# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
+# All rights reserved.
+#
+# This source code is licensed under a BSD-style license (found in the
+# LICENSE file in the root directory of this source tree)
+#######################################################################
+
+# Benchmark for computing a slice with non-unit steps of a expression in a ND array.
+
+import blosc2
+import numpy as np
+import matplotlib.pyplot as plt
+from memory_profiler import profile, memory_usage
+
+N = 50_000
+LARGE_SLICE = False
+ndim = 2
+shape = (N, ) * ndim
+a = blosc2.linspace(start=0, stop=np.prod(shape), num=np.prod(shape), dtype=np.float64, shape=shape)
+_slice = (slice(0, N, 2),) if LARGE_SLICE else (slice(0, N, N//4),)
+expr = 2 * a ** 2
+
+@profile
+def _slice_():
+    res1 = expr.slice(_slice)
+    print(f'Result of slice occupies {res1.schunk.cbytes / 1024**2:.2f} MiB')
+    return res1
+
+@profile
+def _gitem():
+    res2 = expr[_slice]
+    print(f'Result of _getitem_ occupies {np.prod(res2.shape) * res2.itemsize / 1024**2:.2f} MiB')
+    return res2
+
+interval = 0.001
+offset = 0
+for f in [_slice_, _gitem]:
+    mem = memory_usage((f,), interval=interval)
+    times = offset + interval * np.arange(len(mem))
+    offset = times[-1]
+    plt.plot(times, mem)
+
+plt.xlabel('Time (s)')
+plt.ylabel('Memory usage (MiB)')
+lab = 'LARGE' if LARGE_SLICE else 'SMALL'
+plt.title(f'{lab} slice w/steps, Linux Blosc2 {blosc2.__version__}')
+plt.legend([f'expr.slice({_slice}', f'expr[{_slice}]'])
+plt.savefig(f'sliceexpr_{lab}_Blosc{blosc2.__version__.replace('.','_')}.png', format="png")
diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py
index a979f476..5d838244 100644
--- a/src/blosc2/lazyexpr.py
+++ b/src/blosc2/lazyexpr.py
@@ -2092,7 +2092,7 @@ def chunked_eval(  # noqa: C901
             # The fast path is possible under a few conditions
             if getitem and (where is None or len(where) == 2) and not callable(expression):
                 # Compute the size of operands for the fast path
-                unit_steps = np.all([s.step == 1 for s in item.raw])
+                unit_steps = np.all([s.step == 1 for s in item.raw if isinstance(s, slice)])
                 # shape of slice, if non-unit steps have to decompress full array into memory
                 shape_operands = item.newshape(shape) if unit_steps else shape
                 _dtype = kwargs.get("dtype", np.float64)

From db2d38f839b4b53db2c686cc87dc6c9145dc1a2f Mon Sep 17 00:00:00 2001
From: Luke Shaw <lshaw8317@gmail.com>
Date: Fri, 8 Aug 2025 10:46:56 +0200
Subject: [PATCH 26/26] Fixed failing test

---
 tests/ndarray/test_lazyexpr.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/tests/ndarray/test_lazyexpr.py b/tests/ndarray/test_lazyexpr.py
index 05fc2957..614c604b 100644
--- a/tests/ndarray/test_lazyexpr.py
+++ b/tests/ndarray/test_lazyexpr.py
@@ -1089,9 +1089,7 @@ def test_eval_getitem2():
     np.testing.assert_allclose(expr[1:, :7], nres[1:, :7])
     np.testing.assert_allclose(expr[0:10:2], nres[0:10:2])
     # Now relies on inefficient blosc2.ndarray.slice for non-unit steps but only per chunk (not for whole result)
-    np.testing.assert_allclose(expr.slice((slice(1, 2, 1), slice(0, 10, 2)))[:], nres[1:2, 0:10:2])
-    # Now relies on inefficient blosc2.ndarray.slice for non-unit steps but only per chunk (not for whole result)
-    np.testing.assert_allclose(expr.slice((slice(1, 2, 1), slice(0, 10, 2)))[:], nres[:, 0:10:2])
+    np.testing.assert_allclose(expr.slice((slice(None, None, None), slice(0, 10, 2)))[:], nres[:, 0:10:2])
 
     # Small test for broadcasting
     expr = test_arr + test_arr.slice(1)
@@ -1100,8 +1098,7 @@ def test_eval_getitem2():
     np.testing.assert_allclose(expr[1:, :7], nres[1:, :7])
     np.testing.assert_allclose(expr[:, 0:10:2], nres[:, 0:10:2])
     # Now relies on inefficient blosc2.ndarray.slice for non-unit steps but only per chunk (not for whole result)
-    # Now relies on inefficient blosc2.ndarray.slice for non-unit steps but only per chunk (not for whole result)
-    np.testing.assert_allclose(expr.slice((slice(1, 2, 1), slice(0, 10, 2)))[:], nres[:, 0:10:2])
+    np.testing.assert_allclose(expr.slice((slice(None, None, None), slice(0, 10, 2)))[:], nres[:, 0:10:2])
 
 
 # Test lazyexpr's slice method