From 5da3159e38c327a4c5c38fbd48e8510083f9279b Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Tue, 22 Apr 2025 14:02:18 +0200 Subject: [PATCH 1/7] add numcodecs.zarr3.to_zarr3 method --- numcodecs/tests/test_zarr3.py | 48 ++++++++++++++++++++++++++++++++++- numcodecs/zarr3.py | 15 +++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py index 0d8ecc74..32227d45 100644 --- a/numcodecs/tests/test_zarr3.py +++ b/numcodecs/tests/test_zarr3.py @@ -260,7 +260,7 @@ def test_delta_astype(store: StorePath): dtype=data.dtype, fill_value=0, filters=[ - numcodecs.zarr3.Delta(dtype="i8", astype="i2"), # type: ignore[arg-type] + numcodecs.Delta(dtype="i8", astype="i2"), # type: ignore[arg-type] ], ) @@ -277,3 +277,49 @@ def test_repr(): def test_to_dict(): codec = numcodecs.zarr3.LZ4(level=5) assert codec.to_dict() == {"name": "numcodecs.lz4", "configuration": {"level": 5}} + +@pytest.mark.parametrize(("codec_v2", "expected_v3_cls"),[ + (numcodecs.BZ2(), numcodecs.zarr3.BZ2), + (numcodecs.CRC32(), numcodecs.zarr3.CRC32), + (numcodecs.CRC32C(), numcodecs.zarr3.CRC32C), + (numcodecs.LZ4(), numcodecs.zarr3.LZ4), + (numcodecs.LZMA(), numcodecs.zarr3.LZMA), + (numcodecs.ZFPY(), numcodecs.zarr3.ZFPY), + (numcodecs.Adler32(), numcodecs.zarr3.Adler32), + (numcodecs.AsType(encode_dtype=np.float64,decode_dtype=np.float32), numcodecs.zarr3.AsType), + (numcodecs.BitRound(keepbits=10), numcodecs.zarr3.BitRound), + (numcodecs.Blosc(), numcodecs.zarr3.Blosc), + (numcodecs.Delta(dtype=np.float64), numcodecs.zarr3.Delta), + (numcodecs.FixedScaleOffset(offset=1000, scale=10, dtype='f8', astype='u1'), numcodecs.zarr3.FixedScaleOffset), + (numcodecs.Fletcher32(), numcodecs.zarr3.Fletcher32), + (numcodecs.GZip(), numcodecs.zarr3.GZip), + (numcodecs.JenkinsLookup3(), numcodecs.zarr3.JenkinsLookup3), + (numcodecs.PCodec(), numcodecs.zarr3.PCodec), + (numcodecs.PackBits(), numcodecs.zarr3.PackBits), + 
(numcodecs.Quantize(digits=1, dtype='f8'), numcodecs.zarr3.Quantize), + (numcodecs.Shuffle(), numcodecs.zarr3.Shuffle), + (numcodecs.Zlib(), numcodecs.zarr3.Zlib), + (numcodecs.Zstd(), numcodecs.zarr3.Zstd), +]) +def test_cast_numcodecs_to_v3(store: Store, codec_v2, expected_v3_cls) -> None: + result_v3 = numcodecs.zarr3.to_zarr3(codec_v2) + + assert result_v3.__class__ == expected_v3_cls + assert result_v3.codec_config == codec_v2.get_config() + + if issubclass(expected_v3_cls, numcodecs.zarr3._NumcodecsArrayArrayCodec): + codec_args = {"filters": [result_v3]} + elif issubclass(expected_v3_cls, numcodecs.zarr3._NumcodecsArrayBytesCodec): + codec_args = {"serializer": result_v3} + elif issubclass(expected_v3_cls, numcodecs.zarr3._NumcodecsBytesBytesCodec): + codec_args = {"compressors": [result_v3]} + else: + raise TypeError(f"unsupported type: {expected_v3_cls}") + zarr.create_array( + store, + shape=(64,), + chunks=(64,), + dtype=np.bool, + fill_value=0, + **codec_args + ) diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index 43684c3d..7cd38a2d 100644 --- a/numcodecs/zarr3.py +++ b/numcodecs/zarr3.py @@ -399,3 +399,18 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> AsType: "Zlib", "Zstd", ] + +def to_zarr3(codec: numcodecs.abc.Codec) -> _NumcodecsBytesBytesCodec | _NumcodecsArrayBytesCodec | _NumcodecsArrayArrayCodec: + """Convert a numcodecs codec to its zarr3-compatible equivalent.""" + codec_name = codec.__class__.__name__ + zarr3_module = numcodecs.zarr3 + + if not hasattr(zarr3_module, codec_name): + raise ValueError(f"No Zarr3 wrapper found for codec: {codec_name}") + + zarr3_codec_class = getattr(zarr3_module, codec_name) + + config = codec.get_config() + config.pop("id", None) + + return zarr3_codec_class(**config) \ No newline at end of file From df5bc5fcaef97eac4e37f3ed4ec1087db085f553 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Tue, 22 Apr 2025 14:08:11 +0200 Subject: [PATCH 2/7] format --- numcodecs/tests/test_zarr3.py | 70 
+++++++++++++++++++---------------- numcodecs/zarr3.py | 7 +++- 2 files changed, 44 insertions(+), 33 deletions(-) diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py index 32227d45..b171b3d9 100644 --- a/numcodecs/tests/test_zarr3.py +++ b/numcodecs/tests/test_zarr3.py @@ -278,35 +278,49 @@ def test_to_dict(): codec = numcodecs.zarr3.LZ4(level=5) assert codec.to_dict() == {"name": "numcodecs.lz4", "configuration": {"level": 5}} -@pytest.mark.parametrize(("codec_v2", "expected_v3_cls"),[ - (numcodecs.BZ2(), numcodecs.zarr3.BZ2), - (numcodecs.CRC32(), numcodecs.zarr3.CRC32), - (numcodecs.CRC32C(), numcodecs.zarr3.CRC32C), - (numcodecs.LZ4(), numcodecs.zarr3.LZ4), - (numcodecs.LZMA(), numcodecs.zarr3.LZMA), - (numcodecs.ZFPY(), numcodecs.zarr3.ZFPY), - (numcodecs.Adler32(), numcodecs.zarr3.Adler32), - (numcodecs.AsType(encode_dtype=np.float64,decode_dtype=np.float32), numcodecs.zarr3.AsType), - (numcodecs.BitRound(keepbits=10), numcodecs.zarr3.BitRound), - (numcodecs.Blosc(), numcodecs.zarr3.Blosc), - (numcodecs.Delta(dtype=np.float64), numcodecs.zarr3.Delta), - (numcodecs.FixedScaleOffset(offset=1000, scale=10, dtype='f8', astype='u1'), numcodecs.zarr3.FixedScaleOffset), - (numcodecs.Fletcher32(), numcodecs.zarr3.Fletcher32), - (numcodecs.GZip(), numcodecs.zarr3.GZip), - (numcodecs.JenkinsLookup3(), numcodecs.zarr3.JenkinsLookup3), - (numcodecs.PCodec(), numcodecs.zarr3.PCodec), - (numcodecs.PackBits(), numcodecs.zarr3.PackBits), - (numcodecs.Quantize(digits=1, dtype='f8'), numcodecs.zarr3.Quantize), - (numcodecs.Shuffle(), numcodecs.zarr3.Shuffle), - (numcodecs.Zlib(), numcodecs.zarr3.Zlib), - (numcodecs.Zstd(), numcodecs.zarr3.Zstd), -]) + +@pytest.mark.parametrize( + ("codec_v2", "expected_v3_cls"), + [ + (numcodecs.BZ2(), numcodecs.zarr3.BZ2), + (numcodecs.CRC32(), numcodecs.zarr3.CRC32), + (numcodecs.CRC32C(), numcodecs.zarr3.CRC32C), + (numcodecs.LZ4(), numcodecs.zarr3.LZ4), + (numcodecs.LZMA(), numcodecs.zarr3.LZMA), + 
(numcodecs.ZFPY(), numcodecs.zarr3.ZFPY), + (numcodecs.Adler32(), numcodecs.zarr3.Adler32), + ( + numcodecs.AsType(encode_dtype=np.float64, decode_dtype=np.float32), + numcodecs.zarr3.AsType, + ), + (numcodecs.BitRound(keepbits=10), numcodecs.zarr3.BitRound), + (numcodecs.Blosc(), numcodecs.zarr3.Blosc), + (numcodecs.Delta(dtype=np.float64), numcodecs.zarr3.Delta), + ( + numcodecs.FixedScaleOffset(offset=1000, scale=10, dtype='f8', astype='u1'), + numcodecs.zarr3.FixedScaleOffset, + ), + (numcodecs.Fletcher32(), numcodecs.zarr3.Fletcher32), + (numcodecs.GZip(), numcodecs.zarr3.GZip), + (numcodecs.JenkinsLookup3(), numcodecs.zarr3.JenkinsLookup3), + (numcodecs.PCodec(), numcodecs.zarr3.PCodec), + (numcodecs.PackBits(), numcodecs.zarr3.PackBits), + (numcodecs.Quantize(digits=1, dtype='f8'), numcodecs.zarr3.Quantize), + (numcodecs.Shuffle(), numcodecs.zarr3.Shuffle), + (numcodecs.Zlib(), numcodecs.zarr3.Zlib), + (numcodecs.Zstd(), numcodecs.zarr3.Zstd), + ], +) def test_cast_numcodecs_to_v3(store: Store, codec_v2, expected_v3_cls) -> None: result_v3 = numcodecs.zarr3.to_zarr3(codec_v2) assert result_v3.__class__ == expected_v3_cls assert result_v3.codec_config == codec_v2.get_config() + from zarr.abc.codec import Codec + + codec_args: dict[str, Codec] + if issubclass(expected_v3_cls, numcodecs.zarr3._NumcodecsArrayArrayCodec): codec_args = {"filters": [result_v3]} elif issubclass(expected_v3_cls, numcodecs.zarr3._NumcodecsArrayBytesCodec): @@ -315,11 +329,5 @@ def test_cast_numcodecs_to_v3(store: Store, codec_v2, expected_v3_cls) -> None: codec_args = {"compressors": [result_v3]} else: raise TypeError(f"unsupported type: {expected_v3_cls}") - zarr.create_array( - store, - shape=(64,), - chunks=(64,), - dtype=np.bool, - fill_value=0, - **codec_args - ) + + zarr.create_array(store, shape=(64,), chunks=(64,), dtype=np.bool, fill_value=0, **codec_args) diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py index 7cd38a2d..981b5f16 100644 --- a/numcodecs/zarr3.py +++ 
b/numcodecs/zarr3.py @@ -400,7 +400,10 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> AsType: "Zstd", ] -def to_zarr3(codec: numcodecs.abc.Codec) -> _NumcodecsBytesBytesCodec | _NumcodecsArrayBytesCodec | _NumcodecsArrayArrayCodec: + +def to_zarr3( + codec: numcodecs.abc.Codec, +) -> _NumcodecsBytesBytesCodec | _NumcodecsArrayBytesCodec | _NumcodecsArrayArrayCodec: """Convert a numcodecs codec to its zarr3-compatible equivalent.""" codec_name = codec.__class__.__name__ zarr3_module = numcodecs.zarr3 @@ -413,4 +416,4 @@ def to_zarr3(codec: numcodecs.abc.Codec) -> _NumcodecsBytesBytesCodec | _Numcode config = codec.get_config() config.pop("id", None) - return zarr3_codec_class(**config) \ No newline at end of file + return zarr3_codec_class(**config) From 37673352a70ae8dcd1b66076b734c1a5a34a02c1 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Tue, 22 Apr 2025 14:14:50 +0200 Subject: [PATCH 3/7] revert change in test_delta_astype --- numcodecs/tests/test_zarr3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py index b171b3d9..28929bd6 100644 --- a/numcodecs/tests/test_zarr3.py +++ b/numcodecs/tests/test_zarr3.py @@ -260,7 +260,7 @@ def test_delta_astype(store: StorePath): dtype=data.dtype, fill_value=0, filters=[ - numcodecs.Delta(dtype="i8", astype="i2"), # type: ignore[arg-type] + numcodecs.zarr3.Delta(dtype="i8", astype="i2"), # type: ignore[arg-type] ], ) From 60d69334564b2e4ac5cb709a6abf041cf5f7ddbe Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Tue, 22 Apr 2025 14:19:49 +0200 Subject: [PATCH 4/7] fix typing --- numcodecs/tests/test_zarr3.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py index 28929bd6..8c7a6b2c 100644 --- a/numcodecs/tests/test_zarr3.py +++ b/numcodecs/tests/test_zarr3.py @@ -317,9 +317,9 @@ def test_cast_numcodecs_to_v3(store: Store, codec_v2, expected_v3_cls) -> 
None: assert result_v3.__class__ == expected_v3_cls assert result_v3.codec_config == codec_v2.get_config() - from zarr.abc.codec import Codec + from zarr.core.array import CompressorsLike, FiltersLike, SerializerLike - codec_args: dict[str, Codec] + codec_args: FiltersLike | SerializerLike | CompressorsLike if issubclass(expected_v3_cls, numcodecs.zarr3._NumcodecsArrayArrayCodec): codec_args = {"filters": [result_v3]} From 8c65365d053a4c287ac990cc4832811065ce7117 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Tue, 22 Apr 2025 14:27:33 +0200 Subject: [PATCH 5/7] fix typing in test --- numcodecs/tests/test_zarr3.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py index 8c7a6b2c..f7340b28 100644 --- a/numcodecs/tests/test_zarr3.py +++ b/numcodecs/tests/test_zarr3.py @@ -317,17 +317,25 @@ def test_cast_numcodecs_to_v3(store: Store, codec_v2, expected_v3_cls) -> None: assert result_v3.__class__ == expected_v3_cls assert result_v3.codec_config == codec_v2.get_config() - from zarr.core.array import CompressorsLike, FiltersLike, SerializerLike - - codec_args: FiltersLike | SerializerLike | CompressorsLike - + filters = "auto" + serializer = "auto" + compressors = "auto" if issubclass(expected_v3_cls, numcodecs.zarr3._NumcodecsArrayArrayCodec): - codec_args = {"filters": [result_v3]} + filters = [result_v3] elif issubclass(expected_v3_cls, numcodecs.zarr3._NumcodecsArrayBytesCodec): - codec_args = {"serializer": result_v3} + serializer = result_v3 elif issubclass(expected_v3_cls, numcodecs.zarr3._NumcodecsBytesBytesCodec): - codec_args = {"compressors": [result_v3]} + compressors = [result_v3] else: raise TypeError(f"unsupported type: {expected_v3_cls}") - zarr.create_array(store, shape=(64,), chunks=(64,), dtype=np.bool, fill_value=0, **codec_args) + zarr.create_array( + store, + shape=(64,), + chunks=(64,), + dtype=np.bool, + fill_value=0, + filters=filters, + 
compressors=compressors, + serializer=serializer, + ) From d2be5b3b02f1a71504344707b258a2e2e1c790f5 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Tue, 22 Apr 2025 14:27:33 +0200 Subject: [PATCH 6/7] fix typing in test --- numcodecs/tests/test_zarr3.py | 36 ++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py index 8c7a6b2c..d030e707 100644 --- a/numcodecs/tests/test_zarr3.py +++ b/numcodecs/tests/test_zarr3.py @@ -4,6 +4,8 @@ import numpy as np import pytest +from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec +from zarr.core.array import CompressorsLike, FiltersLike, SerializerLike if TYPE_CHECKING: # pragma: no cover import zarr @@ -317,17 +319,25 @@ def test_cast_numcodecs_to_v3(store: Store, codec_v2, expected_v3_cls) -> None: assert result_v3.__class__ == expected_v3_cls assert result_v3.codec_config == codec_v2.get_config() - from zarr.core.array import CompressorsLike, FiltersLike, SerializerLike - - codec_args: FiltersLike | SerializerLike | CompressorsLike - - if issubclass(expected_v3_cls, numcodecs.zarr3._NumcodecsArrayArrayCodec): - codec_args = {"filters": [result_v3]} - elif issubclass(expected_v3_cls, numcodecs.zarr3._NumcodecsArrayBytesCodec): - codec_args = {"serializer": result_v3} - elif issubclass(expected_v3_cls, numcodecs.zarr3._NumcodecsBytesBytesCodec): - codec_args = {"compressors": [result_v3]} + filters: FiltersLike = "auto" + serializer: SerializerLike = "auto" + compressors: CompressorsLike = "auto" + if isinstance(result_v3, ArrayArrayCodec): + filters = [result_v3] + elif isinstance(result_v3, ArrayBytesCodec): + serializer = result_v3 + elif isinstance(result_v3, BytesBytesCodec): + compressors = [result_v3] else: - raise TypeError(f"unsupported type: {expected_v3_cls}") - - zarr.create_array(store, shape=(64,), chunks=(64,), dtype=np.bool, fill_value=0, **codec_args) + raise TypeError(f"unsupported 
type: {result_v3.__class__}") + + zarr.create_array( + store, + shape=(64,), + chunks=(64,), + dtype=np.bool, + fill_value=0, + filters=filters, + compressors=compressors, + serializer=serializer, + ) From d661eabc08d693cec1ea8919cfa0a265bf4aa099 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Tue, 22 Apr 2025 14:53:28 +0200 Subject: [PATCH 7/7] update release.rst --- docs/release.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index bda2f867..ce5d0830 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -11,7 +11,6 @@ Release notes Unreleased ---------- - .. _unreleased: Unreleased @@ -20,6 +19,9 @@ Unreleased Enhancements ~~~~~~~~~~~~ +* implement ``to_zarr3`` in ``numcodecs.zarr3`` to enable conversion of a codec to its zarr3-compatible equivalent. + By :user:`Hannes Spitz <brokkoli71>` + Improvements ~~~~~~~~~~~~ @@ -63,7 +65,7 @@ Enhancements Removals ~~~~~~~~ -The following ``blosc`` funcitons are removed, with no replacement. +The following ``blosc`` functions are removed, with no replacement. This is because they were not intended to be public API. - ``numcodecs.blosc.init``