From 45ccec17f903aabe33a60ff368ef0a7486ae1313 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Tue, 26 Aug 2025 17:58:17 +0200 Subject: [PATCH 1/4] account for `endian` being possibly `None` --- virtualizarr/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virtualizarr/utils.py b/virtualizarr/utils.py index c36212fc..2574663e 100644 --- a/virtualizarr/utils.py +++ b/virtualizarr/utils.py @@ -145,7 +145,7 @@ def convert_v3_to_v2_metadata( # but other codec pipelines could store endianness elsewhere. big_endian = any( isinstance(codec, ArrayBytesCodec) - and hasattr(codec, "endian") + and getattr(codec, "endian", None) is not None and codec.endian.value == "big" for codec in v3_metadata.codecs ) From 59ef11548ec5698d0f32f0996a8c088138ece5c6 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Wed, 27 Aug 2025 22:37:48 +0200 Subject: [PATCH 2/4] check that missing endianness works correctly --- virtualizarr/tests/test_writers/test_kerchunk.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/virtualizarr/tests/test_writers/test_kerchunk.py b/virtualizarr/tests/test_writers/test_kerchunk.py index 6b778baa..ba7d7180 100644 --- a/virtualizarr/tests/test_writers/test_kerchunk.py +++ b/virtualizarr/tests/test_writers/test_kerchunk.py @@ -193,17 +193,21 @@ def test_accessor_to_kerchunk_parquet(self, tmp_path, array_v3_metadata): } -@pytest.mark.parametrize("endian,expected_dtype_char", [("little", "<"), ("big", ">")]) +@pytest.mark.parametrize( + ["dtype", "endian", "expected_dtype_char"], + [("i8", "little", "<"), ("i8", "big", ">"), ("i1", None, "|")], +) def test_convert_v3_to_v2_metadata( - array_v3_metadata, endian: str, expected_dtype_char: str + array_v3_metadata, dtype: str, endian: str | None, expected_dtype_char: str ): shape = (5, 20) chunks = (5, 10) + codecs = [ {"name": "bytes", "configuration": {"endian": endian}}, { "name": "numcodecs.delta", - "configuration": {"dtype": f"{expected_dtype_char}i8"}, + "configuration": {"dtype": f"{expected_dtype_char}{dtype}"}, }, { "name": "numcodecs.blosc", @@ -211,7 +215,9 @@ def test_convert_v3_to_v2_metadata( }, ] - v3_metadata = array_v3_metadata(shape=shape, chunks=chunks, codecs=codecs) + v3_metadata = array_v3_metadata( + data_type=np.dtype(dtype), shape=shape, chunks=chunks, codecs=codecs + ) v2_metadata = convert_v3_to_v2_metadata(v3_metadata) assert isinstance(v2_metadata, ArrayV2Metadata) @@ -232,7 +238,7 @@ def test_convert_v3_to_v2_metadata( filters_config = filter_codec.get_config() assert filters_config["id"] == "delta" - expected_delta_dtype = f"{expected_dtype_char}i8" + expected_delta_dtype = f"{expected_dtype_char}{dtype}" assert filters_config["dtype"] == expected_delta_dtype assert filters_config["astype"] == expected_delta_dtype assert v2_metadata.attributes == {} From 7bcb4f64a5d4191c0fbc8d82eadee01012f8dcbc Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Wed, 27 Aug 2025 23:09:52 +0200 Subject: [PATCH 3/4] expect the exact dtype back --- virtualizarr/tests/test_writers/test_kerchunk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virtualizarr/tests/test_writers/test_kerchunk.py b/virtualizarr/tests/test_writers/test_kerchunk.py index ba7d7180..b315365a 100644 --- a/virtualizarr/tests/test_writers/test_kerchunk.py +++ b/virtualizarr/tests/test_writers/test_kerchunk.py @@ -222,7 +222,7 @@ def test_convert_v3_to_v2_metadata( assert isinstance(v2_metadata, ArrayV2Metadata) assert v2_metadata.shape == shape - expected_dtype = np.dtype(f"{expected_dtype_char}i4") # assuming int32 + expected_dtype = np.dtype(f"{expected_dtype_char}{dtype}") assert v2_metadata.dtype.to_native_dtype() == expected_dtype assert v2_metadata.chunks == chunks assert v2_metadata.fill_value == 0 From 2145ea496d254e9daf3c125fbe19f125e632e5f4 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Sat, 30 Aug 2025 12:10:12 +0200 Subject: [PATCH 4/4] ignore the mypy error Not sure if there's a better fix for this, but the `getattr` call basically guarantees that if `codec` does not have the `endian` attribute the last condition is never reached. --- virtualizarr/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virtualizarr/utils.py b/virtualizarr/utils.py index 2574663e..08f49d3a 100644 --- a/virtualizarr/utils.py +++ b/virtualizarr/utils.py @@ -146,7 +146,7 @@ def convert_v3_to_v2_metadata( big_endian = any( isinstance(codec, ArrayBytesCodec) and getattr(codec, "endian", None) is not None - and codec.endian.value == "big" + and codec.endian.value == "big" # type: ignore[attr-defined] for codec in v3_metadata.codecs ) if big_endian: