From 2c42a46c8f89f5155bdb316965ac0458a63b8609 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 16 May 2025 17:13:39 +0200 Subject: [PATCH 1/5] wip --- tests/test_time_travel/__init__.py | 0 tests/test_time_travel/test_time_travel.py | 0 tests/test_time_travel/zp2.py | 77 ++++++++++++++++++++++ 3 files changed, 77 insertions(+) create mode 100644 tests/test_time_travel/__init__.py create mode 100644 tests/test_time_travel/test_time_travel.py create mode 100644 tests/test_time_travel/zp2.py diff --git a/tests/test_time_travel/__init__.py b/tests/test_time_travel/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_time_travel/test_time_travel.py b/tests/test_time_travel/test_time_travel.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_time_travel/zp2.py b/tests/test_time_travel/zp2.py new file mode 100644 index 0000000000..011de2f8d6 --- /dev/null +++ b/tests/test_time_travel/zp2.py @@ -0,0 +1,77 @@ +# /// script +# requires-python = "3.11" +# dependencies = [ +# zarr==2.18, +# ] +# /// + +import argparse + +import zarr +from zarr.storage import BaseStore + + +def copy_group( + *, node: zarr.hierarchy.Group, store: zarr.storage.BaseStore, path: str, overwrite: bool +) -> zarr.hierarchy.Group: + result = zarr.group(store=store, path=path, overwrite=overwrite) + result.attrs.put(attrs) + for key, child in node.items(): + child_path = f"{path}/{key}" + if isinstance(child, zarr.hierarchy.Group): + copy_group(node=child, store=store, path=child_path, overwrite=overwrite) + elif isinstance(child, zarr.core.Array): + copy_array(node=child, store=store, overwrite=overwrite, path=child_path) + return result + + +def copy_array( + *, node: zarr.core.Array, store: BaseStore, path: str, overwrite: bool +) -> zarr.core.Array: + result = zarr.create( + shape=node.shape, + dtype=node.dtype, + fill_value=node.fill_value, + chunks=node.chunks, + compressor=node.compressor, + filters=node.filters, + 
order=node.order, + dimension_separator=node.dimension_separator, + store=store, + path=path, + overwrite=overwrite, + ) + result.attrs.put(node.attrs.asdict()) + return result + + +def copy_node( + node: zarr.hierarchy.Group | zarr.core.Array, store: BaseStore, path: str, overwrite: bool +) -> zarr.hierarchy.Group | zarr.core.Array: + if isinstance(node, zarr.hierarchy.Group): + return copy_group(node=node, store=store, path=path, overwrite=overwrite) + elif isinstance(node, zarr.core.Array): + return copy_array(node=node, store=store, path=path, overwrite=overwrite) + + +def cli() -> None: + parser = argparse.ArgumentParser( + description="Copy a zarr hierarchy from one location to another" + ) + parser.add_argument("source", type=str, help="Path to the source zarr hierarchy") + parser.add_argument("destination", type=str, help="Path to the destination zarr hierarchy") + args = parser.parse_args() + + src, dst = args.source, args.dest + + root_src = zarr.open(src, mode="r") + result = copy_node(node=root_src, store=dst, overwrite=True) + print(f"successfully created {result} at {dst}") + + +def main() -> None: + cli() + + +if __name__ == "__main__": + main() From a2c2b48624d0ff2b2c44ddd6153af1ff5e7c6ba4 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 20 May 2025 08:58:05 +0200 Subject: [PATCH 2/5] wip --- pyproject.toml | 7 +++++ tests/test_time_travel/test_time_travel.py | 35 +++++++++++++++++++++ tests/test_time_travel/{zp2.py => v2.18.py} | 0 3 files changed, 42 insertions(+) rename tests/test_time_travel/{zp2.py => v2.18.py} (100%) diff --git a/pyproject.toml b/pyproject.toml index f1c290e1b1..1f54ef4ac2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -262,6 +262,13 @@ run-coverage = "pytest --cov-config=pyproject.toml --cov=pkg --cov-report xml -- run-coverage-gpu = "pip install cupy-cuda12x && pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov-report xml --cov=src --junitxml=junit.xml -o junit_family=legacy" run-coverage-html = 
"pytest --cov-config=pyproject.toml --cov=pkg --cov-report html --cov=src" +[tool.hatch.envs.time_travel] +description = "Test environment for tests against older zarr-python versions" +dependencies = [ + "hatch ==1.14.1" + ] +features=["test"] + [tool.ruff] line-length = 100 force-exclude = true diff --git a/tests/test_time_travel/test_time_travel.py b/tests/test_time_travel/test_time_travel.py index e69de29bb2..586c1fb6c1 100644 --- a/tests/test_time_travel/test_time_travel.py +++ b/tests/test_time_travel/test_time_travel.py @@ -0,0 +1,35 @@ +from pathlib import Path + +import numpy as np +import pytest +from numcodecs import GZip + +from zarr.core.group import GroupMetadata, create_hierarchy +from zarr.core.metadata.v2 import ArrayV2Metadata +from zarr.storage import LocalStore + + +@pytest.fixture +def hierarchy_model(request: pytest.FixtureRequest) -> dict[str, ArrayV2Metadata | GroupMetadata]: + dtype = np.uint8() + return { + "": GroupMetadata(attributes={"foo": "bar"}, zarr_format=2), + "/array": ArrayV2Metadata( + shape=(10, 10), + dtype=dtype, + chunks=(10, 10), + compressor=GZip(), + fill_value=1, + order="C", + filters=[GZip()], + ), + } + + +async def test_copy( + tmp_path: Path, hierarchy_model: dict[str, ArrayV2Metadata | GroupMetadata] +) -> None: + # create the hierarchy + store = LocalStore(tmp_path) + [x async for x in create_hierarchy(store=store, nodes=hierarchy_model)] + breakpoint() diff --git a/tests/test_time_travel/zp2.py b/tests/test_time_travel/v2.18.py similarity index 100% rename from tests/test_time_travel/zp2.py rename to tests/test_time_travel/v2.18.py From 2e62ab8e27c61b8c907b2e26cdc34aadc8195685 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 26 May 2025 13:26:08 +0200 Subject: [PATCH 3/5] add v2 array regression testing --- pyproject.toml | 4 +- .../__init__.py | 0 tests/test_regression/test_regression.py | 112 ++++++++++++++++++ .../v2.18.py | 20 ++-- tests/test_time_travel/test_time_travel.py | 35 ------ 5 files 
changed, 126 insertions(+), 45 deletions(-) rename tests/{test_time_travel => test_regression}/__init__.py (100%) create mode 100644 tests/test_regression/test_regression.py rename tests/{test_time_travel => test_regression}/v2.18.py (81%) delete mode 100644 tests/test_time_travel/test_time_travel.py diff --git a/pyproject.toml b/pyproject.toml index 1f54ef4ac2..3d9b528e02 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -262,10 +262,10 @@ run-coverage = "pytest --cov-config=pyproject.toml --cov=pkg --cov-report xml -- run-coverage-gpu = "pip install cupy-cuda12x && pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov-report xml --cov=src --junitxml=junit.xml -o junit_family=legacy" run-coverage-html = "pytest --cov-config=pyproject.toml --cov=pkg --cov-report html --cov=src" -[tool.hatch.envs.time_travel] +[tool.hatch.envs.regression] description = "Test environment for tests against older zarr-python versions" dependencies = [ - "hatch ==1.14.1" + "uv ==0.7.8" ] features=["test"] diff --git a/tests/test_time_travel/__init__.py b/tests/test_regression/__init__.py similarity index 100% rename from tests/test_time_travel/__init__.py rename to tests/test_regression/__init__.py diff --git a/tests/test_regression/test_regression.py b/tests/test_regression/test_regression.py new file mode 100644 index 0000000000..d51d1108d4 --- /dev/null +++ b/tests/test_regression/test_regression.py @@ -0,0 +1,112 @@ +import subprocess +from dataclasses import asdict, dataclass +from itertools import product +from pathlib import Path + +import numcodecs +import numpy as np +import pytest +from numcodecs import LZ4, LZMA, Blosc, GZip, VLenUTF8, Zstd + +import zarr +from zarr.core.array import Array +from zarr.core.metadata.v2 import ArrayV2Metadata +from zarr.storage import LocalStore + + +def runner_installed() -> bool: + try: + subprocess.check_output(["uv", "--version"]) + return True + except FileNotFoundError: + return False + + +def array_metadata_equals(a: 
ArrayV2Metadata, b: ArrayV2Metadata) -> bool: + dict_a, dict_b = asdict(a), asdict(b) + fill_value_a, fill_value_b = dict_a.pop("fill_value"), dict_b.pop("fill_value") + if np.isnan(fill_value_a) and np.isnan(fill_value_b): + return dict_a == dict_b + else: + return fill_value_a == fill_value_b and dict_a == dict_b + + +@dataclass(kw_only=True) +class ArrayParams: + values: np.ndarray[tuple[int], np.dtype[np.generic]] + fill_value: np.generic | str + compressor: numcodecs.abc.Codec + + +basic_codecs = GZip(), Blosc(), LZ4(), LZMA(), Zstd() +basic_dtypes = "|b", ">i2", ">i4", ">f4", ">f8", "<f4", "<f8", ">c8", "<c8", ">c16", "<c16" +datetime_dtypes = "<M8[10ns]", ">M8[10us]", "<m8[2ms]", ">m8[4ps]" +string_dtypes = ">S1", "<S4", "<U1", ">U4" + +basic_array_cases = [ + ArrayParams(values=np.arange(4, dtype=dtype), fill_value=1, compressor=codec) + for codec, dtype in product(basic_codecs, basic_dtypes) +] +datetime_array_cases = [ + ArrayParams(values=np.ones((4,), dtype=dtype), fill_value=1, compressor=codec) + for codec, dtype in product(basic_codecs, datetime_dtypes) +] +string_array_cases = [ + ArrayParams( + values=np.array(["aaaa", "bbbb", "ccccc", "dddd"], dtype=dtype), + fill_value="foo", + compressor=codec, + ) + for codec, dtype in product(basic_codecs, string_dtypes) +] +vlen_string_cases = [ + ArrayParams( + values=np.array(["a", "bb", "ccc", "dddd"], dtype="O"), + fill_value="1", + compressor=VLenUTF8(), + ) +] +array_cases = basic_array_cases + datetime_array_cases + string_array_cases + vlen_string_cases + + +@pytest.fixture +def source_array(tmp_path: Path, request: pytest.FixtureRequest) -> Array: + dest = tmp_path / "in" + store = LocalStore(dest) + array_params: ArrayParams = request.param + compressor = array_params.compressor + return zarr.from_array( + store, + data=array_params.values, + chunks=array_params.values.shape, + compressors=compressor, + fill_value=array_params.fill_value, + order="C", + filters=None, + chunk_key_encoding={"name": "v2", "configuration": {"separator": "/"}}, + write_data=True, + zarr_format=2, + ) + + 
+@pytest.mark.skipif(not runner_installed(), reason="no python script runner installed") +@pytest.mark.parametrize( + "source_array", array_cases, indirect=True, ids=tuple(map(str, array_cases)) +) +def test_roundtrip(source_array: Array, tmp_path: Path) -> None: + out_path = tmp_path / "out" + copy_op = subprocess.run( + [ + "uv", + "run", + Path(__file__).resolve().parent / "v2.18.py", + str(source_array.store).removeprefix("file://"), + str(out_path), + ], + capture_output=True, + text=True, + ) + assert copy_op.returncode == 0 + out_array = zarr.open_array(store=out_path, mode="r", zarr_format=2) + assert array_metadata_equals(source_array.metadata, out_array.metadata) + assert np.array_equal(source_array[:], out_array[:]) diff --git a/tests/test_time_travel/v2.18.py b/tests/test_regression/v2.18.py similarity index 81% rename from tests/test_time_travel/v2.18.py rename to tests/test_regression/v2.18.py index 011de2f8d6..39e1c5210c 100644 --- a/tests/test_time_travel/v2.18.py +++ b/tests/test_regression/v2.18.py @@ -1,21 +1,22 @@ # /// script -# requires-python = "3.11" +# requires-python = ">=3.11" # dependencies = [ -# zarr==2.18, +# "zarr==2.18", +# "numcodecs==0.15" # ] # /// import argparse import zarr -from zarr.storage import BaseStore +from zarr._storage.store import BaseStore def copy_group( *, node: zarr.hierarchy.Group, store: zarr.storage.BaseStore, path: str, overwrite: bool ) -> zarr.hierarchy.Group: result = zarr.group(store=store, path=path, overwrite=overwrite) - result.attrs.put(attrs) + result.attrs.put(node.attrs.asdict()) for key, child in node.items(): child_path = f"{path}/{key}" if isinstance(child, zarr.hierarchy.Group): @@ -36,12 +37,13 @@ def copy_array( compressor=node.compressor, filters=node.filters, order=node.order, - dimension_separator=node.dimension_separator, + dimension_separator=node._dimension_separator, store=store, path=path, overwrite=overwrite, ) result.attrs.put(node.attrs.asdict()) + result[:] = node[:] return result 
@@ -52,6 +54,8 @@ def copy_node( return copy_group(node=node, store=store, path=path, overwrite=overwrite) elif isinstance(node, zarr.core.Array): return copy_array(node=node, store=store, path=path, overwrite=overwrite) + else: + raise TypeError(f"Unexpected node type: {type(node)}") # pragma: no cover def cli() -> None: @@ -62,10 +66,10 @@ def cli() -> None: parser.add_argument("destination", type=str, help="Path to the destination zarr hierarchy") args = parser.parse_args() - src, dst = args.source, args.dest - + src, dst = args.source, args.destination root_src = zarr.open(src, mode="r") - result = copy_node(node=root_src, store=dst, overwrite=True) + result = copy_node(node=root_src, store=zarr.NestedDirectoryStore(dst), path="", overwrite=True) + print(f"successfully created {result} at {dst}") diff --git a/tests/test_time_travel/test_time_travel.py b/tests/test_time_travel/test_time_travel.py deleted file mode 100644 index 586c1fb6c1..0000000000 --- a/tests/test_time_travel/test_time_travel.py +++ /dev/null @@ -1,35 +0,0 @@ -from pathlib import Path - -import numpy as np -import pytest -from numcodecs import GZip - -from zarr.core.group import GroupMetadata, create_hierarchy -from zarr.core.metadata.v2 import ArrayV2Metadata -from zarr.storage import LocalStore - - -@pytest.fixture -def hierarchy_model(request: pytest.FixtureRequest) -> dict[str, ArrayV2Metadata | GroupMetadata]: - dtype = np.uint8() - return { - "": GroupMetadata(attributes={"foo": "bar"}, zarr_format=2), - "/array": ArrayV2Metadata( - shape=(10, 10), - dtype=dtype, - chunks=(10, 10), - compressor=GZip(), - fill_value=1, - order="C", - filters=[GZip()], - ), - } - - -async def test_copy( - tmp_path: Path, hierarchy_model: dict[str, ArrayV2Metadata | GroupMetadata] -) -> None: - # create the hierarchy - store = LocalStore(tmp_path) - [x async for x in create_hierarchy(store=store, nodes=hierarchy_model)] - breakpoint() From cfdcb32243814179f51cbc704f414587cf997b67 Mon Sep 17 00:00:00 2001 
From: Davis Vann Bennett Date: Mon, 26 May 2025 17:02:40 +0200 Subject: [PATCH 4/5] make metadata equality check robust to string fill values --- tests/test_regression/test_regression.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_regression/test_regression.py b/tests/test_regression/test_regression.py index d51d1108d4..df010ce051 100644 --- a/tests/test_regression/test_regression.py +++ b/tests/test_regression/test_regression.py @@ -25,7 +25,12 @@ def runner_installed() -> bool: def array_metadata_equals(a: ArrayV2Metadata, b: ArrayV2Metadata) -> bool: dict_a, dict_b = asdict(a), asdict(b) fill_value_a, fill_value_b = dict_a.pop("fill_value"), dict_b.pop("fill_value") - if np.isnan(fill_value_a) and np.isnan(fill_value_b): + if ( + isinstance(fill_value_a, float) + and isinstance(fill_value_b, float) + and np.isnan(fill_value_a) + and np.isnan(fill_value_b) + ): return dict_a == dict_b else: return fill_value_a == fill_value_b and dict_a == dict_b From ba9a95de0318ca2f8768b86b87d1534a60a20b1f Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 29 May 2025 21:20:09 +0200 Subject: [PATCH 5/5] add vlen bytes test case --- tests/test_regression/test_regression.py | 28 +++++++++++++++++++----- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/tests/test_regression/test_regression.py b/tests/test_regression/test_regression.py index df010ce051..130a6b7472 100644 --- a/tests/test_regression/test_regression.py +++ b/tests/test_regression/test_regression.py @@ -6,7 +6,7 @@ import numcodecs import numpy as np import pytest -from numcodecs import LZ4, LZMA, Blosc, GZip, VLenUTF8, Zstd +from numcodecs import LZ4, LZMA, Blosc, GZip, VLenBytes, VLenUTF8, Zstd import zarr from zarr.core.array import Array @@ -41,6 +41,7 @@ class ArrayParams: values: np.ndarray[tuple[int], np.dtype[np.generic]] fill_value: np.generic | str compressor: numcodecs.abc.Codec + filters: tuple[numcodecs.abc.Codec, ...] 
| None = None basic_codecs = GZip(), Blosc(), LZ4(), LZMA(), Zstd() @@ -68,10 +69,26 @@ class ArrayParams: ArrayParams( values=np.array(["a", "bb", "ccc", "dddd"], dtype="O"), fill_value="1", - compressor=VLenUTF8(), + compressor=None, + filters=(VLenUTF8(),), ) ] -array_cases = basic_array_cases + datetime_array_cases + string_array_cases + vlen_string_cases + +vlen_bytes_cases = [ + ArrayParams( + values=np.array([b"a", b"bb", b"ccc", b"dddd"], dtype="O"), + fill_value=b"1", + compressor=None, + filters=(VLenBytes(),), + ) +] +array_cases = ( + basic_array_cases + + datetime_array_cases + + string_array_cases + + vlen_string_cases + + vlen_bytes_cases +) @pytest.fixture @@ -79,15 +96,14 @@ def source_array(tmp_path: Path, request: pytest.FixtureRequest) -> Array: dest = tmp_path / "in" store = LocalStore(dest) array_params: ArrayParams = request.param - compressor = array_params.compressor return zarr.from_array( store, data=array_params.values, chunks=array_params.values.shape, - compressors=compressor, + compressors=array_params.compressor, fill_value=array_params.fill_value, order="C", - filters=None, + filters=array_params.filters, chunk_key_encoding={"name": "v2", "configuration": {"separator": "/"}}, write_data=True, zarr_format=2,