From 47d4b4ea2bfff03a563bf0092eeec26491dbafaf Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 24 Jan 2025 18:52:58 +0100 Subject: [PATCH 1/4] make proper ChunkKeyEncodingLike type alias, and use it --- src/zarr/core/array.py | 14 +++++++------- src/zarr/core/chunk_key_encodings.py | 11 ++++++----- src/zarr/core/metadata/v3.py | 4 ++-- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 632e8221b4..6b68d1a0ac 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -412,7 +412,7 @@ async def create( # v3 only chunk_shape: ShapeLike | None = None, chunk_key_encoding: ( - ChunkKeyEncoding + ChunkKeyEncodingLike | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None @@ -453,7 +453,7 @@ async def create( The shape of the array's chunks Zarr format 3 only. Zarr format 2 arrays should use `chunks` instead. If not specified, default are guessed based on the shape and dtype. - chunk_key_encoding : ChunkKeyEncoding, optional + chunk_key_encoding : ChunkKeyEncodingLike, optional A specification of how the chunk keys are represented in storage. Zarr format 3 only. Zarr format 2 arrays should use `dimension_separator` instead. Default is ``("default", "/")``. @@ -553,7 +553,7 @@ async def _create( # v3 only chunk_shape: ShapeLike | None = None, chunk_key_encoding: ( - ChunkKeyEncoding + ChunkKeyEncodingLike | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None @@ -671,7 +671,7 @@ async def _create_v3( config: ArrayConfig, fill_value: Any | None = None, chunk_key_encoding: ( - ChunkKeyEncoding + ChunkKeyEncodingLike | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None @@ -1708,7 +1708,7 @@ def create( The shape of the Array's chunks. Zarr format 3 only. Zarr format 2 arrays should use `chunks` instead. 
If not specified, default are guessed based on the shape and dtype. - chunk_key_encoding : ChunkKeyEncoding, optional + chunk_key_encoding : ChunkKeyEncodingLike, optional A specification of how the chunk keys are represented in storage. Zarr format 3 only. Zarr format 2 arrays should use `dimension_separator` instead. Default is ``("default", "/")``. @@ -3756,7 +3756,7 @@ async def create_array( order: MemoryOrder | None = None, zarr_format: ZarrFormat | None = 3, attributes: dict[str, JSON] | None = None, - chunk_key_encoding: ChunkKeyEncoding | ChunkKeyEncodingLike | None = None, + chunk_key_encoding: ChunkKeyEncodingLike | None = None, dimension_names: Iterable[str] | None = None, storage_options: dict[str, Any] | None = None, overwrite: bool = False, @@ -3834,7 +3834,7 @@ async def create_array( The zarr format to use when saving. attributes : dict, optional Attributes for the array. - chunk_key_encoding : ChunkKeyEncoding, optional + chunk_key_encoding : ChunkKeyEncodingLike, optional A specification of how the chunk keys are represented in storage. For Zarr format 3, the default is ``{"name": "default", "separator": "/"}}``. For Zarr format 2, the default is ``{"name": "v2", "separator": "."}}``. 
diff --git a/src/zarr/core/chunk_key_encodings.py b/src/zarr/core/chunk_key_encodings.py index 95ce9108f3..80a39d684a 100644 --- a/src/zarr/core/chunk_key_encodings.py +++ b/src/zarr/core/chunk_key_encodings.py @@ -2,7 +2,7 @@ from abc import abstractmethod from dataclasses import dataclass -from typing import Literal, TypedDict, cast +from typing import Literal, TypeAlias, TypedDict, cast from zarr.abc.metadata import Metadata from zarr.core.common import ( @@ -20,7 +20,7 @@ def parse_separator(data: JSON) -> SeparatorLiteral: return cast(SeparatorLiteral, data) -class ChunkKeyEncodingLike(TypedDict): +class ChunkKeyEncodingParams(TypedDict): name: Literal["v2", "default"] separator: SeparatorLiteral @@ -36,9 +36,7 @@ def __init__(self, *, separator: SeparatorLiteral) -> None: object.__setattr__(self, "separator", separator_parsed) @classmethod - def from_dict( - cls, data: dict[str, JSON] | ChunkKeyEncoding | ChunkKeyEncodingLike - ) -> ChunkKeyEncoding: + def from_dict(cls, data: dict[str, JSON] | ChunkKeyEncodingLike) -> ChunkKeyEncoding: if isinstance(data, ChunkKeyEncoding): return data @@ -73,6 +71,9 @@ def encode_chunk_key(self, chunk_coords: ChunkCoords) -> str: pass +ChunkKeyEncodingLike: TypeAlias = ChunkKeyEncodingParams | ChunkKeyEncoding + + @dataclass(frozen=True) class DefaultChunkKeyEncoding(ChunkKeyEncoding): name: Literal["default"] = "default" diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 087dbd8bfc..9154762648 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -27,7 +27,7 @@ from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec from zarr.core.array_spec import ArrayConfig, ArraySpec from zarr.core.chunk_grids import ChunkGrid, RegularChunkGrid -from zarr.core.chunk_key_encodings import ChunkKeyEncoding +from zarr.core.chunk_key_encodings import ChunkKeyEncoding, ChunkKeyEncodingLike from zarr.core.common import ( JSON, ZARR_JSON, @@ -253,7 +253,7 @@ def 
__init__( shape: Iterable[int], data_type: npt.DTypeLike | DataType, chunk_grid: dict[str, JSON] | ChunkGrid, - chunk_key_encoding: dict[str, JSON] | ChunkKeyEncoding, + chunk_key_encoding: ChunkKeyEncodingLike, fill_value: Any, codecs: Iterable[Codec | dict[str, JSON]], attributes: dict[str, JSON] | None, From 23b2ae711cfa1acb47089dfbc77105ae3143a3c8 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 24 Jan 2025 19:43:31 +0100 Subject: [PATCH 2/4] model the not-required-ness of the separator --- src/zarr/core/chunk_key_encodings.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/zarr/core/chunk_key_encodings.py b/src/zarr/core/chunk_key_encodings.py index 80a39d684a..2b8b9734f7 100644 --- a/src/zarr/core/chunk_key_encodings.py +++ b/src/zarr/core/chunk_key_encodings.py @@ -2,7 +2,10 @@ from abc import abstractmethod from dataclasses import dataclass -from typing import Literal, TypeAlias, TypedDict, cast +from typing import TYPE_CHECKING, Literal, TypeAlias, TypedDict, cast + +if TYPE_CHECKING: + from typing import NotRequired from zarr.abc.metadata import Metadata from zarr.core.common import ( @@ -22,7 +25,7 @@ def parse_separator(data: JSON) -> SeparatorLiteral: class ChunkKeyEncodingParams(TypedDict): name: Literal["v2", "default"] - separator: SeparatorLiteral + separator: NotRequired[SeparatorLiteral] @dataclass(frozen=True) @@ -45,7 +48,8 @@ def from_dict(cls, data: dict[str, JSON] | ChunkKeyEncodingLike) -> ChunkKeyEnco data = {"name": data["name"], "configuration": {"separator": data["separator"]}} # configuration is optional for chunk key encodings - name_parsed, config_parsed = parse_named_configuration(data, require_configuration=False) + # TODO: remove the type: ignore statement when we use typeddicts for all our static metadata + name_parsed, config_parsed = parse_named_configuration(data, require_configuration=False) # type: ignore[arg-type] if name_parsed == "default": if config_parsed is None: # for 
default, normalize missing configuration to use the "/" separator. From aa6f25b1d8c4e1f02f57251b06ffb840b191cc47 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 24 Jan 2025 19:45:37 +0100 Subject: [PATCH 3/4] add cast --- src/zarr/core/chunk_key_encodings.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/chunk_key_encodings.py b/src/zarr/core/chunk_key_encodings.py index 2b8b9734f7..103472c3b4 100644 --- a/src/zarr/core/chunk_key_encodings.py +++ b/src/zarr/core/chunk_key_encodings.py @@ -47,9 +47,11 @@ def from_dict(cls, data: dict[str, JSON] | ChunkKeyEncodingLike) -> ChunkKeyEnco if "name" in data and "separator" in data: data = {"name": data["name"], "configuration": {"separator": data["separator"]}} + # TODO: remove this cast when we are statically typing the JSON metadata completely. + data = cast(dict[str, JSON], data) + # configuration is optional for chunk key encodings - # TODO: remove the type: ignore statement when we use typeddicts for all our static metadata - name_parsed, config_parsed = parse_named_configuration(data, require_configuration=False) # type: ignore[arg-type] + name_parsed, config_parsed = parse_named_configuration(data, require_configuration=False) if name_parsed == "default": if config_parsed is None: # for default, normalize missing configuration to use the "/" separator. From dd9d99fba4eea92086e6be02d4d27ce5186cc93a Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 24 Jan 2025 19:49:45 +0100 Subject: [PATCH 4/4] changelog --- changes/2763.chore.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 changes/2763.chore.rst diff --git a/changes/2763.chore.rst b/changes/2763.chore.rst new file mode 100644 index 0000000000..f36c63c289 --- /dev/null +++ b/changes/2763.chore.rst @@ -0,0 +1,3 @@ +Created a type alias ``ChunkKeyEncodingLike`` to model the union of ``ChunkKeyEncoding`` instances and the dict form of the +parameters of those instances. 
``ChunkKeyEncodingLike`` should be used by high-level functions to provide a convenient +way to create ``ChunkKeyEncoding`` objects. \ No newline at end of file