zarr-developers · ilan-gold · Mar 24, 2026 · Mar 24, 2026 · Mar 24, 2026 · Mar 24, 2026
diff --git a/changes/3826.feature.md b/changes/3826.feature.md
@@ -0,0 +1 @@
+Added a `subchunk_write_order` option to `ShardingCodec` to allow for `morton`, `unordered`, `lexicographic`, and `colexicographic` subchunk orderings.
diff --git a/docs/user-guide/performance.md b/docs/user-guide/performance.md
@@ -113,6 +113,13 @@ bytes within chunks of an array may improve the compression ratio, depending on
 the structure of the data, the compression algorithm used, and which compression
 filters (e.g., byte-shuffle) have been applied.
 
+### Subchunk memory layout
+
+The order of chunks **within each shard** can be changed via the `subchunk_write_order` parameter of the `ShardingCodec`. That parameter is a string which must be one of `["morton", "lexicographic", "colexicographic", "unordered"]`.
+
+By default [`morton`](https://en.wikipedia.org/wiki/Z-order_curve) order provides good spatial locality however [`lexicographic` (i.e., row-major)](https://en.wikipedia.org/wiki/Row-_and_column-major_order), for example, may be better suited to "batched" workflows where some form of sequential reading through a fixed number of outer dimensions is desired. The options are `lexicographic`, `morton`, `unordered` (i.e., random), and `colexicographic`.
+
+
 ### Empty chunks
 
 It is possible to configure how Zarr handles the storage of chunks that are "empty"

diff --git a/src/zarr/codecs/__init__.py b/src/zarr/codecs/__init__.py
@@ -29,7 +29,7 @@
     Zstd,
 )
 from zarr.codecs.scale_offset import ScaleOffset
-from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation
+from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation, SubchunkWriteOrder
 from zarr.codecs.transpose import TransposeCodec
 from zarr.codecs.vlen_utf8 import VLenBytesCodec, VLenUTF8Codec
 from zarr.codecs.zstd import ZstdCodec
@@ -47,6 +47,7 @@
     "ScaleOffset",
     "ShardingCodec",
     "ShardingCodecIndexLocation",
+    "SubchunkWriteOrder",
     "TransposeCodec",
     "VLenBytesCodec",
     "VLenUTF8Codec",

diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py
@@ -4,7 +4,7 @@
 from dataclasses import dataclass, replace
 from enum import Enum
 from functools import lru_cache
-from typing import TYPE_CHECKING, Any, NamedTuple, cast
+from typing import TYPE_CHECKING, Any, Literal, NamedTuple, cast
 
 import numpy as np
 import numpy.typing as npt
@@ -46,8 +46,6 @@
     BasicIndexer,
     ChunkProjection,
     SelectorTuple,
-    _morton_order,
-    _morton_order_keys,
     c_order_iter,
     get_indexer,
     morton_order_iter,
@@ -64,7 +62,7 @@
 
 if TYPE_CHECKING:
     from collections.abc import Iterator
-    from typing import Self
+    from typing import Final, Self
 
     from zarr.core.common import JSON
     from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType
@@ -83,6 +81,15 @@ class ShardingCodecIndexLocation(Enum):
     end = "end"
 
 
+SubchunkWriteOrder = Literal["morton", "unordered", "lexicographic", "colexicographic"]
+SUBCHUNK_WRITE_ORDER: Final[tuple[str, str, str, str]] = (
+    "morton",
+    "unordered",
+    "lexicographic",
+    "colexicographic",
+)
+
+
 def parse_index_location(data: object) -> ShardingCodecIndexLocation:
     return parse_enum(data, ShardingCodecIndexLocation)
 
@@ -272,14 +279,13 @@ def to_dict_vectorized(
         dict mapping chunk coordinate tuples to Buffer or None
         """
         starts, ends, valid = self.index.get_chunk_slices_vectorized(chunk_coords_array)
-        chunk_coords_keys = _morton_order_keys(self.index.chunks_per_shard)
 
         result: dict[tuple[int, ...], Buffer | None] = {}
-        for i, coords in enumerate(chunk_coords_keys):
+        for i, coords in enumerate(chunk_coords_array):
             if valid[i]:
-                result[coords] = self.buf[int(starts[i]) : int(ends[i])]
+                result[tuple(coords.ravel())] = self.buf[int(starts[i]) : int(ends[i])]
             else:
-                result[coords] = None
+                result[tuple(coords.ravel())] = None
 
         return result
 
@@ -293,7 +299,9 @@ class ShardingCodec(
     chunk_shape: tuple[int, ...]
     codecs: tuple[Codec, ...]
     index_codecs: tuple[Codec, ...]
+    rng: np.random.Generator | None
     index_location: ShardingCodecIndexLocation = ShardingCodecIndexLocation.end
+    subchunk_write_order: SubchunkWriteOrder = "morton"
 
     def __init__(
         self,
@@ -302,16 +310,24 @@ def __init__(
         codecs: Iterable[Codec | dict[str, JSON]] = (BytesCodec(),),
         index_codecs: Iterable[Codec | dict[str, JSON]] = (BytesCodec(), Crc32cCodec()),
         index_location: ShardingCodecIndexLocation | str = ShardingCodecIndexLocation.end,
+        subchunk_write_order: SubchunkWriteOrder = "morton",
+        rng: np.random.Generator | None = None,
     ) -> None:
         chunk_shape_parsed = parse_shapelike(chunk_shape)
         codecs_parsed = parse_codecs(codecs)
         index_codecs_parsed = parse_codecs(index_codecs)
         index_location_parsed = parse_index_location(index_location)
+        if subchunk_write_order not in SUBCHUNK_WRITE_ORDER:
+            raise ValueError(
+                f"Unrecognized subchunk write order: {subchunk_write_order}. Only {SUBCHUNK_WRITE_ORDER} are allowed."
+            )
 
         object.__setattr__(self, "chunk_shape", chunk_shape_parsed)
         object.__setattr__(self, "codecs", codecs_parsed)
         object.__setattr__(self, "index_codecs", index_codecs_parsed)
         object.__setattr__(self, "index_location", index_location_parsed)
+        object.__setattr__(self, "subchunk_write_order", subchunk_write_order)
+        object.__setattr__(self, "rng", rng)
 
         # Use instance-local lru_cache to avoid memory leaks
 
@@ -324,14 +340,15 @@ def __init__(
 
     # todo: typedict return type
     def __getstate__(self) -> dict[str, Any]:
-        return self.to_dict()
+        return {"rng": self.rng, **self.to_dict()}
 
     def __setstate__(self, state: dict[str, Any]) -> None:
         config = state["configuration"]
         object.__setattr__(self, "chunk_shape", parse_shapelike(config["chunk_shape"]))
         object.__setattr__(self, "codecs", parse_codecs(config["codecs"]))
         object.__setattr__(self, "index_codecs", parse_codecs(config["index_codecs"]))
         object.__setattr__(self, "index_location", parse_index_location(config["index_location"]))
+        object.__setattr__(self, "rng", state["rng"])
 
         # Use instance-local lru_cache to avoid memory leaks
         # object.__setattr__(self, "_get_chunk_spec", lru_cache()(self._get_chunk_spec))
@@ -509,6 +526,24 @@ async def _decode_partial_single(
         else:
             return out
 
+    def _subchunk_order_iter(
+        self, chunks_per_shard: tuple[int, ...], subchunk_write_order: SubchunkWriteOrder
+    ) -> Iterable[tuple[int, ...]]:
+        match subchunk_write_order:
+            case "morton":
+                subchunk_iter = morton_order_iter(chunks_per_shard)
+            case "lexicographic":
+                subchunk_iter = np.ndindex(chunks_per_shard)
+            case "colexicographic":
+                subchunk_iter = (c[::-1] for c in np.ndindex(chunks_per_shard[::-1]))
+            case "unordered":
+                subchunk_list = list(np.ndindex(chunks_per_shard))
+                (self.rng if self.rng is not None else np.random.default_rng()).shuffle(
+                    subchunk_list
+                )
+                subchunk_iter = iter(subchunk_list)
+        return subchunk_iter
+
     async def _encode_single(
         self,
         shard_array: NDBuffer,
@@ -526,8 +561,7 @@ async def _encode_single(
                 chunk_grid=ChunkGrid.from_sizes(shard_shape, chunk_shape),
             )
         )
-
-        shard_builder = dict.fromkeys(morton_order_iter(chunks_per_shard))
+        shard_builder = dict.fromkeys(self._subchunk_order_iter(chunks_per_shard, "lexicographic"))
 
         await self.codec_pipeline.write(
             [
@@ -570,7 +604,7 @@ async def _encode_partial_single(
         )
 
         if self._is_complete_shard_write(indexer, chunks_per_shard):
-            shard_dict = dict.fromkeys(morton_order_iter(chunks_per_shard))
+            shard_dict = dict.fromkeys(self._subchunk_order_iter(chunks_per_shard, "lexicographic"))
         else:
             shard_reader = await self._load_full_shard_maybe(
                 byte_getter=byte_setter,
@@ -580,7 +614,7 @@ async def _encode_partial_single(
             shard_reader = shard_reader or _ShardReader.create_empty(chunks_per_shard)
             # Use vectorized lookup for better performance
             shard_dict = shard_reader.to_dict_vectorized(
-                np.asarray(_morton_order(chunks_per_shard))
+                np.array(list(self._subchunk_order_iter(chunks_per_shard, "lexicographic")))
             )
 
         await self.codec_pipeline.write(
@@ -619,7 +653,7 @@ async def _encode_shard_dict(
 
         template = buffer_prototype.buffer.create_zero_length()
         chunk_start = 0
-        for chunk_coords in morton_order_iter(chunks_per_shard):
+        for chunk_coords in self._subchunk_order_iter(chunks_per_shard, self.subchunk_write_order):
             value = map.get(chunk_coords)
             if value is None:
                 continue

diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py
@@ -13,7 +13,10 @@
 import zarr
 from zarr.abc.store import RangeByteRequest, Store
 from zarr.codecs.bytes import BytesCodec
-from zarr.core.array import Array
+from zarr.codecs.crc32c_ import Crc32cCodec
+from zarr.codecs.sharding import SUBCHUNK_WRITE_ORDER, ShardingCodec, SubchunkWriteOrder
+from zarr.codecs.zstd import ZstdCodec
+from zarr.core.array import Array, CompressorsLike, SerializerLike
 from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding
 from zarr.core.common import JSON, AccessModeLiteral, ZarrFormat
 from zarr.core.dtype import get_data_type_from_native_dtype
@@ -127,6 +130,9 @@ def dimension_names(draw: st.DrawFn, *, ndim: int | None = None) -> list[None |
     return draw(st.none() | st.lists(st.none() | simple_text, min_size=ndim, max_size=ndim))  # type: ignore[arg-type]
 
 
+subchunk_write_orders: st.SearchStrategy[SubchunkWriteOrder] = st.sampled_from(SUBCHUNK_WRITE_ORDER)
+
+
 @st.composite
 def array_metadata(
     draw: st.DrawFn,
@@ -255,6 +261,7 @@ def arrays(
     arrays: st.SearchStrategy | None = None,
     attrs: st.SearchStrategy = attrs,
     zarr_formats: st.SearchStrategy = zarr_formats,
+    subchunk_write_orders: SearchStrategy[SubchunkWriteOrder] = subchunk_write_orders,
     open_mode: AccessModeLiteral = "w",
 ) -> AnyArray:
     store = draw(stores, label="store")
@@ -266,20 +273,11 @@ def arrays(
         arrays = numpy_arrays(shapes=shapes)
     nparray = draw(arrays, label="array data")
     dim_names: None | list[str | None] = None
+    serializer: SerializerLike = "auto"
+    compressors_unsearched: CompressorsLike = "auto"
 
     # For v3 arrays, optionally use RectilinearChunkGridMetadata
     chunk_grid_meta: RegularChunkGridMetadata | RectilinearChunkGridMetadata | None = None
-    shard_shape = None
-    if zarr_format == 3:
-        chunk_grid_meta = draw(chunk_grids(shape=nparray.shape), label="chunk grid")
-
-        # Sharding is only supported with regular chunk grids, and has complex
-        # divisibility constraints that don't play well with hypothesis shrinking.
-        # Disabled for now — sharding should be tested separately.
-
-        dim_names = draw(dimension_names(ndim=nparray.ndim), label="dimension names")
-    else:
-        dim_names = None
 
     # test that None works too.
     fill_value = draw(st.one_of([st.none(), npst.from_dtype(nparray.dtype)]))
@@ -295,27 +293,48 @@ def arrays(
     # - RectilinearChunkGridMetadata -> nested list of ints (triggers rectilinear path)
     # - v2 -> flat tuple of ints
     chunks_param: tuple[int, ...] | list[list[int]]
-    if zarr_format == 3 and chunk_grid_meta is not None:
+    shard_shape = None
+    dim_names = None
+    if zarr_format == 3:
+        chunk_grid_meta = draw(st.none() | chunk_grids(shape=nparray.shape), label="chunk grid")
+        dim_names = draw(dimension_names(ndim=nparray.ndim), label="dimension names")
         if isinstance(chunk_grid_meta, RectilinearChunkGridMetadata):
             chunks_param = [
                 list(dim) if isinstance(dim, tuple) else [dim]
                 for dim in chunk_grid_meta.chunk_shapes
             ]
-        else:
+        elif isinstance(chunk_grid_meta, RegularChunkGridMetadata):
             chunks_param = chunk_grid_meta.chunk_shape
+        else:
+            chunks_param = draw(chunk_shapes(shape=nparray.shape), label="chunk shape")
+
+            if all(s > c and c > 1 for s, c in zip(nparray.shape, chunks_param, strict=True)):
+                shard_shape = draw(
+                    st.none() | shard_shapes(shape=nparray.shape, chunk_shape=chunks_param),
+                    label="shard shape",
+                )
+                if shard_shape is not None:
+                    subchunk_write_order = draw(subchunk_write_orders)
+                    serializer = ShardingCodec(
+                        subchunk_write_order=subchunk_write_order,
+                        codecs=[BytesCodec(), ZstdCodec()],
+                        index_codecs=[BytesCodec(), Crc32cCodec()],
+                        chunk_shape=chunks_param,
+                    )
+                    compressors_unsearched = None
     else:
         chunks_param = draw(chunk_shapes(shape=nparray.shape), label="chunk shape")
-
     a = root.create_array(
         array_path,
         shape=nparray.shape,
         chunks=chunks_param,
         shards=shard_shape,
         dtype=nparray.dtype,
         attributes=attributes,
-        # compressor=compressor,  # FIXME
+        compressors=compressors_unsearched,  # FIXME
         fill_value=fill_value,
         dimension_names=dim_names,
+        serializer=serializer,
     )
 
     assert isinstance(a, Array)
@@ -329,12 +348,15 @@ def arrays(
 
     # Verify chunks — for rectilinear grids, .chunks raises
     if zarr_format == 3:
-        if isinstance(a.metadata.chunk_grid, RectilinearChunkGridMetadata):
-            assert shard_shape is None
-        else:
-            assert isinstance(a.metadata.chunk_grid, RegularChunkGridMetadata)
-            assert a.metadata.chunk_grid.chunk_shape == a.chunks
+        assert shard_shape == a.shards
+        if isinstance(a.metadata.chunk_grid, RegularChunkGridMetadata):
+            assert a.metadata.chunk_grid.chunk_shape == (
+                a.shards if shard_shape is not None else a.chunks
+            )
             assert shard_shape == a.shards
+        else:
+            assert isinstance(a.metadata.chunk_grid, RectilinearChunkGridMetadata)
+            assert shard_shape is None
 
     assert a.basename == name, (a.basename, name)
     assert dict(a.attrs) == expected_attrs
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Added a `subchunk_write_order` option to `ShardingCodec` to allow for `morton`, `unordered`, `lexicographic`, and `colexicographic` subchunk orderings.