Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
5990390
refactor: rename guess_chunks to more clearly indicate that it guesse…
d-v-b Apr 10, 2026
74ae11a
fix: use the same chunk normalization path in all cases
d-v-b Apr 10, 2026
9735a85
refactor: use newtype pattern
d-v-b Apr 12, 2026
c40c5ff
docs: changelog
d-v-b Apr 12, 2026
a9b68d8
fix: handle 0-length arrays
d-v-b Apr 12, 2026
88e93ad
test: test untested cases of chunk normalization
d-v-b Apr 12, 2026
fcd5ab0
fix: don't accept inane input
d-v-b Apr 12, 2026
5c56197
test: check error states in normalize_chunks_1d
d-v-b Apr 12, 2026
9fc3fea
refactor: make resolvedchunking recursive to support nested sharding
d-v-b Apr 13, 2026
2659f1c
Merge branch 'main' into refactor/simplify-internal-chunk-representation
d-v-b Apr 14, 2026
c6a5095
Merge branch 'main' into refactor/simplify-internal-chunk-representation
d-v-b Apr 16, 2026
fb7da3a
Merge branch 'main' into refactor/simplify-internal-chunk-representation
d-v-b Apr 20, 2026
f6fc818
test: add _assert_chunks_equal helper for ChunksTuple
d-v-b Apr 20, 2026
14788aa
perf: ChunksTuple uses 1D int64 arrays per axis
d-v-b Apr 20, 2026
796a564
revert: undo out-of-scope v3.py changes from 14788aa
d-v-b Apr 20, 2026
62ce735
fix: cast ChunksTuple elements to int at consumer sites in array.py
d-v-b Apr 20, 2026
547836b
fix: widen is_regular_* annotations and cast ChunksTuple in create_ch…
d-v-b Apr 20, 2026
e68cdfc
test: cast ChunksTuple elements to int in create_array_metadata fixture
d-v-b Apr 20, 2026
12a325a
test: rewrite ChunksTuple equality checks and add return-type charact…
d-v-b Apr 20, 2026
33ee8a1
perf: vectorize is_regular_1d for ndarray inputs
d-v-b Apr 20, 2026
4bc4678
refactor: rename ResolvedChunking to ChunkLayout
d-v-b Apr 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions changes/3899.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Make chunk normalization properly handle `-1` as a compact representation of the length of
an entire axis.
100 changes: 40 additions & 60 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,11 @@
)
from zarr.core.buffer.cpu import buffer_prototype as cpu_buffer_prototype
from zarr.core.chunk_grids import (
SHARDED_INNER_CHUNK_MAX_BYTES,
ChunkGrid,
_auto_partition,
normalize_chunks,
guess_chunks,
normalize_chunks_nd,
resolve_outer_and_inner_chunks,
)
from zarr.core.chunk_key_encodings import (
ChunkKeyEncoding,
Expand Down Expand Up @@ -120,10 +122,8 @@
)
from zarr.core.metadata.v3 import (
ChunkGridMetadata,
RectilinearChunkGridMetadata,
RegularChunkGridMetadata,
create_chunk_grid_metadata,
parse_node_type_array,
resolve_chunks,
)
from zarr.core.sync import sync
from zarr.errors import (
Expand Down Expand Up @@ -679,6 +679,7 @@ async def _create(

from zarr.core.chunk_grids import _is_rectilinear_chunks

# Unify the v2 (chunks) and v3 (chunk_shape) parameter names
_raw_chunks = chunks if chunks is not None else chunk_shape

config_parsed = parse_array_config(config)
Expand All @@ -704,7 +705,11 @@ async def _create(
item_size = 1
if isinstance(dtype_parsed, HasItemSize):
item_size = dtype_parsed.item_size
chunk_grid = resolve_chunks(_raw_chunks, shape, item_size)
if _raw_chunks is None:
outer_chunks = guess_chunks(shape, item_size)
else:
outer_chunks = normalize_chunks_nd(_raw_chunks, shape)
chunk_grid = create_chunk_grid_metadata(outer_chunks)
result = await cls._create_v3(
store_path,
shape=shape,
Expand Down Expand Up @@ -735,10 +740,12 @@ async def _create(
item_size = 1
if isinstance(dtype_parsed, HasItemSize):
item_size = dtype_parsed.item_size
if chunks:
_chunks = normalize_chunks(chunks, shape, item_size)
_raw = chunks or chunk_shape
if _raw is None:
outer_chunks = guess_chunks(shape, item_size)
else:
_chunks = normalize_chunks(chunk_shape, shape, item_size)
outer_chunks = normalize_chunks_nd(_raw, shape)
_chunks = tuple(dim[0] for dim in outer_chunks)

if order is None:
order_parsed = config_parsed.order
Expand Down Expand Up @@ -4796,6 +4803,7 @@ async def init_array(

zdtype = parse_dtype(dtype, zarr_format=zarr_format)
shape_parsed = parse_shapelike(shape)
item_size = zdtype.item_size if isinstance(zdtype, HasItemSize) else 1
chunk_key_encoding_parsed = _parse_chunk_key_encoding(
chunk_key_encoding, zarr_format=zarr_format
)
Expand All @@ -4808,12 +4816,9 @@ async def init_array(
else:
await ensure_no_existing_node(store_path, zarr_format=zarr_format)

# Detect rectilinear (nested list) chunks or shards, e.g. [[10, 20, 30], [25, 25]]
# Validate rectilinear chunks constraints
from zarr.core.chunk_grids import _is_rectilinear_chunks

rectilinear_meta: RectilinearChunkGridMetadata | None = None
rectilinear_shards = _is_rectilinear_chunks(shards)

if _is_rectilinear_chunks(chunks):
if zarr_format == 2:
raise ValueError("Zarr format 2 does not support rectilinear chunk grids.")
Expand All @@ -4823,43 +4828,29 @@ async def init_array(
"Use rectilinear shards instead: "
"chunks=(inner_size, ...), shards=[[shard_sizes], ...]"
)
rectilinear_meta = RectilinearChunkGridMetadata(
chunk_shapes=tuple(tuple(dim_edges) for dim_edges in chunks)

# Normalize the user's chunks into canonical ChunksTuple form
if chunks is None or chunks == "auto":
chunks_normalized = guess_chunks(
shape_parsed,
item_size,
max_bytes=SHARDED_INNER_CHUNK_MAX_BYTES if shards is not None else None,
)
# Use first chunk size per dim as placeholder for _auto_partition
chunks_flat: tuple[int, ...] | Literal["auto"] = tuple(dim_edges[0] for dim_edges in chunks)
else:
# Normalize scalar int to per-dimension tuple (e.g. chunks=100000 for a 1D array)
if isinstance(chunks, int):
chunks = tuple(chunks for _ in shape_parsed)
chunks_flat = cast("tuple[int, ...] | Literal['auto']", chunks)

# Handle rectilinear shards: shards=[[60, 40, 20], [50, 50]]
# means variable-sized shard boundaries with uniform inner chunks
shards_for_partition: ShardsLike | None = shards
if _is_rectilinear_chunks(shards):
if zarr_format == 2:
raise ValueError("Zarr format 2 does not support rectilinear chunk grids.")
rectilinear_meta = RectilinearChunkGridMetadata(
chunk_shapes=tuple(tuple(dim_edges) for dim_edges in shards)
)
# Use first shard size per dim as placeholder for _auto_partition
shards_for_partition = tuple(dim_edges[0] for dim_edges in shards)
chunks_normalized = normalize_chunks_nd(chunks, shape_parsed)

item_size = 1
if isinstance(zdtype, HasItemSize):
item_size = zdtype.item_size

shard_shape_parsed, chunk_shape_parsed = _auto_partition(
# Resolve chunks + shards into outer_chunks (grid metadata) and
# inner_chunks (sub-chunks for ShardingCodec, None if no sharding)
outer_chunks, inner_chunks = resolve_outer_and_inner_chunks(
array_shape=shape_parsed,
shard_shape=shards_for_partition,
chunk_shape=chunks_flat,
chunks=chunks_normalized,
shard_shape=shards,
item_size=item_size,
)
chunks_out: tuple[int, ...]

meta: ArrayV2Metadata | ArrayV3Metadata
if zarr_format == 2:
if shard_shape_parsed is not None:
if inner_chunks is not None:
msg = (
"Zarr format 2 arrays can only be created with `shard_shape` set to `None`. "
f"Got `shard_shape={shards}` instead."
Expand All @@ -4883,7 +4874,7 @@ async def init_array(
meta = AsyncArray._create_metadata_v2(
shape=shape_parsed,
dtype=zdtype,
chunks=chunk_shape_parsed,
chunks=tuple(dim[0] for dim in outer_chunks),
dimension_separator=chunk_key_encoding_parsed.separator,
fill_value=fill_value,
order=order_parsed,
Expand All @@ -4899,40 +4890,29 @@ async def init_array(
dtype=zdtype,
)
sub_codecs = cast("tuple[Codec, ...]", (*array_array, array_bytes, *bytes_bytes))
grid = create_chunk_grid_metadata(outer_chunks)
codecs_out: tuple[Codec, ...]
if shard_shape_parsed is not None:
if inner_chunks is not None:
inner_chunks_flat = tuple(dim[0] for dim in inner_chunks)
index_location = None
if isinstance(shards, dict):
index_location = ShardingCodecIndexLocation(shards.get("index_location", None))
if index_location is None:
index_location = ShardingCodecIndexLocation.end
sharding_codec = ShardingCodec(
chunk_shape=chunk_shape_parsed, codecs=sub_codecs, index_location=index_location
chunk_shape=inner_chunks_flat, codecs=sub_codecs, index_location=index_location
)
# Use rectilinear grid for validation when shards are rectilinear
if rectilinear_shards and rectilinear_meta is not None:
validation_grid: ChunkGridMetadata = rectilinear_meta
else:
validation_grid = RegularChunkGridMetadata(chunk_shape=shard_shape_parsed)
sharding_codec.validate(
shape=chunk_shape_parsed,
shape=inner_chunks_flat,
dtype=zdtype,
chunk_grid=validation_grid,
chunk_grid=grid,
)
codecs_out = (sharding_codec,)
chunks_out = shard_shape_parsed
else:
chunks_out = chunk_shape_parsed
codecs_out = sub_codecs

if order is not None:
_warn_order_kwarg()

grid: ChunkGridMetadata
if rectilinear_meta is not None:
grid = rectilinear_meta
else:
grid = RegularChunkGridMetadata(chunk_shape=chunks_out)
meta = AsyncArray._create_metadata_v3(
shape=shape_parsed,
dtype=zdtype,
Expand Down
Loading
Loading