Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/releases.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
- `ChunkManifest` can now hold inlined chunks — raw chunk bytes carried directly in memory rather than as references to external files. Intended for parser authors (e.g., loading Kerchunk references with inlined data); not exposed via `loadable_variables`.
([#938](https://github.com/zarr-developers/VirtualiZarr/pull/938)).
By [Max Jones](https://github.com/maxrjones) and [Tom Nicholas](https://github.com/TomNicholas).
- `KerchunkJSONParser` and `KerchunkParquetParser` now parse inline chunk data (both raw-string and `base64:`-prefixed forms) into inlined `ChunkManifest` entries, instead of raising `NotImplementedError`. Fixes the read side of [#489](https://github.com/zarr-developers/VirtualiZarr/issues/489); writer support for inlined chunks will follow in a separate PR.
([#979](https://github.com/zarr-developers/VirtualiZarr/pull/979)).
By [Tom Nicholas](https://github.com/TomNicholas).

### Breaking changes

Expand Down
19 changes: 13 additions & 6 deletions virtualizarr/parsers/kerchunk/translator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import base64
import math
from collections.abc import Iterable
from typing import cast
Expand Down Expand Up @@ -215,14 +216,20 @@ def manifest_from_kerchunk_chunk_dict(
chunk_entries: dict[ChunkKey, ChunkEntry] = {}
for k, v in kerchunk_chunk_dict.items():
if isinstance(v, (str, bytes)):
raise NotImplementedError(
"Reading inlined reference data is currently not supported."
"See https://github.com/zarr-developers/VirtualiZarr/issues/489",
# Kerchunk inline form: a raw string (interpreted as bytes) or, when
# prefixed with "base64:", a base64-encoded payload.
raw = v.encode() if isinstance(v, str) else v
if raw.startswith(b"base64:"):
data = base64.b64decode(raw[len(b"base64:") :])
else:
data = raw
chunk_entries[k] = ChunkEntry(
path="", offset=0, length=len(data), data=data
)
elif not isinstance(v, (tuple, list)):
elif isinstance(v, (tuple, list)):
chunk_entries[k] = chunkentry_from_kerchunk(v, fs_root=fs_root)
else:
raise TypeError(f"Unexpected type {type(v)} for chunk value: {v}")

chunk_entries[k] = chunkentry_from_kerchunk(v, fs_root=fs_root)
return ChunkManifest(entries=chunk_entries, shape=shape)


Expand Down
140 changes: 106 additions & 34 deletions virtualizarr/tests/test_parsers/test_kerchunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,55 +374,127 @@ def test_open_virtual_dataset_existing_kerchunk_refs(


@requires_kerchunk
def test_notimplemented_read_inline_refs(tmp_path, netcdf4_inlined_ref, local_registry):
# For now, we raise a NotImplementedError if we read existing references that have inlined data
# https://github.com/zarr-developers/VirtualiZarr/pull/251#pullrequestreview-2361916932

ref_filepath = tmp_path / "ref.json"
def _refs_with_one_inlined_one_virtual(inline_repr: str) -> dict:
"""Kerchunk refs describing a (2, 4) int32 array with two (1, 4) chunks:
position (0, 0) is inlined (carrying ``inline_repr``), position (1, 0) is virtual."""
return {
"version": 1,
"refs": {
".zgroup": '{"zarr_format":2}',
"a/.zarray": (
'{"chunks":[1,4],"compressor":null,"dtype":"<i4",'
'"fill_value":null,"filters":null,"order":"C",'
'"shape":[2,4],"zarr_format":2}'
),
"a/.zattrs": '{"_ARRAY_DIMENSIONS":["x","y"]}',
"a/0.0": inline_repr,
"a/1.0": ["/test.nc", 6144, 16],
},
}

import ujson

with open(ref_filepath, "w") as json_file:
ujson.dump(netcdf4_inlined_ref, json_file)
@pytest.mark.parametrize(
    "inline_repr, expected_bytes",
    [
        ("base64:AQIDBAUGBwg=", b"\x01\x02\x03\x04\x05\x06\x07\x08"),
        ("hello, w!", b"hello, w!"),
    ],
    ids=["base64", "raw_string"],
)
def test_parse_inline_refs_json(inline_repr, expected_bytes):
    # Parse refs with one inlined and one virtual chunk; the inlined chunk
    # must land as an in-memory manifest entry while the virtual chunk stays
    # an ordinary file reference.
    refs = _refs_with_one_inlined_one_virtual(inline_repr)

    store = obstore.store.MemoryStore()
    store.put("refs.json", ujson.dumps(refs).encode())

    manifeststore = KerchunkJSONParser()(
        "memory:///refs.json",
        registry=ObjectStoreRegistry({"memory://": store}),
    )

    manifest = manifeststore._group._members["a"].manifest
    assert manifest._inlined == {(0, 0): expected_bytes}

    expected_entries = {
        "0.0": {
            "path": "__inlined__",
            "offset": 0,
            "length": len(expected_bytes),
            "data": expected_bytes,
        },
        "1.0": {"path": "file:///test.nc", "offset": 6144, "length": 16},
    }
    assert manifest.dict() == expected_entries


@requires_kerchunk
@pytest.mark.skipif(not has_fastparquet, reason="fastparquet not installed")
def test_notimplemented_read_inline_refs_parquet(
tmp_path, netcdf4_inlined_ref, local_registry
):
# Test that parquet references with inlined data raise NotImplementedError
# https://github.com/zarr-developers/VirtualiZarr/issues/489
def test_parse_inline_refs_parquet(tmp_path, local_registry):
    # Same scenario as the JSON case, but routed through kerchunk's parquet
    # reference format: the base64-inlined chunk must survive the parquet
    # round-trip and come back as an inlined manifest entry.
    import pandas as pd
    from kerchunk.df import refs_to_dataframe

    expected_bytes = b"\x01\x02\x03\x04\x05\x06\x07\x08"
    refs = _refs_with_one_inlined_one_virtual("base64:AQIDBAUGBwg=")

    ref_filepath = tmp_path / "ref.parquet"
    # NOTE(review): presumably disables pandas' string-dtype inference so
    # refs_to_dataframe writes the expected column types — confirm.
    with pd.option_context("future.infer_string", False):
        refs_to_dataframe(fo=refs, url=ref_filepath.as_posix())

    manifeststore = KerchunkParquetParser()(
        ref_filepath.as_posix(), registry=local_registry
    )

    manifest = manifeststore._group._members["a"].manifest
    assert manifest._inlined == {(0, 0): expected_bytes}

    expected_entries = {
        "0.0": {
            "path": "__inlined__",
            "offset": 0,
            "length": len(expected_bytes),
            "data": expected_bytes,
        },
        "1.0": {"path": "file:///test.nc", "offset": 6144, "length": 16},
    }
    assert manifest.dict() == expected_entries


@pytest.mark.asyncio
async def test_read_inline_and_virtual_refs_end_to_end():
    # Parse refs containing one inlined chunk and one virtual chunk, then
    # round-trip both through ManifestStore.get: the inlined chunk must come
    # back from memory and the virtual chunk must be fetched from the target
    # file via the object store registry.
    from zarr.core.buffer import default_buffer_prototype

    inlined_bytes = b"\x01\x02\x03\x04\x05\x06\x07\x08"
    virtual_bytes = b"\xaa\xbb\xcc\xdd\xee\xff\x11\x22"

    zarray_metadata = (
        '{"chunks":[1,4],"compressor":null,"dtype":"<i4",'
        '"fill_value":null,"filters":null,"order":"C",'
        '"shape":[2,4],"zarr_format":2}'
    )
    refs = {
        "version": 1,
        "refs": {
            ".zgroup": '{"zarr_format":2}',
            "a/.zarray": zarray_metadata,
            "a/.zattrs": '{"_ARRAY_DIMENSIONS":["x","y"]}',
            "a/0.0": "base64:AQIDBAUGBwg=",
            "a/1.0": ["memory:///test.nc", 0, len(virtual_bytes)],
        },
    }

    store = obstore.store.MemoryStore()
    store.put("refs.json", ujson.dumps(refs).encode())
    store.put("test.nc", virtual_bytes)

    manifeststore = KerchunkJSONParser()(
        "memory:///refs.json",
        registry=ObjectStoreRegistry({"memory://": store}),
    )

    prototype = default_buffer_prototype()

    inlined_result = await manifeststore.get("a/c/0/0", prototype=prototype)
    assert inlined_result.to_bytes() == inlined_bytes

    virtual_result = await manifeststore.get("a/c/1/0", prototype=prototype)
    assert virtual_result.to_bytes() == virtual_bytes


@pytest.mark.parametrize("skip_variables", ["a", ["a"]])
Expand Down
Loading