Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,8 @@ check-docs = { cmd = "mkdocs build --strict" }
readthedocs = { cmd = "rm -rf $READTHEDOCS_OUTPUT/html && cp -r site $READTHEDOCS_OUTPUT/html" }
# Define commands to run within the docs environment
[tool.pixi.feature.minio.tasks]
run-tests = { cmd = "pytest virtualizarr/tests/test_manifests/test_store.py virtualizarr/tests/test_parsers/test_hdf/test_hdf_manifest_store.py --run-minio-tests --run-network-tests --verbose" }
run-tests-xml-cov = { cmd = "pytest virtualizarr/tests/test_manifests/test_store.py virtualizarr/tests/test_parsers/test_hdf/test_hdf_manifest_store.py --run-minio-tests --run-network-tests --verbose --cov-report=xml" }
run-tests = { cmd = "pytest virtualizarr/tests/test_manifests/test_store.py virtualizarr/tests/test_parsers/test_hdf/test_hdf_manifest_store.py virtualizarr/tests/test_parsers/test_zarr.py --run-minio-tests --run-network-tests --verbose" }
run-tests-xml-cov = { cmd = "pytest virtualizarr/tests/test_manifests/test_store.py virtualizarr/tests/test_parsers/test_hdf/test_hdf_manifest_store.py virtualizarr/tests/test_parsers/test_zarr.py --run-minio-tests --run-network-tests --verbose --cov-report=xml" }

[tool.setuptools_scm]
fallback_version = "9999"
Expand Down
43 changes: 43 additions & 0 deletions virtualizarr/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,46 @@ def minio_bucket(container):
"file": filename,
"client": client,
}


@pytest.fixture(scope="session")
def minio_nolist_bucket(container):
    """Provide a MinIO bucket that anonymous clients can read but not list.

    The bucket policy grants every principal ``s3:GetBucketLocation`` on the
    bucket itself and ``s3:GetObject`` on the objects inside it. No list
    action is granted.

    Yields
    ------
    dict
        The ``port``, ``endpoint``, ``username`` and ``password`` entries
        copied from ``container``, plus ``bucket`` (the bucket name) and
        ``client`` (the ``Minio`` client used to create the bucket).
    """
    from minio import Minio

    bucket_name = "nolist-bucket"
    bucket_arn = f"arn:aws:s3:::{bucket_name}"
    everyone = {"AWS": "*"}

    minio_client = Minio(
        "localhost:9000",
        access_key=container["username"],
        secret_key=container["password"],
        secure=False,
    )
    minio_client.make_bucket(bucket_name)

    # Bucket-level statement: only the location lookup, nothing that lists.
    allow_location = {
        "Effect": "Allow",
        "Principal": everyone,
        "Action": ["s3:GetBucketLocation"],
        "Resource": bucket_arn,
    }
    # Object-level statement: reads of individual keys are allowed.
    allow_get_object = {
        "Effect": "Allow",
        "Principal": everyone,
        "Action": [
            "s3:GetObject",
        ],
        "Resource": f"{bucket_arn}/*",
    }
    policy_document = json.dumps(
        {
            "Version": "2012-10-17",
            "Statement": [allow_location, allow_get_object],
        }
    )
    minio_client.set_bucket_policy(bucket_name, policy_document)

    connection = {
        key: container[key]
        for key in ("port", "endpoint", "username", "password")
    }
    connection["bucket"] = bucket_name
    connection["client"] = minio_client
    yield connection
51 changes: 50 additions & 1 deletion virtualizarr/tests/test_parsers/test_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
get_strategy,
join_url,
)
from virtualizarr.tests import requires_pyarrow
from virtualizarr.tests import requires_minio, requires_pyarrow

pytestmark = requires_pyarrow

Expand Down Expand Up @@ -545,3 +545,52 @@ def test_sharded_array_raises_error(tmpdir):
match="Zarr V3 arrays with sharding are not yet supported",
):
parser(url=filepath, registry=registry)


@requires_minio
@pytest.mark.xfail(
reason="ZarrParser does not yet support buckets without list permissions"
)
def test_zarr_parser_nolist_bucket(minio_nolist_bucket):
"""Test that ZarrParser works with a bucket that does not allow list operations."""
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm fine to merge this as-is, but I wonder if there is a simpler test that doesn't require minio.

The simplest example of a Zarr URL that we can't list is surely an HTTP store? I was imagining there would be a class like that in zarr-python, but apparently there isn't.

Obstore does have an HTTP store, but I'm confused about why it says it supports list.

https://developmentseed.org/obstore/latest/api/store/http/#obstore.store.HTTPStore.head_async

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

obstore's HTTPStore is designed around WebDAV (xref developmentseed/obspec-utils#63).

It's a trade-off between code complexity with minio and relying on the network. For this version, you only need docker locally. I prefer local-first testing but am not planning to implement the fix so you could close this and build what works for you.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Sounds harder than I had anticipated, so minio seems fine.

# Body of test_zarr_parser_nolist_bucket: write a small Zarr V3 store with
# credentialed access, then parse and read it back through a store that is
# created without credentials.
import obstore as obs

# Connection details provided by the minio_nolist_bucket fixture.
bucket = minio_nolist_bucket["bucket"]
endpoint = minio_nolist_bucket["endpoint"]
username = minio_nolist_bucket["username"]
password = minio_nolist_bucket["password"]

# Write a Zarr V3 store directly to the bucket using admin credentials
admin_store = obs.store.S3Store(
bucket,
endpoint_url=endpoint,
access_key_id=username,
secret_access_key=password,
virtual_hosted_style_request=False,
client_options={"allow_http": True},
)
zarr_store = zarr.storage.ObjectStore(store=admin_store)
# 3x4 float32 array with "x"/"y" coordinates; also serves as the expected
# value for the comparison at the end of the test.
ds = xr.Dataset(
{"data": (("x", "y"), np.arange(12, dtype="float32").reshape(3, 4))},
coords={"x": np.arange(3), "y": np.arange(4)},
)
# consolidated=False is passed on both the write and the read below.
ds.to_zarr(zarr_store, consolidated=False, zarr_format=3)

# Create an anonymous S3 store (subject to bucket policy which denies list)
# No access key or secret is passed here; skip_signature=True presumably makes
# obstore send unsigned requests -- confirm against the obstore S3 config docs.
anon_store = obs.store.S3Store(
bucket,
endpoint_url=endpoint,
skip_signature=True,
virtual_hosted_style_request=False,
client_options={"allow_http": True},
)

# Map the bucket URL to the anonymous store in the registry handed to the
# parser.
url = f"s3://{bucket}"
registry = ObjectStoreRegistry({url: anon_store})
parser = ZarrParser()
manifeststore = parser(url=url, registry=registry)

# Round-trip check: the dataset opened from the parser's output must be
# identical to the dataset that was written.
with xr.open_dataset(
manifeststore, engine="zarr", consolidated=False, zarr_format=3
) as actual:
xr.testing.assert_identical(actual, ds)
Loading