Skip to content
Open
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

- `metadata_from_stac()` now keeps declared STAC `cube:dimensions` as the dimension source of truth and handles STAC 1.1 common `bands` metadata without requiring the datacube extension ([#743](https://github.com/Open-EO/openeo-python-client/issues/743), [#867](https://github.com/Open-EO/openeo-python-client/pull/867)).


## [0.49.0] - 2026-04-01

Expand Down
201 changes: 152 additions & 49 deletions openeo/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@ class DimensionAlreadyExistsException(MetadataException):


# TODO: make these dimension classes immutable data classes
# TODO: align better with STAC datacube extension
# TODO: align/adapt/integrate with pystac's datacube extension implementation?
class Dimension:
"""Base class for dimensions."""

Expand Down Expand Up @@ -71,7 +69,6 @@ def rename_labels(self, target, source) -> Dimension:

class SpatialDimension(Dimension):
# TODO: align better with STAC datacube extension: e.g. support "axis" (x or y)

DEFAULT_CRS = 4326

def __init__(
Expand Down Expand Up @@ -679,30 +676,16 @@ def metadata_from_stac(url: str) -> CubeMetadata:
"""
Reads the band metadata from a static STAC catalog or a STAC API Collection and returns it as a :py:class:`CubeMetadata`

Policy:
- If cube:dimensions exists: treat it as source of truth (it may omit x/y/t/bands).
- Otherwise: apply openEO-style defaults (x, y, t) and (for Collection/Item) keep bands dimension even if empty.
Comment thread
suriyahgit marked this conversation as resolved.

:param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a specific STAC API Collection
:return: A :py:class:`CubeMetadata` containing the DataCube band metadata from the url.
"""
stac_object = pystac.read_file(href=url)
bands = _StacMetadataParser().bands_from_stac_object(stac_object)

# At least assume there are spatial dimensions
# TODO #743: are there conditions in which we even should not assume the presence of spatial dimensions?
dimensions = [
SpatialDimension(name="x", extent=[None, None]),
SpatialDimension(name="y", extent=[None, None]),
]

# TODO #743: conditionally include band dimension when there was actual indication of band metadata?
band_dimension = BandDimension(name="bands", bands=bands)
dimensions.append(band_dimension)

# TODO: is it possible to derive the actual name of temporal dimension that the backend will use?
temporal_dimension = _StacMetadataParser().get_temporal_dimension(stac_object)
if temporal_dimension:
dimensions.append(temporal_dimension)

metadata = CubeMetadata(dimensions=dimensions)
return metadata
parser = _StacMetadataParser()
return parser.metadata_from_stac_object(stac_object)

# Sniff for PySTAC extension API since version 1.9.0 (which is not available below Python 3.9)
# TODO: remove this once support for Python 3.7 and 3.8 is dropped
Expand Down Expand Up @@ -760,39 +743,159 @@ def __init__(self, *, logger=_log, log_level=logging.DEBUG, supress_duplicate_wa
# Use caching trick to avoid duplicate warnings
self._warn = functools.lru_cache(maxsize=1000)(self._warn)

def metadata_from_stac_object(self, stac_object: pystac.STACObject) -> CubeMetadata:
    """
    Derive the :py:class:`CubeMetadata` of a STAC object.

    :param stac_object: STAC object to inspect.
    :return: cube metadata wrapping the dimensions produced by :py:meth:`dimensions_from_stac_object`.
    """
    return CubeMetadata(dimensions=self.dimensions_from_stac_object(stac_object=stac_object))

def dimensions_from_stac_object(self, stac_object: pystac.STACObject) -> List[Dimension]:
    """
    List the cube dimensions described by a STAC object.

    When the object declares ``cube:dimensions``, that declaration is authoritative:
    only the declared dimensions are returned (so x/y/t/bands may be absent).
    Otherwise the openEO-style defaults are used: spatial ``x`` and ``y``, a ``bands``
    dimension and, when one can be derived, a temporal dimension.

    :param stac_object: STAC object to inspect.
    :return: list of dimensions, in declaration order or in default order (x, y, bands, [t]).
    """
    band_list = self.bands_from_stac_object(stac_object)

    if not self._has_cube_dimensions(stac_object):
        # No declaration to follow: build the default openEO layout.
        fallback: List[Dimension] = [SpatialDimension(name=axis, extent=[None, None]) for axis in ("x", "y")]
        fallback.append(BandDimension(name="bands", bands=list(band_list)))
        time_dim = self.get_temporal_dimension(stac_object)
        if time_dim:
            fallback.append(time_dim)
        return fallback

    return self._parse_declared_dimensions(stac_object=stac_object, bands=band_list)

def get_temporal_dimension(self, stac_obj: pystac.STACObject) -> Union[TemporalDimension, None]:
"""
Extract the temporal dimension from a STAC Collection/Item (if any)
"""
# TODO: also extract temporal dimension from assets?
if _PYSTAC_1_9_EXTENSION_INTERFACE:
if stac_obj.ext.has("cube") and hasattr(stac_obj.ext, "cube"):
temporal_dims = [
(n, d.extent or [None, None])
for (n, d) in stac_obj.ext.cube.dimensions.items()
if d.dim_type == pystac.extensions.datacube.DimensionType.TEMPORAL
if self._has_cube_dimensions(stac_obj):
temporal_dimensions = [
d
for d in self._parse_declared_dimensions(stac_object=stac_obj, bands=_BandList([]))
if isinstance(d, TemporalDimension)
]
if len(temporal_dimensions) == 1:
return temporal_dimensions[0]

if isinstance(stac_obj, pystac.Collection) and stac_obj.extent and stac_obj.extent.temporal:
extent = [Rfc3339(propagate_none=True).normalize(d) for d in stac_obj.extent.temporal.intervals[0]]
return TemporalDimension(name="t", extent=extent)

if isinstance(stac_obj, pystac.Item):
props = stac_obj.properties
start = props.get("start_datetime")
end = props.get("end_datetime")
if start or end:
extent = [
Rfc3339(propagate_none=True).normalize(start),
Rfc3339(propagate_none=True).normalize(end),
]
if len(temporal_dims) == 1:
name, extent = temporal_dims[0]
return TemporalDimension(name=name, extent=extent)
elif isinstance(stac_obj, pystac.Collection) and stac_obj.extent.temporal:
# No explicit "cube:dimensions": build fallback from "extent.temporal",
# with dimension name "t" (openEO API recommendation).
extent = [Rfc3339(propagate_none=True).normalize(d) for d in stac_obj.extent.temporal.intervals[0]]
return TemporalDimension(name="t", extent=extent)
else:
if isinstance(stac_obj, pystac.Item):
cube_dimensions = stac_obj.properties.get("cube:dimensions", {})
elif isinstance(stac_obj, pystac.Collection):
cube_dimensions = stac_obj.extra_fields.get("cube:dimensions", {})

dt = props.get("datetime")
if dt:
norm = Rfc3339(propagate_none=True).normalize(dt)
return TemporalDimension(name="t", extent=[norm, norm])

def _has_cube_dimensions(self, stac_object: pystac.STACObject) -> bool:
    """Tell whether the STAC object carries a non-empty ``cube:dimensions`` mapping."""
    declared = self._cube_dimensions_dict(stac_object)
    if not isinstance(declared, dict):
        return False
    return len(declared) > 0

def _cube_dimensions_dict(self, stac_object: pystac.STACObject) -> Dict[str, dict]:
    """
    Look up the raw ``cube:dimensions`` value of a STAC object.

    Items are searched in their ``properties``, Collections in their ``extra_fields``.
    Any other object type, a missing field or a falsy value yields ``{}``.
    """
    if isinstance(stac_object, pystac.Item):
        container = stac_object.properties
    elif isinstance(stac_object, pystac.Collection):
        container = stac_object.extra_fields
    else:
        return {}
    return container.get("cube:dimensions", {}) or {}

@staticmethod
def _safe_extent_from_pystac_cube_dim(dim) -> list:
    """
    Best-effort lookup of the extent of a PySTAC cube dimension wrapper.

    Reading ``dim.extent`` may raise (e.g. when the field is missing), so any failure
    is treated as "no extent". If that yields nothing, ``dim.extra_fields["extent"]``
    is tried. When neither provides a truthy value, ``[None, None]`` is returned.

    :param dim: dimension wrapper (anything with an optional ``extent``/``extra_fields``).
    :return: the extent found, or ``[None, None]``.
    """
    try:
        extent = dim.extent
    except Exception:
        # Deliberately broad: whatever goes wrong while reading the extent,
        # fall through to the alternative sources below.
        extent = None

    if extent:
        return extent

    fallback = (getattr(dim, "extra_fields", {}) or {}).get("extent")
    return fallback if fallback else [None, None]

def _parse_declared_dimensions(self, stac_object: pystac.STACObject, bands: _BandList) -> List[Dimension]:
    """
    Turn the ``cube:dimensions`` declaration of a STAC object into dimension objects.

    The PySTAC datacube extension wrapper is used when the PySTAC 1.9+ extension
    interface is available and the object exposes the "cube" extension through ``ext``.
    In every other case the raw ``cube:dimensions`` dict is parsed directly.

    :param stac_object: STAC object holding the declaration.
    :param bands: band metadata to attach to band dimensions.
    :return: list of parsed dimensions.
    """
    if _PYSTAC_1_9_EXTENSION_INTERFACE and getattr(stac_object, "ext", None) is not None:
        if stac_object.ext.has("cube") and hasattr(stac_object.ext, "cube"):
            return self._parse_cube_dimensions_from_pystac_extension(stac_object=stac_object, bands=bands)
    return self._parse_cube_dimensions_from_raw_dict(stac_object=stac_object, bands=bands)

def _parse_cube_dimensions_from_pystac_extension(
    self, stac_object: pystac.STACObject, bands: _BandList
) -> List[Dimension]:
    """
    Parse dimensions from PySTAC's cube extension wrapper (when present).

    Dimension types are mapped the same way as in the raw ``cube:dimensions`` dict
    parser, so the result does not depend on which code path handled the object:

    - spatial -> :py:class:`SpatialDimension`
    - temporal -> :py:class:`TemporalDimension`
    - "bands" (or "spectral" as an alias) -> :py:class:`BandDimension`
    - anything else -> generic :py:class:`Dimension` carrying the declared type

    :param stac_object: STAC object exposing ``ext.cube``.
    :param bands: band metadata to attach to band dimensions.
    :return: list of parsed dimensions, in declaration order.
    """
    dimension_type = pystac.extensions.datacube.DimensionType
    dimensions: List[Dimension] = []
    for name, dim in stac_object.ext.cube.dimensions.items():
        dim_type = getattr(dim, "dim_type", None)
        extent = self._safe_extent_from_pystac_cube_dim(dim)

        if dim_type == dimension_type.SPATIAL:
            dimensions.append(SpatialDimension(name=name, extent=extent))
        elif dim_type == dimension_type.TEMPORAL:
            dimensions.append(TemporalDimension(name=name, extent=extent))
        elif dim_type in ("bands", "spectral"):
            # Non-spatial/temporal types are compared as plain strings here.
            dimensions.append(BandDimension(name=name, bands=list(bands)))
        else:
            # Unknown/other dimension type (e.g. "geometry"): keep it as a generic
            # dimension instead of mislabelling it as a band dimension.
            dimensions.append(Dimension(name=name, type=dim_type))

    return dimensions

def _parse_cube_dimensions_from_raw_dict(self, stac_object: pystac.STACObject, bands: _BandList) -> List[Dimension]:
    """
    Parse dimensions from the raw ``cube:dimensions`` dict.

    Supports 'spatial', 'temporal', and ('bands' or 'spectral' as an alias);
    any other type is kept as a generic :py:class:`Dimension`.
    Entries that are not dicts are skipped.

    :param stac_object: STAC object holding the raw ``cube:dimensions`` field.
    :param bands: band metadata to attach to band dimensions.
    :return: list of parsed dimensions, in declaration order.
    """
    dimensions: List[Dimension] = []
    cube_dimensions = self._cube_dimensions_dict(stac_object)
    if not isinstance(cube_dimensions, dict):
        # Malformed declaration (e.g. a list): nothing that can be parsed.
        return dimensions

    for name, dim in cube_dimensions.items():
        if not isinstance(dim, dict):
            continue

        dim_type = dim.get("type")
        # Treat a missing, null or empty extent alike, so an explicit
        # `"extent": null` also ends up as an open-ended [None, None].
        extent = dim.get("extent") or [None, None]

        if dim_type == "spatial":
            dimensions.append(SpatialDimension(name=name, extent=extent))
        elif dim_type == "temporal":
            dimensions.append(TemporalDimension(name=name, extent=extent))
        elif dim_type in ("bands", "spectral"):
            dimensions.append(BandDimension(name=name, bands=list(bands)))
        else:
            dimensions.append(Dimension(name=name, type=dim_type))

    return dimensions

def _band_from_eo_bands_metadata(self, band: Union[dict, pystac.extensions.eo.Band]) -> Band:
"""Construct band from metadata in eo v1.1 style"""
Expand Down
96 changes: 95 additions & 1 deletion tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -1159,6 +1159,27 @@ def test_metadata_from_stac_bands(tmp_path, test_stac, expected):
assert metadata.band_names == expected


def test_metadata_from_stac_stac_1_1_common_bands_without_datacube_extension(tmp_path):
    """STAC 1.1 common ``bands`` metadata is picked up without the datacube extension."""
    common_bands = [
        {"name": "red", "eo:common_name": "red", "eo:center_wavelength": 0.665},
        {"name": "nir", "eo:common_name": "nir", "eo:center_wavelength": 0.842},
    ]
    stac_dict = StacDummyBuilder.collection(stac_version="1.1.0", bands=common_bands)
    # Precondition: neither extensions nor cube:dimensions are declared.
    for absent_field in ("stac_extensions", "cube:dimensions"):
        assert absent_field not in stac_dict

    # TODO #738 real request mocking of STAC resources compatible with pystac?
    stac_path = tmp_path / "stac.json"
    stac_path.write_text(json.dumps(stac_dict))
    metadata = metadata_from_stac(str(stac_path))

    # Default openEO dimensions, with the bands taken from the common metadata.
    assert metadata.dimension_names() == ["x", "y", "bands", "t"]
    assert metadata.band_names == ["red", "nir"]
    parsed_bands = metadata.band_dimension.bands
    assert parsed_bands[0].common_name == "red"
    assert parsed_bands[1].wavelength_um == 0.842


@pytest.mark.skipif(not _PYSTAC_1_9_EXTENSION_INTERFACE, reason="Requires PySTAC 1.9+ extension interface")
@pytest.mark.parametrize(
Expand Down Expand Up @@ -1210,7 +1231,17 @@ def test_metadata_from_stac_collection_bands_from_item_assets(
[
(
StacDummyBuilder.item(),
None,
("t", ["2024-03-08", "2024-03-08"]),
),
(
StacDummyBuilder.item(
properties={
"datetime": "2024-03-08T00:00:00Z",
"start_datetime": "2024-04-04T00:00:00Z",
"end_datetime": "2024-06-06T00:00:00Z",
}
),
("t", ["2024-04-04T00:00:00Z", "2024-06-06T00:00:00Z"]),
),
(
StacDummyBuilder.item(cube_dimensions={"t": {"type": "temporal", "extent": ["2024-04-04", "2024-06-06"]}}),
Expand Down Expand Up @@ -1256,6 +1287,69 @@ def test_metadata_from_stac_temporal_dimension(tmp_path, stac_dict, expected):
assert not metadata.has_temporal_dimension()



# Dimension name resolution policy (STAC cube:dimensions vs openEO defaults)
Comment thread
suriyahgit marked this conversation as resolved.
@pytest.mark.parametrize(
["stac_dict", "expected_dims"],
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can a STAC 1.1 style test case be added which uses the new 'bands' common metadata and no datacube extension?
Collection metadata example:
https://github.com/radiantearth/stac-spec/blob/master/examples/collection-only/collection.json#L152
Bands spec:
https://github.com/radiantearth/stac-spec/blob/master/commons/common-metadata.md#band-object

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a STAC 1.1 bands common metadata test without the datacube extension in test_metadata_from_stac_stac_1_1_common_bands_without_datacube_extension. The test verifies that metadata_from_stac() still builds the fallback openEO dimensions and extracts the common bands metadata correctly.

[
(
# No cube:dimensions -> fall back to openEO default naming convention
StacDummyBuilder.collection(summaries={"eo:bands": [{"name": "B01"}]}),
{"t", "bands", "y", "x"},
),
(
# No cube:dimensions (item) -> fall back to openEO default naming convention
StacDummyBuilder.item(
properties={"datetime": "2020-05-22T00:00:00Z", "eo:bands": [{"name": "B01"}]}
),
{"t", "bands", "y", "x"},
),
(
# cube:dimensions present -> use the dimension names as suggested by cube:dimensions keys
StacDummyBuilder.collection(
cube_dimensions={
"time": {"type": "temporal", "axis": "t", "extent": ["2024-04-04", "2024-06-06"]},
"band": {"type": "bands", "axis": "bands", "values": ["B01"]},
"y": {"type": "spatial", "axis": "y", "extent": [0, 1]},
"x": {"type": "spatial", "axis": "x", "extent": [0, 1]},
}
),
{"time", "band", "y", "x"},
),
(
# cube:dimensions present without band dimension -> don't inject an openEO "bands" dimension
StacDummyBuilder.collection(
summaries={"eo:bands": [{"name": "B01"}]},
cube_dimensions={
"time": {"type": "temporal", "axis": "t", "extent": ["2024-04-04", "2024-06-06"]},
"y": {"type": "spatial", "axis": "y", "extent": [0, 1]},
"x": {"type": "spatial", "axis": "x", "extent": [0, 1]},
},
),
{"time", "y", "x"},
),
],
)
def test_metadata_from_stac_dimension_policy_cube_dimensions_vs_default(tmp_path, stac_dict, expected_dims):
    """Dimension names follow ``cube:dimensions`` when declared, openEO defaults otherwise."""
    # TODO #738 real request mocking of STAC resources compatible with pystac?
    stac_file = tmp_path / "stac.json"
    stac_file.write_text(json.dumps(stac_dict))

    # Only the names matter here, not their order.
    dimension_names = set(metadata_from_stac(str(stac_file)).dimension_names() or ())
    assert dimension_names == expected_dims

    # Cross-check against the policy itself:
    # cube:dimensions can be located at root (collection) or in properties (item)
    declared = stac_dict.get("cube:dimensions") or (stac_dict.get("properties") or {}).get("cube:dimensions")
    policy_names = {"t", "bands", "y", "x"} if declared is None else set(declared.keys())
    assert dimension_names == policy_names


@pytest.mark.parametrize(
["kwargs", "expected_x", "expected_y"],
[
Expand Down
Loading