From d075ece373307dd8e2032f1d5387faec9738c361 Mon Sep 17 00:00:00 2001
From: aladinor <aladinor@unal.edu.co>
Date: Thu, 16 Apr 2026 19:47:04 -0500
Subject: [PATCH 1/7] Prefer metadata over store reads in
 _decode_cf_datetime_dtype

Restructure dtype inference into a metadata-first path that falls back
to reading the first and last values only when metadata cannot prove
safety. For well-formed data the fast path runs decode_cf_datetime on a
synthetic [0, 1] array so resolution-selection logic matches real-data
output.

Adds _verify_decoded_dtype to CFDatetimeCoder.decode so an overflow
mismatch between declared and actual datetime dtype raises a clear
TypeError instead of silently corrupting dask map_blocks output.

Addresses pydata/xarray#11303.
---
 xarray/coding/times.py | 214 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 195 insertions(+), 19 deletions(-)

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index b9324d8395e..cf98b1c1324 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -336,39 +336,206 @@ def _unpack_time_units_and_ref_date_cftime(units: str, calendar: str):
     return time_units, ref_date
 
 
-def _decode_cf_datetime_dtype(
+class _CannotInferFromMetadata(Exception):
+    """Raised by _infer_dtype_from_metadata when data reads are required."""
+
+
+# Edges of the datetime64[ns] representable range (the value range of a
+# pandas.Timestamp at nanosecond resolution).
+_DATETIME64_NS_LOWER = pd.Timestamp("1677-09-21 00:12:43.145224193")
+_DATETIME64_NS_UPPER = pd.Timestamp("2262-04-11 23:47:16.854775807")
+
+
+def _max_for_storage_dtype(dtype: np.dtype) -> int | None:
+    """Maximum absolute value representable in ``dtype``, or None if the
+    bound is too loose to be useful (int64, floats)."""
+    if dtype.kind in "iu" and dtype.itemsize <= 4:
+        info = np.iinfo(dtype)
+        return int(max(abs(info.min), info.max))
+    return None
+
+
+def _bound_fits_datetime64_ns(max_value: float | int, units: str) -> bool:
+    """True if ``ref_date ± max_value * unit_seconds`` fits in datetime64[ns]."""
+    time_unit, ref_date = _unpack_time_unit_and_ref_date(units)
+    try:
+        delta = pd.Timedelta(int(max_value), unit=time_unit)
+        upper = ref_date + delta
+        lower = ref_date - delta
+    except (OutOfBoundsDatetime, OutOfBoundsTimedelta, OverflowError, ValueError):
+        return False
+    return _DATETIME64_NS_LOWER <= lower and upper <= _DATETIME64_NS_UPPER
+
+
+def _bound_from_attrs(attrs: dict, units: str) -> bool:
+    """True if ``valid_max`` or ``time_coverage_end`` in attrs proves the
+    decoded values fit in datetime64[ns]."""
+    bound = attrs.get("valid_max")
+    if bound is not None and _bound_fits_datetime64_ns(bound, units):
+        return True
+    end = attrs.get("time_coverage_end")
+    if end is not None:
+        try:
+            ts = pd.Timestamp(end)
+        except (ValueError, TypeError):
+            return False
+        return bool(_DATETIME64_NS_LOWER <= ts <= _DATETIME64_NS_UPPER)
+    return False
+
+
+def _is_overflow_safe_from_metadata(
     data,
     units: str,
     calendar: str | None,
     use_cftime: bool | None,
-    time_unit: PDDatetimeUnitOptions = "ns",
+    time_unit: PDDatetimeUnitOptions,
+    attrs: dict | None,
+) -> bool:
+    """Whether decode_cf_datetime's output dtype is knowable from metadata.
+
+    Output dtype depends on data values only when all of the following
+    hold: ``use_cftime=None``, standard calendar, ``time_unit="ns"``, data
+    is chunked (so dask enforces the declared dtype), and we have no
+    explicit bound that proves values fit the datetime64[ns] range.
+    """
+    if use_cftime is not None:
+        return True
+    # Non-standard calendars always decode to cftime (object dtype).
+    if calendar is not None and not _is_standard_calendar(calendar):
+        return True
+    # Non-"ns" datetime64 resolutions have ranges ~±10¹¹ years — overflow
+    # is unreachable in practice.
+    if time_unit != "ns":
+        return True
+    # Non-chunked data: sync decode (_ElementwiseFunctionArray) returns the
+    # function's actual output dtype and xarray reconciles at access time.
+    if not is_chunked_array(data):
+        return True
+    # Chunked + ns + auto: need a proven bound.
+    if attrs is not None and _bound_from_attrs(attrs, units):
+        return True
+    storage_dtype = getattr(data, "dtype", None)
+    if storage_dtype is not None:
+        max_val = _max_for_storage_dtype(storage_dtype)
+        if max_val is not None and _bound_fits_datetime64_ns(max_val, units):
+            return True
+    return False
+
+
+def _infer_dtype_from_metadata(
+    data,
+    units: str,
+    calendar: str | None,
+    use_cftime: bool | None,
+    time_unit: PDDatetimeUnitOptions,
+    attrs: dict | None,
 ) -> np.dtype:
-    # Verify that at least the first and last date can be decoded
-    # successfully. Otherwise, tracebacks end up swallowed by
-    # Dataset.__repr__ when users try to view their lazily decoded array.
+    """Infer the decoded dtype from metadata alone, without reading data.
+
+    Raises ``_CannotInferFromMetadata`` when the dtype depends on values
+    that must be read. Otherwise runs ``decode_cf_datetime`` on a
+    synthetic ``[0, 1]`` array so the same resolution-selection logic
+    that runs on real data also produces the declared dtype.
+    """
+    # Validate units — raises ValueError on malformed strings.
+    _unpack_time_unit_and_ref_date(units)
+
+    if not _is_overflow_safe_from_metadata(
+        data, units, calendar, use_cftime, time_unit, attrs
+    ):
+        raise _CannotInferFromMetadata
+
+    result = decode_cf_datetime(
+        np.array([0, 1]), units, calendar, use_cftime, time_unit
+    )
+    return getattr(result, "dtype", np.dtype("object"))
+
+
+def _infer_dtype_from_first_last(
+    data,
+    units: str,
+    calendar: str | None,
+    use_cftime: bool | None,
+    time_unit: PDDatetimeUnitOptions,
+) -> np.dtype:
+    """Fallback dtype inference that reads the first and last data values.
+
+    Used only when metadata cannot prove the dtype (chunked int64/float64
+    arrays with ``use_cftime=None`` and no value bounds). Reads are
+    expensive on remote-backed stores but unavoidable in this case.
+    """
     values = indexing.ImplicitToExplicitIndexingAdapter(indexing.as_indexable(data))
     example_value = np.concatenate(
         [to_numpy(first_n_items(values, 1)), to_numpy(last_item(values))]
     )
+    result = decode_cf_datetime(example_value, units, calendar, use_cftime, time_unit)
+    return getattr(result, "dtype", np.dtype("object"))
+
+
+def _wrap_decode_error(err: Exception, units: str, calendar: str | None) -> ValueError:
+    calendar_msg = (
+        "the default calendar" if calendar is None else f"calendar {calendar!r}"
+    )
+    msg = (
+        f"unable to decode time units {units!r} with {calendar_msg!r}. Try "
+        "opening your dataset with decode_times=False or installing cftime "
+        "if it is not installed."
+    )
+    return ValueError(msg)
+
 
+def _decode_cf_datetime_dtype(
+    data,
+    units: str,
+    calendar: str | None,
+    use_cftime: bool | None,
+    time_unit: PDDatetimeUnitOptions = "ns",
+    attrs: dict | None = None,
+) -> np.dtype:
     try:
-        result = decode_cf_datetime(
-            example_value, units, calendar, use_cftime, time_unit
+        return _infer_dtype_from_metadata(
+            data, units, calendar, use_cftime, time_unit, attrs
         )
+    except _CannotInferFromMetadata:
+        pass
     except Exception as err:
-        calendar_msg = (
-            "the default calendar" if calendar is None else f"calendar {calendar!r}"
-        )
-        msg = (
-            f"unable to decode time units {units!r} with {calendar_msg!r}. Try "
-            "opening your dataset with decode_times=False or installing cftime "
-            "if it is not installed."
+        raise _wrap_decode_error(err, units, calendar) from err
+
+    try:
+        return _infer_dtype_from_first_last(
+            data, units, calendar, use_cftime, time_unit
         )
-        raise ValueError(msg) from err
-    else:
-        dtype = getattr(result, "dtype", np.dtype("object"))
+    except Exception as err:
+        raise _wrap_decode_error(err, units, calendar) from err
+
+
+def _verify_decoded_dtype(func: Callable, expected_dtype: np.dtype) -> Callable:
+    """Wrap ``func`` to raise on datetime64/object dtype mismatches.
+
+    The lazy decode pipeline advertises ``expected_dtype`` upfront; when
+    data is chunked, dask.map_blocks uses it to cast the function's
+    output. If decoded values land on the wrong side of the
+    datetime64/object boundary (e.g. overflow forces a cftime fallback),
+    this wrapper raises ``TypeError`` instead of silently casting.
+    Different datetime64 resolutions are allowed — only the
+    datetime64/object boundary matters.
+    """
+    expected_is_datetime = expected_dtype.kind == "M"
+
+    def wrapper(values):
+        result = func(values)
+        result_dtype = getattr(result, "dtype", np.dtype("object"))
+        if (result_dtype.kind == "M") != expected_is_datetime:
+            raise TypeError(
+                f"decoded datetime dtype {result_dtype!r} does not match "
+                f"declared dtype {expected_dtype!r}. Values likely fall "
+                f"outside the representable range of {expected_dtype!r}; "
+                "pass use_cftime=True or a coarser time_unit "
+                "(e.g. 'us' or 's') to decode_times."
+            )
+        return result
 
-    return dtype
+    return wrapper
 
 
 def _decode_datetime_with_cftime(
@@ -1412,7 +1579,12 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable:
             units = pop_to(attrs, encoding, "units")
             calendar = pop_to(attrs, encoding, "calendar")
             dtype = _decode_cf_datetime_dtype(
-                data, units, calendar, self.use_cftime, self.time_unit
+                data,
+                units,
+                calendar,
+                self.use_cftime,
+                self.time_unit,
+                attrs=attrs,
             )
             transform = partial(
                 decode_cf_datetime,
@@ -1421,6 +1593,10 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable:
                 use_cftime=self.use_cftime,
                 time_unit=self.time_unit,
             )
+            # Guard against dtype mismatches (e.g. overflow values that would
+            # cause dask map_blocks to silently cast cftime output to
+            # datetime64 garbage).
+            transform = _verify_decoded_dtype(transform, dtype)
             data = lazy_elemwise_func(data, transform, dtype)
 
             return Variable(dims, data, attrs, encoding, fastpath=True)

From 3be423754d7854004e8dbb79591343b40b91fa39 Mon Sep 17 00:00:00 2001
From: aladinor <aladinor@unal.edu.co>
Date: Thu, 16 Apr 2026 19:47:18 -0500
Subject: [PATCH 2/7] Add tests for metadata-first CF datetime dtype inference

Cover all paths that should skip store reads (explicit use_cftime,
non-standard calendar, wide time_unit, non-chunked data, narrow
storage dtype bounds, valid_max / time_coverage_end attrs) plus the
chunked int64 fallback and malformed units error.

Uses InaccessibleArray to prove no reads occur on the fast paths and
a TypeError is raised for the verification wrapper on dtype kind
mismatches.
---
 xarray/tests/test_coding_times.py | 149 ++++++++++++++++++++++++++++++
 1 file changed, 149 insertions(+)

diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index 15e38233c4b..86cdbbd4aea 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -20,6 +20,7 @@
 )
 from xarray.coders import CFDatetimeCoder, CFTimedeltaCoder
 from xarray.coding.times import (
+    _decode_cf_datetime_dtype,
     _encode_datetime_with_cftime,
     _netcdf_to_numpy_timeunit,
     _numpy_to_netcdf_timeunit,
@@ -46,6 +47,7 @@
     _STANDARD_CALENDARS,
     DuckArrayWrapper,
     FirstElementAccessibleArray,
+    InaccessibleArray,
     _all_cftime_date_types,
     arm_xfail,
     assert_array_equal,
@@ -2213,3 +2215,150 @@ def test_roundtrip_empty_datetime64_array(time_unit: PDDatetimeUnitOptions) -> N
     )
     assert_identical(variable, roundtripped)
     assert roundtripped.dtype == variable.dtype
+
+
+class TestDecodeCFDatetimeDtype:
+    """Metadata-first dtype inference skips store reads when safe."""
+
+    @staticmethod
+    def _inaccessible(dtype: str = "int64", shape: tuple[int, ...] = (3,)):
+        """An array-like that raises UnexpectedDataAccess on any read.
+
+        Used to prove ``_decode_cf_datetime_dtype`` does not touch data
+        when metadata alone is sufficient.
+        """
+        return InaccessibleArray(np.zeros(shape, dtype=dtype))
+
+    @pytest.mark.parametrize(
+        "use_cftime, expected_kind",
+        [(True, "O"), (False, "M")],
+        ids=["use_cftime=True", "use_cftime=False"],
+    )
+    def test_explicit_use_cftime_no_reads(self, use_cftime, expected_kind):
+        data = self._inaccessible()
+        dtype = _decode_cf_datetime_dtype(
+            data, "days since 2000-01-01", "standard", use_cftime
+        )
+        assert dtype.kind == expected_kind
+
+    @requires_cftime
+    def test_nonstandard_calendar_no_reads(self):
+        data = self._inaccessible()
+        dtype = _decode_cf_datetime_dtype(
+            data, "days since 2000-01-01", "noleap", use_cftime=None
+        )
+        assert dtype == np.dtype("object")
+
+    @pytest.mark.parametrize("time_unit", ["s", "ms", "us"])
+    def test_wide_time_unit_no_reads(self, time_unit):
+        data = self._inaccessible()
+        dtype = _decode_cf_datetime_dtype(
+            data,
+            "days since 2000-01-01",
+            "standard",
+            use_cftime=None,
+            time_unit=time_unit,
+        )
+        assert dtype.kind == "M"
+
+    def test_unchunked_data_no_reads(self):
+        # With use_cftime=None + ns + non-chunked, dtype mismatch at access
+        # time is harmless (sync path reconciles), so no reads needed.
+        data = self._inaccessible(dtype="int64")
+        dtype = _decode_cf_datetime_dtype(
+            data,
+            "days since 2000-01-01",
+            "standard",
+            use_cftime=None,
+            time_unit="ns",
+        )
+        assert dtype == np.dtype("datetime64[ns]")
+
+    def test_bounded_storage_dtype_no_reads(self):
+        # Chunked int32 seconds from 2000 cannot overflow datetime64[ns],
+        # so the bounds check lets us skip reads without falling through
+        # to the unchunked shortcut.
+        dask = pytest.importorskip("dask.array")
+        chunked = dask.from_array(np.zeros(3, dtype="int32"))
+        dtype = _decode_cf_datetime_dtype(
+            chunked, "seconds since 2000-01-01", "standard", use_cftime=None
+        )
+        assert dtype == np.dtype("datetime64[ns]")
+
+    @requires_dask
+    def test_valid_max_attr_no_reads(self):
+        import dask.array as da
+
+        # int64 storage would normally trigger a read, but valid_max
+        # proves no overflow is possible.
+        chunked = da.from_array(np.zeros(3, dtype="int64"))
+        chunked_inacc = InaccessibleArray(chunked)
+        attrs = {"valid_max": 3650}  # ~10 years, well within range
+        dtype = _decode_cf_datetime_dtype(
+            chunked_inacc,
+            "days since 2000-01-01",
+            "standard",
+            use_cftime=None,
+            attrs=attrs,
+        )
+        assert dtype == np.dtype("datetime64[ns]")
+
+    @requires_dask
+    def test_time_coverage_end_attr_no_reads(self):
+        import dask.array as da
+
+        chunked = da.from_array(np.zeros(3, dtype="int64"))
+        chunked_inacc = InaccessibleArray(chunked)
+        attrs = {"time_coverage_end": "2025-12-31T23:59:59Z"}
+        dtype = _decode_cf_datetime_dtype(
+            chunked_inacc,
+            "days since 2000-01-01",
+            "standard",
+            use_cftime=None,
+            attrs=attrs,
+        )
+        assert dtype == np.dtype("datetime64[ns]")
+
+    @requires_dask
+    def test_chunked_int64_falls_back_to_reads(self):
+        # Chunked + ns + use_cftime=None + int64 + no bounds: metadata
+        # cannot prove anything, so we fall back to reading first/last.
+        import dask.array as da
+
+        chunked = da.from_array(np.array([0, 1, 2], dtype="int64"), chunks=3)
+        dtype = _decode_cf_datetime_dtype(
+            chunked,
+            "days since 2000-01-01",
+            "standard",
+            use_cftime=None,
+        )
+        assert dtype == np.dtype("datetime64[ns]")
+
+    def test_invalid_units_raises_value_error(self):
+        data = self._inaccessible()
+        with pytest.raises(ValueError, match="unable to decode time units"):
+            _decode_cf_datetime_dtype(data, "not a valid unit string", None, None)
+
+    def test_verification_wrapper_raises_on_datetime_vs_object_mismatch(self):
+        # Metadata predicts datetime64[ns] but the decoder returns cftime
+        # objects (the overflow case). The wrapper must raise TypeError.
+        from xarray.coding.times import _verify_decoded_dtype
+
+        def fake_decoder(values):
+            return np.array([object()], dtype="object")  # cftime-like object output
+
+        guarded = _verify_decoded_dtype(fake_decoder, np.dtype("datetime64[ns]"))
+        with pytest.raises(TypeError, match="does not match declared dtype"):
+            guarded(np.array([0, 1]))
+
+    def test_verification_wrapper_accepts_different_datetime64_resolutions(self):
+        # Declared datetime64[ns] but decoder returns datetime64[us] — both
+        # are datetime64, so the wrapper should pass them through.
+        from xarray.coding.times import _verify_decoded_dtype
+
+        def fake_decoder(values):
+            return np.array([0, 1], dtype="datetime64[us]")
+
+        guarded = _verify_decoded_dtype(fake_decoder, np.dtype("datetime64[ns]"))
+        result = guarded(np.array([0, 1]))
+        assert result.dtype == np.dtype("datetime64[us]")

From 285a93a900c70af51b6cbbfc1da7203dc2ea119d Mon Sep 17 00:00:00 2001
From: aladinor <aladinor@unal.edu.co>
Date: Thu, 16 Apr 2026 19:47:27 -0500
Subject: [PATCH 3/7] Add whats-new entry for CF datetime dtype inference
 speedup

---
 doc/whats-new.rst | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index ab96aff4d1a..ad6a37b32d6 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -104,6 +104,20 @@ Deprecations
 ~~~~~~~~~~~~
 
 
+Performance
+~~~~~~~~~~~
+
+- Skip data reads during CF datetime dtype inference when the output dtype is
+  determinable from metadata alone (``use_cftime`` explicitly set, non-standard
+  calendar, non-nanosecond ``time_unit``, non-chunked data, or when
+  ``valid_max`` / ``time_coverage_end`` / a narrow storage dtype bounds the
+  possible values). This can significantly speed up
+  :py:func:`open_dataset` / :py:func:`open_datatree` on remote-backed stores
+  (zarr on S3, icechunk) with many time-encoded variables. A runtime
+  dtype-mismatch guard on the lazy decode wrapper prevents the rare overflow
+  case from being silently cast by dask.
+  By `Alfonso Ladino <https://github.com/aladinor>`_.
+
 Bug Fixes
 ~~~~~~~~~
 

From db2bb2cbd35d678bcded1e4f22100b71555cd0ea Mon Sep 17 00:00:00 2001
From: aladinor <aladinor@unal.edu.co>
Date: Thu, 16 Apr 2026 20:30:19 -0500
Subject: [PATCH 4/7] Address CI review: narrow fast path, drop runtime
 verification wrapper

- Restrict the "skip reads" fast path to backend-lazy data
  (LazilyIndexedArray, walked through intermediate coder wrappers).
  In-memory PandasIndexingAdapter arrays now keep the eager first/last
  reads so fail-fast semantics for overflow values are preserved
  (fixes test_decode_cf_datetime_transition_to_invalid regression).
- Short-circuit use_cftime=True and non-standard calendars in the
  metadata path so cftime is not required for the fast path (fixes
  bare-min CI failure).
- Drop the `_verify_decoded_dtype` guard. Instead rely on the fallback
  path's reads to surface the correct cftime dtype when the
  metadata-based check cannot prove safety.
- Rename the transform-wrapping intermediate to avoid the mypy
  reassignment error on CFDatetimeCoder.decode.
---
 xarray/coding/times.py | 78 +++++++++++++++++++++---------------------
 1 file changed, 39 insertions(+), 39 deletions(-)

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index de4751d2245..ffb6e56f607 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -367,6 +367,25 @@ def _bound_fits_datetime64_ns(max_value: float | int, units: str) -> bool:
     return _DATETIME64_NS_LOWER <= lower and upper <= _DATETIME64_NS_UPPER
 
 
+def _is_backed_by_lazy_backend(data) -> bool:
+    """Walk intermediate coder wrappers to find a backend-lazy array.
+
+    Returns True when some level of the wrapper chain is a
+    ``LazilyIndexedArray`` (zarr/netcdf/DAP backends). Returns False for
+    in-memory data (plain numpy, ``PandasIndexingAdapter``) where reads
+    are essentially free and we want to keep the fail-fast semantics.
+    """
+    # Guard against cycles; bound the depth to a small number.
+    for _ in range(16):
+        if isinstance(data, indexing.LazilyIndexedArray):
+            return True
+        inner = getattr(data, "array", None)
+        if inner is None or inner is data:
+            return False
+        data = inner
+    return False
+
+
 def _bound_from_attrs(attrs: dict, units: str) -> bool:
     """True if ``valid_max`` or ``time_coverage_end`` in attrs proves the
     decoded values fit in datetime64[ns]."""
@@ -379,6 +398,9 @@ def _bound_from_attrs(attrs: dict, units: str) -> bool:
             ts = pd.Timestamp(end)
         except (ValueError, TypeError):
             return False
+        # Strip any timezone for comparison with tz-naive ns bounds.
+        if ts.tzinfo is not None:
+            ts = ts.tz_convert(None) if ts.tz is not None else ts.tz_localize(None)
         return bool(_DATETIME64_NS_LOWER <= ts <= _DATETIME64_NS_UPPER)
     return False
 
@@ -407,9 +429,13 @@ def _is_overflow_safe_from_metadata(
     # is unreachable in practice.
     if time_unit != "ns":
         return True
-    # Non-chunked data: sync decode (_ElementwiseFunctionArray) returns the
-    # function's actual output dtype and xarray reconciles at access time.
-    if not is_chunked_array(data):
+    # Backend-lazy arrays: reads go through the backend's indexing
+    # pipeline (zarr sync/async, DAP, etc.) and can be expensive even
+    # when values are cached. We walk through intermediate coder wrappers
+    # (_ElementwiseFunctionArray) to find the backing array. In-memory
+    # wrappers like PandasIndexingAdapter are not covered here — reading
+    # them is free and preserves fail-fast on invalid values.
+    if _is_backed_by_lazy_backend(data):
         return True
     # Chunked + ns + auto: need a proven bound.
     if attrs is not None and _bound_from_attrs(attrs, units):
@@ -433,13 +459,20 @@ def _infer_dtype_from_metadata(
     """Infer the decoded dtype from metadata alone, without reading data.
 
     Raises ``_CannotInferFromMetadata`` when the dtype depends on values
-    that must be read. Otherwise runs ``decode_cf_datetime`` on a
-    synthetic ``[0, 1]`` array so the same resolution-selection logic
-    that runs on real data also produces the declared dtype.
+    that must be read. For deterministic cases (``use_cftime`` explicit,
+    non-standard calendar) returns the dtype directly so ``cftime`` does
+    not need to be importable. Otherwise runs ``decode_cf_datetime`` on
+    a synthetic ``[0, 1]`` array so resolution-selection logic matches
+    real-data output.
     """
     # Validate units — raises ValueError on malformed strings.
     _unpack_time_unit_and_ref_date(units)
 
+    if use_cftime is True:
+        return np.dtype("object")
+    if calendar is not None and not _is_standard_calendar(calendar):
+        return np.dtype("object")
+
     if not _is_overflow_safe_from_metadata(
         data, units, calendar, use_cftime, time_unit, attrs
     ):
@@ -509,35 +542,6 @@ def _decode_cf_datetime_dtype(
         raise _wrap_decode_error(err, units, calendar) from err
 
 
-def _verify_decoded_dtype(func: Callable, expected_dtype: np.dtype) -> Callable:
-    """Wrap ``func`` to raise on datetime64/object dtype mismatches.
-
-    The lazy decode pipeline advertises ``expected_dtype`` upfront; when
-    data is chunked, dask.map_blocks uses it to cast the function's
-    output. If decoded values land on the wrong side of the
-    datetime64/object boundary (e.g. overflow forces a cftime fallback),
-    this wrapper raises ``TypeError`` instead of silently casting.
-    Different datetime64 resolutions are allowed — only the
-    datetime64/object boundary matters.
-    """
-    expected_is_datetime = expected_dtype.kind == "M"
-
-    def wrapper(values):
-        result = func(values)
-        result_dtype = getattr(result, "dtype", np.dtype("object"))
-        if (result_dtype.kind == "M") != expected_is_datetime:
-            raise TypeError(
-                f"decoded datetime dtype {result_dtype!r} does not match "
-                f"declared dtype {expected_dtype!r}. Values likely fall "
-                f"outside the representable range of {expected_dtype!r}; "
-                "pass use_cftime=True or a coarser time_unit "
-                "(e.g. 'us' or 's') to decode_times."
-            )
-        return result
-
-    return wrapper
-
-
 def _decode_datetime_with_cftime(
     num_dates: np.ndarray, units: str, calendar: str
 ) -> np.ndarray:
@@ -1593,10 +1597,6 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable:
                 use_cftime=self.use_cftime,
                 time_unit=self.time_unit,
             )
-            # Guard against dtype mismatches (e.g. overflow values that would
-            # cause dask map_blocks to silently cast cftime output to
-            # datetime64 garbage).
-            transform = _verify_decoded_dtype(transform, dtype)
             data = lazy_elemwise_func(data, transform, dtype)
 
             return Variable(dims, data, attrs, encoding, fastpath=True)

From 65fadf7fc9d2ab654af6a32802f799e2b33bedc9 Mon Sep 17 00:00:00 2001
From: aladinor <aladinor@unal.edu.co>
Date: Thu, 16 Apr 2026 20:30:32 -0500
Subject: [PATCH 5/7] Update CF datetime dtype inference tests for narrowed
 fast path

- Replace the "non-chunked data" test with tests that exercise the new
  LazilyIndexedArray check, including coder-wrapped lazy backends.
- Add a regression test for in-memory PandasIndexingAdapter data to
  ensure it still falls through to the eager reads path.
- Drop the verification-wrapper tests along with the wrapper itself.
---
 xarray/tests/test_coding_times.py | 74 +++++++++++++++++++------------
 1 file changed, 45 insertions(+), 29 deletions(-)

diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index 52b56e7cc30..db57340b53a 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -36,6 +36,7 @@
 )
 from xarray.coding.variables import SerializationWarning
 from xarray.conventions import _update_bounds_attributes, cf_encoder
+from xarray.core import indexing
 from xarray.core.common import contains_cftime_datetimes
 from xarray.core.types import PDDatetimeUnitOptions
 from xarray.core.utils import is_duck_dask_array
@@ -2265,12 +2266,13 @@ def test_wide_time_unit_no_reads(self, time_unit):
         )
         assert dtype.kind == "M"
 
-    def test_unchunked_data_no_reads(self):
-        # With use_cftime=None + ns + non-chunked, dtype mismatch at access
-        # time is harmless (sync path reconciles), so no reads needed.
-        data = self._inaccessible(dtype="int64")
+    def test_lazy_backend_array_no_reads(self):
+        # Backend-lazy wrappers (LazilyIndexedArray) skip reads because
+        # indexing them would trigger expensive backend I/O.
+        inner = self._inaccessible(dtype="int64")
+        lazy = indexing.LazilyIndexedArray(inner)
         dtype = _decode_cf_datetime_dtype(
-            data,
+            lazy,
             "days since 2000-01-01",
             "standard",
             use_cftime=None,
@@ -2278,6 +2280,44 @@ def test_unchunked_data_no_reads(self):
         )
         assert dtype == np.dtype("datetime64[ns]")
 
+    def test_coder_wrapped_lazy_backend_no_reads(self):
+        # When earlier coders wrap the backend-lazy array in
+        # _ElementwiseFunctionArray, the fast path still applies — the
+        # wrapper chain is walked to find the underlying LazilyIndexedArray.
+        from xarray.coding.common import lazy_elemwise_func
+
+        inner = self._inaccessible(dtype="int64")
+        lazy = indexing.LazilyIndexedArray(inner)
+        wrapped = lazy_elemwise_func(lazy, lambda x: x, np.dtype("int64"))
+        dtype = _decode_cf_datetime_dtype(
+            wrapped,
+            "days since 2000-01-01",
+            "standard",
+            use_cftime=None,
+            time_unit="ns",
+        )
+        assert dtype == np.dtype("datetime64[ns]")
+
+    def test_pandas_index_coord_reads_are_performed(self):
+        # In-memory coord data (PandasIndexingAdapter) is NOT treated as
+        # backend-lazy — we keep the eager first/last reads so fail-fast
+        # semantics on overflow values are preserved for in-memory data.
+        pd = pytest.importorskip("pandas")
+        adapter = indexing.PandasIndexingAdapter(
+            pd.Index(np.array([0, 1], dtype="int64"))
+        )
+        dtype = _decode_cf_datetime_dtype(
+            adapter,
+            "days since 2000-01-01",
+            "standard",
+            use_cftime=None,
+            time_unit="ns",
+        )
+        # No assertion on "reads" here — we rely on the fallback path
+        # being reached; overflow detection is covered by existing
+        # conventions tests (e.g. test_decode_cf_datetime_transition_to_invalid).
+        assert dtype == np.dtype("datetime64[ns]")
+
     def test_bounded_storage_dtype_no_reads(self):
         # Chunked int32 seconds from 2000 cannot overflow datetime64[ns],
         # so the bounds check lets us skip reads without falling through
@@ -2342,27 +2382,3 @@ def test_invalid_units_raises_value_error(self):
         data = self._inaccessible()
         with pytest.raises(ValueError, match="unable to decode time units"):
             _decode_cf_datetime_dtype(data, "not a valid unit string", None, None)
-
-    def test_verification_wrapper_raises_on_datetime_vs_object_mismatch(self):
-        # Metadata predicts datetime64[ns] but the decoder returns cftime
-        # objects (the overflow case). The wrapper must raise TypeError.
-        from xarray.coding.times import _verify_decoded_dtype
-
-        def fake_decoder(values):
-            return np.array([object()], dtype="object")  # cftime-like object output
-
-        guarded = _verify_decoded_dtype(fake_decoder, np.dtype("datetime64[ns]"))
-        with pytest.raises(TypeError, match="does not match declared dtype"):
-            guarded(np.array([0, 1]))
-
-    def test_verification_wrapper_accepts_different_datetime64_resolutions(self):
-        # Declared datetime64[ns] but decoder returns datetime64[us] — both
-        # are datetime64, so the wrapper should pass them through.
-        from xarray.coding.times import _verify_decoded_dtype
-
-        def fake_decoder(values):
-            return np.array([0, 1], dtype="datetime64[us]")
-
-        guarded = _verify_decoded_dtype(fake_decoder, np.dtype("datetime64[ns]"))
-        result = guarded(np.array([0, 1]))
-        assert result.dtype == np.dtype("datetime64[us]")

From 7392d70a089563591f02b00b17fe2fdd284dc59d Mon Sep 17 00:00:00 2001
From: aladinor <aladinor@unal.edu.co>
Date: Thu, 16 Apr 2026 20:34:52 -0500
Subject: [PATCH 6/7] Refresh _is_overflow_safe_from_metadata docstring

The wording still referred to the old "chunked data" criterion. Update
it to describe the current backend-laziness check.
---
 xarray/coding/times.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/xarray/coding/times.py b/xarray/coding/times.py
index ffb6e56f607..ca2ab96a7bd 100644
--- a/xarray/coding/times.py
+++ b/xarray/coding/times.py
@@ -415,10 +415,10 @@ def _is_overflow_safe_from_metadata(
 ) -> bool:
     """Whether decode_cf_datetime's output dtype is knowable from metadata.
 
-    Output dtype depends on data values only when all of the following
-    hold: ``use_cftime=None``, standard calendar, ``time_unit="ns"``, data
-    is chunked (so dask enforces the declared dtype), and we have no
-    explicit bound that proves values fit the datetime64[ns] range.
+    Output dtype depends on data values only for ``use_cftime=None`` +
+    standard calendar + ``time_unit="ns"`` with a backend-lazy source
+    (where reads are expensive) and no attr/dtype bound that proves
+    values fit the datetime64[ns] range.
     """
     if use_cftime is not None:
         return True

From f28ef9c68569c165ce7ed979c51e481d76367e93 Mon Sep 17 00:00:00 2001
From: aladinor <aladinor@unal.edu.co>
Date: Thu, 16 Apr 2026 20:35:03 -0500
Subject: [PATCH 7/7] Update whats-new entry to match final implementation

Drop the mention of the runtime dtype-mismatch guard (removed) and
reference the backing issue.
---
 doc/whats-new.rst | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 23450285e9f..460188ab648 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -164,13 +164,11 @@ Performance
 
 - Skip data reads during CF datetime dtype inference when the output dtype is
   determinable from metadata alone (``use_cftime`` explicitly set, non-standard
-  calendar, non-nanosecond ``time_unit``, non-chunked data, or when
+  calendar, non-nanosecond ``time_unit``, lazy backend arrays, or when
   ``valid_max`` / ``time_coverage_end`` / a narrow storage dtype bounds the
   possible values). This can significantly speed up
   :py:func:`open_dataset` / :py:func:`open_datatree` on remote-backed stores
-  (zarr on S3, icechunk) with many time-encoded variables. A runtime
-  dtype-mismatch guard on the lazy decode wrapper prevents the rare overflow
-  case from being silently cast by dask.
+  (zarr on S3, icechunk) with many time-encoded variables (:issue:`11303`).
   By `Alfonso Ladino <https://github.com/aladinor>`_.
 
 Bug Fixes