Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
b3c1012
Fix #311
kshirajahere Mar 2, 2026
c2d443c
Fix #311
kshirajahere Mar 2, 2026
a08eff0
Update CHANGELOG for Fix #311 and Pull #312
kshirajahere Mar 2, 2026
f3aaeca
fix: account for forecast mode forcing horizons (Closes #319)
kshirajahere Mar 3, 2026
3b1e014
fix: account for forecast mode forcing horizons (Closes #319)
kshirajahere Mar 3, 2026
7987bf4
Clarify fixes in CHANGELOG.md
kshirajahere Mar 3, 2026
1c588da
Merge branch 'main' into fix/weatherdataset-index-bounds-311
kshirajahere Mar 3, 2026
ce4286e
Reduced the forecast-mode validation (#312)
kshirajahere Mar 8, 2026
10ead93
Merge remote-tracking branch 'mllam' into pr/kshirajahere/312
sadamov Mar 9, 2026
e02b422
linting
sadamov Mar 9, 2026
ed0f1a8
Fix forecast forcing horizon validation
kshirajahere Mar 17, 2026
075d92c
Resolve #312 test conflict and harden forcing handling
kshirajahere Mar 19, 2026
a1db949
Merge origin/main into fix/weatherdataset-index-bounds-311
kshirajahere Mar 19, 2026
5618bd9
Fix forecast-mode WeatherDataset length validation
kshirajahere Mar 21, 2026
4335729
Validate forecast coordinate consistency in WeatherDataset
kshirajahere Mar 21, 2026
9329f54
Merge origin/main into fix/weatherdataset-index-bounds-311
kshirajahere Mar 23, 2026
764fd7f
style: format test_datasets with black
kshirajahere Mar 24, 2026
af95a8d
Merge branch 'main' into fix/weatherdataset-index-bounds-311
kshirajahere Mar 30, 2026
9d1bf17
Merge branch 'main' into fix/weatherdataset-index-bounds-311
kshirajahere Apr 3, 2026
e554aa8
ci: unblock PR GPU checks
kshirajahere Apr 8, 2026
29220cf
Revert "ci: unblock PR GPU checks"
kshirajahere Apr 8, 2026
c9a0a72
Merge branch 'main' into fix/weatherdataset-index-bounds-311
kshirajahere Apr 10, 2026
040ba8f
Move dataset validation out of __len__
kshirajahere Apr 10, 2026
bf161bd
Format dataset validation for pre-commit
kshirajahere Apr 10, 2026
4bacdc9
Merge branch 'main' into fix/weatherdataset-index-bounds-311
kshirajahere Apr 17, 2026
b15ba68
fix: allow longer forecast forcing horizons
kshirajahere Apr 21, 2026
99fa750
Merge branch 'main' into fix/weatherdataset-index-bounds-311
kshirajahere Apr 22, 2026
37947f3
Merge branch 'main' into fix/weatherdataset-index-bounds-311
kshirajahere Apr 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [unreleased](https://github.com/mllam/neural-lam/compare/v0.5.0...HEAD)

### Fixed
- Fix `WeatherDataset` boundary handling for out-of-range indexing and forecast-mode forcing horizon validation to prevent malformed samples [\#312](https://github.com/mllam/neural-lam/pull/312)

- Fix README image paths to use absolute GitHub URLs so images display correctly on PyPI [\#188](https://github.com/mllam/neural-lam/pull/188) @bk-simon

Expand Down
59 changes: 51 additions & 8 deletions neural_lam/weather_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,16 +131,47 @@ def __len__(self):
UserWarning,
)

# check that there are enough forecast steps available to create
# samples given the number of autoregressive steps requested
n_forecast_steps = self.da_state.elapsed_forecast_duration.size
if n_forecast_steps < 2 + self.ar_steps:
# Check that there are enough forecast steps available for state
# slicing. This includes two initial states and `ar_steps` targets,
# potentially offset by past forcing.
required_state_steps = (
max(2, self.num_past_forcing_steps) + self.ar_steps
)
n_state_forecast_steps = (
self.da_state.elapsed_forecast_duration.size
)
if n_state_forecast_steps < required_state_steps:
raise ValueError(
"The number of forecast steps available "
f"({n_forecast_steps}) is less than the required "
f"2+ar_steps (2+{self.ar_steps}={2 + self.ar_steps}) for "
"creating a sample with initial and target states."
"The number of state forecast steps available "
f"({n_state_forecast_steps}) is less than the required "
f"{required_state_steps} "
f"(max(2, num_past_forcing_steps={self.num_past_forcing_steps})"
f" + ar_steps={self.ar_steps}) for creating a sample with "
"initial and target states."
)

# If forcing data is present, also validate that the complete
# forcing window can be constructed for each autoregressive target
# step without truncation.
if self.da_forcing is not None:
required_forcing_steps = (
max(2, self.num_past_forcing_steps)
+ self.ar_steps
+ self.num_future_forcing_steps
)
n_forcing_forecast_steps = (
self.da_forcing.elapsed_forecast_duration.size
)
if n_forcing_forecast_steps < required_forcing_steps:
raise ValueError(
"The number of forcing forecast steps available "
f"({n_forcing_forecast_steps}) is less than the "
f"required {required_forcing_steps} "
f"(max(2, num_past_forcing_steps={self.num_past_forcing_steps})"
f" + ar_steps={self.ar_steps} + "
f"num_future_forcing_steps={self.num_future_forcing_steps}) "
"for constructing forcing windows."
)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both da_state and da_forcing are always built from the same datastore time coordinates, so their sizes are guaranteed to be equal and a separate shape check is not needed. The single forecast-mode check is still necessary, but only because the required minimum size is larger when forcing is present (+ num_future_forcing_steps), not because the arrays could ever differ in size.

The no-forcing path can remain unchanged in behaviour; the with-forcing path should skip the redundant state check and go straight to the stricter (and sufficient) forcing constraint.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed the redundant separate forcing-size check and now use the shared forecast horizon once:

  • no-forcing path keeps the original 2 + ar_steps behavior
  • with-forcing path applies the stricter minimum needed for the full forcing window

Re-ran:

  • pytest -q tests/test_datasets.py -k "dataset_length or out_of_bounds or forecast_len"
  • ruff check neural_lam/weather_dataset.py tests/test_datasets.py


return self.da_state.analysis_time.size
else:
Expand All @@ -159,6 +190,7 @@ def __len__(self):
- self.ar_steps
- max(2, self.num_past_forcing_steps)
- self.num_future_forcing_steps
+ 1
)

def _slice_state_time(self, da_state, idx, n_steps: int):
Expand Down Expand Up @@ -468,6 +500,17 @@ def __getitem__(self, idx):
the target steps.

"""
dataset_len = len(self)

# Match Python sequence semantics for negative indexing.
if idx < 0:
idx += dataset_len
if idx < 0 or idx >= dataset_len:
raise IndexError(
f"Index {idx} is out of bounds for dataset of size "
f"{dataset_len}"
)

Comment on lines +574 to +582
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was at first conflicted about this change, because it only affects the analysis type data, and only if accessed programmatically e.g. from a test. And I thought that it might rather be separate issue.

But, after consideration, I think this fits well within this PR. And knowing that there is more flexibility and robustness needed when we introduce #138 boundary datastores, this is good. Nothing to change here, just some context.

(
da_init_states,
da_target_states,
Expand Down
140 changes: 134 additions & 6 deletions tests/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,12 +225,12 @@ def _create_graph():
@pytest.mark.parametrize(
"dataset_config",
[
{"past": 0, "future": 0, "ar_steps": 1, "exp_len_reduction": 3},
{"past": 2, "future": 0, "ar_steps": 1, "exp_len_reduction": 3},
{"past": 0, "future": 2, "ar_steps": 1, "exp_len_reduction": 5},
{"past": 4, "future": 0, "ar_steps": 1, "exp_len_reduction": 5},
{"past": 0, "future": 0, "ar_steps": 5, "exp_len_reduction": 7},
{"past": 3, "future": 3, "ar_steps": 2, "exp_len_reduction": 8},
{"past": 0, "future": 0, "ar_steps": 1, "exp_len_reduction": 2},
{"past": 2, "future": 0, "ar_steps": 1, "exp_len_reduction": 2},
{"past": 0, "future": 2, "ar_steps": 1, "exp_len_reduction": 4},
{"past": 4, "future": 0, "ar_steps": 1, "exp_len_reduction": 4},
{"past": 0, "future": 0, "ar_steps": 5, "exp_len_reduction": 6},
{"past": 3, "future": 3, "ar_steps": 2, "exp_len_reduction": 7},
],
)
def test_dataset_length(dataset_config):
Expand Down Expand Up @@ -259,3 +259,131 @@ def test_dataset_length(dataset_config):
# Check that we can actually get last and first sample
dataset[0]
dataset[expected_len - 1]


def test_dataset_out_of_bounds_indexing_raises():
    """Verify that indexing past either end raises ``IndexError``.

    In-range positive and negative indices must succeed, mirroring the
    semantics of standard Python sequences, while any index outside
    ``[-len, len)`` must fail loudly instead of yielding a bad sample.
    """
    datastore = DummyDatastore(n_grid_points=4, n_timesteps=10)
    dataset = WeatherDataset(
        datastore=datastore,
        split="train",
        ar_steps=2,
        num_past_forcing_steps=1,
        num_future_forcing_steps=1,
    )

    n_samples = len(dataset)

    # Valid positions, including Python-style negative indexing.
    for valid_idx in (0, n_samples - 1, -1):
        dataset[valid_idx]

    # Any index outside the valid range must raise explicitly.
    for invalid_idx in (n_samples, n_samples + 1, -n_samples - 1):
        with pytest.raises(IndexError):
            dataset[invalid_idx]


def test_forecast_len_raises_when_forcing_horizon_too_short():
    """An insufficient forcing forecast horizon must raise on ``len()``.

    With ``ar_steps=2``, ``num_past_forcing_steps=1`` and
    ``num_future_forcing_steps=2`` the full forcing window needs
    ``max(2, 1) + 2 + 2 = 6`` forecast steps, so a 5-step horizon should
    make ``len(dataset)`` fail with a ValueError that mentions the
    forcing forecast steps.
    """
    from types import SimpleNamespace

    import xarray as xr

    n_analysis, n_forecast = 2, 5
    analysis_time = np.array(
        ["2021-01-01T00:00:00", "2021-01-01T01:00:00"],
        dtype="datetime64[ns]",
    )
    elapsed = np.arange(n_forecast, dtype="timedelta64[h]").astype(
        "timedelta64[ns]"
    )

    def _make_da(feature_dim, feature_name):
        # Zero-filled forecast-style array with the minimal coordinates
        # needed by the length validation.
        return xr.DataArray(
            np.zeros((n_analysis, n_forecast, 1, 1), dtype=np.float32),
            dims=(
                "analysis_time",
                "elapsed_forecast_duration",
                "grid_index",
                feature_dim,
            ),
            coords={
                "analysis_time": analysis_time,
                "elapsed_forecast_duration": elapsed,
                "grid_index": [0],
                feature_dim: [feature_name],
            },
        )

    # Build a bare instance so no datastore I/O happens in __init__.
    dataset = WeatherDataset.__new__(WeatherDataset)
    dataset.datastore = SimpleNamespace(is_forecast=True, is_ensemble=False)
    dataset.ar_steps = 2
    dataset.num_past_forcing_steps = 1
    dataset.num_future_forcing_steps = 2
    dataset.da_state = _make_da("state_feature", "state_feat_0")
    dataset.da_forcing = _make_da("forcing_feature", "forcing_feat_0")

    with pytest.raises(ValueError, match="forcing forecast steps"):
        len(dataset)


def test_forecast_len_accepts_exact_forcing_horizon():
    """The minimal sufficient forcing forecast horizon must be accepted.

    With ``ar_steps=2``, ``num_past_forcing_steps=1`` and
    ``num_future_forcing_steps=2`` exactly ``max(2, 1) + 2 + 2 = 6``
    forecast steps are required, so a 6-step horizon should validate and
    the dataset length should equal the number of analysis times.
    """
    from types import SimpleNamespace

    import xarray as xr

    n_analysis, n_forecast = 2, 6
    analysis_time = np.array(
        ["2021-01-01T00:00:00", "2021-01-01T01:00:00"],
        dtype="datetime64[ns]",
    )
    elapsed = np.arange(n_forecast, dtype="timedelta64[h]").astype(
        "timedelta64[ns]"
    )

    def _make_da(feature_dim, feature_name):
        # Zero-filled forecast-style array with the minimal coordinates
        # needed by the length validation.
        return xr.DataArray(
            np.zeros((n_analysis, n_forecast, 1, 1), dtype=np.float32),
            dims=(
                "analysis_time",
                "elapsed_forecast_duration",
                "grid_index",
                feature_dim,
            ),
            coords={
                "analysis_time": analysis_time,
                "elapsed_forecast_duration": elapsed,
                "grid_index": [0],
                feature_dim: [feature_name],
            },
        )

    # Build a bare instance so no datastore I/O happens in __init__.
    dataset = WeatherDataset.__new__(WeatherDataset)
    dataset.datastore = SimpleNamespace(is_forecast=True, is_ensemble=False)
    dataset.ar_steps = 2
    dataset.num_past_forcing_steps = 1
    dataset.num_future_forcing_steps = 2
    dataset.da_state = _make_da("state_feature", "state_feat_0")
    dataset.da_forcing = _make_da("forcing_feature", "forcing_feat_0")

    assert len(dataset) == 2