PyPSA · lmezilis · Oct 16, 2025 · Nov 5, 2025 · Nov 7, 2025 · Nov 7, 2025
diff --git a/.gitignore b/.gitignore
@@ -27,3 +27,4 @@ paper
 # Ignore IDE project files
 .idea/
 .vscode
+.vs
diff --git a/atlite/convert.py b/atlite/convert.py
@@ -36,6 +36,7 @@
 from atlite.resource import (
     get_cspinstallationconfig,
     get_solarpanelconfig,
+    get_waveenergyconverter,
     get_windturbineconfig,
     windturbine_smooth,
 )
@@ -653,7 +654,109 @@ def wind(
     )
 
 
-# irradiation
+# wave
+def convert_wave(ds, wec):
+    r"""
+    Convert wave height (Hs) and wave peak period (Tp) data into normalized power output
+    using the device-specific Wave Energy Converter (WEC) power matrix.
+
+    This function matches each combination of significant wave height and peak period
+    in the dataset to a corresponding power output from the WEC power matrix.
+    The resulting power output is normalized by the maximum possible output (capacity)
+    to obtain the specific generation profile.
+
+    Parameters
+    ----------
+    ds : xarray.Dataset
+        Input dataset (cutout) containing two variables:
+        wave_height: significant wave height (m)
+        wave_period: peak wave period (s)
+    wec_type : dict
+        Dictionary defining the WEC characteristics, including:
+        Power_Matrix: a power matrix dictionary stored in "resources\wecgenerator"
+
+    Returns
+    -------
+    xarray.DataArray
+        DataArray of specific power generation values (normalized power output).
+
+    Notes
+    -----
+    A progress message is printed every one million cases to track computation.
+    """
+
+    power_matrix = pd.DataFrame.from_dict(wec["Power_Matrix"])
+    max_pow = power_matrix.to_numpy().max()
+
+    Hs = np.ceil(ds["wave_height"] * 2) / 2
+    Tp = np.ceil(ds["wave_period"] * 2) / 2
+
+    Hs_list = Hs.to_numpy().flatten().tolist()
+    Tp_list = Tp.to_numpy().flatten().tolist()
+
+    # empty list for result
+    power_list = []
+    cases = len(Hs_list)
+    count = 0
+
+    # for loop to loop through Hs and Tp pairs and get the power output and capacity factor
+    for Hs_ind, Tp_ind in zip(Hs_list, Tp_list):
+        if count % 1000000 == 0:
+            print(f"Case {count} of {cases}: %")
+        if np.isnan(Hs_ind) or np.isnan(Tp_ind):
+            power_list.append(0)
+        elif Hs_ind > 10 or Tp_ind > 18:
+            power_list.append(0)
+        else:
+            generated_power = power_matrix.loc[Hs_ind, Tp_ind]
+            power_list.append(generated_power / max_pow)
+        count += 1
+
+    # results list to numpy array
+    power_list_np = np.array(power_list)
+
+    power_list_np = power_list_np.reshape(Hs.shape)
+
+    da = xr.DataArray(
+        power_list_np, coords=Hs.coords, dims=Hs.dims, name="Power generated"
+    )
+    da.attrs["units"] = "kWh/kWp"
+    da = da.rename("specific generation")
+    da = da.fillna(0)
+
+    return da
-def convert_wave(ds, wec):
-    r"""
-    Convert wave height (Hs) and wave peak period (Tp) data into normalized power output
-    using the device-specific Wave Energy Converter (WEC) power matrix.
-
-    This function matches each combination of significant wave height and peak period
-    in the dataset to a corresponding power output from the WEC power matrix.
-    The resulting power output is normalized by the maximum possible output (capacity)
-    to obtain the specific generation profile.
-
-    Parameters
-    ----------
-    ds : xarray.Dataset
-        Input dataset (cutout) containing two variables:
-        wave_height: significant wave height (m)
-        wave_period: peak wave period (s)
-    wec_type : dict
-        Dictionary defining the WEC characteristics, including:
-        Power_Matrix: a power matrix dictionary stored in "resources\wecgenerator"
-
-    Returns
-    -------
-    xarray.DataArray
-        DataArray of specific power generation values (normalized power output).
-
-    Notes
-    -----
-    A progress message is printed every one million cases to track computation.
-    """
-
-    power_matrix = pd.DataFrame.from_dict(wec["Power_Matrix"])
-    max_pow = power_matrix.to_numpy().max()
-
-    Hs = np.ceil(ds["wave_height"] * 2) / 2
-    Tp = np.ceil(ds["wave_period"] * 2) / 2
-
-    Hs_list = Hs.to_numpy().flatten().tolist()
-    Tp_list = Tp.to_numpy().flatten().tolist()
-
-    # empty list for result
-    power_list = []
-    cases = len(Hs_list)
-    count = 0
-
-    # for loop to loop through Hs and Tp pairs and get the power output and capacity factor
-    for Hs_ind, Tp_ind in zip(Hs_list, Tp_list):
-        if count % 1000000 == 0:
-            print(f"Case {count} of {cases}: %")
-        if np.isnan(Hs_ind) or np.isnan(Tp_ind):
-            power_list.append(0)
-        elif Hs_ind > 10 or Tp_ind > 18:
-            power_list.append(0)
-        else:
-            generated_power = power_matrix.loc[Hs_ind, Tp_ind]
-            power_list.append(generated_power / max_pow)
-        count += 1
-
-    # results list to numpy array
-    power_list_np = np.array(power_list)
-
-    power_list_np = power_list_np.reshape(Hs.shape)
-
-    da = xr.DataArray(
-        power_list_np, coords=Hs.coords, dims=Hs.dims, name="Power generated"
-    )
-    da.attrs["units"] = "kWh/kWp"
-    da = da.rename("specific generation")
-    da = da.fillna(0)
-
-    return da
+
+# wave
+def convert_wave(ds, wec_type, time_chunk_size: int = 100) -> xr.DataArray:
+    r"""
+    Convert wave height (Hs) and wave peak period (Tp) data into specific power output
+    using the device-specific Wave Energy Converter (WEC) power matrix.
+
+    Each (wave height, wave period) pair in the dataset is looked up in the WEC
+    power matrix with nearest-neighbour selection. Power matrix entries that are
+    not strictly positive are discarded before the lookup, and any NaN left
+    after the lookup is replaced by 0.
+
+    Parameters
+    ----------
+    ds : xarray.Dataset
+        Input dataset (cutout) containing two variables:
+        wave_height: significant wave height (m)
+        wave_period: peak wave period (s)
+    wec_type : dict
+        Dictionary defining the WEC characteristics, including:
+        Power_Matrix: a power matrix dictionary stored in "resources\wecgenerator"
+    time_chunk_size : int
+        Number of time steps processed per chunk, to limit memory spikes on
+        large datasets. Must be at least 1. Default is 100.
+
+    Returns
+    -------
+    xarray.DataArray
+        DataArray named "specific generation" with the ``units`` attribute set
+        to "kWh/kWp" and NaN values replaced by 0.
+
+    Raises
+    ------
+    ValueError
+        If ``time_chunk_size`` is smaller than 1.
+
+    Notes
+    -----
+    Progress is reported with a tqdm progress bar that advances once per time chunk.
+    """
+
+    if time_chunk_size < 1:
+        raise ValueError(
+            f"time_chunk_size must be a positive integer, got {time_chunk_size}"
+        )
+
+    # NOTE(review): the matrix values are used as-is; this function does not
+    # divide by the matrix maximum. Confirm that Power_Matrix is already
+    # normalised, otherwise the "kWh/kWp" unit set below is not accurate.
+    power_matrix = (
+        pd.DataFrame.from_dict(wec_type["Power_Matrix"])
+        .stack()
+        .rename_axis(index=["wave_height", "wave_period"])
+        .where(lambda x: x > 0)
+        .dropna()
+        .to_xarray()
+    )
+
+    results = []
+    # The chunk start offsets must advance by the same amount as the slice
+    # width below, otherwise chunks overlap or leave gaps on the time axis.
+    steps = np.arange(0, len(ds.time), step=time_chunk_size)
+    for step in tqdm(steps, desc="Processing wave data chunks", total=len(steps), unit="time chunk"):
+        ds_ = ds.isel(time=slice(step, step + time_chunk_size))
+        cf = power_matrix.interp(
+            {"wave_height": ds_.wave_height, "wave_period": ds_.wave_period},
+            method="nearest",
+        )
+        results.append(cf)
+    da = xr.concat(results, dim="time")
+
+    da.attrs["units"] = "kWh/kWp"
+    da = da.rename("specific generation")
+    da = da.fillna(0)
+
+    return da
-def convert_wave(ds, wec):
-    r"""
-    Convert wave height (Hs) and wave peak period (Tp) data into normalized power output
-    using the device-specific Wave Energy Converter (WEC) power matrix.
-
-    This function matches each combination of significant wave height and peak period
-    in the dataset to a corresponding power output from the WEC power matrix.
-    The resulting power output is normalized by the maximum possible output (capacity)
-    to obtain the specific generation profile.
-
-    Parameters
-    ----------
-    ds : xarray.Dataset
-        Input dataset (cutout) containing two variables:
-        wave_height: significant wave height (m)
-        wave_period: peak wave period (s)
-    wec_type : dict
-        Dictionary defining the WEC characteristics, including:
-        Power_Matrix: a power matrix dictionary stored in "resources\wecgenerator"
-
-    Returns
-    -------
-    xarray.DataArray
-        DataArray of specific power generation values (normalized power output).
-
-    Notes
-    -----
-    A progress message is printed every one million cases to track computation.
-    """
-
-    power_matrix = pd.DataFrame.from_dict(wec["Power_Matrix"])
-    max_pow = power_matrix.to_numpy().max()
-
-    Hs = np.ceil(ds["wave_height"] * 2) / 2
-    Tp = np.ceil(ds["wave_period"] * 2) / 2
-
-    Hs_list = Hs.to_numpy().flatten().tolist()
-    Tp_list = Tp.to_numpy().flatten().tolist()
-
-    # empty list for result
-    power_list = []
-    cases = len(Hs_list)
-    count = 0
-
-    # for loop to loop through Hs and Tp pairs and get the power output and capacity factor
-    for Hs_ind, Tp_ind in zip(Hs_list, Tp_list):
-        if count % 1000000 == 0:
-            print(f"Case {count} of {cases}: %")
-        if np.isnan(Hs_ind) or np.isnan(Tp_ind):
-            power_list.append(0)
-        elif Hs_ind > 10 or Tp_ind > 18:
-            power_list.append(0)
-        else:
-            generated_power = power_matrix.loc[Hs_ind, Tp_ind]
-            power_list.append(generated_power / max_pow)
-        count += 1
-
-    # results list to numpy array
-    power_list_np = np.array(power_list)
-
-    power_list_np = power_list_np.reshape(Hs.shape)
-
-    da = xr.DataArray(
-        power_list_np, coords=Hs.coords, dims=Hs.dims, name="Power generated"
-    )
-    da.attrs["units"] = "kWh/kWp"
-    da = da.rename("specific generation")
-    da = da.fillna(0)
-
-    return da
+
+# wave
+def convert_wave(ds, wec_type, time_chunk_size: int = 100) -> xr.DataArray:
+    r"""
+    Convert wave height (Hs) and wave peak period (Tp) data into specific power output
+    using the device-specific Wave Energy Converter (WEC) power matrix.
+
+    Each (wave height, wave period) pair in the dataset is looked up in the WEC
+    power matrix with nearest-neighbour selection. Power matrix entries that are
+    not strictly positive are discarded before the lookup, and any NaN left
+    after the lookup is replaced by 0.
+
+    Parameters
+    ----------
+    ds : xarray.Dataset
+        Input dataset (cutout) containing two variables:
+        wave_height: significant wave height (m)
+        wave_period: peak wave period (s)
+    wec_type : dict
+        Dictionary defining the WEC characteristics, including:
+        Power_Matrix: a power matrix dictionary stored in "resources\wecgenerator"
+    time_chunk_size : int
+        Number of time steps processed per chunk, to limit memory spikes on
+        large datasets. Must be at least 1. Default is 100.
+
+    Returns
+    -------
+    xarray.DataArray
+        DataArray named "specific generation" with the ``units`` attribute set
+        to "kWh/kWp" and NaN values replaced by 0.
+
+    Raises
+    ------
+    ValueError
+        If ``time_chunk_size`` is smaller than 1.
+
+    Notes
+    -----
+    Progress is reported with a tqdm progress bar that advances once per time chunk.
+    """
+
+    if time_chunk_size < 1:
+        raise ValueError(
+            f"time_chunk_size must be a positive integer, got {time_chunk_size}"
+        )
+
+    # NOTE(review): the matrix values are used as-is; this function does not
+    # divide by the matrix maximum. Confirm that Power_Matrix is already
+    # normalised, otherwise the "kWh/kWp" unit set below is not accurate.
+    power_matrix = (
+        pd.DataFrame.from_dict(wec_type["Power_Matrix"])
+        .stack()
+        .rename_axis(index=["wave_height", "wave_period"])
+        .where(lambda x: x > 0)
+        .dropna()
+        .to_xarray()
+    )
+
+    results = []
+    # The chunk start offsets must advance by the same amount as the slice
+    # width below, otherwise chunks overlap or leave gaps on the time axis.
+    steps = np.arange(0, len(ds.time), step=time_chunk_size)
+    for step in tqdm(steps, desc="Processing wave data chunks", total=len(steps), unit="time chunk"):
+        ds_ = ds.isel(time=slice(step, step + time_chunk_size))
+        cf = power_matrix.interp(
+            {"wave_height": ds_.wave_height, "wave_period": ds_.wave_period},
+            method="nearest",
+        )
+        results.append(cf)
+    da = xr.concat(results, dim="time")
+
+    da.attrs["units"] = "kWh/kWp"
+    da = da.rename("specific generation")
+    da = da.fillna(0)
+
+    return da
+
+
+def wave(cutout, wec, **params):
+    """
+    Compute wave energy generation time series for a given cutout and Wave Energy Converter (WEC) type.
+
+    Parameters
+    ----------
+    cutout : atlite.Cutout
+        Atlite cutout object containing wave-related data (e.g., `wave_height`, `wave_period`).
+    wec : str, pathlib.Path, or dict
+        WEC configuration describing the device's power characteristics.
+        A string or path is resolved through ``get_waveenergyconverter``;
+        any other value is passed on unchanged.
+    **params
+        Additional keyword arguments forwarded to ``cutout.convert_and_aggregate``.
+
+    Returns
+    -------
+    xarray.DataArray
+        Time series of normalized wave power generation for the entire cutout area, with units of "kWh/kWp".
+        The dimensions and resolution follow the input cutout and aggregation parameters.
+
+    References
+    ----------
+    [1] Lavidas G., Mezilis L., Alday M., Baki H., Tan J., Jain A., Engelfried T. and Raghavan V.,
+    Marine renewables in Energy Systems: Impacts of climate data, generators, energy policies,
+    opportunities, and untapped potential for 100% decarbonised systems. Energy, Volume 336, 2025,
+    138359, ISSN 0360-5442, https://doi.org/10.1016/j.energy.2025.138359.
+    """
+    if isinstance(wec, str | Path):
+        wec = get_waveenergyconverter(wec)
+
+    # convert_wave names its converter argument ``wec_type``. Extra keywords are
+    # presumably handed through to convert_func by convert_and_aggregate, so the
+    # keyword used here has to match that parameter name.
+    return cutout.convert_and_aggregate(
+        convert_func=convert_wave, wec_type=wec, **params
+    )
+
+
 def convert_irradiation(
     ds,
     orientation,

diff --git a/atlite/cutout.py b/atlite/cutout.py
@@ -44,6 +44,7 @@
     soil_temperature,
     solar_thermal,
     temperature,
+    wave,
     wind,
 )
 from atlite.data import available_features, cutout_prepare
@@ -661,6 +662,8 @@ def layout_from_capacity_list(self, data, col="Capacity"):
 
     wind = wind
 
+    wave = wave
+
     irradiation = irradiation
 
     pv = pv

diff --git a/atlite/datasets/__init__.py b/atlite/datasets/__init__.py
@@ -6,6 +6,12 @@
 atlite datasets.
 """
 
-from atlite.datasets import era5, gebco, sarah
+from atlite.datasets import cerra, era5, gebco, mrel_wave, sarah
 
-modules = {"era5": era5, "sarah": sarah, "gebco": gebco}
+# Registry mapping each dataset name to the dataset module imported above.
+modules = {
+    "era5": era5,
+    "sarah": sarah,
+    "mrel_wave": mrel_wave,
+    "cerra": cerra,
+    "gebco": gebco,
+}
diff --git a/atlite/datasets/cerra.py b/atlite/datasets/cerra.py
@@ -0,0 +1,72 @@
+# SPDX-FileCopyrightText: Contributors to atlite <https://github.com/pypsa/atlite>
+#
+# SPDX-License-Identifier: MIT
+
+"""
+To create a CERRA cutout, the data must be downloaded manually from the Climate Data Store.
+The variable used is "10m wind speed"; it has no direction component.
+The 10m wind speed is converted to a 100m wind speed so that it fits the rest of atlite's processing.
+"""
+
+import logging
+
+import numpy as np
+import xarray as xr
+from rasterio.warp import Resampling
+
+from atlite.gis import regrid
+
+logger = logging.getLogger(__name__)
+
+# Coordinate reference system identifier for this dataset
+# (presumably an EPSG code; confirm against the other dataset modules).
+crs = 4326
+# Grid spacing in x and y. get_data regrids the data whenever the cutout's
+# dx/dy differ from these values.
+dx = 0.05
+dy = 0.05
+
+# Feature name mapped to the variable names listed for that feature.
+features = {"wind": ["wnd100m", "roughness"]}
+
+
+def as_slice(bounds, pad=True):
+    """
+    Return ``bounds`` as a slice.
+
+    A slice is handed back unchanged. Any other value has ``(-0.01, 0.01)``
+    added to it and the result is unpacked into ``slice``. The ``pad``
+    argument is accepted but not read by this function.
+    """
+    if isinstance(bounds, slice):
+        return bounds
+    padded = bounds + (-0.01, 0.01)
+    return slice(*padded)
+
+
+def get_data(cutout, feature, tmpdir, **creation_parameters):
+    """
+    Retrieve data from a local CERRA dataset and process it.
+
+    Parameters
+    ----------
+    cutout : atlite.Cutout
+        Cutout whose ``coords``, ``extent``, ``dx`` and ``dy`` define the
+        spatial selection and the target grid.
+    feature : str
+        Not read by this function.
+    tmpdir : str
+        Not read by this function.
+    **creation_parameters
+        Must contain ``data_path``, the path passed to ``xarray.open_dataset``.
+
+    Returns
+    -------
+    xarray.Dataset
+        Dataset cropped to the cutout extent, regridded if the cutout
+        resolution differs from the module-level ``dx``/``dy``, and linearly
+        interpolated to hourly resolution.
+
+    Raises
+    ------
+    ValueError
+        If ``data_path`` is missing from ``creation_parameters``.
+    """
+    coords = cutout.coords
+
+    if "data_path" not in creation_parameters:
+        logger.error('Argument "data_path" not defined')
+        raise ValueError('Argument "data_path" not defined')
+    path = creation_parameters["data_path"]
+
+    ds = xr.open_dataset(path)
+
+    ds = ds.sel(x=as_slice(cutout.extent[:2]), y=as_slice(cutout.extent[2:]))
+    # Round the coordinates to four decimals after casting them to float.
+    ds = ds.assign_coords(x=ds.x.astype(float).round(4), y=ds.y.astype(float).round(4))
+
+    if (cutout.dx != dx) or (cutout.dy != dy):
+        ds = regrid(ds, coords["x"], coords["y"], resampling=Resampling.average)
+
+    if "sr" in ds:
+        ds = ds.rename({"sr": "roughness"})
+
+    if "si10" in ds and "roughness" in ds:
+        # Only announce the calculation when it actually takes place.
+        logger.info("Calculating 100 metre wind speed")
+        # Scale the 10 m wind speed by ln(100 / z0) / ln(10 / z0), with the
+        # roughness variable as z0.
+        ds["wnd100m"] = (
+            ds["si10"] * (np.log(100 / ds["roughness"]) / np.log(10 / ds["roughness"]))
+        ).assign_attrs(units="m s**-1", long_name="100 metre wind speed")
+        ds = ds.drop_vars("si10")
+
+    # NOTE(review): this re-assigns the dataset's own x/y coordinates to itself.
+    ds = ds.assign_coords(x=ds.coords["x"], y=ds.coords["y"])
+
+    logger.info("Resampling to 1H.")
+    ds = ds.resample(time="1h").interpolate("linear")
+
+    return ds
diff --git a/atlite/datasets/era5.py b/atlite/datasets/era5.py
@@ -55,6 +55,7 @@ def nullcontext():
     ],
     "temperature": ["temperature", "soil temperature", "dewpoint temperature"],
     "runoff": ["runoff"],
+    "wave": ["wave_height", "wave_period"],
 }
 
 static_features = {"height"}
@@ -244,6 +245,53 @@ def sanitize_runoff(ds):
     return ds
 
 
+def get_data_wave_height(retrieval_params):
+    """
+    Retrieve significant wave height and expose it as ``wave_height``.
+
+    The variable is requested through ``retrieve_data`` with the given
+    retrieval parameters, passed through ``_rename_and_clean_coords`` and
+    renamed from ``swh`` to ``wave_height``.
+    """
+    requested = ["significant_height_of_combined_wind_waves_and_swell"]
+    raw = retrieve_data(variable=requested, **retrieval_params)
+    cleaned = _rename_and_clean_coords(raw)
+    return cleaned.rename({"swh": "wave_height"})
+
+
+def sanitize_wave_height(ds):
+    """
+    Clip ``wave_height`` to a minimum of 0.0 and return the same ``ds`` object.
+    """
+    non_negative = ds["wave_height"].clip(min=0.0)
+    ds["wave_height"] = non_negative
+    return ds
+
+
+def get_data_wave_period(retrieval_params):
+    """
+    Retrieve peak wave period and expose it as ``wave_period``.
+
+    The variable is requested through ``retrieve_data`` with the given
+    retrieval parameters, passed through ``_rename_and_clean_coords`` and
+    renamed from ``pp1d`` to ``wave_period``.
+    """
+    requested = ["peak_wave_period"]
+    raw = retrieve_data(variable=requested, **retrieval_params)
+    cleaned = _rename_and_clean_coords(raw)
+    return cleaned.rename({"pp1d": "wave_period"})
+
+
+def sanitize_wave_period(ds):
+    """
+    Clip ``wave_period`` to a minimum of 0.0 and return the same ``ds`` object.
+    """
+    non_negative = ds["wave_period"].clip(min=0.0)
+    ds["wave_period"] = non_negative
+    return ds
+
+
 def get_data_height(retrieval_params):
     """
     Get height data for given retrieval parameters.
-Original file line number
+Diff line change
@@ Expand Up / @@ -27,3 +27,4 @@ paper @@
     # Ignore IDE project files
     .idea/
     .vscode
+    .vs