Skip to content
Open
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
5f87745
atlite-mrel first commit
Oct 16, 2025
20045e1
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 5, 2025
ffb156d
code resolutions for merge
Nov 7, 2025
2b0380e
Merge branch 'wecmatrices-datamodules' of https://github.com/lmezilis…
Nov 7, 2025
dcffaef
commit to fork
Nov 7, 2025
3383ebc
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 7, 2025
7b6872f
fix count message error
Nov 10, 2025
335a67c
Merge branch 'wecmatrices-datamodules' of https://github.com/lmezilis…
Nov 10, 2025
1b7efd7
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 10, 2025
7067490
fix docstring
Nov 10, 2025
3f0c0df
Merge branch 'wecmatrices-datamodules' of https://github.com/lmezilis…
Nov 10, 2025
7aaae50
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 10, 2025
a1a2aaa
fix syntax
Nov 10, 2025
963896d
Merge branch 'wecmatrices-datamodules' of https://github.com/lmezilis…
Nov 10, 2025
76f94fd
copyrights cerra
Nov 10, 2025
5f6cfe2
fix syntax
Nov 11, 2025
39eefe6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 11, 2025
ba748f4
convert func
Nov 11, 2025
f2acfc4
Merge branch 'wecmatrices-datamodules' of https://github.com/lmezilis…
Nov 11, 2025
1de851d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 11, 2025
83ae74a
wec renaming
Nov 14, 2025
669ad8e
Merge branch 'wecmatrices-datamodules' of https://github.com/lmezilis…
Nov 14, 2025
400b591
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 14, 2025
4a3e2a2
cutout example MREL
Nov 20, 2025
c76e86c
Merge branch 'wecmatrices-datamodules' of https://github.com/lmezilis…
Nov 20, 2025
124a9d2
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 20, 2025
66a2d65
update convert function
Nov 20, 2025
9cb7ff7
Merge branch 'wecmatrices-datamodules' of https://github.com/lmezilis…
Nov 20, 2025
58d0a2c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 20, 2025
a63d82c
fix output
Nov 20, 2025
0c50fa9
update commit
Nov 20, 2025
d53fdd6
correct syntax
Nov 20, 2025
0c0b726
Merge branch 'wecmatrices-datamodules' of https://github.com/lmezilis…
Nov 20, 2025
5cc55a8
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 20, 2025
8b4ae18
syntax corrections
Nov 20, 2025
9b7cae6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 20, 2025
b06dfd3
delete picture
Nov 20, 2025
0363202
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 20, 2025
270951b
Merge branch 'master' into wecmatrices-datamodules
lmezilis Nov 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ paper
# Ignore IDE project files
.idea/
.vscode
.vs

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's generally best to add these pointers to your own "global" gitignore, rather than to every project you work on. That way, it never accidentally slips in without you realising it!

105 changes: 104 additions & 1 deletion atlite/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from atlite.resource import (
get_cspinstallationconfig,
get_solarpanelconfig,
get_waveenergyconverter,
get_windturbineconfig,
windturbine_smooth,
)
Expand Down Expand Up @@ -653,7 +654,109 @@ def wind(
)


# irradiation
# wave
def convert_wave(ds, wec):
r"""
Convert wave height (Hs) and wave peak period (Tp) data into normalized power output
using the device-specific Wave Energy Converter (WEC) power matrix.

This function matches each combination of significant wave height and peak period
in the dataset to a corresponding power output from the WEC power matrix.
The resulting power output is normalized by the maximum possible output (capacity)
to obtain the specific generation profile.

Parameters
----------
ds : xarray.Dataset
Input dataset (cutout) containing two variables:
wave_height: significant wave height (m)
wave_period: peak wave period (s)
wec_type : dict
Dictionary defining the WEC characteristics, including:
Power_Matrix: a power matrix dictionary stored in "resources\wecgenerator"

Returns
-------
xarray.DataArray
DataArray of specific power generation values (normalized power output).

Notes
-----
A progress message is printed every one million cases to track computation.
"""

power_matrix = pd.DataFrame.from_dict(wec["Power_Matrix"])
max_pow = power_matrix.to_numpy().max()

Hs = np.ceil(ds["wave_height"] * 2) / 2
Tp = np.ceil(ds["wave_period"] * 2) / 2

Hs_list = Hs.to_numpy().flatten().tolist()
Tp_list = Tp.to_numpy().flatten().tolist()

# empty list for result
power_list = []
cases = len(Hs_list)
count = 0

# for loop to loop through Hs and Tp pairs and get the power output and capacity factor
for Hs_ind, Tp_ind in zip(Hs_list, Tp_list):
if count % 1000000 == 0:
print(f"Case {count} of {cases}: %")
if np.isnan(Hs_ind) or np.isnan(Tp_ind):
power_list.append(0)
elif Hs_ind > 10 or Tp_ind > 18:

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are cut-outs, I assume. Should they not be specified in the power matrices? Or, rather, they look to be outside the matrices, so we'd get cf NaN values, which we can fill with zeros later. That way, it doesn't cause a silent error if in future a power matrix is added that has non-zero values above these thresholds.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are correct they indicate the older limits of power conversion. It is true that they are already specified in the matrices so this power filter can be erased from this section.

power_list.append(0)
else:
generated_power = power_matrix.loc[Hs_ind, Tp_ind]
power_list.append(generated_power / max_pow)
count += 1

# results list to numpy array
power_list_np = np.array(power_list)

power_list_np = power_list_np.reshape(Hs.shape)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This approach is quite inefficient for combining two matrices of data. We want to vectorise operations where possible. I have an idea how to achieve this which I'll test locally and then come back to you.


da = xr.DataArray(
power_list_np, coords=Hs.coords, dims=Hs.dims, name="Power generated"
)
da.attrs["units"] = "kWh/kWp"
da = da.rename("specific generation")
da = da.fillna(0)

return da

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I had the chance to test this.

This suggestion vectorises the approach, chunking on the time dimension to reduce the amount of data that is processed in memory at any point. This should be much quicker than your current approach and the time chunk size can be relaxed when running on remote machines with large memory availability. You can run even on a local machine by using small time chunks.

Give it a go with data you have and let me know how it goes!

Suggested change
def convert_wave(ds, wec):
r"""
Convert wave height (Hs) and wave peak period (Tp) data into normalized power output
using the device-specific Wave Energy Converter (WEC) power matrix.
This function matches each combination of significant wave height and peak period
in the dataset to a corresponding power output from the WEC power matrix.
The resulting power output is normalized by the maximum possible output (capacity)
to obtain the specific generation profile.
Parameters
----------
ds : xarray.Dataset
Input dataset (cutout) containing two variables:
wave_height: significant wave height (m)
wave_period: peak wave period (s)
wec_type : dict
Dictionary defining the WEC characteristics, including:
Power_Matrix: a power matrix dictionary stored in "resources\wecgenerator"
Returns
-------
xarray.DataArray
DataArray of specific power generation values (normalized power output).
Notes
-----
A progress message is printed every one million cases to track computation.
"""
power_matrix = pd.DataFrame.from_dict(wec["Power_Matrix"])
max_pow = power_matrix.to_numpy().max()
Hs = np.ceil(ds["wave_height"] * 2) / 2
Tp = np.ceil(ds["wave_period"] * 2) / 2
Hs_list = Hs.to_numpy().flatten().tolist()
Tp_list = Tp.to_numpy().flatten().tolist()
# empty list for result
power_list = []
cases = len(Hs_list)
count = 0
# for loop to loop through Hs and Tp pairs and get the power output and capacity factor
for Hs_ind, Tp_ind in zip(Hs_list, Tp_list):
if count % 1000000 == 0:
print(f"Case {count} of {cases}: %")
if np.isnan(Hs_ind) or np.isnan(Tp_ind):
power_list.append(0)
elif Hs_ind > 10 or Tp_ind > 18:
power_list.append(0)
else:
generated_power = power_matrix.loc[Hs_ind, Tp_ind]
power_list.append(generated_power / max_pow)
count += 1
# results list to numpy array
power_list_np = np.array(power_list)
power_list_np = power_list_np.reshape(Hs.shape)
da = xr.DataArray(
power_list_np, coords=Hs.coords, dims=Hs.dims, name="Power generated"
)
da.attrs["units"] = "kWh/kWp"
da = da.rename("specific generation")
da = da.fillna(0)
return da
# wave
def convert_wave(ds, wec_type, time_chunk_size: int = 100) -> xr.DataArray:
r"""
Convert wave height (Hs) and wave peak period (Tp) data into normalized power output
using the device-specific Wave Energy Converter (WEC) power matrix.
This function matches each combination of significant wave height and peak period
in the dataset to a corresponding power output from the WEC power matrix.
The resulting power output is normalized by the maximum possible output (capacity)
to obtain the specific generation profile.
Parameters
----------
ds : xarray.Dataset
Input dataset (cutout) containing two variables:
wave_height: significant wave height (m)
wave_period: peak wave period (s)
wec_type : dict
Dictionary defining the WEC characteristics, including:
Power_Matrix: a power matrix dictionary stored in "resources\wecgenerator"
time_chunk_size : int
Size of time chunks for processing large datasets, to limit memory spikes. Default is 100.
Returns
-------
xarray.DataArray
DataArray of specific power generation values (normalized power output).
Notes
-----
A progress message is printed every one million cases to track computation.
"""
power_matrix = (
pd.DataFrame.from_dict(wec_type["Power_Matrix"])
.stack()
.rename_axis(index=["wave_height", "wave_period"])
.where(lambda x: x > 0)
.dropna()
.to_xarray()
)
results = []
steps = np.arange(0, len(ds.time), step=100)
for step in tqdm(steps, desc="Processing wave data chunks", total=len(steps), unit="time chunk"):
ds_ = ds.isel(time=slice(step, step + time_chunk_size))
cf = power_matrix.interp(
{"wave_height": ds_.wave_height, "wave_period": ds_.wave_period},
method="nearest",
)
results.append(cf)
da = xr.concat(results, dim="time")
da.attrs["units"] = "kWh/kWp"
da = da.rename("specific generation")
da = da.fillna(0)
return da

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok perfect, thank you very much I will test it.

I need to make a correction on the variables used. I am going through my processes and I see that I used an extrapolated tp which is the 1/fp where fp: wave peak frequency. I did this a long time ago and kind of forgot it. I used to have this conversion in the mrel_wave.py module, but it was more convenient for me back then to replace fp with tp in the dataset and test cutout creation like that. Should I include this conversion in the module?

t01 and t02 can be used aswel, and the results will be more or less the same (approx 2% off).

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's fine to use either, depending on which you think is most appropriate. For comparability with the output of era5.py, if you use fp then you should convert it to wave_period (1/fp) directly in mrel.py



def wave(cutout, wec, **params):
"""
Compute wave energy generation time series for a given cutout and Wave Energy Converter (WEC) type.

Parameters
----------
cutout : atlite.Cutout
Atlite cutout object containing wave-related data (e.g., `wave_height`, `wave_period`).
wec_type : str, pathlib.Path, or dict
WEC configuration describing the device's power characteristics.

Returns
-------
xarray.DataArray
Time series of normalized wave power generation for the entire cutout area, with units of "kWh/kWp".
The dimensions and resolution follow the input cutout and aggregation parameters.

References
----------
[1] Lavidas G., Mezilis L., Alday M., Baki H., Tan J., Jain A., Engelfried T. and Raghavan V.,
Marine renewables in Energy Systems: Impacts of climate data, generators, energy policies,
opportunities, and untapped potential for 100% decarbonised systems. Energy, Volume 336, 2025,
138359, ISSN 0360-5442, https://doi.org/10.1016/j.energy.2025.138359.
"""
if isinstance(wec, str | Path):
wec = get_waveenergyconverter(wec)

return cutout.convert_and_aggregate(convert_func=convert_wave, wec=wec, **params)


def convert_irradiation(
ds,
orientation,
Expand Down
3 changes: 3 additions & 0 deletions atlite/cutout.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
soil_temperature,
solar_thermal,
temperature,
wave,
wind,
)
from atlite.data import available_features, cutout_prepare
Expand Down Expand Up @@ -661,6 +662,8 @@ def layout_from_capacity_list(self, data, col="Capacity"):

wind = wind

wave = wave

irradiation = irradiation

pv = pv
Expand Down
10 changes: 8 additions & 2 deletions atlite/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
atlite datasets.
"""

from atlite.datasets import era5, gebco, sarah
from atlite.datasets import cerra, era5, gebco, mrel_wave, sarah

modules = {"era5": era5, "sarah": sarah, "gebco": gebco}
modules = {
"era5": era5,
"sarah": sarah,
"mrel_wave": mrel_wave,
"cerra": cerra,
"gebco": gebco,
}
72 changes: 72 additions & 0 deletions atlite/datasets/cerra.py

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since CERRA is available via the Climate Data Store, we should follow the same approach to data retrieval as with ERA5. You could probably just copy era5.py directly and delete all but the wind getter method, then adapt the wind getter method to match the data available from CERRA.

It might be best to drop this from this PR, though, and bring it in separately later. I can see a benefit to changing the features we bring in for wind since we can retrieve wind speed at various height/pressure levels from CDS, which would allow us to create a wind vertical profile (as we do with ERA5 data).

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem with CERRA here is that there is a lot of preprocessing of data in order for atlite to be able to read it. I agree that it is best to review this later. Should I take an action on this or can you simply reject this file?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The easiest is for you to simply delete this file (and reference to cerra elsewhere). If we want to revisit it, this file will still be in the commit history so we can bring it back if we want anything from it!

Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# SPDX-FileCopyrightText: Contributors to atlite <https://github.com/pypsa/atlite>
#
# SPDX-License-Identifier: MIT

"""
In order to create a CERRA cutout, the data must be manually downloaded from the Climate Data Store.
The variable used is "10m wind speed" and there is not a direction component in it.
This 10m wind speed was transformed into a 100m wind speed in order to follow the rest of atlite's processes.
"""

import logging

import numpy as np
import xarray as xr
from rasterio.warp import Resampling

from atlite.gis import regrid

logger = logging.getLogger(__name__)

crs = 4326
dx = 0.05
dy = 0.05

features = {"wind": ["wnd100m", "roughness"]}


def as_slice(bounds, pad=True):
"""
Convert coordinate bounds to slice and pad by 0.01.
"""
if not isinstance(bounds, slice):
bounds = bounds + (-0.01, 0.01)
bounds = slice(*bounds)
return bounds


def get_data(cutout, feature, tmpdir, **creation_parameters):
"""
Retrieve data from a local CERRA dataset and process it.
"""
coords = cutout.coords

if "data_path" not in creation_parameters:
logger.error('Argument "data_path" not defined')
raise ValueError('Argument "data_path" not defined')
path = creation_parameters["data_path"]

ds = xr.open_dataset(path)

ds = ds.sel(x=as_slice(cutout.extent[:2]), y=as_slice(cutout.extent[2:]))
ds = ds.assign_coords(x=ds.x.astype(float).round(4), y=ds.y.astype(float).round(4))

if (cutout.dx != dx) or (cutout.dy != dy):
ds = regrid(ds, coords["x"], coords["y"], resampling=Resampling.average)

if "sr" in ds:
ds = ds.rename({"sr": "roughness"})

logger.info("Calculating 100 metre wind speed")
if "si10" in ds and "roughness" in ds:
ds["wnd100m"] = (
ds["si10"] * (np.log(100 / ds["roughness"]) / np.log(10 / ds["roughness"]))
).assign_attrs(units="m s**-1", long_name="100 metre wind speed")
ds = ds.drop_vars("si10")

ds = ds.assign_coords(x=ds.coords["x"], y=ds.coords["y"])

logger.info("Resampling to 1H.")
ds = ds.resample(time="1h").interpolate("linear")

return ds
48 changes: 48 additions & 0 deletions atlite/datasets/era5.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def nullcontext():
],
"temperature": ["temperature", "soil temperature", "dewpoint temperature"],
"runoff": ["runoff"],
"wave": ["wave_height", "wave_period"],
}

static_features = {"height"}
Expand Down Expand Up @@ -244,6 +245,53 @@ def sanitize_runoff(ds):
return ds


def get_data_wave_height(retrieval_params):
"""
Get wave height data for given retrieval parameters.
"""
ds = retrieve_data(
variable=[
"significant_height_of_combined_wind_waves_and_swell",
],
**retrieval_params,
)
ds = _rename_and_clean_coords(ds)
ds = ds.rename({"swh": "wave_height"})

return ds


def sanitize_wave_height(ds):
"""
Sanitize retrieved wave height data.
"""
ds["wave_height"] = ds["wave_height"].clip(min=0.0)
return ds


def get_data_wave_period(retrieval_params):
"""
Get wave period data for given retrieval parameters.
"""
ds = retrieve_data(
variable=["peak_wave_period"],
**retrieval_params,
)

ds = _rename_and_clean_coords(ds)
ds = ds.rename({"pp1d": "wave_period"})

return ds


def sanitize_wave_period(ds):
"""
Sanitize retrieved wave period data.
"""
ds["wave_period"] = ds["wave_period"].clip(min=0.0)
return ds


def get_data_height(retrieval_params):
"""
Get height data for given retrieval parameters.
Expand Down
Loading