-
Notifications
You must be signed in to change notification settings - Fork 152
Update CMORizer CLARA-AVHRR (A3) #4463
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
f3e07d4
15138ac
e4d66ba
7aac726
cec2a1c
0ca5a49
1c7f175
f1c7ad2
bf0e8fa
e75ab5c
90b05f5
392729a
39e9f4b
fc08aec
aa1b637
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,56 @@ | ||
| # Common global attributes for Cmorizer output | ||
| attributes: | ||
| dataset_id: CLARA-AVHRR | ||
| version: A3 | ||
| tier: 2 | ||
| type: sat | ||
| project_id: OBS6 | ||
| source: "https://cds.climate.copernicus.eu/datasets/satellite-cloud-properties" | ||
| reference: "clara-a3" | ||
| comment: "" | ||
|
|
||
| variables: | ||
| # monthly means | ||
| clivi_month: | ||
| mip: Amon | ||
| short_name: clivi | ||
| raw: iwp_allsky | ||
| filename: IWPmm{year}*.nc | ||
| clt_month: | ||
| mip: Amon | ||
| short_name: clt | ||
| raw: cfc | ||
| filename: CFCmm{year}*.nc | ||
| lwp_month: | ||
| mip: Amon | ||
| short_name: lwp | ||
| raw: lwp_allsky | ||
| filename: LWPmm{year}*.nc | ||
| clwvi_month: | ||
| mip: Amon | ||
| short_name: clwvi | ||
| raw: ['lwp_allsky', 'iwp_allsky'] | ||
| filename: ['LWPmm{year}*.nc', 'IWPmm{year}*.nc'] | ||
| operator: sum | ||
| # daily means | ||
| clivi_day: | ||
| mip: CFday | ||
| short_name: clivi | ||
| raw: iwp_allsky | ||
| filename: IWPdm{year}{month}*.nc | ||
| clt_day: | ||
| mip: CFday | ||
| short_name: clt | ||
| raw: cfc | ||
| filename: CFCdm{year}{month}*.nc | ||
| lwp_day: | ||
| mip: CFday | ||
| short_name: lwp | ||
| raw: lwp_allsky | ||
| filename: LWPdm{year}{month}*.nc | ||
| clwvi_day: | ||
| mip: CFday | ||
| short_name: clwvi | ||
| raw: ['lwp_allsky', 'iwp_allsky'] | ||
| filename: ['LWPdm{year}{month}*.nc', 'IWPdm{year}{month}*.nc'] | ||
| operator: sum |
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,181 @@ | ||||||||||||||
| """Script to download CM SAF CLARA-AVHRR data from the CDS.""" | ||||||||||||||
|
|
||||||||||||||
| import datetime | ||||||||||||||
| import gzip | ||||||||||||||
| import logging | ||||||||||||||
| import shutil | ||||||||||||||
| import zipfile | ||||||||||||||
| from pathlib import Path | ||||||||||||||
|
|
||||||||||||||
| import cdsapi | ||||||||||||||
|
|
||||||||||||||
| logger = logging.getLogger(__name__) | ||||||||||||||
|
|
||||||||||||||
|
|
||||||||||||||
# Short variable name used in the output file names -> CDS "variable" keyword.
_CDS_VARIABLES = {
    "clivi": "cloud_physical_properties_of_the_ice_phase",
    "clt": "cloud_fraction",
    "lwp": "cloud_physical_properties_of_the_liquid_phase",
}


def _build_request(variable, time_aggregation, year, **time_selection):
    """Return the CDS request dict for one variable and one time period."""
    return {
        "product_family": "clara_a3",
        "origin": "eumetsat",
        "variable": variable,
        "climate_data_record_type": "thematic_climate_data_record",
        "time_aggregation": time_aggregation,
        "year": str(year),
        **time_selection,
    }


def _extract_archive(file_path, outdir):
    """Unpack the downloaded archive ``file_path`` (ZIP or GZIP) into ``outdir``."""
    with file_path.open("rb") as file:
        magic = file.read(2)

    if magic == b"PK":  # ZIP file signature
        logger.info("Detected ZIP file: %s", file_path)
        with zipfile.ZipFile(file_path, "r") as zip_ref:
            zip_ref.extractall(outdir)
    else:
        # Anything that is not a ZIP archive is treated as GZIP.
        logger.info("Detected GZIP file: %s", file_path)
        # The target must be opened for *writing*: opening it with "rb"
        # raises FileNotFoundError because the file does not exist yet.
        with (
            gzip.open(file_path, "rb") as f_in,
            (outdir / file_path.stem).open("wb") as f_out,
        ):
            shutil.copyfileobj(f_in, f_out)


def download_dataset(
    original_data_dir,
    dataset,
    dataset_info,
    start_date,
    end_date,
    overwrite,
):
    """Download dataset.

    One CDS request is issued per variable and year for monthly means and
    per variable, year and month for daily means. Each download is stored
    as ``<variable>_<monthly|daily>_<date>.gz`` below
    ``Tier<tier>/<dataset>/<monthly|daily>/<date>/`` and unpacked in place.
    A request that fails is logged as a warning and skipped, so the
    remaining requests are still processed.

    Parameters
    ----------
    original_data_dir : Path
        Directory where original data will be stored.
    dataset : str
        Name of the dataset
    dataset_info : dict
        Dataset information from the datasets.yml file
    start_date : datetime
        Start of the interval to download. If None, monthly data start in
        1979 and daily data start in 2020.
    end_date : datetime
        End of the interval to download. If None, 2020 is the last year.
    overwrite : bool
        Overwrite already downloaded files
    """
    cds_url = "https://cds.climate.copernicus.eu/api"

    output_folder = original_data_dir / f"Tier{dataset_info['tier']}" / dataset
    output_folder.mkdir(parents=True, exist_ok=True)

    # Note: As of May 2026, CLARA-AVHRR (version A3) data on the CDS are only
    #       available until 2020. Alternatively, CLARA-AVHRR data could also
    #       be obtained from the EUMETSAT data store
    #       (https://data.eumetsat.int/).

    if start_date is None:
        start_date_mm = datetime.datetime(1979, 1, 1, tzinfo=datetime.UTC)
        start_date_dd = datetime.datetime(2020, 1, 1, tzinfo=datetime.UTC)
    else:
        start_date_mm = start_date
        start_date_dd = start_date

    if end_date is None:
        end_date_mm = datetime.datetime(2020, 12, 31, tzinfo=datetime.UTC)
        end_date_dd = datetime.datetime(2020, 12, 31, tzinfo=datetime.UTC)
    else:
        end_date_mm = end_date
        end_date_dd = end_date

    requests = {}

    # Monthly means: one request per variable and year.
    for year in range(start_date_mm.year, end_date_mm.year + 1):
        for short_name, variable in _CDS_VARIABLES.items():
            requests[f"{short_name}_monthly_{year}"] = _build_request(
                variable,
                "monthly_mean",
                year,
                month=[f"{m:02d}" for m in range(1, 13)],
            )

    # The CDS requests for daily values are done for each month separately
    # to avoid the error "cost limits exceeded".
    for year in range(start_date_dd.year, end_date_dd.year + 1):
        for month in range(1, 13):
            for short_name, variable in _CDS_VARIABLES.items():
                requests[f"{short_name}_daily_{year}{month:02d}"] = (
                    _build_request(
                        variable,
                        "daily_mean",
                        year,
                        month=f"{month:02d}",
                        day=[f"{d:02d}" for d in range(1, 32)],
                    )
                )

    cds_client = cdsapi.Client(cds_url)

    for var_name, request in requests.items():
        # Request keys look like "<variable>_<frequency>_<date>".
        datestr = var_name.split("_")[2]
        frequency = "daily" if "daily" in var_name else "monthly"
        outdir = output_folder / frequency / datestr
        outdir.mkdir(parents=True, exist_ok=True)

        logger.info("Downloading %s data to %s", var_name, outdir)

        file_path = outdir / f"{var_name}.gz"

        if file_path.exists() and not overwrite:
            logger.info(
                "File %s already exists. Skipping download.",
                file_path,
            )
            continue

        try:
            cds_client.retrieve(
                "satellite-cloud-properties",
                request,
                file_path.as_posix(),
            )
            # Handle both .gz and .zip files
            _extract_archive(file_path, outdir)
        except Exception as ex:  # best effort: keep going with other requests
            # Logged as a warning (including the error message) so that
            # missing data do not go unnoticed.
            logger.warning(
                "%s: no data downloaded for %s (%s)",
                type(ex).__name__,
                var_name,
                ex,
            )
|
Comment on lines
+180
to
+181
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This will more or less silently skip missing variables. Is this desirable? Can the dataset be used if files are missing? If not, I would remove this
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree with Manu - perhaps do a check on the required variables, and if at least one is missing, then raise an exception rather than masking it via try/except |
||||||||||||||
Uh oh!
There was an error while loading. Please reload this page.