From 2ca9c377da40409804f52f4c50a0ea858582db79 Mon Sep 17 00:00:00 2001
From: James Collins
Date: Wed, 15 May 2024 14:25:05 -0700
Subject: [PATCH 01/71] Work out a bunch of stuff

---
 src/climate_downscale/data.py              |  4 +
 src/climate_downscale/extract/elevation.py | 29 +++---
 .../extract/ncei_climate_stations.py       | 10 +-
 .../model/prepare_predictors.py            | 94 +++++++++++++++++++
 .../model/prepare_training_data.py         | 24 +++++
 5 files changed, 144 insertions(+), 17 deletions(-)
 create mode 100644 src/climate_downscale/model/prepare_predictors.py
 create mode 100644 src/climate_downscale/model/prepare_training_data.py

diff --git a/src/climate_downscale/data.py b/src/climate_downscale/data.py
index 609baea..60c6ec0 100644
--- a/src/climate_downscale/data.py
+++ b/src/climate_downscale/data.py
@@ -32,6 +32,10 @@ def ncei_climate_stations(self) -> Path:
     def srtm_elevation_gl1(self) -> Path:
         return self.extracted_data / "srtm_elevation_gl1"
 
+    @property
+    def open_topography_elevation(self) -> Path:
+        return self.extracted_data / "open_topography_elevation"
+
     @property
     def rub_local_climate_zones(self) -> Path:
         return self.extracted_data / "rub_local_climate_zones"
diff --git a/src/climate_downscale/extract/elevation.py b/src/climate_downscale/extract/elevation.py
index 261c172..3c42845 100644
--- a/src/climate_downscale/extract/elevation.py
+++ b/src/climate_downscale/extract/elevation.py
@@ -7,21 +7,22 @@
     with_output_directory,
     with_queue,
 )
+import tqdm
 
 from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData
 
 API_ENDPOINT = "https://portal.opentopography.org/API/globaldem"
 ELEVATION_MODELS = [
-    "SRTMGL3",  # SRTM Global 3 arc second (90m)
-    "SRTMGL1",  # SRTM Global 1 arc second (30m)
-    "SRTMGL1_E",  # SRTM Global 1 arc second ellipsoidal height (30m)
-    "AW3D30",  # ALOS World 3D 30m
-    "AW3D30_E",  # ALOS World 3D 30m ellipsoidal height
+    "SRTMGL3",    # SRTM Global 3 arc second (90m)
+    "SRTMGL1",    # SRTM Global 1 arc second (30m)
+    "SRTMGL1_E",  # SRTM Global 1 arc second ellipsoidal height (30m)
+    "AW3D30",     # ALOS World 3D 30m
+    "AW3D30_E",   # ALOS World 3D 30m ellipsoidal height
     "SRTM15Plus", # SRTM 15 arc second (500m)
-    "NASADEM",  # NASA DEM 1 arc second (30m)
-    "COP30",  # Copernicus 1 arc second (30m)
-    "COP90",  # Copernicus 3 arc second (90m)
+    "NASADEM",    # NASA DEM 1 arc second (30m)
+    "COP30",      # Copernicus 1 arc second (30m)
+    "COP90",      # Copernicus 3 arc second (90m)
 ]
 
 FETCH_SIZE = 5  # degrees, should be small enough for any model
@@ -38,7 +39,7 @@ def extract_elevation_main(
     key = cred_path.read_text().strip()
 
     params: dict[str, int | str] = {
-        "dem_type": model_name,
+        "demtype": model_name,
         "south": lat_start,
         "north": lat_start + FETCH_SIZE,
         "west": lon_start,
@@ -47,12 +48,12 @@ def extract_elevation_main(
         "east": lon_start + FETCH_SIZE,
         "outputFormat": "GTiff",
         "API_Key": key,
     }
 
-    response = requests.get(API_ENDPOINT, params=params, stream=True, timeout=10)
+    response = requests.get(API_ENDPOINT, params=params, stream=True, timeout=30)
     response.raise_for_status()
 
-    out_path = cd_data.srtm_elevation_gl1 / f"{model_name}_{lat_start}_{lon_start}.tif"
+    out_path = cd_data.open_topography_elevation / f"{model_name}_{lat_start}_{lon_start}.tif"
     with out_path.open("wb") as fp:
-        for chunk in response.iter_content(chunk_size=None):
+        for chunk in tqdm.tqdm(response.iter_content(chunk_size=64 * 1024**2)):
             fp.write(chunk)
 
 
@@ -72,8 +73,8 @@ def extract_elevation_main(
 )
 @click.option(
     "--lon-start",
-    required=int,
-    type=float,
+    required=True,
+    type=int,
     help="Longitude of the top-left corner of the tile.",
 )
 def extract_elevation_task(
diff --git a/src/climate_downscale/extract/ncei_climate_stations.py b/src/climate_downscale/extract/ncei_climate_stations.py
index 1ecd096..43adeb0 100644
--- a/src/climate_downscale/extract/ncei_climate_stations.py
+++ b/src/climate_downscale/extract/ncei_climate_stations.py
@@ -29,10 +29,13 @@ def extract_ncei_climate_stations_main(output_dir: str | Path, year: str) -> Non
     shutil.unpack_archive(str(gz_path), year_dir)
 
     data = pd.concat([pd.read_csv(f) for f in year_dir.glob("*.csv")])
+    data['STATION'] = data['STATION'].astype(str)
     out_path = cd_data.ncei_climate_stations / f"{year}.parquet"
-    touch(out_path)
     data.to_parquet(out_path)
 
+    gz_path.unlink()
+    shutil.rmtree(year_dir)
+
 
 @click.command()  # type: ignore[arg-type]
 @with_choice(
@@ -52,9 +55,9 @@ def extract_ncei_climate_stations_task(output_dir: str, year: str) -> None:
 @with_queue()
 def extract_ncei_climate_stations(output_dir: str, queue: str) -> None:
     jobmon.run_parallel(
-        "extract_ncei_climate_stations",
+        "extract ncei",
         node_args={
-            "output_dir": [output_dir],
+            "output-dir": [output_dir],
             "year": EXTRACTION_YEARS,
         },
         task_resources={
@@ -66,3 +69,4 @@ def extract_ncei_climate_stations(output_dir: str, queue: str) -> None:
         },
         runner="cdtask",
     )
+
diff --git a/src/climate_downscale/model/prepare_predictors.py b/src/climate_downscale/model/prepare_predictors.py
new file mode 100644
index 0000000..910c572
--- /dev/null
+++ b/src/climate_downscale/model/prepare_predictors.py
@@ -0,0 +1,94 @@
+import geopandas as gpd
+import pandas as pd
+import xarray as xr
+import tqdm
+import matplotlib.pyplot as plt
+import rasterra as rt
+from affine import Affine
+import numpy as np
+from pathlib import Path
+
+def to_raster(ds, nodata, lat_col='lat', lon_col='lon'):
+    lat, lon = ds[lat_col].data, ds[lon_col].data
+
+    dlat = (lat[1:] - lat[:-1]).mean()
+    dlon = (lon[1:] - lon[:-1]).mean()
+
+    transform = Affine(
+        a=dlon,
+        b=0.,
+        c=lon[0],
+        d=0.,
+        e=-dlat,
+        f=lat[-1],
+    )
+    raster = rt.RasterArray(
+        data = ds.data,
+        transform=transform,
+        crs='EPSG:4326',
+        no_data_value=nodata,
+    )
+    return raster
+
+def make_template(x_min, y_min, stride, resolution):
+    evenly_divides = (
+        (stride % resolution < 1e-12)
+        or (resolution - stride % resolution < 1e-12)
+    )
+    assert evenly_divides
+
+    transform = Affine(
+        a=resolution,
+        b=0,
+        c=x_min,
+        d=0,
+        e=-resolution,
+        f=y_min + stride,
+    )
+
+    n_pix = int(stride / resolution)
+
+    data = np.zeros((n_pix, n_pix), dtype=np.int8)
+    template = rt.RasterArray(
+        data,
+        transform,
+        crs='EPSG:4326',
+        no_data_value=-1,
+    )
+    return template
+
+
+STRIDE = 30  # degrees
+PAD = 1
+lat_start = 0
+lon_start = 0
+
+longitudes = range(lon_start - PAD, lon_start + STRIDE + PAD)
+latitudes = range(lat_start - PAD, lat_start + STRIDE + PAD)
+
+template_era5 = make_template(x_min=lon_start, y_min=lat_start, stride=STRIDE, resolution=0.1)
+template_target = make_template(x_min=lon_start, y_min=lat_start, stride=STRIDE, resolution=0.01)
+
+root = Path("/mnt/share/erf/climate_downscale/extracted_data/open_topography_elevation/SRTM_GL3_srtm")
+paths = []
+for lon in longitudes:
+    lon_stub = f"E{lon:03}" if lon >= 0 else f"W{-lon:03}"
+
+    for lat in range(lat_start, lat_start+STRIDE):
+        if lat >= 30:
+            rel_path = f"North/North_30_60/N{lat:02}{lon_stub}.tif"
+        elif lat >=0:
+            rel_path = f"North/North_0_29/N{lat:02}{lon_stub}.tif"
+        else:
+            rel_path = f"South/S{-lat:02}{lon_stub}.tif"
+
+        p = root / rel_path
+
+        if p.exists():
+            paths.append(p)
+
+elevation = rt.load_mf_raster(paths)
+
+elevation_target = elevation.resample_to(template_target, resampling='average')
+elevation_era5 = elevation.resample_to(template_era5, resampling='average').resample_to(template_target, resampling='nearest')
+elevation_anomaly = elevation_era5 - elevation_target
\ No newline at end of file
diff --git a/src/climate_downscale/model/prepare_training_data.py b/src/climate_downscale/model/prepare_training_data.py
new file mode 100644
index 0000000..6479c7c
--- /dev/null
+++ b/src/climate_downscale/model/prepare_training_data.py
@@ -0,0 +1,24 @@
+year = 2023
+
+climate_stations = pd.read_parquet(f'/mnt/share/erf/climate_downscale/extracted_data/ncei_climate_stations/{year}.parquet')
+column_map = {
+    "DATE": "date",
+    "LATITUDE": "lat",
+    "LONGITUDE": "lon",
+    "TEMP": "temperature",
+}
+climate_stations = climate_stations.rename(columns=column_map).loc[:, list(column_map.values())].dropna()
+climate_stations['date'] = pd.to_datetime(climate_stations['date'])
+climate_stations['year'] = climate_stations['date'].dt.year
+climate_stations['dayofyear'] = climate_stations['date'].dt.dayofyear
+climate_stations['temperature'] = 5/9 * (climate_stations['temperature'] - 32)
+climate_stations.loc[climate_stations.lon < 0, 'lon'] +=360
+
+era5 = xr.load_dataset(f'/mnt/share/erf/climate_downscale/extracted_data/era5_temperature_daily_mean/{year}_era5_temp_daily.nc')
+lat = xr.DataArray(climate_stations['lat'].values, dims=['points'])
+lon = xr.DataArray(climate_stations['lon'].values, dims=['points'])
+time = xr.DataArray(climate_stations['date'].values, dims=['points'])
+arr = era5.sel(latitude=lat, longitude=lon, time=time, method='nearest')
+if "expver" in arr.coords:
+    arr = arr.sel(expver=1).combine_first(arr.sel(expver=5))
+climate_stations['era5_temperature'] = arr['t2m'].to_numpy() + 273.15
\ No newline at end of file
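Note: a quick standalone check (not part of the patch) of the pixel-to-coordinate mapping implied by make_template's Affine transform. round() is used for the pixel count here to sidestep float truncation; the patch itself uses int(stride / resolution).

    import math
    from affine import Affine

    x_min, y_min, stride, resolution = 0, 0, 30, 0.1
    transform = Affine(a=resolution, b=0, c=x_min, d=0, e=-resolution, f=y_min + stride)
    n_pix = round(stride / resolution)

    # (col, row) -> (lon, lat): (0, 0) is the top-left corner of the tile and
    # (n_pix, n_pix) the bottom-right, so the grid spans the full 30-degree tile.
    assert transform * (0, 0) == (x_min, y_min + stride)
    x_br, y_br = transform * (n_pix, n_pix)
    assert math.isclose(x_br, x_min + stride, abs_tol=1e-9)
    assert math.isclose(y_br, y_min, abs_tol=1e-9)
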
"PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -2068,13 +2069,13 @@ test = ["boto3 (>=1.2.4)", "hypothesis", "packaging", "pytest (>=2.8.2)", "pytes [[package]] name = "rasterra" -version = "0.5.9" +version = "0.5.10" description = "A sleek, object-oriented interface designed for intuitive raster data manipulation in Python." optional = false python-versions = "<3.13,>=3.10" files = [ - {file = "rasterra-0.5.9-py3-none-any.whl", hash = "sha256:1ef3c0e36564574f870cb919553087a8f46e761c4a87da9c385dd0fa4223293f"}, - {file = "rasterra-0.5.9.tar.gz", hash = "sha256:ebe5f16df392aa2da8dc5214a70eeabb8679d33429aae877128fc299b31cede5"}, + {file = "rasterra-0.5.10-py3-none-any.whl", hash = "sha256:9d281f98e4cb6375a12a7b09cc61dcb8a07c53ecab12e62c7147254ae04307b6"}, + {file = "rasterra-0.5.10.tar.gz", hash = "sha256:6080156dc8395c7ba427d518ad5d8cbc4fe583ca372b281819807a4d0aacd543"}, ] [package.dependencies] @@ -2530,6 +2531,17 @@ files = [ [package.dependencies] urllib3 = ">=2" +[[package]] +name = "types-tqdm" +version = "4.66.0.20240417" +description = "Typing stubs for tqdm" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-tqdm-4.66.0.20240417.tar.gz", hash = "sha256:16dce9ef522ea8d40e4f5b8d84dd8a1166eefc13ceee7a7e158bf0f1a1421a31"}, + {file = "types_tqdm-4.66.0.20240417-py3-none-any.whl", hash = "sha256:248aef1f9986b7b8c2c12b3cb4399fc17dba0a29e7e3f3f9cd704babb879383d"}, +] + [[package]] name = "typing-extensions" version = "4.11.0" @@ -2671,4 +2683,4 @@ viz = ["matplotlib", "nc-time-axis", "seaborn"] [metadata] lock-version = "2.0" python-versions = ">=3.10, <3.13" -content-hash = "3682b1d3e4c4deae125e670f718deb49af4686d36d38fe4d38e18da86f49100c" +content-hash = "7dcea9087ab369daf1a2661541b1add72c879d396fda8b2115a24c999356a822" diff --git a/pyproject.toml b/pyproject.toml index 0e4ff6f..2745a11 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ python = ">=3.10, <3.13" click = "*" numpy = "^1.26.4" pandas = "^2.2.2" -rasterra = "^0.5.9" +rasterra = "^0.5.10" shapely = "^2.0.4" geopandas = "^0.14.4" xarray = "^2024.3.0" @@ -45,6 +45,7 @@ rra-tools = "^1.0.6" netcdf4 = "^1.6.5" pyarrow = "^16.0.0" types-requests = "^2.31.0.20240406" +types-tqdm = "^4.66.0.20240417" [tool.poetry.group.dev.dependencies] mkdocstrings = {version = ">=0.23", extras = ["python"]} @@ -90,6 +91,7 @@ ignore = [ "RUF007", # zip is idiomatic, this is a dumb check "RET505", # Else after return, makes a lot of false positives "E501", # Line too long, this is autoformatted + "PYI041", # Use float instead of int | float; dumb rule ] [tool.ruff.lint.per-file-ignores] @@ -142,6 +144,7 @@ exclude = [ [[tool.mypy.overrides]] module = [ "cdsapi.*", + "affine.*", ] ignore_missing_imports = true diff --git a/src/climate_downscale/cli.py b/src/climate_downscale/cli.py index 28a009e..b962d54 100644 --- a/src/climate_downscale/cli.py +++ b/src/climate_downscale/cli.py @@ -1,6 +1,6 @@ import click -from climate_downscale import extract +from climate_downscale import extract, model @click.group() @@ -13,7 +13,7 @@ def cdtask() -> None: """Entry point for running climate downscale tasks.""" -for module in [extract]: +for module in [extract, model]: runners = getattr(module, "RUNNERS", {}) task_runners = getattr(module, "TASK_RUNNERS", {}) diff --git a/src/climate_downscale/data.py b/src/climate_downscale/data.py index 60c6ec0..06f9db5 100644 --- a/src/climate_downscale/data.py +++ b/src/climate_downscale/data.py @@ -1,4 +1,7 @@ 
from pathlib import Path +from typing import Any + +import rasterra as rt DEFAULT_ROOT = "/mnt/share/erf/climate_downscale/" @@ -39,3 +42,51 @@ def open_topography_elevation(self) -> Path: @property def rub_local_climate_zones(self) -> Path: return self.extracted_data / "rub_local_climate_zones" + + @property + def model(self) -> Path: + return self.root / "model" + + @property + def predictors(self) -> Path: + return self.model / "predictors" + + def save_predictor(self, predictor: rt.RasterArray, name: str) -> None: + save_raster(predictor, self.predictors / f"{name}.tif") + + def load_predictor(self, name: str) -> rt.RasterArray: + return rt.load_raster(self.predictors / f"{name}.tif") + + +def save_raster( + raster: rt.RasterArray, + output_path: str | Path, + num_cores: int = 1, + **kwargs: Any, +) -> None: + """Save a raster to a file with standard parameters.""" + save_params = { + "tiled": True, + "blockxsize": 512, + "blockysize": 512, + "compress": "ZSTD", + "predictor": 2, # horizontal differencing + "num_threads": num_cores, + "bigtiff": "yes", + **kwargs, + } + raster.to_file(output_path, **save_params) + + +def save_raster_to_cog( + raster: rt.RasterArray, + output_path: str | Path, + num_cores: int = 1, + resampling: str = "nearest", +) -> None: + """Save a raster to a COG file.""" + cog_save_params = { + "driver": "COG", + "overview_resampling": resampling, + } + save_raster(raster, output_path, num_cores, **cog_save_params) diff --git a/src/climate_downscale/model/__init__.py b/src/climate_downscale/model/__init__.py new file mode 100644 index 0000000..81a96c6 --- /dev/null +++ b/src/climate_downscale/model/__init__.py @@ -0,0 +1,12 @@ +from climate_downscale.model.prepare_predictors import ( + prepare_predictors, + prepare_predictors_task, +) + +RUNNERS = { + "prepare_predictors": prepare_predictors, +} + +TASK_RUNNERS = { + "prepare_predictors": prepare_predictors_task, +} diff --git a/src/climate_downscale/model/prepare_predictors.py b/src/climate_downscale/model/prepare_predictors.py index 910c572..4cd20dc 100644 --- a/src/climate_downscale/model/prepare_predictors.py +++ b/src/climate_downscale/model/prepare_predictors.py @@ -1,94 +1,118 @@ -import geopandas as gpd -import pandas as pd -import xarray as xr -import tqdm -import matplotlib.pyplot as plt -import rasterra as rt -from affine import Affine -import numpy as np +from collections.abc import Sequence from pathlib import Path -def to_raster(ds, nodata, lat_col='lat', lon_col='lon'): - lat, lon = ds[lat_col].data, ds[lon_col].data +import click +import rasterra as rt +from rra_tools import jobmon +from rra_tools.cli_tools import ( + with_choice, + with_output_directory, + with_queue, +) - dlat = (lat[1:] - lat[:-1]).mean() - dlon = (lon[1:] - lon[:-1]).mean() +from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData +from climate_downscale.utils import make_raster_template - transform = Affine( - a=dlon, - b=0., - c=lon[0], - d=0., - e=-dlat, - f=lat[-1], - ) - raster = rt.RasterArray( - data = ds.data, - transform=transform, - crs='EPSG:4326', - no_data_value=nodata, +# Degrees + +STRIDE = 30 +LATITUDES = [str(lat) for lat in range(-90, 90, STRIDE)] +LONGITUDES = [str(lon) for lon in range(-180, 180, STRIDE)] +PAD = 1 + + +def load_elevation( + cd_data: ClimateDownscaleData, + latitudes: Sequence[int], + longitudes: Sequence[int], +) -> rt.RasterArray: + data_root = cd_data.open_topography_elevation / "SRTM_GL3_srtm" + paths = [] + for lon in longitudes: + lon_stub = f"E{lon:03}" if lon >= 0 else 
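Note: the attachment step that consumes these RUNNERS/TASK_RUNNERS registries is outside the visible cli.py hunk; the following is a plausible self-contained sketch (registry contents and command names are illustrative) of how such a registry plugs into a click group.

    import click

    @click.group()
    def cdtask() -> None:
        """Entry point for running climate downscale tasks."""

    # A module registry shaped like model/__init__.py's TASK_RUNNERS:
    TASK_RUNNERS = {"prepare_predictors": click.command()(lambda: None)}

    for name, task_runner in TASK_RUNNERS.items():
        cdtask.add_command(task_runner, name=name)

    print(sorted(cdtask.commands))  # ['prepare_predictors']
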
f"W{-lon:03}" + for lat in latitudes: + if lat >= 30: # noqa: PLR2004 + rel_path = f"North/North_30_60/N{lat:02}{lon_stub}.tif" + elif lat >= 0: + rel_path = f"North/North_0_29/N{lat:02}{lon_stub}.tif" + else: + rel_path = f"South/S{-lat:02}{lon_stub}.tif" + + p = data_root / rel_path + + if p.exists(): + paths.append(p) + + return rt.load_mf_raster(paths) + + +def prepare_predictors_main( + output_dir: str | Path, lat_start: int, lon_start: int +) -> None: + cd_data = ClimateDownscaleData(output_dir) + predictors = {} + + longitudes = range(lon_start - PAD, lon_start + STRIDE + PAD) + latitudes = range(lat_start - PAD, lat_start + STRIDE + PAD) + + # Make upscale templates, one at ERA5 resolution and one at the target + # resolution for the predictors + template_era5 = make_raster_template( + x_min=lon_start, + y_min=lat_start, + stride=STRIDE, + resolution=0.1, ) - return raster - -def make_template(x_min, y_min, stride, resolution): - evenly_divides = ( - (stride % resolution < 1e-12) - or (resolution - stride % resolution < 1e-12) + template_target = make_raster_template( + x_min=lon_start, + y_min=lat_start, + stride=STRIDE, + resolution=0.01, ) - assert evenly_divides - - transform = Affine( - a=resolution, - b=0, - c=x_min, - d=0, - e=-resolution, - f=y_min + stride, + + elevation = load_elevation(cd_data, latitudes, longitudes) + + predictors["elevation_target"] = elevation.resample_to( + template_target, resampling="average" ) - - n_pix = int(stride / resolution) - - data = np.zeros((n_pix, n_pix), dtype=np.int8) - template = rt.RasterArray( - data, - transform, - crs='EPSG:4326', - no_data_value=-1, + predictors["elevation_era5"] = elevation.resample_to( + template_era5, resampling="average" + ).resample_to(template_target, resampling="nearest") + predictors["elevation_anomaly"] = ( + predictors["elevation_era5"] - predictors["elevation_target"] ) - return template + for name, predictor in predictors.items(): + cd_data.save_predictor(predictor, f"{name}_{lat_start}_{lon_start}") -STRIDE = 30 # degrees -PAD = 1 -lat_start = 0 -lon_start = 0 - -longitudes = range(lon_start - PAD, lon_start + STRIDE + PAD) -latitudes = range(lat_start - PAD, lat_start + STRIDE + PAD) - -template_era5 = make_template(x_min=lon_start, y_min=lat_start, stride=STRIDE, resolution=0.1) -template_target = make_template(x_min=lon_start, y_min=lat_start, stride=STRIDE, resolution=0.01) - -root = Path("/mnt/share/erf/climate_downscale/extracted_data/open_topography_elevation/SRTM_GL3_srtm") -paths = [] -for lon in longitudes: - lon_stub = f"E{lon:03}" if lon >= 0 else f"W{-lon:03}" - - for lat in range(lat_start, lat_start+STRIDE): - if lat >= 30: - rel_path = f"North/North_30_60/N{lat:02}{lon_stub}.tif" - elif lat >=0: - rel_path = f"North/North_0_29/N{lat:02}{lon_stub}.tif" - else: - rel_path = f"South/S{-lat:02}{lon_stub}.tif" - - p = root / rel_path - - if p.exists(): - paths.append(p) - -elevation = rt.load_mf_raster(paths) - -elevation_target = elevation.resample_to(template_target, resampling='average') -elevation_era5 = elevation.resample_to(template_era5, resampling='average').resample_to(template_target, resampling='nearest') -elevation_anomaly = elevation_era5 - elevation_target \ No newline at end of file + +@click.command() # type: ignore[arg-type] +@with_choice("lat_start", allow_all=True, choices=LATITUDES) +@with_choice("lon_start", allow_all=True, choices=LONGITUDES) +@with_output_directory(DEFAULT_ROOT) +def prepare_predictors_task( + lat_start: int, lon_start: int, output_dir: str | Path +) 
-> None: + prepare_predictors_main(output_dir, lat_start, lon_start) + + +@click.command() # type: ignore[arg-type] +@with_output_directory(DEFAULT_ROOT) +@with_queue() +def prepare_predictors(output_dir: str, queue: str) -> None: + jobmon.run_parallel( + "model prepare_predictors", + node_args={ + "output-dir": [output_dir], + "lat_start": LATITUDES, + "lon_start": LONGITUDES, + }, + task_resources={ + "queue": queue, + "cores": 1, + "memory": "10G", + "runtime": "45m", + "project": "proj_rapidresponse", + }, + runner="cdtask", + ) diff --git a/src/climate_downscale/model/prepare_training_data.py b/src/climate_downscale/model/prepare_training_data.py index 6479c7c..bd29904 100644 --- a/src/climate_downscale/model/prepare_training_data.py +++ b/src/climate_downscale/model/prepare_training_data.py @@ -1,24 +1,35 @@ +import pandas as pd +import xarray as xr + year = 2023 -climate_stations = pd.read_parquet(f'/mnt/share/erf/climate_downscale/extracted_data/ncei_climate_stations/{year}.parquet') +climate_stations = pd.read_parquet( + f"/mnt/share/erf/climate_downscale/extracted_data/ncei_climate_stations/{year}.parquet" +) column_map = { "DATE": "date", "LATITUDE": "lat", "LONGITUDE": "lon", "TEMP": "temperature", } -climate_stations = climate_stations.rename(columns=column_map).loc[:, list(column_map.values())].dropna() -climate_stations['date'] = pd.to_datetime(climate_stations['date']) -climate_stations['year'] = climate_stations['date'].dt.year -climate_stations['dayofyear'] = climate_stations['date'].dt.dayofyear -climate_stations['temperature'] = 5/9 * (climate_stations['temperature'] - 32) -climate_stations.loc[climate_stations.lon < 0, 'lon'] +=360 +climate_stations = ( + climate_stations.rename(columns=column_map) + .loc[:, list(column_map.values())] + .dropna() +) +climate_stations["date"] = pd.to_datetime(climate_stations["date"]) +climate_stations["year"] = climate_stations["date"].dt.year +climate_stations["dayofyear"] = climate_stations["date"].dt.dayofyear +climate_stations["temperature"] = 5 / 9 * (climate_stations["temperature"] - 32) +climate_stations.loc[climate_stations.lon < 0, "lon"] += 360 -era5 = xr.load_dataset(f'/mnt/share/erf/climate_downscale/extracted_data/era5_temperature_daily_mean/{year}_era5_temp_daily.nc') -lat = xr.DataArray(climate_stations['lat'].values, dims=['points']) -lon = xr.DataArray(climate_stations['lon'].values, dims=['points']) -time = xr.DataArray(climate_stations['date'].values, dims=['points']) -arr = era5.sel(latitude=lat, longitude=lon, time=time, method='nearest') +era5 = xr.load_dataset( + f"/mnt/share/erf/climate_downscale/extracted_data/era5_temperature_daily_mean/{year}_era5_temp_daily.nc" +) +lat = xr.DataArray(climate_stations["lat"].values, dims=["points"]) +lon = xr.DataArray(climate_stations["lon"].values, dims=["points"]) +time = xr.DataArray(climate_stations["date"].values, dims=["points"]) +arr = era5.sel(latitude=lat, longitude=lon, time=time, method="nearest") if "expver" in arr.coords: arr = arr.sel(expver=1).combine_first(arr.sel(expver=5)) -climate_stations['era5_temperature'] = arr['t2m'].to_numpy() + 273.15 \ No newline at end of file +climate_stations["era5_temperature"] = arr["t2m"].to_numpy() + 273.15 diff --git a/src/climate_downscale/utils.py b/src/climate_downscale/utils.py new file mode 100644 index 0000000..ff395ef --- /dev/null +++ b/src/climate_downscale/utils.py @@ -0,0 +1,69 @@ +import numpy as np +import rasterra as rt +import xarray as xr +from affine import Affine + + +def to_raster( + ds: xr.DataArray, + 
nodata: float | int, + lat_col: str = "lat", + lon_col: str = "lon", + crs: str = "EPSG:4326", +) -> rt.RasterArray: + """Convert an xarray DataArray to a RasterArray.""" + lat, lon = ds[lat_col].data, ds[lon_col].data + + dlat = (lat[1:] - lat[:-1]).mean() + dlon = (lon[1:] - lon[:-1]).mean() + + transform = Affine( + a=dlon, + b=0.0, + c=lon[0], + d=0.0, + e=-dlat, + f=lat[-1], + ) + return rt.RasterArray( + data=ds.data[::-1], + transform=transform, + crs=crs, + no_data_value=nodata, + ) + + +def make_raster_template( + x_min: int | float, + y_min: int | float, + stride: int | float, + resolution: int | float, + crs: str = "EPSG:4326", +) -> rt.RasterArray: + """Create a raster template with the specified dimensions and resolution.""" + tolerance = 1e-12 + evenly_divides = (stride % resolution < tolerance) or ( + resolution - stride % resolution < tolerance + ) + if not evenly_divides: + msg = "Stride must be a multiple of resolution" + raise ValueError(msg) + + transform = Affine( + a=resolution, + b=0, + c=x_min, + d=0, + e=-resolution, + f=y_min + stride, + ) + + n_pix = int(stride / resolution) + + data = np.zeros((n_pix, n_pix), dtype=np.int8) + return rt.RasterArray( + data, + transform, + crs=crs, + no_data_value=-1, + ) From dc3f8235939db1e7a0fa408af3ca377d366c29d9 Mon Sep 17 00:00:00 2001 From: James Collins Date: Wed, 15 May 2024 15:56:28 -0700 Subject: [PATCH 03/71] Fix command line stuff --- src/climate_downscale/model/prepare_predictors.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/climate_downscale/model/prepare_predictors.py b/src/climate_downscale/model/prepare_predictors.py index 4cd20dc..609978d 100644 --- a/src/climate_downscale/model/prepare_predictors.py +++ b/src/climate_downscale/model/prepare_predictors.py @@ -87,13 +87,13 @@ def prepare_predictors_main( @click.command() # type: ignore[arg-type] -@with_choice("lat_start", allow_all=True, choices=LATITUDES) -@with_choice("lon_start", allow_all=True, choices=LONGITUDES) +@with_choice("lat-start", allow_all=False, choices=LATITUDES) +@with_choice("lon-start", allow_all=False, choices=LONGITUDES) @with_output_directory(DEFAULT_ROOT) def prepare_predictors_task( - lat_start: int, lon_start: int, output_dir: str | Path + lat_start: str, lon_start: str, output_dir: str, ) -> None: - prepare_predictors_main(output_dir, lat_start, lon_start) + prepare_predictors_main(output_dir, int(lat_start), int(lon_start)) @click.command() # type: ignore[arg-type] @@ -104,8 +104,8 @@ def prepare_predictors(output_dir: str, queue: str) -> None: "model prepare_predictors", node_args={ "output-dir": [output_dir], - "lat_start": LATITUDES, - "lon_start": LONGITUDES, + "lat-start": LATITUDES, + "lon-start": LONGITUDES, }, task_resources={ "queue": queue, From ee93188f5f85e8c7d10e31f03a139cc6d6dd0e83 Mon Sep 17 00:00:00 2001 From: collijk Date: Wed, 15 May 2024 15:59:35 -0700 Subject: [PATCH 04/71] Add option when there are not paths --- .../model/prepare_predictors.py | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/src/climate_downscale/model/prepare_predictors.py b/src/climate_downscale/model/prepare_predictors.py index 609978d..82c7cd1 100644 --- a/src/climate_downscale/model/prepare_predictors.py +++ b/src/climate_downscale/model/prepare_predictors.py @@ -2,6 +2,7 @@ from pathlib import Path import click +import numpy as np import rasterra as rt from rra_tools import jobmon from rra_tools.cli_tools import ( @@ -42,8 +43,24 @@ def load_elevation( if 
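Note: patch 03 switches the option names to dashed form and casts inside the task because click choice values arrive as strings. A minimal standalone illustration (plain click.option stands in for rra_tools' with_choice here):

    import click

    LATITUDES = [str(lat) for lat in range(-90, 90, 30)]

    @click.command()
    @click.option("--lat-start", type=click.Choice(LATITUDES), required=True)
    def task(lat_start: str) -> None:
        click.echo(int(lat_start) + 30)  # values arrive as strings; cast before arithmetic

    if __name__ == "__main__":
        task()
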
From ee93188f5f85e8c7d10e31f03a139cc6d6dd0e83 Mon Sep 17 00:00:00 2001
From: collijk
Date: Wed, 15 May 2024 15:59:35 -0700
Subject: [PATCH 04/71] Add option when there are no paths

---
 .../model/prepare_predictors.py | 25 ++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/src/climate_downscale/model/prepare_predictors.py b/src/climate_downscale/model/prepare_predictors.py
index 609978d..82c7cd1 100644
--- a/src/climate_downscale/model/prepare_predictors.py
+++ b/src/climate_downscale/model/prepare_predictors.py
@@ -2,6 +2,7 @@
 from pathlib import Path
 
 import click
+import numpy as np
 import rasterra as rt
 from rra_tools import jobmon
 from rra_tools.cli_tools import (
@@ -42,8 +43,24 @@ def load_elevation(
 
             if p.exists():
                 paths.append(p)
-
-    return rt.load_mf_raster(paths)
+    if paths:
+        raster = rt.load_mf_raster(paths)
+    else:
+        template = make_raster_template(
+            x_min=longitudes[0],
+            y_min=latitudes[0],
+            stride=STRIDE,
+            resolution=0.1,
+        )
+        no_data = -32768
+        arr = np.full((len(latitudes), len(longitudes)), no_data, dtype=np.int16)
+        raster = rt.RasterArray(
+            data=arr,
+            transform=template.transform,
+            crs=template.crs,
+            no_data_value=-32768,
+        )
+    return raster
 
 
 def prepare_predictors_main(
@@ -91,7 +108,9 @@ def prepare_predictors_main(
 @with_choice("lon-start", allow_all=False, choices=LONGITUDES)
 @with_output_directory(DEFAULT_ROOT)
 def prepare_predictors_task(
-    lat_start: str, lon_start: str, output_dir: str,
+    lat_start: str,
+    lon_start: str,
+    output_dir: str,
 ) -> None:
     prepare_predictors_main(output_dir, int(lat_start), int(lon_start))

From faaaa54cd74bc6d5e34c304eff78a50590dc064d Mon Sep 17 00:00:00 2001
From: James Collins
Date: Wed, 15 May 2024 17:35:46 -0700
Subject: [PATCH 05/71] Add lcz

---
 src/climate_downscale/model/prepare_predictors.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/climate_downscale/model/prepare_predictors.py b/src/climate_downscale/model/prepare_predictors.py
index 82c7cd1..e71f95f 100644
--- a/src/climate_downscale/model/prepare_predictors.py
+++ b/src/climate_downscale/model/prepare_predictors.py
@@ -63,6 +63,12 @@ def load_elevation(
     return raster
 
 
+def load_lcz_data(cd_data, latitudes, longitudes):
+    path = cd_data.rub_local_climate_zones / 'lcz_filter_v2.tif'
+    bounds = (longitudes[0], latitudes[0], longitudes[1], latitudes[1])
+    return rt.load_raster(path, bounds=bounds)
+
+
 def prepare_predictors_main(
     output_dir: str | Path, lat_start: int, lon_start: int
 ) -> None:
@@ -88,6 +94,7 @@ def prepare_predictors_main(
     )
 
     elevation = load_elevation(cd_data, latitudes, longitudes)
+    lcz = load_lcz_data(cd_data, latitudes, longitudes)
 
     predictors["elevation_target"] = elevation.resample_to(
         template_target, resampling="average"
@@ -98,6 +105,8 @@ def prepare_predictors_main(
     predictors["elevation_anomaly"] = (
         predictors["elevation_era5"] - predictors["elevation_target"]
     )
+    predictors["lcz_era5"] = lcz.resample_to(template_era5, resampling="mode")
+    predictors["lcz_target"] = lcz.resample_to(template.target, resampling="mode")
 
     for name, predictor in predictors.items():
         cd_data.save_predictor(predictor, f"{name}_{lat_start}_{lon_start}")

From f0d160b35dea92cbd69fea0ced09fc30ec2e4745 Mon Sep 17 00:00:00 2001
From: James Collins
Date: Wed, 15 May 2024 18:06:35 -0700
Subject: [PATCH 06/71] Bugfixes to get lcz working

---
 src/climate_downscale/model/prepare_predictors.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/climate_downscale/model/prepare_predictors.py b/src/climate_downscale/model/prepare_predictors.py
index e71f95f..1335df5 100644
--- a/src/climate_downscale/model/prepare_predictors.py
+++ b/src/climate_downscale/model/prepare_predictors.py
@@ -65,7 +65,7 @@ def load_elevation(
 def load_lcz_data(cd_data, latitudes, longitudes):
     path = cd_data.rub_local_climate_zones / 'lcz_filter_v2.tif'
-    bounds = (longitudes[0], latitudes[0], longitudes[1], latitudes[1])
+    bounds = (longitudes[0], latitudes[0], longitudes[-1], latitudes[-1])
     return rt.load_raster(path, bounds=bounds)
 
 
@@ -106,7 +106,7 @@ def prepare_predictors_main(
     predictors["lcz_era5"] = lcz.resample_to(template_era5, resampling="mode")
-    predictors["lcz_target"] = lcz.resample_to(template.target, resampling="mode")
+    predictors["lcz_target"] = lcz.resample_to(template_target, resampling="mode")
 
     for name, predictor in predictors.items():
         cd_data.save_predictor(predictor, f"{name}_{lat_start}_{lon_start}")
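Note: the LCZ rasters are resampled with "mode" while elevation uses "average" because local climate zones are categorical class codes; averaging them produces values that are not classes. A toy 2x2 -> 1x1 aggregation makes the difference concrete:

    import numpy as np

    block = np.array([[2, 2], [17, 6]])  # four fine pixels -> one coarse pixel
    print(block.mean())  # 6.75 -- not a valid LCZ class
    values, counts = np.unique(block, return_counts=True)
    print(values[counts.argmax()])  # 2 -- the dominant class, which "mode" keeps
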
From 37f40cb61320ddf32a3b52036bdd4980c77db095 Mon Sep 17 00:00:00 2001
From: James Collins
Date: Thu, 16 May 2024 11:10:20 -0700
Subject: [PATCH 07/71] Prep training data

---
 .../model/prepare_training_data.py | 56 +++++++++++++++----
 1 file changed, 45 insertions(+), 11 deletions(-)

diff --git a/src/climate_downscale/model/prepare_training_data.py b/src/climate_downscale/model/prepare_training_data.py
index bd29904..3058a58 100644
--- a/src/climate_downscale/model/prepare_training_data.py
+++ b/src/climate_downscale/model/prepare_training_data.py
@@ -1,8 +1,26 @@
 import pandas as pd
 import xarray as xr
+import rasterra as rt
+from pathlib import Path
+
+def get_era5_temperature(year: int | str, cs_df: pd.DataFrame):
+    lat = xr.DataArray(cs_df["lat"].values, dims=["points"])
+    lon = xr.DataArray(cs_df["lon"].values, dims=["points"])
+    time = xr.DataArray(cs_df["date"].values, dims=["points"])
+
+    era5 = xr.load_dataset(
+        f"/mnt/share/erf/climate_downscale/extracted_data/era5_temperature_daily_mean/{year}_era5_temp_daily.nc"
+    )
+
+    era5 = era5.assign_coords(longitude=(((era5.longitude + 180) % 360) - 180)).sortby(['latitude', 'longitude'])
+    arr = era5.sel(latitude=lat, longitude=lon, time=time, method="nearest")
+    if "expver" in era5.coords:
+        arr = arr.sel(expver=1).combine_first(arr.sel(expver=5))
+    return arr['t2m'].to_numpy() - 273.15
 
 year = 2023
 
+# Load and cleanup
 climate_stations = pd.read_parquet(
     f"/mnt/share/erf/climate_downscale/extracted_data/ncei_climate_stations/{year}.parquet"
 )
@@ -11,15 +29,41 @@
     "LATITUDE": "lat",
     "LONGITUDE": "lon",
     "TEMP": "temperature",
+    "ELEVATION": "ncei_elevation",
 }
 climate_stations = (
     climate_stations.rename(columns=column_map)
     .loc[:, list(column_map.values())]
     .dropna()
+    .reset_index(drop=True)
 )
+
+# Do time things
 climate_stations["date"] = pd.to_datetime(climate_stations["date"])
 climate_stations["year"] = climate_stations["date"].dt.year
 climate_stations["dayofyear"] = climate_stations["date"].dt.dayofyear
+
+# Add temperature
 climate_stations["temperature"] = 5 / 9 * (climate_stations["temperature"] - 32)
-climate_stations.loc[climate_stations.lon < 0, "lon"] += 360
+climate_stations['era5_temperature'] = get_era5_temperature(year, climate_stations)
 
-era5 = xr.load_dataset(
-    f"/mnt/share/erf/climate_downscale/extracted_data/era5_temperature_daily_mean/{year}_era5_temp_daily.nc"
-)
-lat = xr.DataArray(climate_stations["lat"].values, dims=["points"])
-lon = xr.DataArray(climate_stations["lon"].values, dims=["points"])
-time = xr.DataArray(climate_stations["date"].values, dims=["points"])
-arr = era5.sel(latitude=lat, longitude=lon, time=time, method="nearest")
-if "expver" in arr.coords:
-    arr = arr.sel(expver=1).combine_first(arr.sel(expver=5))
-climate_stations["era5_temperature"] = arr["t2m"].to_numpy() + 273.15
\ No newline at end of file
+# Elevation pieces
+target_elevation = rt.load_mf_raster(list(Path("/mnt/share/erf/climate_downscale/model/predictors").glob("elevation_target_*.tif")))
+climate_stations['target_elevation'] = srtm_elevation.select(climate_stations['lon'], climate_stations['lat'])
+era5_elevation = rt.load_mf_raster(list(Path("/mnt/share/erf/climate_downscale/model/predictors").glob("elevation_era5_*.tif")))
+climate_stations['era5_elevation'] = era5_elevation.select(climate_stations['lon'], climate_stations['lat'])
+
+climate_stations['elevation'] = climate_stations['ncei_elevation']
+missing_elevation = climate_stations['elevation'] < -999
+
+climate_stations['elevation'] = climate_stations['ncei_elevation']
+missing_elevation = climate_stations['elevation'] < -999
+climate_stations.loc[missing_elevation, 'elevation'] = climate_stations.loc[missing_elevation, 'target_elevation']
+still_missing_elevation = climate_stations['elevation'] < -999
+climate_stations = climate_stations.loc[~still_missing_elevation]
+
+# Local climate zone
+target_lcz = rt.load_mf_raster(list(Path("/mnt/share/erf/climate_downscale/model/predictors").glob("lcz_target_*.tif")))
+climate_stations['target_lcz'] = target_lcz.select(climate_stations['lon'], climate_stations['lat'])
+era5_lcz = rt.load_mf_raster(list(Path("/mnt/share/erf/climate_downscale/model/predictors").glob("lcz_era5_*.tif")))
+climate_stations['era5_lcz'] = era5_lcz.select(climate_stations['lon'], climate_stations['lat'])
\ No newline at end of file
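Note: the GSOD TEMP column is in degrees Fahrenheit, which is why the patch converts with 5/9 * (F - 32); a quick spot check of the formula at the freezing and boiling points:

    def f_to_c(temp_f: float) -> float:
        return 5 / 9 * (temp_f - 32)

    print(f_to_c(32.0), f_to_c(212.0))  # 0.0 100.0
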
From a74bbb5c504b5c71f1af07439f67e00249339919 Mon Sep 17 00:00:00 2001
From: collijk
Date: Thu, 16 May 2024 11:24:22 -0700
Subject: [PATCH 08/71] lcz extraction task

---
 poetry.lock                             |  8 +-
 pyproject.toml                          |  3 +-
 .../extract/rub_local_climate_zones.py  | 11 ++-
 .../model/prepare_predictors.py         |  6 +-
 .../model/prepare_training_data.py      | 77 +++++++++++++------
 5 files changed, 72 insertions(+), 33 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index e003253..310bfd0 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2069,13 +2069,13 @@ test = ["boto3 (>=1.2.4)", "hypothesis", "packaging", "pytest (>=2.8.2)", "pytes
 
 [[package]]
 name = "rasterra"
-version = "0.5.10"
+version = "0.5.11"
 description = "A sleek, object-oriented interface designed for intuitive raster data manipulation in Python."
 optional = false
 python-versions = "<3.13,>=3.10"
 files = [
-    {file = "rasterra-0.5.10-py3-none-any.whl", hash = "sha256:9d281f98e4cb6375a12a7b09cc61dcb8a07c53ecab12e62c7147254ae04307b6"},
-    {file = "rasterra-0.5.10.tar.gz", hash = "sha256:6080156dc8395c7ba427d518ad5d8cbc4fe583ca372b281819807a4d0aacd543"},
+    {file = "rasterra-0.5.11-py3-none-any.whl", hash = "sha256:85344ec80b38c6aa0d337fb564d98f69dc5cd3f7699162e5016b65cf2a33f8ad"},
+    {file = "rasterra-0.5.11.tar.gz", hash = "sha256:8f7e396e50ec0b8c7e7b2bac5e01bd865ebcd2f8757f72ab82daf7c73723e5d9"},
 ]
 
 [package.dependencies]
@@ -2683,4 +2683,4 @@ viz = ["matplotlib", "nc-time-axis", "seaborn"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10, <3.13"
-content-hash = "7dcea9087ab369daf1a2661541b1add72c879d396fda8b2115a24c999356a822"
+content-hash = "76b81344dbd944abdc6006c3ce2e8a8ce7ae3131f747247396f1fb01ab80e1a3"
diff --git a/pyproject.toml b/pyproject.toml
index 2745a11..79a58f4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,7 +34,7 @@ python = ">=3.10, <3.13"
 click = "*"
 numpy = "^1.26.4"
 pandas = "^2.2.2"
-rasterra = "^0.5.10"
+rasterra = "^0.5.11"
 shapely = "^2.0.4"
 geopandas = "^0.14.4"
 xarray = "^2024.3.0"
@@ -92,6 +92,7 @@ ignore = [
     "RET505",  # Else after return, makes a lot of false positives
     "E501",    # Line too long, this is autoformatted
     "PYI041",  # Use float instead of int | float; dumb rule
+    "T201",    # print is fine for now.
 ]
 
 [tool.ruff.lint.per-file-ignores]
diff --git a/src/climate_downscale/extract/rub_local_climate_zones.py b/src/climate_downscale/extract/rub_local_climate_zones.py
index 172a3e7..8ce475e 100644
--- a/src/climate_downscale/extract/rub_local_climate_zones.py
+++ b/src/climate_downscale/extract/rub_local_climate_zones.py
@@ -1,9 +1,10 @@
 from pathlib import Path
 
 import click
+from rra_tools.cli_tools import with_output_directory
 from rra_tools.shell_tools import wget
 
-from climate_downscale.data import ClimateDownscaleData
+from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData
 
 URL_TEMPLATE = "https://zenodo.org/records/8419340/files/{file_name}?download=1"
 FILES = [
@@ -20,10 +21,12 @@ def extract_rub_local_climate_zones_main(output_dir: str | Path) -> None:
     out_root = data.rub_local_climate_zones
 
     for file_name in FILES:
+        print(f"Downloading {file_name}")
         url = URL_TEMPLATE.format(file_name=file_name)
         wget(url, out_root / file_name)
 
 
-@click.command()
-def extract_rub_local_climate_zones() -> None:
-    raise NotImplementedError
+@click.command()  # type: ignore[arg-type]
+@with_output_directory(DEFAULT_ROOT)
+def extract_rub_local_climate_zones(output_dir: str) -> None:
+    extract_rub_local_climate_zones_main(output_dir)
diff --git a/src/climate_downscale/model/prepare_predictors.py b/src/climate_downscale/model/prepare_predictors.py
index 1335df5..957bdb0 100644
--- a/src/climate_downscale/model/prepare_predictors.py
+++ b/src/climate_downscale/model/prepare_predictors.py
@@ -63,8 +63,10 @@ def load_elevation(
     return raster
 
 
-def load_lcz_data(cd_data, latitudes, longitudes):
-    path = cd_data.rub_local_climate_zones / 'lcz_filter_v2.tif'
+def load_lcz_data(
+    cd_data: ClimateDownscaleData, latitudes: Sequence[int], longitudes: Sequence[int]
+) -> rt.RasterArray:
+    path = cd_data.rub_local_climate_zones / "lcz_filter_v2.tif"
     bounds = (longitudes[0], latitudes[0], longitudes[-1], latitudes[-1])
     return rt.load_raster(path, bounds=bounds)
diff --git a/src/climate_downscale/model/prepare_training_data.py b/src/climate_downscale/model/prepare_training_data.py
index 3058a58..d891957 100644
--- a/src/climate_downscale/model/prepare_training_data.py
+++ b/src/climate_downscale/model/prepare_training_data.py
@@ -1,24 +1,35 @@
+from pathlib import Path
+
+import numpy as np
+import numpy.typing as npt
 import pandas as pd
-import xarray as xr
 import rasterra as rt
-from pathlib import Path
+import xarray as xr
 
-def get_era5_temperature(year: int | str, cs_df: pd.DataFrame):
+
+def get_era5_temperature(
+    year: int | str, cs_df: pd.DataFrame
+) -> npt.NDArray[np.float64]:
     lat = xr.DataArray(cs_df["lat"].values, dims=["points"])
     lon = xr.DataArray(cs_df["lon"].values, dims=["points"])
     time = xr.DataArray(cs_df["date"].values, dims=["points"])
 
     era5 = xr.load_dataset(
         f"/mnt/share/erf/climate_downscale/extracted_data/era5_temperature_daily_mean/{year}_era5_temp_daily.nc"
     )
 
-    era5 = era5.assign_coords(longitude=(((era5.longitude + 180) % 360) - 180)).sortby(['latitude', 'longitude'])
+    era5 = era5.assign_coords(longitude=(((era5.longitude + 180) % 360) - 180)).sortby(
+        ["latitude", "longitude"]
+    )
     arr = era5.sel(latitude=lat, longitude=lon, time=time, method="nearest")
     if "expver" in era5.coords:
         arr = arr.sel(expver=1).combine_first(arr.sel(expver=5))
-    return arr['t2m'].to_numpy() - 273.15
+    return arr["t2m"].to_numpy() - 273.15
@@ -29,41 +40,63 @@
 # Add temperature
 climate_stations["temperature"] = 5 / 9 * (climate_stations["temperature"] - 32)
-climate_stations['era5_temperature'] = get_era5_temperature(year, climate_stations)
+climate_stations["era5_temperature"] = get_era5_temperature(year, climate_stations)
+
+lon, lat = climate_stations["lon"].to_numpy(), climate_stations["lat"].to_numpy()
 
 # Elevation pieces
-target_elevation = rt.load_mf_raster(list(Path("/mnt/share/erf/climate_downscale/model/predictors").glob("elevation_target_*.tif")))
-climate_stations['target_elevation'] = srtm_elevation.select(climate_stations['lon'], climate_stations['lat'])
-era5_elevation = rt.load_mf_raster(list(Path("/mnt/share/erf/climate_downscale/model/predictors").glob("elevation_era5_*.tif")))
-climate_stations['era5_elevation'] = era5_elevation.select(climate_stations['lon'], climate_stations['lat'])
-
-climate_stations['elevation'] = climate_stations['ncei_elevation']
-missing_elevation = climate_stations['elevation'] < -999
-
-climate_stations['elevation'] = climate_stations['ncei_elevation']
-missing_elevation = climate_stations['elevation'] < -999
-climate_stations.loc[missing_elevation, 'elevation'] = climate_stations.loc[missing_elevation, 'target_elevation']
-still_missing_elevation = climate_stations['elevation'] < -999
-climate_stations = climate_stations.loc[~still_missing_elevation]
+target_elevation = rt.load_mf_raster(
+    list(
+        Path("/mnt/share/erf/climate_downscale/model/predictors").glob(
+            "elevation_target_*.tif"
+        )
+    )
+)
+climate_stations["target_elevation"] = target_elevation.select(lon, lat)
+era5_elevation = rt.load_mf_raster(
+    list(
+        Path("/mnt/share/erf/climate_downscale/model/predictors").glob(
+            "elevation_era5_*.tif"
+        )
+    )
+)
+climate_stations["era5_elevation"] = era5_elevation.select(lon, lat)
+
+climate_stations["elevation"] = climate_stations["ncei_elevation"]
+nodata_val = -999
+missing_elevation = climate_stations["elevation"] < nodata_val
+climate_stations.loc[missing_elevation, "elevation"] = climate_stations.loc[
+    missing_elevation, "target_elevation"
+]
+still_missing_elevation = climate_stations["elevation"] < nodata_val
+climate_stations = climate_stations.loc[~still_missing_elevation]
 
 # Local climate zone
-target_lcz = rt.load_mf_raster(list(Path("/mnt/share/erf/climate_downscale/model/predictors").glob("lcz_target_*.tif")))
-climate_stations['target_lcz'] = target_lcz.select(climate_stations['lon'], climate_stations['lat'])
-era5_lcz = rt.load_mf_raster(list(Path("/mnt/share/erf/climate_downscale/model/predictors").glob("lcz_era5_*.tif")))
-climate_stations['era5_lcz'] = era5_lcz.select(climate_stations['lon'], climate_stations['lat'])
\ No newline at end of file
+target_lcz = rt.load_mf_raster(
+    list(
+        Path("/mnt/share/erf/climate_downscale/model/predictors").glob(
+            "lcz_target_*.tif"
+        )
+    )
+)
+climate_stations["target_lcz"] = target_lcz.select(lon, lat)
+era5_lcz = rt.load_mf_raster(
+    list(
+        Path("/mnt/share/erf/climate_downscale/model/predictors").glob("lcz_era5_*.tif")
+    )
+)
+climate_stations["era5_lcz"] = era5_lcz.select(lon, lat)
From 06cd5d4ff64167e0b4fc8d41ee4e9160ceebe22b Mon Sep 17 00:00:00 2001
From: collijk
Date: Thu, 16 May 2024 13:46:39 -0700
Subject: [PATCH 09/71] Prep training data script and clean up option usage

---
 src/climate_downscale/cli_options.py       | 120 +++++++++++
 src/climate_downscale/data.py              |  43 +++++-
 src/climate_downscale/extract/elevation.py |  43 ++--
 src/climate_downscale/extract/era5.py      |  84 ++------
 .../extract/ncei_climate_stations.py       |   8 +-
 .../extract/rub_local_climate_zones.py     |   4 +-
 .../model/prepare_predictors.py            |  28 +--
 .../model/prepare_training_data.py         | 193 ++++++++++--------
 8 files changed, 325 insertions(+), 198 deletions(-)
 create mode 100644 src/climate_downscale/cli_options.py

diff --git a/src/climate_downscale/cli_options.py b/src/climate_downscale/cli_options.py
new file mode 100644
index 0000000..a1eeeec
--- /dev/null
+++ b/src/climate_downscale/cli_options.py
@@ -0,0 +1,120 @@
+from typing import ParamSpec, TypeVar
+
+from rra_tools.cli_tools import (
+    RUN_ALL,
+    ClickOption,
+    with_choice,
+    with_debugger,
+    with_input_directory,
+    with_num_cores,
+    with_output_directory,
+    with_progress_bar,
+    with_queue,
+    with_verbose,
+)
+
+_T = TypeVar("_T")
+_P = ParamSpec("_P")
+
+
+VALID_YEARS = [str(y) for y in range(1990, 2024)]
+
+
+def with_year(
+    *,
+    allow_all: bool = False,
+) -> ClickOption[_P, _T]:
+    return with_choice(
+        "year",
+        "y",
+        allow_all=allow_all,
+        choices=VALID_YEARS,
+        help="Year to extract data for.",
+    )
+
+
+VALID_MONTHS = [f"{i:02d}" for i in range(1, 13)]
+
+
+def with_month(
+    *,
+    allow_all: bool = False,
+) -> ClickOption[_P, _T]:
+    return with_choice(
+        "month",
+        "m",
+        allow_all=allow_all,
+        choices=VALID_MONTHS,
+        help="Month to extract data for.",
+    )
+
+
+VALID_CLIMATE_VARIABLES = [
+    "total_precipitation",
+    "2m_temperature",
+]
+
+
+def with_climate_variable(
+    *,
+    allow_all: bool = False,
+) -> ClickOption[_P, _T]:
+    return with_choice(
+        "climate-variable",
+        "x",
+        allow_all=allow_all,
+        choices=VALID_CLIMATE_VARIABLES,
+        help="Variable to extract.",
+    )
+
+
+STRIDE = 30
+LATITUDES = [str(lat) for lat in range(-90, 90, STRIDE)]
+LONGITUDES = [str(lon) for lon in range(-180, 180, STRIDE)]
+
+
+def with_lat_start(
+    *,
+    allow_all: bool = False,
+) -> ClickOption[_P, _T]:
+    return with_choice(
+        "lat-start",
+        allow_all=allow_all,
+        choices=LATITUDES,
+        help="Latitude of the top-left corner of the tile.",
+    )
+
+
+def with_lon_start(
+    *,
+    allow_all: bool = False,
+) -> ClickOption[_P, _T]:
+    return with_choice(
+        "lon-start",
+        allow_all=allow_all,
+        choices=LONGITUDES,
+        help="Longitude of the top-left corner of the tile.",
+    )
+
+
+__all__ = [
+    "VALID_YEARS",
+    "VALID_MONTHS",
+    "VALID_CLIMATE_VARIABLES",
+    "STRIDE",
+    "LATITUDES",
+    "LONGITUDES",
+    "with_year",
+    "with_month",
+    "with_climate_variable",
+    "with_lat_start",
+    "with_lon_start",
+    "with_output_directory",
+    "with_queue",
+    "with_verbose",
+    "with_debugger",
+    "with_input_directory",
+    "with_num_cores",
+    "with_progress_bar",
+    "RUN_ALL",
+]
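Note: a quick sanity check (not part of the patch) of the choice lists defined in cli_options.py; range() excludes its stop value, so the year choices cover 1990 through 2023, and the month choices are zero-padded strings:

    VALID_YEARS = [str(y) for y in range(1990, 2024)]
    VALID_MONTHS = [f"{i:02d}" for i in range(1, 13)]
    print(VALID_YEARS[0], VALID_YEARS[-1], len(VALID_YEARS))  # 1990 2023 34
    print(VALID_MONTHS[0], VALID_MONTHS[-1])                  # 01 12
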
diff --git a/src/climate_downscale/data.py b/src/climate_downscale/data.py
index 06f9db5..6ca01d7 100644
--- a/src/climate_downscale/data.py
+++ b/src/climate_downscale/data.py
@@ -1,7 +1,10 @@
 from pathlib import Path
 from typing import Any
 
+import pandas as pd
 import rasterra as rt
+import xarray as xr
+from rra_tools.shell_tools import touch
 
 DEFAULT_ROOT = "/mnt/share/erf/climate_downscale/"
 
@@ -27,10 +30,23 @@ def extracted_data(self) -> Path:
     def era5_temperature_daily_mean(self) -> Path:
         return self.extracted_data / "era5_temperature_daily_mean"
 
+    def load_era5_temperature_daily_mean(self, year: int | str) -> xr.Dataset:
+        return xr.load_dataset(
+            self.era5_temperature_daily_mean / f"{year}_era5_temp_daily.nc"
+        )
+
     @property
     def ncei_climate_stations(self) -> Path:
         return self.extracted_data / "ncei_climate_stations"
 
+    def save_ncei_climate_stations(self, df: pd.DataFrame, year: int | str) -> None:
+        path = self.ncei_climate_stations / f"{year}.parquet"
+        touch(path, exist_ok=True)
+        df.to_parquet(path)
+
+    def load_ncei_climate_stations(self, year: int | str) -> pd.DataFrame:
+        return pd.read_parquet(self.ncei_climate_stations / f"{year}.parquet")
+
     @property
     def srtm_elevation_gl1(self) -> Path:
         return self.extracted_data / "srtm_elevation_gl1"
@@ -60,11 +76,30 @@ def model(self) -> Path:
     @property
     def predictors(self) -> Path:
         return self.model / "predictors"
 
-    def save_predictor(self, predictor: rt.RasterArray, name: str) -> None:
-        save_raster(predictor, self.predictors / f"{name}.tif")
+    def save_predictor(
+        self,
+        predictor: rt.RasterArray,
+        name: str,
+        lat_start: int,
+        lon_start: int,
+    ) -> None:
+        save_raster(predictor, self.predictors / f"{name}_{lat_start}_{lon_start}.tif")
 
     def load_predictor(self, name: str) -> rt.RasterArray:
-        return rt.load_raster(self.predictors / f"{name}.tif")
+        paths = list(self.predictors.glob(f"{name}_*.tif"))
+        return rt.load_mf_raster(paths)
+
+    @property
+    def training_data(self) -> Path:
+        return self.model / "training_data"
+
+    def save_training_data(self, df: pd.DataFrame, year: int | str) -> None:
+        path = self.training_data / f"{year}.parquet"
+        touch(path, exist_ok=True)
+        df.to_parquet(path)
+
+    def load_training_data(self, year: int | str) -> pd.DataFrame:
+        return pd.read_parquet(self.training_data / f"{year}.parquet")
 
 
 def save_raster(
@@ -95,6 +130,7 @@ def save_raster(
         "bigtiff": "yes",
         **kwargs,
     }
+    touch(output_path, exist_ok=True)
     raster.to_file(output_path, **save_params)
 
 
@@ -109,4 +145,5 @@ def save_raster_to_cog(
         "driver": "COG",
         "overview_resampling": resampling,
     }
+    touch(output_path, exist_ok=True)
     save_raster(raster, output_path, num_cores, **cog_save_params)
diff --git a/src/climate_downscale/extract/elevation.py b/src/climate_downscale/extract/elevation.py
index 3c42845..5549886 100644
--- a/src/climate_downscale/extract/elevation.py
+++ b/src/climate_downscale/extract/elevation.py
@@ -2,27 +2,24 @@
 
 import click
 import requests
-from rra_tools import jobmon
-from rra_tools.cli_tools import (
-    with_output_directory,
-    with_queue,
-)
 import tqdm
+from rra_tools import jobmon
 
+from climate_downscale import cli_options as clio
 from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData
 
 API_ENDPOINT = "https://portal.opentopography.org/API/globaldem"
 ELEVATION_MODELS = [
-    "SRTMGL3",    # SRTM Global 3 arc second (90m)
-    "SRTMGL1",    # SRTM Global 1 arc second (30m)
-    "SRTMGL1_E",  # SRTM Global 1 arc second ellipsoidal height (30m)
-    "AW3D30",     # ALOS World 3D 30m
-    "AW3D30_E",   # ALOS World 3D 30m ellipsoidal height
+    "SRTMGL3",  # SRTM Global 3 arc second (90m)
+    "SRTMGL1",  # SRTM Global 1 arc second (30m)
+    "SRTMGL1_E",  # SRTM Global 1 arc second ellipsoidal height (30m)
+    "AW3D30",  # ALOS World 3D 30m
+    "AW3D30_E",  # ALOS World 3D 30m ellipsoidal height
     "SRTM15Plus", # SRTM 15 arc second (500m)
-    "NASADEM",    # NASA DEM 1 arc second (30m)
-    "COP30",      # Copernicus 1 arc second (30m)
-    "COP90",      # Copernicus 3 arc second (90m)
+    "NASADEM",  # NASA DEM 1 arc second (30m)
+    "COP30",  # Copernicus 1 arc second (30m)
+    "COP90",  # Copernicus 3 arc second (90m)
 ]
 
 FETCH_SIZE = 5  # degrees, should be small enough for any model
@@ -48,12 +45,14 @@ def extract_elevation_main(
     response = requests.get(API_ENDPOINT, params=params, stream=True, timeout=30)
     response.raise_for_status()
 
-    out_path = cd_data.open_topography_elevation / f"{model_name}_{lat_start}_{lon_start}.tif"
+    out_path = (
+        cd_data.open_topography_elevation / f"{model_name}_{lat_start}_{lon_start}.tif"
+    )
     with out_path.open("wb") as fp:
         for chunk in tqdm.tqdm(response.iter_content(chunk_size=64 * 1024**2)):
             fp.write(chunk)
 
 
 @click.command()  # type: ignore[arg-type]
-@with_output_directory(DEFAULT_ROOT)
+@clio.with_output_directory(DEFAULT_ROOT)
 @click.option(
     "--model-name",
     required=True,
     type=click.Choice(ELEVATION_MODELS),
     help="Name of the elevation model to download.",
 )
@@ -84,24 +83,34 @@ def extract_elevation_task(
     lon_start: int,
 ) -> None:
     """Download elevation data from Open Topography."""
+    invalid = True
+    if invalid:
+        msg = "Downloaded using aws cli, this implementation is not valid"
+        raise NotImplementedError(msg)
+
     extract_elevation_main(output_dir, model_name, lat_start, lon_start)
 
 
 @click.command()  # type: ignore[arg-type]
-@with_output_directory(DEFAULT_ROOT)
+@clio.with_output_directory(DEFAULT_ROOT)
 @click.option(
     "--model-name",
     required=True,
     type=click.Choice(ELEVATION_MODELS),
     help="Name of the elevation model to download.",
 )
-@with_queue()
+@clio.with_queue()
 def extract_elevation(
     output_dir: str,
     model_name: str,
     queue: str,
 ) -> None:
     """Download elevation data from Open Topography."""
+    invalid = True
+    if invalid:
+        msg = "Downloaded using aws cli, this implementation is not valid"
+        raise NotImplementedError(msg)
+
     lat_starts = list(range(-90, 90, FETCH_SIZE))
     lon_starts = list(range(-180, 180, FETCH_SIZE))
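Note: the elevation extractor streams the GeoTIFF response in 64 MiB chunks wrapped in tqdm (added in patch 01); a minimal standalone sketch of that download pattern, with an illustrative URL and filename:

    import requests
    import tqdm

    response = requests.get("https://example.com/dem.tif", stream=True, timeout=30)
    response.raise_for_status()
    with open("dem.tif", "wb") as fp:
        for chunk in tqdm.tqdm(response.iter_content(chunk_size=64 * 1024**2)):
            fp.write(chunk)
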
diff --git a/src/climate_downscale/extract/era5.py b/src/climate_downscale/extract/era5.py
index 026448f..e27ad8e 100644
--- a/src/climate_downscale/extract/era5.py
+++ b/src/climate_downscale/extract/era5.py
@@ -1,68 +1,13 @@
 from pathlib import Path
-from typing import ParamSpec, TypeVar
 
 import cdsapi
 import click
 from rra_tools import jobmon
-from rra_tools.cli_tools import (
-    RUN_ALL,
-    ClickOption,
-    with_choice,
-    with_output_directory,
-    with_queue,
-)
+from rra_tools.shell_tools import touch
 
+from climate_downscale import cli_options as clio
 from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData
 
-VALID_YEARS = [str(y) for y in range(1990, 2024)]
-VALID_MONTHS = [f"{i:02d}" for i in range(1, 13)]
-VALID_VARIABLES = [
-    "total_precipitation",
-    "2m_temperature",
-]
-
-_T = TypeVar("_T")
-_P = ParamSpec("_P")
-
-
-def with_year(
-    *,
-    allow_all: bool = False,
-) -> ClickOption[_P, _T]:
-    return with_choice(
-        "year",
-        "y",
-        allow_all=allow_all,
-        choices=VALID_YEARS,
-        help="Year to extract data for.",
-    )
-
-
-def with_month(
-    *,
-    allow_all: bool = False,
-) -> ClickOption[_P, _T]:
-    return with_choice(
-        "month",
-        "m",
-        allow_all=allow_all,
-        choices=VALID_MONTHS,
-        help="Month to extract data for.",
-    )
-
-
-def with_variable(
-    *,
-    allow_all: bool = False,
-) -> ClickOption[_P, _T]:
-    return with_choice(
-        "variable",
-        "x",
-        allow_all=allow_all,
-        choices=VALID_VARIABLES,
-        help="Variable to extract.",
-    )
-
 
 def extract_era5_main(
@@ -99,31 +44,32 @@ def extract_era5_main(
     )
 
     out_path = cddata.era5_temperature_daily_mean / f"{variable}_{year}_{month}.nc"
+    touch(out_path, exist_ok=True)
     copernicus.download(result, [out_path])
 
 
 @click.command()  # type: ignore[arg-type]
-@with_output_directory(DEFAULT_ROOT)
-@with_year()
-@with_month()
-@with_variable()
-def extract_era5_task(year: str, month: str, variable: str) -> None:
-    extract_era5_main(DEFAULT_ROOT, year, month, variable)
+@clio.with_output_directory(DEFAULT_ROOT)
+@clio.with_year()
+@clio.with_month()
+@clio.with_climate_variable()
+def extract_era5_task(year: str, month: str, climate_variable: str) -> None:
+    extract_era5_main(DEFAULT_ROOT, year, month, climate_variable)
 
 
 @click.command()  # type: ignore[arg-type]
-@with_output_directory(DEFAULT_ROOT)
-@with_year(allow_all=True)
-@with_variable(allow_all=True)
-@with_queue()
+@clio.with_output_directory(DEFAULT_ROOT)
+@clio.with_year(allow_all=True)
+@clio.with_climate_variable(allow_all=True)
+@clio.with_queue()
 def extract_era5(
     output_dir: str,
     year: str,
     variable: str,
     queue: str,
 ) -> None:
-    years = VALID_YEARS if year == RUN_ALL else [year]
-    variables = VALID_VARIABLES if variable == RUN_ALL else [variable]
+    years = clio.VALID_YEARS if year == clio.RUN_ALL else [year]
+    variables = clio.VALID_CLIMATE_VARIABLES if variable == clio.RUN_ALL else [variable]
 
     jobmon.run_parallel(
         task_name="extract_era5",
diff --git a/src/climate_downscale/extract/ncei_climate_stations.py b/src/climate_downscale/extract/ncei_climate_stations.py
index 43adeb0..048f6a8 100644
--- a/src/climate_downscale/extract/ncei_climate_stations.py
+++ b/src/climate_downscale/extract/ncei_climate_stations.py
@@ -5,7 +5,7 @@
 import pandas as pd
 from rra_tools import jobmon
 from rra_tools.cli_tools import with_choice, with_output_directory, with_queue
-from rra_tools.shell_tools import mkdir, touch, wget
+from rra_tools.shell_tools import mkdir, wget
 
 from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData
 
@@ -29,9 +29,8 @@ def extract_ncei_climate_stations_main(output_dir: str | Path, year: str) -> Non
     shutil.unpack_archive(str(gz_path), year_dir)
 
     data = pd.concat([pd.read_csv(f) for f in year_dir.glob("*.csv")])
-    data['STATION'] = data['STATION'].astype(str)
-    out_path = cd_data.ncei_climate_stations / f"{year}.parquet"
-    data.to_parquet(out_path)
+    data["STATION"] = data["STATION"].astype(str)
+    cd_data.save_ncei_climate_stations(data, year)
 
     gz_path.unlink()
     shutil.rmtree(year_dir)
@@ -69,4 +68,3 @@ def extract_ncei_climate_stations(output_dir: str, queue: str) -> None:
         },
         runner="cdtask",
     )
-
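Note: jobmon.run_parallel is handed a list per node arg throughout these patches; presumably it fans one task out per combination, so running extract_era5 over all years and both climate variables would mean:

    from itertools import product

    years = [str(y) for y in range(1990, 2024)]
    variables = ["total_precipitation", "2m_temperature"]
    print(len(list(product(years, variables))))  # 68 tasks
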
lcz.resample_to(template_target, resampling="mode") for name, predictor in predictors.items(): - cd_data.save_predictor(predictor, f"{name}_{lat_start}_{lon_start}") + cd_data.save_predictor(predictor, name, lat_start, lon_start) @click.command() # type: ignore[arg-type] -@with_choice("lat-start", allow_all=False, choices=LATITUDES) -@with_choice("lon-start", allow_all=False, choices=LONGITUDES) -@with_output_directory(DEFAULT_ROOT) +@clio.with_lat_start(allow_all=False) +@clio.with_lon_start(allow_all=False) +@clio.with_output_directory(DEFAULT_ROOT) def prepare_predictors_task( lat_start: str, lon_start: str, @@ -127,15 +119,15 @@ def prepare_predictors_task( @click.command() # type: ignore[arg-type] -@with_output_directory(DEFAULT_ROOT) -@with_queue() +@clio.with_output_directory(DEFAULT_ROOT) +@clio.with_queue() def prepare_predictors(output_dir: str, queue: str) -> None: jobmon.run_parallel( "model prepare_predictors", node_args={ "output-dir": [output_dir], - "lat-start": LATITUDES, - "lon-start": LONGITUDES, + "lat-start": clio.LATITUDES, + "lon-start": clio.LONGITUDES, }, task_resources={ "queue": queue, diff --git a/src/climate_downscale/model/prepare_training_data.py b/src/climate_downscale/model/prepare_training_data.py index d891957..c11bc60 100644 --- a/src/climate_downscale/model/prepare_training_data.py +++ b/src/climate_downscale/model/prepare_training_data.py @@ -1,102 +1,127 @@ from pathlib import Path +import click import numpy as np import numpy.typing as npt import pandas as pd -import rasterra as rt import xarray as xr +from rra_tools import jobmon + +from climate_downscale import cli_options as clio +from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData + + +def load_and_clean_climate_stations( + cd_data: ClimateDownscaleData, + year: int | str, +) -> pd.DataFrame: + climate_stations = cd_data.load_ncei_climate_stations(year) + column_map = { + "DATE": "date", + "LATITUDE": "lat", + "LONGITUDE": "lon", + "TEMP": "temperature", + "ELEVATION": "ncei_elevation", + } + climate_stations = ( + climate_stations.rename(columns=column_map) + .loc[:, list(column_map.values())] + .dropna() + .reset_index(drop=True) + .assign( + date=lambda df: pd.to_datetime(df["date"]), + year=lambda df: df["date"].dt.year, + dayofyear=lambda df: df["date"].dt.dayofyear, + temperature=lambda df: 5 / 9 * (df["temperature"] - 32), + ) + ) + return climate_stations # noqa: RET504 def get_era5_temperature( - year: int | str, cs_df: pd.DataFrame + cd_data: ClimateDownscaleData, + year: int | str, + coords: dict[str, npt.NDArray[np.float64]], ) -> npt.NDArray[np.float64]: - lat = xr.DataArray(cs_df["lat"].values, dims=["points"]) - lon = xr.DataArray(cs_df["lon"].values, dims=["points"]) - time = xr.DataArray(cs_df["date"].values, dims=["points"]) + lat = xr.DataArray(coords["lat"], dims=["points"]) + lon = xr.DataArray(coords["lon"], dims=["points"]) + time = xr.DataArray(coords["date"], dims=["points"]) - era5 = xr.load_dataset( - f"/mnt/share/erf/climate_downscale/extracted_data/era5_temperature_daily_mean/{year}_era5_temp_daily.nc" + era5 = cd_data.load_era5_temperature_daily_mean(year) + era5 = ( + era5.assign_coords(longitude=(((era5.longitude + 180) % 360) - 180)) + .sortby(["latitude", "longitude"]) + .sel(latitude=lat, longitude=lon, time=time, method="nearest") ) - era5 = era5.assign_coords(longitude=(((era5.longitude + 180) % 360) - 180)).sortby( - ["latitude", "longitude"] - ) - arr = era5.sel(latitude=lat, longitude=lon, time=time, method="nearest") if "expver" in 
era5.coords: - arr = arr.sel(expver=1).combine_first(arr.sel(expver=5)) - return arr["t2m"].to_numpy() - 273.15 - - -year = 2023 - -# Load and cleanup -climate_stations = pd.read_parquet( - f"/mnt/share/erf/climate_downscale/extracted_data/ncei_climate_stations/{year}.parquet" -) -column_map = { - "DATE": "date", - "LATITUDE": "lat", - "LONGITUDE": "lon", - "TEMP": "temperature", - "ELEVATION": "ncei_elevation", -} -climate_stations = ( - climate_stations.rename(columns=column_map) - .loc[:, list(column_map.values())] - .dropna() - .reset_index(drop=True) -) - -# Do time things -climate_stations["date"] = pd.to_datetime(climate_stations["date"]) -climate_stations["year"] = climate_stations["date"].dt.year -climate_stations["dayofyear"] = climate_stations["date"].dt.dayofyear - -# Add temperature -climate_stations["temperature"] = 5 / 9 * (climate_stations["temperature"] - 32) -climate_stations["era5_temperature"] = get_era5_temperature(year, climate_stations) - -lon, lat = climate_stations["lon"].to_numpy(), climate_stations["lat"].to_numpy() - -# Elevation pieces -target_elevation = rt.load_mf_raster( - list( - Path("/mnt/share/erf/climate_downscale/model/predictors").glob( - "elevation_target_*.tif" - ) + era5 = era5.sel(expver=1).combine_first(era5.sel(expver=5)) + return era5["t2m"].to_numpy() - 273.15 + + +def prepare_training_data_main(output_dir: str | Path, year: str) -> None: + cd_data = ClimateDownscaleData(output_dir) + + data = load_and_clean_climate_stations(cd_data, year) + coords = { + "lon": data["lon"].to_numpy(), + "lat": data["lat"].to_numpy(), + "date": data["date"].to_numpy(), + } + + data["era5_temperature"] = get_era5_temperature(cd_data, year, coords) + + # Elevation pieces + data["target_elevation"] = cd_data.load_predictor("elevation_target").select( + coords["lon"], coords["lat"] ) -) -climate_stations["target_elevation"] = target_elevation.select(lon, lat) -era5_elevation = rt.load_mf_raster( - list( - Path("/mnt/share/erf/climate_downscale/model/predictors").glob( - "elevation_era5_*.tif" - ) + data["era5_elevation"] = cd_data.load_predictor("elevation_era5").select( + coords["lon"], coords["lat"] ) -) -climate_stations["era5_elevation"] = era5_elevation.select(lon, lat) - -climate_stations["elevation"] = climate_stations["ncei_elevation"] -nodata_val = -999 -missing_elevation = climate_stations["elevation"] < nodata_val -climate_stations.loc[missing_elevation, "elevation"] = climate_stations.loc[ - missing_elevation, "target_elevation" -] -still_missing_elevation = climate_stations["elevation"] < nodata_val -climate_stations = climate_stations.loc[~still_missing_elevation] - -# Local climate zone -target_lcz = rt.load_mf_raster( - list( - Path("/mnt/share/erf/climate_downscale/model/predictors").glob( - "lcz_target_*.tif" - ) + + data["elevation"] = data["ncei_elevation"] + nodata_val = -999 + missing_elevation = data["elevation"] < nodata_val + data.loc[missing_elevation, "elevation"] = data.loc[ + missing_elevation, "target_elevation" + ] + still_missing_elevation = data["elevation"] < nodata_val + data = data.loc[~still_missing_elevation] + + # Local climate zone + data["target_lcz"] = cd_data.load_predictor("lcz_target").select( + coords["lon"], coords["lat"] + ) + data["era5_lcz"] = cd_data.load_predictor("lcz_era5").select( + coords["lon"], coords["lat"] ) -) -climate_stations["target_lcz"] = target_lcz.select(lon, lat) -era5_lcz = rt.load_mf_raster( - list( - Path("/mnt/share/erf/climate_downscale/model/predictors").glob("lcz_era5_*.tif") + + 
cd_data.save_training_data(data, year) + + +@click.command() # type: ignore[arg-type] +@clio.with_output_directory(DEFAULT_ROOT) +@clio.with_year() +def prepare_training_data_task(output_dir: str, year: str) -> None: + prepare_training_data_main(output_dir, year) + + +@click.command() # type: ignore[arg-type] +@clio.with_output_directory(DEFAULT_ROOT) +@clio.with_queue() +def prepare_training_data(output_dir: str, queue: str) -> None: + jobmon.run_parallel( + "prepare training data", + node_args={ + "output-dir": [output_dir], + "year": clio.VALID_YEARS, + }, + task_resources={ + "queue": queue, + "cores": 1, + "memory": "10G", + "runtime": "240m", + "project": "proj_rapidresponse", + }, + runner="cdtask", ) -) -climate_stations["era5_lcz"] = era5_lcz.select(lon, lat) From 5846c7f8a58fb02dcc7b40f146342559a1252a56 Mon Sep 17 00:00:00 2001 From: collijk Date: Thu, 16 May 2024 13:47:15 -0700 Subject: [PATCH 10/71] use lcz v3 --- src/climate_downscale/model/prepare_predictors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/climate_downscale/model/prepare_predictors.py b/src/climate_downscale/model/prepare_predictors.py index a05a816..f710ace 100644 --- a/src/climate_downscale/model/prepare_predictors.py +++ b/src/climate_downscale/model/prepare_predictors.py @@ -58,7 +58,7 @@ def load_elevation( def load_lcz_data( cd_data: ClimateDownscaleData, latitudes: Sequence[int], longitudes: Sequence[int] ) -> rt.RasterArray: - path = cd_data.rub_local_climate_zones / "lcz_filter_v2.tif" + path = cd_data.rub_local_climate_zones / "lcz_filter_v3.tif" bounds = (longitudes[0], latitudes[0], longitudes[-1], latitudes[-1]) return rt.load_raster(path, bounds=bounds) From 4b43fb79e4bd90cc3a0932912d3b8e7f3ac08d37 Mon Sep 17 00:00:00 2001 From: collijk Date: Thu, 16 May 2024 13:49:53 -0700 Subject: [PATCH 11/71] Add prep training data --- src/climate_downscale/model/__init__.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/climate_downscale/model/__init__.py b/src/climate_downscale/model/__init__.py index 81a96c6..5449577 100644 --- a/src/climate_downscale/model/__init__.py +++ b/src/climate_downscale/model/__init__.py @@ -2,11 +2,17 @@ prepare_predictors, prepare_predictors_task, ) +from climate_downscale.model.prepare_training_data import ( + prepare_training_data, + prepare_training_data_task, +) RUNNERS = { "prepare_predictors": prepare_predictors, + "prepare_training_data": prepare_training_data, } TASK_RUNNERS = { "prepare_predictors": prepare_predictors_task, + "prepare_training_data": prepare_training_data_task, } From 79849a0e96534a86719283f4d9c2f88c1a93cf10 Mon Sep 17 00:00:00 2001 From: James Collins Date: Thu, 16 May 2024 15:16:01 -0700 Subject: [PATCH 12/71] Bugfixes and get prep training data running --- src/climate_downscale/model/prepare_training_data.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/climate_downscale/model/prepare_training_data.py b/src/climate_downscale/model/prepare_training_data.py index c11bc60..5c21f4b 100644 --- a/src/climate_downscale/model/prepare_training_data.py +++ b/src/climate_downscale/model/prepare_training_data.py @@ -85,8 +85,6 @@ def prepare_training_data_main(output_dir: str | Path, year: str) -> None: data.loc[missing_elevation, "elevation"] = data.loc[ missing_elevation, "target_elevation" ] - still_missing_elevation = data["elevation"] < nodata_val - data = data.loc[~still_missing_elevation] # Local climate zone data["target_lcz"] = cd_data.load_predictor("lcz_target").select( @@ -111,7 
+109,7 @@ def prepare_training_data_task(output_dir: str, year: str) -> None: @clio.with_queue() def prepare_training_data(output_dir: str, queue: str) -> None: jobmon.run_parallel( - "prepare training data", + "model prepare_training_data", node_args={ "output-dir": [output_dir], "year": clio.VALID_YEARS, @@ -119,8 +117,8 @@ def prepare_training_data(output_dir: str, queue: str) -> None: task_resources={ "queue": queue, "cores": 1, - "memory": "10G", - "runtime": "240m", + "memory": "30G", + "runtime": "30m", "project": "proj_rapidresponse", }, runner="cdtask", From 1ec16f76b8d9d020f8239e283cd3d4f8c2fcaa25 Mon Sep 17 00:00:00 2001 From: James Collins Date: Fri, 24 May 2024 15:08:26 -0700 Subject: [PATCH 13/71] Add station id and remove experimental data --- src/climate_downscale/model/prepare_training_data.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/climate_downscale/model/prepare_training_data.py b/src/climate_downscale/model/prepare_training_data.py index 5c21f4b..5122eff 100644 --- a/src/climate_downscale/model/prepare_training_data.py +++ b/src/climate_downscale/model/prepare_training_data.py @@ -22,6 +22,7 @@ def load_and_clean_climate_stations( "LONGITUDE": "lon", "TEMP": "temperature", "ELEVATION": "ncei_elevation", + "STATION": "station_id", } climate_stations = ( climate_stations.rename(columns=column_map) @@ -55,7 +56,9 @@ def get_era5_temperature( ) if "expver" in era5.coords: - era5 = era5.sel(expver=1).combine_first(era5.sel(expver=5)) + # expver == 1 is final data. expver == 5 is provisional data + # and has a very strong nonsense seasonal trend. + era5 = era5.sel(expver=1) return era5["t2m"].to_numpy() - 273.15 @@ -93,6 +96,7 @@ def prepare_training_data_main(output_dir: str | Path, year: str) -> None: data["era5_lcz"] = cd_data.load_predictor("lcz_era5").select( coords["lon"], coords["lat"] ) + cd_data.save_training_data(data, year) From 454ce1a5635cb3509b8ddf31a363e4d99409773c Mon Sep 17 00:00:00 2001 From: collijk Date: Fri, 24 May 2024 15:11:44 -0700 Subject: [PATCH 14/71] Update rra tools and jobmon usage --- poetry.lock | 18 +++++++++--------- pyproject.toml | 2 +- src/climate_downscale/extract/elevation.py | 6 ++++-- src/climate_downscale/extract/era5.py | 6 ++++-- .../extract/ncei_climate_stations.py | 8 +++++--- .../model/prepare_predictors.py | 8 +++++--- .../model/prepare_training_data.py | 8 +++++--- 7 files changed, 33 insertions(+), 23 deletions(-) diff --git a/poetry.lock b/poetry.lock index 310bfd0..b442f1f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1969,7 +1969,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash =
"sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -2178,13 +2177,13 @@ files = [ [[package]] name = "requests" -version = "2.31.0" +version = "2.32.2" description = "Python HTTP for Humans." optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, - {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, + {file = "requests-2.32.2-py3-none-any.whl", hash = "sha256:fc06670dd0ed212426dfeb94fc1b983d917c4f9847c863f313c9dfaaffb7c23c"}, + {file = "requests-2.32.2.tar.gz", hash = "sha256:dd951ff5ecf3e3b3aa26b40703ba77495dab41da839ae72ef3c8e5d8e2433289"}, ] [package.dependencies] @@ -2199,13 +2198,13 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "rra-tools" -version = "1.0.6" +version = "1.0.8" description = "Common utilities for IHME Rapid Response team pipelines." optional = false python-versions = "<4.0,>=3.10" files = [ - {file = "rra_tools-1.0.6-py3-none-any.whl", hash = "sha256:51d2f56e0b6fc13a7198e517aa61aed5fcd85c625bfb0e953a9e81ef12a4f7d9"}, - {file = "rra_tools-1.0.6.tar.gz", hash = "sha256:9abc933b7c0efc2a899d56fd71e9c89a21c82950788c0bbc559200615a17880d"}, + {file = "rra_tools-1.0.8-py3-none-any.whl", hash = "sha256:d499aa58403c2b26486a3f9a892239945aee4321067b64aac027ad5e86f39a48"}, + {file = "rra_tools-1.0.8.tar.gz", hash = "sha256:9d4bf15c4ce60a3af6b55e4e6d158446c91e880a9dd89f6fdd0c72a2d633f888"}, ] [package.dependencies] @@ -2214,6 +2213,7 @@ deep-translator = ">=1.11.4,<2.0.0" loguru = ">=0.7.2,<0.8.0" pandas = ">=2.2.2,<3.0.0" pathos = ">=0.3.2,<0.4.0" +requests = ">=2.32.2,<3.0.0" tqdm = ">=4.66.4,<5.0.0" [[package]] @@ -2683,4 +2683,4 @@ viz = ["matplotlib", "nc-time-axis", "seaborn"] [metadata] lock-version = "2.0" python-versions = ">=3.10, <3.13" -content-hash = "76b81344dbd944abdc6006c3ce2e8a8ce7ae3131f747247396f1fb01ab80e1a3" +content-hash = "f26dfb9999164fb0037e8fd2b96ea27324abfb50902d9f5e2567717902199f23" diff --git a/pyproject.toml b/pyproject.toml index 79a58f4..73fd3c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ xarray = "^2024.3.0" cdsapi = "^0.7.0" matplotlib = "^3.8.4" scikit-learn = "^1.4.2" -rra-tools = "^1.0.6" +rra-tools = "^1.0.8" netcdf4 = "^1.6.5" pyarrow = "^16.0.0" types-requests = "^2.31.0.20240406" diff --git a/src/climate_downscale/extract/elevation.py b/src/climate_downscale/extract/elevation.py index 5549886..c12b294 100644 --- a/src/climate_downscale/extract/elevation.py +++ b/src/climate_downscale/extract/elevation.py @@ -115,13 +115,16 @@ def extract_elevation( lon_starts = list(range(-180, 180, FETCH_SIZE)) jobmon.run_parallel( + runner="cdtask", task_name="extract_era5", node_args={ - "output-dir": [output_dir], "model-name": [model_name], "lat-start": lat_starts, "lon-start": lon_starts, }, + task_args={ + "output-dir": output_dir, + }, task_resources={ "queue": queue, "cores": 1, @@ -129,5 +132,4 @@ def extract_elevation( "runtime": "240m", "project": "proj_rapidresponse", }, - runner="cdtask", ) diff --git a/src/climate_downscale/extract/era5.py b/src/climate_downscale/extract/era5.py index e27ad8e..38c9ea9 100644 --- a/src/climate_downscale/extract/era5.py +++ b/src/climate_downscale/extract/era5.py @@ -72,12 +72,15 @@ def extract_era5( variables = clio.VALID_CLIMATE_VARIABLES if variable == clio.RUN_ALL else [variable] jobmon.run_parallel( + runner="cdtask", 
task_name="extract_era5", node_args={ - "output-dir": [output_dir], "year": years, "variable": variables, }, + task_args={ + "output-dir": output_dir, + }, task_resources={ "queue": queue, "cores": 1, @@ -85,5 +88,4 @@ def extract_era5( "runtime": "240m", "project": "proj_rapidresponse", }, - runner="cdtask", ) diff --git a/src/climate_downscale/extract/ncei_climate_stations.py b/src/climate_downscale/extract/ncei_climate_stations.py index 048f6a8..c8d770e 100644 --- a/src/climate_downscale/extract/ncei_climate_stations.py +++ b/src/climate_downscale/extract/ncei_climate_stations.py @@ -54,11 +54,14 @@ def extract_ncei_climate_stations_task(output_dir: str, year: str) -> None: @with_queue() def extract_ncei_climate_stations(output_dir: str, queue: str) -> None: jobmon.run_parallel( - "extract ncei", + runner="cdtask", + task_name="extract ncei", node_args={ - "output-dir": [output_dir], "year": EXTRACTION_YEARS, }, + task_args={ + "output-dir": output_dir, + }, task_resources={ "queue": queue, "cores": 1, @@ -66,5 +69,4 @@ def extract_ncei_climate_stations(output_dir: str, queue: str) -> None: "runtime": "240m", "project": "proj_rapidresponse", }, - runner="cdtask", ) diff --git a/src/climate_downscale/model/prepare_predictors.py b/src/climate_downscale/model/prepare_predictors.py index f710ace..c858c90 100644 --- a/src/climate_downscale/model/prepare_predictors.py +++ b/src/climate_downscale/model/prepare_predictors.py @@ -123,12 +123,15 @@ def prepare_predictors_task( @clio.with_queue() def prepare_predictors(output_dir: str, queue: str) -> None: jobmon.run_parallel( - "model prepare_predictors", + runner="cdtask", + task_name="model prepare_predictors", node_args={ - "output-dir": [output_dir], "lat-start": clio.LATITUDES, "lon-start": clio.LONGITUDES, }, + task_args={ + "output-dir": output_dir, + }, task_resources={ "queue": queue, "cores": 1, @@ -136,5 +139,4 @@ def prepare_predictors(output_dir: str, queue: str) -> None: "runtime": "45m", "project": "proj_rapidresponse", }, - runner="cdtask", ) diff --git a/src/climate_downscale/model/prepare_training_data.py b/src/climate_downscale/model/prepare_training_data.py index c11bc60..081540f 100644 --- a/src/climate_downscale/model/prepare_training_data.py +++ b/src/climate_downscale/model/prepare_training_data.py @@ -111,11 +111,14 @@ def prepare_training_data_task(output_dir: str, year: str) -> None: @clio.with_queue() def prepare_training_data(output_dir: str, queue: str) -> None: jobmon.run_parallel( - "prepare training data", + runner="cdtask", + task_name="prepare training data", node_args={ - "output-dir": [output_dir], "year": clio.VALID_YEARS, }, + task_args={ + "output-dir": output_dir, + }, task_resources={ "queue": queue, "cores": 1, @@ -123,5 +126,4 @@ def prepare_training_data(output_dir: str, queue: str) -> None: "runtime": "240m", "project": "proj_rapidresponse", }, - runner="cdtask", ) From bad9727d33b2b913f5cffb04bc50bbaef96c58eb Mon Sep 17 00:00:00 2001 From: collijk Date: Fri, 24 May 2024 16:22:13 -0700 Subject: [PATCH 15/71] REvamp era5 download script --- src/climate_downscale/cli_options.py | 20 ++++++++ src/climate_downscale/data.py | 10 ++++ src/climate_downscale/extract/era5.py | 68 ++++++++++++++------------- 3 files changed, 66 insertions(+), 32 deletions(-) diff --git a/src/climate_downscale/cli_options.py b/src/climate_downscale/cli_options.py index a1eeeec..aa5f054 100644 --- a/src/climate_downscale/cli_options.py +++ b/src/climate_downscale/cli_options.py @@ -52,6 +52,8 @@ def with_month( 
VALID_CLIMATE_VARIABLES = [ "total_precipitation", "2m_temperature", + "2m_dewpoint_temperature", + "surface_pressure", ] @@ -68,6 +70,22 @@ def with_climate_variable( ) +VALID_ERA5_DATASETS = ["reanalysis-era5-land", "reanalysis-era5-single-levels"] + + +def with_era5_dataset( + *, + allow_all: bool = False, +) -> ClickOption[_P, _T]: + return with_choice( + "era5-dataset", + "d", + allow_all=allow_all, + choices=VALID_ERA5_DATASETS, + help="Dataset to extract.", + ) + + STRIDE = 30 LATITUDES = [str(lat) for lat in range(-90, 90, STRIDE)] LONGITUDES = [str(lon) for lon in range(-180, 180, STRIDE)] @@ -101,12 +119,14 @@ def with_lon_start( "VALID_YEARS", "VALID_MONTHS", "VALID_CLIMATE_VARIABLES", + "VALID_DATASETS", "STRIDE", "LATITUDES", "LONGITUDES", "with_year", "with_month", "with_climate_variable", + "with_dataset", "with_lat_start", "with_lon_start", "with_output_directory", diff --git a/src/climate_downscale/data.py b/src/climate_downscale/data.py index 6ca01d7..7535bac 100644 --- a/src/climate_downscale/data.py +++ b/src/climate_downscale/data.py @@ -26,6 +26,16 @@ def credentials_root(self) -> Path: def extracted_data(self) -> Path: return self.root / "extracted_data" + @property + def era5(self) -> Path: + return self.extracted_data / "era5" + + def era5_path(self, dataset: str, variable: str, year: int | str) -> Path: + return self.era5 / f"{dataset}_{variable}_{year}.nc" + + def load_era5(self, dataset: str, variable: str, year: int | str) -> xr.Dataset: + return xr.open_dataset(self.era5_path(dataset, variable, year)) + @property def era5_temperature_daily_mean(self) -> Path: return self.extracted_data / "era5_temperature_daily_mean" diff --git a/src/climate_downscale/extract/era5.py b/src/climate_downscale/extract/era5.py index 38c9ea9..69645c5 100644 --- a/src/climate_downscale/extract/era5.py +++ b/src/climate_downscale/extract/era5.py @@ -11,9 +11,9 @@ def extract_era5_main( output_dir: str | Path, + era5_dataset: str, + climate_variable: str, year: int | str, - month: str, - variable: str, ) -> None: cddata = ClimateDownscaleData(output_dir) cred_path = cddata.credentials_root / "copernicus.txt" @@ -21,62 +21,66 @@ def extract_era5_main( copernicus = cdsapi.Client(url=url, key=key) kwargs = { - "dataset": "reanalysis-era5-land", "product_type": "reanalysis", - "statistic": "daily_mean", - "variable": "total_precipitation", - "year": "2020", - "month": "01", - "time_zone": "UTC+00:00", - "frequency": "1-hourly", - "grid": "0.1/0.1", - "area": {"lat": [-90, 90], "lon": [-180, 180]}, + "variable": climate_variable, + "year": year, + "month": clio.VALID_MONTHS, + "time": [f"{h:02d}:00" for h in range(0, 24)], + "format": "netcdf", } - result = copernicus.service( - "tool.toolbox.orchestrator.workflow", - params={ - "realm": "user-apps", - "project": "app-c3s-daily-era5-statistics", - "version": "master", - "kwargs": kwargs, - "workflow_name": "application", - }, - ) - - out_path = cddata.era5_temperature_daily_mean / f"{variable}_{year}_{month}.nc" + out_path = cddata.era5_path(era5_dataset, climate_variable, year) touch(out_path, exist_ok=True) - copernicus.download(result, [out_path]) + + copernicus.retrieve( + era5_dataset, + kwargs, + out_path, + ) @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) -@clio.with_year() -@clio.with_month() +@clio.with_era5_dataset() @clio.with_climate_variable() -def extract_era5_task(year: str, month: str, climate_variable: str) -> None: - extract_era5_main(DEFAULT_ROOT, year, month, climate_variable) 
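For reference, the request assembled by extract_era5_main above expands to roughly this one-shot cdsapi call (a sketch: the dataset, variable, and year are illustrative, and credentials normally come from the copernicus.txt file read above or from ~/.cdsapirc):

import cdsapi

client = cdsapi.Client()
client.retrieve(
    "reanalysis-era5-single-levels",
    {
        "product_type": "reanalysis",
        "variable": "2m_temperature",
        "year": "2020",
        "month": [f"{m:02d}" for m in range(1, 13)],  # clio.VALID_MONTHS
        "time": [f"{h:02d}:00" for h in range(24)],   # every hour of the day
        "format": "netcdf",
    },
    "reanalysis-era5-single-levels_2m_temperature_2020.nc",  # mirrors era5_path naming
)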
+@clio.with_year() +def extract_era5_task( + output_dir: str, + era5_dataset: str, + climate_variable: str, + year: str, +) -> None: + extract_era5_main( + output_dir, + era5_dataset, + climate_variable, + year, + ) @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) -@clio.with_year(allow_all=True) +@clio.with_era5_dataset(allow_all=True) @clio.with_climate_variable(allow_all=True) +@clio.with_year(allow_all=True) @clio.with_queue() def extract_era5( output_dir: str, + era5_dataset: str, + climate_variable: str, year: str, - variable: str, queue: str, ) -> None: + datasets = clio.VALID_ERA5_DATASETS if era5_dataset == clio.RUN_ALL else [era5_dataset] + variables = clio.VALID_CLIMATE_VARIABLES if climate_variable == clio.RUN_ALL else [climate_variable] years = clio.VALID_YEARS if year == clio.RUN_ALL else [year] - variables = clio.VALID_CLIMATE_VARIABLES if variable == clio.RUN_ALL else [variable] jobmon.run_parallel( runner="cdtask", task_name="extract_era5", node_args={ + "era5-dataset": datasets, + "climate-variable": variables, "year": years, - "variable": variables, }, task_args={ "output-dir": output_dir, From e8f41bec169e1b2651cc9b3f699b60f6364d99e8 Mon Sep 17 00:00:00 2001 From: collijk Date: Fri, 24 May 2024 16:27:32 -0700 Subject: [PATCH 16/71] Add month --- src/climate_downscale/data.py | 14 +++++++++----- src/climate_downscale/extract/era5.py | 26 ++++++++++++++++++++------ 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/src/climate_downscale/data.py b/src/climate_downscale/data.py index 7535bac..48ba036 100644 --- a/src/climate_downscale/data.py +++ b/src/climate_downscale/data.py @@ -30,11 +30,15 @@ def extracted_data(self) -> Path: def era5(self) -> Path: return self.extracted_data / "era5" - def era5_path(self, dataset: str, variable: str, year: int | str) -> Path: - return self.era5 / f"{dataset}_{variable}_{year}.nc" - - def load_era5(self, dataset: str, variable: str, year: int | str) -> xr.Dataset: - return xr.open_dataset(self.era5_path(dataset, variable, year)) + def era5_path( + self, dataset: str, variable: str, year: int | str, month: str + ) -> Path: + return self.era5 / f"{dataset}_{variable}_{year}_{month}.nc" + + def load_era5( + self, dataset: str, variable: str, year: int | str, month: str + ) -> xr.Dataset: + return xr.open_dataset(self.era5_path(dataset, variable, year, month)) @property def era5_temperature_daily_mean(self) -> Path: diff --git a/src/climate_downscale/extract/era5.py b/src/climate_downscale/extract/era5.py index 69645c5..3c73182 100644 --- a/src/climate_downscale/extract/era5.py +++ b/src/climate_downscale/extract/era5.py @@ -14,6 +14,7 @@ def extract_era5_main( era5_dataset: str, climate_variable: str, year: int | str, + month: str, ) -> None: cddata = ClimateDownscaleData(output_dir) cred_path = cddata.credentials_root / "copernicus.txt" @@ -24,11 +25,11 @@ def extract_era5_main( "product_type": "reanalysis", "variable": climate_variable, "year": year, - "month": clio.VALID_MONTHS, - "time": [f"{h:02d}:00" for h in range(0, 24)], + "month": month, + "time": [f"{h:02d}:00" for h in range(24)], "format": "netcdf", } - out_path = cddata.era5_path(era5_dataset, climate_variable, year) + out_path = cddata.era5_path(era5_dataset, climate_variable, year, month) touch(out_path, exist_ok=True) copernicus.retrieve( @@ -43,17 +44,20 @@ def extract_era5_main( @clio.with_era5_dataset() @clio.with_climate_variable() @clio.with_year() +@clio.with_month() def extract_era5_task( output_dir: str, 
era5_dataset: str, climate_variable: str, year: str, + month: str, ) -> None: extract_era5_main( output_dir, era5_dataset, climate_variable, year, + month, ) @@ -62,17 +66,26 @@ def extract_era5_task( @clio.with_era5_dataset(allow_all=True) @clio.with_climate_variable(allow_all=True) @clio.with_year(allow_all=True) +@clio.with_month(allow_all=True) @clio.with_queue() -def extract_era5( +def extract_era5( # noqa: PLR0913 output_dir: str, era5_dataset: str, climate_variable: str, year: str, + month: str, queue: str, ) -> None: - datasets = clio.VALID_ERA5_DATASETS if era5_dataset == clio.RUN_ALL else [era5_dataset] - variables = clio.VALID_CLIMATE_VARIABLES if climate_variable == clio.RUN_ALL else [climate_variable] + datasets = ( + clio.VALID_ERA5_DATASETS if era5_dataset == clio.RUN_ALL else [era5_dataset] + ) + variables = ( + clio.VALID_CLIMATE_VARIABLES + if climate_variable == clio.RUN_ALL + else [climate_variable] + ) years = clio.VALID_YEARS if year == clio.RUN_ALL else [year] + months = clio.VALID_MONTHS if month == clio.RUN_ALL else [month] jobmon.run_parallel( runner="cdtask", @@ -81,6 +94,7 @@ def extract_era5( "era5-dataset": datasets, "climate-variable": variables, "year": years, + "month": months, }, task_args={ "output-dir": output_dir, From 407bdc51ffbdce6d8e9cf95e0b6d3602dc6ec349 Mon Sep 17 00:00:00 2001 From: collijk Date: Fri, 24 May 2024 16:36:12 -0700 Subject: [PATCH 17/71] Need to request day --- src/climate_downscale/extract/era5.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/climate_downscale/extract/era5.py b/src/climate_downscale/extract/era5.py index 879a13b..a0055b7 100644 --- a/src/climate_downscale/extract/era5.py +++ b/src/climate_downscale/extract/era5.py @@ -26,6 +26,7 @@ def extract_era5_main( "variable": climate_variable, "year": year, "month": month, + "day": [f"{d:02d}" for d in range(1, 32)], "time": [f"{h:02d}:00" for h in range(24)], "format": "netcdf", } From ae327778ec492470fb0c5aff3eee4288a1718f7d Mon Sep 17 00:00:00 2001 From: collijk Date: Fri, 24 May 2024 16:45:43 -0700 Subject: [PATCH 18/71] Add caching --- src/climate_downscale/extract/era5.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/climate_downscale/extract/era5.py b/src/climate_downscale/extract/era5.py index a0055b7..7c2020d 100644 --- a/src/climate_downscale/extract/era5.py +++ b/src/climate_downscale/extract/era5.py @@ -31,13 +31,21 @@ def extract_era5_main( "format": "netcdf", } out_path = cddata.era5_path(era5_dataset, climate_variable, year, month) - touch(out_path, exist_ok=True) + if out_path.exists(): + print("Already extracted:", out_path) + return - copernicus.retrieve( - era5_dataset, - kwargs, - out_path, - ) + touch(out_path) + try: + result = copernicus.retrieve( + era5_dataset, + kwargs, + ) + result.download(out_path) + except Exception as e: + print(f"Failed to download {era5_dataset} {climate_variable} {year} {month}") + out_path.unlink() + raise e # noqa: TRY201 @click.command() # type: ignore[arg-type] From e95d05088f341d42fa64b6deaf457790c9476c85 Mon Sep 17 00:00:00 2001 From: James Collins Date: Mon, 27 May 2024 15:08:33 -0700 Subject: [PATCH 19/71] compress files --- src/climate_downscale/extract/era5.py | 82 ++++++++++++++++++++------- 1 file changed, 61 insertions(+), 21 deletions(-) diff --git a/src/climate_downscale/extract/era5.py b/src/climate_downscale/extract/era5.py index 7c2020d..364ea8d 100644 --- a/src/climate_downscale/extract/era5.py +++ b/src/climate_downscale/extract/era5.py @@ -4,6 +4,7 @@ 
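The caching added in the previous patch is an idempotency idiom the download/compress split below keeps relying on: skip work whose output already exists, and never leave a partial file behind when a step fails. A generic sketch of the idiom (an illustrative helper, not code from this series):

from pathlib import Path
from typing import Callable

def fetch_if_needed(out_path: Path, fetch: Callable[[Path], None]) -> None:
    # Finished work makes the task a no-op, so it is safe to re-run.
    if out_path.exists():
        print("Already extracted:", out_path)
        return
    try:
        fetch(out_path)
    except Exception:
        # Drop the partial file so the next attempt starts clean.
        out_path.unlink(missing_ok=True)
        raise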
import click from rra_tools import jobmon from rra_tools.shell_tools import touch +import xarray as xr from climate_downscale import cli_options as clio from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData @@ -19,34 +20,73 @@ def extract_era5_main( cddata = ClimateDownscaleData(output_dir) cred_path = cddata.credentials_root / "copernicus.txt" url, key = cred_path.read_text().strip().split("\n") - - copernicus = cdsapi.Client(url=url, key=key) - kwargs = { - "product_type": "reanalysis", - "variable": climate_variable, - "year": year, - "month": month, - "day": [f"{d:02d}" for d in range(1, 32)], - "time": [f"{h:02d}:00" for h in range(24)], - "format": "netcdf", - } + out_path = cddata.era5_path(era5_dataset, climate_variable, year, month) + raw_out_path = out_path.with_stem(f"{out_path.stem}_raw") + if out_path.exists(): - print("Already extracted:", out_path) - return + if raw_out_path.exists(): + # We ran into an error before completing compression, likely a + # memory error. Delete and retry. + out_path.unlink() + else: + print("Already extracted:", out_path) + return + + try: + if not raw_out_path.exists(): + return + touch(raw_out_path) + + print('Connecting to copernicus') + copernicus = cdsapi.Client(url=url, key=key) + kwargs = { + "product_type": "reanalysis", + "variable": climate_variable, + "year": year, + "month": month, + "day": [f"{d:02d}" for d in range(1, 32)], + "time": [f"{h:02d}:00" for h in range(24)], + "format": "netcdf", + } + print("Downloading...") + result = copernicus.retrieve( + era5_dataset, + kwargs, + ) + result.download(raw_out_path) + else: + print("Already downloaded:", raw_out_path) + except Exception as e: + print(f"Failed to download {era5_dataset} {climate_variable} {year} {month}") + if raw_out_path.exists(): + raw_out_path.unlink() + raise e # noqa: TRY201 touch(out_path) try: - result = copernicus.retrieve( - era5_dataset, - kwargs, + print("Compressing...") + ds = xr.open_dataset(raw_out_path) + var_name = list(ds)[0] # These are all single variable datasets + og_encoding = ds[var_name].encoding + ds.to_netcdf( + out_path, + encoding={ + var_name:{ + **og_encoding, + "zlib": True, + "complevel": 1, + } + } ) - result.download(out_path) + except Exception as e: - print(f"Failed to download {era5_dataset} {climate_variable} {year} {month}") - out_path.unlink() + print(f'Failed to compress {era5_dataset} {climate_variable} {year} {month}') + if out_path.exists(): + out_path.unlink() raise e # noqa: TRY201 + raw_out_path.unlink() @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) @@ -111,8 +151,8 @@ def extract_era5( # noqa: PLR0913 task_resources={ "queue": queue, "cores": 1, - "memory": "10G", - "runtime": "240m", + "memory": "120G", + "runtime": "600m", "project": "proj_rapidresponse", }, ) From a07cbfcc41fd086d8e1d9ea5eda1311c5194adf4 Mon Sep 17 00:00:00 2001 From: collijk Date: Mon, 27 May 2024 17:32:49 -0700 Subject: [PATCH 20/71] Add infrastructure to download different filetypes and do compression on results --- poetry.lock | 8 +- pyproject.toml | 2 +- src/climate_downscale/cli_options.py | 4 +- src/climate_downscale/extract/__init__.py | 6 +- src/climate_downscale/extract/era5.py | 236 +++++++++++++++------- 5 files changed, 173 insertions(+), 83 deletions(-) diff --git a/poetry.lock b/poetry.lock index b442f1f..1bd9e79 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2198,13 +2198,13 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "rra-tools" -version = "1.0.8" +version = 
"1.0.9" description = "Common utilities for IHME Rapid Response team pipelines." optional = false python-versions = "<4.0,>=3.10" files = [ - {file = "rra_tools-1.0.8-py3-none-any.whl", hash = "sha256:d499aa58403c2b26486a3f9a892239945aee4321067b64aac027ad5e86f39a48"}, - {file = "rra_tools-1.0.8.tar.gz", hash = "sha256:9d4bf15c4ce60a3af6b55e4e6d158446c91e880a9dd89f6fdd0c72a2d633f888"}, + {file = "rra_tools-1.0.9-py3-none-any.whl", hash = "sha256:9deb367bfb13a627df36263f6771b9c10ff8cd0458915750201b046d4343c7bd"}, + {file = "rra_tools-1.0.9.tar.gz", hash = "sha256:fe5040ade3a49498f124ec557778743f47c3e95d83de48aedd197c71abd29e78"}, ] [package.dependencies] @@ -2683,4 +2683,4 @@ viz = ["matplotlib", "nc-time-axis", "seaborn"] [metadata] lock-version = "2.0" python-versions = ">=3.10, <3.13" -content-hash = "f26dfb9999164fb0037e8fd2b96ea27324abfb50902d9f5e2567717902199f23" +content-hash = "eac4d5a666c56578b00e14d3cf04ffe5ce70619ef1e1b25c374781f4e4d08e61" diff --git a/pyproject.toml b/pyproject.toml index 73fd3c8..6b2cb8b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ xarray = "^2024.3.0" cdsapi = "^0.7.0" matplotlib = "^3.8.4" scikit-learn = "^1.4.2" -rra-tools = "^1.0.8" +rra-tools = "^1.0.9" netcdf4 = "^1.6.5" pyarrow = "^16.0.0" types-requests = "^2.31.0.20240406" diff --git a/src/climate_downscale/cli_options.py b/src/climate_downscale/cli_options.py index aa5f054..fb75ae2 100644 --- a/src/climate_downscale/cli_options.py +++ b/src/climate_downscale/cli_options.py @@ -119,14 +119,14 @@ def with_lon_start( "VALID_YEARS", "VALID_MONTHS", "VALID_CLIMATE_VARIABLES", - "VALID_DATASETS", + "VALID_ERA5_DATASETS", "STRIDE", "LATITUDES", "LONGITUDES", "with_year", "with_month", "with_climate_variable", - "with_dataset", + "with_era5_dataset", "with_lat_start", "with_lon_start", "with_output_directory", diff --git a/src/climate_downscale/extract/__init__.py b/src/climate_downscale/extract/__init__.py index 7651931..364bcf6 100644 --- a/src/climate_downscale/extract/__init__.py +++ b/src/climate_downscale/extract/__init__.py @@ -3,8 +3,9 @@ extract_elevation_task, ) from climate_downscale.extract.era5 import ( + download_era5_task, extract_era5, - extract_era5_task, + unzip_and_compress_era5_task, ) from climate_downscale.extract.ncei_climate_stations import ( extract_ncei_climate_stations, @@ -23,7 +24,8 @@ TASK_RUNNERS = { "ncei": extract_ncei_climate_stations_task, - "era5": extract_era5_task, + "era5_download": download_era5_task, + "era5_compress": unzip_and_compress_era5_task, "lcz": extract_rub_local_climate_zones, "elevation": extract_elevation_task, } diff --git a/src/climate_downscale/extract/era5.py b/src/climate_downscale/extract/era5.py index 364ea8d..12f0535 100644 --- a/src/climate_downscale/extract/era5.py +++ b/src/climate_downscale/extract/era5.py @@ -1,16 +1,30 @@ +import itertools +import zipfile from pathlib import Path import cdsapi import click +import xarray as xr from rra_tools import jobmon from rra_tools.shell_tools import touch -import xarray as xr from climate_downscale import cli_options as clio from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData -def extract_era5_main( +def get_download_spec( + final_out_path: Path, +) -> tuple[Path, str]: + if "land" in final_out_path.stem: + download_path = final_out_path.with_suffix(".zip") + download_format = "netcdf.zip" + else: + download_path = final_out_path.with_stem(f"{final_out_path.stem}_raw") + download_format = "netcdf" + return download_path, download_format + + +def download_era5_main( 
output_dir: str | Path, era5_dataset: str, climate_variable: str, @@ -18,75 +32,84 @@ def extract_era5_main( month: str, ) -> None: cddata = ClimateDownscaleData(output_dir) - cred_path = cddata.credentials_root / "copernicus.txt" - url, key = cred_path.read_text().strip().split("\n") - - out_path = cddata.era5_path(era5_dataset, climate_variable, year, month) - raw_out_path = out_path.with_stem(f"{out_path.stem}_raw") - - if out_path.exists(): - if raw_out_path.exists(): - # We ran into an error before completing compression, likely a - # memory error. Delete and retry. - out_path.unlink() - else: - print("Already extracted:", out_path) - return - + + final_out_path = cddata.era5_path(era5_dataset, climate_variable, year, month) + download_path, download_format = get_download_spec(final_out_path) + + if download_path.exists(): + print("Already downloaded:", download_path) + return + try: - if not raw_out_path.exists(): - return - touch(raw_out_path) - - print('Connecting to copernicus') - copernicus = cdsapi.Client(url=url, key=key) - kwargs = { - "product_type": "reanalysis", - "variable": climate_variable, - "year": year, - "month": month, - "day": [f"{d:02d}" for d in range(1, 32)], - "time": [f"{h:02d}:00" for h in range(24)], - "format": "netcdf", - } - print("Downloading...") - result = copernicus.retrieve( - era5_dataset, - kwargs, - ) - result.download(raw_out_path) - else: - print("Already downloaded:", raw_out_path) + touch(download_path) + + print("Connecting to copernicus") + + cred_path = cddata.credentials_root / "copernicus.txt" + url, key = cred_path.read_text().strip().split("\n") + copernicus = cdsapi.Client(url=url, key=key) + + print("Downloading...") + kwargs = { + "product_type": "reanalysis", + "variable": climate_variable, + "year": year, + "month": month, + "day": [f"{d:02d}" for d in range(1, 32)], + "time": [f"{h:02d}:00" for h in range(24)], + "format": download_format, + } + + result = copernicus.retrieve( + era5_dataset, + kwargs, + ) + result.download(download_path) except Exception as e: print(f"Failed to download {era5_dataset} {climate_variable} {year} {month}") - if raw_out_path.exists(): - raw_out_path.unlink() + if download_path.exists(): + download_path.unlink() raise e # noqa: TRY201 - touch(out_path) - try: - print("Compressing...") - ds = xr.open_dataset(raw_out_path) - var_name = list(ds)[0] # These are all single variable datasets - og_encoding = ds[var_name].encoding - ds.to_netcdf( - out_path, - encoding={ - var_name:{ - **og_encoding, - "zlib": True, - "complevel": 1, - } + +def unzip_and_compress_era5( + output_dir: str | Path, + era5_dataset: str, + climate_variable: str, + year: int | str, + month: str, +) -> None: + cddata = ClimateDownscaleData(output_dir) + final_out_path = cddata.era5_path(era5_dataset, climate_variable, year, month) + uncompressed_path = final_out_path.with_stem(f"{final_out_path.stem}_raw") + + if era5_dataset == "reanalysis-era5-land": + print("Unzipping...") + # This data needs to be unzipped first. 
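+        # (The "netcdf.zip" download format wraps exactly one .nc member;
+        # the guard below enforces that single-member assumption before
+        # extracting it.)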
+ zip_path = final_out_path.with_suffix(".zip") + touch(uncompressed_path) + with zipfile.ZipFile(zip_path) as zf: + zinfo = zf.infolist() + if len(zinfo) != 1: + msg = f"Expected a single file in {zip_path}" + raise ValueError(msg) + zf.extract(zinfo[0], uncompressed_path) + + touch(final_out_path) + ds = xr.open_dataset(final_out_path) + var_name = next(iter(ds)) # These are all single variable datasets + og_encoding = ds[var_name].encoding + ds.to_netcdf( + final_out_path, + encoding={ + var_name: { + **og_encoding, + "zlib": True, + "complevel": 1, } - ) - - except Exception as e: - print(f'Failed to compress {era5_dataset} {climate_variable} {year} {month}') - if out_path.exists(): - out_path.unlink() - raise e # noqa: TRY201 + }, + ) - raw_out_path.unlink() @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) @@ -94,14 +117,36 @@ def extract_era5_main( @clio.with_climate_variable() @clio.with_year() @clio.with_month() -def extract_era5_task( +def download_era5_task( output_dir: str, era5_dataset: str, climate_variable: str, year: str, month: str, ) -> None: - extract_era5_main( + download_era5_main( + output_dir, + era5_dataset, + climate_variable, + year, + month, + ) + + +@click.command() # type: ignore[arg-type] +@clio.with_output_directory(DEFAULT_ROOT) +@clio.with_era5_dataset() +@clio.with_climate_variable() +@clio.with_year() +@clio.with_month() +def unzip_and_compress_era5_task( + output_dir: str, + era5_dataset: str, + climate_variable: str, + year: str, + month: str, +) -> None: + unzip_and_compress_era5( output_dir, era5_dataset, climate_variable, @@ -125,6 +170,8 @@ def extract_era5( # noqa: PLR0913 month: str, queue: str, ) -> None: + cddata = ClimateDownscaleData(output_dir) + datasets = ( clio.VALID_ERA5_DATASETS if era5_dataset == clio.RUN_ALL else [era5_dataset] ) @@ -136,23 +183,64 @@ def extract_era5( # noqa: PLR0913 years = clio.VALID_YEARS if year == clio.RUN_ALL else [year] months = clio.VALID_MONTHS if month == clio.RUN_ALL else [month] + to_download = [] + to_compress = [] + for dataset, variable, year, month in itertools.product( + datasets, variables, years, months + ): + final_out_path = cddata.era5_path(era5_dataset, climate_variable, year, month) + download_path, _ = get_download_spec(final_out_path) + + if final_out_path.exists() and download_path.exists(): + # We broke in the middle of processing this file. Don't re-download, + # just reprocess. 
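+            # A half-written final file next to a surviving download means only
+            # the compression step failed, so the spec is queued for recompression
+            # without being re-downloaded.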
+ final_out_path.unlink() + to_compress.append((dataset, variable, year, month)) + elif final_out_path.exists(): + # We've already extracted this dataset + continue + + to_download.append((dataset, variable, year, month)) + to_compress.append((dataset, variable, year, month)) + jobmon.run_parallel( runner="cdtask", - task_name="extract era5", - node_args={ - "era5-dataset": datasets, - "climate-variable": variables, - "year": years, - "month": months, - }, + task_name="extract era5_download", + flat_node_args=( + ("era5-dataset", "climate-variable", "year", "month"), + to_compress, + ), task_args={ "output-dir": output_dir, }, task_resources={ "queue": queue, "cores": 1, - "memory": "120G", + "memory": "10G", "runtime": "600m", "project": "proj_rapidresponse", }, + max_attempts=1, + concurrency_limit=25, + ) + + jobmon.run_parallel( + runner="cdtask", + task_name="extract era5_compress", + flat_node_args=( + ("era5-dataset", "climate-variable", "year", "month"), + to_compress, + ), + task_args={ + "output-dir": output_dir, + }, + task_resources={ + "queue": queue, + "cores": 1, + "memory": "125G", + "runtime": "30m", + "project": "proj_rapidresponse", + }, + max_attempts=1, + concurrency_limit=500, ) From f8081d27da00fed6a901fe514bb82cd50d084332 Mon Sep 17 00:00:00 2001 From: James Collins Date: Sun, 9 Jun 2024 14:31:19 -0700 Subject: [PATCH 21/71] Expand variables and parallelize over users --- poetry.lock | 10 +- pyproject.toml | 2 +- src/climate_downscale/cli_options.py | 11 ++- src/climate_downscale/extract/era5.py | 129 +++++++++++++++++++------- 4 files changed, 109 insertions(+), 43 deletions(-) diff --git a/poetry.lock b/poetry.lock index 1bd9e79..0d9646d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "affine" @@ -2198,13 +2198,13 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "rra-tools" -version = "1.0.9" +version = "1.0.10" description = "Common utilities for IHME Rapid Response team pipelines." 
optional = false python-versions = "<4.0,>=3.10" files = [ - {file = "rra_tools-1.0.9-py3-none-any.whl", hash = "sha256:9deb367bfb13a627df36263f6771b9c10ff8cd0458915750201b046d4343c7bd"}, - {file = "rra_tools-1.0.9.tar.gz", hash = "sha256:fe5040ade3a49498f124ec557778743f47c3e95d83de48aedd197c71abd29e78"}, + {file = "rra_tools-1.0.10-py3-none-any.whl", hash = "sha256:04a16fb8ca1f60b25360a709367a34497ced5176c506668cfec4dbce7f1b75e7"}, + {file = "rra_tools-1.0.10.tar.gz", hash = "sha256:9a43e76061d8538c4545fe59a0d8ecc146eed9c5265c59c579bc7bcb00da5677"}, ] [package.dependencies] @@ -2683,4 +2683,4 @@ viz = ["matplotlib", "nc-time-axis", "seaborn"] [metadata] lock-version = "2.0" python-versions = ">=3.10, <3.13" -content-hash = "eac4d5a666c56578b00e14d3cf04ffe5ce70619ef1e1b25c374781f4e4d08e61" +content-hash = "da6f45d547ceb2940cf87d9792ce11d7115e9b11a405ab3420dce9850d2a092f" diff --git a/pyproject.toml b/pyproject.toml index 6b2cb8b..017e751 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ xarray = "^2024.3.0" cdsapi = "^0.7.0" matplotlib = "^3.8.4" scikit-learn = "^1.4.2" -rra-tools = "^1.0.9" +rra-tools = "^1.0.10" netcdf4 = "^1.6.5" pyarrow = "^16.0.0" types-requests = "^2.31.0.20240406" diff --git a/src/climate_downscale/cli_options.py b/src/climate_downscale/cli_options.py index fb75ae2..8bcacfd 100644 --- a/src/climate_downscale/cli_options.py +++ b/src/climate_downscale/cli_options.py @@ -50,10 +50,17 @@ def with_month( VALID_CLIMATE_VARIABLES = [ - "total_precipitation", - "2m_temperature", + "10m_u_component_of_wind", + "10m_v_component_of_wind", "2m_dewpoint_temperature", + "2m_temperature", + "surface_net_solar_radiation", + "surface_net_thermal_radiation", "surface_pressure", + "surface_solar_radiation_downwards", + "surface_thermal_radiation_downwards", + "total_precipitation", + "total_sky_direct_solar_radiation_at_surface", ] diff --git a/src/climate_downscale/extract/era5.py b/src/climate_downscale/extract/era5.py index 12f0535..83c4108 100644 --- a/src/climate_downscale/extract/era5.py +++ b/src/climate_downscale/extract/era5.py @@ -11,6 +11,8 @@ from climate_downscale import cli_options as clio from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData +import yaml + def get_download_spec( final_out_path: Path, @@ -30,6 +32,7 @@ def download_era5_main( climate_variable: str, year: int | str, month: str, + user: str, ) -> None: cddata = ClimateDownscaleData(output_dir) @@ -45,8 +48,10 @@ def download_era5_main( print("Connecting to copernicus") - cred_path = cddata.credentials_root / "copernicus.txt" - url, key = cred_path.read_text().strip().split("\n") + cred_path = cddata.credentials_root / "copernicus.yaml" + credentials = yaml.safe_load(cred_path.read_text()) + url = credentials['url'] + key = credentials['keys'][user] copernicus = cdsapi.Client(url=url, key=key) print("Downloading...") @@ -81,22 +86,35 @@ def unzip_and_compress_era5( ) -> None: cddata = ClimateDownscaleData(output_dir) final_out_path = cddata.era5_path(era5_dataset, climate_variable, year, month) + zip_path = final_out_path.with_suffix(".zip") uncompressed_path = final_out_path.with_stem(f"{final_out_path.stem}_raw") - + if era5_dataset == "reanalysis-era5-land": print("Unzipping...") - # This data needs to be unzipped first. - zip_path = final_out_path.with_suffix(".zip") + # This data needs to be unzipped first. 
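+            # Opening the archive is enough to validate it: ZipFile reads the
+            # central directory on construction and raises BadZipFile if the
+            # download was cut short.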
+ if uncompressed_path.exists(): + uncompressed_path.unlink() touch(uncompressed_path) + try: + with zipfile.ZipFile(zip_path) as zf: + pass + except zipfile.BadZipFile as e: + # Download failed or was interrupted, delete the zipfile + zip_path.unlink() + raise e + with zipfile.ZipFile(zip_path) as zf: zinfo = zf.infolist() if len(zinfo) != 1: msg = f"Expected a single file in {zip_path}" raise ValueError(msg) - zf.extract(zinfo[0], uncompressed_path) + with uncompressed_path.open('wb') as f: + f.write(zf.read(zinfo[0])) + + print("Compressing") touch(final_out_path) - ds = xr.open_dataset(final_out_path) + ds = xr.open_dataset(uncompressed_path) var_name = next(iter(ds)) # These are all single variable datasets og_encoding = ds[var_name].encoding ds.to_netcdf( @@ -109,6 +127,9 @@ def unzip_and_compress_era5( } }, ) + if zip_path.exists(): + zip_path.unlink() + uncompressed_path.unlink() @click.command() # type: ignore[arg-type] @@ -117,12 +138,17 @@ def unzip_and_compress_era5( @clio.with_climate_variable() @clio.with_year() @clio.with_month() +@click.option( + "--user", + type=str, +) def download_era5_task( output_dir: str, era5_dataset: str, climate_variable: str, year: str, month: str, + user: str, ) -> None: download_era5_main( output_dir, @@ -130,6 +156,7 @@ def download_era5_task( climate_variable, year, month, + user, ) @@ -171,7 +198,11 @@ def extract_era5( # noqa: PLR0913 queue: str, ) -> None: cddata = ClimateDownscaleData(output_dir) - + cred_path = cddata.credentials_root / "copernicus.yaml" + credentials = yaml.safe_load(cred_path.read_text()) + users = list(credentials['keys']) + jobs_per_user = 20 + datasets = ( clio.VALID_ERA5_DATASETS if era5_dataset == clio.RUN_ALL else [era5_dataset] ) @@ -185,44 +216,72 @@ def extract_era5( # noqa: PLR0913 to_download = [] to_compress = [] - for dataset, variable, year, month in itertools.product( + complete = [] + for spec in itertools.product( datasets, variables, years, months ): - final_out_path = cddata.era5_path(era5_dataset, climate_variable, year, month) - download_path, _ = get_download_spec(final_out_path) + final_out_path = cddata.era5_path(*spec) + download_path, _ = get_download_spec(final_out_path) if final_out_path.exists() and download_path.exists(): # We broke in the middle of processing this file. Don't re-download, # just reprocess. final_out_path.unlink() - to_compress.append((dataset, variable, year, month)) + to_compress.append(spec) + elif final_out_path.exists() and final_out_path.stat().st_size == 0: + # Some other kind of error happened + final_out_path.unlink() + to_download.append(spec) + to_compress.append(spec) + elif download_path.exists() and download_path.stat().st_size == 0: + # We broke while downloading. 
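+        # Each spec is one (dataset, variable, year, month) tuple, e.g.
+        # ("reanalysis-era5-land", "2m_temperature", "1990", "01"), so
+        # era5_path(*spec) names exactly one monthly file below.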
Assume this file is invalid and re-download + download_path.unlink() + to_download.append(spec) + to_compress.append(spec) + elif download_path.exists(): + to_compress.append(spec) elif final_out_path.exists(): - # We've already extracted this dataset + # We've already extracted this dataset (deleting the download path is the last step) + complete.append(spec) continue + else: + to_download.append(spec) + to_compress.append(spec) - to_download.append((dataset, variable, year, month)) - to_compress.append((dataset, variable, year, month)) + while to_download: + downloads_left = len(to_download) + + + download_batch = [] + for i in range(jobs_per_user): + for user in users: + if to_download: + download_batch.append( + (*to_download.pop(), user) + ) + assert len(download_batch) == min(len(users) * jobs_per_user, downloads_left) + + print(len(to_download) + len(download_batch), "remaining. Launching next", len(download_batch), "jobs") - jobmon.run_parallel( - runner="cdtask", - task_name="extract era5_download", - flat_node_args=( - ("era5-dataset", "climate-variable", "year", "month"), - to_compress, - ), - task_args={ - "output-dir": output_dir, - }, - task_resources={ - "queue": queue, - "cores": 1, - "memory": "10G", - "runtime": "600m", - "project": "proj_rapidresponse", - }, - max_attempts=1, - concurrency_limit=25, - ) + jobmon.run_parallel( + runner="cdtask", + task_name="extract era5_download", + flat_node_args=( + ("era5-dataset", "climate-variable", "year", "month", "user"), + download_batch, + ), + task_args={ + "output-dir": output_dir, + }, + task_resources={ + "queue": queue, + "cores": 1, + "memory": "10G", + "runtime": "600m", + "project": "proj_rapidresponse", + }, + max_attempts=1, + ) jobmon.run_parallel( runner="cdtask", From 59d49323d48b44e6a43b7875143fd8bddaa0f703 Mon Sep 17 00:00:00 2001 From: collijk Date: Sun, 9 Jun 2024 15:38:10 -0700 Subject: [PATCH 22/71] Port in cmip pipeline --- src/climate_downscale/old_climate/__init__.py | 0 src/climate_downscale/old_climate/data.py | 219 ++++++++++++++++++ .../old_climate/project_anomaly.py | 157 +++++++++++++ .../old_climate/project_climate.py | 172 ++++++++++++++ 4 files changed, 548 insertions(+) create mode 100644 src/climate_downscale/old_climate/__init__.py create mode 100644 src/climate_downscale/old_climate/data.py create mode 100644 src/climate_downscale/old_climate/project_anomaly.py create mode 100644 src/climate_downscale/old_climate/project_climate.py diff --git a/src/climate_downscale/old_climate/__init__.py b/src/climate_downscale/old_climate/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/climate_downscale/old_climate/data.py b/src/climate_downscale/old_climate/data.py new file mode 100644 index 0000000..b5422d2 --- /dev/null +++ b/src/climate_downscale/old_climate/data.py @@ -0,0 +1,219 @@ +import itertools +from collections.abc import Callable + +import gcsfs +import pandas as pd +import xarray as xr + + +def load_cmip_metadata( + tables: tuple[str, ...] = ("Amon", "day"), + variables: tuple[str, ...] = ("tas", "pr"), + experiments: tuple[str, ...] = ( + "historical", + "ssp126", + "ssp245", + "ssp370", + "ssp585", + ), +) -> pd.DataFrame: + """Loads CMIP6 metadata for the given tables, variables, and experiments. + + Parameters + ---------- + tables + The tables to include. + variables + The variables to include. + experiments + The experiments to include. 
+ + Returns + ------- + pd.DataFrame + CMIP6 metadata containing only the institutions and sources with all + tables, variables, and experiments. + """ + all_models = load_raw_cmip_metadata() + models_and_params = filter_institutions_and_sources( + all_models, + tables, + variables, + experiments, + ) + + # There should be no duplicates here, but there are. I'm not going to investigate + # why, but I'm just going to drop them. + member_count = models_and_params.groupby( + ["institution_id", "source_id", "member_id"] + )["activity_id"].count() + expected_count = len(tables) * len(variables) * len(experiments) + member_mask = member_count == expected_count + + final_models = ( + models_and_params.set_index(["institution_id", "source_id", "member_id"]) + .loc[member_mask[member_mask].index] + .reset_index() + ) + + # Filter to the models we need for the anomaly analysis. + monthly_historical = (final_models["table_id"] == "Amon") & ( + final_models["experiment_id"] == "historical" + ) + daily_scenario = (final_models["table_id"] == "day") & ( + final_models["experiment_id"] != "historical" + ) + return final_models.loc[monthly_historical | daily_scenario] + + +def load_cmip_historical_data(path: str) -> xr.Dataset: + """Loads a CMIP6 historical dataset from a zarr path. + + Parameters + ---------- + path + The path to the zarr store. + + Returns + ------- + xr.Dataset + The CMIP6 historical dataset. + """ + reference_period = slice("1981-01-15", "2010-12-15") + return ( + load_cmip_data(path) + .sel(time=reference_period) + .groupby("time.month") + .mean("time") + ) + + +def load_cmip_experiment_data(path: str, year: str) -> xr.Dataset: + """Loads a CMIP6 experiment dataset from a zarr path by day for a given year. + + Parameters + ---------- + path + The path to the zarr store. + year + The year to load. + + Returns + ------- + xr.Dataset + The CMIP6 experiment dataset for the given year. + """ + time_slice = slice(f"{year}-01", f"{year}-12") + time_range = pd.date_range(f"{year}-01-01", f"{year}-12-31") + return load_cmip_data(path).sel(time=time_slice).interp_calendar(time_range) + + +################## +# Helper methods # +################## + + +def load_raw_cmip_metadata() -> pd.DataFrame: + """Loads metadata containing information about all CMIP6 models.""" + path = "https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv" + return pd.read_csv(path) + + +def load_cmip_data(zarr_path: str) -> xr.Dataset: + """Loads a CMIP6 dataset from a zarr path.""" + gcs = gcsfs.GCSFileSystem(token="anon") # noqa: S106 + mapper = gcs.get_mapper(zarr_path) + ds = xr.open_zarr(mapper, consolidated=True) + lon = (ds.lon + 180) % 360 - 180 + ds = ds.assign_coords(lon=lon).sortby("lon") + ds = ds.drop( + ["lat_bnds", "lon_bnds", "time_bnds", "height", "time_bounds", "bnds"], + errors="ignore", + ) + return ds # type: ignore[no-any-return] + + +def contains_combo( + table: str, + variable: str, + experiment: str, +) -> Callable[[pd.DataFrame], bool]: + """Get a function to check if a dataset contains a given cmip metadata combination. + + Parameters + ---------- + table + The table to check for. + variable + The variable to check for. + experiment + The experiment to check for. + + Returns + ------- + Callable[[pd.DataFrame], bool] + A function that checks if a dataset contains a given cmip metadata combination.
+ """ + + def _check(df: pd.DataFrame) -> bool: + return ( + df["table_id"].eq(table) + & df["variable_id"].eq(variable) + & df["experiment_id"].eq(experiment) + ).any() + + return _check + + +def filter_institutions_and_sources( + cmip_meta: pd.DataFrame, + tables: tuple[str, ...], + variables: tuple[str, ...], + experiments: tuple[str, ...], +) -> pd.DataFrame: + """Filters a cmip metadata dataframe to only include models that have all + combinations of the given tables, variables, and experiments. + Parameters + ---------- + cmip_meta + CMIP metadata dataframe. + tables + The tables to include. + variables + The variables to include. + experiments + The experiments to include. + Returns + ------- + pd.DataFrame + Filtered cmip metadata containing only the institutions and sources with all + tables, variables, and experiments. + """ + # First we filter down to all models from the institutions and sources that have + # all the combinations of tables, variables, and experiments. + masks = [] + for table, variable, experiment in itertools.product( + tables, variables, experiments + ): + has_combo = cmip_meta.groupby(["institution_id", "source_id"]).apply( + contains_combo(table, variable, experiment) + ) + masks.append(has_combo) + mask = pd.concat(masks, axis=1).all(axis=1) + + institutions_and_sources = mask[mask].index + models_with_all_params = ( + cmip_meta.set_index(["institution_id", "source_id"]) + .loc[institutions_and_sources] + .reset_index() + ) + + # Now we filter down to the specific subset of table/variable/experiment + # combinations within the institutions and sources. + param_mask = ( + models_with_all_params["table_id"].isin(tables) + & models_with_all_params["variable_id"].isin(variables) + & models_with_all_params["experiment_id"].isin(experiments) + ) + models_and_params = models_with_all_params[param_mask] + return models_and_params diff --git a/src/climate_downscale/old_climate/project_anomaly.py b/src/climate_downscale/old_climate/project_anomaly.py new file mode 100644 index 0000000..6ba1dc4 --- /dev/null +++ b/src/climate_downscale/old_climate/project_anomaly.py @@ -0,0 +1,157 @@ +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING + +import click +import pandas as pd +from rra_tools import jobmon + +from rra_population_pipelines.pipelines.climate import data +from rra_population_pipelines.shared.cli_tools import options as clio +from rra_population_pipelines.shared.data import RRA_POP + +if TYPE_CHECKING: + import xarray as xr + +_ENSEMBLE_MEMBERS = [ + ("NCAR", "CESM2"), + ("MOHC", "UKESM1-0-LL"), + ("IPSL", "IPSL-CM6A-LR"), + ("MPI-M", "MPI-ESM1-2-LR"), + ("MIROC", "MIROC6"), + ("NOAA-GFDL", "GFDL-ESM4"), +] + +_VALID_YEARS = tuple([str(y) for y in range(2015, 2101)]) + + +def get_run_metadata( + variable_id: str, + experiment_id: str, +) -> pd.DataFrame: + metadata = data.load_cmip_metadata() + metadata = ( + metadata.set_index(["institution_id", "source_id"]) + .sort_index() + .loc[_ENSEMBLE_MEMBERS] + .reset_index() + .set_index(["variable_id", "experiment_id"]) + ) + history_meta = ( + metadata.loc[(variable_id, "historical")] + .set_index(["institution_id", "source_id", "member_id"]) # type: ignore[union-attr] + .loc[:, "zstore"] + ) + experiment_meta = ( + metadata.loc[(variable_id, experiment_id)] + .set_index(["institution_id", "source_id", "member_id"]) # type: ignore[union-attr] + .loc[:, "zstore"] + ) + final_meta = pd.concat( + [history_meta.rename("historical"), experiment_meta.rename("experiment")], + 
axis=1,
+    )
+    return final_meta  # type: ignore[no-any-return]
+
+
+def compute_common_lat_lon(
+    run_metadata: pd.DataFrame,
+) -> tuple[pd.Index[float], pd.Index[float]]:
+    lat = pd.Index([], name="lat", dtype=float)
+    lon = pd.Index([], name="lon", dtype=float)
+
+    for key in run_metadata.index.tolist():
+        historical = data.load_cmip_historical_data(run_metadata.at[key, "historical"])
+        lat = lat.union(historical["lat"])  # type: ignore[arg-type]
+        lon = lon.union(historical["lon"])  # type: ignore[arg-type]
+    return lat, lon
+
+
+def compute_single_model_anomaly(
+    historical: xr.Dataset,
+    experiment: xr.Dataset,
+    variable: str,
+) -> xr.Dataset:
+    if variable == "tas":
+        anomaly = experiment.groupby("time.month") - historical
+    else:
+        historical = 86400 * historical + 1
+        experiment = 86400 * experiment + 1
+        anomaly = (1 / historical) * experiment.groupby("time.month")
+    return anomaly
+
+
+def interp_common_lat_lon(
+    ds: xr.Dataset, lat: pd.Index[float], lon: pd.Index[float]
+) -> xr.Dataset:
+    return (
+        ds.pad(lon=1, mode="wrap")
+        .assign_coords(lon=ds.lon.pad(lon=1, mode="reflect", reflect_type="odd"))
+        .interp(lat=lat, lon=lon)
+    )
+
+
+def project_anomaly_main(variable: str, experiment: str, year: str) -> xr.Dataset:
+    run_meta = get_run_metadata(variable, experiment)
+    lat, lon = compute_common_lat_lon(run_meta)
+
+    anomalies: list[xr.Dataset] = []
+    for key in run_meta.index.tolist():
+        historical = data.load_cmip_historical_data(run_meta.at[key, "historical"])
+        scenario = data.load_cmip_experiment_data(
+            run_meta.at[key, "experiment"], year=year
+        )
+        anomaly = compute_single_model_anomaly(historical, scenario, variable=variable)
+        anomaly = interp_common_latin_lon(anomaly, lat, lon)
+        anomalies.append(anomaly)
+
+    mean_anomaly = 1 / len(anomalies) * sum(anomalies)
+    return mean_anomaly  # type: ignore[return-value]
+
+
+@click.command()  # type: ignore[arg-type]
+@click.option(
+    "--variable",
+    type=click.Choice(["tas", "pr"]),
+)
+@clio.with_climate_scenario(allow_all=False)
+@clio.with_year(allow_all=False, choices=_VALID_YEARS)
+@clio.with_output_directory(RRA_POP.projected_climate_anomaly_data)
+def project_anomaly_task(
+    variable: str,
+    climate_scenario: str,
+    year: str,
+    output_dir: str,
+) -> None:
+    projected_anomaly = project_anomaly_main(variable, climate_scenario, year)
+    out_path = Path(output_dir) / f"{variable}_{climate_scenario}_{year}.nc"
+    projected_anomaly.to_netcdf(out_path)
+
+
+@click.command()  # type: ignore[arg-type]
+@clio.with_output_directory(RRA_POP.projected_climate_anomaly_data)
+@clio.with_queue()
+def project_anomaly(output_dir: str, queue: str) -> None:
+    jobmon.run_parallel(
+        task_name="project_anomaly",
+        node_args={
+            "variable": [
+                "tas",
+                "pr",
+            ],
+            "experiment": list(clio.VALID_CLIMATE_SCENARIOS),
+            "year": list(_VALID_YEARS),
+        },
+        task_args={
+            "output-dir": output_dir,
+        },
+        task_resources={
+            "queue": queue,
+            "cores": 2,
+            "memory": "70G",
+            "runtime": "120m",
+            "project": "proj_rapidresponse",
+        },
+        runner="rptask",
+    )
diff --git a/src/climate_downscale/old_climate/project_climate.py b/src/climate_downscale/old_climate/project_climate.py
new file mode 100644
index 0000000..1b366cb
--- /dev/null
+++ b/src/climate_downscale/old_climate/project_climate.py
@@ -0,0 +1,172 @@
+import click
+import pandas as pd
+import xarray as xr
+from rra_population_pipelines.shared.cli_tools import options as clio
+from rra_population_pipelines.shared.data import (
+    RRA_DATA_ROOT,
+    RRA_POP,
+    RRAPopulationData,
+)
+from rra_tools import jobmon
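+
+# The anomaly is applied as a delta change: added for temperature ("tas") and
+# multiplied for precipitation -- see apply_anomaly below; the extra 1 / 30
+# factor there presumably converts the monthly reference to a daily rate.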
+
+
+def get_chelsa(variable: str, lat: slice, lon: slice) -> xr.Dataset:
+    ds_paths = [
+        RRA_POP.get_downscaled_reference_map_path(variable, month)
+        for month in range(1, 13)
+    ]
+    ds = (
+        xr.open_mfdataset(
+            ds_paths,
+            chunks={"lat": -1, "lon": -1},
+            concat_dim=[pd.Index(range(1, 13), name="month")],  # type: ignore[arg-type]
+            combine="nested",
+        )
+        .sel(lat=lat, lon=lon)
+        .rename({"Band1": variable})
+        .drop_vars("crs")
+    )
+    if variable == "tas":  # noqa: SIM108
+        ds = 0.1 * ds - 273.15
+    else:
+        ds = 0.1 * ds
+    return ds
+
+
+def load_and_downscale_anomaly(
+    variable: str,
+    scenario: str,
+    year: int,
+    lat: xr.DataArray,
+    lon: xr.DataArray,
+) -> xr.Dataset:
+    in_root = (
+        RRA_POP.human_niche_data
+        / "chelsa-downscaled-projections"
+        / "_anomalies"
+        / "GLOBAL"
+    )
+    path = in_root / f"{variable}_{scenario}_{year}.nc"
+    ds = xr.open_dataset(
+        path,
+        # Load the whole thing, but use a dask array
+        chunks={"lat": -1, "lon": -1, "time": -1},
+    ).interp(lat=lat, lon=lon)
+    return ds
+
+
+def apply_anomaly(data: xr.Dataset, anomaly: xr.Dataset) -> xr.Dataset:
+    if "tas" in anomaly.keys():  # noqa: SIM118
+        result = anomaly.groupby("time.month") + data
+    else:
+        result = anomaly.groupby("time.month") * data * (1 / 30)
+    return result
+
+
+def compute_measure(data: xr.Dataset, measure: str) -> xr.Dataset:
+    if measure == "temperature":
+        result = data.mean("time")
+    elif measure == "precipitation":
+        result = data.sum("time")
+    else:
+        threshold = 30
+        result = (data > threshold).sum("time")
+    return result
+
+
+def project_climate_main(
+    iso3: str,
+    measure: str,
+    scenario: str,
+    pop_data_dir: str,
+) -> None:
+    pop_data = RRAPopulationData(pop_data_dir)
+    admin0 = pop_data.load_shapefile(
+        admin_level=0,
+        iso3=iso3,
+        year=2022,
+    )
+    minx, miny, maxx, maxy = admin0.total_bounds
+    lat, lon = slice(miny, maxy), slice(minx, maxx)
+
+    variable = {
+        "temperature": "tas",
+        "precipitation": "pr",
+        "days_over_thirty": "tas",
+    }[measure]
+
+    print("Working on", scenario, measure)
+    ds = get_chelsa(variable, lat, lon)
+
+    results = []
+    for year in range(2015, 2101):
+        anom = load_and_downscale_anomaly(
+            variable, scenario, year, ds["lat"], ds["lon"]
+        )
+        result = apply_anomaly(ds, anom)
+        result = compute_measure(result, measure)
+        results.append(result)
+    result = xr.concat(results, dim=pd.Index(range(2015, 2101), name="year"))
+
+    print("Writing results")
+    pop_data.save_climate_data(
+        result,
+        measure=measure,
+        iso3=iso3,
+        scenario=scenario,
+    )
+
+
+@click.command()  # type: ignore[arg-type]
+@clio.with_iso3(allow_all=False)
+@click.option(
+    "--measure",
+    type=click.Choice(["temperature", "precipitation", "days_over_thirty"]),
+)
+@clio.with_climate_scenario(allow_all=False)
+@clio.with_input_directory("pop-data", RRA_DATA_ROOT)
+def project_climate_task(
+    iso3: str,
+    measure: str,
+    climate_scenario: str,
+    pop_data_dir: str,
+) -> None:
+    project_climate_main(iso3, measure, climate_scenario, pop_data_dir)
+
+
+@click.command()  # type: ignore[arg-type]
+@clio.with_iso3(allow_all=False)
+@clio.with_input_directory("pop-data", RRA_DATA_ROOT)
+@clio.with_queue()
+def project_climate(
+    iso3: str,
+    pop_data_dir: str,
+    queue: str,
+) -> None:
+    pop_data = RRAPopulationData(pop_data_dir)
+    jobmon.run_parallel(
+        task_name="project_climate",
+        node_args={
+            "iso3": [
+                iso3,
+            ],
+            "measure": [
+                "temperature",
+                "precipitation",
+                "days_over_thirty",
+            ],
+            "scenario": list(clio.VALID_CLIMATE_SCENARIOS),
+        },
+        task_args={
+            "pop-data-dir": pop_data_dir,
+        },
+        task_resources={
+            "queue": queue,
+            "cores": 2,
+            "memory": "70G",
+            "runtime": "120m",
+            "project": "proj_rapidresponse",
+        },
+        runner="rptask",
+        log_root=pop_data.climate_data,
+    )

From aa22cda4a03ea25b7bae2fa9ad5ea6bcc0cb955b Mon Sep 17 00:00:00 2001
From: James Collins
Date: Wed, 12 Jun 2024 11:15:21 -0700
Subject: [PATCH 23/71] Add notebook code for generating daily era5 estimates

---
 .../model/prepare_era5_daily.py               | 229 ++++++++++++++++++
 1 file changed, 229 insertions(+)
 create mode 100644 src/climate_downscale/model/prepare_era5_daily.py

diff --git a/src/climate_downscale/model/prepare_era5_daily.py b/src/climate_downscale/model/prepare_era5_daily.py
new file mode 100644
index 0000000..a33b4fc
--- /dev/null
+++ b/src/climate_downscale/model/prepare_era5_daily.py
@@ -0,0 +1,229 @@
+import pandas as pd
+import xarray as xr
+from pathlib import Path
+import numpy as np
+
+
+TARGET_LON = xr.DataArray(np.round(np.arange(0., 360., 0.1, dtype='float32'), 1), dims='longitude')
+TARGET_LAT = xr.DataArray(np.round(np.arange(90., -90.1, -0.1, dtype='float32'), 1), dims='latitude')
+
+
+def kelvin_to_celsius(temperature_k):
+    return temperature_k - 273.15
+
+def m_to_mm(ds):
+    return 1000*ds
+
+def scale_windspeed(windspeed):
+    """Scale wind speed from a height of 10 meters to a height of 2 meters.
+
+    Reference: Bröde et al. (2012)
+    https://doi.org/10.1007/s00484-011-0454-1
+
+    Parameters
+    ----------
+    windspeed
+        The 10m wind speed [m/s]. May be signed (i.e., a velocity component).
+
+    Returns
+    -------
+    xr.Dataset
+        The 2m wind speed [m/s]. May be signed (i.e., a velocity component).
+    """
+    scale_factor = np.log10(2 / 0.01) / np.log10(10 / 0.01)
+    return scale_factor * windspeed
+
+def identity(ds):
+    return ds
+
+def rename_val_column(ds):
+    data_var = next(iter(ds))
+    return ds.rename({data_var: "value"})
+
+
+convert_map = {
+    "10m_u_component_of_wind": scale_windspeed,
+    "10m_v_component_of_wind": scale_windspeed,
+    "2m_dewpoint_temperature": kelvin_to_celsius,
+    "2m_temperature": kelvin_to_celsius,
+    "surface_net_solar_radiation": identity,
+    "surface_net_thermal_radiation": identity,
+    "surface_pressure": identity,
+    "surface_solar_radiation_downwards": identity,
+    "surface_thermal_radiation_downwards": identity,
+    "total_precipitation": m_to_mm,
+    "total_sky_direct_solar_radiation_at_surface": identity,
+}
+
+def interpolate_to_target(ds):
+    return (
+        ds
+        .interp(longitude=TARGET_LON, latitude=TARGET_LAT, method='nearest')
+        .interpolate_na(dim="longitude", method="nearest", fill_value="extrapolate")
+    )
+
+def load_variable(variable, year, month, dataset='single-levels'):
+    root = Path("/mnt/share/erf/climate_downscale/extracted_data/era5")
+    p = root / f"reanalysis-era5-{dataset}_{variable}_{year}_{month}.nc"
+    if dataset == 'land' and not p.exists():
+        # Substitute the single level dataset pre-interpolated at the target resolution.
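+        # (If the ERA5-Land file for this variable and month is missing, fall
+        # back to the coarser single-levels product regridded to the 0.1 degree
+        # target grid so downstream code sees a consistent resolution.)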
+        p = root / f"reanalysis-era5-single-levels_{variable}_{year}_{month}.nc"
+        ds = interpolate_to_target(xr.load_dataset(p))
+    elif dataset == 'land':
+        ds = xr.load_dataset(p).assign_coords(latitude=TARGET_LAT, longitude=TARGET_LON)
+    else:
+        ds = xr.load_dataset(p)
+    conversion = convert_map[variable]
+    ds = conversion(rename_val_column(ds))
+    return ds
+
+
+########
+
+def daily_mean(ds):
+    return ds.groupby('time.date').mean()
+
+def daily_max(ds):
+    return ds.groupby('time.date').max()
+
+def daily_min(ds):
+    return ds.groupby('time.date').min()
+
+def daily_sum(ds):
+    return ds.groupby('time.date').sum()
+
+def cdd(temperature_c):
+    return np.maximum(temperature_c - 18, 0).groupby("time.date").mean()
+
+def hdd(temperature_c):
+    return np.maximum(18 - temperature_c, 0).groupby("time.date").mean()
+
+def vector_magnitude(x, y):
+    return np.sqrt(x**2 + y**2)
+
+
+def buck_vapor_pressure(temperature_c):
+    """Approximate vapor pressure of water.
+
+    https://en.wikipedia.org/wiki/Arden_Buck_equation
+    https://journals.ametsoc.org/view/journals/apme/20/12/1520-0450_1981_020_1527_nefcvp_2_0_co_2.xml
+    """
+    over_water = 6.1121 * np.exp((18.678 - temperature_c / 234.5) * (temperature_c / (257.14 + temperature_c)))
+    over_ice = 6.1115 * np.exp((23.036 - temperature_c / 333.7) * (temperature_c / (279.82 + temperature_c)))
+    return xr.where(temperature_c > 0, over_water, over_ice)
+
+def rh_percent(temperature_c, dewpoint_temperature_c):
+    # saturated vapour pressure
+    es = buck_vapor_pressure(temperature_c)
+    # vapour pressure
+    e = buck_vapor_pressure(dewpoint_temperature_c)
+    rh = (e / es) * 100
+    return rh
+
+def heat_index(temperature_c, dewpoint_temperature_c):
+    t = temperature_c  # Alias for simplicity in the formula
+    r = rh_percent(temperature_c, dewpoint_temperature_c)
+
+    hi_raw = (
+        -8.784695
+        + 1.61139411 * t
+        + 2.338549 * r
+        - 0.14611605 * t * r
+        - 1.2308094e-2 * t**2
+        - 1.6424828e-2 * r**2
+        + 2.211732e-3 * t**2 * r
+        + 7.2546e-4 * t * r**2
+        - 3.582e-6 * t**2 * r**2
+    )
+    hi = xr.where(t > 20, hi_raw, t)
+    return hi

+def humidex(temperature_c, dewpoint_temperature_c):
+    vp = buck_vapor_pressure(dewpoint_temperature_c)
+    return temperature_c + 0.5555 * (vp - 10)
+
+def effective_temperature(temperature_c, dewpoint_temperature_c, uas, vas):
+    """https://www.sciencedirect.com/topics/engineering/effective-temperature"""
+    t = temperature_c
+    r = rh_percent(temperature_c, dewpoint_temperature_c)
+    v = vector_magnitude(uas, vas)
+
+    wind_adjustment = 1 / (1.76 + 1.4 * v**0.75)
+    et = (
+        37
+        - ((37 - t) / (0.68 - 0.0014 * r + wind_adjustment))
+        - 0.29 * t * (1 - 0.01 * r)
+    )
+    return et
+
+
+
+
+
+collapse_map = {
+    "mean_temperature": (["2m_temperature"], daily_mean, (273.15, 0.01)),
+    "max_temperature": (["2m_temperature"], daily_max, (273.15, 0.01)),
+    "min_temperature": (["2m_temperature"], daily_min, (273.15, 0.01)),
+    "cooling_degree_days": (["2m_temperature"], cdd, (0, 0.01)),
+    "heating_degree_days": (["2m_temperature"], hdd, (0, 0.01)),
+    "wind_speed": (
+        ["10m_u_component_of_wind", "10m_v_component_of_wind"], lambda x, y: daily_mean(vector_magnitude(x, y)), (0, 0.01)
+    ),
+    "relative_humidity": (
+        ["2m_temperature", "2m_dewpoint_temperature"], lambda x, y: daily_mean(rh_percent(x, y)), (0, 0.01)
+    ),
+    "total_precipitation": (["total_precipitation"], daily_sum, (0, 0.1)),
+    # "heat_index": (
+    #     ["2m_temperature", "2m_dewpoint_temperature"], lambda x, y: daily_mean(heat_index(x, y)), (273.15, 0.01)
+    # ),
+    # "humidex": (
+    #     ['2m_temperature', '2m_dewpoint_temperature'], lambda x, y: daily_mean(humidex(x, y)), (273.15, 0.01)
+    # ),
+    # "normal_effective_temperature": (
+    #     ["2m_temperature", "2m_dewpoint_temperature", "10m_u_component_of_wind", "10m_v_component_of_wind"],
+    #     lambda *args: daily_mean(effective_temperature(*args)), (273.15, 0.01)
+    # ),
+
+}
+
+year = "1990"
+month = "01"
+target_variable = "wind_speed"
+
+source_variables, collapse_fun, (e_offset, e_scale) = collapse_map[target_variable]
+
+print("loading single-levels")
+single_level = [
+    load_variable(sv, year, month, 'single-levels') for sv in source_variables
+]
+print('collapsing')
+ds = collapse_fun(*single_level)
+ds = ds.assign(date=pd.to_datetime(ds.date))
+
+print('interpolating')
+ds_land_res = interpolate_to_target(ds)
+
+print("loading land")
+land = [
+    load_variable(sv, year, month, 'land') for sv in source_variables
+]
+print('collapsing')
+ds_land = collapse_fun(*land)
+ds_land = ds_land.assign(date=pd.to_datetime(ds_land.date))
+
+print('combining')
+combined = ds_land.combine_first(ds_land_res)
+
+combined.to_netcdf(
+    'compressed.nc',
+    encoding={
+        'value': {
+            'dtype': 'int16',
+            'add_offset': e_offset,
+            'scale_factor': e_scale,
+            '_FillValue': -9999,
+            'zlib': True,
+            'complevel': 1,
+        }
+    }
+)
\ No newline at end of file

From 58824e9d1a1e6003570922b98fa8e7a0084e00ca Mon Sep 17 00:00:00 2001
From: James Collins
Date: Wed, 12 Jun 2024 12:43:29 -0700
Subject: [PATCH 24/71] Add cmip extraction

---
 src/climate_downscale/extract/cmip.py         | 72 +++++++++++++++++++
 .../old_climate/project_anomaly.py            |  2 +-
 2 files changed, 73 insertions(+), 1 deletion(-)
 create mode 100644 src/climate_downscale/extract/cmip.py

diff --git a/src/climate_downscale/extract/cmip.py b/src/climate_downscale/extract/cmip.py
new file mode 100644
index 0000000..eefe266
--- /dev/null
+++ b/src/climate_downscale/extract/cmip.py
@@ -0,0 +1,72 @@
+import gcsfs
+import pandas as pd
+import xarray as xr
+
+
+def load_raw_cmip_metadata() -> pd.DataFrame:
+    """Loads metadata containing information about all CMIP6 models."""
+    path = "https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv"
+    return pd.read_csv(path)
+
+meta = load_raw_cmip_metadata()
+
+keep_sources = [
+    'CAMS-CSM1-0',
+    'CanESM5',
+    'CNRM-ESM2-1',
+    'GFDL-ESM4',
+    'GISS-E2-1-G',
+    'MIROC-ES2L',
+    'MIROC6',
+    'MRI-ESM2-0'
+]
+keep_experiments = [
+    'ssp119',
+    'ssp126',
+    'ssp245',
+    'ssp370',
+    'ssp585',
+]
+
+keep_variables = [
+    "uas",
+    "vas",
+    "hurs",
+    "tas",
+    # "rsus",
+    # "rlus",
+    "ps",
+    # "rsds",
+    # "rlds",
+    "pr",
+    # "rsdsdiff",
+]
+
+keep_tables = [
+    #"Amon",
+    "day",
+]
+
+
+mask = (
+    meta.source_id.isin(keep_sources)
+    & meta.experiment_id.isin(keep_experiments)
+    & meta.variable_id.isin(keep_variables)
+    & meta.table_id.isin(keep_tables)
+)
+
+# Work on a copy so adding the helper column doesn't warn about writing to a
+# slice of `meta`.
+meta_sub = meta[mask].copy()
+meta_sub['dummy'] = "X"
+
+pvs = ['source_id', 'experiment_id', 'variable_id']
+
+# Pivot to an availability grid: one row per (source, experiment), one column
+# per variable, marked where the combination exists.
+meta_sub.groupby(pvs).dummy.apply(lambda s: ",".join(s.unique().tolist())).unstack()
+
+def load_cmip_data(zarr_path: str) -> xr.Dataset:
+    """Loads a CMIP6 dataset from a zarr path."""
+    gcs = gcsfs.GCSFileSystem(token="anon")  # noqa: S106
+    mapper = gcs.get_mapper(zarr_path)
+    ds = xr.open_zarr(mapper, consolidated=True)
+    lon = (ds.lon + 180) % 360 - 180
+    ds = ds.assign_coords(lon=lon).sortby("lon")
+    ds = ds.drop_vars(
+        ["lat_bnds", "lon_bnds", "time_bnds", "height", "time_bounds", "bnds"],
+        errors="ignore",
+    )
+    return ds  # type: ignore[no-any-return]
\ No newline at end of file
diff --git a/src/climate_downscale/old_climate/project_anomaly.py 
b/src/climate_downscale/old_climate/project_anomaly.py index 6ba1dc4..ae37b72 100644 --- a/src/climate_downscale/old_climate/project_anomaly.py +++ b/src/climate_downscale/old_climate/project_anomaly.py @@ -103,7 +103,7 @@ def project_anomaly_main(variable: str, experiment: str, year: str) -> xr.Datase run_meta.at[key, "experiment"], year=year ) anomaly = compute_single_model_anomaly(historical, scenario, variable=variable) - anomaly = interp_common_latin _lon(anomaly, lat, lon) + anomaly = interp_common_lat_lon(anomaly, lat, lon) anomalies.append(anomaly) mean_anomaly = 1 / len(anomalies) * sum(anomalies) From 790ea56e9fb8012d3e6531fb6c3180503b53a73b Mon Sep 17 00:00:00 2001 From: collijk Date: Wed, 12 Jun 2024 12:44:03 -0700 Subject: [PATCH 25/71] Add gcsfs and zarr dependencies --- poetry.lock | 909 ++++++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 2 + 2 files changed, 909 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index 0d9646d..e911760 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "affine" @@ -15,6 +15,137 @@ files = [ dev = ["coveralls", "flake8", "pydocstyle"] test = ["pytest (>=4.6)", "pytest-cov"] +[[package]] +name = "aiohttp" +version = "3.9.5" +description = "Async http client/server framework (asyncio)" +optional = false +python-versions = ">=3.8" +files = [ + {file = "aiohttp-3.9.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fcde4c397f673fdec23e6b05ebf8d4751314fa7c24f93334bf1f1364c1c69ac7"}, + {file = "aiohttp-3.9.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d6b3f1fabe465e819aed2c421a6743d8debbde79b6a8600739300630a01bf2c"}, + {file = "aiohttp-3.9.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6ae79c1bc12c34082d92bf9422764f799aee4746fd7a392db46b7fd357d4a17a"}, + {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d3ebb9e1316ec74277d19c5f482f98cc65a73ccd5430540d6d11682cd857430"}, + {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84dabd95154f43a2ea80deffec9cb44d2e301e38a0c9d331cc4aa0166fe28ae3"}, + {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c8a02fbeca6f63cb1f0475c799679057fc9268b77075ab7cf3f1c600e81dd46b"}, + {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c26959ca7b75ff768e2776d8055bf9582a6267e24556bb7f7bd29e677932be72"}, + {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:714d4e5231fed4ba2762ed489b4aec07b2b9953cf4ee31e9871caac895a839c0"}, + {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e7a6a8354f1b62e15d48e04350f13e726fa08b62c3d7b8401c0a1314f02e3558"}, + {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c413016880e03e69d166efb5a1a95d40f83d5a3a648d16486592c49ffb76d0db"}, + {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ff84aeb864e0fac81f676be9f4685f0527b660f1efdc40dcede3c251ef1e867f"}, + {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ad7f2919d7dac062f24d6f5fe95d401597fbb015a25771f85e692d043c9d7832"}, + {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:702e2c7c187c1a498a4e2b03155d52658fdd6fda882d3d7fbb891a5cf108bb10"}, + {file = "aiohttp-3.9.5-cp310-cp310-win32.whl", hash = "sha256:67c3119f5ddc7261d47163ed86d760ddf0e625cd6246b4ed852e82159617b5fb"}, + {file = "aiohttp-3.9.5-cp310-cp310-win_amd64.whl", hash = "sha256:471f0ef53ccedec9995287f02caf0c068732f026455f07db3f01a46e49d76bbb"}, + {file = "aiohttp-3.9.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e0ae53e33ee7476dd3d1132f932eeb39bf6125083820049d06edcdca4381f342"}, + {file = "aiohttp-3.9.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c088c4d70d21f8ca5c0b8b5403fe84a7bc8e024161febdd4ef04575ef35d474d"}, + {file = "aiohttp-3.9.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:639d0042b7670222f33b0028de6b4e2fad6451462ce7df2af8aee37dcac55424"}, + {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f26383adb94da5e7fb388d441bf09c61e5e35f455a3217bfd790c6b6bc64b2ee"}, + {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66331d00fb28dc90aa606d9a54304af76b335ae204d1836f65797d6fe27f1ca2"}, + {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4ff550491f5492ab5ed3533e76b8567f4b37bd2995e780a1f46bca2024223233"}, + {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f22eb3a6c1080d862befa0a89c380b4dafce29dc6cd56083f630073d102eb595"}, + {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a81b1143d42b66ffc40a441379387076243ef7b51019204fd3ec36b9f69e77d6"}, + {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f64fd07515dad67f24b6ea4a66ae2876c01031de91c93075b8093f07c0a2d93d"}, + {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:93e22add827447d2e26d67c9ac0161756007f152fdc5210277d00a85f6c92323"}, + {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:55b39c8684a46e56ef8c8d24faf02de4a2b2ac60d26cee93bc595651ff545de9"}, + {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4715a9b778f4293b9f8ae7a0a7cef9829f02ff8d6277a39d7f40565c737d3771"}, + {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:afc52b8d969eff14e069a710057d15ab9ac17cd4b6753042c407dcea0e40bf75"}, + {file = "aiohttp-3.9.5-cp311-cp311-win32.whl", hash = "sha256:b3df71da99c98534be076196791adca8819761f0bf6e08e07fd7da25127150d6"}, + {file = "aiohttp-3.9.5-cp311-cp311-win_amd64.whl", hash = "sha256:88e311d98cc0bf45b62fc46c66753a83445f5ab20038bcc1b8a1cc05666f428a"}, + {file = "aiohttp-3.9.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:c7a4b7a6cf5b6eb11e109a9755fd4fda7d57395f8c575e166d363b9fc3ec4678"}, + {file = "aiohttp-3.9.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:0a158704edf0abcac8ac371fbb54044f3270bdbc93e254a82b6c82be1ef08f3c"}, + {file = "aiohttp-3.9.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d153f652a687a8e95ad367a86a61e8d53d528b0530ef382ec5aaf533140ed00f"}, + {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82a6a97d9771cb48ae16979c3a3a9a18b600a8505b1115cfe354dfb2054468b4"}, + {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60cdbd56f4cad9f69c35eaac0fbbdf1f77b0ff9456cebd4902f3dd1cf096464c"}, + {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:8676e8fd73141ded15ea586de0b7cda1542960a7b9ad89b2b06428e97125d4fa"}, + {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da00da442a0e31f1c69d26d224e1efd3a1ca5bcbf210978a2ca7426dfcae9f58"}, + {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18f634d540dd099c262e9f887c8bbacc959847cfe5da7a0e2e1cf3f14dbf2daf"}, + {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:320e8618eda64e19d11bdb3bd04ccc0a816c17eaecb7e4945d01deee2a22f95f"}, + {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:2faa61a904b83142747fc6a6d7ad8fccff898c849123030f8e75d5d967fd4a81"}, + {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:8c64a6dc3fe5db7b1b4d2b5cb84c4f677768bdc340611eca673afb7cf416ef5a"}, + {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:393c7aba2b55559ef7ab791c94b44f7482a07bf7640d17b341b79081f5e5cd1a"}, + {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:c671dc117c2c21a1ca10c116cfcd6e3e44da7fcde37bf83b2be485ab377b25da"}, + {file = "aiohttp-3.9.5-cp312-cp312-win32.whl", hash = "sha256:5a7ee16aab26e76add4afc45e8f8206c95d1d75540f1039b84a03c3b3800dd59"}, + {file = "aiohttp-3.9.5-cp312-cp312-win_amd64.whl", hash = "sha256:5ca51eadbd67045396bc92a4345d1790b7301c14d1848feaac1d6a6c9289e888"}, + {file = "aiohttp-3.9.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:694d828b5c41255e54bc2dddb51a9f5150b4eefa9886e38b52605a05d96566e8"}, + {file = "aiohttp-3.9.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0605cc2c0088fcaae79f01c913a38611ad09ba68ff482402d3410bf59039bfb8"}, + {file = "aiohttp-3.9.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4558e5012ee03d2638c681e156461d37b7a113fe13970d438d95d10173d25f78"}, + {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dbc053ac75ccc63dc3a3cc547b98c7258ec35a215a92bd9f983e0aac95d3d5b"}, + {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4109adee842b90671f1b689901b948f347325045c15f46b39797ae1bf17019de"}, + {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6ea1a5b409a85477fd8e5ee6ad8f0e40bf2844c270955e09360418cfd09abac"}, + {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3c2890ca8c59ee683fd09adf32321a40fe1cf164e3387799efb2acebf090c11"}, + {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3916c8692dbd9d55c523374a3b8213e628424d19116ac4308e434dbf6d95bbdd"}, + {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8d1964eb7617907c792ca00b341b5ec3e01ae8c280825deadbbd678447b127e1"}, + {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:d5ab8e1f6bee051a4bf6195e38a5c13e5e161cb7bad83d8854524798bd9fcd6e"}, + {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:52c27110f3862a1afbcb2af4281fc9fdc40327fa286c4625dfee247c3ba90156"}, + {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:7f64cbd44443e80094309875d4f9c71d0401e966d191c3d469cde4642bc2e031"}, + {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8b4f72fbb66279624bfe83fd5eb6aea0022dad8eec62b71e7bf63ee1caadeafe"}, + {file = "aiohttp-3.9.5-cp38-cp38-win32.whl", hash = 
"sha256:6380c039ec52866c06d69b5c7aad5478b24ed11696f0e72f6b807cfb261453da"}, + {file = "aiohttp-3.9.5-cp38-cp38-win_amd64.whl", hash = "sha256:da22dab31d7180f8c3ac7c7635f3bcd53808f374f6aa333fe0b0b9e14b01f91a"}, + {file = "aiohttp-3.9.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:1732102949ff6087589408d76cd6dea656b93c896b011ecafff418c9661dc4ed"}, + {file = "aiohttp-3.9.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c6021d296318cb6f9414b48e6a439a7f5d1f665464da507e8ff640848ee2a58a"}, + {file = "aiohttp-3.9.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:239f975589a944eeb1bad26b8b140a59a3a320067fb3cd10b75c3092405a1372"}, + {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b7b30258348082826d274504fbc7c849959f1989d86c29bc355107accec6cfb"}, + {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cd2adf5c87ff6d8b277814a28a535b59e20bfea40a101db6b3bdca7e9926bc24"}, + {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9a3d838441bebcf5cf442700e3963f58b5c33f015341f9ea86dcd7d503c07e2"}, + {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e3a1ae66e3d0c17cf65c08968a5ee3180c5a95920ec2731f53343fac9bad106"}, + {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9c69e77370cce2d6df5d12b4e12bdcca60c47ba13d1cbbc8645dd005a20b738b"}, + {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0cbf56238f4bbf49dab8c2dc2e6b1b68502b1e88d335bea59b3f5b9f4c001475"}, + {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d1469f228cd9ffddd396d9948b8c9cd8022b6d1bf1e40c6f25b0fb90b4f893ed"}, + {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:45731330e754f5811c314901cebdf19dd776a44b31927fa4b4dbecab9e457b0c"}, + {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:3fcb4046d2904378e3aeea1df51f697b0467f2aac55d232c87ba162709478c46"}, + {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8cf142aa6c1a751fcb364158fd710b8a9be874b81889c2bd13aa8893197455e2"}, + {file = "aiohttp-3.9.5-cp39-cp39-win32.whl", hash = "sha256:7b179eea70833c8dee51ec42f3b4097bd6370892fa93f510f76762105568cf09"}, + {file = "aiohttp-3.9.5-cp39-cp39-win_amd64.whl", hash = "sha256:38d80498e2e169bc61418ff36170e0aad0cd268da8b38a17c4cf29d254a8b3f1"}, + {file = "aiohttp-3.9.5.tar.gz", hash = "sha256:edea7d15772ceeb29db4aff55e482d4bcfb6ae160ce144f2682de02f6d693551"}, +] + +[package.dependencies] +aiosignal = ">=1.1.2" +async-timeout = {version = ">=4.0,<5.0", markers = "python_version < \"3.11\""} +attrs = ">=17.3.0" +frozenlist = ">=1.1.1" +multidict = ">=4.5,<7.0" +yarl = ">=1.0,<2.0" + +[package.extras] +speedups = ["Brotli", "aiodns", "brotlicffi"] + +[[package]] +name = "aiosignal" +version = "1.3.1" +description = "aiosignal: a list of registered asynchronous callbacks" +optional = false +python-versions = ">=3.7" +files = [ + {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, + {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, +] + +[package.dependencies] +frozenlist = ">=1.1.0" + +[[package]] +name = "asciitree" +version = "0.3.3" +description = "Draws ASCII trees." 
+optional = false +python-versions = "*" +files = [ + {file = "asciitree-0.3.3.tar.gz", hash = "sha256:4aa4b9b649f85e3fcb343363d97564aa1fb62e249677f2e18a96765145cc0f6e"}, +] + +[[package]] +name = "async-timeout" +version = "4.0.3" +description = "Timeout context manager for asyncio programs" +optional = false +python-versions = ">=3.7" +files = [ + {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, + {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, +] + [[package]] name = "attrs" version = "23.2.0" @@ -69,6 +200,17 @@ charset-normalizer = ["charset-normalizer"] html5lib = ["html5lib"] lxml = ["lxml"] +[[package]] +name = "cachetools" +version = "5.3.3" +description = "Extensible memoizing collections and decorators" +optional = false +python-versions = ">=3.7" +files = [ + {file = "cachetools-5.3.3-py3-none-any.whl", hash = "sha256:0abad1021d3f8325b2fc1d2e9c8b9c9d57b04c3932657a72465447332c24d945"}, + {file = "cachetools-5.3.3.tar.gz", hash = "sha256:ba29e2dfa0b8b556606f097407ed1aa62080ee108ab0dc5ec9d6a723a007d105"}, +] + [[package]] name = "cads-api-client" version = "1.0.0" @@ -463,6 +605,17 @@ files = [ docs = ["ipython", "matplotlib", "numpydoc", "sphinx"] tests = ["pytest", "pytest-cov", "pytest-xdist"] +[[package]] +name = "decorator" +version = "5.1.1" +description = "Decorators for Humans" +optional = false +python-versions = ">=3.5" +files = [ + {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, + {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, +] + [[package]] name = "deep-translator" version = "1.11.4" @@ -523,6 +676,17 @@ files = [ [package.extras] test = ["pytest (>=6)"] +[[package]] +name = "fasteners" +version = "0.19" +description = "A python package that provides useful locks" +optional = false +python-versions = ">=3.6" +files = [ + {file = "fasteners-0.19-py3-none-any.whl", hash = "sha256:758819cb5d94cdedf4e836988b74de396ceacb8e2794d21f82d131fd9ee77237"}, + {file = "fasteners-0.19.tar.gz", hash = "sha256:b4f37c3ac52d8a445af3a66bce57b33b5e90b97c696b7b984f530cf8f0ded09c"}, +] + [[package]] name = "filelock" version = "3.14.0" @@ -651,6 +815,155 @@ ufo = ["fs (>=2.2.0,<3)"] unicode = ["unicodedata2 (>=15.1.0)"] woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] +[[package]] +name = "frozenlist" +version = "1.4.1" +description = "A list-like structure which implements collections.abc.MutableSequence" +optional = false +python-versions = ">=3.8" +files = [ + {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f9aa1878d1083b276b0196f2dfbe00c9b7e752475ed3b682025ff20c1c1f51ac"}, + {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:29acab3f66f0f24674b7dc4736477bcd4bc3ad4b896f5f45379a67bce8b96868"}, + {file = "frozenlist-1.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:74fb4bee6880b529a0c6560885fce4dc95936920f9f20f53d99a213f7bf66776"}, + {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:590344787a90ae57d62511dd7c736ed56b428f04cd8c161fcc5e7232c130c69a"}, + {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:068b63f23b17df8569b7fdca5517edef76171cf3897eb68beb01341131fbd2ad"}, + {file = 
"frozenlist-1.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c849d495bf5154cd8da18a9eb15db127d4dba2968d88831aff6f0331ea9bd4c"}, + {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9750cc7fe1ae3b1611bb8cfc3f9ec11d532244235d75901fb6b8e42ce9229dfe"}, + {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9b2de4cf0cdd5bd2dee4c4f63a653c61d2408055ab77b151c1957f221cabf2a"}, + {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0633c8d5337cb5c77acbccc6357ac49a1770b8c487e5b3505c57b949b4b82e98"}, + {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:27657df69e8801be6c3638054e202a135c7f299267f1a55ed3a598934f6c0d75"}, + {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:f9a3ea26252bd92f570600098783d1371354d89d5f6b7dfd87359d669f2109b5"}, + {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:4f57dab5fe3407b6c0c1cc907ac98e8a189f9e418f3b6e54d65a718aaafe3950"}, + {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e02a0e11cf6597299b9f3bbd3f93d79217cb90cfd1411aec33848b13f5c656cc"}, + {file = "frozenlist-1.4.1-cp310-cp310-win32.whl", hash = "sha256:a828c57f00f729620a442881cc60e57cfcec6842ba38e1b19fd3e47ac0ff8dc1"}, + {file = "frozenlist-1.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:f56e2333dda1fe0f909e7cc59f021eba0d2307bc6f012a1ccf2beca6ba362439"}, + {file = "frozenlist-1.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a0cb6f11204443f27a1628b0e460f37fb30f624be6051d490fa7d7e26d4af3d0"}, + {file = "frozenlist-1.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b46c8ae3a8f1f41a0d2ef350c0b6e65822d80772fe46b653ab6b6274f61d4a49"}, + {file = "frozenlist-1.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fde5bd59ab5357e3853313127f4d3565fc7dad314a74d7b5d43c22c6a5ed2ced"}, + {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:722e1124aec435320ae01ee3ac7bec11a5d47f25d0ed6328f2273d287bc3abb0"}, + {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2471c201b70d58a0f0c1f91261542a03d9a5e088ed3dc6c160d614c01649c106"}, + {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c757a9dd70d72b076d6f68efdbb9bc943665ae954dad2801b874c8c69e185068"}, + {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f146e0911cb2f1da549fc58fc7bcd2b836a44b79ef871980d605ec392ff6b0d2"}, + {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f9c515e7914626b2a2e1e311794b4c35720a0be87af52b79ff8e1429fc25f19"}, + {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c302220494f5c1ebeb0912ea782bcd5e2f8308037b3c7553fad0e48ebad6ad82"}, + {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:442acde1e068288a4ba7acfe05f5f343e19fac87bfc96d89eb886b0363e977ec"}, + {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:1b280e6507ea8a4fa0c0a7150b4e526a8d113989e28eaaef946cc77ffd7efc0a"}, + {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = 
"sha256:fe1a06da377e3a1062ae5fe0926e12b84eceb8a50b350ddca72dc85015873f74"}, + {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:db9e724bebd621d9beca794f2a4ff1d26eed5965b004a97f1f1685a173b869c2"}, + {file = "frozenlist-1.4.1-cp311-cp311-win32.whl", hash = "sha256:e774d53b1a477a67838a904131c4b0eef6b3d8a651f8b138b04f748fccfefe17"}, + {file = "frozenlist-1.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:fb3c2db03683b5767dedb5769b8a40ebb47d6f7f45b1b3e3b4b51ec8ad9d9825"}, + {file = "frozenlist-1.4.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:1979bc0aeb89b33b588c51c54ab0161791149f2461ea7c7c946d95d5f93b56ae"}, + {file = "frozenlist-1.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:cc7b01b3754ea68a62bd77ce6020afaffb44a590c2289089289363472d13aedb"}, + {file = "frozenlist-1.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c9c92be9fd329ac801cc420e08452b70e7aeab94ea4233a4804f0915c14eba9b"}, + {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c3894db91f5a489fc8fa6a9991820f368f0b3cbdb9cd8849547ccfab3392d86"}, + {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba60bb19387e13597fb059f32cd4d59445d7b18b69a745b8f8e5db0346f33480"}, + {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8aefbba5f69d42246543407ed2461db31006b0f76c4e32dfd6f42215a2c41d09"}, + {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:780d3a35680ced9ce682fbcf4cb9c2bad3136eeff760ab33707b71db84664e3a"}, + {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9acbb16f06fe7f52f441bb6f413ebae6c37baa6ef9edd49cdd567216da8600cd"}, + {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:23b701e65c7b36e4bf15546a89279bd4d8675faabc287d06bbcfac7d3c33e1e6"}, + {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:3e0153a805a98f5ada7e09826255ba99fb4f7524bb81bf6b47fb702666484ae1"}, + {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:dd9b1baec094d91bf36ec729445f7769d0d0cf6b64d04d86e45baf89e2b9059b"}, + {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:1a4471094e146b6790f61b98616ab8e44f72661879cc63fa1049d13ef711e71e"}, + {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5667ed53d68d91920defdf4035d1cdaa3c3121dc0b113255124bcfada1cfa1b8"}, + {file = "frozenlist-1.4.1-cp312-cp312-win32.whl", hash = "sha256:beee944ae828747fd7cb216a70f120767fc9f4f00bacae8543c14a6831673f89"}, + {file = "frozenlist-1.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:64536573d0a2cb6e625cf309984e2d873979709f2cf22839bf2d61790b448ad5"}, + {file = "frozenlist-1.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:20b51fa3f588ff2fe658663db52a41a4f7aa6c04f6201449c6c7c476bd255c0d"}, + {file = "frozenlist-1.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:410478a0c562d1a5bcc2f7ea448359fcb050ed48b3c6f6f4f18c313a9bdb1826"}, + {file = "frozenlist-1.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c6321c9efe29975232da3bd0af0ad216800a47e93d763ce64f291917a381b8eb"}, + {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48f6a4533887e189dae092f1cf981f2e3885175f7a0f33c91fb5b7b682b6bab6"}, + {file = 
"frozenlist-1.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6eb73fa5426ea69ee0e012fb59cdc76a15b1283d6e32e4f8dc4482ec67d1194d"}, + {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fbeb989b5cc29e8daf7f976b421c220f1b8c731cbf22b9130d8815418ea45887"}, + {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:32453c1de775c889eb4e22f1197fe3bdfe457d16476ea407472b9442e6295f7a"}, + {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693945278a31f2086d9bf3df0fe8254bbeaef1fe71e1351c3bd730aa7d31c41b"}, + {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:1d0ce09d36d53bbbe566fe296965b23b961764c0bcf3ce2fa45f463745c04701"}, + {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:3a670dc61eb0d0eb7080890c13de3066790f9049b47b0de04007090807c776b0"}, + {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:dca69045298ce5c11fd539682cff879cc1e664c245d1c64da929813e54241d11"}, + {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a06339f38e9ed3a64e4c4e43aec7f59084033647f908e4259d279a52d3757d09"}, + {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b7f2f9f912dca3934c1baec2e4585a674ef16fe00218d833856408c48d5beee7"}, + {file = "frozenlist-1.4.1-cp38-cp38-win32.whl", hash = "sha256:e7004be74cbb7d9f34553a5ce5fb08be14fb33bc86f332fb71cbe5216362a497"}, + {file = "frozenlist-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:5a7d70357e7cee13f470c7883a063aae5fe209a493c57d86eb7f5a6f910fae09"}, + {file = "frozenlist-1.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:bfa4a17e17ce9abf47a74ae02f32d014c5e9404b6d9ac7f729e01562bbee601e"}, + {file = "frozenlist-1.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b7e3ed87d4138356775346e6845cccbe66cd9e207f3cd11d2f0b9fd13681359d"}, + {file = "frozenlist-1.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c99169d4ff810155ca50b4da3b075cbde79752443117d89429595c2e8e37fed8"}, + {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edb678da49d9f72c9f6c609fbe41a5dfb9a9282f9e6a2253d5a91e0fc382d7c0"}, + {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6db4667b187a6742b33afbbaf05a7bc551ffcf1ced0000a571aedbb4aa42fc7b"}, + {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55fdc093b5a3cb41d420884cdaf37a1e74c3c37a31f46e66286d9145d2063bd0"}, + {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82e8211d69a4f4bc360ea22cd6555f8e61a1bd211d1d5d39d3d228b48c83a897"}, + {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89aa2c2eeb20957be2d950b85974b30a01a762f3308cd02bb15e1ad632e22dc7"}, + {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9d3e0c25a2350080e9319724dede4f31f43a6c9779be48021a7f4ebde8b2d742"}, + {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7268252af60904bf52c26173cbadc3a071cece75f873705419c8681f24d3edea"}, + {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = 
"sha256:0c250a29735d4f15321007fb02865f0e6b6a41a6b88f1f523ca1596ab5f50bd5"}, + {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:96ec70beabbd3b10e8bfe52616a13561e58fe84c0101dd031dc78f250d5128b9"}, + {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:23b2d7679b73fe0e5a4560b672a39f98dfc6f60df63823b0a9970525325b95f6"}, + {file = "frozenlist-1.4.1-cp39-cp39-win32.whl", hash = "sha256:a7496bfe1da7fb1a4e1cc23bb67c58fab69311cc7d32b5a99c2007b4b2a0e932"}, + {file = "frozenlist-1.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:e6a20a581f9ce92d389a8c7d7c3dd47c81fd5d6e655c8dddf341e14aa48659d0"}, + {file = "frozenlist-1.4.1-py3-none-any.whl", hash = "sha256:04ced3e6a46b4cfffe20f9ae482818e34eba9b5fb0ce4056e4cc9b6e212d09b7"}, + {file = "frozenlist-1.4.1.tar.gz", hash = "sha256:c037a86e8513059a2613aaba4d817bb90b9d9b6b69aace3ce9c877e8c8ed402b"}, +] + +[[package]] +name = "fsspec" +version = "2024.6.0" +description = "File-system specification" +optional = false +python-versions = ">=3.8" +files = [ + {file = "fsspec-2024.6.0-py3-none-any.whl", hash = "sha256:58d7122eb8a1a46f7f13453187bfea4972d66bf01618d37366521b1998034cee"}, + {file = "fsspec-2024.6.0.tar.gz", hash = "sha256:f579960a56e6d8038a9efc8f9c77279ec12e6299aa86b0769a7e9c46b94527c2"}, +] + +[package.extras] +abfs = ["adlfs"] +adl = ["adlfs"] +arrow = ["pyarrow (>=1)"] +dask = ["dask", "distributed"] +dev = ["pre-commit", "ruff"] +doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"] +dropbox = ["dropbox", "dropboxdrivefs", "requests"] +full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"] +fuse = ["fusepy"] +gcs = ["gcsfs"] +git = ["pygit2"] +github = ["requests"] +gs = ["gcsfs"] +gui = ["panel"] +hdfs = ["pyarrow (>=1)"] +http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)"] +libarchive = ["libarchive-c"] +oci = ["ocifs"] +s3 = ["s3fs"] +sftp = ["paramiko"] +smb = ["smbprotocol"] +ssh = ["paramiko"] +test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"] +test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask-expr", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"] +test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"] +tqdm = ["tqdm"] + +[[package]] +name = "gcsfs" +version = "2024.6.0" +description = "Convenient Filesystem interface over GCS" +optional = false +python-versions = ">=3.8" +files = [ + {file = "gcsfs-2024.6.0-py2.py3-none-any.whl", hash = "sha256:92c9239167bd1e209b662b6f4ab71974f276118779c55360215cce5e0098ca7f"}, + {file = "gcsfs-2024.6.0.tar.gz", hash = "sha256:27bd490d7a9dd641d5f6f4ea0b18fabdcfa6129b84ebdb22b23e3460ded1aa8c"}, +] + +[package.dependencies] +aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" +decorator = ">4.1.2" +fsspec = "2024.6.0" +google-auth = 
">=1.2" +google-auth-oauthlib = "*" +google-cloud-storage = "*" +requests = "*" + +[package.extras] +crc = ["crcmod"] +gcsfuse = ["fusepy"] + [[package]] name = "geopandas" version = "0.14.4" @@ -719,6 +1032,225 @@ gitdb = ">=4.0.1,<5" doc = ["sphinx (==4.3.2)", "sphinx-autodoc-typehints", "sphinx-rtd-theme", "sphinxcontrib-applehelp (>=1.0.2,<=1.0.4)", "sphinxcontrib-devhelp (==1.0.2)", "sphinxcontrib-htmlhelp (>=2.0.0,<=2.0.1)", "sphinxcontrib-qthelp (==1.0.3)", "sphinxcontrib-serializinghtml (==1.1.5)"] test = ["coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest (>=7.3.1)", "pytest-cov", "pytest-instafail", "pytest-mock", "pytest-sugar", "typing-extensions"] +[[package]] +name = "google-api-core" +version = "2.19.0" +description = "Google API client core library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "google-api-core-2.19.0.tar.gz", hash = "sha256:cf1b7c2694047886d2af1128a03ae99e391108a08804f87cfd35970e49c9cd10"}, + {file = "google_api_core-2.19.0-py3-none-any.whl", hash = "sha256:8661eec4078c35428fd3f69a2c7ee29e342896b70f01d1a1cbcb334372dd6251"}, +] + +[package.dependencies] +google-auth = ">=2.14.1,<3.0.dev0" +googleapis-common-protos = ">=1.56.2,<2.0.dev0" +proto-plus = ">=1.22.3,<2.0.0dev" +protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0.dev0" +requests = ">=2.18.0,<3.0.0.dev0" + +[package.extras] +grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0)"] +grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] +grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] + +[[package]] +name = "google-auth" +version = "2.30.0" +description = "Google Authentication Library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "google-auth-2.30.0.tar.gz", hash = "sha256:ab630a1320f6720909ad76a7dbdb6841cdf5c66b328d690027e4867bdfb16688"}, + {file = "google_auth-2.30.0-py2.py3-none-any.whl", hash = "sha256:8df7da660f62757388b8a7f249df13549b3373f24388cb5d2f1dd91cc18180b5"}, +] + +[package.dependencies] +cachetools = ">=2.0.0,<6.0" +pyasn1-modules = ">=0.2.1" +rsa = ">=3.1.4,<5" + +[package.extras] +aiohttp = ["aiohttp (>=3.6.2,<4.0.0.dev0)", "requests (>=2.20.0,<3.0.0.dev0)"] +enterprise-cert = ["cryptography (==36.0.2)", "pyopenssl (==22.0.0)"] +pyopenssl = ["cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] +reauth = ["pyu2f (>=0.1.5)"] +requests = ["requests (>=2.20.0,<3.0.0.dev0)"] + +[[package]] +name = "google-auth-oauthlib" +version = "1.2.0" +description = "Google Authentication Library" +optional = false +python-versions = ">=3.6" +files = [ + {file = "google-auth-oauthlib-1.2.0.tar.gz", hash = "sha256:292d2d3783349f2b0734a0a0207b1e1e322ac193c2c09d8f7c613fb7cc501ea8"}, + {file = "google_auth_oauthlib-1.2.0-py2.py3-none-any.whl", hash = "sha256:297c1ce4cb13a99b5834c74a1fe03252e1e499716718b190f56bcb9c4abc4faf"}, +] + +[package.dependencies] +google-auth = ">=2.15.0" +requests-oauthlib = ">=0.7.0" + +[package.extras] +tool = ["click (>=6.0.0)"] + +[[package]] +name = "google-cloud-core" +version = "2.4.1" +description = "Google Cloud API client core library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "google-cloud-core-2.4.1.tar.gz", hash = "sha256:9b7749272a812bde58fff28868d0c5e2f585b82f37e09a1f6ed2d4d10f134073"}, + {file = "google_cloud_core-2.4.1-py2.py3-none-any.whl", hash = 
"sha256:a9e6a4422b9ac5c29f79a0ede9485473338e2ce78d91f2370c01e730eab22e61"}, +] + +[package.dependencies] +google-api-core = ">=1.31.6,<2.0.dev0 || >2.3.0,<3.0.0dev" +google-auth = ">=1.25.0,<3.0dev" + +[package.extras] +grpc = ["grpcio (>=1.38.0,<2.0dev)", "grpcio-status (>=1.38.0,<2.0.dev0)"] + +[[package]] +name = "google-cloud-storage" +version = "2.17.0" +description = "Google Cloud Storage API client library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "google-cloud-storage-2.17.0.tar.gz", hash = "sha256:49378abff54ef656b52dca5ef0f2eba9aa83dc2b2c72c78714b03a1a95fe9388"}, + {file = "google_cloud_storage-2.17.0-py2.py3-none-any.whl", hash = "sha256:5b393bc766b7a3bc6f5407b9e665b2450d36282614b7945e570b3480a456d1e1"}, +] + +[package.dependencies] +google-api-core = ">=2.15.0,<3.0.0dev" +google-auth = ">=2.26.1,<3.0dev" +google-cloud-core = ">=2.3.0,<3.0dev" +google-crc32c = ">=1.0,<2.0dev" +google-resumable-media = ">=2.6.0" +requests = ">=2.18.0,<3.0.0dev" + +[package.extras] +protobuf = ["protobuf (<5.0.0dev)"] + +[[package]] +name = "google-crc32c" +version = "1.5.0" +description = "A python wrapper of the C library 'Google CRC32C'" +optional = false +python-versions = ">=3.7" +files = [ + {file = "google-crc32c-1.5.0.tar.gz", hash = "sha256:89284716bc6a5a415d4eaa11b1726d2d60a0cd12aadf5439828353662ede9dd7"}, + {file = "google_crc32c-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:596d1f98fc70232fcb6590c439f43b350cb762fb5d61ce7b0e9db4539654cc13"}, + {file = "google_crc32c-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:be82c3c8cfb15b30f36768797a640e800513793d6ae1724aaaafe5bf86f8f346"}, + {file = "google_crc32c-1.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:461665ff58895f508e2866824a47bdee72497b091c730071f2b7575d5762ab65"}, + {file = "google_crc32c-1.5.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2096eddb4e7c7bdae4bd69ad364e55e07b8316653234a56552d9c988bd2d61b"}, + {file = "google_crc32c-1.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:116a7c3c616dd14a3de8c64a965828b197e5f2d121fedd2f8c5585c547e87b02"}, + {file = "google_crc32c-1.5.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:5829b792bf5822fd0a6f6eb34c5f81dd074f01d570ed7f36aa101d6fc7a0a6e4"}, + {file = "google_crc32c-1.5.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:64e52e2b3970bd891309c113b54cf0e4384762c934d5ae56e283f9a0afcd953e"}, + {file = "google_crc32c-1.5.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:02ebb8bf46c13e36998aeaad1de9b48f4caf545e91d14041270d9dca767b780c"}, + {file = "google_crc32c-1.5.0-cp310-cp310-win32.whl", hash = "sha256:2e920d506ec85eb4ba50cd4228c2bec05642894d4c73c59b3a2fe20346bd00ee"}, + {file = "google_crc32c-1.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:07eb3c611ce363c51a933bf6bd7f8e3878a51d124acfc89452a75120bc436289"}, + {file = "google_crc32c-1.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:cae0274952c079886567f3f4f685bcaf5708f0a23a5f5216fdab71f81a6c0273"}, + {file = "google_crc32c-1.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1034d91442ead5a95b5aaef90dbfaca8633b0247d1e41621d1e9f9db88c36298"}, + {file = "google_crc32c-1.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c42c70cd1d362284289c6273adda4c6af8039a8ae12dc451dcd61cdabb8ab57"}, + {file = "google_crc32c-1.5.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:8485b340a6a9e76c62a7dce3c98e5f102c9219f4cfbf896a00cf48caf078d438"}, + {file = "google_crc32c-1.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77e2fd3057c9d78e225fa0a2160f96b64a824de17840351b26825b0848022906"}, + {file = "google_crc32c-1.5.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f583edb943cf2e09c60441b910d6a20b4d9d626c75a36c8fcac01a6c96c01183"}, + {file = "google_crc32c-1.5.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:a1fd716e7a01f8e717490fbe2e431d2905ab8aa598b9b12f8d10abebb36b04dd"}, + {file = "google_crc32c-1.5.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:72218785ce41b9cfd2fc1d6a017dc1ff7acfc4c17d01053265c41a2c0cc39b8c"}, + {file = "google_crc32c-1.5.0-cp311-cp311-win32.whl", hash = "sha256:66741ef4ee08ea0b2cc3c86916ab66b6aef03768525627fd6a1b34968b4e3709"}, + {file = "google_crc32c-1.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:ba1eb1843304b1e5537e1fca632fa894d6f6deca8d6389636ee5b4797affb968"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:98cb4d057f285bd80d8778ebc4fde6b4d509ac3f331758fb1528b733215443ae"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd8536e902db7e365f49e7d9029283403974ccf29b13fc7028b97e2295b33556"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:19e0a019d2c4dcc5e598cd4a4bc7b008546b0358bd322537c74ad47a5386884f"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02c65b9817512edc6a4ae7c7e987fea799d2e0ee40c53ec573a692bee24de876"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6ac08d24c1f16bd2bf5eca8eaf8304812f44af5cfe5062006ec676e7e1d50afc"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:3359fc442a743e870f4588fcf5dcbc1bf929df1fad8fb9905cd94e5edb02e84c"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1e986b206dae4476f41bcec1faa057851f3889503a70e1bdb2378d406223994a"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:de06adc872bcd8c2a4e0dc51250e9e65ef2ca91be023b9d13ebd67c2ba552e1e"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-win32.whl", hash = "sha256:d3515f198eaa2f0ed49f8819d5732d70698c3fa37384146079b3799b97667a94"}, + {file = "google_crc32c-1.5.0-cp37-cp37m-win_amd64.whl", hash = "sha256:67b741654b851abafb7bc625b6d1cdd520a379074e64b6a128e3b688c3c04740"}, + {file = "google_crc32c-1.5.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c02ec1c5856179f171e032a31d6f8bf84e5a75c45c33b2e20a3de353b266ebd8"}, + {file = "google_crc32c-1.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:edfedb64740750e1a3b16152620220f51d58ff1b4abceb339ca92e934775c27a"}, + {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:84e6e8cd997930fc66d5bb4fde61e2b62ba19d62b7abd7a69920406f9ecca946"}, + {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:024894d9d3cfbc5943f8f230e23950cd4906b2fe004c72e29b209420a1e6b05a"}, + {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:998679bf62b7fb599d2878aa3ed06b9ce688b8974893e7223c60db155f26bd8d"}, + {file = "google_crc32c-1.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:83c681c526a3439b5cf94f7420471705bbf96262f49a6fe546a6db5f687a3d4a"}, + 
{file = "google_crc32c-1.5.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:4c6fdd4fccbec90cc8a01fc00773fcd5fa28db683c116ee3cb35cd5da9ef6c37"}, + {file = "google_crc32c-1.5.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5ae44e10a8e3407dbe138984f21e536583f2bba1be9491239f942c2464ac0894"}, + {file = "google_crc32c-1.5.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:37933ec6e693e51a5b07505bd05de57eee12f3e8c32b07da7e73669398e6630a"}, + {file = "google_crc32c-1.5.0-cp38-cp38-win32.whl", hash = "sha256:fe70e325aa68fa4b5edf7d1a4b6f691eb04bbccac0ace68e34820d283b5f80d4"}, + {file = "google_crc32c-1.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:74dea7751d98034887dbd821b7aae3e1d36eda111d6ca36c206c44478035709c"}, + {file = "google_crc32c-1.5.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c6c777a480337ac14f38564ac88ae82d4cd238bf293f0a22295b66eb89ffced7"}, + {file = "google_crc32c-1.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:759ce4851a4bb15ecabae28f4d2e18983c244eddd767f560165563bf9aefbc8d"}, + {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f13cae8cc389a440def0c8c52057f37359014ccbc9dc1f0827936bcd367c6100"}, + {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e560628513ed34759456a416bf86b54b2476c59144a9138165c9a1575801d0d9"}, + {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1674e4307fa3024fc897ca774e9c7562c957af85df55efe2988ed9056dc4e57"}, + {file = "google_crc32c-1.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:278d2ed7c16cfc075c91378c4f47924c0625f5fc84b2d50d921b18b7975bd210"}, + {file = "google_crc32c-1.5.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d5280312b9af0976231f9e317c20e4a61cd2f9629b7bfea6a693d1878a264ebd"}, + {file = "google_crc32c-1.5.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:8b87e1a59c38f275c0e3676fc2ab6d59eccecfd460be267ac360cc31f7bcde96"}, + {file = "google_crc32c-1.5.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7c074fece789b5034b9b1404a1f8208fc2d4c6ce9decdd16e8220c5a793e6f61"}, + {file = "google_crc32c-1.5.0-cp39-cp39-win32.whl", hash = "sha256:7f57f14606cd1dd0f0de396e1e53824c371e9544a822648cd76c034d209b559c"}, + {file = "google_crc32c-1.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:a2355cba1f4ad8b6988a4ca3feed5bff33f6af2d7f134852cf279c2aebfde541"}, + {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f314013e7dcd5cf45ab1945d92e713eec788166262ae8deb2cfacd53def27325"}, + {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b747a674c20a67343cb61d43fdd9207ce5da6a99f629c6e2541aa0e89215bcd"}, + {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8f24ed114432de109aa9fd317278518a5af2d31ac2ea6b952b2f7782b43da091"}, + {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8667b48e7a7ef66afba2c81e1094ef526388d35b873966d8a9a447974ed9178"}, + {file = "google_crc32c-1.5.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:1c7abdac90433b09bad6c43a43af253e688c9cfc1c86d332aed13f9a7c7f65e2"}, + {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:6f998db4e71b645350b9ac28a2167e6632c239963ca9da411523bb439c5c514d"}, + {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:9c99616c853bb585301df6de07ca2cadad344fd1ada6d62bb30aec05219c45d2"}, + {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2ad40e31093a4af319dadf503b2467ccdc8f67c72e4bcba97f8c10cb078207b5"}, + {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd67cf24a553339d5062eff51013780a00d6f97a39ca062781d06b3a73b15462"}, + {file = "google_crc32c-1.5.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:398af5e3ba9cf768787eef45c803ff9614cc3e22a5b2f7d7ae116df8b11e3314"}, + {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:b1f8133c9a275df5613a451e73f36c2aea4fe13c5c8997e22cf355ebd7bd0728"}, + {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ba053c5f50430a3fcfd36f75aff9caeba0440b2d076afdb79a318d6ca245f88"}, + {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:272d3892a1e1a2dbc39cc5cde96834c236d5327e2122d3aaa19f6614531bb6eb"}, + {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:635f5d4dd18758a1fbd1049a8e8d2fee4ffed124462d837d1a02a0e009c3ab31"}, + {file = "google_crc32c-1.5.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c672d99a345849301784604bfeaeba4db0c7aae50b95be04dd651fd2a7310b93"}, +] + +[package.extras] +testing = ["pytest"] + +[[package]] +name = "google-resumable-media" +version = "2.7.1" +description = "Utilities for Google Media Downloads and Resumable Uploads" +optional = false +python-versions = ">=3.7" +files = [ + {file = "google-resumable-media-2.7.1.tar.gz", hash = "sha256:eae451a7b2e2cdbaaa0fd2eb00cc8a1ee5e95e16b55597359cbc3d27d7d90e33"}, + {file = "google_resumable_media-2.7.1-py2.py3-none-any.whl", hash = "sha256:103ebc4ba331ab1bfdac0250f8033627a2cd7cde09e7ccff9181e31ba4315b2c"}, +] + +[package.dependencies] +google-crc32c = ">=1.0,<2.0dev" + +[package.extras] +aiohttp = ["aiohttp (>=3.6.2,<4.0.0dev)", "google-auth (>=1.22.0,<2.0dev)"] +requests = ["requests (>=2.18.0,<3.0.0dev)"] + +[[package]] +name = "googleapis-common-protos" +version = "1.63.1" +description = "Common protobufs used in Google APIs" +optional = false +python-versions = ">=3.7" +files = [ + {file = "googleapis-common-protos-1.63.1.tar.gz", hash = "sha256:c6442f7a0a6b2a80369457d79e6672bb7dcbaab88e0848302497e3ec80780a6a"}, + {file = "googleapis_common_protos-1.63.1-py2.py3-none-any.whl", hash = "sha256:0e1c2cdfcbc354b76e4a211a35ea35d6926a835cba1377073c4861db904a1877"}, +] + +[package.dependencies] +protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" + +[package.extras] +grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] + [[package]] name = "griffe" version = "0.44.0" @@ -1232,6 +1764,105 @@ files = [ griffe = ">=0.44" mkdocstrings = ">=0.24.2" +[[package]] +name = "multidict" +version = "6.0.5" +description = "multidict implementation" +optional = false +python-versions = ">=3.7" +files = [ + {file = "multidict-6.0.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:228b644ae063c10e7f324ab1ab6b548bdf6f8b47f3ec234fef1093bc2735e5f9"}, + {file = "multidict-6.0.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:896ebdcf62683551312c30e20614305f53125750803b614e9e6ce74a96232604"}, + {file = "multidict-6.0.5-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:411bf8515f3be9813d06004cac41ccf7d1cd46dfe233705933dd163b60e37600"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d147090048129ce3c453f0292e7697d333db95e52616b3793922945804a433c"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:215ed703caf15f578dca76ee6f6b21b7603791ae090fbf1ef9d865571039ade5"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c6390cf87ff6234643428991b7359b5f59cc15155695deb4eda5c777d2b880f"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21fd81c4ebdb4f214161be351eb5bcf385426bf023041da2fd9e60681f3cebae"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3cc2ad10255f903656017363cd59436f2111443a76f996584d1077e43ee51182"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6939c95381e003f54cd4c5516740faba40cf5ad3eeff460c3ad1d3e0ea2549bf"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:220dd781e3f7af2c2c1053da9fa96d9cf3072ca58f057f4c5adaaa1cab8fc442"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:766c8f7511df26d9f11cd3a8be623e59cca73d44643abab3f8c8c07620524e4a"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:fe5d7785250541f7f5019ab9cba2c71169dc7d74d0f45253f8313f436458a4ef"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c1c1496e73051918fcd4f58ff2e0f2f3066d1c76a0c6aeffd9b45d53243702cc"}, + {file = "multidict-6.0.5-cp310-cp310-win32.whl", hash = "sha256:7afcdd1fc07befad18ec4523a782cde4e93e0a2bf71239894b8d61ee578c1319"}, + {file = "multidict-6.0.5-cp310-cp310-win_amd64.whl", hash = "sha256:99f60d34c048c5c2fabc766108c103612344c46e35d4ed9ae0673d33c8fb26e8"}, + {file = "multidict-6.0.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f285e862d2f153a70586579c15c44656f888806ed0e5b56b64489afe4a2dbfba"}, + {file = "multidict-6.0.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:53689bb4e102200a4fafa9de9c7c3c212ab40a7ab2c8e474491914d2305f187e"}, + {file = "multidict-6.0.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:612d1156111ae11d14afaf3a0669ebf6c170dbb735e510a7438ffe2369a847fd"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7be7047bd08accdb7487737631d25735c9a04327911de89ff1b26b81745bd4e3"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de170c7b4fe6859beb8926e84f7d7d6c693dfe8e27372ce3b76f01c46e489fcf"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04bde7a7b3de05732a4eb39c94574db1ec99abb56162d6c520ad26f83267de29"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85f67aed7bb647f93e7520633d8f51d3cbc6ab96957c71272b286b2f30dc70ed"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:425bf820055005bfc8aa9a0b99ccb52cc2f4070153e34b701acc98d201693733"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d3eb1ceec286eba8220c26f3b0096cf189aea7057b6e7b7a2e60ed36b373b77f"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_i686.whl", hash = 
"sha256:7901c05ead4b3fb75113fb1dd33eb1253c6d3ee37ce93305acd9d38e0b5f21a4"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e0e79d91e71b9867c73323a3444724d496c037e578a0e1755ae159ba14f4f3d1"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:29bfeb0dff5cb5fdab2023a7a9947b3b4af63e9c47cae2a10ad58394b517fddc"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e030047e85cbcedbfc073f71836d62dd5dadfbe7531cae27789ff66bc551bd5e"}, + {file = "multidict-6.0.5-cp311-cp311-win32.whl", hash = "sha256:2f4848aa3baa109e6ab81fe2006c77ed4d3cd1e0ac2c1fbddb7b1277c168788c"}, + {file = "multidict-6.0.5-cp311-cp311-win_amd64.whl", hash = "sha256:2faa5ae9376faba05f630d7e5e6be05be22913782b927b19d12b8145968a85ea"}, + {file = "multidict-6.0.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:51d035609b86722963404f711db441cf7134f1889107fb171a970c9701f92e1e"}, + {file = "multidict-6.0.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:cbebcd5bcaf1eaf302617c114aa67569dd3f090dd0ce8ba9e35e9985b41ac35b"}, + {file = "multidict-6.0.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2ffc42c922dbfddb4a4c3b438eb056828719f07608af27d163191cb3e3aa6cc5"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ceb3b7e6a0135e092de86110c5a74e46bda4bd4fbfeeb3a3bcec79c0f861e450"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:79660376075cfd4b2c80f295528aa6beb2058fd289f4c9252f986751a4cd0496"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e4428b29611e989719874670fd152b6625500ad6c686d464e99f5aaeeaca175a"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d84a5c3a5f7ce6db1f999fb9438f686bc2e09d38143f2d93d8406ed2dd6b9226"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76c0de87358b192de7ea9649beb392f107dcad9ad27276324c24c91774ca5271"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:79a6d2ba910adb2cbafc95dad936f8b9386e77c84c35bc0add315b856d7c3abb"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:92d16a3e275e38293623ebf639c471d3e03bb20b8ebb845237e0d3664914caef"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:fb616be3538599e797a2017cccca78e354c767165e8858ab5116813146041a24"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:14c2976aa9038c2629efa2c148022ed5eb4cb939e15ec7aace7ca932f48f9ba6"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:435a0984199d81ca178b9ae2c26ec3d49692d20ee29bc4c11a2a8d4514c67eda"}, + {file = "multidict-6.0.5-cp312-cp312-win32.whl", hash = "sha256:9fe7b0653ba3d9d65cbe7698cca585bf0f8c83dbbcc710db9c90f478e175f2d5"}, + {file = "multidict-6.0.5-cp312-cp312-win_amd64.whl", hash = "sha256:01265f5e40f5a17f8241d52656ed27192be03bfa8764d88e8220141d1e4b3556"}, + {file = "multidict-6.0.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:19fe01cea168585ba0f678cad6f58133db2aa14eccaf22f88e4a6dccadfad8b3"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6bf7a982604375a8d49b6cc1b781c1747f243d91b81035a9b43a2126c04766f5"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash 
= "sha256:107c0cdefe028703fb5dafe640a409cb146d44a6ae201e55b35a4af8e95457dd"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:403c0911cd5d5791605808b942c88a8155c2592e05332d2bf78f18697a5fa15e"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aeaf541ddbad8311a87dd695ed9642401131ea39ad7bc8cf3ef3967fd093b626"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e4972624066095e52b569e02b5ca97dbd7a7ddd4294bf4e7247d52635630dd83"}, + {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d946b0a9eb8aaa590df1fe082cee553ceab173e6cb5b03239716338629c50c7a"}, + {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b55358304d7a73d7bdf5de62494aaf70bd33015831ffd98bc498b433dfe5b10c"}, + {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:a3145cb08d8625b2d3fee1b2d596a8766352979c9bffe5d7833e0503d0f0b5e5"}, + {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:d65f25da8e248202bd47445cec78e0025c0fe7582b23ec69c3b27a640dd7a8e3"}, + {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:c9bf56195c6bbd293340ea82eafd0071cb3d450c703d2c93afb89f93b8386ccc"}, + {file = "multidict-6.0.5-cp37-cp37m-win32.whl", hash = "sha256:69db76c09796b313331bb7048229e3bee7928eb62bab5e071e9f7fcc4879caee"}, + {file = "multidict-6.0.5-cp37-cp37m-win_amd64.whl", hash = "sha256:fce28b3c8a81b6b36dfac9feb1de115bab619b3c13905b419ec71d03a3fc1423"}, + {file = "multidict-6.0.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:76f067f5121dcecf0d63a67f29080b26c43c71a98b10c701b0677e4a065fbd54"}, + {file = "multidict-6.0.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b82cc8ace10ab5bd93235dfaab2021c70637005e1ac787031f4d1da63d493c1d"}, + {file = "multidict-6.0.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5cb241881eefd96b46f89b1a056187ea8e9ba14ab88ba632e68d7a2ecb7aadf7"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8e94e6912639a02ce173341ff62cc1201232ab86b8a8fcc05572741a5dc7d93"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:09a892e4a9fb47331da06948690ae38eaa2426de97b4ccbfafbdcbe5c8f37ff8"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55205d03e8a598cfc688c71ca8ea5f66447164efff8869517f175ea632c7cb7b"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37b15024f864916b4951adb95d3a80c9431299080341ab9544ed148091b53f50"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2a1dee728b52b33eebff5072817176c172050d44d67befd681609b4746e1c2e"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:edd08e6f2f1a390bf137080507e44ccc086353c8e98c657e666c017718561b89"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:60d698e8179a42ec85172d12f50b1668254628425a6bd611aba022257cac1386"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:3d25f19500588cbc47dc19081d78131c32637c25804df8414463ec908631e453"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:4cc0ef8b962ac7a5e62b9e826bd0cd5040e7d401bc45a6835910ed699037a461"}, + {file = 
"multidict-6.0.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:eca2e9d0cc5a889850e9bbd68e98314ada174ff6ccd1129500103df7a94a7a44"}, + {file = "multidict-6.0.5-cp38-cp38-win32.whl", hash = "sha256:4a6a4f196f08c58c59e0b8ef8ec441d12aee4125a7d4f4fef000ccb22f8d7241"}, + {file = "multidict-6.0.5-cp38-cp38-win_amd64.whl", hash = "sha256:0275e35209c27a3f7951e1ce7aaf93ce0d163b28948444bec61dd7badc6d3f8c"}, + {file = "multidict-6.0.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e7be68734bd8c9a513f2b0cfd508802d6609da068f40dc57d4e3494cefc92929"}, + {file = "multidict-6.0.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1d9ea7a7e779d7a3561aade7d596649fbecfa5c08a7674b11b423783217933f9"}, + {file = "multidict-6.0.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ea1456df2a27c73ce51120fa2f519f1bea2f4a03a917f4a43c8707cf4cbbae1a"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf590b134eb70629e350691ecca88eac3e3b8b3c86992042fb82e3cb1830d5e1"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5c0631926c4f58e9a5ccce555ad7747d9a9f8b10619621f22f9635f069f6233e"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dce1c6912ab9ff5f179eaf6efe7365c1f425ed690b03341911bf4939ef2f3046"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0868d64af83169e4d4152ec612637a543f7a336e4a307b119e98042e852ad9c"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:141b43360bfd3bdd75f15ed811850763555a251e38b2405967f8e25fb43f7d40"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7df704ca8cf4a073334e0427ae2345323613e4df18cc224f647f251e5e75a527"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6214c5a5571802c33f80e6c84713b2c79e024995b9c5897f794b43e714daeec9"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:cd6c8fca38178e12c00418de737aef1261576bd1b6e8c6134d3e729a4e858b38"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:e02021f87a5b6932fa6ce916ca004c4d441509d33bbdbeca70d05dff5e9d2479"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ebd8d160f91a764652d3e51ce0d2956b38efe37c9231cd82cfc0bed2e40b581c"}, + {file = "multidict-6.0.5-cp39-cp39-win32.whl", hash = "sha256:04da1bb8c8dbadf2a18a452639771951c662c5ad03aefe4884775454be322c9b"}, + {file = "multidict-6.0.5-cp39-cp39-win_amd64.whl", hash = "sha256:d6f6d4f185481c9669b9447bf9d9cf3b95a0e9df9d169bbc17e363b7d5487755"}, + {file = "multidict-6.0.5-py3-none-any.whl", hash = "sha256:0d63c74e3d7ab26de115c49bffc92cc77ed23395303d496eae515d4204a625e7"}, + {file = "multidict-6.0.5.tar.gz", hash = "sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da"}, +] + [[package]] name = "multiprocess" version = "0.70.16" @@ -1388,6 +2019,46 @@ files = [ [package.dependencies] setuptools = "*" +[[package]] +name = "numcodecs" +version = "0.12.1" +description = "A Python package providing buffer compression and transformation codecs for use in data storage and communication applications." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "numcodecs-0.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d37f628fe92b3699e65831d5733feca74d2e33b50ef29118ffd41c13c677210e"}, + {file = "numcodecs-0.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:941b7446b68cf79f089bcfe92edaa3b154533dcbcd82474f994b28f2eedb1c60"}, + {file = "numcodecs-0.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e79bf9d1d37199ac00a60ff3adb64757523291d19d03116832e600cac391c51"}, + {file = "numcodecs-0.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:82d7107f80f9307235cb7e74719292d101c7ea1e393fe628817f0d635b7384f5"}, + {file = "numcodecs-0.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:eeaf42768910f1c6eebf6c1bb00160728e62c9343df9e2e315dc9fe12e3f6071"}, + {file = "numcodecs-0.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:135b2d47563f7b9dc5ee6ce3d1b81b0f1397f69309e909f1a35bb0f7c553d45e"}, + {file = "numcodecs-0.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a191a8e347ecd016e5c357f2bf41fbcb026f6ffe78fff50c77ab12e96701d155"}, + {file = "numcodecs-0.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:21d8267bd4313f4d16f5b6287731d4c8ebdab236038f29ad1b0e93c9b2ca64ee"}, + {file = "numcodecs-0.12.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:2f84df6b8693206365a5b37c005bfa9d1be486122bde683a7b6446af4b75d862"}, + {file = "numcodecs-0.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:760627780a8b6afdb7f942f2a0ddaf4e31d3d7eea1d8498cf0fd3204a33c4618"}, + {file = "numcodecs-0.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c258bd1d3dfa75a9b708540d23b2da43d63607f9df76dfa0309a7597d1de3b73"}, + {file = "numcodecs-0.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:e04649ea504aff858dbe294631f098fbfd671baf58bfc04fc48d746554c05d67"}, + {file = "numcodecs-0.12.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:caf1a1e6678aab9c1e29d2109b299f7a467bd4d4c34235b1f0e082167846b88f"}, + {file = "numcodecs-0.12.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c17687b1fd1fef68af616bc83f896035d24e40e04e91e7e6dae56379eb59fe33"}, + {file = "numcodecs-0.12.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:29dfb195f835a55c4d490fb097aac8c1bcb96c54cf1b037d9218492c95e9d8c5"}, + {file = "numcodecs-0.12.1-cp38-cp38-win_amd64.whl", hash = "sha256:2f1ba2f4af3fd3ba65b1bcffb717fe65efe101a50a91c368f79f3101dbb1e243"}, + {file = "numcodecs-0.12.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2fbb12a6a1abe95926f25c65e283762d63a9bf9e43c0de2c6a1a798347dfcb40"}, + {file = "numcodecs-0.12.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f2207871868b2464dc11c513965fd99b958a9d7cde2629be7b2dc84fdaab013b"}, + {file = "numcodecs-0.12.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abff3554a6892a89aacf7b642a044e4535499edf07aeae2f2e6e8fc08c9ba07f"}, + {file = "numcodecs-0.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:ef964d4860d3e6b38df0633caf3e51dc850a6293fd8e93240473642681d95136"}, + {file = "numcodecs-0.12.1.tar.gz", hash = "sha256:05d91a433733e7eef268d7e80ec226a0232da244289614a8f3826901aec1098e"}, +] + +[package.dependencies] +numpy = ">=1.7" + +[package.extras] +docs = ["mock", "numpydoc", "sphinx (<7.0.0)", "sphinx-issues"] +msgpack = ["msgpack"] +test = ["coverage", "flake8", "pytest", "pytest-cov"] +test-extras = ["importlib-metadata"] +zfpy = ["zfpy (>=1.0.0)"] + [[package]] name = "numpy" version = "1.26.4" @@ -1433,6 +2104,22 @@ files = [ {file = 
"numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, ] +[[package]] +name = "oauthlib" +version = "3.2.2" +description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" +optional = false +python-versions = ">=3.6" +files = [ + {file = "oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"}, + {file = "oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"}, +] + +[package.extras] +rsa = ["cryptography (>=3.0.0)"] +signals = ["blinker (>=1.4.0)"] +signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] + [[package]] name = "packaging" version = "24.0" @@ -1715,6 +2402,43 @@ nodeenv = ">=0.11.1" pyyaml = ">=5.1" virtualenv = ">=20.10.0" +[[package]] +name = "proto-plus" +version = "1.23.0" +description = "Beautiful, Pythonic protocol buffers." +optional = false +python-versions = ">=3.6" +files = [ + {file = "proto-plus-1.23.0.tar.gz", hash = "sha256:89075171ef11988b3fa157f5dbd8b9cf09d65fffee97e29ce403cd8defba19d2"}, + {file = "proto_plus-1.23.0-py3-none-any.whl", hash = "sha256:a829c79e619e1cf632de091013a4173deed13a55f326ef84f05af6f50ff4c82c"}, +] + +[package.dependencies] +protobuf = ">=3.19.0,<5.0.0dev" + +[package.extras] +testing = ["google-api-core[grpc] (>=1.31.5)"] + +[[package]] +name = "protobuf" +version = "4.25.3" +description = "" +optional = false +python-versions = ">=3.8" +files = [ + {file = "protobuf-4.25.3-cp310-abi3-win32.whl", hash = "sha256:d4198877797a83cbfe9bffa3803602bbe1625dc30d8a097365dbc762e5790faa"}, + {file = "protobuf-4.25.3-cp310-abi3-win_amd64.whl", hash = "sha256:209ba4cc916bab46f64e56b85b090607a676f66b473e6b762e6f1d9d591eb2e8"}, + {file = "protobuf-4.25.3-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:f1279ab38ecbfae7e456a108c5c0681e4956d5b1090027c1de0f934dfdb4b35c"}, + {file = "protobuf-4.25.3-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:e7cb0ae90dd83727f0c0718634ed56837bfeeee29a5f82a7514c03ee1364c019"}, + {file = "protobuf-4.25.3-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:7c8daa26095f82482307bc717364e7c13f4f1c99659be82890dcfc215194554d"}, + {file = "protobuf-4.25.3-cp38-cp38-win32.whl", hash = "sha256:f4f118245c4a087776e0a8408be33cf09f6c547442c00395fbfb116fac2f8ac2"}, + {file = "protobuf-4.25.3-cp38-cp38-win_amd64.whl", hash = "sha256:c053062984e61144385022e53678fbded7aea14ebb3e0305ae3592fb219ccfa4"}, + {file = "protobuf-4.25.3-cp39-cp39-win32.whl", hash = "sha256:19b270aeaa0099f16d3ca02628546b8baefe2955bbe23224aaf856134eccf1e4"}, + {file = "protobuf-4.25.3-cp39-cp39-win_amd64.whl", hash = "sha256:e3c97a1555fd6388f857770ff8b9703083de6bf1f9274a002a332d65fbb56c8c"}, + {file = "protobuf-4.25.3-py3-none-any.whl", hash = "sha256:f0700d54bcf45424477e46a9f0944155b46fb0639d69728739c0e47bab83f2b9"}, + {file = "protobuf-4.25.3.tar.gz", hash = "sha256:25b5d0b42fd000320bd7830b349e3b696435f3b329810427a6bcce6a5492cc5c"}, +] + [[package]] name = "pyarrow" version = "16.0.0" @@ -1763,6 +2487,31 @@ files = [ [package.dependencies] numpy = ">=1.16.6" +[[package]] +name = "pyasn1" +version = "0.6.0" +description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyasn1-0.6.0-py2.py3-none-any.whl", hash = "sha256:cca4bb0f2df5504f02f6f8a775b6e416ff9b0b3b16f7ee80b5a3153d9b804473"}, + {file = "pyasn1-0.6.0.tar.gz", hash = 
"sha256:3a35ab2c4b5ef98e17dfdec8ab074046fbda76e281c5a706ccd82328cfc8f64c"}, +] + +[[package]] +name = "pyasn1-modules" +version = "0.4.0" +description = "A collection of ASN.1-based protocols modules" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyasn1_modules-0.4.0-py3-none-any.whl", hash = "sha256:be04f15b66c206eed667e0bb5ab27e2b1855ea54a842e5037738099e8ca4ae0b"}, + {file = "pyasn1_modules-0.4.0.tar.gz", hash = "sha256:831dbcea1b177b28c9baddf4c6d1013c24c3accd14a1873fffaa6a2e905f17b6"}, +] + +[package.dependencies] +pyasn1 = ">=0.4.6,<0.7.0" + [[package]] name = "pygments" version = "2.18.0" @@ -2196,6 +2945,24 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "requests-oauthlib" +version = "2.0.0" +description = "OAuthlib authentication support for Requests." +optional = false +python-versions = ">=3.4" +files = [ + {file = "requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9"}, + {file = "requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36"}, +] + +[package.dependencies] +oauthlib = ">=3.0.0" +requests = ">=2.0.0" + +[package.extras] +rsa = ["oauthlib[signedtoken] (>=3.0.0)"] + [[package]] name = "rra-tools" version = "1.0.10" @@ -2216,6 +2983,20 @@ pathos = ">=0.3.2,<0.4.0" requests = ">=2.32.2,<3.0.0" tqdm = ">=4.66.4,<5.0.0" +[[package]] +name = "rsa" +version = "4.9" +description = "Pure-Python RSA implementation" +optional = false +python-versions = ">=3.6,<4" +files = [ + {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, + {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"}, +] + +[package.dependencies] +pyasn1 = ">=0.1.3" + [[package]] name = "ruff" version = "0.4.3" @@ -2680,7 +3461,131 @@ io = ["cftime", "fsspec", "h5netcdf", "netCDF4", "pooch", "pydap", "scipy", "zar parallel = ["dask[complete]"] viz = ["matplotlib", "nc-time-axis", "seaborn"] +[[package]] +name = "yarl" +version = "1.9.4" +description = "Yet another URL library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a8c1df72eb746f4136fe9a2e72b0c9dc1da1cbd23b5372f94b5820ff8ae30e0e"}, + {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a3a6ed1d525bfb91b3fc9b690c5a21bb52de28c018530ad85093cc488bee2dd2"}, + {file = "yarl-1.9.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c38c9ddb6103ceae4e4498f9c08fac9b590c5c71b0370f98714768e22ac6fa66"}, + {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9e09c9d74f4566e905a0b8fa668c58109f7624db96a2171f21747abc7524234"}, + {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8477c1ee4bd47c57d49621a062121c3023609f7a13b8a46953eb6c9716ca392"}, + {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5ff2c858f5f6a42c2a8e751100f237c5e869cbde669a724f2062d4c4ef93551"}, + {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:357495293086c5b6d34ca9616a43d329317feab7917518bc97a08f9e55648455"}, + {file = "yarl-1.9.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:54525ae423d7b7a8ee81ba189f131054defdb122cde31ff17477951464c1691c"}, + {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:801e9264d19643548651b9db361ce3287176671fb0117f96b5ac0ee1c3530d53"}, + {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e516dc8baf7b380e6c1c26792610230f37147bb754d6426462ab115a02944385"}, + {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:7d5aaac37d19b2904bb9dfe12cdb08c8443e7ba7d2852894ad448d4b8f442863"}, + {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:54beabb809ffcacbd9d28ac57b0db46e42a6e341a030293fb3185c409e626b8b"}, + {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bac8d525a8dbc2a1507ec731d2867025d11ceadcb4dd421423a5d42c56818541"}, + {file = "yarl-1.9.4-cp310-cp310-win32.whl", hash = "sha256:7855426dfbddac81896b6e533ebefc0af2f132d4a47340cee6d22cac7190022d"}, + {file = "yarl-1.9.4-cp310-cp310-win_amd64.whl", hash = "sha256:848cd2a1df56ddbffeb375535fb62c9d1645dde33ca4d51341378b3f5954429b"}, + {file = "yarl-1.9.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:35a2b9396879ce32754bd457d31a51ff0a9d426fd9e0e3c33394bf4b9036b099"}, + {file = "yarl-1.9.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c7d56b293cc071e82532f70adcbd8b61909eec973ae9d2d1f9b233f3d943f2c"}, + {file = "yarl-1.9.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d8a1c6c0be645c745a081c192e747c5de06e944a0d21245f4cf7c05e457c36e0"}, + {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b3c1ffe10069f655ea2d731808e76e0f452fc6c749bea04781daf18e6039525"}, + {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:549d19c84c55d11687ddbd47eeb348a89df9cb30e1993f1b128f4685cd0ebbf8"}, + {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7409f968456111140c1c95301cadf071bd30a81cbd7ab829169fb9e3d72eae9"}, + {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e23a6d84d9d1738dbc6e38167776107e63307dfc8ad108e580548d1f2c587f42"}, + {file = "yarl-1.9.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d8b889777de69897406c9fb0b76cdf2fd0f31267861ae7501d93003d55f54fbe"}, + {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:03caa9507d3d3c83bca08650678e25364e1843b484f19986a527630ca376ecce"}, + {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4e9035df8d0880b2f1c7f5031f33f69e071dfe72ee9310cfc76f7b605958ceb9"}, + {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:c0ec0ed476f77db9fb29bca17f0a8fcc7bc97ad4c6c1d8959c507decb22e8572"}, + {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:ee04010f26d5102399bd17f8df8bc38dc7ccd7701dc77f4a68c5b8d733406958"}, + {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49a180c2e0743d5d6e0b4d1a9e5f633c62eca3f8a86ba5dd3c471060e352ca98"}, + {file = "yarl-1.9.4-cp311-cp311-win32.whl", hash = "sha256:81eb57278deb6098a5b62e88ad8281b2ba09f2f1147c4767522353eaa6260b31"}, + {file = "yarl-1.9.4-cp311-cp311-win_amd64.whl", hash = "sha256:d1d2532b340b692880261c15aee4dc94dd22ca5d61b9db9a8a361953d36410b1"}, + {file = "yarl-1.9.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0d2454f0aef65ea81037759be5ca9947539667eecebca092733b2eb43c965a81"}, + {file = "yarl-1.9.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:44d8ffbb9c06e5a7f529f38f53eda23e50d1ed33c6c869e01481d3fafa6b8142"}, + {file = "yarl-1.9.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aaaea1e536f98754a6e5c56091baa1b6ce2f2700cc4a00b0d49eca8dea471074"}, + {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3777ce5536d17989c91696db1d459574e9a9bd37660ea7ee4d3344579bb6f129"}, + {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9fc5fc1eeb029757349ad26bbc5880557389a03fa6ada41703db5e068881e5f2"}, + {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ea65804b5dc88dacd4a40279af0cdadcfe74b3e5b4c897aa0d81cf86927fee78"}, + {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa102d6d280a5455ad6a0f9e6d769989638718e938a6a0a2ff3f4a7ff8c62cc4"}, + {file = "yarl-1.9.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09efe4615ada057ba2d30df871d2f668af661e971dfeedf0c159927d48bbeff0"}, + {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:008d3e808d03ef28542372d01057fd09168419cdc8f848efe2804f894ae03e51"}, + {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:6f5cb257bc2ec58f437da2b37a8cd48f666db96d47b8a3115c29f316313654ff"}, + {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:992f18e0ea248ee03b5a6e8b3b4738850ae7dbb172cc41c966462801cbf62cf7"}, + {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:0e9d124c191d5b881060a9e5060627694c3bdd1fe24c5eecc8d5d7d0eb6faabc"}, + {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3986b6f41ad22988e53d5778f91855dc0399b043fc8946d4f2e68af22ee9ff10"}, + {file = "yarl-1.9.4-cp312-cp312-win32.whl", hash = "sha256:4b21516d181cd77ebd06ce160ef8cc2a5e9ad35fb1c5930882baff5ac865eee7"}, + {file = "yarl-1.9.4-cp312-cp312-win_amd64.whl", hash = "sha256:a9bd00dc3bc395a662900f33f74feb3e757429e545d831eef5bb280252631984"}, + {file = "yarl-1.9.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:63b20738b5aac74e239622d2fe30df4fca4942a86e31bf47a81a0e94c14df94f"}, + {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d7f7de27b8944f1fee2c26a88b4dabc2409d2fea7a9ed3df79b67277644e17"}, + {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c74018551e31269d56fab81a728f683667e7c28c04e807ba08f8c9e3bba32f14"}, + {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ca06675212f94e7a610e85ca36948bb8fc023e458dd6c63ef71abfd482481aa5"}, + {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5aef935237d60a51a62b86249839b51345f47564208c6ee615ed2a40878dccdd"}, + {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b134fd795e2322b7684155b7855cc99409d10b2e408056db2b93b51a52accc7"}, + {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d25039a474c4c72a5ad4b52495056f843a7ff07b632c1b92ea9043a3d9950f6e"}, + {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f7d6b36dd2e029b6bcb8a13cf19664c7b8e19ab3a58e0fefbb5b8461447ed5ec"}, + {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:957b4774373cf6f709359e5c8c4a0af9f6d7875db657adb0feaf8d6cb3c3964c"}, + {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = 
"sha256:d7eeb6d22331e2fd42fce928a81c697c9ee2d51400bd1a28803965883e13cead"}, + {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6a962e04b8f91f8c4e5917e518d17958e3bdee71fd1d8b88cdce74dd0ebbf434"}, + {file = "yarl-1.9.4-cp37-cp37m-win32.whl", hash = "sha256:f3bc6af6e2b8f92eced34ef6a96ffb248e863af20ef4fde9448cc8c9b858b749"}, + {file = "yarl-1.9.4-cp37-cp37m-win_amd64.whl", hash = "sha256:ad4d7a90a92e528aadf4965d685c17dacff3df282db1121136c382dc0b6014d2"}, + {file = "yarl-1.9.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ec61d826d80fc293ed46c9dd26995921e3a82146feacd952ef0757236fc137be"}, + {file = "yarl-1.9.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8be9e837ea9113676e5754b43b940b50cce76d9ed7d2461df1af39a8ee674d9f"}, + {file = "yarl-1.9.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bef596fdaa8f26e3d66af846bbe77057237cb6e8efff8cd7cc8dff9a62278bbf"}, + {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d47552b6e52c3319fede1b60b3de120fe83bde9b7bddad11a69fb0af7db32f1"}, + {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84fc30f71689d7fc9168b92788abc977dc8cefa806909565fc2951d02f6b7d57"}, + {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4aa9741085f635934f3a2583e16fcf62ba835719a8b2b28fb2917bb0537c1dfa"}, + {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:206a55215e6d05dbc6c98ce598a59e6fbd0c493e2de4ea6cc2f4934d5a18d130"}, + {file = "yarl-1.9.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07574b007ee20e5c375a8fe4a0789fad26db905f9813be0f9fef5a68080de559"}, + {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5a2e2433eb9344a163aced6a5f6c9222c0786e5a9e9cac2c89f0b28433f56e23"}, + {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:6ad6d10ed9b67a382b45f29ea028f92d25bc0bc1daf6c5b801b90b5aa70fb9ec"}, + {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:6fe79f998a4052d79e1c30eeb7d6c1c1056ad33300f682465e1b4e9b5a188b78"}, + {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a825ec844298c791fd28ed14ed1bffc56a98d15b8c58a20e0e08c1f5f2bea1be"}, + {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8619d6915b3b0b34420cf9b2bb6d81ef59d984cb0fde7544e9ece32b4b3043c3"}, + {file = "yarl-1.9.4-cp38-cp38-win32.whl", hash = "sha256:686a0c2f85f83463272ddffd4deb5e591c98aac1897d65e92319f729c320eece"}, + {file = "yarl-1.9.4-cp38-cp38-win_amd64.whl", hash = "sha256:a00862fb23195b6b8322f7d781b0dc1d82cb3bcac346d1e38689370cc1cc398b"}, + {file = "yarl-1.9.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:604f31d97fa493083ea21bd9b92c419012531c4e17ea6da0f65cacdcf5d0bd27"}, + {file = "yarl-1.9.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8a854227cf581330ffa2c4824d96e52ee621dd571078a252c25e3a3b3d94a1b1"}, + {file = "yarl-1.9.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ba6f52cbc7809cd8d74604cce9c14868306ae4aa0282016b641c661f981a6e91"}, + {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a6327976c7c2f4ee6816eff196e25385ccc02cb81427952414a64811037bbc8b"}, + {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8397a3817d7dcdd14bb266283cd1d6fc7264a48c186b986f32e86d86d35fbac5"}, + {file = 
"yarl-1.9.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e0381b4ce23ff92f8170080c97678040fc5b08da85e9e292292aba67fdac6c34"}, + {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23d32a2594cb5d565d358a92e151315d1b2268bc10f4610d098f96b147370136"}, + {file = "yarl-1.9.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ddb2a5c08a4eaaba605340fdee8fc08e406c56617566d9643ad8bf6852778fc7"}, + {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:26a1dc6285e03f3cc9e839a2da83bcbf31dcb0d004c72d0730e755b33466c30e"}, + {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:18580f672e44ce1238b82f7fb87d727c4a131f3a9d33a5e0e82b793362bf18b4"}, + {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:29e0f83f37610f173eb7e7b5562dd71467993495e568e708d99e9d1944f561ec"}, + {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:1f23e4fe1e8794f74b6027d7cf19dc25f8b63af1483d91d595d4a07eca1fb26c"}, + {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:db8e58b9d79200c76956cefd14d5c90af54416ff5353c5bfd7cbe58818e26ef0"}, + {file = "yarl-1.9.4-cp39-cp39-win32.whl", hash = "sha256:c7224cab95645c7ab53791022ae77a4509472613e839dab722a72abe5a684575"}, + {file = "yarl-1.9.4-cp39-cp39-win_amd64.whl", hash = "sha256:824d6c50492add5da9374875ce72db7a0733b29c2394890aef23d533106e2b15"}, + {file = "yarl-1.9.4-py3-none-any.whl", hash = "sha256:928cecb0ef9d5a7946eb6ff58417ad2fe9375762382f1bf5c55e61645f2c43ad"}, + {file = "yarl-1.9.4.tar.gz", hash = "sha256:566db86717cf8080b99b58b083b773a908ae40f06681e87e589a976faf8246bf"}, +] + +[package.dependencies] +idna = ">=2.0" +multidict = ">=4.0" + +[[package]] +name = "zarr" +version = "2.18.2" +description = "An implementation of chunked, compressed, N-dimensional arrays for Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "zarr-2.18.2-py3-none-any.whl", hash = "sha256:a638754902f97efa99b406083fdc807a0e2ccf12a949117389d2a4ba9b05df38"}, + {file = "zarr-2.18.2.tar.gz", hash = "sha256:9bb393b8a0a38fb121dbb913b047d75db28de9890f6d644a217a73cf4ae74f47"}, +] + +[package.dependencies] +asciitree = "*" +fasteners = {version = "*", markers = "sys_platform != \"emscripten\""} +numcodecs = ">=0.10.0" +numpy = ">=1.23" + +[package.extras] +docs = ["numcodecs[msgpack]", "numpydoc", "pydata-sphinx-theme", "sphinx", "sphinx-automodapi", "sphinx-copybutton", "sphinx-design", "sphinx-issues"] +jupyter = ["ipytree (>=0.2.2)", "ipywidgets (>=8.0.0)", "notebook"] + [metadata] lock-version = "2.0" python-versions = ">=3.10, <3.13" -content-hash = "da6f45d547ceb2940cf87d9792ce11d7115e9b11a405ab3420dce9850d2a092f" +content-hash = "ec02c1bbf263c411dd7207aafb53a3c6e9c9fb95297c29064018296fc916d3aa" diff --git a/pyproject.toml b/pyproject.toml index 017e751..19c3068 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,8 @@ netcdf4 = "^1.6.5" pyarrow = "^16.0.0" types-requests = "^2.31.0.20240406" types-tqdm = "^4.66.0.20240417" +gcsfs = "^2024.6.0" +zarr = "^2.18.2" [tool.poetry.group.dev.dependencies] mkdocstrings = {version = ">=0.23", extras = ["python"]} From 5db69f3fb53a7e121e1fad9ea10aeb00f77c4430 Mon Sep 17 00:00:00 2001 From: collijk Date: Wed, 12 Jun 2024 13:40:33 -0700 Subject: [PATCH 26/71] CMIP6 extraction --- poetry.lock | 13 +- pyproject.toml | 3 + src/climate_downscale/cli_options.py | 87 +++++++++++- src/climate_downscale/data.py | 13 ++ 
src/climate_downscale/extract/__init__.py | 6 + src/climate_downscale/extract/cmip.py | 72 ---------- src/climate_downscale/extract/cmip6.py | 131 ++++++++++++++++++ src/climate_downscale/extract/era5.py | 90 ++++++------ .../old_climate/project_anomaly.py | 38 +---- 9 files changed, 294 insertions(+), 159 deletions(-) delete mode 100644 src/climate_downscale/extract/cmip.py create mode 100644 src/climate_downscale/extract/cmip6.py diff --git a/poetry.lock b/poetry.lock index e911760..daae923 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3298,6 +3298,17 @@ notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] +[[package]] +name = "types-pyyaml" +version = "6.0.12.20240311" +description = "Typing stubs for PyYAML" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-PyYAML-6.0.12.20240311.tar.gz", hash = "sha256:a9e0f0f88dc835739b0c1ca51ee90d04ca2a897a71af79de9aec5f38cb0a5342"}, + {file = "types_PyYAML-6.0.12.20240311-py3-none-any.whl", hash = "sha256:b845b06a1c7e54b8e5b4c683043de0d9caf205e7434b3edc678ff2411979b8f6"}, +] + [[package]] name = "types-requests" version = "2.31.0.20240406" @@ -3588,4 +3599,4 @@ jupyter = ["ipytree (>=0.2.2)", "ipywidgets (>=8.0.0)", "notebook"] [metadata] lock-version = "2.0" python-versions = ">=3.10, <3.13" -content-hash = "ec02c1bbf263c411dd7207aafb53a3c6e9c9fb95297c29064018296fc916d3aa" +content-hash = "d956b3098dcb83693feb9ac5cb4b39749dbd7ef6e90a8e2bd878ee7c3dc13f43" diff --git a/pyproject.toml b/pyproject.toml index 19c3068..b19f494 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,7 @@ types-requests = "^2.31.0.20240406" types-tqdm = "^4.66.0.20240417" gcsfs = "^2024.6.0" zarr = "^2.18.2" +types-pyyaml = "^6.0.12.20240311" [tool.poetry.group.dev.dependencies] mkdocstrings = {version = ">=0.23", extras = ["python"]} @@ -95,6 +96,7 @@ ignore = [ "E501", # Line too long, this is autoformatted "PYI041", # Use float instead of int | float; dumb rule "T201", # print is fine for now. 
+ "RET504", # Unnecessary assignment before return ] [tool.ruff.lint.per-file-ignores] @@ -148,6 +150,7 @@ exclude = [ module = [ "cdsapi.*", "affine.*", + "gcsfs.*", ] ignore_missing_imports = true diff --git a/src/climate_downscale/cli_options.py b/src/climate_downscale/cli_options.py index 8bcacfd..720b53e 100644 --- a/src/climate_downscale/cli_options.py +++ b/src/climate_downscale/cli_options.py @@ -49,7 +49,7 @@ def with_month( ) -VALID_CLIMATE_VARIABLES = [ +VALID_ERA5_VARIABLES = [ "10m_u_component_of_wind", "10m_v_component_of_wind", "2m_dewpoint_temperature", @@ -64,15 +64,15 @@ def with_month( ] -def with_climate_variable( +def with_era5_variable( *, allow_all: bool = False, ) -> ClickOption[_P, _T]: return with_choice( - "climate-variable", + "era5-variable", "x", allow_all=allow_all, - choices=VALID_CLIMATE_VARIABLES, + choices=VALID_ERA5_VARIABLES, help="Variable to extract.", ) @@ -93,6 +93,75 @@ def with_era5_dataset( ) +VALID_CMIP6_SOURCES = [ + "CAMS-CSM1-0", + "CanESM5", + "CNRM-ESM2-1", + "GFDL-ESM4", + "GISS-E2-1-G", + "MIROC-ES2L", + "MIROC6", + "MRI-ESM2-0", +] + + +def with_cmip6_source( + *, + allow_all: bool = False, +) -> ClickOption[_P, _T]: + return with_choice( + "cmip6-source", + "s", + allow_all=allow_all, + choices=VALID_CMIP6_SOURCES, + help="CMIP6 source to extract.", + ) + + +VALID_CMIP6_EXPERIMENTS = [ + "ssp119", + "ssp126", + "ssp245", + "ssp370", + "ssp585", +] + + +def with_cmip6_experiment( + *, + allow_all: bool = False, +) -> ClickOption[_P, _T]: + return with_choice( + "cmip6-experiment", + "e", + allow_all=allow_all, + choices=VALID_CMIP6_EXPERIMENTS, + help="CMIP6 experiment to extract.", + ) + + +VALID_CMIP6_VARIABLES = [ + "uas", + "vas", + "hurs", + "tas", + "pr", +] + + +def with_cmip6_variable( + *, + allow_all: bool = False, +) -> ClickOption[_P, _T]: + return with_choice( + "cmip6-variable", + "x", + allow_all=allow_all, + choices=VALID_CMIP6_VARIABLES, + help="CMIP6 variable to extract.", + ) + + STRIDE = 30 LATITUDES = [str(lat) for lat in range(-90, 90, STRIDE)] LONGITUDES = [str(lon) for lon in range(-180, 180, STRIDE)] @@ -125,15 +194,21 @@ def with_lon_start( __all__ = [ "VALID_YEARS", "VALID_MONTHS", - "VALID_CLIMATE_VARIABLES", + "VALID_ERA5_VARIABLES", "VALID_ERA5_DATASETS", + "VALID_CMIP6_SOURCES", + "VALID_CMIP6_EXPERIMENTS", + "VALID_CMIP6_VARIABLES", "STRIDE", "LATITUDES", "LONGITUDES", "with_year", "with_month", - "with_climate_variable", + "with_era5_variable", "with_era5_dataset", + "with_cmip6_source", + "with_cmip6_experiment", + "with_cmip6_variable", "with_lat_start", "with_lon_start", "with_output_directory", diff --git a/src/climate_downscale/data.py b/src/climate_downscale/data.py index 48ba036..03548c8 100644 --- a/src/climate_downscale/data.py +++ b/src/climate_downscale/data.py @@ -40,6 +40,19 @@ def load_era5( ) -> xr.Dataset: return xr.open_dataset(self.era5_path(dataset, variable, year, month)) + @property + def cmip6(self) -> Path: + return self.extracted_data / "cmip6" + + def load_cmip6_metadata(self) -> pd.DataFrame: + meta_path = self.cmip6 / "cmip6-metadata.parquet" + if not meta_path.exists(): + external_path = "https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv" + meta = pd.read_csv(external_path) + touch(meta_path) + meta.to_parquet(meta_path) + return pd.read_parquet(meta_path) + @property def era5_temperature_daily_mean(self) -> Path: return self.extracted_data / "era5_temperature_daily_mean" diff --git a/src/climate_downscale/extract/__init__.py 
b/src/climate_downscale/extract/__init__.py index 364bcf6..3149b40 100644 --- a/src/climate_downscale/extract/__init__.py +++ b/src/climate_downscale/extract/__init__.py @@ -1,3 +1,7 @@ +from climate_downscale.extract.cmip6 import ( + extract_cmip6, + extract_cmip6_task, +) from climate_downscale.extract.elevation import ( extract_elevation, extract_elevation_task, @@ -18,12 +22,14 @@ RUNNERS = { "ncei": extract_ncei_climate_stations, "era5": extract_era5, + "cmip6": extract_cmip6, "lcz": extract_rub_local_climate_zones, "elevation": extract_elevation, } TASK_RUNNERS = { "ncei": extract_ncei_climate_stations_task, + "cmip6": extract_cmip6_task, "era5_download": download_era5_task, "era5_compress": unzip_and_compress_era5_task, "lcz": extract_rub_local_climate_zones, diff --git a/src/climate_downscale/extract/cmip.py b/src/climate_downscale/extract/cmip.py deleted file mode 100644 index eefe266..0000000 --- a/src/climate_downscale/extract/cmip.py +++ /dev/null @@ -1,72 +0,0 @@ -def load_raw_cmip_metadata() -> pd.DataFrame: - """Loads metadata containing information about all CMIP6 models.""" - path = "https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv" - return pd.read_csv(path) - -meta = load_raw_cmip_metadata() - -keep_sources = [ - 'CAMS-CSM1-0', - 'CanESM5', - 'CNRM-ESM2-1', - 'GFDL-ESM4', - 'GISS-E2-1-G', - 'MIROC-ES2L', - 'MIROC6', - 'MRI-ESM2-0' -] -keep_experiments = [ - 'ssp119', - 'ssp126', - 'ssp245', - 'ssp370', - 'ssp585', -] - -keep_variables = [ - "uas", - "vas", - "hurs", - "tas", - # "rsus", - # "rlus", - "ps", - # "rsds", - # "rlds", - "pr", - # "rsdsdiff", -] - -keep_tables = [ - #"Amon", - "day", -] - - -mask = ( - meta.source_id.isin(keep_sources) - & meta.experiment_id.isin(keep_experiments) - & meta.variable_id.isin(keep_variables) - & meta.table_id.isin(keep_tables) -) - -meta_sub = meta[mask] -meta_sub['dummy'] = "X" - -pvs = ['source_id', 'experiment_id', 'variable_id'] - -meta_sub.groupby(pvs).dummy.apply(lambda s: ",".join(s.unique().tolist())).unstack() - -import gcsfs -def load_cmip_data(zarr_path: str) -> xr.Dataset: - """Loads a CMIP6 dataset from a zarr path.""" - gcs = gcsfs.GCSFileSystem(token="anon") # noqa: S106 - mapper = gcs.get_mapper(zarr_path) - ds = xr.open_zarr(mapper, consolidated=True) - lon = (ds.lon + 180) % 360 - 180 - ds = ds.assign_coords(lon=lon).sortby("lon") - ds = ds.drop_vars( - ["lat_bnds", "lon_bnds", "time_bnds", "height", "time_bounds", "bnds"], - errors="ignore", - ) - return ds # type: ignore[no-any-return] \ No newline at end of file diff --git a/src/climate_downscale/extract/cmip6.py b/src/climate_downscale/extract/cmip6.py new file mode 100644 index 0000000..e21f373 --- /dev/null +++ b/src/climate_downscale/extract/cmip6.py @@ -0,0 +1,131 @@ +from pathlib import Path + +import click +import gcsfs +import xarray as xr +from rra_tools import jobmon + +from climate_downscale import cli_options as clio +from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData + +VARIABLE_ENCODINGS = { + "uas": (0.0, 0.01), + "vas": (0.0, 0.01), + "hurs": (0.0, 0.01), + "tas": (273.15, 0.01), + "pr": (0.0, 1e-9), +} + + +def load_cmip_data(zarr_path: str) -> xr.Dataset: + """Loads a CMIP6 dataset from a zarr path.""" + gcs = gcsfs.GCSFileSystem(token="anon") # noqa: S106 + mapper = gcs.get_mapper(zarr_path) + ds = xr.open_zarr(mapper, consolidated=True) + ds = ds.drop_vars( + ["lat_bnds", "lon_bnds", "time_bnds", "height", "time_bounds", "bnds"], + errors="ignore", + ) + return ds # type: ignore[no-any-return] + + 
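+# How the packing in extract_cmip6_main works: to_netcdf stores each variable
+# as int16 using the (add_offset, scale_factor) pairs from VARIABLE_ENCODINGS.
+# Per netCDF conventions, a stored integer k unpacks to
+#
+#     value = add_offset + scale_factor * k
+#
+# so e.g. "tas" (near-surface air temperature, Kelvin) keeps 0.01 K precision
+# over roughly 273.15 +/- 327.67 K, with -32767 reserved as the fill value.
+# A quick sanity check of the representable range (hypothetical snippet for
+# illustration only; not called anywhere in this module):
+#
+#     shift, scale = VARIABLE_ENCODINGS["tas"]
+#     lo, hi = shift + scale * -32766, shift + scale * 32767
+#     assert lo < 180.0 and hi > 340.0  # plausible surface temps fit easily
+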
+def extract_cmip6_main( + output_dir: str | Path, + cmip6_source: str, + cmip6_experiment: str, + cmip6_variable: str, +) -> None: + cd_data = ClimateDownscaleData(output_dir) + meta = cd_data.load_cmip6_metadata() + + mask = ( + (meta.source_id == cmip6_source) + & (meta.experiment_id == cmip6_experiment) + & (meta.variable_id == cmip6_variable) + & (meta.table_id == "day") + ) + + meta_subset = meta[mask].set_index("member_id").zstore.to_dict() + + for member, zstore_path in meta_subset.items(): + cmip_data = load_cmip_data(zstore_path) + out_filename = f"{cmip6_source}_{cmip6_experiment}_{cmip6_variable}_{member}.nc" + out_path = cd_data.cmip6 / out_filename + shift, scale = VARIABLE_ENCODINGS[cmip6_variable] + cmip_data.to_netcdf( + out_path, + encoding={ + cmip6_variable: { + "dtype": "int16", + "scale_factor": scale, + "add_offset": shift, + "_FillValue": -32767, + "zlib": True, + "complevel": 1, + } + }, + ) + + +@click.command() # type: ignore[arg-type] +@clio.with_output_directory(DEFAULT_ROOT) +@clio.with_cmip6_source() +@clio.with_cmip6_experiment() +@clio.with_cmip6_variable() +def extract_cmip6_task( + output_dir: str, + cmip6_source: str, + cmip6_experiment: str, + cmip6_variable: str, +) -> None: + extract_cmip6_main(output_dir, cmip6_source, cmip6_experiment, cmip6_variable) + + +@click.command() # type: ignore[arg-type] +@clio.with_output_directory(DEFAULT_ROOT) +@clio.with_cmip6_source(allow_all=True) +@clio.with_cmip6_experiment(allow_all=True) +@clio.with_cmip6_variable(allow_all=True) +@clio.with_queue() +def extract_cmip6( + output_dir: str, + cmip6_source: str, + cmip6_experiment: str, + cmip6_variable: str, + queue: str, +) -> None: + sources = ( + clio.VALID_CMIP6_SOURCES if cmip6_source == clio.RUN_ALL else [cmip6_source] + ) + experiments = ( + clio.VALID_CMIP6_EXPERIMENTS + if cmip6_experiment == clio.RUN_ALL + else [cmip6_experiment] + ) + variables = ( + clio.VALID_CMIP6_VARIABLES + if cmip6_variable == clio.RUN_ALL + else [cmip6_variable] + ) + + jobmon.run_parallel( + runner="cdtask", + task_name="extract_cmip6", + node_args={ + "cmip6-source": sources, + "cmip6-experiment": experiments, + "cmip6-variable": variables, + }, + task_args={ + "output-dir": output_dir, + }, + task_resources={ + "queue": queue, + "cores": 1, + "memory": "10G", + "runtime": "120m", + "project": "proj_rapidresponse", + }, + max_attempts=1, + concurrency_limit=50, + ) diff --git a/src/climate_downscale/extract/era5.py b/src/climate_downscale/extract/era5.py index 83c4108..058c7d8 100644 --- a/src/climate_downscale/extract/era5.py +++ b/src/climate_downscale/extract/era5.py @@ -5,14 +5,13 @@ import cdsapi import click import xarray as xr +import yaml from rra_tools import jobmon from rra_tools.shell_tools import touch from climate_downscale import cli_options as clio from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData -import yaml - def get_download_spec( final_out_path: Path, @@ -29,14 +28,14 @@ def get_download_spec( def download_era5_main( output_dir: str | Path, era5_dataset: str, - climate_variable: str, + era5_variable: str, year: int | str, month: str, user: str, ) -> None: cddata = ClimateDownscaleData(output_dir) - final_out_path = cddata.era5_path(era5_dataset, climate_variable, year, month) + final_out_path = cddata.era5_path(era5_dataset, era5_variable, year, month) download_path, download_format = get_download_spec(final_out_path) if download_path.exists(): @@ -50,14 +49,14 @@ def download_era5_main( cred_path = cddata.credentials_root / 
"copernicus.yaml" credentials = yaml.safe_load(cred_path.read_text()) - url = credentials['url'] - key = credentials['keys'][user] + url = credentials["url"] + key = credentials["keys"][user] copernicus = cdsapi.Client(url=url, key=key) print("Downloading...") kwargs = { "product_type": "reanalysis", - "variable": climate_variable, + "variable": era5_variable, "year": year, "month": month, "day": [f"{d:02d}" for d in range(1, 32)], @@ -71,7 +70,7 @@ def download_era5_main( ) result.download(download_path) except Exception as e: - print(f"Failed to download {era5_dataset} {climate_variable} {year} {month}") + print(f"Failed to download {era5_dataset} {era5_variable} {year} {month}") if download_path.exists(): download_path.unlink() raise e # noqa: TRY201 @@ -80,18 +79,18 @@ def download_era5_main( def unzip_and_compress_era5( output_dir: str | Path, era5_dataset: str, - climate_variable: str, + era5_variable: str, year: int | str, month: str, ) -> None: cddata = ClimateDownscaleData(output_dir) - final_out_path = cddata.era5_path(era5_dataset, climate_variable, year, month) + final_out_path = cddata.era5_path(era5_dataset, era5_variable, year, month) zip_path = final_out_path.with_suffix(".zip") uncompressed_path = final_out_path.with_stem(f"{final_out_path.stem}_raw") - + if era5_dataset == "reanalysis-era5-land": print("Unzipping...") - # This data needs to be unzipped first. + # This data needs to be unzipped first. if uncompressed_path.exists(): uncompressed_path.unlink() touch(uncompressed_path) @@ -102,15 +101,14 @@ def unzip_and_compress_era5( # Download failed or was interrupted, delete the zipfile zip_path.unlink() raise e - + with zipfile.ZipFile(zip_path) as zf: zinfo = zf.infolist() if len(zinfo) != 1: msg = f"Expected a single file in {zip_path}" raise ValueError(msg) - with uncompressed_path.open('wb') as f: + with uncompressed_path.open("wb") as f: f.write(zf.read(zinfo[0])) - print("Compressing") touch(final_out_path) @@ -135,17 +133,17 @@ def unzip_and_compress_era5( @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) @clio.with_era5_dataset() -@clio.with_climate_variable() +@clio.with_era5_variable() @clio.with_year() @clio.with_month() @click.option( - "--user", + "--user", type=str, ) def download_era5_task( output_dir: str, era5_dataset: str, - climate_variable: str, + era5_variable: str, year: str, month: str, user: str, @@ -153,7 +151,7 @@ def download_era5_task( download_era5_main( output_dir, era5_dataset, - climate_variable, + era5_variable, year, month, user, @@ -163,20 +161,20 @@ def download_era5_task( @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) @clio.with_era5_dataset() -@clio.with_climate_variable() +@clio.with_era5_variable() @clio.with_year() @clio.with_month() def unzip_and_compress_era5_task( output_dir: str, era5_dataset: str, - climate_variable: str, + era5_variable: str, year: str, month: str, ) -> None: unzip_and_compress_era5( output_dir, era5_dataset, - climate_variable, + era5_variable, year, month, ) @@ -185,14 +183,14 @@ def unzip_and_compress_era5_task( @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) @clio.with_era5_dataset(allow_all=True) -@clio.with_climate_variable(allow_all=True) +@clio.with_era5_variable(allow_all=True) @clio.with_year(allow_all=True) @clio.with_month(allow_all=True) @clio.with_queue() def extract_era5( # noqa: PLR0913 output_dir: str, era5_dataset: str, - climate_variable: str, + era5_variable: str, year: str, month: 
str, queue: str, @@ -200,16 +198,14 @@ def extract_era5( # noqa: PLR0913 cddata = ClimateDownscaleData(output_dir) cred_path = cddata.credentials_root / "copernicus.yaml" credentials = yaml.safe_load(cred_path.read_text()) - users = list(credentials['keys']) + users = list(credentials["keys"]) jobs_per_user = 20 - + datasets = ( clio.VALID_ERA5_DATASETS if era5_dataset == clio.RUN_ALL else [era5_dataset] ) variables = ( - clio.VALID_CLIMATE_VARIABLES - if climate_variable == clio.RUN_ALL - else [climate_variable] + clio.VALID_ERA5_VARIABLES if era5_variable == clio.RUN_ALL else [era5_variable] ) years = clio.VALID_YEARS if year == clio.RUN_ALL else [year] months = clio.VALID_MONTHS if month == clio.RUN_ALL else [month] @@ -217,11 +213,9 @@ def extract_era5( # noqa: PLR0913 to_download = [] to_compress = [] complete = [] - for spec in itertools.product( - datasets, variables, years, months - ): + for spec in itertools.product(datasets, variables, years, months): final_out_path = cddata.era5_path(*spec) - download_path, _ = get_download_spec(final_out_path) + download_path, _ = get_download_spec(final_out_path) if final_out_path.exists() and download_path.exists(): # We broke in the middle of processing this file. Don't re-download, @@ -234,10 +228,10 @@ def extract_era5( # noqa: PLR0913 to_download.append(spec) to_compress.append(spec) elif download_path.exists() and download_path.stat().st_size == 0: - # We broke while downloading. Assume this file is invalid and re-download + # We broke while downloading. Assume this file is invalid and re-download download_path.unlink() to_download.append(spec) - to_compress.append(spec) + to_compress.append(spec) elif download_path.exists(): to_compress.append(spec) elif final_out_path.exists(): @@ -250,24 +244,28 @@ def extract_era5( # noqa: PLR0913 while to_download: downloads_left = len(to_download) - - + download_batch = [] - for i in range(jobs_per_user): + for _ in range(jobs_per_user): for user in users: if to_download: - download_batch.append( - (*to_download.pop(), user) - ) - assert len(download_batch) == min(len(users) * jobs_per_user, downloads_left) - - print(len(to_download) + len(download_batch), "remaining. Launching next", len(download_batch), "jobs") + download_batch.append((*to_download.pop(), user)) + if not len(download_batch) == min(len(users) * jobs_per_user, downloads_left): + msg = "Download batch size is incorrect" + raise ValueError(msg) + + print( + len(to_download) + len(download_batch), + "remaining. 
Launching next", + len(download_batch), + "jobs", + ) jobmon.run_parallel( runner="cdtask", task_name="extract era5_download", flat_node_args=( - ("era5-dataset", "climate-variable", "year", "month", "user"), + ("era5-dataset", "era5-variable", "year", "month", "user"), download_batch, ), task_args={ @@ -287,7 +285,7 @@ def extract_era5( # noqa: PLR0913 runner="cdtask", task_name="extract era5_compress", flat_node_args=( - ("era5-dataset", "climate-variable", "year", "month"), + ("era5-dataset", "era5-variable", "year", "month"), to_compress, ), task_args={ diff --git a/src/climate_downscale/old_climate/project_anomaly.py b/src/climate_downscale/old_climate/project_anomaly.py index ae37b72..c35ef40 100644 --- a/src/climate_downscale/old_climate/project_anomaly.py +++ b/src/climate_downscale/old_climate/project_anomaly.py @@ -5,11 +5,10 @@ import click import pandas as pd -from rra_tools import jobmon - from rra_population_pipelines.pipelines.climate import data from rra_population_pipelines.shared.cli_tools import options as clio from rra_population_pipelines.shared.data import RRA_POP +from rra_tools import jobmon if TYPE_CHECKING: import xarray as xr @@ -26,35 +25,6 @@ _VALID_YEARS = tuple([str(y) for y in range(2015, 2101)]) -def get_run_metadata( - variable_id: str, - experiment_id: str, -) -> pd.DataFrame: - metadata = data.load_cmip_metadata() - metadata = ( - metadata.set_index(["institution_id", "source_id"]) - .sort_index() - .loc[_ENSEMBLE_MEMBERS] - .reset_index() - .set_index(["variable_id", "experiment_id"]) - ) - history_meta = ( - metadata.loc[(variable_id, "historical")] - .set_index(["institution_id", "source_id", "member_id"]) # type: ignore[union-attr] - .loc[:, "zstore"] - ) - experiment_meta = ( - metadata.loc[(variable_id, experiment_id)] - .set_index(["institution_id", "source_id", "member_id"]) # type: ignore[union-attr] - .loc[:, "zstore"] - ) - final_meta = pd.concat( - [history_meta.rename("historical"), experiment_meta.rename("experiment")], - axis=1, - ) - return final_meta # type: ignore[no-any-return] - - def compute_common_lat_lon( run_metadata: pd.DataFrame, ) -> tuple[pd.Index[float], pd.Index[float]]: @@ -62,7 +32,7 @@ def compute_common_lat_lon( lon = pd.Index([], name="lon", dtype=float) for key in run_metadata.index.tolist(): - historical = data.load_cmip_historical_data(run_metadata.at[key, "historical"]) + historical = data.load_cmip_historical_data(run_metadata.loc[key, "historical"]) lat = lat.union(historical["lat"]) # type: ignore[arg-type] lon = lon.union(historical["lon"]) # type: ignore[arg-type] return lat, lon @@ -98,9 +68,9 @@ def project_anomaly_main(variable: str, experiment: str, year: str) -> xr.Datase anomalies: list[xr.Dataset] = [] for key in run_meta.index.tolist(): - historical = data.load_cmip_historical_data(run_meta.at[key, "historical"]) + historical = data.load_cmip_historical_data(run_meta.loc[key, "historical"]) scenario = data.load_cmip_experiment_data( - run_meta.at[key, "experiment"], year=year + run_meta.loc[key, "experiment"], year=year ) anomaly = compute_single_model_anomaly(historical, scenario, variable=variable) anomaly = interp_common_lat_lon(anomaly, lat, lon) From 83d6030e411cf62188bff95b8a44781a62363ca0 Mon Sep 17 00:00:00 2001 From: collijk Date: Wed, 12 Jun 2024 13:41:24 -0700 Subject: [PATCH 27/71] CMIP6 extraction --- src/climate_downscale/extract/cmip6.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/climate_downscale/extract/cmip6.py b/src/climate_downscale/extract/cmip6.py index 
e21f373..bae994a 100644 --- a/src/climate_downscale/extract/cmip6.py +++ b/src/climate_downscale/extract/cmip6.py @@ -3,7 +3,7 @@ import click import gcsfs import xarray as xr -from rra_tools import jobmon +from rra_tools import jobmon, shell_tools from climate_downscale import cli_options as clio from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData @@ -51,6 +51,7 @@ def extract_cmip6_main( cmip_data = load_cmip_data(zstore_path) out_filename = f"{cmip6_source}_{cmip6_experiment}_{cmip6_variable}_{member}.nc" out_path = cd_data.cmip6 / out_filename + shell_tools.touch(out_path, exist_ok=True) shift, scale = VARIABLE_ENCODINGS[cmip6_variable] cmip_data.to_netcdf( out_path, From 1c6d38567937df8933d7640b0c665da5dfe75b4f Mon Sep 17 00:00:00 2001 From: collijk Date: Wed, 12 Jun 2024 13:42:29 -0700 Subject: [PATCH 28/71] typo --- src/climate_downscale/extract/cmip6.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/climate_downscale/extract/cmip6.py b/src/climate_downscale/extract/cmip6.py index bae994a..500bc4f 100644 --- a/src/climate_downscale/extract/cmip6.py +++ b/src/climate_downscale/extract/cmip6.py @@ -111,7 +111,7 @@ def extract_cmip6( jobmon.run_parallel( runner="cdtask", - task_name="extract_cmip6", + task_name="extract cmip6", node_args={ "cmip6-source": sources, "cmip6-experiment": experiments, From 7ee36b309ac077ec749691618d0dc4ee82ee9950 Mon Sep 17 00:00:00 2001 From: collijk Date: Wed, 12 Jun 2024 13:44:36 -0700 Subject: [PATCH 29/71] some logging --- src/climate_downscale/extract/cmip6.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/climate_downscale/extract/cmip6.py b/src/climate_downscale/extract/cmip6.py index 500bc4f..1437984 100644 --- a/src/climate_downscale/extract/cmip6.py +++ b/src/climate_downscale/extract/cmip6.py @@ -35,6 +35,7 @@ def extract_cmip6_main( cmip6_experiment: str, cmip6_variable: str, ) -> None: + print(f'Checking metadata for {cmip6_source} {cmip6_experiment} {cmip6_variable}') cd_data = ClimateDownscaleData(output_dir) meta = cd_data.load_cmip6_metadata() @@ -46,6 +47,7 @@ def extract_cmip6_main( ) meta_subset = meta[mask].set_index("member_id").zstore.to_dict() + print(f'Extracting {len(meta_subset)} members...') for member, zstore_path in meta_subset.items(): cmip_data = load_cmip_data(zstore_path) From 593a2a7d6699e51458b55e87230f56ffce71f445 Mon Sep 17 00:00:00 2001 From: collijk Date: Wed, 12 Jun 2024 13:45:27 -0700 Subject: [PATCH 30/71] some logging --- src/climate_downscale/extract/cmip6.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/climate_downscale/extract/cmip6.py b/src/climate_downscale/extract/cmip6.py index 1437984..5b25e30 100644 --- a/src/climate_downscale/extract/cmip6.py +++ b/src/climate_downscale/extract/cmip6.py @@ -50,11 +50,13 @@ def extract_cmip6_main( print(f'Extracting {len(meta_subset)} members...') for member, zstore_path in meta_subset.items(): + print('Extracting', member, zstore_path) cmip_data = load_cmip_data(zstore_path) out_filename = f"{cmip6_source}_{cmip6_experiment}_{cmip6_variable}_{member}.nc" out_path = cd_data.cmip6 / out_filename shell_tools.touch(out_path, exist_ok=True) shift, scale = VARIABLE_ENCODINGS[cmip6_variable] + print('Writing to', out_path) cmip_data.to_netcdf( out_path, encoding={ From 0de6d2fce871d195cf2039d23ea7a2bd8e493379 Mon Sep 17 00:00:00 2001 From: collijk Date: Wed, 12 Jun 2024 13:47:30 -0700 Subject: [PATCH 31/71] Change naming scheme --- src/climate_downscale/extract/cmip6.py | 10 +++++----- 1 file changed, 5 
insertions(+), 5 deletions(-) diff --git a/src/climate_downscale/extract/cmip6.py b/src/climate_downscale/extract/cmip6.py index 5b25e30..59c6c96 100644 --- a/src/climate_downscale/extract/cmip6.py +++ b/src/climate_downscale/extract/cmip6.py @@ -35,7 +35,7 @@ def extract_cmip6_main( cmip6_experiment: str, cmip6_variable: str, ) -> None: - print(f'Checking metadata for {cmip6_source} {cmip6_experiment} {cmip6_variable}') + print(f"Checking metadata for {cmip6_source} {cmip6_experiment} {cmip6_variable}") cd_data = ClimateDownscaleData(output_dir) meta = cd_data.load_cmip6_metadata() @@ -47,16 +47,16 @@ def extract_cmip6_main( ) meta_subset = meta[mask].set_index("member_id").zstore.to_dict() - print(f'Extracting {len(meta_subset)} members...') + print(f"Extracting {len(meta_subset)} members...") for member, zstore_path in meta_subset.items(): - print('Extracting', member, zstore_path) + print("Extracting", member, zstore_path) cmip_data = load_cmip_data(zstore_path) - out_filename = f"{cmip6_source}_{cmip6_experiment}_{cmip6_variable}_{member}.nc" + out_filename = f"{cmip6_variable}_{cmip6_experiment}_{cmip6_source}_{member}.nc" out_path = cd_data.cmip6 / out_filename shell_tools.touch(out_path, exist_ok=True) shift, scale = VARIABLE_ENCODINGS[cmip6_variable] - print('Writing to', out_path) + print("Writing to", out_path) cmip_data.to_netcdf( out_path, encoding={ From 1e8da840a39a7d53470f3bc0d775948ddbcc93d9 Mon Sep 17 00:00:00 2001 From: James Collins Date: Thu, 13 Jun 2024 15:15:48 -0700 Subject: [PATCH 32/71] Update runtime --- src/climate_downscale/extract/cmip6.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/climate_downscale/extract/cmip6.py b/src/climate_downscale/extract/cmip6.py index 59c6c96..eeb8863 100644 --- a/src/climate_downscale/extract/cmip6.py +++ b/src/climate_downscale/extract/cmip6.py @@ -128,7 +128,7 @@ def extract_cmip6( "queue": queue, "cores": 1, "memory": "10G", - "runtime": "120m", + "runtime": "600m", "project": "proj_rapidresponse", }, max_attempts=1, From b84ef0dd43ab958c382f49d057ed1793761b3608 Mon Sep 17 00:00:00 2001 From: collijk Date: Thu, 13 Jun 2024 18:10:12 -0700 Subject: [PATCH 33/71] Do some reorg --- .../{model => downscale}/__init__.py | 4 +- .../prepare_predictors.py | 2 +- .../prepare_training_data.py | 4 +- src/climate_downscale/extract/elevation.py | 2 +- src/climate_downscale/generate/__init__.py | 0 .../era5_daily.py} | 127 ++++++++++-------- src/climate_downscale/generate/utils.py | 0 7 files changed, 80 insertions(+), 59 deletions(-) rename src/climate_downscale/{model => downscale}/__init__.py (74%) rename src/climate_downscale/{model => downscale}/prepare_predictors.py (98%) rename src/climate_downscale/{model => downscale}/prepare_training_data.py (97%) create mode 100644 src/climate_downscale/generate/__init__.py rename src/climate_downscale/{model/prepare_era5_daily.py => generate/era5_daily.py} (74%) create mode 100644 src/climate_downscale/generate/utils.py diff --git a/src/climate_downscale/model/__init__.py b/src/climate_downscale/downscale/__init__.py similarity index 74% rename from src/climate_downscale/model/__init__.py rename to src/climate_downscale/downscale/__init__.py index 5449577..24280ec 100644 --- a/src/climate_downscale/model/__init__.py +++ b/src/climate_downscale/downscale/__init__.py @@ -1,8 +1,8 @@ -from climate_downscale.model.prepare_predictors import ( +from climate_downscale.downscale.prepare_predictors import ( prepare_predictors, prepare_predictors_task, ) -from 
climate_downscale.model.prepare_training_data import ( +from climate_downscale.downscale.prepare_training_data import ( prepare_training_data, prepare_training_data_task, ) diff --git a/src/climate_downscale/model/prepare_predictors.py b/src/climate_downscale/downscale/prepare_predictors.py similarity index 98% rename from src/climate_downscale/model/prepare_predictors.py rename to src/climate_downscale/downscale/prepare_predictors.py index c858c90..e958bf8 100644 --- a/src/climate_downscale/model/prepare_predictors.py +++ b/src/climate_downscale/downscale/prepare_predictors.py @@ -124,7 +124,7 @@ def prepare_predictors_task( def prepare_predictors(output_dir: str, queue: str) -> None: jobmon.run_parallel( runner="cdtask", - task_name="model prepare_predictors", + task_name="downscale prepare_predictors", node_args={ "lat-start": clio.LATITUDES, "lon-start": clio.LONGITUDES, diff --git a/src/climate_downscale/model/prepare_training_data.py b/src/climate_downscale/downscale/prepare_training_data.py similarity index 97% rename from src/climate_downscale/model/prepare_training_data.py rename to src/climate_downscale/downscale/prepare_training_data.py index 755ea08..807b0c3 100644 --- a/src/climate_downscale/model/prepare_training_data.py +++ b/src/climate_downscale/downscale/prepare_training_data.py @@ -36,7 +36,7 @@ def load_and_clean_climate_stations( temperature=lambda df: 5 / 9 * (df["temperature"] - 32), ) ) - return climate_stations # noqa: RET504 + return climate_stations def get_era5_temperature( @@ -113,7 +113,7 @@ def prepare_training_data_task(output_dir: str, year: str) -> None: def prepare_training_data(output_dir: str, queue: str) -> None: jobmon.run_parallel( runner="cdtask", - task_name="model prepare_training_data", + task_name="downscale prepare_training_data", node_args={ "year": clio.VALID_YEARS, }, diff --git a/src/climate_downscale/extract/elevation.py b/src/climate_downscale/extract/elevation.py index c12b294..1f66d82 100644 --- a/src/climate_downscale/extract/elevation.py +++ b/src/climate_downscale/extract/elevation.py @@ -94,7 +94,7 @@ def extract_elevation_task( @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) @click.option( - "--model-name", + "--generate-name", required=True, type=click.Choice(ELEVATION_MODELS), help="Name of the elevation model to download.", diff --git a/src/climate_downscale/generate/__init__.py b/src/climate_downscale/generate/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/climate_downscale/model/prepare_era5_daily.py b/src/climate_downscale/generate/era5_daily.py similarity index 74% rename from src/climate_downscale/model/prepare_era5_daily.py rename to src/climate_downscale/generate/era5_daily.py index a33b4fc..a5f34e0 100644 --- a/src/climate_downscale/model/prepare_era5_daily.py +++ b/src/climate_downscale/generate/era5_daily.py @@ -1,18 +1,24 @@ -import pandas as pd -import xarray as xr from pathlib import Path -import numpy as np +import numpy as np +import pandas as pd +import xarray as xr -TARGET_LON = xr.DataArray(np.round(np.arange(0., 360., 0.1, dtype='float32'), 1), dims='longitude') -TARGET_LAT = xr.DataArray(np.round(np.arange(90., -90.1, -0.1, dtype='float32'), 1), dims='latitude') +TARGET_LON = xr.DataArray( + np.round(np.arange(0.0, 360.0, 0.1, dtype="float32"), 1), dims="longitude" +) +TARGET_LAT = xr.DataArray( + np.round(np.arange(90.0, -90.1, -0.1, dtype="float32"), 1), dims="latitude" +) def kelvin_to_celsius(temperature_k): return temperature_k - 273.15 + def 
m_to_mm(ds): - return 1000*ds + return 1000 * ds + def scale_windspeed(windspeed): """Scaling wind speed from a height of 10 meters to a height of 2 meters @@ -33,19 +39,21 @@ def scale_windspeed(windspeed): scale_factor = np.log10(2 / 0.01) / np.log10(10 / 0.01) return scale_factor * windspeed + def identity(ds): return ds + def rename_val_column(ds): data_var = next(iter(ds)) return ds.rename({data_var: "value"}) - + convert_map = { "10m_u_component_of_wind": scale_windspeed, "10m_v_component_of_wind": scale_windspeed, "2m_dewpoint_temperature": kelvin_to_celsius, - "2m_temperature": kelvin_to_celsius, + "2m_temperature": kelvin_to_celsius, "surface_net_solar_radiation": identity, "surface_net_thermal_radiation": identity, "surface_pressure": identity, @@ -55,49 +63,56 @@ def rename_val_column(ds): "total_sky_direct_solar_radiation_at_surface": identity, } + def interpolate_to_target(ds): - return ( - ds - .interp(longitude=TARGET_LON, latitude=TARGET_LAT, method='nearest') - .interpolate_na(dim="longitude", method="nearest", fill_value="extrapolate") - ) + return ds.interp( + longitude=TARGET_LON, latitude=TARGET_LAT, method="nearest" + ).interpolate_na(dim="longitude", method="nearest", fill_value="extrapolate") + -def load_variable(variable, year, month, dataset='single-levels'): +def load_variable(variable, year, month, dataset="single-levels"): root = Path("/mnt/share/erf/climate_downscale/extracted_data/era5") p = root / f"reanalysis-era5-{dataset}_{variable}_{year}_{month}.nc" - if dataset == 'land' and not p.exists(): + if dataset == "land" and not p.exists(): # Substitute the single level dataset pre-interpolated at the target resolution. p = root / f"reanalysis-era5-single-levels_{source_variable}_{year}_{month}.nc" ds = interpolate_to_target(xr.load_dataset(p)) - elif dataset == 'land': + elif dataset == "land": ds = xr.load_dataset(p).assign_coords(latitude=TARGET_LAT, longitude=TARGET_LON) else: ds = xr.load_dataset(p) conversion = convert_map[variable] - ds = conversion(rename_val_column(ds)) + ds = conversion(rename_val_column(ds)) return ds - + ######## - + + def daily_mean(ds): - return ds.groupby('time.date').mean() + return ds.groupby("time.date").mean() + def daily_max(ds): - return ds.groupby('time.date').max() + return ds.groupby("time.date").max() + def daily_min(ds): - return ds.groupby('time.date').min() + return ds.groupby("time.date").min() + def daily_sum(ds): - return ds.groupby('time.date').sum() + return ds.groupby("time.date").sum() + def cdd(temperature_c): return np.maximum(temperature_c - 18, 0).groupby("time.date").mean() + def hdd(temperature_c): return np.maximum(18 - temperature_c, 0).groupby("time.date").mean() + def vector_magnitude(x, y): return np.sqrt(x**2 + y**2) @@ -108,10 +123,15 @@ def buck_vapor_presure(temperature_c): https://en.wikipedia.org/wiki/Arden_Buck_equation https://journals.ametsoc.org/view/journals/apme/20/12/1520-0450_1981_020_1527_nefcvp_2_0_co_2.xml """ - over_water = 6.1121 * np.exp((18.678 - temperature_c / 234.5) * (temperature_c / (257.14 + temperature_c))) - over_ice = 6.1115 * np.exp((23.036 - temperature_c / 333.7) * (temperature_c / (279.82 + temperature_c))) + over_water = 6.1121 * np.exp( + (18.678 - temperature_c / 234.5) * (temperature_c / (257.14 + temperature_c)) + ) + over_ice = 6.1115 * np.exp( + (23.036 - temperature_c / 333.7) * (temperature_c / (279.82 + temperature_c)) + ) return xr.where(temperature_c > 0, over_water, over_ice) + def rh_percent(temperature_c, dewpoint_temperature_c): # saturated vapour 
pressure es = buck_vapor_pressure(temperature_c) @@ -120,10 +140,11 @@ def rh_percent(temperature_c, dewpoint_temperature_c): rh = (e / es) * 100 return rh + def heat_index(temperature_c, dewpoint_temperature_c): t = temperature_c # Alias for simplicity in the formula r = rh_percent(temperature_c, dewpoint_temperature_c) - + hi_raw = ( -8.784695 + 1.61139411 * t @@ -138,15 +159,17 @@ def heat_index(temperature_c, dewpoint_temperature_c): hi = xr.where(t > 20, hi_raw, t) return hi + def humidex(temperature_c, dewpoint_temperature_c): vp = buck_vapor_pressure(dewpoint_temperature_c) return temperature_c + 0.5555 * (vp - 10) + def effective_temperature(temperature_c, dewpoint_temperature_c, uas, vas): """https://www.sciencedirect.com/topics/engineering/effective-temperature""" t = temperature_c r = rh_percent(temperature_c, dewpoint_temperature_c) - v = vector_magnitude(uas, vas) + v = vector_magnitude(uas, vas) wind_adjustment = 1 / (1.76 + 1.4 * v**0.75) et = ( @@ -155,22 +178,23 @@ def effective_temperature(temperature_c, dewpoint_temperature_c, uas, vas): - 0.29 * t * (1 - 0.01 * r) ) return et - - - collapse_map = { "mean_temperature": (["2m_temperature"], daily_mean, (273.15, 0.01)), "max_temperature": (["2m_temperature"], daily_max, (273.15, 0.01)), - "min_temperature": (["2m_temperature"], daily_min, (273.15, 0.01)), + "min_temperature": (["2m_temperature"], daily_min, (273.15, 0.01)), "cooling_degree_days": (["2m_temperature"], cdd, (0, 0.01)), "heating_degree_days": (["2m_temperature"], hdd, (0, 0.01)), "wind_speed": ( - ["10m_u_component_of_wind", "10m_v_component_of_wind"], lambda x, y: daily_mean(vector_magnitude(x, y)), (0, 0.01) + ["10m_u_component_of_wind", "10m_v_component_of_wind"], + lambda x, y: daily_mean(vector_magnitude(x, y)), + (0, 0.01), ), "relative_humidity": ( - ["2m_temperature", "2m_dewpoint_temperature"], lambda x, y: daily_mean(rh_percent(x, y)), (0, 0.01) + ["2m_temperature", "2m_dewpoint_temperature"], + lambda x, y: daily_mean(rh_percent(x, y)), + (0, 0.01), ), "total_precipitation": (["total_precipitation"], daily_sum, (0, 0.1)), # "heat_index": ( @@ -183,7 +207,6 @@ def effective_temperature(temperature_c, dewpoint_temperature_c, uas, vas): # ["2m_temperature", "2m_dewpoint_temperature", "10m_u_component_of_wind", "10m_v_component_of_wind"], # lambda *args: daily_mean(effective_temperature(*args)), (273.15, 0.01) # ), - } year = "1990" @@ -194,36 +217,34 @@ def effective_temperature(temperature_c, dewpoint_temperature_c, uas, vas): print("loading single-levels") single_level = [ - load_variable(sv, year, month, 'single-levels') for sv in source_variables + load_variable(sv, year, month, "single-levels") for sv in source_variables ] -print('collapsing') +print("collapsing") ds = collapse_fun(*single_level) ds = ds.assign(date=pd.to_datetime(ds.date)) -print('interpolating') +print("interpolating") ds_land_res = interpolate_to_target(ds) print("loading land") -land = [ - load_variable(sv, year, month, 'land') for sv in source_variables -] -print('collapsing') +land = [load_variable(sv, year, month, "land") for sv in source_variables] +print("collapsing") ds_land = collapse_fun(*land) ds_land = ds_land.assign(date=pd.to_datetime(ds_land.date)) -print('combining') +print("combining") combined = ds_land.combine_first(ds_land_res) combined.to_netcdf( - 'compressed.nc', + "compressed.nc", encoding={ - 'value': { - 'dtype': 'int16', - 'add_offset': e_offset, - 'scale_factor': e_scale, - '_FillValue': -9999, - 'zlib': True, - 'complevel': 1, - } - } -) \ No 
newline at end of file + "value": { + "dtype": "int16", + "add_offset": e_offset, + "scale_factor": e_scale, + "_FillValue": -9999, + "zlib": True, + "complevel": 1, + } + }, +) diff --git a/src/climate_downscale/generate/utils.py b/src/climate_downscale/generate/utils.py new file mode 100644 index 0000000..e69de29 From c2a4520d83f2c3a41bbec2e1ff17e449799043bb Mon Sep 17 00:00:00 2001 From: collijk Date: Thu, 13 Jun 2024 21:20:18 -0700 Subject: [PATCH 34/71] Put together era5 daily script --- src/climate_downscale/cli.py | 4 +- src/climate_downscale/cli_options.py | 5 +- src/climate_downscale/data.py | 34 +- src/climate_downscale/generate/__init__.py | 12 + src/climate_downscale/generate/era5_daily.py | 426 +++++++++---------- src/climate_downscale/generate/utils.py | 277 ++++++++++++ 6 files changed, 538 insertions(+), 220 deletions(-) diff --git a/src/climate_downscale/cli.py b/src/climate_downscale/cli.py index b962d54..38f86d3 100644 --- a/src/climate_downscale/cli.py +++ b/src/climate_downscale/cli.py @@ -1,6 +1,6 @@ import click -from climate_downscale import extract, model +from climate_downscale import downscale, extract, generate @click.group() @@ -13,7 +13,7 @@ def cdtask() -> None: """Entry point for running climate downscale tasks.""" -for module in [extract, model]: +for module in [extract, downscale, generate]: runners = getattr(module, "RUNNERS", {}) task_runners = getattr(module, "TASK_RUNNERS", {}) diff --git a/src/climate_downscale/cli_options.py b/src/climate_downscale/cli_options.py index 720b53e..879465e 100644 --- a/src/climate_downscale/cli_options.py +++ b/src/climate_downscale/cli_options.py @@ -22,13 +22,14 @@ def with_year( *, + years: list[str] = VALID_YEARS, allow_all: bool = False, ) -> ClickOption[_P, _T]: return with_choice( "year", "y", allow_all=allow_all, - choices=VALID_YEARS, + choices=years, help="Year to extract data for.", ) @@ -219,4 +220,6 @@ def with_lon_start( "with_num_cores", "with_progress_bar", "RUN_ALL", + "ClickOption", + "with_choice", ] diff --git a/src/climate_downscale/data.py b/src/climate_downscale/data.py index 03548c8..839448f 100644 --- a/src/climate_downscale/data.py +++ b/src/climate_downscale/data.py @@ -87,12 +87,12 @@ def rub_local_climate_zones(self) -> Path: return self.extracted_data / "rub_local_climate_zones" @property - def model(self) -> Path: - return self.root / "model" + def downscale_model(self) -> Path: + return self.root / "downscale_model" @property def predictors(self) -> Path: - return self.model / "predictors" + return self.downscale_model / "predictors" def save_predictor( self, @@ -109,7 +109,7 @@ def load_predictor(self, name: str) -> rt.RasterArray: @property def training_data(self) -> Path: - return self.model / "training_data" + return self.downscale_model / "training_data" def save_training_data(self, df: pd.DataFrame, year: int | str) -> None: path = self.training_data / f"{year}.parquet" @@ -119,6 +119,32 @@ def save_training_data(self, df: pd.DataFrame, year: int | str) -> None: def load_training_data(self, year: int | str) -> pd.DataFrame: return pd.read_parquet(self.training_data / f"{year}.parquet") + @property + def results(self) -> Path: + return self.root / "results" + + @property + def era5_daily(self) -> Path: + return self.results / "era5_daily" + + def save_era5_daily( + self, + ds: xr.Dataset, + variable: str, + year: int | str, + **encoding_kwargs: Any, + ) -> None: + encoding = { + "dtype": "int16", + "_FillValue": -32767, + "zlib": True, + "complevel": 1, + } + 
encoding.update(encoding_kwargs) + path = self.era5_daily / f"{variable}_{year}.nc" + touch(path, exist_ok=True) + ds.to_netcdf(path, encoding={"value": encoding}) + def save_raster( raster: rt.RasterArray, diff --git a/src/climate_downscale/generate/__init__.py b/src/climate_downscale/generate/__init__.py index e69de29..21710f6 100644 --- a/src/climate_downscale/generate/__init__.py +++ b/src/climate_downscale/generate/__init__.py @@ -0,0 +1,12 @@ +from climate_downscale.generate.era5_daily import ( + generate_era5_daily, + generate_era5_daily_task, +) + +RUNNERS = { + "era5_daily": generate_era5_daily, +} + +TASK_RUNNERS = { + "era5_daily": generate_era5_daily_task, +} diff --git a/src/climate_downscale/generate/era5_daily.py b/src/climate_downscale/generate/era5_daily.py index a5f34e0..6a7a45d 100644 --- a/src/climate_downscale/generate/era5_daily.py +++ b/src/climate_downscale/generate/era5_daily.py @@ -1,250 +1,250 @@ +import typing from pathlib import Path +import click import numpy as np import pandas as pd import xarray as xr +from rra_tools import jobmon + +from climate_downscale import cli_options as clio +from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData +from climate_downscale.generate import utils TARGET_LON = xr.DataArray( - np.round(np.arange(0.0, 360.0, 0.1, dtype="float32"), 1), dims="longitude" + np.round(np.arange(-180.0, 180.0, 0.1, dtype="float32"), 1), dims="longitude" ) TARGET_LAT = xr.DataArray( np.round(np.arange(90.0, -90.1, -0.1, dtype="float32"), 1), dims="latitude" ) +# Map from source variable to a unit conversion function +CONVERT_MAP = { + "10m_u_component_of_wind": utils.scale_wind_speed_height, + "10m_v_component_of_wind": utils.scale_wind_speed_height, + "2m_dewpoint_temperature": utils.kelvin_to_celsius, + "2m_temperature": utils.kelvin_to_celsius, + "surface_net_solar_radiation": utils.identity, + "surface_net_thermal_radiation": utils.identity, + "surface_pressure": utils.identity, + "surface_solar_radiation_downwards": utils.identity, + "surface_thermal_radiation_downwards": utils.identity, + "total_precipitation": utils.meter_to_millimeter, + "total_sky_direct_solar_radiation_at_surface": utils.identity, +} -def kelvin_to_celsius(temperature_k): - return temperature_k - 273.15 - - -def m_to_mm(ds): - return 1000 * ds +# Map from target variable to: +# - a list of source variables +# - a transformation function +# - a tuple of offset and scale factors for the output for serialization +TRANSFORM_MAP = { + "mean_temperature": ( + ["2m_temperature"], + utils.daily_mean, + (273.15, 0.01), + ), + "max_temperature": ( + ["2m_temperature"], + utils.daily_max, + (273.15, 0.01), + ), + "min_temperature": ( + ["2m_temperature"], + utils.daily_min, + (273.15, 0.01), + ), + "cooling_degree_days": ( + ["2m_temperature"], + utils.cdd, + (0, 0.01), + ), + "heating_degree_days": ( + ["2m_temperature"], + utils.hdd, + (0, 0.01), + ), + "wind_speed": ( + ["10m_u_component_of_wind", "10m_v_component_of_wind"], + lambda x, y: utils.daily_mean(utils.vector_magnitude(x, y)), + (0, 0.01), + ), + "relative_humidity": ( + ["2m_temperature", "2m_dewpoint_temperature"], + lambda x, y: utils.daily_mean(utils.rh_percent(x, y)), + (0, 0.01), + ), + "total_precipitation": ( + ["total_precipitation"], + utils.daily_sum, + (0, 0.1), + ), +} +UNTESTED_TRANSFORM_MAP = { + "heat_index": ( + ["2m_temperature", "2m_dewpoint_temperature"], + lambda x, y: utils.daily_mean(utils.heat_index(x, y)), + (273.15, 0.01), + ), + "humidex": ( + ["2m_temperature", 
"2m_dewpoint_temperature"], + lambda x, y: utils.daily_mean(utils.humidex(x, y)), + (273.15, 0.01), + ), + "effective_temperature": ( + [ + "2m_temperature", + "2m_dewpoint_temperature", + "10m_u_component_of_wind", + "10m_v_component_of_wind", + ], + lambda t2m, t2d, uas, vas: utils.daily_mean( + utils.effective_temperature(t2m, t2d, uas, vas) + ), + (273.15, 0.01), + ), +} -def scale_windspeed(windspeed): - """Scaling wind speed from a height of 10 meters to a height of 2 meters - Reference: Bröde et al. (2012) - https://doi.org/10.1007/s00484-011-0454-1 +_P = typing.ParamSpec("_P") +_T = typing.TypeVar("_T") - Parameters - ---------- - ds - The 10m wind speed [m/s]. May be signed (ie a velocity component) - Returnds - -------- - xr.DataSet - The 2m wind speed [m/s]. May be signed (ie a velocity component) - """ - scale_factor = np.log10(2 / 0.01) / np.log10(10 / 0.01) - return scale_factor * windspeed +def with_variable( + *, + allow_all: bool = False, +) -> clio.ClickOption[_P, _T]: + return clio.with_choice( + "target-variable", + "t", + allow_all=allow_all, + choices=list(TRANSFORM_MAP.keys()), + help="Variable to generate.", + ) -def identity(ds): +def load_and_shift_longitude(ds_path: str | Path) -> xr.Dataset: + ds = xr.load_dataset(ds_path) + ds = ds.assign_coords(longitude=(ds.longitude + 180) % 360 - 180).sortby( + "longitude" + ) return ds -def rename_val_column(ds): - data_var = next(iter(ds)) - return ds.rename({data_var: "value"}) - - -convert_map = { - "10m_u_component_of_wind": scale_windspeed, - "10m_v_component_of_wind": scale_windspeed, - "2m_dewpoint_temperature": kelvin_to_celsius, - "2m_temperature": kelvin_to_celsius, - "surface_net_solar_radiation": identity, - "surface_net_thermal_radiation": identity, - "surface_pressure": identity, - "surface_solar_radiation_downwards": identity, - "surface_thermal_radiation_downwards": identity, - "total_precipitation": m_to_mm, - "total_sky_direct_solar_radiation_at_surface": identity, -} - - -def interpolate_to_target(ds): - return ds.interp( - longitude=TARGET_LON, latitude=TARGET_LAT, method="nearest" - ).interpolate_na(dim="longitude", method="nearest", fill_value="extrapolate") - - -def load_variable(variable, year, month, dataset="single-levels"): +def load_variable( + variable: str, + year: str, + month: str, + dataset: str = "single-levels", +) -> xr.Dataset: root = Path("/mnt/share/erf/climate_downscale/extracted_data/era5") p = root / f"reanalysis-era5-{dataset}_{variable}_{year}_{month}.nc" if dataset == "land" and not p.exists(): # Substitute the single level dataset pre-interpolated at the target resolution. 
- p = root / f"reanalysis-era5-single-levels_{source_variable}_{year}_{month}.nc" - ds = interpolate_to_target(xr.load_dataset(p)) + p = root / f"reanalysis-era5-single-levels_{variable}_{year}_{month}.nc" + ds = utils.interpolate_to_target_latlon( + load_and_shift_longitude(p), + target_lat=TARGET_LAT, + target_lon=TARGET_LON, + ) elif dataset == "land": - ds = xr.load_dataset(p).assign_coords(latitude=TARGET_LAT, longitude=TARGET_LON) + ds = load_and_shift_longitude(p).assign_coords( + latitude=TARGET_LAT, longitude=TARGET_LON + ) else: - ds = xr.load_dataset(p) - conversion = convert_map[variable] - ds = conversion(rename_val_column(ds)) + ds = load_and_shift_longitude(p) + conversion = CONVERT_MAP[variable] + ds = conversion(utils.rename_val_column(ds)) return ds -######## - - -def daily_mean(ds): - return ds.groupby("time.date").mean() - - -def daily_max(ds): - return ds.groupby("time.date").max() - - -def daily_min(ds): - return ds.groupby("time.date").min() - - -def daily_sum(ds): - return ds.groupby("time.date").sum() - - -def cdd(temperature_c): - return np.maximum(temperature_c - 18, 0).groupby("time.date").mean() - - -def hdd(temperature_c): - return np.maximum(18 - temperature_c, 0).groupby("time.date").mean() - - -def vector_magnitude(x, y): - return np.sqrt(x**2 + y**2) - - -def buck_vapor_presure(temperature_c): - """Approximate vapor pressure of water. - - https://en.wikipedia.org/wiki/Arden_Buck_equation - https://journals.ametsoc.org/view/journals/apme/20/12/1520-0450_1981_020_1527_nefcvp_2_0_co_2.xml - """ - over_water = 6.1121 * np.exp( - (18.678 - temperature_c / 234.5) * (temperature_c / (257.14 + temperature_c)) - ) - over_ice = 6.1115 * np.exp( - (23.036 - temperature_c / 333.7) * (temperature_c / (279.82 + temperature_c)) +def generate_era5_daily_main( + output_dir: str | Path, + year: str, + target_variable: str, +) -> None: + source_variables, collapse_fun, (e_offset, e_scale) = TRANSFORM_MAP[target_variable] + + datasets = [] + for month in range(1, 13): + month_str = f"{month:02d}" + print("loading single-levels") + single_level = [ + load_variable(sv, year, month_str, "single-levels") + for sv in source_variables + ] + print("collapsing") + ds = collapse_fun(*single_level) # type: ignore[operator] + # collapsing often screws the date dtype, so fix it + ds = ds.assign(date=pd.to_datetime(ds.date)) + + print("interpolating") + ds_land_res = utils.interpolate_to_target_latlon(ds, TARGET_LAT, TARGET_LON) + + print("loading land") + land = [load_variable(sv, year, month_str, "land") for sv in source_variables] + print("collapsing") + ds_land = collapse_fun(*land) # type: ignore[operator] + ds_land = ds_land.assign(date=pd.to_datetime(ds_land.date)) + + print("combining") + combined = ds_land.combine_first(ds_land_res) + datasets.append(combined) + + ds_year = xr.concat(datasets, dim="date").sortby("date") + + cd_data = ClimateDownscaleData(output_dir) + cd_data.save_era5_daily( + ds_year, target_variable, year, add_offset=e_offset, scale_factor=e_scale ) - return xr.where(temperature_c > 0, over_water, over_ice) - - -def rh_percent(temperature_c, dewpoint_temperature_c): - # saturated vapour pressure - es = buck_vapor_pressure(temperature_c) - # vapour pressure - e = buck_vapor_pressure(dewpoint_temperature_c) - rh = (e / es) * 100 - return rh - - -def heat_index(temperature_c, dewpoint_temperature_c): - t = temperature_c # Alias for simplicity in the formula - r = rh_percent(temperature_c, dewpoint_temperature_c) - - hi_raw = ( - -8.784695 - + 1.61139411 * t - + 
2.338549 * r - - 0.14611605 * t * r - - 1.2308094e-2 * t**2 - - 1.6424828e-2 * r**2 - + 2.211732e-3 * t**2 * r - + 7.2546e-4 * t * r**2 - - 3.582e-6 * t**2 * r**2 - ) - hi = xr.where(t > 20, hi_raw, t) - return hi - - -def humidex(temperature_c, dewpoint_temperature_c): - vp = buck_vapor_pressure(dewpoint_temperature_c) - return temperature_c + 0.5555 * (vp - 10) -def effective_temperature(temperature_c, dewpoint_temperature_c, uas, vas): - """https://www.sciencedirect.com/topics/engineering/effective-temperature""" - t = temperature_c - r = rh_percent(temperature_c, dewpoint_temperature_c) - v = vector_magnitude(uas, vas) - - wind_adjustment = 1 / (1.76 + 1.4 * v**0.75) - et = ( - 37 - - ((37 - t) / (0.68 - 0.0014 * r + wind_adjustment)) - - 0.29 * t * (1 - 0.01 * r) +@click.command() # type: ignore[arg-type] +@clio.with_output_directory(DEFAULT_ROOT) +@clio.with_year() +@with_variable() +def generate_era5_daily_task( + output_dir: str, + year: str, + target_variable: str, +) -> None: + generate_era5_daily_main(output_dir, year, target_variable) + + +@click.command() # type: ignore[arg-type] +@clio.with_output_directory(DEFAULT_ROOT) +@clio.with_year(allow_all=True) +@with_variable(allow_all=True) +@clio.with_queue() +def generate_era5_daily( + output_dir: str, + year: str, + target_variable: str, + queue: str, +) -> None: + years = clio.VALID_YEARS if year == clio.RUN_ALL else [year] + variables = ( + list(TRANSFORM_MAP.keys()) + if target_variable == clio.RUN_ALL + else [target_variable] ) - return et - -collapse_map = { - "mean_temperature": (["2m_temperature"], daily_mean, (273.15, 0.01)), - "max_temperature": (["2m_temperature"], daily_max, (273.15, 0.01)), - "min_temperature": (["2m_temperature"], daily_min, (273.15, 0.01)), - "cooling_degree_days": (["2m_temperature"], cdd, (0, 0.01)), - "heating_degree_days": (["2m_temperature"], hdd, (0, 0.01)), - "wind_speed": ( - ["10m_u_component_of_wind", "10m_v_component_of_wind"], - lambda x, y: daily_mean(vector_magnitude(x, y)), - (0, 0.01), - ), - "relative_humidity": ( - ["2m_temperature", "2m_dewpoint_temperature"], - lambda x, y: daily_mean(rh_percent(x, y)), - (0, 0.01), - ), - "total_precipitation": (["total_precipitation"], daily_sum, (0, 0.1)), - # "heat_index": ( - # ["2m_temperature", "2m_dewpoint_temperature"], lambda x, y: daily_mean(heat_index(x, y)), (273.15, 0.01) - # ), - # "humidex": ( - # ['2m_temperature', '2m_dewpoint_temperature'], lambda x, y: daily_mean(humidex(x, y)), (273.15, 0.01) - # ), - # "normal_effective_temperature": ( - # ["2m_temperature", "2m_dewpoint_temperature", "10m_u_component_of_wind", "10m_v_component_of_wind"], - # lambda *args: daily_mean(effective_temperature(*args)), (273.15, 0.01) - # ), -} - -year = "1990" -month = "01" -target_variable = "wind_speed" - -source_variables, collapse_fun, (e_offset, e_scale) = collapse_map[target_variable] - -print("loading single-levels") -single_level = [ - load_variable(sv, year, month, "single-levels") for sv in source_variables -] -print("collapsing") -ds = collapse_fun(*single_level) -ds = ds.assign(date=pd.to_datetime(ds.date)) - -print("interpolating") -ds_land_res = interpolate_to_target(ds) - -print("loading land") -land = [load_variable(sv, year, month, "land") for sv in source_variables] -print("collapsing") -ds_land = collapse_fun(*land) -ds_land = ds_land.assign(date=pd.to_datetime(ds_land.date)) - -print("combining") -combined = ds_land.combine_first(ds_land_res) - -combined.to_netcdf( - "compressed.nc", - encoding={ - "value": { - "dtype": 
"int16", - "add_offset": e_offset, - "scale_factor": e_scale, - "_FillValue": -9999, - "zlib": True, - "complevel": 1, - } - }, -) + jobmon.run_parallel( + runner="cdtask", + task_name="extract cmip6", + node_args={ + "year": years, + "variable": variables, + }, + task_args={ + "output-dir": output_dir, + }, + task_resources={ + "queue": queue, + "cores": 1, + "memory": "10G", + "runtime": "120m", + "project": "proj_rapidresponse", + }, + max_attempts=1, + ) diff --git a/src/climate_downscale/generate/utils.py b/src/climate_downscale/generate/utils.py index e69de29..44de6d3 100644 --- a/src/climate_downscale/generate/utils.py +++ b/src/climate_downscale/generate/utils.py @@ -0,0 +1,277 @@ +import numpy as np +import xarray as xr + +############################# +# Standard unit conversions # +############################# + + +def kelvin_to_celsius(temperature_k: xr.Dataset) -> xr.Dataset: + """Convert temperature from Kelvin to Celsius + + Parameters + ---------- + temperature_k + Temperature in Kelvin + + Returns + ------- + xr.Dataset + Temperature in Celsius + """ + return temperature_k - 273.15 + + +def meter_to_millimeter(rainfall_m: xr.Dataset) -> xr.Dataset: + """Convert rainfall from meters to millimeters + + Parameters + ---------- + rainfall_m + Rainfall in meters + + Returns + ------- + xr.Dataset + Rainfall in millimeters + """ + return 1000 * rainfall_m + + +def scale_wind_speed_height(wind_speed_10m: xr.Dataset) -> xr.Dataset: + """Scaling wind speed from a height of 10 meters to a height of 2 meters + + Reference: Bröde et al. (2012) + https://doi.org/10.1007/s00484-011-0454-1 + + Parameters + ---------- + wind_speed_10m + The 10m wind speed [m/s]. May be signed (ie a velocity component) + + Returns + ------- + xr.DataSet + The 2m wind speed [m/s]. May be signed (ie a velocity component) + """ + scale_factor = np.log10(2 / 0.01) / np.log10(10 / 0.01) + return scale_factor * wind_speed_10m # type: ignore[no-any-return] + + +def identity(ds: xr.Dataset) -> xr.Dataset: + """Identity transformation""" + return ds + + +###################### +# Standard summaries # +###################### + + +def daily_mean(ds: xr.Dataset) -> xr.Dataset: + return ds.groupby("time.date").mean() + + +def daily_max(ds: xr.Dataset) -> xr.Dataset: + return ds.groupby("time.date").max() + + +def daily_min(ds: xr.Dataset) -> xr.Dataset: + return ds.groupby("time.date").min() + + +def daily_sum(ds: xr.Dataset) -> xr.Dataset: + return ds.groupby("time.date").sum() + + +######################## +# Data transformations # +######################## + + +def cdd(temperature_c: xr.Dataset) -> xr.Dataset: + """Calculate cooling degree days""" + return daily_mean(np.maximum(temperature_c - 18, 0)) # type: ignore[call-overload] + + +def hdd(temperature_c: xr.Dataset) -> xr.Dataset: + """Calculate heating degree days""" + return daily_mean(np.maximum(18 - temperature_c, 0)) # type: ignore[call-overload] + + +def vector_magnitude(x: xr.Dataset, y: xr.Dataset) -> xr.Dataset: + """Calculate the magnitude of a vector.""" + return np.sqrt(x**2 + y**2) # type: ignore[no-any-return] + + +def buck_vapor_pressure(temperature_c: xr.Dataset) -> xr.Dataset: + """Approximate vapor pressure of water. 
+ + https://en.wikipedia.org/wiki/Arden_Buck_equation + https://journals.ametsoc.org/view/journals/apme/20/12/1520-0450_1981_020_1527_nefcvp_2_0_co_2.xml + + Parameters + ---------- + temperature_c + Temperature in Celsius + + Returns + ------- + xr.Dataset + Vapor pressure in hPa + """ + over_water = 6.1121 * np.exp( + (18.678 - temperature_c / 234.5) * (temperature_c / (257.14 + temperature_c)) + ) + over_ice = 6.1115 * np.exp( + (23.036 - temperature_c / 333.7) * (temperature_c / (279.82 + temperature_c)) + ) + vp = xr.where(temperature_c > 0, over_water, over_ice) # type: ignore[no-untyped-call] + return vp # type: ignore[no-any-return] + + +def rh_percent( + temperature_c: xr.Dataset, dewpoint_temperature_c: xr.Dataset +) -> xr.Dataset: + """Calculate relative humidity from temperature and dewpoint temperature. + + Parameters + ---------- + temperature_c + Temperature in Celsius + dewpoint_temperature_c + Dewpoint temperature in Celsius + + Returns + ------- + xr.Dataset + Relative humidity as a percentage + """ + # saturation vapour pressure + svp = buck_vapor_pressure(temperature_c) + # actual vapour pressure + vp = buck_vapor_pressure(dewpoint_temperature_c) + return 100 * vp / svp + + +def heat_index( + temperature_c: xr.Dataset, dewpoint_temperature_c: xr.Dataset +) -> xr.Dataset: + """Calculate the heat index. + + https://www.weather.gov/media/ffc/ta_htindx.PDF + + Parameters + ---------- + temperature_c + Temperature in Celsius + dewpoint_temperature_c + Dewpoint temperature in Celsius + + Returns + ------- + xr.Dataset + Heat index in Celsius + """ + t = temperature_c # Alias for simplicity in the formula + r = rh_percent(temperature_c, dewpoint_temperature_c) + + # Heat index formula from canonical multi-variable regression + hi_raw = ( + -8.784695 + + 1.61139411 * t + + 2.338549 * r + - 0.14611605 * t * r + - 1.2308094e-2 * t**2 + - 1.6424828e-2 * r**2 + + 2.211732e-3 * t**2 * r + + 7.2546e-4 * t * r**2 + - 3.582e-6 * t**2 * r**2 + ) + # Below 20 degrees, the heat index is the same as the temperature + hi_threshold = 20 + hi = xr.where(t > hi_threshold, hi_raw, t) # type: ignore[no-untyped-call] + return hi # type: ignore[no-any-return] + + +def humidex( + temperature_c: xr.Dataset, dewpoint_temperature_c: xr.Dataset +) -> xr.Dataset: + """Calculate the humidex. + + https://en.wikipedia.org/wiki/Humidex + + Parameters + ---------- + temperature_c + Temperature in Celsius + dewpoint_temperature_c + Dewpoint temperature in Celsius + + Returns + ------- + xr.Dataset + Humidex in Celsius + """ + vp = buck_vapor_pressure(dewpoint_temperature_c) + return temperature_c + 0.5555 * (vp - 10) + + +def effective_temperature( + temperature_c: xr.Dataset, + dewpoint_temperature_c: xr.Dataset, + uas: xr.Dataset, + vas: xr.Dataset, +) -> xr.Dataset: + """Calculate the effective temperature. 
+ + https://www.sciencedirect.com/topics/engineering/effective-temperature + + Parameters + ---------- + temperature_c + Temperature in Celsius + dewpoint_temperature_c + Dewpoint temperature in Celsius + uas + U-component of wind speed + vas + V-component of wind speed + + Returns + ------- + xr.Dataset + Effective temperature in Celsius + """ + # Alias for simplicity in the formula + t = temperature_c + r = rh_percent(temperature_c, dewpoint_temperature_c) + v = vector_magnitude(uas, vas) + + wind_adjustment = 1 / (1.76 + 1.4 * v**0.75) + et = ( + 37 + - ((37 - t) / (0.68 - 0.0014 * r + wind_adjustment)) + - 0.29 * t * (1 - 0.01 * r) + ) + return et + + +################ +# Data cleanup # +################ + + +def rename_val_column(ds: xr.Dataset) -> xr.Dataset: + data_var = next(iter(ds)) + return ds.rename({data_var: "value"}) + + +def interpolate_to_target_latlon( + ds: xr.Dataset, + target_lat: xr.DataArray, + target_lon: xr.DataArray, +) -> xr.Dataset: + return ds.interp( + longitude=target_lon, latitude=target_lat, method="nearest" + ).interpolate_na(dim="longitude", method="nearest", fill_value="extrapolate") From 6760a191384bce0c252c905858ba13e8603e3df0 Mon Sep 17 00:00:00 2001 From: collijk Date: Thu, 13 Jun 2024 21:23:04 -0700 Subject: [PATCH 35/71] Fix runner --- src/climate_downscale/generate/era5_daily.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/climate_downscale/generate/era5_daily.py b/src/climate_downscale/generate/era5_daily.py index 6a7a45d..0d1f645 100644 --- a/src/climate_downscale/generate/era5_daily.py +++ b/src/climate_downscale/generate/era5_daily.py @@ -231,7 +231,7 @@ def generate_era5_daily( jobmon.run_parallel( runner="cdtask", - task_name="extract cmip6", + task_name="generate era5_daily", node_args={ "year": years, "variable": variables, From 5272118a1ccf2b4592b5acf9e072f33b927d0035 Mon Sep 17 00:00:00 2001 From: collijk Date: Thu, 13 Jun 2024 21:26:15 -0700 Subject: [PATCH 36/71] Add month specific logging and shorten range for testing --- src/climate_downscale/generate/era5_daily.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/climate_downscale/generate/era5_daily.py b/src/climate_downscale/generate/era5_daily.py index 0d1f645..c85770b 100644 --- a/src/climate_downscale/generate/era5_daily.py +++ b/src/climate_downscale/generate/era5_daily.py @@ -166,9 +166,9 @@ def generate_era5_daily_main( source_variables, collapse_fun, (e_offset, e_scale) = TRANSFORM_MAP[target_variable] datasets = [] - for month in range(1, 13): + for month in range(1, 3): month_str = f"{month:02d}" - print("loading single-levels") + print(f"loading single-levels for {month_str}") single_level = [ load_variable(sv, year, month_str, "single-levels") for sv in source_variables @@ -181,7 +181,7 @@ def generate_era5_daily_main( print("interpolating") ds_land_res = utils.interpolate_to_target_latlon(ds, TARGET_LAT, TARGET_LON) - print("loading land") + print(f"loading land for {month_str}") land = [load_variable(sv, year, month_str, "land") for sv in source_variables] print("collapsing") ds_land = collapse_fun(*land) # type: ignore[operator] From c75e03b32a86a9a8268645e6158e52df1671c82f Mon Sep 17 00:00:00 2001 From: collijk Date: Thu, 13 Jun 2024 23:03:21 -0700 Subject: [PATCH 37/71] Be lazier --- src/climate_downscale/generate/era5_daily.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/climate_downscale/generate/era5_daily.py b/src/climate_downscale/generate/era5_daily.py index c85770b..54990cf 
100644 --- a/src/climate_downscale/generate/era5_daily.py +++ b/src/climate_downscale/generate/era5_daily.py @@ -124,7 +124,7 @@ def with_variable( def load_and_shift_longitude(ds_path: str | Path) -> xr.Dataset: - ds = xr.load_dataset(ds_path) + ds = xr.open_dataset(ds_path) ds = ds.assign_coords(longitude=(ds.longitude + 180) % 360 - 180).sortby( "longitude" ) @@ -140,6 +140,7 @@ def load_variable( root = Path("/mnt/share/erf/climate_downscale/extracted_data/era5") p = root / f"reanalysis-era5-{dataset}_{variable}_{year}_{month}.nc" if dataset == "land" and not p.exists(): + raise NotImplementedError # Substitute the single level dataset pre-interpolated at the target resolution. p = root / f"reanalysis-era5-single-levels_{variable}_{year}_{month}.nc" ds = utils.interpolate_to_target_latlon( @@ -174,7 +175,7 @@ def generate_era5_daily_main( for sv in source_variables ] print("collapsing") - ds = collapse_fun(*single_level) # type: ignore[operator] + ds = collapse_fun(*single_level).compute() # type: ignore[operator] # collapsing often screws the date dtype, so fix it ds = ds.assign(date=pd.to_datetime(ds.date)) @@ -184,7 +185,7 @@ def generate_era5_daily_main( print(f"loading land for {month_str}") land = [load_variable(sv, year, month_str, "land") for sv in source_variables] print("collapsing") - ds_land = collapse_fun(*land) # type: ignore[operator] + ds_land = collapse_fun(*land).compute() # type: ignore[operator] ds_land = ds_land.assign(date=pd.to_datetime(ds_land.date)) print("combining") From 0b52a39154aa1df98894b40f1f8340652bb844d0 Mon Sep 17 00:00:00 2001 From: James Collins Date: Fri, 14 Jun 2024 00:25:53 -0700 Subject: [PATCH 38/71] Add cmip daily --- src/climate_downscale/generate/cmip_daily.py | 43 ++++++++++++++++++++ src/climate_downscale/generate/era5_daily.py | 30 +++++++------- 2 files changed, 58 insertions(+), 15 deletions(-) create mode 100644 src/climate_downscale/generate/cmip_daily.py diff --git a/src/climate_downscale/generate/cmip_daily.py b/src/climate_downscale/generate/cmip_daily.py new file mode 100644 index 0000000..86085e5 --- /dev/null +++ b/src/climate_downscale/generate/cmip_daily.py @@ -0,0 +1,43 @@ +import pandas as pd +import xarray as xr +from pathlib import Path +import numpy as np +import tqdm + +from climate_downscale.generate import utils + +TARGET_LON = xr.DataArray( + np.round(np.arange(-180.0, 180.0, 0.1, dtype="float32"), 1), dims="longitude" +) +TARGET_LAT = xr.DataArray( + np.round(np.arange(90.0, -90.1, -0.1, dtype="float32"), 1), dims="latitude" +) + +variable = 'tas' +scenario = 'ssp119' +year = '2024' + +paths = sorted(list(Path("/mnt/share/erf/climate_downscale/extracted_data/cmip6").glob("tas_ssp119*.nc"))) +p = paths[0] + +def compute_anomaly(path, year): + reference_period = slice("2015-01-01", "2024-12-31") + ref = xr.open_dataset(p).sel(time=reference_period).compute().groupby("time.month").mean("time") + + time_slice = slice(f"{year}-01", f"{year}-12") + time_range = pd.date_range(f"{year}-01-01", f"{year}-12-31") + target = xr.open_dataset(p).sel(time=time_slice).compute() + target = target.assign_coords(time=pd.to_datetime(target.time.dt.date)).interp_calendar(time_range) + + anomaly = target.groupby('time.month') - ref + anomaly = anomaly.rename({'lat': 'latitude', 'lon': 'longitude'}) + anomaly = anomaly.assign_coords(longitude=(anomaly.longitude + 180) % 360 - 180).sortby("longitude") + anomaly = utils.interpolate_to_target_latlon(anomaly, target_lat=TARGET_LAT, target_lon=TARGET_LON) + + return anomaly + +a = 1 / 
len(paths) * compute_anomaly(paths[0], year) + +for p in tqdm.tqdm(paths[1:]): + a += 1 / len(paths) * compute_anomaly(p, year) + diff --git a/src/climate_downscale/generate/era5_daily.py b/src/climate_downscale/generate/era5_daily.py index 54990cf..71a6601 100644 --- a/src/climate_downscale/generate/era5_daily.py +++ b/src/climate_downscale/generate/era5_daily.py @@ -2,6 +2,7 @@ from pathlib import Path import click +import dask import numpy as np import pandas as pd import xarray as xr @@ -78,9 +79,6 @@ utils.daily_sum, (0, 0.1), ), -} - -UNTESTED_TRANSFORM_MAP = { "heat_index": ( ["2m_temperature", "2m_dewpoint_temperature"], lambda x, y: utils.daily_mean(utils.heat_index(x, y)), @@ -124,10 +122,11 @@ def with_variable( def load_and_shift_longitude(ds_path: str | Path) -> xr.Dataset: - ds = xr.open_dataset(ds_path) - ds = ds.assign_coords(longitude=(ds.longitude + 180) % 360 - 180).sortby( - "longitude" - ) + ds = xr.open_dataset(ds_path).chunk(time=24) + with dask.config.set(**{'array.slicing.split_large_chunks': False}): + ds = ds.assign_coords(longitude=(ds.longitude + 180) % 360 - 180).sortby( + "longitude" + ) return ds @@ -167,9 +166,9 @@ def generate_era5_daily_main( source_variables, collapse_fun, (e_offset, e_scale) = TRANSFORM_MAP[target_variable] datasets = [] - for month in range(1, 3): + for month in range(1, 13): month_str = f"{month:02d}" - print(f"loading single-levels for {month_str}") + print(f"loading single-levels for {month_str}") single_level = [ load_variable(sv, year, month_str, "single-levels") for sv in source_variables @@ -180,16 +179,17 @@ def generate_era5_daily_main( ds = ds.assign(date=pd.to_datetime(ds.date)) print("interpolating") - ds_land_res = utils.interpolate_to_target_latlon(ds, TARGET_LAT, TARGET_LON) + ds_land_res = utils.interpolate_to_target_latlon(ds, TARGET_LAT, TARGET_LON) print(f"loading land for {month_str}") land = [load_variable(sv, year, month_str, "land") for sv in source_variables] print("collapsing") - ds_land = collapse_fun(*land).compute() # type: ignore[operator] + with dask.config.set(**{'array.slicing.split_large_chunks': False}): + ds_land = collapse_fun(*land).compute() # type: ignore[operator] ds_land = ds_land.assign(date=pd.to_datetime(ds_land.date)) print("combining") - combined = ds_land.combine_first(ds_land_res) + combined = ds_land.combine_first(ds_land_res) datasets.append(combined) ds_year = xr.concat(datasets, dim="date").sortby("date") @@ -235,15 +235,15 @@ def generate_era5_daily( task_name="generate era5_daily", node_args={ "year": years, - "variable": variables, + "target-variable": variables, }, task_args={ "output-dir": output_dir, }, task_resources={ "queue": queue, - "cores": 1, - "memory": "10G", + "cores": 5, + "memory": "100G", "runtime": "120m", "project": "proj_rapidresponse", }, From 51fd1bd0534069092401603ffa8b875cb065bf3e Mon Sep 17 00:00:00 2001 From: collijk Date: Fri, 14 Jun 2024 21:38:02 -0700 Subject: [PATCH 39/71] Change layout for era5 daily --- poetry.lock | 116 +++++++++++++++++- pyproject.toml | 1 + src/climate_downscale/data.py | 73 ++++++----- src/climate_downscale/extract/era5.py | 12 +- src/climate_downscale/generate/cmip6_daily.py | 103 ++++++++++++++++ src/climate_downscale/generate/cmip_daily.py | 43 ------- src/climate_downscale/generate/era5_daily.py | 57 ++++----- src/climate_downscale/generate/utils.py | 46 ++++--- 8 files changed, 323 insertions(+), 128 deletions(-) create mode 100644 src/climate_downscale/generate/cmip6_daily.py delete mode 100644 
src/climate_downscale/generate/cmip_daily.py diff --git a/poetry.lock b/poetry.lock index daae923..6c2ae67 100644 --- a/poetry.lock +++ b/poetry.lock @@ -449,6 +449,17 @@ click = ">=4.0" [package.extras] test = ["pytest-cov"] +[[package]] +name = "cloudpickle" +version = "3.0.0" +description = "Pickler class to extend the standard pickle.Pickler functionality" +optional = false +python-versions = ">=3.8" +files = [ + {file = "cloudpickle-3.0.0-py3-none-any.whl", hash = "sha256:246ee7d0c295602a036e86369c77fecda4ab17b506496730f2f576d9016fd9c7"}, + {file = "cloudpickle-3.0.0.tar.gz", hash = "sha256:996d9a482c6fb4f33c1a35335cf8afd065d2a56e973270364840712d9131a882"}, +] + [[package]] name = "colorama" version = "0.4.6" @@ -605,6 +616,35 @@ files = [ docs = ["ipython", "matplotlib", "numpydoc", "sphinx"] tests = ["pytest", "pytest-cov", "pytest-xdist"] +[[package]] +name = "dask" +version = "2024.5.2" +description = "Parallel PyData with Task Scheduling" +optional = false +python-versions = ">=3.9" +files = [ + {file = "dask-2024.5.2-py3-none-any.whl", hash = "sha256:acc2cfe41d9e0151c216ac40396dbe34df13bc3d8c51dfece190349e4f2243af"}, + {file = "dask-2024.5.2.tar.gz", hash = "sha256:5c9722c44d0195e78b6e54197aa3302e6fcaaac2310fd3014560bcb86253dcb3"}, +] + +[package.dependencies] +click = ">=8.1" +cloudpickle = ">=1.5.0" +fsspec = ">=2021.09.0" +importlib-metadata = {version = ">=4.13.0", markers = "python_version < \"3.12\""} +packaging = ">=20.0" +partd = ">=1.2.0" +pyyaml = ">=5.3.1" +toolz = ">=0.10.0" + +[package.extras] +array = ["numpy (>=1.21)"] +complete = ["dask[array,dataframe,diagnostics,distributed]", "lz4 (>=4.3.2)", "pyarrow (>=7.0)", "pyarrow-hotfix"] +dataframe = ["dask-expr (>=1.1,<1.2)", "dask[array]", "pandas (>=1.3)"] +diagnostics = ["bokeh (>=2.4.2)", "jinja2 (>=2.10.3)"] +distributed = ["distributed (==2024.5.2)"] +test = ["pandas[test]", "pre-commit", "pytest", "pytest-cov", "pytest-rerunfailures", "pytest-timeout", "pytest-xdist"] + [[package]] name = "decorator" version = "5.1.1" @@ -1290,6 +1330,25 @@ files = [ {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, ] +[[package]] +name = "importlib-metadata" +version = "7.1.0" +description = "Read metadata from Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "importlib_metadata-7.1.0-py3-none-any.whl", hash = "sha256:30962b96c0c223483ed6cc7280e7f0199feb01a0e40cfae4d4450fc6fab1f570"}, + {file = "importlib_metadata-7.1.0.tar.gz", hash = "sha256:b78938b926ee8d5f020fc4772d487045805a55ddbad2ecf21c6d60938dc7fcd2"}, +] + +[package.dependencies] +zipp = ">=0.5" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +perf = ["ipython"] +testing = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"] + [[package]] name = "iniconfig" version = "2.0.0" @@ -1442,6 +1501,17 @@ files = [ {file = "kiwisolver-1.4.5.tar.gz", hash = "sha256:e57e563a57fb22a142da34f38acc2fc1a5c864bc29ca1517a88abc963e60d6ec"}, ] +[[package]] +name = "locket" +version = "1.0.0" +description = "File-based locks for Python on Linux and Windows" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "locket-1.0.0-py2.py3-none-any.whl", hash = 
"sha256:b6c819a722f7b6bd955b80781788e4a66a55628b858d347536b7e81325a3a5e3"}, + {file = "locket-1.0.0.tar.gz", hash = "sha256:5c0d4c052a8bbbf750e056a8e65ccd309086f4f0f18a2eac306a8dfa4112a632"}, +] + [[package]] name = "loguru" version = "0.7.2" @@ -2214,6 +2284,24 @@ sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-d test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] xml = ["lxml (>=4.9.2)"] +[[package]] +name = "partd" +version = "1.4.2" +description = "Appendable key-value storage" +optional = false +python-versions = ">=3.9" +files = [ + {file = "partd-1.4.2-py3-none-any.whl", hash = "sha256:978e4ac767ec4ba5b86c6eaa52e5a2a3bc748a2ca839e8cc798f1cc6ce6efb0f"}, + {file = "partd-1.4.2.tar.gz", hash = "sha256:d022c33afbdc8405c226621b015e8067888173d85f7f5ecebb3cafed9a20f02c"}, +] + +[package.dependencies] +locket = "*" +toolz = "*" + +[package.extras] +complete = ["blosc", "numpy (>=1.20.0)", "pandas (>=1.3)", "pyzmq"] + [[package]] name = "pathos" version = "0.3.2" @@ -3278,6 +3366,17 @@ files = [ {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] +[[package]] +name = "toolz" +version = "0.12.1" +description = "List processing tools and functional utilities" +optional = false +python-versions = ">=3.7" +files = [ + {file = "toolz-0.12.1-py3-none-any.whl", hash = "sha256:d22731364c07d72eea0a0ad45bafb2c2937ab6fd38a3507bf55eae8744aa7d85"}, + {file = "toolz-0.12.1.tar.gz", hash = "sha256:ecca342664893f177a13dac0e6b41cbd8ac25a358e5f215316d43e2100224f4d"}, +] + [[package]] name = "tqdm" version = "4.66.4" @@ -3596,7 +3695,22 @@ numpy = ">=1.23" docs = ["numcodecs[msgpack]", "numpydoc", "pydata-sphinx-theme", "sphinx", "sphinx-automodapi", "sphinx-copybutton", "sphinx-design", "sphinx-issues"] jupyter = ["ipytree (>=0.2.2)", "ipywidgets (>=8.0.0)", "notebook"] +[[package]] +name = "zipp" +version = "3.19.2" +description = "Backport of pathlib-compatible object wrapper for zip files" +optional = false +python-versions = ">=3.8" +files = [ + {file = "zipp-3.19.2-py3-none-any.whl", hash = "sha256:f091755f667055f2d02b32c53771a7a6c8b47e1fdbc4b72a8b9072b3eef8015c"}, + {file = "zipp-3.19.2.tar.gz", hash = "sha256:bf1dcf6450f873a13e952a29504887c89e6de7506209e5b1bcc3460135d4de19"}, +] + +[package.extras] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] + [metadata] lock-version = "2.0" python-versions = ">=3.10, <3.13" -content-hash = "d956b3098dcb83693feb9ac5cb4b39749dbd7ef6e90a8e2bd878ee7c3dc13f43" +content-hash = "e0e7f81ba64d5f9ceee07a0a5635b84eb8f65a541f15dcc047f1f42e5ab21053" diff --git a/pyproject.toml b/pyproject.toml index b19f494..cc5fee9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,7 @@ types-tqdm = "^4.66.0.20240417" gcsfs = "^2024.6.0" zarr = "^2.18.2" types-pyyaml = "^6.0.12.20240311" +dask = "^2024.5.2" [tool.poetry.group.dev.dependencies] mkdocstrings = {version = ">=0.23", extras = ["python"]} diff --git a/src/climate_downscale/data.py b/src/climate_downscale/data.py index 839448f..ef77ebf 100644 --- a/src/climate_downscale/data.py +++ b/src/climate_downscale/data.py @@ -4,7 +4,7 @@ import pandas as pd import 
rasterra as rt import xarray as xr -from rra_tools.shell_tools import touch +from rra_tools.shell_tools import mkdir, touch DEFAULT_ROOT = "/mnt/share/erf/climate_downscale/" @@ -22,30 +22,29 @@ def root(self) -> Path: def credentials_root(self) -> Path: return self._credentials_root + ################## + # Extracted data # + ################## + @property def extracted_data(self) -> Path: return self.root / "extracted_data" @property - def era5(self) -> Path: + def extracted_era5(self) -> Path: return self.extracted_data / "era5" - def era5_path( + def extracted_era5_path( self, dataset: str, variable: str, year: int | str, month: str ) -> Path: - return self.era5 / f"{dataset}_{variable}_{year}_{month}.nc" - - def load_era5( - self, dataset: str, variable: str, year: int | str, month: str - ) -> xr.Dataset: - return xr.open_dataset(self.era5_path(dataset, variable, year, month)) + return self.extracted_era5 / f"{dataset}_{variable}_{year}_{month}.nc" @property - def cmip6(self) -> Path: + def extracted_cmip6(self) -> Path: return self.extracted_data / "cmip6" def load_cmip6_metadata(self) -> pd.DataFrame: - meta_path = self.cmip6 / "cmip6-metadata.parquet" + meta_path = self.extracted_cmip6 / "cmip6-metadata.parquet" if not meta_path.exists(): external_path = "https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv" meta = pd.read_csv(external_path) @@ -53,15 +52,6 @@ def load_cmip6_metadata(self) -> pd.DataFrame: meta.to_parquet(meta_path) return pd.read_parquet(meta_path) - @property - def era5_temperature_daily_mean(self) -> Path: - return self.extracted_data / "era5_temperature_daily_mean" - - def load_era5_temperature_daily_mean(self, year: int | str) -> xr.Dataset: - return xr.load_dataset( - self.era5_temperature_daily_mean / f"{year}_era5_temp_daily.nc" - ) - @property def ncei_climate_stations(self) -> Path: return self.extracted_data / "ncei_climate_stations" @@ -74,10 +64,6 @@ def save_ncei_climate_stations(self, df: pd.DataFrame, year: int | str) -> None: def load_ncei_climate_stations(self, year: int | str) -> pd.DataFrame: return pd.read_parquet(self.ncei_climate_stations / f"{year}.parquet") - @property - def srtm_elevation_gl1(self) -> Path: - return self.extracted_data / "srtm_elevation_gl1" - @property def open_topography_elevation(self) -> Path: return self.extracted_data / "open_topography_elevation" @@ -86,6 +72,10 @@ def open_topography_elevation(self) -> Path: def rub_local_climate_zones(self) -> Path: return self.extracted_data / "rub_local_climate_zones" + ################### + # Downscale model # + ################### + @property def downscale_model(self) -> Path: return self.root / "downscale_model" @@ -119,21 +109,33 @@ def save_training_data(self, df: pd.DataFrame, year: int | str) -> None: def load_training_data(self, year: int | str) -> pd.DataFrame: return pd.read_parquet(self.training_data / f"{year}.parquet") + ########### + # Results # + ########### + @property def results(self) -> Path: return self.root / "results" @property - def era5_daily(self) -> Path: - return self.results / "era5_daily" + def daily_results(self) -> Path: + return self.results / "daily" - def save_era5_daily( + def daily_results_path(self, scenario: str, variable: str, year: int | str) -> Path: + return self.daily_results / scenario / variable / f"{year}.nc" + + def save_daily_results( self, - ds: xr.Dataset, + results_ds: xr.Dataset, + scenario: str, variable: str, year: int | str, **encoding_kwargs: Any, - ) -> None: + ): + path = 
self.daily_results_path(scenario, variable, year) + mkdir(path.parent, exist_ok=True, parents=True) + touch(path, exist_ok=True) + encoding = { "dtype": "int16", "_FillValue": -32767, @@ -141,9 +143,16 @@ def save_era5_daily( "complevel": 1, } encoding.update(encoding_kwargs) - path = self.era5_daily / f"{variable}_{year}.nc" - touch(path, exist_ok=True) - ds.to_netcdf(path, encoding={"value": encoding}) + results_ds.to_netcdf(path, encoding={"value": encoding}) + + def load_daily_results( + self, + scenario: str, + variable: str, + year: int | str, + ) -> xr.Dataset: + results_path = self.daily_results_path(scenario, variable, year) + return xr.open_dataset(results_path) def save_raster( diff --git a/src/climate_downscale/extract/era5.py b/src/climate_downscale/extract/era5.py index 058c7d8..95f49f2 100644 --- a/src/climate_downscale/extract/era5.py +++ b/src/climate_downscale/extract/era5.py @@ -35,7 +35,9 @@ def download_era5_main( ) -> None: cddata = ClimateDownscaleData(output_dir) - final_out_path = cddata.era5_path(era5_dataset, era5_variable, year, month) + final_out_path = cddata.extracted_era5_path( + era5_dataset, era5_variable, year, month + ) download_path, download_format = get_download_spec(final_out_path) if download_path.exists(): @@ -84,7 +86,9 @@ def unzip_and_compress_era5( month: str, ) -> None: cddata = ClimateDownscaleData(output_dir) - final_out_path = cddata.era5_path(era5_dataset, era5_variable, year, month) + final_out_path = cddata.extracted_era5_path( + era5_dataset, era5_variable, year, month + ) zip_path = final_out_path.with_suffix(".zip") uncompressed_path = final_out_path.with_stem(f"{final_out_path.stem}_raw") @@ -214,7 +218,7 @@ def extract_era5( # noqa: PLR0913 to_compress = [] complete = [] for spec in itertools.product(datasets, variables, years, months): - final_out_path = cddata.era5_path(*spec) + final_out_path = cddata.extracted_era5_path(*spec) download_path, _ = get_download_spec(final_out_path) if final_out_path.exists() and download_path.exists(): @@ -250,7 +254,7 @@ def extract_era5( # noqa: PLR0913 for user in users: if to_download: download_batch.append((*to_download.pop(), user)) - if not len(download_batch) == min(len(users) * jobs_per_user, downloads_left): + if len(download_batch) != min(len(users) * jobs_per_user, downloads_left): msg = "Download batch size is incorrect" raise ValueError(msg) diff --git a/src/climate_downscale/generate/cmip6_daily.py b/src/climate_downscale/generate/cmip6_daily.py new file mode 100644 index 0000000..aa148a2 --- /dev/null +++ b/src/climate_downscale/generate/cmip6_daily.py @@ -0,0 +1,103 @@ +from pathlib import Path + +import pandas as pd +import tqdm +import xarray as xr + +from climate_downscale.data import ClimateDownscaleData +from climate_downscale.generate import utils + + +def compute_anomaly( + reference: xr.DataArray, target: xr.DataArray, anomaly_type: str +) -> xr.Dataset: + if anomaly_type == "additive": + anomaly = target.groupby("time.month") - reference + elif anomaly_type == "multiplicative": + anomaly = (target.groupby("time.month") + 1) / (reference + 1) + else: + msg = f"Unknown anomaly type: {anomaly_type}" + raise ValueError(msg) + + anomaly = ( + anomaly.drop_vars("month") + .rename({"lat": "latitude", "lon": "longitude", "time": "date"}) + .assign_coords(longitude=(anomaly.longitude + 180) % 360 - 180) + .sortby("longitude") + ) + anomaly = utils.interpolate_to_target_latlon(anomaly) + return anomaly + + +TRANSFORM_MAP = { + "tas": (utils.kelvin_to_celsius, "additive"), + "pr": 
(utils.precipitation_flux_to_rainfall, "multiplicative"), +} + + +def load_reference_and_target( + path: str | Path, year: str | int +) -> tuple[xr.Dataset, xr.Dataset]: + reference = ( + xr.open_dataset(path) + .sel(time=utils.REFERENCE_PERIOD) + .compute() # Load the subset before computing the mean, otherwise it's slow + .groupby("time.month") + .mean("time") + ) + + time_slice = slice(f"{year}-01", f"{year}-12") + time_range = pd.date_range(f"{year}-01-01", f"{year}-12-31") + target = xr.open_dataset(path).sel(time=time_slice).compute() + target = ( + target.assign_coords(time=target.time.dt.floor("D")) + .interp_calendar(time_range) + .interpolate_na(dim="time", method="nearest", fill_value="extrapolate") + ) + return reference, target + + +def generate_cmip6_daily_main( + output_dir: str | Path, + year: str | int, + target_variable: str, + cmip_scenario: str, + rerefk, +) -> None: + cd_data = ClimateDownscaleData(output_dir) + paths = cd_data.cmip6.glob(f"{target_variable}_{cmip_scenario}*.nc") + + +def compute_anomaly(path, year): + reference_period = slice("2015-01-01", "2024-12-31") + + anomaly = target.groupby("time.month") - ref + anomaly = anomaly.rename({"lat": "latitude", "lon": "longitude"}) + anomaly = anomaly.assign_coords( + longitude=(anomaly.longitude + 180) % 360 - 180 + ).sortby("longitude") + anomaly = utils.interpolate_to_target_latlon( + anomaly, target_lat=TARGET_LAT, target_lon=TARGET_LON + ) + + return anomaly + + +variable = "tas" +scenario = "ssp119" +year = "2024" + +paths = sorted( + list( + Path("/mnt/share/erf/climate_downscale/extracted_data/cmip6").glob( + "tas_ssp119*.nc" + ) + ) +) +p = paths[0] + + +a = 1 / len(paths) * compute_anomaly(paths[0], year) + +for p in tqdm.tqdm(paths[1:]): + a += 1 / len(paths) * compute_anomaly(p, year) diff --git a/src/climate_downscale/generate/cmip_daily.py b/src/climate_downscale/generate/cmip_daily.py deleted file mode 100644 index 86085e5..0000000 --- a/src/climate_downscale/generate/cmip_daily.py +++ /dev/null @@ -1,43 +0,0 @@ -import pandas as pd -import xarray as xr -from pathlib import Path -import numpy as np -import tqdm - -from climate_downscale.generate import utils - -TARGET_LON = xr.DataArray( - np.round(np.arange(-180.0, 180.0, 0.1, dtype="float32"), 1), dims="longitude" -) -TARGET_LAT = xr.DataArray( - np.round(np.arange(90.0, -90.1, -0.1, dtype="float32"), 1), dims="latitude" -) - -variable = 'tas' -scenario = 'ssp119' -year = '2024' - -paths = sorted(list(Path("/mnt/share/erf/climate_downscale/extracted_data/cmip6").glob("tas_ssp119*.nc"))) -p = paths[0] - -def compute_anomaly(path, year): - reference_period = slice("2015-01-01", "2024-12-31") - ref = xr.open_dataset(p).sel(time=reference_period).compute().groupby("time.month").mean("time") - - time_slice = slice(f"{year}-01", f"{year}-12") - time_range = pd.date_range(f"{year}-01-01", f"{year}-12-31") - target = xr.open_dataset(p).sel(time=time_slice).compute() - target = target.assign_coords(time=pd.to_datetime(target.time.dt.date)).interp_calendar(time_range) - - anomaly = target.groupby('time.month') - ref - anomaly = anomaly.rename({'lat': 'latitude', 'lon': 'longitude'}) - anomaly = anomaly.assign_coords(longitude=(anomaly.longitude + 180) % 360 - 180).sortby("longitude") - anomaly = utils.interpolate_to_target_latlon(anomaly, target_lat=TARGET_LAT, target_lon=TARGET_LON) - - return anomaly - -a = 1 / len(paths) * compute_anomaly(paths[0], year) - -for p in tqdm.tqdm(paths[1:]): - a += 1 / len(paths) * compute_anomaly(p, year) - diff --git 
a/src/climate_downscale/generate/era5_daily.py b/src/climate_downscale/generate/era5_daily.py index 71a6601..34b2a24 100644 --- a/src/climate_downscale/generate/era5_daily.py +++ b/src/climate_downscale/generate/era5_daily.py @@ -3,7 +3,6 @@ import click import dask -import numpy as np import pandas as pd import xarray as xr from rra_tools import jobmon @@ -12,13 +11,6 @@ from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData from climate_downscale.generate import utils -TARGET_LON = xr.DataArray( - np.round(np.arange(-180.0, 180.0, 0.1, dtype="float32"), 1), dims="longitude" -) -TARGET_LAT = xr.DataArray( - np.round(np.arange(90.0, -90.1, -0.1, dtype="float32"), 1), dims="latitude" -) - # Map from source variable to a unit conversion function CONVERT_MAP = { "10m_u_component_of_wind": utils.scale_wind_speed_height, @@ -54,15 +46,10 @@ utils.daily_min, (273.15, 0.01), ), - "cooling_degree_days": ( - ["2m_temperature"], - utils.cdd, - (0, 0.01), - ), - "heating_degree_days": ( - ["2m_temperature"], - utils.hdd, - (0, 0.01), + "dewpoint_temperature": ( + ["2m_dewpoint_temperature"], + utils.daily_mean, + (273.15, 0.01), ), "wind_speed": ( ["10m_u_component_of_wind", "10m_v_component_of_wind"], @@ -79,6 +66,9 @@ utils.daily_sum, (0, 0.1), ), +} + +ADDITIONAL_TRANSFORM_MAP = { "heat_index": ( ["2m_temperature", "2m_dewpoint_temperature"], lambda x, y: utils.daily_mean(utils.heat_index(x, y)), @@ -122,8 +112,8 @@ def with_variable( def load_and_shift_longitude(ds_path: str | Path) -> xr.Dataset: - ds = xr.open_dataset(ds_path).chunk(time=24) - with dask.config.set(**{'array.slicing.split_large_chunks': False}): + ds = xr.open_dataset(ds_path).chunk(time=24) + with dask.config.set(**{"array.slicing.split_large_chunks": False}): ds = ds.assign_coords(longitude=(ds.longitude + 180) % 360 - 180).sortby( "longitude" ) @@ -142,14 +132,10 @@ def load_variable( raise NotImplementedError # Substitute the single level dataset pre-interpolated at the target resolution. 
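+        # The single-levels data lives on a coarser 0.25 degree grid, so it
+        # must be interpolated up to the 0.1 degree target grid before use.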
p = root / f"reanalysis-era5-single-levels_{variable}_{year}_{month}.nc" - ds = utils.interpolate_to_target_latlon( - load_and_shift_longitude(p), - target_lat=TARGET_LAT, - target_lon=TARGET_LON, - ) + ds = utils.interpolate_to_target_latlon(load_and_shift_longitude(p)) elif dataset == "land": ds = load_and_shift_longitude(p).assign_coords( - latitude=TARGET_LAT, longitude=TARGET_LON + latitude=utils.TARGET_LAT, longitude=utils.TARGET_LON ) else: ds = load_and_shift_longitude(p) @@ -168,7 +154,7 @@ def generate_era5_daily_main( datasets = [] for month in range(1, 13): month_str = f"{month:02d}" - print(f"loading single-levels for {month_str}") + print(f"loading single-levels for {month_str}") single_level = [ load_variable(sv, year, month_str, "single-levels") for sv in source_variables @@ -179,24 +165,29 @@ def generate_era5_daily_main( ds = ds.assign(date=pd.to_datetime(ds.date)) print("interpolating") - ds_land_res = utils.interpolate_to_target_latlon(ds, TARGET_LAT, TARGET_LON) + ds_land_res = utils.interpolate_to_target_latlon(ds) print(f"loading land for {month_str}") land = [load_variable(sv, year, month_str, "land") for sv in source_variables] print("collapsing") - with dask.config.set(**{'array.slicing.split_large_chunks': False}): + with dask.config.set(**{"array.slicing.split_large_chunks": False}): ds_land = collapse_fun(*land).compute() # type: ignore[operator] ds_land = ds_land.assign(date=pd.to_datetime(ds_land.date)) print("combining") - combined = ds_land.combine_first(ds_land_res) + combined = ds_land.combine_first(ds_land_res) datasets.append(combined) ds_year = xr.concat(datasets, dim="date").sortby("date") cd_data = ClimateDownscaleData(output_dir) - cd_data.save_era5_daily( - ds_year, target_variable, year, add_offset=e_offset, scale_factor=e_scale + cd_data.save_daily_results( + ds_year, + scenario="historical", + variable=target_variable, + year=year, + add_offset=e_offset, + scale_factor=e_scale, ) @@ -232,7 +223,7 @@ def generate_era5_daily( jobmon.run_parallel( runner="cdtask", - task_name="generate era5_daily", + task_name="generate historical_daily", node_args={ "year": years, "target-variable": variables, @@ -243,7 +234,7 @@ def generate_era5_daily( task_resources={ "queue": queue, "cores": 5, - "memory": "100G", + "memory": "200G", "runtime": "120m", "project": "proj_rapidresponse", }, diff --git a/src/climate_downscale/generate/utils.py b/src/climate_downscale/generate/utils.py index 44de6d3..a0ed5b4 100644 --- a/src/climate_downscale/generate/utils.py +++ b/src/climate_downscale/generate/utils.py @@ -1,6 +1,14 @@ import numpy as np import xarray as xr +REFERENCE_PERIOD = slice("2015-01-01", "2024-12-31") +TARGET_LON = xr.DataArray( + np.round(np.arange(-180.0, 180.0, 0.1, dtype="float32"), 1), dims="longitude" +) +TARGET_LAT = xr.DataArray( + np.round(np.arange(90.0, -90.1, -0.1, dtype="float32"), 1), dims="latitude" +) + ############################# # Standard unit conversions # ############################# @@ -38,6 +46,24 @@ def meter_to_millimeter(rainfall_m: xr.Dataset) -> xr.Dataset: return 1000 * rainfall_m +def precipitation_flux_to_rainfall(precipitation_flux: xr.Dataset) -> xr.Dataset: + """Convert precipitation flux to rainfall + + Parameters + ---------- + precipitation_flux + Precipitation flux in kg m-2 s-1 + + Returns + ------- + xr.Dataset + Rainfall in mm/day + """ + seconds_per_day = 86400 + mm_per_kg_m2 = 1 + return seconds_per_day * mm_per_kg_m2 * precipitation_flux # type: ignore[no-any-return]k + + def 
scale_wind_speed_height(wind_speed_10m: xr.Dataset) -> xr.Dataset: """Scaling wind speed from a height of 10 meters to a height of 2 meters @@ -89,16 +115,6 @@ def daily_sum(ds: xr.Dataset) -> xr.Dataset: ######################## -def cdd(temperature_c: xr.Dataset) -> xr.Dataset: - """Calculate cooling degree days""" - return daily_mean(np.maximum(temperature_c - 18, 0)) # type: ignore[call-overload] - - -def hdd(temperature_c: xr.Dataset) -> xr.Dataset: - """Calculate heating degree days""" - return daily_mean(np.maximum(18 - temperature_c, 0)) # type: ignore[call-overload] - - def vector_magnitude(x: xr.Dataset, y: xr.Dataset) -> xr.Dataset: """Calculate the magnitude of a vector.""" return np.sqrt(x**2 + y**2) # type: ignore[no-any-return] @@ -155,7 +171,8 @@ def rh_percent( def heat_index( - temperature_c: xr.Dataset, dewpoint_temperature_c: xr.Dataset + temperature_c: xr.Dataset, + dewpoint_temperature_c: xr.Dataset, ) -> xr.Dataset: """Calculate the heat index. @@ -195,7 +212,8 @@ def heat_index( def humidex( - temperature_c: xr.Dataset, dewpoint_temperature_c: xr.Dataset + temperature_c: xr.Dataset, + dewpoint_temperature_c: xr.Dataset, ) -> xr.Dataset: """Calculate the humidex. @@ -269,9 +287,7 @@ def rename_val_column(ds: xr.Dataset) -> xr.Dataset: def interpolate_to_target_latlon( ds: xr.Dataset, - target_lat: xr.DataArray, - target_lon: xr.DataArray, ) -> xr.Dataset: return ds.interp( - longitude=target_lon, latitude=target_lat, method="nearest" + longitude=TARGET_LON, latitude=TARGET_LAT, method="nearest" ).interpolate_na(dim="longitude", method="nearest", fill_value="extrapolate") From c61f2c6b141ec942d0b779c3afca3b609d1f2fe0 Mon Sep 17 00:00:00 2001 From: collijk Date: Fri, 14 Jun 2024 21:44:35 -0700 Subject: [PATCH 40/71] Change era5_daily to historical_daily --- src/climate_downscale/generate/__init__.py | 10 +++++----- .../generate/{era5_daily.py => historical_daily.py} | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) rename src/climate_downscale/generate/{era5_daily.py => historical_daily.py} (97%) diff --git a/src/climate_downscale/generate/__init__.py b/src/climate_downscale/generate/__init__.py index 21710f6..675c482 100644 --- a/src/climate_downscale/generate/__init__.py +++ b/src/climate_downscale/generate/__init__.py @@ -1,12 +1,12 @@ -from climate_downscale.generate.era5_daily import ( - generate_era5_daily, - generate_era5_daily_task, +from climate_downscale.generate.historical_daily import ( + generate_historical_daily, + generate_historical_daily_task, ) RUNNERS = { - "era5_daily": generate_era5_daily, + "historical_daily": generate_historical_daily, } TASK_RUNNERS = { - "era5_daily": generate_era5_daily_task, + "historical_daily": generate_historical_daily_task, } diff --git a/src/climate_downscale/generate/era5_daily.py b/src/climate_downscale/generate/historical_daily.py similarity index 97% rename from src/climate_downscale/generate/era5_daily.py rename to src/climate_downscale/generate/historical_daily.py index 34b2a24..4d051f9 100644 --- a/src/climate_downscale/generate/era5_daily.py +++ b/src/climate_downscale/generate/historical_daily.py @@ -144,7 +144,7 @@ def load_variable( return ds -def generate_era5_daily_main( +def generate_historical_daily_main( output_dir: str | Path, year: str, target_variable: str, @@ -195,12 +195,12 @@ def generate_era5_daily_main( @clio.with_output_directory(DEFAULT_ROOT) @clio.with_year() @with_variable() -def generate_era5_daily_task( +def generate_historical_daily_task( output_dir: str, year: str, 
target_variable: str, ) -> None: - generate_era5_daily_main(output_dir, year, target_variable) + generate_historical_daily_main(output_dir, year, target_variable) @click.command() # type: ignore[arg-type] @@ -208,7 +208,7 @@ def generate_era5_daily_task( @clio.with_year(allow_all=True) @with_variable(allow_all=True) @clio.with_queue() -def generate_era5_daily( +def generate_historical_daily( output_dir: str, year: str, target_variable: str, From 7f177c5f42a73603bdb33f9cb1a6068a13fef804 Mon Sep 17 00:00:00 2001 From: collijk Date: Sat, 15 Jun 2024 08:19:55 -0700 Subject: [PATCH 41/71] Add overwrite --- src/climate_downscale/cli_options.py | 10 +++++++ .../generate/historical_daily.py | 26 ++++++++++++++++--- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/src/climate_downscale/cli_options.py b/src/climate_downscale/cli_options.py index 879465e..31cc067 100644 --- a/src/climate_downscale/cli_options.py +++ b/src/climate_downscale/cli_options.py @@ -1,5 +1,6 @@ from typing import ParamSpec, TypeVar +import click from rra_tools.cli_tools import ( RUN_ALL, ClickOption, @@ -192,6 +193,14 @@ def with_lon_start( ) +def with_overwrite() -> ClickOption[_P, _T]: + return click.option( + "--overwrite", + is_flag=True, + help="Overwrite existing files.", + ) + + __all__ = [ "VALID_YEARS", "VALID_MONTHS", @@ -222,4 +231,5 @@ def with_lon_start( "RUN_ALL", "ClickOption", "with_choice", + "with_overwrite", ] diff --git a/src/climate_downscale/generate/historical_daily.py b/src/climate_downscale/generate/historical_daily.py index 4d051f9..6b39bc4 100644 --- a/src/climate_downscale/generate/historical_daily.py +++ b/src/climate_downscale/generate/historical_daily.py @@ -1,3 +1,4 @@ +import itertools import typing from pathlib import Path @@ -208,26 +209,43 @@ def generate_historical_daily_task( @clio.with_year(allow_all=True) @with_variable(allow_all=True) @clio.with_queue() +@clio.with_overwrite() def generate_historical_daily( output_dir: str, year: str, target_variable: str, queue: str, + overwrite: bool, ) -> None: + cd_data = ClimateDownscaleData(output_dir) + years = clio.VALID_YEARS if year == clio.RUN_ALL else [year] variables = ( list(TRANSFORM_MAP.keys()) if target_variable == clio.RUN_ALL else [target_variable] ) + years_and_variables = [] + complete = [] + for y, v in itertools.product(years, variables): + path = cd_data.daily_results_path("historical", v, y) + if not path.exists() or overwrite: + years_and_variables.append((y, v)) + else: + complete.append((y, v)) + + print( + f"{len(complete)} tasks already done. 
" + f"Launching {len(years_and_variables)} tasks" + ) jobmon.run_parallel( runner="cdtask", task_name="generate historical_daily", - node_args={ - "year": years, - "target-variable": variables, - }, + flat_node_args=( + ("year", "target_variable"), + years_and_variables, + ), task_args={ "output-dir": output_dir, }, From 9309a64b6b2653a9a7389e12a954d69155e38c18 Mon Sep 17 00:00:00 2001 From: collijk Date: Sat, 15 Jun 2024 08:20:20 -0700 Subject: [PATCH 42/71] Bump runtime --- src/climate_downscale/generate/historical_daily.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/climate_downscale/generate/historical_daily.py b/src/climate_downscale/generate/historical_daily.py index 6b39bc4..a5d3ffa 100644 --- a/src/climate_downscale/generate/historical_daily.py +++ b/src/climate_downscale/generate/historical_daily.py @@ -253,7 +253,7 @@ def generate_historical_daily( "queue": queue, "cores": 5, "memory": "200G", - "runtime": "120m", + "runtime": "240m", "project": "proj_rapidresponse", }, max_attempts=1, From d37be57d6291f55deb722608c8cb1e37be5ce315 Mon Sep 17 00:00:00 2001 From: collijk Date: Sat, 15 Jun 2024 09:18:06 -0700 Subject: [PATCH 43/71] Add worflow to generate historical reference --- src/climate_downscale/data.py | 2 +- src/climate_downscale/generate/__init__.py | 6 ++ .../generate/historical_daily.py | 12 +-- .../generate/historical_reference.py | 81 +++++++++++++++++++ src/climate_downscale/generate/utils.py | 3 +- 5 files changed, 97 insertions(+), 7 deletions(-) create mode 100644 src/climate_downscale/generate/historical_reference.py diff --git a/src/climate_downscale/data.py b/src/climate_downscale/data.py index ef77ebf..4e6e3f9 100644 --- a/src/climate_downscale/data.py +++ b/src/climate_downscale/data.py @@ -130,7 +130,7 @@ def save_daily_results( scenario: str, variable: str, year: int | str, - **encoding_kwargs: Any, + encoding_kwargs: dict[str, Any], ): path = self.daily_results_path(scenario, variable, year) mkdir(path.parent, exist_ok=True, parents=True) diff --git a/src/climate_downscale/generate/__init__.py b/src/climate_downscale/generate/__init__.py index 675c482..022426b 100644 --- a/src/climate_downscale/generate/__init__.py +++ b/src/climate_downscale/generate/__init__.py @@ -2,11 +2,17 @@ generate_historical_daily, generate_historical_daily_task, ) +from climate_downscale.generate.historical_reference import ( + generate_historical_reference, + generate_historical_reference_task, +) RUNNERS = { "historical_daily": generate_historical_daily, + "historical_reference": generate_historical_reference, } TASK_RUNNERS = { "historical_daily": generate_historical_daily_task, + "historical_reference": generate_historical_reference_task, } diff --git a/src/climate_downscale/generate/historical_daily.py b/src/climate_downscale/generate/historical_daily.py index a5d3ffa..979312a 100644 --- a/src/climate_downscale/generate/historical_daily.py +++ b/src/climate_downscale/generate/historical_daily.py @@ -99,7 +99,7 @@ _T = typing.TypeVar("_T") -def with_variable( +def with_target_variable( *, allow_all: bool = False, ) -> clio.ClickOption[_P, _T]: @@ -187,15 +187,17 @@ def generate_historical_daily_main( scenario="historical", variable=target_variable, year=year, - add_offset=e_offset, - scale_factor=e_scale, + encoding_kwargs={ + "add_offset": e_offset, + "scale_factor": e_scale, + }, ) @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) @clio.with_year() -@with_variable() +@with_target_variable() def 
generate_historical_daily_task( output_dir: str, year: str, @@ -207,7 +209,7 @@ def generate_historical_daily_task( @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) @clio.with_year(allow_all=True) -@with_variable(allow_all=True) +@with_target_variable(allow_all=True) @clio.with_queue() @clio.with_overwrite() def generate_historical_daily( diff --git a/src/climate_downscale/generate/historical_reference.py b/src/climate_downscale/generate/historical_reference.py new file mode 100644 index 0000000..6fee78d --- /dev/null +++ b/src/climate_downscale/generate/historical_reference.py @@ -0,0 +1,81 @@ +import click +import xarray as xr +from rra_tools import jobmon + +from climate_downscale import cli_options as clio +from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData +from climate_downscale.generate import utils +from climate_downscale.generate.historical_daily import ( + TRANSFORM_MAP, + with_target_variable, +) + + +def generate_historical_reference_main( + output_dir: str, + target_variable: str, +) -> None: + cd_data = ClimateDownscaleData(output_dir) + paths = [ + cd_data.daily_results_path("historical", target_variable, year) + for year in utils.REFERENCE_YEARS + ] + + reference_data = [] + for path in paths: + ds = xr.load_dataset(path).groupby("time.month").mean("time") + reference_data.append(ds) + + encoding_kwargs = xr.open_dataset(paths[0])["value"].encoding + + reference = sum(reference_data) / len(reference_data) + cd_data.save_daily_results( + reference, + scenario="historical", + variable=target_variable, + year="reference", + encoding_kwargs=encoding_kwargs, + ) + + +@click.command() +@clio.with_output_directory(DEFAULT_ROOT) +@with_target_variable() +def generate_historical_reference_task( + output_dir: str, + target_variable: str, +) -> None: + generate_historical_reference_main(output_dir, target_variable) + + +@click.command() +@clio.with_output_directory(DEFAULT_ROOT) +@with_target_variable(allow_all=True) +@clio.with_queue() +def generate_historical_reference( + output_dir: str, + target_variable: str, + queue: str, +) -> None: + variables = ( + list(TRANSFORM_MAP) if target_variable == clio.RUN_ALL else [target_variable] + ) + + jobmon.run_parallel( + runner="cdtask", + task_name="generate historical_reference", + node_args={ + "target-variable": variables, + }, + task_args={ + "output-dir": output_dir, + }, + task_resources={ + "queue": queue, + "cores": 1, + "memory": "100G", + "runtime": "240m", + "project": "proj_rapidresponse", + }, + max_attempts=1, + ) diff --git a/src/climate_downscale/generate/utils.py b/src/climate_downscale/generate/utils.py index a0ed5b4..f87d14a 100644 --- a/src/climate_downscale/generate/utils.py +++ b/src/climate_downscale/generate/utils.py @@ -1,7 +1,8 @@ import numpy as np import xarray as xr -REFERENCE_PERIOD = slice("2015-01-01", "2024-12-31") +REFERENCE_YEARS = list(range(2018, 2024)) +REFERENCE_PERIOD = slice(f"{REFERENCE_YEARS[0]}-01-01", f"{REFERENCE_YEARS[-1]}-12-31") TARGET_LON = xr.DataArray( np.round(np.arange(-180.0, 180.0, 0.1, dtype="float32"), 1), dims="longitude" ) From 018851877525cf808bd56de70a2d737a459b2257 Mon Sep 17 00:00:00 2001 From: James Collins Date: Sat, 15 Jun 2024 18:21:44 -0700 Subject: [PATCH 44/71] typo --- src/climate_downscale/generate/historical_daily.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/climate_downscale/generate/historical_daily.py b/src/climate_downscale/generate/historical_daily.py index a5d3ffa..362ae39 100644 
--- a/src/climate_downscale/generate/historical_daily.py +++ b/src/climate_downscale/generate/historical_daily.py @@ -243,7 +243,7 @@ def generate_historical_daily( runner="cdtask", task_name="generate historical_daily", flat_node_args=( - ("year", "target_variable"), + ("year", "target-variable"), years_and_variables, ), task_args={ From ec7cf80817f76aec3be995735c86074812c4cb54 Mon Sep 17 00:00:00 2001 From: collijk Date: Sat, 15 Jun 2024 18:34:43 -0700 Subject: [PATCH 45/71] Add tasmin/tasmax, overwrite option, and some robustness --- src/climate_downscale/cli_options.py | 2 + src/climate_downscale/data.py | 3 + src/climate_downscale/extract/cmip6.py | 61 +++++++++++++------ .../{cmip6_daily.py => scenario_daily.py} | 10 +-- 4 files changed, 52 insertions(+), 24 deletions(-) rename src/climate_downscale/generate/{cmip6_daily.py => scenario_daily.py} (99%) diff --git a/src/climate_downscale/cli_options.py b/src/climate_downscale/cli_options.py index 31cc067..38117ce 100644 --- a/src/climate_downscale/cli_options.py +++ b/src/climate_downscale/cli_options.py @@ -147,6 +147,8 @@ def with_cmip6_experiment( "vas", "hurs", "tas", + "tasmin", + "tasmax", "pr", ] diff --git a/src/climate_downscale/data.py b/src/climate_downscale/data.py index 4e6e3f9..6af375b 100644 --- a/src/climate_downscale/data.py +++ b/src/climate_downscale/data.py @@ -52,6 +52,9 @@ def load_cmip6_metadata(self) -> pd.DataFrame: meta.to_parquet(meta_path) return pd.read_parquet(meta_path) + def extracted_cmip6_path(self, variable: str, experiment: str, source: str, member: str) -> Path: + return self.extracted_cmip6 / f"{variable}_{experiment}_{source}_{member}.nc" + @property def ncei_climate_stations(self) -> Path: return self.extracted_data / "ncei_climate_stations" diff --git a/src/climate_downscale/extract/cmip6.py b/src/climate_downscale/extract/cmip6.py index eeb8863..e032658 100644 --- a/src/climate_downscale/extract/cmip6.py +++ b/src/climate_downscale/extract/cmip6.py @@ -1,3 +1,4 @@ +import itertools from pathlib import Path import click @@ -13,6 +14,8 @@ "vas": (0.0, 0.01), "hurs": (0.0, 0.01), "tas": (273.15, 0.01), + "tasmin": (273.15, 0.01), + "tasmax": (273.15, 0.01), "pr": (0.0, 1e-9), } @@ -34,6 +37,7 @@ def extract_cmip6_main( cmip6_source: str, cmip6_experiment: str, cmip6_variable: str, + overwrite: bool, ) -> None: print(f"Checking metadata for {cmip6_source} {cmip6_experiment} {cmip6_variable}") cd_data = ClimateDownscaleData(output_dir) @@ -50,26 +54,40 @@ def extract_cmip6_main( print(f"Extracting {len(meta_subset)} members...") for member, zstore_path in meta_subset.items(): - print("Extracting", member, zstore_path) - cmip_data = load_cmip_data(zstore_path) - out_filename = f"{cmip6_variable}_{cmip6_experiment}_{cmip6_source}_{member}.nc" - out_path = cd_data.cmip6 / out_filename - shell_tools.touch(out_path, exist_ok=True) - shift, scale = VARIABLE_ENCODINGS[cmip6_variable] - print("Writing to", out_path) - cmip_data.to_netcdf( - out_path, - encoding={ - cmip6_variable: { - "dtype": "int16", - "scale_factor": scale, - "add_offset": shift, - "_FillValue": -32767, - "zlib": True, - "complevel": 1, - } - }, + out_path = cd_data.extracted_cmip6_path( + cmip6_variable, + cmip6_experiment, + cmip6_source, + member, ) + if out_path.exists() and not overwrite: + print("Skipping", member, zstore_path) + continue + + try: + print("Extracting", member, zstore_path) + cmip_data = load_cmip_data(zstore_path) + + shell_tools.touch(out_path, exist_ok=True) + shift, scale = VARIABLE_ENCODINGS[cmip6_variable] + 
print("Writing to", out_path) + cmip_data.to_netcdf( + out_path, + encoding={ + cmip6_variable: { + "dtype": "int16", + "scale_factor": scale, + "add_offset": shift, + "_FillValue": -32767, + "zlib": True, + "complevel": 1, + } + }, + ) + except Exception as e: + if out_path.exists(): + out_path.unlink() + raise e @click.command() # type: ignore[arg-type] @@ -77,13 +95,15 @@ def extract_cmip6_main( @clio.with_cmip6_source() @clio.with_cmip6_experiment() @clio.with_cmip6_variable() +@clio.with_overwrite() def extract_cmip6_task( output_dir: str, cmip6_source: str, cmip6_experiment: str, cmip6_variable: str, + overwrite: bool, ) -> None: - extract_cmip6_main(output_dir, cmip6_source, cmip6_experiment, cmip6_variable) + extract_cmip6_main(output_dir, cmip6_source, cmip6_experiment, cmip6_variable, overwrite) @click.command() # type: ignore[arg-type] @@ -92,6 +112,7 @@ def extract_cmip6_task( @clio.with_cmip6_experiment(allow_all=True) @clio.with_cmip6_variable(allow_all=True) @clio.with_queue() +@clio.with_overwrite() def extract_cmip6( output_dir: str, cmip6_source: str, diff --git a/src/climate_downscale/generate/cmip6_daily.py b/src/climate_downscale/generate/scenario_daily.py similarity index 99% rename from src/climate_downscale/generate/cmip6_daily.py rename to src/climate_downscale/generate/scenario_daily.py index aa148a2..283ff77 100644 --- a/src/climate_downscale/generate/cmip6_daily.py +++ b/src/climate_downscale/generate/scenario_daily.py @@ -7,6 +7,11 @@ from climate_downscale.data import ClimateDownscaleData from climate_downscale.generate import utils +TRANSFORM_MAP = { + "tas": (utils.kelvin_to_celsius, "additive"), + "pr": (utils.precipitation_flux_to_rainfall, "multiplicative"), +} + def compute_anomaly( reference: xr.DataArray, target: xr.DataArray, anomaly_type: str @@ -29,10 +34,7 @@ def compute_anomaly( return anomaly -TRANSFORM_MAP = { - "tas": (utils.kelvin_to_celsius, "additive"), - "pr": (utils.precipitation_flux_to_rainfall, "multiplicative"), -} + def load_reference_and_target( From fd58f3d60f0b0f22e57df28dd263f481434dbbc1 Mon Sep 17 00:00:00 2001 From: collijk Date: Sat, 15 Jun 2024 18:44:40 -0700 Subject: [PATCH 46/71] Add tasmin/tasmax, overwrite option, and some robustness. Fix bugs. 
--- src/climate_downscale/data.py | 2 +- .../generate/historical_daily.py | 42 ++-- .../generate/historical_reference.py | 18 +- .../generate/scenario_daily.py | 121 +++++----- src/climate_downscale/generate/utils.py | 10 +- src/climate_downscale/old_climate/__init__.py | 0 src/climate_downscale/old_climate/data.py | 219 ------------------ .../old_climate/project_anomaly.py | 127 ---------- .../old_climate/project_climate.py | 172 -------------- 9 files changed, 104 insertions(+), 607 deletions(-) delete mode 100644 src/climate_downscale/old_climate/__init__.py delete mode 100644 src/climate_downscale/old_climate/data.py delete mode 100644 src/climate_downscale/old_climate/project_anomaly.py delete mode 100644 src/climate_downscale/old_climate/project_climate.py diff --git a/src/climate_downscale/data.py b/src/climate_downscale/data.py index 6af375b..f27355d 100644 --- a/src/climate_downscale/data.py +++ b/src/climate_downscale/data.py @@ -134,7 +134,7 @@ def save_daily_results( variable: str, year: int | str, encoding_kwargs: dict[str, Any], - ): + ) -> None: path = self.daily_results_path(scenario, variable, year) mkdir(path.parent, exist_ok=True, parents=True) touch(path, exist_ok=True) diff --git a/src/climate_downscale/generate/historical_daily.py b/src/climate_downscale/generate/historical_daily.py index 979312a..20012fa 100644 --- a/src/climate_downscale/generate/historical_daily.py +++ b/src/climate_downscale/generate/historical_daily.py @@ -122,24 +122,31 @@ def load_and_shift_longitude(ds_path: str | Path) -> xr.Dataset: def load_variable( + cd_data: ClimateDownscaleData, variable: str, year: str, month: str, dataset: str = "single-levels", ) -> xr.Dataset: - root = Path("/mnt/share/erf/climate_downscale/extracted_data/era5") - p = root / f"reanalysis-era5-{dataset}_{variable}_{year}_{month}.nc" - if dataset == "land" and not p.exists(): - raise NotImplementedError - # Substitute the single level dataset pre-interpolated at the target resolution. - p = root / f"reanalysis-era5-single-levels_{variable}_{year}_{month}.nc" - ds = utils.interpolate_to_target_latlon(load_and_shift_longitude(p)) + path = cd_data.extracted_era5_path(dataset, variable, year, month) + if dataset == "land" and not path.exists(): + if variable != "total_sky_direct_solar_radiation_at_surface": + # We only fallback for the one dataset, otherwise extraction failed. + msg = f"Land dataset not found for {variable}. Extraction likely failed." + raise ValueError(msg) + # If the land dataset doesn't exist, fall back to the single-levels dataset + path = cd_data.extracted_era5_path("single-levels", variable, year, month) + ds = load_and_shift_longitude(path) + # We expect this to already be in the correct grid, so interpolate. + ds = utils.interpolate_to_target_latlon(ds) elif dataset == "land": - ds = load_and_shift_longitude(p).assign_coords( - latitude=utils.TARGET_LAT, longitude=utils.TARGET_LON - ) + ds = load_and_shift_longitude(path) + # There are some slight numerical differences in the lat/long for some of + # the land datasets. They are gridded consistently, so just tweak the + # coordinates so things align. 
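+        # assign_coords only relabels the coordinate arrays; no data values
+        # are moved or reindexed, so this is safe when the grids match.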
+ ds = ds.assign_coords(latitude=utils.TARGET_LAT, longitude=utils.TARGET_LON) else: - ds = load_and_shift_longitude(p) + ds = load_and_shift_longitude(path) conversion = CONVERT_MAP[variable] ds = conversion(utils.rename_val_column(ds)) return ds @@ -150,14 +157,15 @@ def generate_historical_daily_main( year: str, target_variable: str, ) -> None: - source_variables, collapse_fun, (e_offset, e_scale) = TRANSFORM_MAP[target_variable] + cd_data = ClimateDownscaleData(output_dir) + source_variables, collapse_fun, (e_offset, e_scale) = TRANSFORM_MAP[target_variable] datasets = [] for month in range(1, 13): month_str = f"{month:02d}" print(f"loading single-levels for {month_str}") single_level = [ - load_variable(sv, year, month_str, "single-levels") + load_variable(cd_data, sv, year, month_str, "single-levels") for sv in source_variables ] print("collapsing") @@ -169,7 +177,10 @@ def generate_historical_daily_main( ds_land_res = utils.interpolate_to_target_latlon(ds) print(f"loading land for {month_str}") - land = [load_variable(sv, year, month_str, "land") for sv in source_variables] + land = [ + load_variable(cd_data, sv, year, month_str, "land") + for sv in source_variables + ] print("collapsing") with dask.config.set(**{"array.slicing.split_large_chunks": False}): ds_land = collapse_fun(*land).compute() # type: ignore[operator] @@ -181,7 +192,6 @@ def generate_historical_daily_main( ds_year = xr.concat(datasets, dim="date").sortby("date") - cd_data = ClimateDownscaleData(output_dir) cd_data.save_daily_results( ds_year, scenario="historical", @@ -217,7 +227,7 @@ def generate_historical_daily( year: str, target_variable: str, queue: str, - overwrite: bool, + overwrite: bool, # noqa: FBT001 ) -> None: cd_data = ClimateDownscaleData(output_dir) diff --git a/src/climate_downscale/generate/historical_reference.py b/src/climate_downscale/generate/historical_reference.py index 6fee78d..e68213c 100644 --- a/src/climate_downscale/generate/historical_reference.py +++ b/src/climate_downscale/generate/historical_reference.py @@ -20,15 +20,29 @@ def generate_historical_reference_main( cd_data.daily_results_path("historical", target_variable, year) for year in utils.REFERENCE_YEARS ] + print(f"Building reference data from: {len(paths)} files.") reference_data = [] for path in paths: - ds = xr.load_dataset(path).groupby("time.month").mean("time") + print(f"Loading: {path}") + ds = xr.load_dataset(path) + print("Computing monthly means") + ds = ds.groupby("time.month").mean("time") reference_data.append(ds) - encoding_kwargs = xr.open_dataset(paths[0])["value"].encoding + old_encoding = { + k: v for k, v in xr.open_dataset(paths[0])["value"].encoding.items() + if k in ['dtype', '_FillValue', 'scale_factor', 'add_offset'] + } + encoding_kwargs = { + "zlib": True, + "complevel": 1, + **old_encoding, + } + print("Averaging years by month") reference = sum(reference_data) / len(reference_data) + print("Saving reference data") cd_data.save_daily_results( reference, scenario="historical", diff --git a/src/climate_downscale/generate/scenario_daily.py b/src/climate_downscale/generate/scenario_daily.py index 283ff77..8bfad67 100644 --- a/src/climate_downscale/generate/scenario_daily.py +++ b/src/climate_downscale/generate/scenario_daily.py @@ -7,14 +7,51 @@ from climate_downscale.data import ClimateDownscaleData from climate_downscale.generate import utils -TRANSFORM_MAP = { - "tas": (utils.kelvin_to_celsius, "additive"), - "pr": (utils.precipitation_flux_to_rainfall, "multiplicative"), + +# Map from source 
variable to a unit conversion function +CONVERT_MAP = { + "tas": utils.kelvin_to_celsius, + "pr": utils.precipitation_flux_to_rainfall, } +def load_and_shift_longitude( + ds_path: str | Path, + time_slice: slice, +) -> xr.Dataset: + ds = xr.open_dataset(ds_path).sel(time=time_slice).compute() + ds = ( + ds + .rename({"lat": "latitude", "lon": "longitude", "time": "date"}) + .assign_coords(longitude=(ds.longitude + 180) % 360 - 180) + .sortby("longitude") + ) + return ds + + +def load_variable( + member_path: str | Path, + variable: str, + year: str, +) -> xr.Dataset: + if year == "reference": + ds = load_and_shift_longitude(member_path, utils.REFERENCE_PERIOD) + ds = ds.groupby("date.month").mean("date") + else: + time_slice = slice(f"{year}-01-01", f"{year}-12-31") + time_range = pd.date_range(f"{year}-01-01", f"{year}-12-31") + ds = load_and_shift_longitude(member_path, time_slice) + ds = ( + ds.assign_coords(date=ds.date.dt.floor("D")) + .interp(date=time_range) + .interpolate_na(dim="date", method="nearest", fill_value="extrapolate") + ) + conversion = CONVERT_MAP[variable] + ds = conversion(utils.rename_val_column(ds)) + return ds + def compute_anomaly( - reference: xr.DataArray, target: xr.DataArray, anomaly_type: str + reference: xr.Dataset, target: xr.Dataset, anomaly_type: str ) -> xr.Dataset: if anomaly_type == "additive": anomaly = target.groupby("time.month") - reference @@ -33,73 +70,25 @@ def compute_anomaly( anomaly = utils.interpolate_to_target_latlon(anomaly) return anomaly - - - - -def load_reference_and_target( - path: str | Path, year: str | int -) -> tuple[xr.Dataset, xr.Dataset]: - reference = ( - xr.open_dataset(path) - .sel(time=utils.REFERENCE_PERIOD) - .compute() # Load the subset before computing the mean, otherwise it's slow - .groupby("time.month") - .mean("time") - ) - - time_slice = slice(f"{year}-01", f"{year}-12") - time_range = pd.date_range(f"{year}-01-01", f"{year}-12-31") - target = xr.open_dataset(path).sel(time=time_slice).compute() - target = ( - target.assign_coords(time=target.time.dt.floor("D")) - .interp_calendar(time_range) - .interpolate_na(dim="time", method="nearest", fill_value="extrapolate") - ) - return reference, target - - -def generate_cmip6_daily_main( +def generate_scenario_daily_main( output_dir: str | Path, year: str | int, target_variable: str, cmip_scenario: str, - rerefk, ) -> None: cd_data = ClimateDownscaleData(output_dir) - paths = cd_data.cmip6.glob(f"{target_variable}_{cmip_scenario}*.nc") - - -def compute_anomaly(path, year): - reference_period = slice("2015-01-01", "2024-12-31") - - anomaly = target.groupby("time.month") - ref - anomaly = anomaly.rename({"lat": "latitude", "lon": "longitude"}) - anomaly = anomaly.assign_coords( - longitude=(anomaly.longitude + 180) % 360 - 180 - ).sortby("longitude") - anomaly = utils.interpolate_to_target_latlon( - anomaly, target_lat=TARGET_LAT, target_lon=TARGET_LON - ) - - return anomaly - - -variable = "tas" -scenario = "ssp119" -year = "2024" - -paths = sorted( - list( - Path("/mnt/share/erf/climate_downscale/extracted_data/cmip6").glob( - "tas_ssp119*.nc" + paths = cd_data.extracted_cmip6.glob(f"{target_variable}_{cmip_scenario}*.nc") + + for path in paths: + reference = load_variable(path, target_variable, "reference") + target = load_variable(path, target_variable, year) + + anomaly_type = TRANSFORM_MAP[target_variable][1] + anomaly = compute_anomaly(reference, target, anomaly_type) + cd_data.save_daily_results( + anomaly, + scenario=cmip_scenario, + variable=target_variable, + 
year=year, ) - ) -) -p = paths[0] - - -a = 1 / len(paths) * compute_anomaly(paths[0], year) -for p in tqdm.tqdm(paths[1:]): - a += 1 / len(paths) * compute_anomaly(p, year) diff --git a/src/climate_downscale/generate/utils.py b/src/climate_downscale/generate/utils.py index f87d14a..366db11 100644 --- a/src/climate_downscale/generate/utils.py +++ b/src/climate_downscale/generate/utils.py @@ -62,7 +62,7 @@ def precipitation_flux_to_rainfall(precipitation_flux: xr.Dataset) -> xr.Dataset """ seconds_per_day = 86400 mm_per_kg_m2 = 1 - return seconds_per_day * mm_per_kg_m2 * precipitation_flux # type: ignore[no-any-return]k + return seconds_per_day * mm_per_kg_m2 * precipitation_flux # type: ignore[no-any-return] def scale_wind_speed_height(wind_speed_10m: xr.Dataset) -> xr.Dataset: @@ -289,6 +289,8 @@ def rename_val_column(ds: xr.Dataset) -> xr.Dataset: def interpolate_to_target_latlon( ds: xr.Dataset, ) -> xr.Dataset: - return ds.interp( - longitude=TARGET_LON, latitude=TARGET_LAT, method="nearest" - ).interpolate_na(dim="longitude", method="nearest", fill_value="extrapolate") + return ( + ds.interp(longitude=TARGET_LON, latitude=TARGET_LAT, method="nearest") + .interpolate_na(dim="longitude", method="nearest", fill_value="extrapolate") + .interpolate_na(dim="latitude", method="nearest", fill_value="extrapolate") + ) diff --git a/src/climate_downscale/old_climate/__init__.py b/src/climate_downscale/old_climate/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/climate_downscale/old_climate/data.py b/src/climate_downscale/old_climate/data.py deleted file mode 100644 index b5422d2..0000000 --- a/src/climate_downscale/old_climate/data.py +++ /dev/null @@ -1,219 +0,0 @@ -import itertools -from collections.abc import Callable - -import gcsfs -import pandas as pd -import xarray as xr - - -def load_cmip_metadata( - tables: tuple[str, ...] = ("Amon", "day"), - variables: tuple[str, ...] = ("tas", "pr"), - experiments: tuple[str, ...] = ( - "historical", - "ssp126", - "ssp245", - "ssp370", - "ssp585", - ), -) -> pd.DataFrame: - """Loads CMIP6 metadata for the given tables, variables, and experiments. - - Parameters - ---------- - tables - The tables to include. - variables - The variables to include. - experiments - The experiments to include. - - Returns - ------- - pd.DataFrame - CMIP6 metadata containing only the institutions and sources with all - tables, variables, and experiments. - """ - all_models = load_raw_cmip_metadata() - models_and_params = filter_institutions_and_sources( - all_models, - tables, - variables, - experiments, - ) - - # There should be no duplicates here, but there are. I'm not going to investigate - # why, but I'm just going to drop them. - member_count = models_and_params.groupby( - ["institution_id", "source_id", "member_id"] - )["activity_id"].count() - expected_count = len(tables) * len(variables) * len(experiments) - member_mask = member_count == expected_count - - final_models = ( - models_and_params.set_index(["institution_id", "source_id", "member_id"]) - .loc[member_mask[member_mask].index] - .reset_index() - ) - - # Filter to the models we need for the anomaly analysis. 
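# NOTE: the two masks below keep monthly ("Amon") tables only for the
# historical experiment and daily ("day") tables only for the scenario
# experiments. That split matches the anomaly design used elsewhere in this
# series: the historical runs only need to supply a monthly climatology as a
# baseline, while the scenario runs are consumed at daily resolution.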
- monthly_historical = (final_models["table_id"] == "Amon") & ( - final_models["experiment_id"] == "historical" - ) - daily_scenario = (final_models["table_id"] == "day") & ( - final_models["experiment_id"] != "historical" - ) - return final_models.loc[monthly_historical | daily_scenario] - - -def load_cmip_historical_data(path: str) -> xr.Dataset: - """Loads a CMIP6 historical dataset from a zarr path. - - Parameters - ---------- - path - The path to the zarr store. - - Returns - ------- - xr.Dataset - The CMIP6 historical dataset. - """ - reference_period = slice("1981-01-15", "2010-12-15") - return ( - load_cmip_data(path) - .sel(time=reference_period) - .groupby("time.month") - .mean("time") - ) - - -def load_cmip_experiment_data(path: str, year: str) -> xr.Dataset: - """Loads a CMIP6 experiment dataset from a zarr path by day for a given year. - - Parameters - ---------- - path - The path to the zarr store. - year - The year to load. - - Returns - ------- - xr.Dataset - The CMIP6 experiment dataset for the given year. - """ "" - time_slice = slice(f"{year}-01", f"{year}-12") - time_range = pd.date_range(f"{year}-01-01", f"{year}-12-31") - return load_cmip_data(path).sel(time=time_slice).interp_calendar(time_range) - - -################## -# Helper methods # -################## - - -def load_raw_cmip_metadata() -> pd.DataFrame: - """Loads metadata containing information about all CMIP6 models.""" - path = "https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv" - return pd.read_csv(path) - - -def load_cmip_data(zarr_path: str) -> xr.Dataset: - """Loads a CMIP6 dataset from a zarr path.""" - gcs = gcsfs.GCSFileSystem(token="anon") # noqa: S106 - mapper = gcs.get_mapper(zarr_path) - ds = xr.open_zarr(mapper, consolidated=True) - lon = (ds.lon + 180) % 360 - 180 - ds = ds.assign_coords(lon=lon).sortby("lon") - ds = ds.drop( - ["lat_bnds", "lon_bnds", "time_bnds", "height", "time_bounds", "bnds"], - errors="ignore", - ) - return ds # type: ignore[no-any-return] - - -def contains_combo( - table: str, - variable: str, - experiment: str, -) -> Callable[[pd.DataFrame], bool]: - """Get a function to check if a dataset contains a given cmip metadata combination. - - Parameters - ---------- - table - The table to check for. - variable - The variable to check for. - experiment - The experiment to check for. - - Returns - ------- - Callable[[pd.DataFrame], bool] - A function that checks if a dataset contains a given cmip metadata combination. - """ - - def _check(df: pd.DataFrame) -> bool: - return ( - df["table_id"].eq(table) - & df["variable_id"].eq(variable) - & df["experiment_id"].eq(experiment) - ).any() - - return _check - - -def filter_institutions_and_sources( - cmip_meta: pd.DataFrame, - tables: tuple[str, ...], - variables: tuple[str, ...], - experiments: tuple[str, ...], -) -> pd.DataFrame: - """Filters a cmip metadata dataframe to only include models that have all - combinations of the given tables, variables, and experiments. - Parameters - ---------- - cmip_meta - CMIP metadata dataframe. - tables - The tables to include. - variables - The variables to include. - experiments - The experiments to include. - Returns - ------- - pd.DataFrame - Filtered cmip metadata containing only the institutions and sources with all - tables, variables, and experiments. - """ - # First we filter down to all models from the institutions and sources that have - # all the combinations of tables, variables, and experiments. 
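# NOTE: the loop below builds one boolean mask per (table, variable,
# experiment) combination, each indexed by (institution_id, source_id);
# pd.concat(masks, axis=1).all(axis=1) then intersects them. With the
# default arguments (2 tables x 2 variables x 5 experiments) that is 20
# masks, so an institution/source pair survives only if all 20 combinations
# are present in the metadata.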
- masks = [] - for table, variable, experiment in itertools.product( - tables, variables, experiments - ): - has_combo = cmip_meta.groupby(["institution_id", "source_id"]).apply( - contains_combo(table, variable, experiment) - ) - masks.append(has_combo) - mask = pd.concat(masks, axis=1).all(axis=1) - - institutions_and_sources = mask[mask].index - models_with_all_params = ( - cmip_meta.set_index(["institution_id", "source_id"]) - .loc[institutions_and_sources] - .reset_index() - ) - - # Now we filter down to the specific subset of table/variable/experiment - # combinations within the institutions and sources. - param_mask = ( - models_with_all_params["table_id"].isin(tables) - & models_with_all_params["variable_id"].isin(variables) - & models_with_all_params["experiment_id"].isin(experiments) - ) - models_and_params = models_with_all_params[param_mask] - return models_and_params diff --git a/src/climate_downscale/old_climate/project_anomaly.py b/src/climate_downscale/old_climate/project_anomaly.py deleted file mode 100644 index c35ef40..0000000 --- a/src/climate_downscale/old_climate/project_anomaly.py +++ /dev/null @@ -1,127 +0,0 @@ -from __future__ import annotations - -from pathlib import Path -from typing import TYPE_CHECKING - -import click -import pandas as pd -from rra_population_pipelines.pipelines.climate import data -from rra_population_pipelines.shared.cli_tools import options as clio -from rra_population_pipelines.shared.data import RRA_POP -from rra_tools import jobmon - -if TYPE_CHECKING: - import xarray as xr - -_ENSEMBLE_MEMBERS = [ - ("NCAR", "CESM2"), - ("MOHC", "UKESM1-0-LL"), - ("IPSL", "IPSL-CM6A-LR"), - ("MPI-M", "MPI-ESM1-2-LR"), - ("MIROC", "MIROC6"), - ("NOAA-GFDL", "GFDL-ESM4"), -] - -_VALID_YEARS = tuple([str(y) for y in range(2015, 2101)]) - - -def compute_common_lat_lon( - run_metadata: pd.DataFrame, -) -> tuple[pd.Index[float], pd.Index[float]]: - lat = pd.Index([], name="lat", dtype=float) - lon = pd.Index([], name="lon", dtype=float) - - for key in run_metadata.index.tolist(): - historical = data.load_cmip_historical_data(run_metadata.loc[key, "historical"]) - lat = lat.union(historical["lat"]) # type: ignore[arg-type] - lon = lon.union(historical["lon"]) # type: ignore[arg-type] - return lat, lon - - -def compute_single_model_anomaly( - historical: xr.Dataset, - experiment: xr.Dataset, - variable: str, -) -> xr.Dataset: - if variable == "tas": - anomaly = experiment.groupby("time.month") - historical - else: - historical = 86400 * historical + 1 - experiment = 86400 * experiment + 1 - anomaly = (1 / historical) * experiment.groupby("time.month") - return anomaly - - -def interp_common_lat_lon( - ds: xr.Dataset, lat: pd.Index[float], lon: pd.Index[float] -) -> xr.Dataset: - return ( - ds.pad(lon=1, mode="wrap") - .assign_coords(lon=ds.lon.pad(lon=1, mode="reflect", reflect_type="odd")) - .interp(lat=lat, lon=lon) - ) - - -def project_anomaly_main(variable: str, experiment: str, year: str) -> xr.Dataset: - run_meta = get_run_metadata(variable, experiment) - lat, lon = compute_common_lat_lon(run_meta) - - anomalies: list[xr.Dataset] = [] - for key in run_meta.index.tolist(): - historical = data.load_cmip_historical_data(run_meta.loc[key, "historical"]) - scenario = data.load_cmip_experiment_data( - run_meta.loc[key, "experiment"], year=year - ) - anomaly = compute_single_model_anomaly(historical, scenario, variable=variable) - anomaly = interp_common_lat_lon(anomaly, lat, lon) - anomalies.append(anomaly) - - mean_anomaly = 1 / len(anomalies) * sum(anomalies) 
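# NOTE: the expression above is the unweighted ensemble mean, i.e.
#     mean_anomaly = sum(anomalies) / len(anomalies)
# written with the scale factor first. Adding the members term-by-term is
# safe because each one was regridded to the shared lat/lon index by
# interp_common_lat_lon, so the datasets align coordinate-for-coordinate.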
- return mean_anomaly # type: ignore[return-value] - - -@click.command() # type: ignore[arg-type] -@click.option( - "--variable", - type=click.Choice(["tas", "pr"]), -) -@clio.with_climate_scenario(allow_all=False) -@clio.with_year(allow_all=False, choices=_VALID_YEARS) -@clio.with_output_directory(RRA_POP.projected_climate_anomaly_data) -def project_anomaly_task( - variable: str, - climate_scenario: str, - year: str, - output_dir: str, -) -> None: - projected_anomaly = project_anomaly_main(variable, climate_scenario, year) - out_path = Path(output_dir) / "{variable}_{experiment}_{year}.nc" - projected_anomaly.to_netcdf(out_path) - - -@click.command() # type: ignore[arg-type] -@clio.with_output_directory(RRA_POP.projected_climate_anomaly_data) -@clio.with_queue() -def project_anomaly(output_dir: str, queue: str) -> None: - jobmon.run_parallel( - task_name="project_anomaly", - node_args={ - "variable": [ - "tas", - "pr", - ], - "experiment": list(clio.VALID_CLIMATE_SCENARIOS), - "year": list(_VALID_YEARS), - }, - task_args={ - "output-dir": output_dir, - }, - task_resources={ - "queue": queue, - "cores": 2, - "memory": "70G", - "runtime": "120m", - "project": "proj_rapidresponse", - }, - runner="rptask", - ) diff --git a/src/climate_downscale/old_climate/project_climate.py b/src/climate_downscale/old_climate/project_climate.py deleted file mode 100644 index 1b366cb..0000000 --- a/src/climate_downscale/old_climate/project_climate.py +++ /dev/null @@ -1,172 +0,0 @@ -import click -import pandas as pd -import xarray as xr -from rra_population_pipelines.shared.cli_tools import options as clio -from rra_population_pipelines.shared.data import ( - RRA_DATA_ROOT, - RRA_POP, - RRAPopulationData, -) -from rra_tools import jobmon - - -def get_chelsa(variable: str, lat: slice, lon: slice) -> xr.Dataset: - ds_paths = [ - RRA_POP.get_downscaled_reference_map_path(variable, month) - for month in range(1, 13) - ] - ds = ( - xr.open_mfdataset( - ds_paths, - chunks={"lat": -1, "lon": -1}, - concat_dim=[pd.Index(range(1, 13), name="month")], # type: ignore[arg-type] - combine="nested", - ) - .sel(lat=lat, lon=lon) - .rename({"Band1": variable}) - .drop_vars("crs") - ) - if variable == "tas": # noqa: SIM108 - ds = 0.1 * ds - 273.15 - else: - ds = 0.1 * ds - return ds - - -def load_and_downscale_anomaly( - variable: str, - scenario: str, - year: int, - lat: xr.DataArray, - lon: xr.DataArray, -) -> xr.Dataset: - in_root = ( - RRA_POP.human_niche_data - / "chelsa-downscaled-projections" - / "_anomalies" - / "GLOBAL" - ) - path = in_root / f"{variable}_{scenario}_{year}.nc" - ds = xr.open_dataset( - path, - # Load the whole thing, but use a dask array - chunks={"lat": -1, "lon": -1, "time": -1}, - ).interp(lat=lat, lon=lon) - return ds - - -def apply_anomaly(data: xr.Dataset, anomaly: xr.Dataset) -> xr.Dataset: - if "tas" in anomaly.keys(): # noqa: SIM118 - result = anomaly.groupby("time.month") + data - else: - result = anomaly.groupby("time.month") * data * (1 / 30) - return result - - -def compute_measure(data: xr.Dataset, measure: str) -> xr.Dataset: - if measure == "temperature": - result = data.mean("time") - elif measure == "precipitation": - result = data.sum("time") - else: - threshold = 30 - result = (data > threshold).sum("time") - return result - - -def project_climate_main( - iso3: str, - measure: str, - scenario: str, - pop_data_dir: str, -) -> None: - pop_data = RRAPopulationData(pop_data_dir) - admin0 = pop_data.load_shapefile( - admin_level=0, - iso3=iso3, - year=2022, - ) - minx, miny, maxx, maxy 
= admin0.total_bounds - lat, lon = slice(miny, maxy), slice(minx, maxx) - - variable = { - "temperature": "tas", - "precipitation": "pr", - "days_over_thirty": "tas", - }[measure] - - print("Working on", scenario, measure) - ds = get_chelsa(variable, lat, lon) - - results = [] - for year in range(2015, 2101): - anom = load_and_downscale_anomaly( - variable, scenario, year, ds["lat"], ds["lon"] - ) - result = apply_anomaly(ds, anom) - result = compute_measure(result, measure) - results.append(result) - result = xr.concat(results, dim=pd.Index(range(2015, 2101), name="year")) - - print("Writing results") - pop_data.save_climate_data( - result, - measure=measure, - iso3=iso3, - scenario=scenario, - ) - - -@click.command() # type: ignore[arg-type] -@clio.with_iso3(allow_all=False) -@click.option( - "--measure", - type=click.Choice(["temperature", "precipitation", "days_over_thirty"]), -) -@clio.with_climate_scenario(allow_all=False) -@clio.with_input_directory("pop-data", RRA_DATA_ROOT) -def project_climate_task( - iso3: str, - measure: str, - climate_scenario: str, - pop_data_dir: str, -) -> None: - project_climate_main(iso3, measure, climate_scenario, pop_data_dir) - - -@click.command() # type: ignore[arg-type] -@clio.with_iso3(allow_all=False) -@clio.with_input_directory("pop-data", RRA_DATA_ROOT) -@clio.with_queue() -def project_climate( - iso3: str, - pop_data_dir: str, - queue: str, -) -> None: - pop_data = RRAPopulationData(pop_data_dir) - jobmon.run_parallel( - task_name="project_climate", - node_args={ - "iso3": [ - iso3, - ], - "measure": [ - "temperature", - "precipitation", - "days_over_thirty", - ], - "scenario": list(clio.VALID_CLIMATE_SCENARIOS), - }, - task_args={ - "pop-data-dir": pop_data_dir, - }, - task_resources={ - "queue": queue, - "cores": 2, - "memory": "70G", - "runtime": "120m", - "project": "proj_rapidresponse", - }, - runner="rptask", - log_root=pop_data.climate_data, - ) From 19a7dd499f36ddbdfb5737a1cf5b7bf23c5af037 Mon Sep 17 00:00:00 2001 From: collijk Date: Sat, 15 Jun 2024 18:49:39 -0700 Subject: [PATCH 47/71] Formatting --- .../downscale/prepare_training_data.py | 4 ++-- .../generate/historical_daily.py | 4 ++-- .../generate/historical_reference.py | 11 ++++++----- src/climate_downscale/generate/scenario_daily.py | 15 +++++++-------- src/climate_downscale/generate/utils.py | 2 +- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/climate_downscale/downscale/prepare_training_data.py b/src/climate_downscale/downscale/prepare_training_data.py index 807b0c3..570bb4e 100644 --- a/src/climate_downscale/downscale/prepare_training_data.py +++ b/src/climate_downscale/downscale/prepare_training_data.py @@ -48,7 +48,7 @@ def get_era5_temperature( lon = xr.DataArray(coords["lon"], dims=["points"]) time = xr.DataArray(coords["date"], dims=["points"]) - era5 = cd_data.load_era5_temperature_daily_mean(year) + era5 = cd_data.load_daily_results("historical", "tas", year) era5 = ( era5.assign_coords(longitude=(((era5.longitude + 180) % 360) - 180)) .sortby(["latitude", "longitude"]) @@ -59,7 +59,7 @@ def get_era5_temperature( # expver == 1 is final data. expver == 5 is provisional data # and has a very strong nonsense seasonal trend. 
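# NOTE: when a downloaded ERA5 file spans the ERA5T embargo window
# (roughly the most recent three months), it carries an extra `expver`
# dimension: finalized values sit under expver == 1 and provisional ERA5T
# values under expver == 5, each NaN where the other applies. Selecting
# expver == 1 keeps only the finalized record.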
era5 = era5.sel(expver=1) - return era5["t2m"].to_numpy() - 273.15 + return era5["value"].to_numpy() def prepare_training_data_main(output_dir: str | Path, year: str) -> None: diff --git a/src/climate_downscale/generate/historical_daily.py b/src/climate_downscale/generate/historical_daily.py index 20012fa..412026b 100644 --- a/src/climate_downscale/generate/historical_daily.py +++ b/src/climate_downscale/generate/historical_daily.py @@ -114,7 +114,7 @@ def with_target_variable( def load_and_shift_longitude(ds_path: str | Path) -> xr.Dataset: ds = xr.open_dataset(ds_path).chunk(time=24) - with dask.config.set(**{"array.slicing.split_large_chunks": False}): + with dask.config.set(**{"array.slicing.split_large_chunks": False}): # type: ignore[arg-type] ds = ds.assign_coords(longitude=(ds.longitude + 180) % 360 - 180).sortby( "longitude" ) @@ -182,7 +182,7 @@ def generate_historical_daily_main( for sv in source_variables ] print("collapsing") - with dask.config.set(**{"array.slicing.split_large_chunks": False}): + with dask.config.set(**{"array.slicing.split_large_chunks": False}): # type: ignore[arg-type] ds_land = collapse_fun(*land).compute() # type: ignore[operator] ds_land = ds_land.assign(date=pd.to_datetime(ds_land.date)) diff --git a/src/climate_downscale/generate/historical_reference.py b/src/climate_downscale/generate/historical_reference.py index e68213c..8d349bb 100644 --- a/src/climate_downscale/generate/historical_reference.py +++ b/src/climate_downscale/generate/historical_reference.py @@ -31,8 +31,9 @@ def generate_historical_reference_main( reference_data.append(ds) old_encoding = { - k: v for k, v in xr.open_dataset(paths[0])["value"].encoding.items() - if k in ['dtype', '_FillValue', 'scale_factor', 'add_offset'] + k: v + for k, v in xr.open_dataset(paths[0])["value"].encoding.items() + if k in ["dtype", "_FillValue", "scale_factor", "add_offset"] } encoding_kwargs = { "zlib": True, @@ -44,7 +45,7 @@ def generate_historical_reference_main( reference = sum(reference_data) / len(reference_data) print("Saving reference data") cd_data.save_daily_results( - reference, + reference, # type: ignore[arg-type] scenario="historical", variable=target_variable, year="reference", @@ -52,7 +53,7 @@ def generate_historical_reference_main( ) -@click.command() +@click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) @with_target_variable() def generate_historical_reference_task( @@ -62,7 +63,7 @@ def generate_historical_reference_task( generate_historical_reference_main(output_dir, target_variable) -@click.command() +@click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) @with_target_variable(allow_all=True) @clio.with_queue() diff --git a/src/climate_downscale/generate/scenario_daily.py b/src/climate_downscale/generate/scenario_daily.py index 8bfad67..061ccfb 100644 --- a/src/climate_downscale/generate/scenario_daily.py +++ b/src/climate_downscale/generate/scenario_daily.py @@ -1,13 +1,11 @@ from pathlib import Path import pandas as pd -import tqdm import xarray as xr from climate_downscale.data import ClimateDownscaleData from climate_downscale.generate import utils - # Map from source variable to a unit conversion function CONVERT_MAP = { "tas": utils.kelvin_to_celsius, @@ -21,8 +19,7 @@ def load_and_shift_longitude( ) -> xr.Dataset: ds = xr.open_dataset(ds_path).sel(time=time_slice).compute() ds = ( - ds - .rename({"lat": "latitude", "lon": "longitude", "time": "date"}) + ds.rename({"lat": "latitude", "lon": "longitude", "time": 
"date"}) .assign_coords(longitude=(ds.longitude + 180) % 360 - 180) .sortby("longitude") ) @@ -32,7 +29,7 @@ def load_and_shift_longitude( def load_variable( member_path: str | Path, variable: str, - year: str, + year: str | int, ) -> xr.Dataset: if year == "reference": ds = load_and_shift_longitude(member_path, utils.REFERENCE_PERIOD) @@ -50,13 +47,14 @@ def load_variable( ds = conversion(utils.rename_val_column(ds)) return ds + def compute_anomaly( reference: xr.Dataset, target: xr.Dataset, anomaly_type: str ) -> xr.Dataset: if anomaly_type == "additive": anomaly = target.groupby("time.month") - reference elif anomaly_type == "multiplicative": - anomaly = (target.groupby("time.month") + 1) / (reference + 1) + anomaly = (target.groupby("time.month") + 1) / (reference + 1) # type: ignore[operator] else: msg = f"Unknown anomaly type: {anomaly_type}" raise ValueError(msg) @@ -70,6 +68,7 @@ def compute_anomaly( anomaly = utils.interpolate_to_target_latlon(anomaly) return anomaly + def generate_scenario_daily_main( output_dir: str | Path, year: str | int, @@ -83,12 +82,12 @@ def generate_scenario_daily_main( reference = load_variable(path, target_variable, "reference") target = load_variable(path, target_variable, year) - anomaly_type = TRANSFORM_MAP[target_variable][1] + anomaly_type = "additive" # TRANSFORM_MAP[target_variable][1] anomaly = compute_anomaly(reference, target, anomaly_type) cd_data.save_daily_results( anomaly, scenario=cmip_scenario, variable=target_variable, year=year, + encoding_kwargs={"zlib": True, "complevel": 1}, ) - diff --git a/src/climate_downscale/generate/utils.py b/src/climate_downscale/generate/utils.py index 366db11..eb48c91 100644 --- a/src/climate_downscale/generate/utils.py +++ b/src/climate_downscale/generate/utils.py @@ -62,7 +62,7 @@ def precipitation_flux_to_rainfall(precipitation_flux: xr.Dataset) -> xr.Dataset """ seconds_per_day = 86400 mm_per_kg_m2 = 1 - return seconds_per_day * mm_per_kg_m2 * precipitation_flux # type: ignore[no-any-return] + return seconds_per_day * mm_per_kg_m2 * precipitation_flux def scale_wind_speed_height(wind_speed_10m: xr.Dataset) -> xr.Dataset: From 3f43234ecfd7b833f679972a7515cdcae7d5f50f Mon Sep 17 00:00:00 2001 From: James Collins Date: Sat, 15 Jun 2024 18:50:22 -0700 Subject: [PATCH 48/71] typo --- src/climate_downscale/generate/historical_reference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/climate_downscale/generate/historical_reference.py b/src/climate_downscale/generate/historical_reference.py index 6fee78d..760d015 100644 --- a/src/climate_downscale/generate/historical_reference.py +++ b/src/climate_downscale/generate/historical_reference.py @@ -23,7 +23,7 @@ def generate_historical_reference_main( reference_data = [] for path in paths: - ds = xr.load_dataset(path).groupby("time.month").mean("time") + ds = xr.load_dataset(path).groupby("date.month").mean("date") reference_data.append(ds) encoding_kwargs = xr.open_dataset(paths[0])["value"].encoding From d8ea998145826fee4f8a4ca180a1f9cdd84f346c Mon Sep 17 00:00:00 2001 From: collijk Date: Sat, 15 Jun 2024 19:25:55 -0700 Subject: [PATCH 49/71] thread through overwrite in extract cmip --- pyproject.toml | 2 ++ src/climate_downscale/extract/cmip6.py | 11 +++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index cc5fee9..731596e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -98,6 +98,8 @@ ignore = [ "PYI041", # Use float instead of int | float; dumb rule "T201", # print is fine for 
now. "RET504", # Unnecessary assignment before return + "PLR0913", # Too many arguments in function call, hard with CLIs. + "TRY201", # ] [tool.ruff.lint.per-file-ignores] diff --git a/src/climate_downscale/extract/cmip6.py b/src/climate_downscale/extract/cmip6.py index e032658..3fe546f 100644 --- a/src/climate_downscale/extract/cmip6.py +++ b/src/climate_downscale/extract/cmip6.py @@ -1,4 +1,3 @@ -import itertools from pathlib import Path import click @@ -37,7 +36,7 @@ def extract_cmip6_main( cmip6_source: str, cmip6_experiment: str, cmip6_variable: str, - overwrite: bool, + overwrite: bool, # noqa: FBT001 ) -> None: print(f"Checking metadata for {cmip6_source} {cmip6_experiment} {cmip6_variable}") cd_data = ClimateDownscaleData(output_dir) @@ -101,9 +100,11 @@ def extract_cmip6_task( cmip6_source: str, cmip6_experiment: str, cmip6_variable: str, - overwrite: bool, + overwrite: bool, # noqa: FBT001 ) -> None: - extract_cmip6_main(output_dir, cmip6_source, cmip6_experiment, cmip6_variable, overwrite) + extract_cmip6_main( + output_dir, cmip6_source, cmip6_experiment, cmip6_variable, overwrite + ) @click.command() # type: ignore[arg-type] @@ -119,6 +120,7 @@ def extract_cmip6( cmip6_experiment: str, cmip6_variable: str, queue: str, + overwrite: bool, # noqa: FBT001 ) -> None: sources = ( clio.VALID_CMIP6_SOURCES if cmip6_source == clio.RUN_ALL else [cmip6_source] @@ -144,6 +146,7 @@ def extract_cmip6( }, task_args={ "output-dir": output_dir, + "overwrite": overwrite, }, task_resources={ "queue": queue, From e5bd086c3a7c0a5287f7b8b1f6a52892e44bb7f6 Mon Sep 17 00:00:00 2001 From: collijk Date: Sun, 16 Jun 2024 00:20:03 -0700 Subject: [PATCH 50/71] Better cmip logging --- src/climate_downscale/extract/cmip6.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/climate_downscale/extract/cmip6.py b/src/climate_downscale/extract/cmip6.py index 3fe546f..085637e 100644 --- a/src/climate_downscale/extract/cmip6.py +++ b/src/climate_downscale/extract/cmip6.py @@ -52,7 +52,8 @@ def extract_cmip6_main( meta_subset = meta[mask].set_index("member_id").zstore.to_dict() print(f"Extracting {len(meta_subset)} members...") - for member, zstore_path in meta_subset.items(): + for i, (member, zstore_path) in enumerate(meta_subset.items()): + item = f"{i}/{len(meta_subset)} {member}" out_path = cd_data.extracted_cmip6_path( cmip6_variable, cmip6_experiment, @@ -60,11 +61,11 @@ def extract_cmip6_main( member, ) if out_path.exists() and not overwrite: - print("Skipping", member, zstore_path) + print("Skipping", item) continue try: - print("Extracting", member, zstore_path) + print("Extracting", item) cmip_data = load_cmip_data(zstore_path) shell_tools.touch(out_path, exist_ok=True) From 8300d19c0ec25f2b775698fad8c3dec0d72eb1b6 Mon Sep 17 00:00:00 2001 From: collijk Date: Sun, 16 Jun 2024 16:26:24 -0700 Subject: [PATCH 51/71] Delete some spurious historical variables, add runner for scenarios --- src/climate_downscale/generate/__init__.py | 6 + .../generate/historical_daily.py | 31 --- .../generate/scenario_daily.py | 204 ++++++++++++++++-- 3 files changed, 192 insertions(+), 49 deletions(-) diff --git a/src/climate_downscale/generate/__init__.py b/src/climate_downscale/generate/__init__.py index 022426b..10b0563 100644 --- a/src/climate_downscale/generate/__init__.py +++ b/src/climate_downscale/generate/__init__.py @@ -6,13 +6,19 @@ generate_historical_reference, generate_historical_reference_task, ) +from climate_downscale.generate.scenario_daily import ( + generate_scenario_daily, + 
generate_scenario_daily_task, +) RUNNERS = { "historical_daily": generate_historical_daily, "historical_reference": generate_historical_reference, + "scenario_daily": generate_scenario_daily, } TASK_RUNNERS = { "historical_daily": generate_historical_daily_task, "historical_reference": generate_historical_reference_task, + "scenario_daily": generate_scenario_daily_task, } diff --git a/src/climate_downscale/generate/historical_daily.py b/src/climate_downscale/generate/historical_daily.py index 107f440..d1bcb2d 100644 --- a/src/climate_downscale/generate/historical_daily.py +++ b/src/climate_downscale/generate/historical_daily.py @@ -47,11 +47,6 @@ utils.daily_min, (273.15, 0.01), ), - "dewpoint_temperature": ( - ["2m_dewpoint_temperature"], - utils.daily_mean, - (273.15, 0.01), - ), "wind_speed": ( ["10m_u_component_of_wind", "10m_v_component_of_wind"], lambda x, y: utils.daily_mean(utils.vector_magnitude(x, y)), @@ -69,32 +64,6 @@ ), } -ADDITIONAL_TRANSFORM_MAP = { - "heat_index": ( - ["2m_temperature", "2m_dewpoint_temperature"], - lambda x, y: utils.daily_mean(utils.heat_index(x, y)), - (273.15, 0.01), - ), - "humidex": ( - ["2m_temperature", "2m_dewpoint_temperature"], - lambda x, y: utils.daily_mean(utils.humidex(x, y)), - (273.15, 0.01), - ), - "effective_temperature": ( - [ - "2m_temperature", - "2m_dewpoint_temperature", - "10m_u_component_of_wind", - "10m_v_component_of_wind", - ], - lambda t2m, t2d, uas, vas: utils.daily_mean( - utils.effective_temperature(t2m, t2d, uas, vas) - ), - (273.15, 0.01), - ), -} - - _P = typing.ParamSpec("_P") _T = typing.TypeVar("_T") diff --git a/src/climate_downscale/generate/scenario_daily.py b/src/climate_downscale/generate/scenario_daily.py index 061ccfb..6451d88 100644 --- a/src/climate_downscale/generate/scenario_daily.py +++ b/src/climate_downscale/generate/scenario_daily.py @@ -1,17 +1,90 @@ +import itertools +import typing from pathlib import Path +import click import pandas as pd import xarray as xr +from rra_tools import jobmon -from climate_downscale.data import ClimateDownscaleData +from climate_downscale import cli_options as clio +from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData from climate_downscale.generate import utils +VALID_YEARS = [str(y) for y in range(max(utils.REFERENCE_YEARS) + 1, 2101)] + # Map from source variable to a unit conversion function CONVERT_MAP = { + "uas": utils.scale_wind_speed_height, + "vas": utils.scale_wind_speed_height, + "hurs": utils.identity, "tas": utils.kelvin_to_celsius, + "tasmin": utils.kelvin_to_celsius, + "tasmax": utils.kelvin_to_celsius, "pr": utils.precipitation_flux_to_rainfall, } +# Map from target variable to: +# - a list of source variables +# - a transformation function +# - a tuple of offset and scale factors for the output for serialization +# - an anomaly type +TRANSFORM_MAP = { + "mean_temperature": ( + ["tas"], + utils.identity, + (273.15, 0.01), + "additive", + ), + "max_temperature": ( + ["tasmax"], + utils.identity, + (273.15, 0.01), + "additive", + ), + "min_temperature": ( + ["tasmin"], + utils.identity, + (273.15, 0.01), + "additive", + ), + "wind_speed": ( + ["uas", "vas"], + utils.vector_magnitude, + (0, 0.01), + "multiplicative", + ), + "relative_humidity": ( + ["hurs"], + utils.identity, + (0, 0.01), + "multiplicative", + ), + "total_precipitation": ( + ["pr"], + utils.identity, + (0, 0.1), + "multiplicative", + ), +} + + +_P = typing.ParamSpec("_P") +_T = typing.TypeVar("_T") + + +def with_target_variable( + *, + allow_all: bool = False, +) -> 
clio.ClickOption[_P, _T]: + return clio.with_choice( + "target-variable", + "t", + allow_all=allow_all, + choices=list(TRANSFORM_MAP.keys()), + help="Variable to generate.", + ) + def load_and_shift_longitude( ds_path: str | Path, @@ -33,7 +106,6 @@ def load_variable( ) -> xr.Dataset: if year == "reference": ds = load_and_shift_longitude(member_path, utils.REFERENCE_PERIOD) - ds = ds.groupby("date.month").mean("date") else: time_slice = slice(f"{year}-01-01", f"{year}-12-31") time_range = pd.date_range(f"{year}-01-01", f"{year}-12-31") @@ -65,7 +137,6 @@ def compute_anomaly( .assign_coords(longitude=(anomaly.longitude + 180) % 360 - 180) .sortby("longitude") ) - anomaly = utils.interpolate_to_target_latlon(anomaly) return anomaly @@ -73,21 +144,118 @@ def generate_scenario_daily_main( output_dir: str | Path, year: str | int, target_variable: str, - cmip_scenario: str, + cmip6_experiment: str, ) -> None: cd_data = ClimateDownscaleData(output_dir) - paths = cd_data.extracted_cmip6.glob(f"{target_variable}_{cmip_scenario}*.nc") - - for path in paths: - reference = load_variable(path, target_variable, "reference") - target = load_variable(path, target_variable, year) - - anomaly_type = "additive" # TRANSFORM_MAP[target_variable][1] - anomaly = compute_anomaly(reference, target, anomaly_type) - cd_data.save_daily_results( - anomaly, - scenario=cmip_scenario, - variable=target_variable, - year=year, - encoding_kwargs={"zlib": True, "complevel": 1}, + + (source_variables, transform_fun, (e_offset, e_scale), anomaly_type) = ( + TRANSFORM_MAP[target_variable] + ) + + paths_by_var = [ + list(cd_data.extracted_cmip6.glob(f"{source_variable}_{cmip6_experiment}*.nc")) + for source_variable in source_variables + ] + source_paths = list(zip(*paths_by_var, strict=True)) + + historical_reference = cd_data.load_daily_results( + scenario="historical", + variable=target_variable, + year="reference", + ) + + scale = 1 / len(source_paths) + anomaly = xr.zeros_like(historical_reference) + for sps in source_paths: + scenario_reference = transform_fun( # type: ignore[operator] + *[load_variable(sp, target_variable, "reference") for sp in sps] ) + target = transform_fun( # type: ignore[operator] + *[load_variable(sp, target_variable, year) for sp in sps] + ) + s_anomaly = scale * compute_anomaly(scenario_reference, target, anomaly_type) + anomaly += utils.interpolate_to_target_latlon(s_anomaly) + + scenario_data = historical_reference + anomaly + cd_data.save_daily_results( + scenario_data, + scenario=cmip6_experiment, + variable=target_variable, + year=year, + encoding_kwargs={ + "add_offset": e_offset, + "scale_factor": e_scale, + }, + ) + + +@click.command() # type: ignore[arg-type] +@clio.with_output_directory(DEFAULT_ROOT) +@clio.with_year(years=VALID_YEARS) +@with_target_variable() +@clio.with_cmip6_experiment() +def generate_scenario_daily_task( + output_dir: str, year: str, target_variable: str, cmip6_experiment: str +) -> None: + generate_scenario_daily_main(output_dir, year, target_variable, cmip6_experiment) + + +@click.command() # type: ignore[arg-type] +@clio.with_output_directory(DEFAULT_ROOT) +@clio.with_year(years=VALID_YEARS, allow_all=True) +@with_target_variable(allow_all=True) +@clio.with_cmip6_experiment(allow_all=True) +@clio.with_queue() +@clio.with_overwrite() +def generate_scenario_daily( + output_dir: str, + year: str, + target_variable: str, + cmip6_experiment: str, + queue: str, + overwrite: bool, # noqa: FBT001 +) -> None: + cd_data = ClimateDownscaleData(output_dir) + + years = 
VALID_YEARS if year == clio.RUN_ALL else [year] + variables = ( + list(TRANSFORM_MAP.keys()) + if target_variable == clio.RUN_ALL + else [target_variable] + ) + experiments = ( + list(clio.VALID_CMIP6_EXPERIMENTS) + if cmip6_experiment == clio.RUN_ALL + else [cmip6_experiment] + ) + + yve = [] + complete = [] + for y, v, e in itertools.product(years, variables, experiments): + path = cd_data.daily_results_path(y, v, e) + if not path.exists() or overwrite: + yve.append((y, v, e)) + else: + complete.append((y, v, e)) + + print(f"{len(complete)} tasks already done. " f"Launching {len(yve)} tasks") + + jobmon.run_parallel( + runner="cdtask", + task_name="generate scenario_daily", + flat_node_args=( + ("year", "target-variable", "cmip-experiment"), + yve, + ), + task_args={ + "output-dir": output_dir, + }, + task_resources={ + "queue": queue, + "cores": 5, + "memory": "200G", + "runtime": "240m", + "project": "proj_rapidresponse", + }, + max_attempts=1, + ) From 7a8edc9b8de552a2e8ae3bedc64d732dac911621 Mon Sep 17 00:00:00 2001 From: James Collins Date: Sun, 16 Jun 2024 16:28:19 -0700 Subject: [PATCH 52/71] Fix overwrite --- src/climate_downscale/extract/cmip6.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/climate_downscale/extract/cmip6.py b/src/climate_downscale/extract/cmip6.py index 3fe546f..590f3ff 100644 --- a/src/climate_downscale/extract/cmip6.py +++ b/src/climate_downscale/extract/cmip6.py @@ -136,6 +136,8 @@ def extract_cmip6( else [cmip6_variable] ) + overwrite_arg = {"overwrite": None} if overwrite else {} + jobmon.run_parallel( runner="cdtask", task_name="extract cmip6", @@ -146,7 +148,7 @@ def extract_cmip6( }, task_args={ "output-dir": output_dir, - "overwrite": overwrite, + **overwrite_arg, }, task_resources={ "queue": queue, From 164debe3dc45f0c2e2517fafef51bd23f0dc62ed Mon Sep 17 00:00:00 2001 From: collijk Date: Sun, 16 Jun 2024 16:34:48 -0700 Subject: [PATCH 53/71] Add logging, linear interp for anomaly, and multiplicative anomaly application --- .../generate/scenario_daily.py | 19 ++++++++++++++++--- src/climate_downscale/generate/utils.py | 3 ++- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/climate_downscale/generate/scenario_daily.py b/src/climate_downscale/generate/scenario_daily.py index 6451d88..6ea39db 100644 --- a/src/climate_downscale/generate/scenario_daily.py +++ b/src/climate_downscale/generate/scenario_daily.py @@ -158,25 +158,38 @@ def generate_scenario_daily_main( ] source_paths = list(zip(*paths_by_var, strict=True)) + print("loading historical reference") historical_reference = cd_data.load_daily_results( scenario="historical", variable=target_variable, year="reference", ) + print("Making memory buffer") scale = 1 / len(source_paths) anomaly = xr.zeros_like(historical_reference) - for sps in source_paths: + for i, sps in enumerate(source_paths): + pid = f"{i}/{len(source_paths)}" + print(f"{pid}: Loading reference") scenario_reference = transform_fun( # type: ignore[operator] *[load_variable(sp, target_variable, "reference") for sp in sps] ) + print(f"{pid}: Loading target") target = transform_fun( # type: ignore[operator] *[load_variable(sp, target_variable, year) for sp in sps] ) + print(f"{pid}: computing anomaly") s_anomaly = scale * compute_anomaly(scenario_reference, target, anomaly_type) - anomaly += utils.interpolate_to_target_latlon(s_anomaly) + print(f"{pid}: downscaling anomaly") + anomaly += utils.interpolate_to_target_latlon(s_anomaly, method="linear") - scenario_data = historical_reference + 
anomaly + print("Computing scenario data") + if anomaly_type == "additive": + scenario_data = historical_reference + anomaly + else: + scenario_data = historical_reference * anomaly + + print("Saving") cd_data.save_daily_results( scenario_data, scenario=cmip6_experiment, diff --git a/src/climate_downscale/generate/utils.py b/src/climate_downscale/generate/utils.py index eb48c91..75fc0ad 100644 --- a/src/climate_downscale/generate/utils.py +++ b/src/climate_downscale/generate/utils.py @@ -288,9 +288,10 @@ def rename_val_column(ds: xr.Dataset) -> xr.Dataset: def interpolate_to_target_latlon( ds: xr.Dataset, + method: str = "nearest", ) -> xr.Dataset: return ( - ds.interp(longitude=TARGET_LON, latitude=TARGET_LAT, method="nearest") + ds.interp(longitude=TARGET_LON, latitude=TARGET_LAT, method=method) # type: ignore[arg-type] .interpolate_na(dim="longitude", method="nearest", fill_value="extrapolate") .interpolate_na(dim="latitude", method="nearest", fill_value="extrapolate") ) From 6770d88e046d39640f28c05ad1e1c49472dcdfa9 Mon Sep 17 00:00:00 2001 From: collijk Date: Sun, 16 Jun 2024 16:36:59 -0700 Subject: [PATCH 54/71] Reorder load and shift longitude ops --- src/climate_downscale/generate/scenario_daily.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/climate_downscale/generate/scenario_daily.py b/src/climate_downscale/generate/scenario_daily.py index 6ea39db..fc5f870 100644 --- a/src/climate_downscale/generate/scenario_daily.py +++ b/src/climate_downscale/generate/scenario_daily.py @@ -92,9 +92,9 @@ def load_and_shift_longitude( ) -> xr.Dataset: ds = xr.open_dataset(ds_path).sel(time=time_slice).compute() ds = ( - ds.rename({"lat": "latitude", "lon": "longitude", "time": "date"}) - .assign_coords(longitude=(ds.longitude + 180) % 360 - 180) - .sortby("longitude") + ds.assign_coords(lon=(ds.lon + 180) % 360 - 180) + .sortby("lon") + .rename({"lat": "latitude", "lon": "longitude", "time": "date"}) ) return ds From 892d1ed35a34e2df976eff146abeb0f65e7bfdd1 Mon Sep 17 00:00:00 2001 From: collijk Date: Sun, 16 Jun 2024 16:39:45 -0700 Subject: [PATCH 55/71] Infer variable from dataset --- src/climate_downscale/generate/scenario_daily.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/climate_downscale/generate/scenario_daily.py b/src/climate_downscale/generate/scenario_daily.py index fc5f870..ac349a2 100644 --- a/src/climate_downscale/generate/scenario_daily.py +++ b/src/climate_downscale/generate/scenario_daily.py @@ -101,7 +101,6 @@ def load_and_shift_longitude( def load_variable( member_path: str | Path, - variable: str, year: str | int, ) -> xr.Dataset: if year == "reference": @@ -115,6 +114,7 @@ def load_variable( .interp(date=time_range) .interpolate_na(dim="date", method="nearest", fill_value="extrapolate") ) + variable = str(next(iter(ds))) conversion = CONVERT_MAP[variable] ds = conversion(utils.rename_val_column(ds)) return ds @@ -172,11 +172,11 @@ def generate_scenario_daily_main( pid = f"{i}/{len(source_paths)}" print(f"{pid}: Loading reference") scenario_reference = transform_fun( # type: ignore[operator] - *[load_variable(sp, target_variable, "reference") for sp in sps] + *[load_variable(sp, "reference") for sp in sps] ) print(f"{pid}: Loading target") target = transform_fun( # type: ignore[operator] - *[load_variable(sp, target_variable, year) for sp in sps] + *[load_variable(sp, year) for sp in sps] ) print(f"{pid}: computing anomaly") s_anomaly = scale * compute_anomaly(scenario_reference, target, anomaly_type) From 
5c97e1226178d4075657259e9500ccfb1a6e0cad Mon Sep 17 00:00:00 2001
From: collijk
Date: Sun, 16 Jun 2024 16:44:39 -0700
Subject: [PATCH 56/71] Need call to interp calendar

---
 src/climate_downscale/generate/scenario_daily.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/climate_downscale/generate/scenario_daily.py b/src/climate_downscale/generate/scenario_daily.py
index ac349a2..e11dd69 100644
--- a/src/climate_downscale/generate/scenario_daily.py
+++ b/src/climate_downscale/generate/scenario_daily.py
@@ -111,7 +111,7 @@ def load_variable(
         ds = load_and_shift_longitude(member_path, time_slice)
         ds = (
             ds.assign_coords(date=ds.date.dt.floor("D"))
-            .interp(date=time_range)
+            .interp_calendar(time_range, dim="date")
             .interpolate_na(dim="date", method="nearest", fill_value="extrapolate")
         )
     variable = str(next(iter(ds)))

From a5bef4fc18b429db5a08dcb9450e3c7f591a9fb6 Mon Sep 17 00:00:00 2001
From: James Collins
Date: Mon, 17 Jun 2024 15:30:36 -0700
Subject: [PATCH 57/71] Lots of fiddling to get things to work

---
 .../generate/scenario_daily.py                 | 111 +++++++++++-------
 src/climate_downscale/generate/utils.py        |   4 +-
 2 files changed, 74 insertions(+), 41 deletions(-)

diff --git a/src/climate_downscale/generate/scenario_daily.py b/src/climate_downscale/generate/scenario_daily.py
index e11dd69..6fc2590 100644
--- a/src/climate_downscale/generate/scenario_daily.py
+++ b/src/climate_downscale/generate/scenario_daily.py
@@ -1,8 +1,10 @@
 import itertools
 import typing
 from pathlib import Path
+from collections import defaultdict
 
 import click
+import numpy as np
 import pandas as pd
 import xarray as xr
 from rra_tools import jobmon
@@ -91,10 +93,13 @@ def load_and_shift_longitude(
     time_slice: slice,
 ) -> xr.Dataset:
     ds = xr.open_dataset(ds_path).sel(time=time_slice).compute()
+    if ds.time.size == 0:
+        msg = 'No data in slice'
+        raise KeyError(msg)
     ds = (
         ds.assign_coords(lon=(ds.lon + 180) % 360 - 180)
         .sortby("lon")
-        .rename({"lat": "latitude", "lon": "longitude", "time": "date"})
+        .rename({"lat": "latitude", "lon": "longitude"})
     )
     return ds
 
@@ -104,15 +109,16 @@ def load_variable(
     year: str | int,
 ) -> xr.Dataset:
     if year == "reference":
-        ds = load_and_shift_longitude(member_path, utils.REFERENCE_PERIOD)
+        ds = load_and_shift_longitude(member_path, utils.REFERENCE_PERIOD).rename({"time": "date"})
     else:
         time_slice = slice(f"{year}-01-01", f"{year}-12-31")
         time_range = pd.date_range(f"{year}-01-01", f"{year}-12-31")
         ds = load_and_shift_longitude(member_path, time_slice)
         ds = (
-            ds.assign_coords(date=ds.date.dt.floor("D"))
-            .interp_calendar(time_range, dim="date")
-            .interpolate_na(dim="date", method="nearest", fill_value="extrapolate")
+            ds.assign_coords(time=ds.time.dt.floor("D"))
+            .interp_calendar(time_range)
+            .interpolate_na(dim="time", method="nearest", fill_value="extrapolate")
+            .rename({"time": "date"})
         )
     variable = str(next(iter(ds)))
     conversion = CONVERT_MAP[variable]
@@ -123,20 +129,15 @@ def load_variable(
 def compute_anomaly(
     reference: xr.Dataset, target: xr.Dataset, anomaly_type: str
 ) -> xr.Dataset:
+    reference = reference.groupby("date.month").mean("date")
     if anomaly_type == "additive":
-        anomaly = target.groupby("time.month") - reference
+        anomaly = target.groupby("date.month") - reference
     elif anomaly_type == "multiplicative":
-        anomaly = (target.groupby("time.month") + 1) / (reference + 1)  # type: ignore[operator]
+        anomaly = (target + 1).groupby("date.month") / (reference + 1)
     else:
         msg = f"Unknown anomaly type: {anomaly_type}"
         raise 
ValueError(msg) - - anomaly = ( - anomaly.drop_vars("month") - .rename({"lat": "latitude", "lon": "longitude", "time": "date"}) - .assign_coords(longitude=(anomaly.longitude + 180) % 360 - 180) - .sortby("longitude") - ) + anomaly = anomaly.drop_vars("month") return anomaly @@ -152,11 +153,24 @@ def generate_scenario_daily_main( TRANSFORM_MAP[target_variable] ) - paths_by_var = [ - list(cd_data.extracted_cmip6.glob(f"{source_variable}_{cmip6_experiment}*.nc")) - for source_variable in source_variables + models_by_var = {} + for source_variable in source_variables: + model_vars = set([ + p.stem.split(f"{cmip6_experiment}_")[1] + for p in cd_data.extracted_cmip6.glob(f"{source_variable}_{cmip6_experiment}*.nc") + ]) + models_by_var[source_variable] = model_vars + + shared_models = set.intersection(*models_by_var.values()) + for var, models in models_by_var.items(): + extra_models = models.difference(shared_models) + if extra_models: + print(var, extra_models) + source_paths = [ + [cd_data.extracted_cmip6 / f'{source_variable}_{cmip6_experiment}_{model}.nc' + for source_variable in source_variables] + for model in sorted(shared_models) ] - source_paths = list(zip(*paths_by_var, strict=True)) print("loading historical reference") historical_reference = cd_data.load_daily_results( @@ -165,30 +179,48 @@ def generate_scenario_daily_main( year="reference", ) - print("Making memory buffer") - scale = 1 / len(source_paths) - anomaly = xr.zeros_like(historical_reference) + anomalies = {} + source_paths = source_paths for i, sps in enumerate(source_paths): - pid = f"{i}/{len(source_paths)}" + pid = f"{i+1}/{len(source_paths)} {sps[0].stem}" print(f"{pid}: Loading reference") - scenario_reference = transform_fun( # type: ignore[operator] - *[load_variable(sp, "reference") for sp in sps] - ) - print(f"{pid}: Loading target") - target = transform_fun( # type: ignore[operator] - *[load_variable(sp, year) for sp in sps] - ) + try: + scenario_reference = transform_fun( # type: ignore[operator] + *[load_variable(sp, "reference") for sp in sps] + ) + print(f"{pid}: Loading target") + target = transform_fun( # type: ignore[operator] + *[load_variable(sp, year) for sp in sps] + ) + except KeyError: + print(f"{pid}: Bad formatting, skipping...") + continue print(f"{pid}: computing anomaly") - s_anomaly = scale * compute_anomaly(scenario_reference, target, anomaly_type) - print(f"{pid}: downscaling anomaly") - anomaly += utils.interpolate_to_target_latlon(s_anomaly, method="linear") + s_anomaly = compute_anomaly(scenario_reference, target, anomaly_type) + key = f"{len(s_anomaly.latitude)}_{len(s_anomaly.longitude)}" + old = anomalies.get(key, 0) + if old: + for coord in ['latitude', 'longitude']: + old_c = old[coord].to_numpy() + new_c = s_anomaly[coord].to_numpy() + if np.abs(old_c - new_c).max() < 1e-5: + s_anomaly = s_anomaly.assign(**{coord: old_c}) + else: + msg = f"{coord} does not match despite having the same subdivision" + raise ValueError(msg) + anomalies[key] = old + s_anomaly + anomaly = 0 + for i, (k, v) in enumerate(anomalies.items()): + print(f"Downscaling {i+1}/{len(anomalies)}: {k}") + anomaly += utils.interpolate_to_target_latlon(v, method="linear") + anomaly /= len(source_paths) print("Computing scenario data") if anomaly_type == "additive": - scenario_data = historical_reference + anomaly + scenario_data = historical_reference + anomaly.groupby('date.month') else: - scenario_data = historical_reference * anomaly - + scenario_data = historical_reference * anomaly.groupby('date.month') + 
scenario_data = scenario_data.drop_vars('month') print("Saving") cd_data.save_daily_results( scenario_data, @@ -245,19 +277,18 @@ def generate_scenario_daily( yve = [] complete = [] for y, v, e in itertools.product(years, variables, experiments): - path = cd_data.daily_results_path(y, v, e) + path = cd_data.daily_results_path(scenario=e, variable=v, year=y) if not path.exists() or overwrite: yve.append((y, v, e)) else: complete.append((y, v, e)) print(f"{len(complete)} tasks already done. " f"Launching {len(yve)} tasks") - jobmon.run_parallel( runner="cdtask", task_name="generate scenario_daily", flat_node_args=( - ("year", "target-variable", "cmip-experiment"), + ("year", "target-variable", "cmip6-experiment"), yve, ), task_args={ @@ -266,8 +297,8 @@ def generate_scenario_daily( task_resources={ "queue": queue, "cores": 5, - "memory": "200G", - "runtime": "240m", + "memory": "120G", + "runtime": "400m", "project": "proj_rapidresponse", }, max_attempts=1, diff --git a/src/climate_downscale/generate/utils.py b/src/climate_downscale/generate/utils.py index 75fc0ad..8ed5cb0 100644 --- a/src/climate_downscale/generate/utils.py +++ b/src/climate_downscale/generate/utils.py @@ -7,7 +7,7 @@ np.round(np.arange(-180.0, 180.0, 0.1, dtype="float32"), 1), dims="longitude" ) TARGET_LAT = xr.DataArray( - np.round(np.arange(90.0, -90.1, -0.1, dtype="float32"), 1), dims="latitude" + np.round(np.arange(-90.0, 90.1, 0.1, dtype="float32"), 1), dims="latitude" ) ############################# @@ -293,5 +293,7 @@ def interpolate_to_target_latlon( return ( ds.interp(longitude=TARGET_LON, latitude=TARGET_LAT, method=method) # type: ignore[arg-type] .interpolate_na(dim="longitude", method="nearest", fill_value="extrapolate") + .sortby('latitude') .interpolate_na(dim="latitude", method="nearest", fill_value="extrapolate") + .sortby('latitude', ascending=False) ) From 71a81b925c8021782014a4894810c5d2c88c720b Mon Sep 17 00:00:00 2001 From: collijk Date: Mon, 17 Jun 2024 17:09:22 -0700 Subject: [PATCH 58/71] Add annual scenario --- src/climate_downscale/data.py | 31 ++- src/climate_downscale/generate/__init__.py | 6 + .../generate/scenario_annual.py | 255 ++++++++++++++++++ .../generate/scenario_daily.py | 58 ++-- src/climate_downscale/generate/utils.py | 68 +++-- 5 files changed, 372 insertions(+), 46 deletions(-) create mode 100644 src/climate_downscale/generate/scenario_annual.py diff --git a/src/climate_downscale/data.py b/src/climate_downscale/data.py index f27355d..cffdf82 100644 --- a/src/climate_downscale/data.py +++ b/src/climate_downscale/data.py @@ -52,7 +52,9 @@ def load_cmip6_metadata(self) -> pd.DataFrame: meta.to_parquet(meta_path) return pd.read_parquet(meta_path) - def extracted_cmip6_path(self, variable: str, experiment: str, source: str, member: str) -> Path: + def extracted_cmip6_path( + self, variable: str, experiment: str, source: str, member: str + ) -> Path: return self.extracted_cmip6 / f"{variable}_{experiment}_{source}_{member}.nc" @property @@ -157,6 +159,33 @@ def load_daily_results( results_path = self.daily_results_path(scenario, variable, year) return xr.open_dataset(results_path) + @property + def annual_results(self) -> Path: + return self.results / "annual" + + def annual_results_path(self, scenario: str, variable: str) -> Path: + return self.annual_results / scenario / f"{variable}.nc" + + def save_annual_results( + self, + results_ds: xr.Dataset, + scenario: str, + variable: str, + encoding_kwargs: dict[str, Any], + ) -> None: + path = self.annual_results_path(scenario, variable) + 
mkdir(path.parent, exist_ok=True, parents=True) + touch(path, exist_ok=True) + + encoding = { + "dtype": "int16", + "_FillValue": -32767, + "zlib": True, + "complevel": 1, + } + encoding.update(encoding_kwargs) + results_ds.to_netcdf(path, encoding={"value": encoding}) + def save_raster( raster: rt.RasterArray, diff --git a/src/climate_downscale/generate/__init__.py b/src/climate_downscale/generate/__init__.py index 10b0563..4f4afa8 100644 --- a/src/climate_downscale/generate/__init__.py +++ b/src/climate_downscale/generate/__init__.py @@ -6,6 +6,10 @@ generate_historical_reference, generate_historical_reference_task, ) +from climate_downscale.generate.scenario_annual import ( + generate_scenario_annual, + generate_scenario_annual_task, +) from climate_downscale.generate.scenario_daily import ( generate_scenario_daily, generate_scenario_daily_task, @@ -15,10 +19,12 @@ "historical_daily": generate_historical_daily, "historical_reference": generate_historical_reference, "scenario_daily": generate_scenario_daily, + "scenario_annual": generate_scenario_annual, } TASK_RUNNERS = { "historical_daily": generate_historical_daily_task, "historical_reference": generate_historical_reference_task, "scenario_daily": generate_scenario_daily_task, + "scenario_annual": generate_scenario_annual_task, } diff --git a/src/climate_downscale/generate/scenario_annual.py b/src/climate_downscale/generate/scenario_annual.py new file mode 100644 index 0000000..55aa233 --- /dev/null +++ b/src/climate_downscale/generate/scenario_annual.py @@ -0,0 +1,255 @@ +import itertools +import typing +from pathlib import Path + +import click +import xarray as xr +from rra_tools import jobmon + +from climate_downscale import cli_options as clio +from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData +from climate_downscale.generate import utils +from climate_downscale.generate.scenario_daily import VALID_YEARS + +YEARS = { + "historical": clio.VALID_YEARS, + "scenario": VALID_YEARS, +} +TEMP_THRESHOLDS = list(range(20, 35)) + + +class Transform: + def __init__( + self, + source_variables: list[str], + transform_funcs: list[typing.Callable[..., xr.Dataset]] = [utils.annual_mean], # noqa: B006 + encoding_scale: float = 1.0, + encoding_offset: float = 0.0, + ): + self.source_variables = source_variables + self.transform_funcs = transform_funcs + self.encoding_scale = encoding_scale + self.encoding_offset = encoding_offset + + def __iter__(self) -> typing.Iterator[str]: + return iter(self.source_variables) + + def __call__(self, *datasets: xr.Dataset) -> xr.Dataset: + res = self.transform_funcs[0](*datasets) + for transform_func in self.transform_funcs[1:]: + res = transform_func(res) + return res + + @property + def encoding_kwargs(self) -> dict[str, float]: + return {"add_offset": self.encoding_offset, "scale_factor": self.encoding_scale} + + +TRANSFORM_MAP = { + "mean_temperature": Transform( + source_variables=["mean_temperature"], + encoding_scale=0.01, + encoding_offset=273.15, + ), + "mean_high_temperature": Transform( + source_variables=["max_temperature"], + encoding_scale=0.01, + encoding_offset=273.15, + ), + "mean_low_temperature": Transform( + source_variables=["min_temperature"], + encoding_scale=0.01, + encoding_offset=273.15, + ), + **{ + f"days_over_{temp}C": Transform( + source_variables=["mean_temperature"], + transform_funcs=[utils.count_threshold(temp), utils.annual_sum], + ) + for temp in TEMP_THRESHOLDS + }, + "mean_heat_index": Transform( + source_variables=["mean_temperature", 
"relative_humidity"], + transform_funcs=[utils.heat_index, utils.annual_mean], + encoding_scale=0.01, + encoding_offset=273.15, + ), + **{ + f"days_over_{temp}C_heat_index": Transform( + source_variables=["mean_temperature", "relative_humidity"], + transform_funcs=[ + utils.heat_index, + utils.count_threshold(temp), + utils.annual_sum, + ], + ) + for temp in TEMP_THRESHOLDS + }, + "mean_humidex": Transform( + source_variables=["mean_temperature", "relative_humidity"], + transform_funcs=[utils.humidex, utils.annual_mean], + encoding_scale=0.01, + encoding_offset=273.15, + ), + **{ + f"days_over_{temp}C_humidex": Transform( + source_variables=["mean_temperature", "relative_humidity"], + transform_funcs=[ + utils.humidex, + utils.count_threshold(temp), + utils.annual_sum, + ], + ) + for temp in TEMP_THRESHOLDS + }, + "mean_effective_temperature": Transform( + source_variables=["mean_temperature", "relative_humidity", "wind_speed"], + transform_funcs=[utils.effective_temperature, utils.annual_mean], + encoding_scale=0.01, + encoding_offset=273.15, + ), + **{ + f"days_over_{temp}C_effective_temperature": Transform( + source_variables=["mean_temperature", "relative_humidity", "wind_speed"], + transform_funcs=[ + utils.effective_temperature, + utils.count_threshold(temp), + utils.annual_sum, + ], + ) + for temp in TEMP_THRESHOLDS + }, + "wind_speed": Transform( + source_variables=["wind_speed"], + encoding_scale=0.01, + ), + "relative_humidity": Transform( + source_variables=["relative_humidity"], + encoding_scale=0.01, + ), + "total_precipitation": Transform( + source_variables=["total_precipitation"], + transform_funcs=[utils.annual_sum], + encoding_scale=0.1, + ), +} + + +_P = typing.ParamSpec("_P") +_T = typing.TypeVar("_T") + + +def with_target_variable( + *, + allow_all: bool = False, +) -> clio.ClickOption[_P, _T]: + return clio.with_choice( + "target-variable", + "t", + allow_all=allow_all, + choices=list(TRANSFORM_MAP.keys()), + help="Variable to generate.", + ) + + +def generate_scenario_annual_main( + output_dir: str | Path, + target_variable: str, + scenario: str, +) -> None: + cd_data = ClimateDownscaleData(output_dir) + + transform = TRANSFORM_MAP[target_variable] + + annual_data = [] + for scenario_label, year_list in YEARS.items(): + scenario_label = scenario if scenario_label == "scenario" else "historical" # noqa: PLW2901 + for year in year_list: + print(f"Loading {scenario_label} {year} data for {target_variable}") + ds = transform( + *[ + cd_data.load_daily_results(scenario_label, source_variable, year) + for source_variable in transform + ] + ) + annual_data.append(ds) + + annual_ds = xr.concat(annual_data, dim="year") + cd_data.save_annual_results( + annual_ds, + scenario=scenario, + variable=target_variable, + encoding_kwargs=transform.encoding_kwargs, + ) + + +@click.command() # type: ignore[arg-type] +@clio.with_output_directory(DEFAULT_ROOT) +@with_target_variable() +@clio.with_cmip6_experiment() +def generate_scenario_annual_task( + output_dir: str, + target_variable: str, + cmip6_experiment: str, +) -> None: + generate_scenario_annual_main(output_dir, target_variable, cmip6_experiment) + + +@click.command() # type: ignore[arg-type] +@clio.with_output_directory(DEFAULT_ROOT) +@with_target_variable(allow_all=True) +@clio.with_cmip6_experiment(allow_all=True) +@clio.with_queue() +@clio.with_overwrite() +def generate_scenario_annual( + output_dir: str, + target_variable: str, + cmip6_experiment: str, + queue: str, + overwrite: bool, # noqa: FBT001 +) -> None: + cd_data = 
ClimateDownscaleData(output_dir) + + variables = ( + list(TRANSFORM_MAP.keys()) + if target_variable == clio.RUN_ALL + else [target_variable] + ) + experiments = ( + list(clio.VALID_CMIP6_EXPERIMENTS) + if cmip6_experiment == clio.RUN_ALL + else [cmip6_experiment] + ) + + ve = [] + complete = [] + for v, e in itertools.product(variables, experiments): + path = cd_data.annual_results_path(scenario=e, variable=v) + if not path.exists() or overwrite: + ve.append((v, e)) + else: + complete.append((v, e)) + + print(f"{len(complete)} tasks already done. {len(ve)} tasks to do.") + if not ve: + return + + jobmon.run_parallel( + runner="cdtask", + task_name="generate scenario_daily", + flat_node_args=( + ("target-variable", "cmip6-experiment"), + ve, + ), + task_args={ + "output-dir": output_dir, + }, + task_resources={ + "queue": queue, + "cores": 5, + "memory": "120G", + "runtime": "400m", + "project": "proj_rapidresponse", + }, + max_attempts=1, + ) diff --git a/src/climate_downscale/generate/scenario_daily.py b/src/climate_downscale/generate/scenario_daily.py index 6fc2590..6ec2b2a 100644 --- a/src/climate_downscale/generate/scenario_daily.py +++ b/src/climate_downscale/generate/scenario_daily.py @@ -1,7 +1,6 @@ import itertools import typing from pathlib import Path -from collections import defaultdict import click import numpy as np @@ -94,7 +93,7 @@ def load_and_shift_longitude( ) -> xr.Dataset: ds = xr.open_dataset(ds_path).sel(time=time_slice).compute() if ds.time.size == 0: - msg = 'No data in slice' + msg = "No data in slice" raise KeyError(msg) ds = ( ds.assign_coords(lon=(ds.lon + 180) % 360 - 180) @@ -109,7 +108,9 @@ def load_variable( year: str | int, ) -> xr.Dataset: if year == "reference": - ds = load_and_shift_longitude(member_path, utils.REFERENCE_PERIOD).rename({"time": "date"}) + ds = load_and_shift_longitude(member_path, utils.REFERENCE_PERIOD).rename( + {"time": "date"} + ) else: time_slice = slice(f"{year}-01-01", f"{year}-12-31") time_range = pd.date_range(f"{year}-01-01", f"{year}-12-31") @@ -133,7 +134,7 @@ def compute_anomaly( if anomaly_type == "additive": anomaly = target.groupby("date.month") - reference elif anomaly_type == "multiplicative": - anomaly = (target + 1).groupby("date.month") / (reference + 1) # type: ignore[operator] + anomaly = (target + 1).groupby("date.month") / (reference + 1) else: msg = f"Unknown anomaly type: {anomaly_type}" raise ValueError(msg) @@ -141,7 +142,7 @@ def compute_anomaly( return anomaly -def generate_scenario_daily_main( +def generate_scenario_daily_main( # noqa: C901, PLR0912, PLR0915 output_dir: str | Path, year: str | int, target_variable: str, @@ -155,10 +156,12 @@ def generate_scenario_daily_main( models_by_var = {} for source_variable in source_variables: - model_vars = set([ + model_vars = { p.stem.split(f"{cmip6_experiment}_")[1] - for p in cd_data.extracted_cmip6.glob(f"{source_variable}_{cmip6_experiment}*.nc") - ]) + for p in cd_data.extracted_cmip6.glob( + f"{source_variable}_{cmip6_experiment}*.nc" + ) + } models_by_var[source_variable] = model_vars shared_models = set.intersection(*models_by_var.values()) @@ -167,8 +170,10 @@ def generate_scenario_daily_main( if extra_models: print(var, extra_models) source_paths = [ - [cd_data.extracted_cmip6 / f'{source_variable}_{cmip6_experiment}_{model}.nc' - for source_variable in source_variables] + [ + cd_data.extracted_cmip6 / f"{source_variable}_{cmip6_experiment}_{model}.nc" + for source_variable in source_variables + ] for model in sorted(shared_models) ] @@ -179,8 +184,7 
@@ def generate_scenario_daily_main( year="reference", ) - anomalies = {} - source_paths = source_paths + anomalies: dict[str, xr.Dataset] = {} for i, sps in enumerate(source_paths): pid = f"{i+1}/{len(source_paths)} {sps[0].stem}" print(f"{pid}: Loading reference") @@ -198,29 +202,37 @@ def generate_scenario_daily_main( print(f"{pid}: computing anomaly") s_anomaly = compute_anomaly(scenario_reference, target, anomaly_type) key = f"{len(s_anomaly.latitude)}_{len(s_anomaly.longitude)}" - old = anomalies.get(key, 0) - if old: - for coord in ['latitude', 'longitude']: + + if key in anomalies: + old = anomalies[key] + for coord in ["latitude", "longitude"]: old_c = old[coord].to_numpy() new_c = s_anomaly[coord].to_numpy() - if np.abs(old_c - new_c).max() < 1e-5: - s_anomaly = s_anomaly.assign(**{coord: old_c}) + tol = 1e-5 + if np.abs(old_c - new_c).max() < tol: + s_anomaly = s_anomaly.assign({coord: old_c}) else: msg = f"{coord} does not match despite having the same subdivision" raise ValueError(msg) - anomalies[key] = old + s_anomaly - anomaly = 0 + anomalies[key] = old + s_anomaly + else: + anomalies[key] = s_anomaly + + anomaly = xr.Dataset() for i, (k, v) in enumerate(anomalies.items()): print(f"Downscaling {i+1}/{len(anomalies)}: {k}") - anomaly += utils.interpolate_to_target_latlon(v, method="linear") + if anomaly.nbytes: + anomaly += utils.interpolate_to_target_latlon(v, method="linear") + else: + anomaly = utils.interpolate_to_target_latlon(v, method="linear") anomaly /= len(source_paths) print("Computing scenario data") if anomaly_type == "additive": - scenario_data = historical_reference + anomaly.groupby('date.month') + scenario_data = historical_reference + anomaly.groupby("date.month") else: - scenario_data = historical_reference * anomaly.groupby('date.month') - scenario_data = scenario_data.drop_vars('month') + scenario_data = historical_reference * anomaly.groupby("date.month") + scenario_data = scenario_data.drop_vars("month") print("Saving") cd_data.save_daily_results( scenario_data, diff --git a/src/climate_downscale/generate/utils.py b/src/climate_downscale/generate/utils.py index 8ed5cb0..e949f27 100644 --- a/src/climate_downscale/generate/utils.py +++ b/src/climate_downscale/generate/utils.py @@ -1,3 +1,5 @@ +from collections.abc import Callable + import numpy as np import xarray as xr @@ -99,18 +101,41 @@ def daily_mean(ds: xr.Dataset) -> xr.Dataset: return ds.groupby("time.date").mean() +def annual_mean(ds: xr.Dataset) -> xr.Dataset: + return ds.groupby("date.year").mean() + + def daily_max(ds: xr.Dataset) -> xr.Dataset: return ds.groupby("time.date").max() +def annual_max(ds: xr.Dataset) -> xr.Dataset: + return ds.groupby("date.year").max() + + def daily_min(ds: xr.Dataset) -> xr.Dataset: return ds.groupby("time.date").min() +def annual_min(ds: xr.Dataset) -> xr.Dataset: + return ds.groupby("date.year").min() + + def daily_sum(ds: xr.Dataset) -> xr.Dataset: return ds.groupby("time.date").sum() +def annual_sum(ds: xr.Dataset) -> xr.Dataset: + return ds.groupby("date.year").sum() + + +def count_threshold(threshold: int | float) -> Callable[[xr.Dataset], xr.Dataset]: + def count(ds: xr.Dataset) -> xr.Dataset: + return ds > threshold + + return count + + ######################## # Data transformations # ######################## @@ -173,7 +198,7 @@ def rh_percent( def heat_index( temperature_c: xr.Dataset, - dewpoint_temperature_c: xr.Dataset, + relative_humidity_percent: xr.Dataset, ) -> xr.Dataset: """Calculate the heat index. 
@@ -183,16 +208,17 @@ def heat_index( ---------- temperature_c Temperature in Celsius - dewpoint_temperature_c - Dewpoint temperature in Celsius + relative_humidity_percent + Relative humidity as a percentage Returns ------- xr.Dataset Heat index in Celsius """ - t = temperature_c # Alias for simplicity in the formula - r = rh_percent(temperature_c, dewpoint_temperature_c) + # Alias for simplicity in the formula + t = temperature_c + r = relative_humidity_percent # Heat index formula from canonical multi-variable regression hi_raw = ( @@ -214,7 +240,7 @@ def heat_index( def humidex( temperature_c: xr.Dataset, - dewpoint_temperature_c: xr.Dataset, + relative_humidity_percent: xr.Dataset, ) -> xr.Dataset: """Calculate the humidex. @@ -224,23 +250,23 @@ def humidex( ---------- temperature_c Temperature in Celsius - dewpoint_temperature_c - Dewpoint temperature in Celsius + relative_humidity_percent + Relative humidity as a percentage Returns ------- xr.Dataset Humidex in Celsius """ - vp = buck_vapor_pressure(dewpoint_temperature_c) + svp = buck_vapor_pressure(temperature_c) + vp = relative_humidity_percent / 100 * svp return temperature_c + 0.5555 * (vp - 10) def effective_temperature( temperature_c: xr.Dataset, - dewpoint_temperature_c: xr.Dataset, - uas: xr.Dataset, - vas: xr.Dataset, + relative_humidity_percent: xr.Dataset, + wind_speed_m_s: xr.Dataset, ) -> xr.Dataset: """Calculate the effective temperature. @@ -250,12 +276,10 @@ def effective_temperature( ---------- temperature_c Temperature in Celsius - dewpoint_temperature_c - Dewpoint temperature in Celsius - uas - U-component of wind speed - vas - V-component of wind speed + relative_humidity_percent + Relative humidity as a percentage + wind_speed_m_s + Wind speed in m/s Returns ------- @@ -264,8 +288,8 @@ def effective_temperature( """ # Alias for simplicity in the formula t = temperature_c - r = rh_percent(temperature_c, dewpoint_temperature_c) - v = vector_magnitude(uas, vas) + r = relative_humidity_percent + v = wind_speed_m_s wind_adjustment = 1 / (1.76 + 1.4 * v**0.75) et = ( @@ -293,7 +317,7 @@ def interpolate_to_target_latlon( return ( ds.interp(longitude=TARGET_LON, latitude=TARGET_LAT, method=method) # type: ignore[arg-type] .interpolate_na(dim="longitude", method="nearest", fill_value="extrapolate") - .sortby('latitude') + .sortby("latitude") .interpolate_na(dim="latitude", method="nearest", fill_value="extrapolate") - .sortby('latitude', ascending=False) + .sortby("latitude", ascending=False) ) From ff75c62b6ee693215e16e4aebdf930d87c41e7ae Mon Sep 17 00:00:00 2001 From: James Collins Date: Wed, 19 Jun 2024 09:04:45 -0700 Subject: [PATCH 59/71] Catch empty workflow error --- src/climate_downscale/extract/era5.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/climate_downscale/extract/era5.py b/src/climate_downscale/extract/era5.py index 95f49f2..3322898 100644 --- a/src/climate_downscale/extract/era5.py +++ b/src/climate_downscale/extract/era5.py @@ -246,6 +246,9 @@ def extract_era5( # noqa: PLR0913 to_download.append(spec) to_compress.append(spec) + if not to_download: + print('No datasets to download') + while to_download: downloads_left = len(to_download) @@ -285,6 +288,10 @@ def extract_era5( # noqa: PLR0913 max_attempts=1, ) + if not to_compress: + print('No datasets to compress.') + return + jobmon.run_parallel( runner="cdtask", task_name="extract era5_compress", From 107e5c66889aa62094246da6ca662317482a5d99 Mon Sep 17 00:00:00 2001 From: James Collins Date: Wed, 19 Jun 2024 09:05:09 -0700 Subject: 
[PATCH 60/71] Get annual working --- .../generate/scenario_annual.py | 44 +++++++++++-------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/src/climate_downscale/generate/scenario_annual.py b/src/climate_downscale/generate/scenario_annual.py index 55aa233..bb32fbb 100644 --- a/src/climate_downscale/generate/scenario_annual.py +++ b/src/climate_downscale/generate/scenario_annual.py @@ -42,7 +42,9 @@ def __call__(self, *datasets: xr.Dataset) -> xr.Dataset: @property def encoding_kwargs(self) -> dict[str, float]: - return {"add_offset": self.encoding_offset, "scale_factor": self.encoding_scale} + if self.encoding_offset != 0. or self.encoding_scale != 1: + return {"add_offset": self.encoding_offset, "scale_factor": self.encoding_scale} + return {} TRANSFORM_MAP = { @@ -161,22 +163,26 @@ def generate_scenario_annual_main( transform = TRANSFORM_MAP[target_variable] - annual_data = [] - for scenario_label, year_list in YEARS.items(): - scenario_label = scenario if scenario_label == "scenario" else "historical" # noqa: PLW2901 - for year in year_list: - print(f"Loading {scenario_label} {year} data for {target_variable}") - ds = transform( - *[ - cd_data.load_daily_results(scenario_label, source_variable, year) - for source_variable in transform - ] + + variables = [] + for source_variable in transform: + paths = [] + for scenario_label, year_list in YEARS.items(): + s = "historical" if scenario_label == "historical" else scenario + for year in year_list: + paths.append(cd_data.daily_results_path(s, source_variable, year)) + variables.append( + xr.open_mfdataset( + paths, + parallel=True, + chunks={'date': -1, 'latitude': 601, 'longitude': 1200}, ) - annual_data.append(ds) - - annual_ds = xr.concat(annual_data, dim="year") + ) + ds = transform(*variables).compute() + + cd_data.save_annual_results( - annual_ds, + ds, scenario=scenario, variable=target_variable, encoding_kwargs=transform.encoding_kwargs, @@ -236,7 +242,7 @@ def generate_scenario_annual( jobmon.run_parallel( runner="cdtask", - task_name="generate scenario_daily", + task_name="generate scenario_annual", flat_node_args=( ("target-variable", "cmip6-experiment"), ve, @@ -246,9 +252,9 @@ def generate_scenario_annual( }, task_resources={ "queue": queue, - "cores": 5, - "memory": "120G", - "runtime": "400m", + "cores": 20, + "memory": "250G", + "runtime": "600m", "project": "proj_rapidresponse", }, max_attempts=1, From b643ca352c08a0e40f3589d5a0078bd8569c8056 Mon Sep 17 00:00:00 2001 From: collijk Date: Wed, 19 Jun 2024 09:27:59 -0700 Subject: [PATCH 61/71] make year usage more coherent --- src/climate_downscale/cli_options.py | 16 +++- .../downscale/prepare_training_data.py | 4 +- src/climate_downscale/extract/era5.py | 92 +++++++++++-------- .../generate/historical_daily.py | 6 +- .../generate/historical_reference.py | 3 +- .../generate/scenario_annual.py | 42 ++++----- .../generate/scenario_daily.py | 8 +- src/climate_downscale/generate/utils.py | 8 +- 8 files changed, 102 insertions(+), 77 deletions(-) diff --git a/src/climate_downscale/cli_options.py b/src/climate_downscale/cli_options.py index 38117ce..1612f6e 100644 --- a/src/climate_downscale/cli_options.py +++ b/src/climate_downscale/cli_options.py @@ -18,12 +18,14 @@ _P = ParamSpec("_P") -VALID_YEARS = [str(y) for y in range(1990, 2024)] +VALID_HISTORY_YEARS = [str(y) for y in range(1990, 2024)] +VALID_REFERENCE_YEARS = VALID_HISTORY_YEARS[-5:] +VALID_FORECAST_YEARS = [str(y) for y in range(2024, 2101)] def with_year( *, - years: list[str] = VALID_YEARS, + years: 
list[str], allow_all: bool = False, ) -> ClickOption[_P, _T]: return with_choice( @@ -132,12 +134,16 @@ def with_cmip6_source( def with_cmip6_experiment( *, allow_all: bool = False, + allow_historical: bool = False, ) -> ClickOption[_P, _T]: + choices = VALID_CMIP6_EXPERIMENTS[:] + if allow_historical: + choices.append("historical") return with_choice( "cmip6-experiment", "e", allow_all=allow_all, - choices=VALID_CMIP6_EXPERIMENTS, + choices=choices, help="CMIP6 experiment to extract.", ) @@ -204,7 +210,9 @@ def with_overwrite() -> ClickOption[_P, _T]: __all__ = [ - "VALID_YEARS", + "VALID_HISTORY_YEARS", + "VALID_REFERENCE_YEARS", + "VALID_FORECAST_YEARS", "VALID_MONTHS", "VALID_ERA5_VARIABLES", "VALID_ERA5_DATASETS", diff --git a/src/climate_downscale/downscale/prepare_training_data.py b/src/climate_downscale/downscale/prepare_training_data.py index 570bb4e..fbc7d7d 100644 --- a/src/climate_downscale/downscale/prepare_training_data.py +++ b/src/climate_downscale/downscale/prepare_training_data.py @@ -102,7 +102,7 @@ def prepare_training_data_main(output_dir: str | Path, year: str) -> None: @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) -@clio.with_year() +@clio.with_year(years=clio.VALID_HISTORY_YEARS) def prepare_training_data_task(output_dir: str, year: str) -> None: prepare_training_data_main(output_dir, year) @@ -115,7 +115,7 @@ def prepare_training_data(output_dir: str, queue: str) -> None: runner="cdtask", task_name="downscale prepare_training_data", node_args={ - "year": clio.VALID_YEARS, + "year": clio.VALID_HISTORY_YEARS, }, task_args={ "output-dir": output_dir, diff --git a/src/climate_downscale/extract/era5.py b/src/climate_downscale/extract/era5.py index 3322898..8b33e43 100644 --- a/src/climate_downscale/extract/era5.py +++ b/src/climate_downscale/extract/era5.py @@ -75,7 +75,7 @@ def download_era5_main( print(f"Failed to download {era5_dataset} {era5_variable} {year} {month}") if download_path.exists(): download_path.unlink() - raise e # noqa: TRY201 + raise e def unzip_and_compress_era5( @@ -138,7 +138,7 @@ def unzip_and_compress_era5( @clio.with_output_directory(DEFAULT_ROOT) @clio.with_era5_dataset() @clio.with_era5_variable() -@clio.with_year() +@clio.with_year(years=clio.VALID_HISTORY_YEARS) @clio.with_month() @click.option( "--user", @@ -166,7 +166,7 @@ def download_era5_task( @clio.with_output_directory(DEFAULT_ROOT) @clio.with_era5_dataset() @clio.with_era5_variable() -@clio.with_year() +@clio.with_year(years=clio.VALID_HISTORY_YEARS) @clio.with_month() def unzip_and_compress_era5_task( output_dir: str, @@ -184,40 +184,14 @@ def unzip_and_compress_era5_task( ) -@click.command() # type: ignore[arg-type] -@clio.with_output_directory(DEFAULT_ROOT) -@clio.with_era5_dataset(allow_all=True) -@clio.with_era5_variable(allow_all=True) -@clio.with_year(allow_all=True) -@clio.with_month(allow_all=True) -@clio.with_queue() -def extract_era5( # noqa: PLR0913 - output_dir: str, - era5_dataset: str, - era5_variable: str, - year: str, - month: str, - queue: str, -) -> None: - cddata = ClimateDownscaleData(output_dir) - cred_path = cddata.credentials_root / "copernicus.yaml" - credentials = yaml.safe_load(cred_path.read_text()) - users = list(credentials["keys"]) - jobs_per_user = 20 - - datasets = ( - clio.VALID_ERA5_DATASETS if era5_dataset == clio.RUN_ALL else [era5_dataset] - ) - variables = ( - clio.VALID_ERA5_VARIABLES if era5_variable == clio.RUN_ALL else [era5_variable] - ) - years = clio.VALID_YEARS if year == clio.RUN_ALL else [year] - 
months = clio.VALID_MONTHS if month == clio.RUN_ALL else [month] - +def build_task_lists( + cddata: ClimateDownscaleData, + *spec_variables: list[str], +) -> tuple[list[tuple[str, ...]], ...]: to_download = [] to_compress = [] complete = [] - for spec in itertools.product(datasets, variables, years, months): + for spec in itertools.product(*spec_variables): final_out_path = cddata.extracted_era5_path(*spec) download_path, _ = get_download_spec(final_out_path) @@ -239,15 +213,57 @@ def extract_era5( # noqa: PLR0913 elif download_path.exists(): to_compress.append(spec) elif final_out_path.exists(): - # We've already extracted this dataset (deleting the download path is the last step) + # We've already extracted this dataset + # (deleting the download path is the last step) complete.append(spec) continue else: to_download.append(spec) to_compress.append(spec) + return to_download, to_compress, complete + + +@click.command() # type: ignore[arg-type] +@clio.with_output_directory(DEFAULT_ROOT) +@clio.with_era5_dataset(allow_all=True) +@clio.with_era5_variable(allow_all=True) +@clio.with_year(years=clio.VALID_HISTORY_YEARS, allow_all=True) +@clio.with_month(allow_all=True) +@clio.with_queue() +def extract_era5( + output_dir: str, + era5_dataset: str, + era5_variable: str, + year: str, + month: str, + queue: str, +) -> None: + cddata = ClimateDownscaleData(output_dir) + cred_path = cddata.credentials_root / "copernicus.yaml" + credentials = yaml.safe_load(cred_path.read_text()) + users = list(credentials["keys"]) + jobs_per_user = 20 + + datasets = ( + clio.VALID_ERA5_DATASETS if era5_dataset == clio.RUN_ALL else [era5_dataset] + ) + variables = ( + clio.VALID_ERA5_VARIABLES if era5_variable == clio.RUN_ALL else [era5_variable] + ) + years = clio.VALID_HISTORY_YEARS if year == clio.RUN_ALL else [year] + months = clio.VALID_MONTHS if month == clio.RUN_ALL else [month] + + to_download, to_compress, complete = build_task_lists( + cddata, + datasets, + variables, + years, + months, + ) + if not to_download: - print('No datasets to download') + print("No datasets to download") while to_download: downloads_left = len(to_download) @@ -256,7 +272,7 @@ def extract_era5( # noqa: PLR0913 for _ in range(jobs_per_user): for user in users: if to_download: - download_batch.append((*to_download.pop(), user)) + download_batch.append((*to_download.pop(), user)) # noqa: PERF401 if len(download_batch) != min(len(users) * jobs_per_user, downloads_left): msg = "Download batch size is incorrect" raise ValueError(msg) @@ -289,7 +305,7 @@ def extract_era5( # noqa: PLR0913 ) if not to_compress: - print('No datasets to compress.') + print("No datasets to compress.") return jobmon.run_parallel( diff --git a/src/climate_downscale/generate/historical_daily.py b/src/climate_downscale/generate/historical_daily.py index d1bcb2d..5c1dc6d 100644 --- a/src/climate_downscale/generate/historical_daily.py +++ b/src/climate_downscale/generate/historical_daily.py @@ -175,7 +175,7 @@ def generate_historical_daily_main( @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) -@clio.with_year() +@clio.with_year(years=clio.VALID_HISTORY_YEARS) @with_target_variable() def generate_historical_daily_task( output_dir: str, @@ -187,7 +187,7 @@ def generate_historical_daily_task( @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) -@clio.with_year(allow_all=True) +@clio.with_year(years=clio.VALID_HISTORY_YEARS, allow_all=True) @with_target_variable(allow_all=True) @clio.with_queue() 
@clio.with_overwrite() @@ -200,7 +200,7 @@ def generate_historical_daily( ) -> None: cd_data = ClimateDownscaleData(output_dir) - years = clio.VALID_YEARS if year == clio.RUN_ALL else [year] + years = clio.VALID_HISTORY_YEARS if year == clio.RUN_ALL else [year] variables = ( list(TRANSFORM_MAP.keys()) if target_variable == clio.RUN_ALL diff --git a/src/climate_downscale/generate/historical_reference.py b/src/climate_downscale/generate/historical_reference.py index bb29ea1..d2a3f87 100644 --- a/src/climate_downscale/generate/historical_reference.py +++ b/src/climate_downscale/generate/historical_reference.py @@ -4,7 +4,6 @@ from climate_downscale import cli_options as clio from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData -from climate_downscale.generate import utils from climate_downscale.generate.historical_daily import ( TRANSFORM_MAP, with_target_variable, @@ -18,7 +17,7 @@ def generate_historical_reference_main( cd_data = ClimateDownscaleData(output_dir) paths = [ cd_data.daily_results_path("historical", target_variable, year) - for year in utils.REFERENCE_YEARS + for year in clio.VALID_REFERENCE_YEARS ] print(f"Building reference data from: {len(paths)} files.") diff --git a/src/climate_downscale/generate/scenario_annual.py b/src/climate_downscale/generate/scenario_annual.py index bb32fbb..dd880f1 100644 --- a/src/climate_downscale/generate/scenario_annual.py +++ b/src/climate_downscale/generate/scenario_annual.py @@ -9,12 +9,7 @@ from climate_downscale import cli_options as clio from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData from climate_downscale.generate import utils -from climate_downscale.generate.scenario_daily import VALID_YEARS -YEARS = { - "historical": clio.VALID_YEARS, - "scenario": VALID_YEARS, -} TEMP_THRESHOLDS = list(range(20, 35)) @@ -42,8 +37,11 @@ def __call__(self, *datasets: xr.Dataset) -> xr.Dataset: @property def encoding_kwargs(self) -> dict[str, float]: - if self.encoding_offset != 0. 
or self.encoding_scale != 1: - return {"add_offset": self.encoding_offset, "scale_factor": self.encoding_scale} + if self.encoding_offset != 0.0 or self.encoding_scale != 1: + return { + "add_offset": self.encoding_offset, + "scale_factor": self.encoding_scale, + } return {} @@ -160,27 +158,29 @@ def generate_scenario_annual_main( scenario: str, ) -> None: cd_data = ClimateDownscaleData(output_dir) - transform = TRANSFORM_MAP[target_variable] - + years = ( + clio.VALID_HISTORY_YEARS + if scenario == "historical" + else clio.VALID_FORECAST_YEARS + ) + variables = [] for source_variable in transform: - paths = [] - for scenario_label, year_list in YEARS.items(): - s = "historical" if scenario_label == "historical" else scenario - for year in year_list: - paths.append(cd_data.daily_results_path(s, source_variable, year)) + paths = [ + cd_data.daily_results_path(scenario, source_variable, year) + for year in years + ] variables.append( xr.open_mfdataset( - paths, - parallel=True, - chunks={'date': -1, 'latitude': 601, 'longitude': 1200}, + paths, + parallel=True, + chunks={"date": -1, "latitude": 601, "longitude": 1200}, ) ) ds = transform(*variables).compute() - - + cd_data.save_annual_results( ds, scenario=scenario, @@ -192,7 +192,7 @@ def generate_scenario_annual_main( @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) @with_target_variable() -@clio.with_cmip6_experiment() +@clio.with_cmip6_experiment(allow_historical=True) def generate_scenario_annual_task( output_dir: str, target_variable: str, @@ -204,7 +204,7 @@ def generate_scenario_annual_task( @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) @with_target_variable(allow_all=True) -@clio.with_cmip6_experiment(allow_all=True) +@clio.with_cmip6_experiment(allow_all=True, allow_historical=True) @clio.with_queue() @clio.with_overwrite() def generate_scenario_annual( diff --git a/src/climate_downscale/generate/scenario_daily.py b/src/climate_downscale/generate/scenario_daily.py index 6ec2b2a..c38b592 100644 --- a/src/climate_downscale/generate/scenario_daily.py +++ b/src/climate_downscale/generate/scenario_daily.py @@ -12,8 +12,6 @@ from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData from climate_downscale.generate import utils -VALID_YEARS = [str(y) for y in range(max(utils.REFERENCE_YEARS) + 1, 2101)] - # Map from source variable to a unit conversion function CONVERT_MAP = { "uas": utils.scale_wind_speed_height, @@ -248,7 +246,7 @@ def generate_scenario_daily_main( # noqa: C901, PLR0912, PLR0915 @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) -@clio.with_year(years=VALID_YEARS) +@clio.with_year(years=clio.VALID_FORECAST_YEARS) @with_target_variable() @clio.with_cmip6_experiment() def generate_scenario_daily_task( @@ -259,7 +257,7 @@ def generate_scenario_daily_task( @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) -@clio.with_year(years=VALID_YEARS, allow_all=True) +@clio.with_year(years=clio.VALID_FORECAST_YEARS, allow_all=True) @with_target_variable(allow_all=True) @clio.with_cmip6_experiment(allow_all=True) @clio.with_queue() @@ -274,7 +272,7 @@ def generate_scenario_daily( ) -> None: cd_data = ClimateDownscaleData(output_dir) - years = VALID_YEARS if year == clio.RUN_ALL else [year] + years = clio.VALID_FORECAST_YEARS if year == clio.RUN_ALL else [year] variables = ( list(TRANSFORM_MAP.keys()) if target_variable == clio.RUN_ALL diff --git a/src/climate_downscale/generate/utils.py 
b/src/climate_downscale/generate/utils.py index e949f27..a2864c2 100644 --- a/src/climate_downscale/generate/utils.py +++ b/src/climate_downscale/generate/utils.py @@ -3,8 +3,12 @@ import numpy as np import xarray as xr -REFERENCE_YEARS = list(range(2018, 2024)) -REFERENCE_PERIOD = slice(f"{REFERENCE_YEARS[0]}-01-01", f"{REFERENCE_YEARS[-1]}-12-31") +import climate_downscale.cli_options as clio + +REFERENCE_PERIOD = slice( + f"{clio.VALID_REFERENCE_YEARS[0]}-01-01", + f"{clio.VALID_REFERENCE_YEARS[-1]}-12-31", +) TARGET_LON = xr.DataArray( np.round(np.arange(-180.0, 180.0, 0.1, dtype="float32"), 1), dims="longitude" ) From cd67a388f89d62a83c966dba0d1ba5d28850cb41 Mon Sep 17 00:00:00 2001 From: collijk Date: Wed, 19 Jun 2024 09:38:25 -0700 Subject: [PATCH 62/71] Make scenario run by year --- src/climate_downscale/data.py | 9 ++- .../generate/scenario_annual.py | 74 +++++++++---------- 2 files changed, 43 insertions(+), 40 deletions(-) diff --git a/src/climate_downscale/data.py b/src/climate_downscale/data.py index cffdf82..5bf53f7 100644 --- a/src/climate_downscale/data.py +++ b/src/climate_downscale/data.py @@ -163,17 +163,20 @@ def load_daily_results( def annual_results(self) -> Path: return self.results / "annual" - def annual_results_path(self, scenario: str, variable: str) -> Path: - return self.annual_results / scenario / f"{variable}.nc" + def annual_results_path( + self, scenario: str, variable: str, year: int | str + ) -> Path: + return self.annual_results / scenario / variable / f"{year}.nc" def save_annual_results( self, results_ds: xr.Dataset, scenario: str, variable: str, + year: int | str, encoding_kwargs: dict[str, Any], ) -> None: - path = self.annual_results_path(scenario, variable) + path = self.annual_results_path(scenario, variable, year) mkdir(path.parent, exist_ok=True, parents=True) touch(path, exist_ok=True) diff --git a/src/climate_downscale/generate/scenario_annual.py b/src/climate_downscale/generate/scenario_annual.py index dd880f1..300c014 100644 --- a/src/climate_downscale/generate/scenario_annual.py +++ b/src/climate_downscale/generate/scenario_annual.py @@ -153,38 +153,22 @@ def with_target_variable( def generate_scenario_annual_main( - output_dir: str | Path, - target_variable: str, - scenario: str, + output_dir: str | Path, target_variable: str, scenario: str, year: str ) -> None: cd_data = ClimateDownscaleData(output_dir) transform = TRANSFORM_MAP[target_variable] - years = ( - clio.VALID_HISTORY_YEARS - if scenario == "historical" - else clio.VALID_FORECAST_YEARS - ) - - variables = [] - for source_variable in transform: - paths = [ - cd_data.daily_results_path(scenario, source_variable, year) - for year in years + ds = transform( + *[ + xr.open_dataset(cd_data.daily_results_path(scenario, source_variable, year)) + for source_variable in transform ] - variables.append( - xr.open_mfdataset( - paths, - parallel=True, - chunks={"date": -1, "latitude": 601, "longitude": 1200}, - ) - ) - ds = transform(*variables).compute() - + ) cd_data.save_annual_results( ds, scenario=scenario, variable=target_variable, + year=year, encoding_kwargs=transform.encoding_kwargs, ) @@ -193,12 +177,24 @@ def generate_scenario_annual_main( @clio.with_output_directory(DEFAULT_ROOT) @with_target_variable() @clio.with_cmip6_experiment(allow_historical=True) +@clio.with_year(years=clio.VALID_HISTORY_YEARS + clio.VALID_FORECAST_YEARS) def generate_scenario_annual_task( output_dir: str, target_variable: str, cmip6_experiment: str, + year: str, ) -> None: - 
generate_scenario_annual_main(output_dir, target_variable, cmip6_experiment) + if year in clio.VALID_HISTORY_YEARS and cmip6_experiment != "historical": + msg = "Historical years must use the 'historical' experiment." + raise ValueError(msg) + if year in clio.VALID_FORECAST_YEARS and cmip6_experiment == "historical": + msg = ( + f"Forecast years must use a future experiment: " + f"{clio.VALID_CMIP6_EXPERIMENTS}." + ) + raise ValueError(msg) + + generate_scenario_annual_main(output_dir, target_variable, cmip6_experiment, year) @click.command() # type: ignore[arg-type] @@ -227,34 +223,38 @@ def generate_scenario_annual( else [cmip6_experiment] ) - ve = [] + vey = [] complete = [] for v, e in itertools.product(variables, experiments): - path = cd_data.annual_results_path(scenario=e, variable=v) - if not path.exists() or overwrite: - ve.append((v, e)) - else: - complete.append((v, e)) + year_list = ( + clio.VALID_HISTORY_YEARS if e == "historical" else clio.VALID_FORECAST_YEARS + ) + for y in year_list: + path = cd_data.annual_results_path(scenario=e, variable=v, year=y) + if not path.exists() or overwrite: + vey.append((v, e, y)) + else: + complete.append((v, e, y)) - print(f"{len(complete)} tasks already done. {len(ve)} tasks to do.") - if not ve: + print(f"{len(complete)} tasks already done. {len(vey)} tasks to do.") + if not vey: return jobmon.run_parallel( runner="cdtask", task_name="generate scenario_annual", flat_node_args=( - ("target-variable", "cmip6-experiment"), - ve, + ("target-variable", "cmip6-experiment", "year"), + vey, ), task_args={ "output-dir": output_dir, }, task_resources={ "queue": queue, - "cores": 20, - "memory": "250G", - "runtime": "600m", + "cores": 2, + "memory": "100G", + "runtime": "120m", "project": "proj_rapidresponse", }, max_attempts=1, From f7e42fe597132e2ffa43e2a3214523b4783dbe4b Mon Sep 17 00:00:00 2001 From: collijk Date: Wed, 19 Jun 2024 12:56:50 -0700 Subject: [PATCH 63/71] Use transform class everywhere --- .../generate/derived_daily.py | 0 .../generate/historical_daily.py | 66 +++++----- .../generate/scenario_annual.py | 65 +++------ .../generate/scenario_daily.py | 124 ++++++++++-------- src/climate_downscale/generate/utils.py | 30 +++++ 5 files changed, 153 insertions(+), 132 deletions(-) create mode 100644 src/climate_downscale/generate/derived_daily.py diff --git a/src/climate_downscale/generate/derived_daily.py b/src/climate_downscale/generate/derived_daily.py new file mode 100644 index 0000000..e69de29 diff --git a/src/climate_downscale/generate/historical_daily.py b/src/climate_downscale/generate/historical_daily.py index 5c1dc6d..a27e27c 100644 --- a/src/climate_downscale/generate/historical_daily.py +++ b/src/climate_downscale/generate/historical_daily.py @@ -32,35 +32,38 @@ # - a transformation function # - a tuple of offset and scale factors for the output for serialization TRANSFORM_MAP = { - "mean_temperature": ( - ["2m_temperature"], - utils.daily_mean, - (273.15, 0.01), + "mean_temperature": utils.Transform( + source_variables=["2m_temperature"], + transform_funcs=[utils.daily_mean], + encoding_scale=0.01, + encoding_offset=273.15, ), - "max_temperature": ( - ["2m_temperature"], - utils.daily_max, - (273.15, 0.01), + "max_temperature": utils.Transform( + source_variables=["2m_temperature"], + transform_funcs=[utils.daily_max], + encoding_scale=0.01, + encoding_offset=273.15, ), - "min_temperature": ( - ["2m_temperature"], - utils.daily_min, - (273.15, 0.01), + "min_temperature": utils.Transform( + source_variables=["2m_temperature"], + 
transform_funcs=[utils.daily_min], + encoding_scale=0.01, + encoding_offset=273.15, ), - "wind_speed": ( - ["10m_u_component_of_wind", "10m_v_component_of_wind"], - lambda x, y: utils.daily_mean(utils.vector_magnitude(x, y)), - (0, 0.01), + "wind_speed": utils.Transform( + source_variables=["10m_u_component_of_wind", "10m_v_component_of_wind"], + transform_funcs=[utils.vector_magnitude, utils.daily_mean], + encoding_scale=0.01, ), - "relative_humidity": ( - ["2m_temperature", "2m_dewpoint_temperature"], - lambda x, y: utils.daily_mean(utils.rh_percent(x, y)), - (0, 0.01), + "relative_humidity": utils.Transform( + source_variables=["2m_temperature", "2m_dewpoint_temperature"], + transform_funcs=[utils.rh_percent, utils.daily_mean], + encoding_scale=0.01, ), - "total_precipitation": ( - ["total_precipitation"], - utils.daily_sum, - (0, 0.1), + "total_precipitation": utils.Transform( + source_variables=["total_precipitation"], + transform_funcs=[utils.daily_sum], + encoding_scale=0.1, ), } @@ -128,17 +131,17 @@ def generate_historical_daily_main( ) -> None: cd_data = ClimateDownscaleData(output_dir) - source_variables, collapse_fun, (e_offset, e_scale) = TRANSFORM_MAP[target_variable] + transform = TRANSFORM_MAP[target_variable] datasets = [] for month in range(1, 13): month_str = f"{month:02d}" print(f"loading single-levels for {month_str}") single_level = [ load_variable(cd_data, sv, year, month_str, "single-levels") - for sv in source_variables + for sv in transform.source_variables ] print("collapsing") - ds = collapse_fun(*single_level).compute() # type: ignore[operator] + ds = transform(*single_level).compute() # collapsing often screws the date dtype, so fix it ds = ds.assign(date=pd.to_datetime(ds.date)) @@ -148,11 +151,11 @@ def generate_historical_daily_main( print(f"loading land for {month_str}") land = [ load_variable(cd_data, sv, year, month_str, "land") - for sv in source_variables + for sv in transform.source_variables ] print("collapsing") with dask.config.set(**{"array.slicing.split_large_chunks": False}): # type: ignore[arg-type] - ds_land = collapse_fun(*land).compute() # type: ignore[operator] + ds_land = transform(*land).compute() ds_land = ds_land.assign(date=pd.to_datetime(ds_land.date)) print("combining") @@ -166,10 +169,7 @@ def generate_historical_daily_main( scenario="historical", variable=target_variable, year=year, - encoding_kwargs={ - "add_offset": e_offset, - "scale_factor": e_scale, - }, + encoding_kwargs=transform.encoding_kwargs, ) diff --git a/src/climate_downscale/generate/scenario_annual.py b/src/climate_downscale/generate/scenario_annual.py index 300c014..0457ba3 100644 --- a/src/climate_downscale/generate/scenario_annual.py +++ b/src/climate_downscale/generate/scenario_annual.py @@ -13,69 +13,40 @@ TEMP_THRESHOLDS = list(range(20, 35)) -class Transform: - def __init__( - self, - source_variables: list[str], - transform_funcs: list[typing.Callable[..., xr.Dataset]] = [utils.annual_mean], # noqa: B006 - encoding_scale: float = 1.0, - encoding_offset: float = 0.0, - ): - self.source_variables = source_variables - self.transform_funcs = transform_funcs - self.encoding_scale = encoding_scale - self.encoding_offset = encoding_offset - - def __iter__(self) -> typing.Iterator[str]: - return iter(self.source_variables) - - def __call__(self, *datasets: xr.Dataset) -> xr.Dataset: - res = self.transform_funcs[0](*datasets) - for transform_func in self.transform_funcs[1:]: - res = transform_func(res) - return res - - @property - def encoding_kwargs(self) -> 
dict[str, float]: - if self.encoding_offset != 0.0 or self.encoding_scale != 1: - return { - "add_offset": self.encoding_offset, - "scale_factor": self.encoding_scale, - } - return {} - - TRANSFORM_MAP = { - "mean_temperature": Transform( + "mean_temperature": utils.Transform( source_variables=["mean_temperature"], + transform_funcs=[utils.annual_mean], encoding_scale=0.01, encoding_offset=273.15, ), - "mean_high_temperature": Transform( + "mean_high_temperature": utils.Transform( source_variables=["max_temperature"], + transform_funcs=[utils.annual_mean], encoding_scale=0.01, encoding_offset=273.15, ), - "mean_low_temperature": Transform( + "mean_low_temperature": utils.Transform( source_variables=["min_temperature"], + transform_funcs=[utils.annual_mean], encoding_scale=0.01, encoding_offset=273.15, ), **{ - f"days_over_{temp}C": Transform( + f"days_over_{temp}C": utils.Transform( source_variables=["mean_temperature"], transform_funcs=[utils.count_threshold(temp), utils.annual_sum], ) for temp in TEMP_THRESHOLDS }, - "mean_heat_index": Transform( + "mean_heat_index": utils.Transform( source_variables=["mean_temperature", "relative_humidity"], transform_funcs=[utils.heat_index, utils.annual_mean], encoding_scale=0.01, encoding_offset=273.15, ), **{ - f"days_over_{temp}C_heat_index": Transform( + f"days_over_{temp}C_heat_index": utils.Transform( source_variables=["mean_temperature", "relative_humidity"], transform_funcs=[ utils.heat_index, @@ -85,14 +56,14 @@ def encoding_kwargs(self) -> dict[str, float]: ) for temp in TEMP_THRESHOLDS }, - "mean_humidex": Transform( + "mean_humidex": utils.Transform( source_variables=["mean_temperature", "relative_humidity"], transform_funcs=[utils.humidex, utils.annual_mean], encoding_scale=0.01, encoding_offset=273.15, ), **{ - f"days_over_{temp}C_humidex": Transform( + f"days_over_{temp}C_humidex": utils.Transform( source_variables=["mean_temperature", "relative_humidity"], transform_funcs=[ utils.humidex, @@ -102,14 +73,14 @@ def encoding_kwargs(self) -> dict[str, float]: ) for temp in TEMP_THRESHOLDS }, - "mean_effective_temperature": Transform( + "mean_effective_temperature": utils.Transform( source_variables=["mean_temperature", "relative_humidity", "wind_speed"], transform_funcs=[utils.effective_temperature, utils.annual_mean], encoding_scale=0.01, encoding_offset=273.15, ), **{ - f"days_over_{temp}C_effective_temperature": Transform( + f"days_over_{temp}C_effective_temperature": utils.Transform( source_variables=["mean_temperature", "relative_humidity", "wind_speed"], transform_funcs=[ utils.effective_temperature, @@ -119,15 +90,17 @@ def encoding_kwargs(self) -> dict[str, float]: ) for temp in TEMP_THRESHOLDS }, - "wind_speed": Transform( + "wind_speed": utils.Transform( source_variables=["wind_speed"], + transform_funcs=[utils.annual_mean], encoding_scale=0.01, ), - "relative_humidity": Transform( + "relative_humidity": utils.Transform( source_variables=["relative_humidity"], + transform_funcs=[utils.annual_mean], encoding_scale=0.01, ), - "total_precipitation": Transform( + "total_precipitation": utils.Transform( source_variables=["total_precipitation"], transform_funcs=[utils.annual_sum], encoding_scale=0.1, @@ -161,7 +134,7 @@ def generate_scenario_annual_main( ds = transform( *[ xr.open_dataset(cd_data.daily_results_path(scenario, source_variable, year)) - for source_variable in transform + for source_variable in transform.source_variables ] ) cd_data.save_annual_results( diff --git a/src/climate_downscale/generate/scenario_daily.py 
b/src/climate_downscale/generate/scenario_daily.py index c38b592..56081b5 100644 --- a/src/climate_downscale/generate/scenario_daily.py +++ b/src/climate_downscale/generate/scenario_daily.py @@ -28,41 +28,56 @@ # - a transformation function # - a tuple of offset and scale factors for the output for serialization # - an anomaly type -TRANSFORM_MAP = { +TRANSFORM_MAP: dict[str, tuple[utils.Transform, str]] = { "mean_temperature": ( - ["tas"], - utils.identity, - (273.15, 0.01), + utils.Transform( + source_variables=["tas"], + transform_funcs=[utils.identity], + encoding_scale=0.01, + encoding_offset=273.15, + ), "additive", ), "max_temperature": ( - ["tasmax"], - utils.identity, - (273.15, 0.01), + utils.Transform( + source_variables=["tasmax"], + transform_funcs=[utils.identity], + encoding_scale=0.01, + encoding_offset=273.15, + ), "additive", ), "min_temperature": ( - ["tasmin"], - utils.identity, - (273.15, 0.01), + utils.Transform( + source_variables=["tasmin"], + transform_funcs=[utils.identity], + encoding_scale=0.01, + encoding_offset=273.15, + ), "additive", ), "wind_speed": ( - ["uas", "vas"], - utils.vector_magnitude, - (0, 0.01), + utils.Transform( + source_variables=["uas", "vas"], + transform_funcs=[utils.vector_magnitude], + encoding_scale=0.01, + ), "multiplicative", ), "relative_humidity": ( - ["hurs"], - utils.identity, - (0, 0.01), + utils.Transform( + source_variables=["hurs"], + transform_funcs=[utils.identity], + encoding_scale=0.01, + ), "multiplicative", ), "total_precipitation": ( - ["pr"], - utils.identity, - (0, 0.1), + utils.Transform( + source_variables=["pr"], + transform_funcs=[utils.identity], + encoding_scale=0.1, + ), "multiplicative", ), } @@ -85,6 +100,36 @@ def with_target_variable( ) +def get_source_paths( + cd_data: ClimateDownscaleData, + source_variables: list[str], + cmip6_experiment: str, +) -> list[list[Path]]: + models_by_var = {} + for source_variable in source_variables: + model_vars = { + p.stem.split(f"{cmip6_experiment}_")[1] + for p in cd_data.extracted_cmip6.glob( + f"{source_variable}_{cmip6_experiment}*.nc" + ) + } + models_by_var[source_variable] = model_vars + + shared_models = set.intersection(*models_by_var.values()) + for var, models in models_by_var.items(): + extra_models = models.difference(shared_models) + if extra_models: + print(var, extra_models) + source_paths = [ + [ + cd_data.extracted_cmip6 / f"{source_variable}_{cmip6_experiment}_{model}.nc" + for source_variable in source_variables + ] + for model in sorted(shared_models) + ] + return source_paths + + def load_and_shift_longitude( ds_path: str | Path, time_slice: slice, @@ -140,7 +185,7 @@ def compute_anomaly( return anomaly -def generate_scenario_daily_main( # noqa: C901, PLR0912, PLR0915 +def generate_scenario_daily_main( # noqa: PLR0912 output_dir: str | Path, year: str | int, target_variable: str, @@ -148,33 +193,11 @@ def generate_scenario_daily_main( # noqa: C901, PLR0912, PLR0915 ) -> None: cd_data = ClimateDownscaleData(output_dir) - (source_variables, transform_fun, (e_offset, e_scale), anomaly_type) = ( - TRANSFORM_MAP[target_variable] + transform, anomaly_type = TRANSFORM_MAP[target_variable] + source_paths = get_source_paths( + cd_data, transform.source_variables, cmip6_experiment ) - models_by_var = {} - for source_variable in source_variables: - model_vars = { - p.stem.split(f"{cmip6_experiment}_")[1] - for p in cd_data.extracted_cmip6.glob( - f"{source_variable}_{cmip6_experiment}*.nc" - ) - } - models_by_var[source_variable] = model_vars - - shared_models 
= set.intersection(*models_by_var.values()) - for var, models in models_by_var.items(): - extra_models = models.difference(shared_models) - if extra_models: - print(var, extra_models) - source_paths = [ - [ - cd_data.extracted_cmip6 / f"{source_variable}_{cmip6_experiment}_{model}.nc" - for source_variable in source_variables - ] - for model in sorted(shared_models) - ] - print("loading historical reference") historical_reference = cd_data.load_daily_results( scenario="historical", @@ -187,13 +210,11 @@ def generate_scenario_daily_main( # noqa: C901, PLR0912, PLR0915 pid = f"{i+1}/{len(source_paths)} {sps[0].stem}" print(f"{pid}: Loading reference") try: - scenario_reference = transform_fun( # type: ignore[operator] + scenario_reference = transform( *[load_variable(sp, "reference") for sp in sps] ) print(f"{pid}: Loading target") - target = transform_fun( # type: ignore[operator] - *[load_variable(sp, year) for sp in sps] - ) + target = transform(*[load_variable(sp, year) for sp in sps]) except KeyError: print(f"{pid}: Bad formatting, skipping...") continue @@ -237,10 +258,7 @@ def generate_scenario_daily_main( # noqa: C901, PLR0912, PLR0915 scenario=cmip6_experiment, variable=target_variable, year=year, - encoding_kwargs={ - "add_offset": e_offset, - "scale_factor": e_scale, - }, + encoding_kwargs=transform.encoding_kwargs, ) diff --git a/src/climate_downscale/generate/utils.py b/src/climate_downscale/generate/utils.py index a2864c2..7a77713 100644 --- a/src/climate_downscale/generate/utils.py +++ b/src/climate_downscale/generate/utils.py @@ -1,3 +1,4 @@ +import typing from collections.abc import Callable import numpy as np @@ -325,3 +326,32 @@ def interpolate_to_target_latlon( .interpolate_na(dim="latitude", method="nearest", fill_value="extrapolate") .sortby("latitude", ascending=False) ) + + +class Transform: + def __init__( + self, + source_variables: list[str], + transform_funcs: list[typing.Callable[..., xr.Dataset]], + encoding_scale: float = 1.0, + encoding_offset: float = 0.0, + ): + self.source_variables = source_variables + self.transform_funcs = transform_funcs + self.encoding_scale = encoding_scale + self.encoding_offset = encoding_offset + + def __call__(self, *datasets: xr.Dataset) -> xr.Dataset: + res = self.transform_funcs[0](*datasets) + for transform_func in self.transform_funcs[1:]: + res = transform_func(res) + return res + + @property + def encoding_kwargs(self) -> dict[str, float]: + if self.encoding_offset != 0.0 or self.encoding_scale != 1: + return { + "add_offset": self.encoding_offset, + "scale_factor": self.encoding_scale, + } + return {} From 1611af2ef35ad4f36850cea335eda6dcd4a5d19f Mon Sep 17 00:00:00 2001 From: collijk Date: Wed, 19 Jun 2024 13:02:14 -0700 Subject: [PATCH 64/71] pullback with_target_variable --- src/climate_downscale/cli_options.py | 14 ++++++++++++ .../generate/historical_daily.py | 21 ++---------------- .../generate/historical_reference.py | 5 ++--- .../generate/scenario_annual.py | 22 ++----------------- .../generate/scenario_daily.py | 22 ++----------------- 5 files changed, 22 insertions(+), 62 deletions(-) diff --git a/src/climate_downscale/cli_options.py b/src/climate_downscale/cli_options.py index 1612f6e..b1888da 100644 --- a/src/climate_downscale/cli_options.py +++ b/src/climate_downscale/cli_options.py @@ -172,6 +172,20 @@ def with_cmip6_variable( ) +def with_target_variable( + *, + variable_names: list[str], + allow_all: bool = False, +) -> ClickOption[_P, _T]: + return with_choice( + "target-variable", + "t", + 
allow_all=allow_all, + choices=variable_names, + help="Variable to generate.", + ) + + STRIDE = 30 LATITUDES = [str(lat) for lat in range(-90, 90, STRIDE)] LONGITUDES = [str(lon) for lon in range(-180, 180, STRIDE)] diff --git a/src/climate_downscale/generate/historical_daily.py b/src/climate_downscale/generate/historical_daily.py index a27e27c..5eeddde 100644 --- a/src/climate_downscale/generate/historical_daily.py +++ b/src/climate_downscale/generate/historical_daily.py @@ -1,5 +1,4 @@ import itertools -import typing from pathlib import Path import click @@ -67,22 +66,6 @@ ), } -_P = typing.ParamSpec("_P") -_T = typing.TypeVar("_T") - - -def with_target_variable( - *, - allow_all: bool = False, -) -> clio.ClickOption[_P, _T]: - return clio.with_choice( - "target-variable", - "t", - allow_all=allow_all, - choices=list(TRANSFORM_MAP.keys()), - help="Variable to generate.", - ) - def load_and_shift_longitude(ds_path: str | Path) -> xr.Dataset: ds = xr.open_dataset(ds_path).chunk(time=24) @@ -176,7 +159,7 @@ def generate_historical_daily_main( @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) @clio.with_year(years=clio.VALID_HISTORY_YEARS) -@with_target_variable() +@clio.with_target_variable(variable_names=list(TRANSFORM_MAP)) def generate_historical_daily_task( output_dir: str, year: str, @@ -188,7 +171,7 @@ def generate_historical_daily_task( @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) @clio.with_year(years=clio.VALID_HISTORY_YEARS, allow_all=True) -@with_target_variable(allow_all=True) +@clio.with_target_variable(variable_names=list(TRANSFORM_MAP), allow_all=True) @clio.with_queue() @clio.with_overwrite() def generate_historical_daily( diff --git a/src/climate_downscale/generate/historical_reference.py b/src/climate_downscale/generate/historical_reference.py index d2a3f87..de030a2 100644 --- a/src/climate_downscale/generate/historical_reference.py +++ b/src/climate_downscale/generate/historical_reference.py @@ -6,7 +6,6 @@ from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData from climate_downscale.generate.historical_daily import ( TRANSFORM_MAP, - with_target_variable, ) @@ -54,7 +53,7 @@ def generate_historical_reference_main( @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) -@with_target_variable() +@clio.with_target_variable(variable_names=list(TRANSFORM_MAP)) def generate_historical_reference_task( output_dir: str, target_variable: str, @@ -64,7 +63,7 @@ def generate_historical_reference_task( @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) -@with_target_variable(allow_all=True) +@clio.with_target_variable(variable_names=list(TRANSFORM_MAP)) @clio.with_queue() def generate_historical_reference( output_dir: str, diff --git a/src/climate_downscale/generate/scenario_annual.py b/src/climate_downscale/generate/scenario_annual.py index 0457ba3..859185b 100644 --- a/src/climate_downscale/generate/scenario_annual.py +++ b/src/climate_downscale/generate/scenario_annual.py @@ -1,5 +1,4 @@ import itertools -import typing from pathlib import Path import click @@ -108,23 +107,6 @@ } -_P = typing.ParamSpec("_P") -_T = typing.TypeVar("_T") - - -def with_target_variable( - *, - allow_all: bool = False, -) -> clio.ClickOption[_P, _T]: - return clio.with_choice( - "target-variable", - "t", - allow_all=allow_all, - choices=list(TRANSFORM_MAP.keys()), - help="Variable to generate.", - ) - - def generate_scenario_annual_main( output_dir: str | 
Path, target_variable: str, scenario: str, year: str ) -> None: @@ -148,7 +130,7 @@ def generate_scenario_annual_main( @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) -@with_target_variable() +@clio.with_target_variable(variable_names=list(TRANSFORM_MAP)) @clio.with_cmip6_experiment(allow_historical=True) @clio.with_year(years=clio.VALID_HISTORY_YEARS + clio.VALID_FORECAST_YEARS) def generate_scenario_annual_task( @@ -172,7 +154,7 @@ def generate_scenario_annual_task( @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) -@with_target_variable(allow_all=True) +@clio.with_target_variable(variable_names=list(TRANSFORM_MAP), allow_all=True) @clio.with_cmip6_experiment(allow_all=True, allow_historical=True) @clio.with_queue() @clio.with_overwrite() diff --git a/src/climate_downscale/generate/scenario_daily.py b/src/climate_downscale/generate/scenario_daily.py index 56081b5..12cd391 100644 --- a/src/climate_downscale/generate/scenario_daily.py +++ b/src/climate_downscale/generate/scenario_daily.py @@ -1,5 +1,4 @@ import itertools -import typing from pathlib import Path import click @@ -83,23 +82,6 @@ } -_P = typing.ParamSpec("_P") -_T = typing.TypeVar("_T") - - -def with_target_variable( - *, - allow_all: bool = False, -) -> clio.ClickOption[_P, _T]: - return clio.with_choice( - "target-variable", - "t", - allow_all=allow_all, - choices=list(TRANSFORM_MAP.keys()), - help="Variable to generate.", - ) - - def get_source_paths( cd_data: ClimateDownscaleData, source_variables: list[str], @@ -265,7 +247,7 @@ def generate_scenario_daily_main( # noqa: PLR0912 @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) @clio.with_year(years=clio.VALID_FORECAST_YEARS) -@with_target_variable() +@clio.with_target_variable(variable_names=list(TRANSFORM_MAP)) @clio.with_cmip6_experiment() def generate_scenario_daily_task( output_dir: str, year: str, target_variable: str, cmip6_experiment: str @@ -276,7 +258,7 @@ def generate_scenario_daily_task( @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) @clio.with_year(years=clio.VALID_FORECAST_YEARS, allow_all=True) -@with_target_variable(allow_all=True) +@clio.with_target_variable(variable_names=list(TRANSFORM_MAP), allow_all=True) @clio.with_cmip6_experiment(allow_all=True) @clio.with_queue() @clio.with_overwrite() From 62935665f6bfd6d25f32987909ef8108e6ef06b1 Mon Sep 17 00:00:00 2001 From: collijk Date: Wed, 19 Jun 2024 13:45:46 -0700 Subject: [PATCH 65/71] Add script to generate derived daily variables --- src/climate_downscale/generate/__init__.py | 6 + .../generate/derived_daily.py | 140 ++++++++++++++++++ 2 files changed, 146 insertions(+) diff --git a/src/climate_downscale/generate/__init__.py b/src/climate_downscale/generate/__init__.py index 4f4afa8..ded9def 100644 --- a/src/climate_downscale/generate/__init__.py +++ b/src/climate_downscale/generate/__init__.py @@ -1,3 +1,7 @@ +from climate_downscale.generate.derived_daily import ( + generate_derived_daily, + generate_derived_daily_task, +) from climate_downscale.generate.historical_daily import ( generate_historical_daily, generate_historical_daily_task, @@ -19,6 +23,7 @@ "historical_daily": generate_historical_daily, "historical_reference": generate_historical_reference, "scenario_daily": generate_scenario_daily, + "derived_daily": generate_derived_daily, "scenario_annual": generate_scenario_annual, } @@ -26,5 +31,6 @@ "historical_daily": generate_historical_daily_task, 
"historical_reference": generate_historical_reference_task, "scenario_daily": generate_scenario_daily_task, + "derived_daily": generate_derived_daily_task, "scenario_annual": generate_scenario_annual_task, } diff --git a/src/climate_downscale/generate/derived_daily.py b/src/climate_downscale/generate/derived_daily.py index e69de29..6df575d 100644 --- a/src/climate_downscale/generate/derived_daily.py +++ b/src/climate_downscale/generate/derived_daily.py @@ -0,0 +1,140 @@ +import itertools + +import click +from rra_tools import jobmon + +from climate_downscale import cli_options as clio +from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData +from climate_downscale.generate import utils + +TRANSFORM_MAP = { + "heat_index": utils.Transform( + source_variables=["mean_temperature", "relative_humidity"], + transform_funcs=[utils.heat_index], + encoding_scale=0.01, + encoding_offset=273.15, + ), + "humidex": utils.Transform( + source_variables=["mean_temperature", "relative_humidity"], + transform_funcs=[utils.humidex], + encoding_scale=0.01, + encoding_offset=273.15, + ), + "effective_temperature": utils.Transform( + source_variables=["mean_temperature", "relative_humidity", "wind_speed"], + transform_funcs=[utils.effective_temperature], + encoding_scale=0.01, + encoding_offset=273.15, + ), +} + + +def generate_derived_daily_main( + output_dir: str, + target_variable: str, + scenario: str, + year: str, +) -> None: + cd_data = ClimateDownscaleData(output_dir) + transform = TRANSFORM_MAP[target_variable] + + ds = transform( + *[ + cd_data.load_daily_results(scenario, source_variable, year) + for source_variable in transform.source_variables + ] + ) + cd_data.save_daily_results( + ds, + scenario=scenario, + variable=target_variable, + year=year, + encoding_kwargs=transform.encoding_kwargs, + ) + + +@click.command() # type: ignore[arg-type] +@clio.with_output_directory(DEFAULT_ROOT) +@clio.with_target_variable(variable_names=list(TRANSFORM_MAP)) +@clio.with_cmip6_experiment(allow_historical=True) +@clio.with_year(years=clio.VALID_HISTORY_YEARS + clio.VALID_FORECAST_YEARS) +def generate_derived_daily_task( + output_dir: str, + target_variable: str, + cmip6_experiment: str, + year: str, +) -> None: + if year in clio.VALID_HISTORY_YEARS and cmip6_experiment != "historical": + msg = "Historical years must use the 'historical' experiment." + raise ValueError(msg) + if year in clio.VALID_FORECAST_YEARS and cmip6_experiment == "historical": + msg = ( + f"Forecast years must use a future experiment: " + f"{clio.VALID_CMIP6_EXPERIMENTS}." 
+ ) + raise ValueError(msg) + generate_derived_daily_main(output_dir, target_variable, cmip6_experiment, year) + + +@click.command() # type: ignore[arg-type] +@clio.with_output_directory(DEFAULT_ROOT) +@clio.with_target_variable(variable_names=list(TRANSFORM_MAP), allow_all=True) +@clio.with_cmip6_experiment(allow_all=True, allow_historical=True) +@clio.with_queue() +@clio.with_overwrite() +def generate_derived_daily( + output_dir: str, + target_variable: str, + cmip6_experiment: str, + queue: str, + overwrite: bool, # noqa: FBT001 +) -> None: + cd_data = ClimateDownscaleData(output_dir) + + variables = ( + list(TRANSFORM_MAP.keys()) + if target_variable == clio.RUN_ALL + else [target_variable] + ) + experiments = ( + list(clio.VALID_CMIP6_EXPERIMENTS) + if cmip6_experiment == clio.RUN_ALL + else [cmip6_experiment] + ) + + vey = [] + complete = [] + for v, e in itertools.product(variables, experiments): + year_list = ( + clio.VALID_HISTORY_YEARS if e == "historical" else clio.VALID_FORECAST_YEARS + ) + for y in year_list: + path = cd_data.annual_results_path(scenario=e, variable=v, year=y) + if not path.exists() or overwrite: + vey.append((v, e, y)) + else: + complete.append((v, e, y)) + + print(f"{len(complete)} tasks already done. {len(vey)} tasks to do.") + if not vey: + return + + jobmon.run_parallel( + runner="cdtask", + task_name="generate derived_daily", + flat_node_args=( + ("target-variable", "cmip6-experiment", "year"), + vey, + ), + task_args={ + "output-dir": output_dir, + }, + task_resources={ + "queue": queue, + "cores": 2, + "memory": "100G", + "runtime": "120m", + "project": "proj_rapidresponse", + }, + max_attempts=1, + ) From b1f5f12f92f6e2d4dfd85dc072fd6f0a60ba6201 Mon Sep 17 00:00:00 2001 From: collijk Date: Sun, 7 Jul 2024 11:52:24 -0700 Subject: [PATCH 66/71] Add README for pipeline stages --- src/climate_downscale/generate/README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 src/climate_downscale/generate/README.md diff --git a/src/climate_downscale/generate/README.md b/src/climate_downscale/generate/README.md new file mode 100644 index 0000000..b52eda9 --- /dev/null +++ b/src/climate_downscale/generate/README.md @@ -0,0 +1,20 @@ +# Climate Variable Pipeline + +This set of scripts processes ERA5 and CMIP6 climate data into a database of +climate variables at a consistent resolution and format. The pipeline is +run in several stages: + +1. Historical Daily: This processes the hourly ERA5-Land and ERA5-Single-Level + data into a unified daily format, pulling the higher-resolution ERA5-Land data + where available and filling in with ERA5-Single-Level data. +2. Historical Reference: This produces a set of reference climatologies from the + historical daily results by averaging, for each month and pixel, over a historical + reference period. These are used to downscale and bias-correct the CMIP6 data. +3. Scenario Daily: This produces scenario projections from the CMIP6 data, ensembling + over a curated set of GCMs and using the historical reference climatologies to + bias-correct the data. +4. Derived Daily: This produces derived climate variables from the daily data, such as + humidex and effective temperature. This writes results to the same directories + as the daily data. +5. Scenario Annual: This produces annualized summaries of the scenario data, such as + annual averages and extremes.
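To make the bias-correction in stages 2–3 concrete, here is a minimal sketch of the delta-method adjustment the pipeline applies. It assumes daily scenario data and monthly reference climatologies held as xarray datasets with a `date` coordinate; `compute_anomaly` and `apply_anomaly` are illustrative names for this note, not the module's actual API, which additionally handles ensembling, interpolation, and encoding.

```python
import xarray as xr


def compute_anomaly(reference: xr.Dataset, target: xr.Dataset, kind: str) -> xr.Dataset:
    """Anomaly of a scenario run relative to its own reference-period climatology."""
    if kind == "additive":  # temperature-like variables
        return target.groupby("date.month") - reference
    # Multiplicative anomalies suit strictly positive variables like precipitation.
    return target.groupby("date.month") / reference


def apply_anomaly(era5_reference: xr.Dataset, anomaly: xr.Dataset, kind: str) -> xr.Dataset:
    """Shift (or scale) the observed climatology by the modeled change signal."""
    if kind == "additive":
        out = era5_reference + anomaly.groupby("date.month")
    else:
        out = era5_reference * anomaly.groupby("date.month")
    # The groupby arithmetic leaves a helper `month` coordinate behind; drop it.
    return out.drop_vars("month")
```

Working in anomaly space means each GCM contributes only its change signal while the absolute level comes from ERA5, which is why the stage-2 reference climatologies must exist before stage 3 can run.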
From e5a136acb51a3d513fa04c963f81f6bf2c45eb45 Mon Sep 17 00:00:00 2001 From: James Collins Date: Mon, 8 Jul 2024 11:49:22 -0700 Subject: [PATCH 67/71] Fix derived climate variables --- .../generate/derived_daily.py | 30 ++++++++++++---- .../generate/scenario_annual.py | 34 +++++++++++-------- 2 files changed, 42 insertions(+), 22 deletions(-) diff --git a/src/climate_downscale/generate/derived_daily.py b/src/climate_downscale/generate/derived_daily.py index 6df575d..1a4735b 100644 --- a/src/climate_downscale/generate/derived_daily.py +++ b/src/climate_downscale/generate/derived_daily.py @@ -2,6 +2,7 @@ import click from rra_tools import jobmon +from dask.diagnostics import ProgressBar from climate_downscale import cli_options as clio from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData @@ -34,16 +35,27 @@ def generate_derived_daily_main( target_variable: str, scenario: str, year: str, + progress_bar: bool = False ) -> None: cd_data = ClimateDownscaleData(output_dir) transform = TRANSFORM_MAP[target_variable] + # Empirically tested to find a good balance between + # runtime and memory usage for data at this scale. + chunks = {"latitude": -1, "longitude": -1, "date": 20} + ds = transform( *[ - cd_data.load_daily_results(scenario, source_variable, year) + cd_data.load_daily_results(scenario, source_variable, year).chunk(**chunks) for source_variable in transform.source_variables ] ) + if progress_bar: + with ProgressBar(): + ds = ds.compute() + else: + ds = ds.compute() + cd_data.save_daily_results( ds, scenario=scenario, @@ -97,7 +109,7 @@ def generate_derived_daily( else [target_variable] ) experiments = ( - list(clio.VALID_CMIP6_EXPERIMENTS) + clio.VALID_CMIP6_EXPERIMENTS + ['historical'] if cmip6_experiment == clio.RUN_ALL else [cmip6_experiment] ) @@ -109,11 +121,15 @@ def generate_derived_daily( clio.VALID_HISTORY_YEARS if e == "historical" else clio.VALID_FORECAST_YEARS ) for y in year_list: - path = cd_data.annual_results_path(scenario=e, variable=v, year=y) + path = cd_data.daily_results_path(scenario=e, variable=v, year=y) + if path.exists() and path.stat().st_size == 0: + # job failed when writing, delete the file + path.unlink() + if not path.exists() or overwrite: vey.append((v, e, y)) else: - complete.append((v, e, y)) + complete.append((v, e, y)) print(f"{len(complete)} tasks already done. 
{len(vey)} tasks to do.") if not vey: @@ -131,9 +147,9 @@ def generate_derived_daily( }, task_resources={ "queue": queue, - "cores": 2, - "memory": "100G", - "runtime": "120m", + "cores": 8, + "memory": "150G", + "runtime": "45m", "project": "proj_rapidresponse", }, max_attempts=1, diff --git a/src/climate_downscale/generate/scenario_annual.py b/src/climate_downscale/generate/scenario_annual.py index 859185b..83ceab9 100644 --- a/src/climate_downscale/generate/scenario_annual.py +++ b/src/climate_downscale/generate/scenario_annual.py @@ -39,16 +39,15 @@ for temp in TEMP_THRESHOLDS }, "mean_heat_index": utils.Transform( - source_variables=["mean_temperature", "relative_humidity"], - transform_funcs=[utils.heat_index, utils.annual_mean], + source_variables=["heat_index"], + transform_funcs=[utils.annual_mean], encoding_scale=0.01, encoding_offset=273.15, ), **{ f"days_over_{temp}C_heat_index": utils.Transform( - source_variables=["mean_temperature", "relative_humidity"], + source_variables=["heat_index"], transform_funcs=[ - utils.heat_index, utils.count_threshold(temp), utils.annual_sum, ], @@ -56,16 +55,15 @@ for temp in TEMP_THRESHOLDS }, "mean_humidex": utils.Transform( - source_variables=["mean_temperature", "relative_humidity"], - transform_funcs=[utils.humidex, utils.annual_mean], + source_variables=["humidex"], + transform_funcs=[utils.annual_mean], encoding_scale=0.01, encoding_offset=273.15, ), **{ f"days_over_{temp}C_humidex": utils.Transform( - source_variables=["mean_temperature", "relative_humidity"], + source_variables=["humidex"], transform_funcs=[ - utils.humidex, utils.count_threshold(temp), utils.annual_sum, ], @@ -73,16 +71,15 @@ for temp in TEMP_THRESHOLDS }, "mean_effective_temperature": utils.Transform( - source_variables=["mean_temperature", "relative_humidity", "wind_speed"], - transform_funcs=[utils.effective_temperature, utils.annual_mean], + source_variables=["effective_temperature"], + transform_funcs=[utils.annual_mean], encoding_scale=0.01, encoding_offset=273.15, ), **{ f"days_over_{temp}C_effective_temperature": utils.Transform( - source_variables=["mean_temperature", "relative_humidity", "wind_speed"], + source_variables=["effective_temperature"], transform_funcs=[ - utils.effective_temperature, utils.count_threshold(temp), utils.annual_sum, ], @@ -108,7 +105,7 @@ def generate_scenario_annual_main( - output_dir: str | Path, target_variable: str, scenario: str, year: str + output_dir: str | Path, target_variable: str, scenario: str, year: str, progress_bar: bool = False ) -> None: cd_data = ClimateDownscaleData(output_dir) transform = TRANSFORM_MAP[target_variable] @@ -119,6 +116,13 @@ def generate_scenario_annual_main( for source_variable in transform.source_variables ] ) + + if progress_bar: + with ProgressBar(): + ds = ds.compute() + else: + ds = ds.compute() + cd_data.save_annual_results( ds, scenario=scenario, @@ -173,7 +177,7 @@ def generate_scenario_annual( else [target_variable] ) experiments = ( - list(clio.VALID_CMIP6_EXPERIMENTS) + clio.VALID_CMIP6_EXPERIMENTS + ['historical'] if cmip6_experiment == clio.RUN_ALL else [cmip6_experiment] ) @@ -207,7 +211,7 @@ def generate_scenario_annual( }, task_resources={ "queue": queue, - "cores": 2, + "cores": 3, "memory": "100G", "runtime": "120m", "project": "proj_rapidresponse", From 3b435d3aa2f5971d470a0eb56e87ee98c4cce370 Mon Sep 17 00:00:00 2001 From: James Collins Date: Mon, 8 Jul 2024 11:49:33 -0700 Subject: [PATCH 68/71] start scenario inclusion script --- .../generate/scenario_inclusion.py | 78 
+++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 src/climate_downscale/generate/scenario_inclusion.py diff --git a/src/climate_downscale/generate/scenario_inclusion.py b/src/climate_downscale/generate/scenario_inclusion.py new file mode 100644 index 0000000..6d416f4 --- /dev/null +++ b/src/climate_downscale/generate/scenario_inclusion.py @@ -0,0 +1,78 @@ +from pathlib import Path + +import pandas as pd +import xarray as xr +from rra_tools import parallel +import tqdm + +from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData + +import warnings + +warnings.filterwarnings('ignore') + +cd_data = ClimateDownscaleData(output_dir) +paths = list(cd_data.extracted_cmip6.glob(f'*.nc')) + +def extract_metadata(data_path: Path) -> tuple: + variable, scenario, source, variant = data_path.stem.split('_') + + realization = variant.split('i')[0][1:] + initialization = variant.split('i')[1].split('p')[0] + physics = variant.split('p')[1].split('f')[0] + forcing = variant.split('f')[1] + + + ds = xr.open_dataset(data_path) + year_start = ds['time.year'].min().item() + year_end = ds['time.year'].max().item() + return (variable, scenario, source, variant, realization, initialization, physics, forcing, year_start, year_end) + +meta_list = parallel.run_parallel( + extract_metadata, + paths, + num_cores=25, + progress_bar=True, +) + +meta_df = ( + pd.DataFrame( + meta_list, + columns=[ + 'variable', + 'scenario', + 'source', + 'variant', + 'realization', + 'initialization', + 'physics', + 'forcing', + 'year_start', + 'year_end', + ], + ).assign( + all_years=lambda x: (x.year_start <= 2020) & (x.year_end >= 2099), + year_range=lambda x: x.apply(lambda r: f"{r.loc['year_start']}_{r.loc['year_end']}", axis=1), + ) +) + +valid_scenarios = ( + meta_df + .set_index(['variable', 'source', 'variant', 'scenario']).all_years + .unstack() + .fillna(False) + .sum(axis=1) + .rename('valid_scenarios') +) +year_range = ( + meta_df + .set_index(['variable', 'source', 'variant', 'scenario']).year_range + .unstack() + .fillna("") +) +inclusion_df = pd.concat([ + year_range, + valid_scenarios, + meta_df.drop(columns=['scenario', 'year_start', 'year_end', 'all_years', 'year_range']).drop_duplicates().set_index(['variable', 'source', 'variant']) +], axis=1) +inclusion_df['include'] = inclusion_df.valid_scenarios == 5 \ No newline at end of file From 70f5abf7af730aaff56828a0b35a41150d7daca1 Mon Sep 17 00:00:00 2001 From: collijk Date: Mon, 8 Jul 2024 12:15:40 -0700 Subject: [PATCH 69/71] Add task to generate scenario inclusion metadata --- pyproject.toml | 3 + src/climate_downscale/data.py | 22 ++++ src/climate_downscale/generate/__init__.py | 5 + .../generate/derived_daily.py | 20 +-- .../generate/scenario_annual.py | 15 ++- .../generate/scenario_inclusion.py | 124 +++++++++--------- 6 files changed, 111 insertions(+), 78 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 731596e..7e91877 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -100,6 +100,9 @@ ignore = [ "RET504", # Unnecessary assignment before return "PLR0913", # Too many arguments in function call, hard with CLIs. 
"TRY201", # + "PD010", # I like stack and unstack + "FBT001", # Boolean positional args are super common in clis + "FBT002", # Boolean positional args are super common in clis ] [tool.ruff.lint.per-file-ignores] diff --git a/src/climate_downscale/data.py b/src/climate_downscale/data.py index 5bf53f7..71fa346 100644 --- a/src/climate_downscale/data.py +++ b/src/climate_downscale/data.py @@ -126,6 +126,28 @@ def results(self) -> Path: def daily_results(self) -> Path: return self.results / "daily" + @property + def results_metadata(self) -> Path: + return self.results / "metadata" + + def save_scenario_metadata(self, df: pd.DataFrame) -> None: + path = self.results_metadata / "scenario_metadata.parquet" + touch(path, exist_ok=True) + df.to_parquet(path) + + def load_scenario_metadata(self) -> pd.DataFrame: + path = self.results_metadata / "scenario_metadata.parquet" + return pd.read_parquet(path) + + def save_scenario_inclusion_metadata(self, df: pd.DataFrame) -> None: + path = self.results_metadata / "scenario_inclusion_metadata.parquet" + touch(path, exist_ok=True) + df.to_parquet(path) + + def load_scenario_inclusion_metadata(self) -> pd.DataFrame: + path = self.results_metadata / "scenario_inclusion_metadata.parquet" + return pd.read_parquet(path) + def daily_results_path(self, scenario: str, variable: str, year: int | str) -> Path: return self.daily_results / scenario / variable / f"{year}.nc" diff --git a/src/climate_downscale/generate/__init__.py b/src/climate_downscale/generate/__init__.py index ded9def..d17a310 100644 --- a/src/climate_downscale/generate/__init__.py +++ b/src/climate_downscale/generate/__init__.py @@ -18,10 +18,14 @@ generate_scenario_daily, generate_scenario_daily_task, ) +from climate_downscale.generate.scenario_inclusion import ( + generate_scenario_inclusion, +) RUNNERS = { "historical_daily": generate_historical_daily, "historical_reference": generate_historical_reference, + "scenario_inclusion": generate_scenario_inclusion, "scenario_daily": generate_scenario_daily, "derived_daily": generate_derived_daily, "scenario_annual": generate_scenario_annual, @@ -30,6 +34,7 @@ TASK_RUNNERS = { "historical_daily": generate_historical_daily_task, "historical_reference": generate_historical_reference_task, + "scenario_inclusion": generate_scenario_inclusion, "scenario_daily": generate_scenario_daily_task, "derived_daily": generate_derived_daily_task, "scenario_annual": generate_scenario_annual_task, diff --git a/src/climate_downscale/generate/derived_daily.py b/src/climate_downscale/generate/derived_daily.py index 1a4735b..77e7d52 100644 --- a/src/climate_downscale/generate/derived_daily.py +++ b/src/climate_downscale/generate/derived_daily.py @@ -1,8 +1,8 @@ import itertools import click +from dask.diagnostics.progress import ProgressBar from rra_tools import jobmon -from dask.diagnostics import ProgressBar from climate_downscale import cli_options as clio from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData @@ -35,27 +35,27 @@ def generate_derived_daily_main( target_variable: str, scenario: str, year: str, - progress_bar: bool = False + progress_bar: bool = False, ) -> None: cd_data = ClimateDownscaleData(output_dir) transform = TRANSFORM_MAP[target_variable] - # Empirically tested to find a good balance between + # Empirically tested to find a good balance between # runtime and memory usage for data at this scale. 
chunks = {"latitude": -1, "longitude": -1, "date": 20} ds = transform( *[ - cd_data.load_daily_results(scenario, source_variable, year).chunk(**chunks) + cd_data.load_daily_results(scenario, source_variable, year).chunk(**chunks) # type: ignore[arg-type] for source_variable in transform.source_variables ] ) if progress_bar: - with ProgressBar(): + with ProgressBar(): # type: ignore[no-untyped-call] ds = ds.compute() else: ds = ds.compute() - + cd_data.save_daily_results( ds, scenario=scenario, @@ -99,7 +99,7 @@ def generate_derived_daily( target_variable: str, cmip6_experiment: str, queue: str, - overwrite: bool, # noqa: FBT001 + overwrite: bool, ) -> None: cd_data = ClimateDownscaleData(output_dir) @@ -109,7 +109,7 @@ def generate_derived_daily( else [target_variable] ) experiments = ( - clio.VALID_CMIP6_EXPERIMENTS + ['historical'] + [*clio.VALID_CMIP6_EXPERIMENTS, "historical"] if cmip6_experiment == clio.RUN_ALL else [cmip6_experiment] ) @@ -125,11 +125,11 @@ def generate_derived_daily( if path.exists() and path.stat().st_size == 0: # job failed when writing, delete the file path.unlink() - + if not path.exists() or overwrite: vey.append((v, e, y)) else: - complete.append((v, e, y)) + complete.append((v, e, y)) print(f"{len(complete)} tasks already done. {len(vey)} tasks to do.") if not vey: diff --git a/src/climate_downscale/generate/scenario_annual.py b/src/climate_downscale/generate/scenario_annual.py index 83ceab9..1508477 100644 --- a/src/climate_downscale/generate/scenario_annual.py +++ b/src/climate_downscale/generate/scenario_annual.py @@ -3,6 +3,7 @@ import click import xarray as xr +from dask.diagnostics.progress import ProgressBar from rra_tools import jobmon from climate_downscale import cli_options as clio @@ -105,7 +106,11 @@ def generate_scenario_annual_main( - output_dir: str | Path, target_variable: str, scenario: str, year: str, progress_bar: bool = False + output_dir: str | Path, + target_variable: str, + scenario: str, + year: str, + progress_bar: bool = False, ) -> None: cd_data = ClimateDownscaleData(output_dir) transform = TRANSFORM_MAP[target_variable] @@ -118,11 +123,11 @@ def generate_scenario_annual_main( ) if progress_bar: - with ProgressBar(): + with ProgressBar(): # type: ignore[no-untyped-call] ds = ds.compute() else: ds = ds.compute() - + cd_data.save_annual_results( ds, scenario=scenario, @@ -167,7 +172,7 @@ def generate_scenario_annual( target_variable: str, cmip6_experiment: str, queue: str, - overwrite: bool, # noqa: FBT001 + overwrite: bool, ) -> None: cd_data = ClimateDownscaleData(output_dir) @@ -177,7 +182,7 @@ def generate_scenario_annual( else [target_variable] ) experiments = ( - clio.VALID_CMIP6_EXPERIMENTS + ['historical'] + [*clio.VALID_CMIP6_EXPERIMENTS, "historical"] if cmip6_experiment == clio.RUN_ALL else [cmip6_experiment] ) diff --git a/src/climate_downscale/generate/scenario_inclusion.py b/src/climate_downscale/generate/scenario_inclusion.py index 6d416f4..63dc1ea 100644 --- a/src/climate_downscale/generate/scenario_inclusion.py +++ b/src/climate_downscale/generate/scenario_inclusion.py @@ -1,78 +1,76 @@ +import warnings from pathlib import Path +from typing import Any +import click import pandas as pd import xarray as xr from rra_tools import parallel -import tqdm +from climate_downscale import cli_options as clio from climate_downscale.data import DEFAULT_ROOT, ClimateDownscaleData -import warnings - -warnings.filterwarnings('ignore') +warnings.filterwarnings("ignore") -cd_data = ClimateDownscaleData(output_dir) -paths = 
list(cd_data.extracted_cmip6.glob(f'*.nc')) -def extract_metadata(data_path: Path) -> tuple: - variable, scenario, source, variant = data_path.stem.split('_') - - realization = variant.split('i')[0][1:] - initialization = variant.split('i')[1].split('p')[0] - physics = variant.split('p')[1].split('f')[0] - forcing = variant.split('f')[1] - - +def extract_metadata(data_path: Path) -> tuple[Any]: + meta = data_path.stem.split("_") ds = xr.open_dataset(data_path) - year_start = ds['time.year'].min().item() - year_end = ds['time.year'].max().item() - return (variable, scenario, source, variant, realization, initialization, physics, forcing, year_start, year_end) + year_start = ds["time.year"].min().item() + year_end = ds["time.year"].max().item() + return *meta, year_start, year_end, str(data_path) + + +def generate_scenario_inclusion_main( + output_dir: str | Path, *, num_cores: int = 1, progress_bar: bool = False +) -> None: + cd_data = ClimateDownscaleData(output_dir) + paths = list(cd_data.extracted_cmip6.glob("*.nc")) + + meta_list = parallel.run_parallel( + extract_metadata, + paths, + num_cores=num_cores, + progress_bar=progress_bar, + ) -meta_list = parallel.run_parallel( - extract_metadata, - paths, - num_cores=25, - progress_bar=True, -) + columns = ["variable", "scenario", "source", "variant", "year_start", "year_end"] + meta_df = pd.DataFrame(meta_list, columns=columns) + meta_df["all_years"] = (meta_df.year_start <= 2020) & (meta_df.year_end >= 2099) # noqa: PLR2004 + meta_df["year_range"] = meta_df.apply( + lambda r: f"{r.loc['year_start']}_{r.loc['year_end']}", axis=1 + ) -meta_df = ( - pd.DataFrame( - meta_list, - columns=[ - 'variable', - 'scenario', - 'source', - 'variant', - 'realization', - 'initialization', - 'physics', - 'forcing', - 'year_start', - 'year_end', - ], - ).assign( - all_years=lambda x: (x.year_start <= 2020) & (x.year_end >= 2099), - year_range=lambda x: x.apply(lambda r: f"{r.loc['year_start']}_{r.loc['year_end']}", axis=1), + valid_scenarios = ( + meta_df.set_index(["variable", "source", "variant", "scenario"]) + .all_years.unstack() + .fillna(value=False) + .sum(axis=1) + .rename("valid_scenarios") + ) + year_range = ( + meta_df.set_index(["variable", "source", "variant", "scenario"]) + .year_range.unstack() + .fillna("") ) -) + inclusion_df = pd.concat([year_range, valid_scenarios], axis=1).reset_index() + inclusion_df["include"] = inclusion_df.valid_scenarios == 5 # noqa: PLR2004 -valid_scenarios = ( - meta_df - .set_index(['variable', 'source', 'variant', 'scenario']).all_years - .unstack() - .fillna(False) - .sum(axis=1) - .rename('valid_scenarios') -) -year_range = ( - meta_df - .set_index(['variable', 'source', 'variant', 'scenario']).year_range - .unstack() - .fillna("") -) -inclusion_df = pd.concat([ - year_range, - valid_scenarios, - meta_df.drop(columns=['scenario', 'year_start', 'year_end', 'all_years', 'year_range']).drop_duplicates().set_index(['variable', 'source', 'variant']) -], axis=1) -inclusion_df['include'] = inclusion_df.valid_scenarios == 5 \ No newline at end of file + cd_data.save_scenario_metadata(meta_df) + cd_data.save_scenario_inclusion_metadata(inclusion_df) + + +@click.command() # type: ignore[arg-type] +@clio.with_output_directory(DEFAULT_ROOT) +@clio.with_num_cores(default=10) +@clio.with_progress_bar() +def generate_scenario_inclusion( + output_dir: str, + num_cores: int, + progress_bar: bool, +) -> None: + generate_scenario_inclusion_main( + output_dir, + num_cores=num_cores, + progress_bar=progress_bar, + ) From 
ae7b4772b2eecbff6b1d8627144da1a12e58a3d0 Mon Sep 17 00:00:00 2001 From: collijk Date: Mon, 8 Jul 2024 12:19:18 -0700 Subject: [PATCH 70/71] Remove extra path column --- src/climate_downscale/generate/scenario_inclusion.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/climate_downscale/generate/scenario_inclusion.py b/src/climate_downscale/generate/scenario_inclusion.py index 63dc1ea..bf5a0e7 100644 --- a/src/climate_downscale/generate/scenario_inclusion.py +++ b/src/climate_downscale/generate/scenario_inclusion.py @@ -13,12 +13,12 @@ warnings.filterwarnings("ignore") -def extract_metadata(data_path: Path) -> tuple[Any]: +def extract_metadata(data_path: Path) -> tuple[Any, ...]: meta = data_path.stem.split("_") ds = xr.open_dataset(data_path) year_start = ds["time.year"].min().item() year_end = ds["time.year"].max().item() - return *meta, year_start, year_end, str(data_path) + return *meta, year_start, year_end def generate_scenario_inclusion_main( From cef92b3c4e21d45e85cc43cf989cc0158f42b5d9 Mon Sep 17 00:00:00 2001 From: James Collins Date: Tue, 9 Jul 2024 05:40:51 -0700 Subject: [PATCH 71/71] So many fixes --- src/climate_downscale/data.py | 2 +- .../generate/derived_daily.py | 3 - .../generate/historical_daily.py | 7 +- .../generate/historical_reference.py | 2 +- .../generate/scenario_annual.py | 6 - .../generate/scenario_daily.py | 139 ++++++++++-------- .../generate/scenario_inclusion.py | 6 + 7 files changed, 84 insertions(+), 81 deletions(-) diff --git a/src/climate_downscale/data.py b/src/climate_downscale/data.py index 71fa346..77be728 100644 --- a/src/climate_downscale/data.py +++ b/src/climate_downscale/data.py @@ -37,7 +37,7 @@ def extracted_era5(self) -> Path: def extracted_era5_path( self, dataset: str, variable: str, year: int | str, month: str ) -> Path: - return self.extracted_era5 / f"{dataset}_{variable}_{year}_{month}.nc" + return self.extracted_era5 / f"reanalysis-era5-{dataset}_{variable}_{year}_{month}.nc" @property def extracted_cmip6(self) -> Path: diff --git a/src/climate_downscale/generate/derived_daily.py b/src/climate_downscale/generate/derived_daily.py index 77e7d52..50c5309 100644 --- a/src/climate_downscale/generate/derived_daily.py +++ b/src/climate_downscale/generate/derived_daily.py @@ -13,19 +13,16 @@ source_variables=["mean_temperature", "relative_humidity"], transform_funcs=[utils.heat_index], encoding_scale=0.01, - encoding_offset=273.15, ), "humidex": utils.Transform( source_variables=["mean_temperature", "relative_humidity"], transform_funcs=[utils.humidex], encoding_scale=0.01, - encoding_offset=273.15, ), "effective_temperature": utils.Transform( source_variables=["mean_temperature", "relative_humidity", "wind_speed"], transform_funcs=[utils.effective_temperature], encoding_scale=0.01, - encoding_offset=273.15, ), } diff --git a/src/climate_downscale/generate/historical_daily.py b/src/climate_downscale/generate/historical_daily.py index 5eeddde..6585318 100644 --- a/src/climate_downscale/generate/historical_daily.py +++ b/src/climate_downscale/generate/historical_daily.py @@ -35,19 +35,16 @@ source_variables=["2m_temperature"], transform_funcs=[utils.daily_mean], encoding_scale=0.01, - encoding_offset=273.15, ), "max_temperature": utils.Transform( source_variables=["2m_temperature"], transform_funcs=[utils.daily_max], encoding_scale=0.01, - encoding_offset=273.15, ), "min_temperature": utils.Transform( source_variables=["2m_temperature"], transform_funcs=[utils.daily_min], encoding_scale=0.01, - encoding_offset=273.15, 
), "wind_speed": utils.Transform( source_variables=["10m_u_component_of_wind", "10m_v_component_of_wind"], @@ -98,8 +95,8 @@ def load_variable( ds = load_and_shift_longitude(path) # There are some slight numerical differences in the lat/long for some of # the land datasets. They are gridded consistently, so just tweak the - # coordinates so things align. - ds = ds.assign_coords(latitude=utils.TARGET_LAT, longitude=utils.TARGET_LON) + # coordinates so things align. + ds = ds.assign_coords(latitude=utils.TARGET_LAT[::-1], longitude=utils.TARGET_LON) else: ds = load_and_shift_longitude(path) conversion = CONVERT_MAP[variable] diff --git a/src/climate_downscale/generate/historical_reference.py b/src/climate_downscale/generate/historical_reference.py index de030a2..14f84ff 100644 --- a/src/climate_downscale/generate/historical_reference.py +++ b/src/climate_downscale/generate/historical_reference.py @@ -63,7 +63,7 @@ def generate_historical_reference_task( @click.command() # type: ignore[arg-type] @clio.with_output_directory(DEFAULT_ROOT) -@clio.with_target_variable(variable_names=list(TRANSFORM_MAP)) +@clio.with_target_variable(allow_all=True, variable_names=list(TRANSFORM_MAP)) @clio.with_queue() def generate_historical_reference( output_dir: str, diff --git a/src/climate_downscale/generate/scenario_annual.py b/src/climate_downscale/generate/scenario_annual.py index 1508477..455614e 100644 --- a/src/climate_downscale/generate/scenario_annual.py +++ b/src/climate_downscale/generate/scenario_annual.py @@ -18,19 +18,16 @@ source_variables=["mean_temperature"], transform_funcs=[utils.annual_mean], encoding_scale=0.01, - encoding_offset=273.15, ), "mean_high_temperature": utils.Transform( source_variables=["max_temperature"], transform_funcs=[utils.annual_mean], encoding_scale=0.01, - encoding_offset=273.15, ), "mean_low_temperature": utils.Transform( source_variables=["min_temperature"], transform_funcs=[utils.annual_mean], encoding_scale=0.01, - encoding_offset=273.15, ), **{ f"days_over_{temp}C": utils.Transform( @@ -43,7 +40,6 @@ source_variables=["heat_index"], transform_funcs=[utils.annual_mean], encoding_scale=0.01, - encoding_offset=273.15, ), **{ f"days_over_{temp}C_heat_index": utils.Transform( @@ -59,7 +55,6 @@ source_variables=["humidex"], transform_funcs=[utils.annual_mean], encoding_scale=0.01, - encoding_offset=273.15, ), **{ f"days_over_{temp}C_humidex": utils.Transform( @@ -75,7 +70,6 @@ source_variables=["effective_temperature"], transform_funcs=[utils.annual_mean], encoding_scale=0.01, - encoding_offset=273.15, ), **{ f"days_over_{temp}C_effective_temperature": utils.Transform( diff --git a/src/climate_downscale/generate/scenario_daily.py b/src/climate_downscale/generate/scenario_daily.py index 12cd391..0cf48a7 100644 --- a/src/climate_downscale/generate/scenario_daily.py +++ b/src/climate_downscale/generate/scenario_daily.py @@ -1,3 +1,4 @@ +from collections import defaultdict import itertools from pathlib import Path @@ -33,7 +34,6 @@ source_variables=["tas"], transform_funcs=[utils.identity], encoding_scale=0.01, - encoding_offset=273.15, ), "additive", ), @@ -42,7 +42,6 @@ source_variables=["tasmax"], transform_funcs=[utils.identity], encoding_scale=0.01, - encoding_offset=273.15, ), "additive", ), @@ -51,7 +50,6 @@ source_variables=["tasmin"], transform_funcs=[utils.identity], encoding_scale=0.01, - encoding_offset=273.15, ), "additive", ), @@ -86,29 +84,16 @@ def get_source_paths( cd_data: ClimateDownscaleData, source_variables: list[str], cmip6_experiment: str, -) -> 
list[list[Path]]: - models_by_var = {} - for source_variable in source_variables: - model_vars = { - p.stem.split(f"{cmip6_experiment}_")[1] - for p in cd_data.extracted_cmip6.glob( - f"{source_variable}_{cmip6_experiment}*.nc" - ) - } - models_by_var[source_variable] = model_vars - - shared_models = set.intersection(*models_by_var.values()) - for var, models in models_by_var.items(): - extra_models = models.difference(shared_models) - if extra_models: - print(var, extra_models) - source_paths = [ - [ - cd_data.extracted_cmip6 / f"{source_variable}_{cmip6_experiment}_{model}.nc" - for source_variable in source_variables - ] - for model in sorted(shared_models) - ] +) -> dict[str, list[list[Path]]]: + inclusion_meta = cd_data.load_scenario_inclusion_metadata()[source_variables] + inclusion_meta = inclusion_meta[inclusion_meta.all(axis=1)] + source_paths = defaultdict(list) + for source, variant in inclusion_meta.index.tolist(): + source_paths[source].append( + [cd_data.extracted_cmip6_path(v, cmip6_experiment, source, variant) + for v in source_variables] + ) + return source_paths @@ -187,52 +172,76 @@ def generate_scenario_daily_main( # noqa: PLR0912 year="reference", ) - anomalies: dict[str, xr.Dataset] = {} - for i, sps in enumerate(source_paths): - pid = f"{i+1}/{len(source_paths)} {sps[0].stem}" - print(f"{pid}: Loading reference") - try: - scenario_reference = transform( - *[load_variable(sp, "reference") for sp in sps] - ) - print(f"{pid}: Loading target") - target = transform(*[load_variable(sp, year) for sp in sps]) - except KeyError: - print(f"{pid}: Bad formatting, skipping...") - continue - print(f"{pid}: computing anomaly") - s_anomaly = compute_anomaly(scenario_reference, target, anomaly_type) - key = f"{len(s_anomaly.latitude)}_{len(s_anomaly.longitude)}" + anomalies: dict[str, dict[str, tuple[int, xr.Dataset]]] = {} + for i, (source, variant_paths) in enumerate(source_paths.items()): + sid = f"Source {i+1}/{len(source_paths)}: {source}" - if key in anomalies: - old = anomalies[key] - for coord in ["latitude", "longitude"]: - old_c = old[coord].to_numpy() - new_c = s_anomaly[coord].to_numpy() - tol = 1e-5 - if np.abs(old_c - new_c).max() < tol: - s_anomaly = s_anomaly.assign({coord: old_c}) - else: - msg = f"{coord} does not match despite having the same subdivision" - raise ValueError(msg) - anomalies[key] = old + s_anomaly - else: - anomalies[key] = s_anomaly + source_anomalies: dict[str, tuple[int, xr.Dataset]] = {} + for j, vps in enumerate(variant_paths): + vid = f"{sid}, Variant {j+1}/{len(variant_paths)}: {vps[0].stem.split('_')[-1]}" + try: + print(f"{vid}: Loading reference") + sref = transform(*[load_variable(vp, "reference") for vp in vps]) + print(f"{vid}: Loading target") + target = transform(*[load_variable(vp, year) for vp in vps]) + except KeyError: + print(f"{vid}: Bad formatting, skipping...") + continue + + print(f"{vid}: computing anomaly") + v_anomaly = compute_anomaly(sref, target, anomaly_type) + + key = f"{len(v_anomaly.latitude)}_{len(v_anomaly.longitude)}" + + if key in source_anomalies: + old_count, old_anomaly = source_anomalies[key] + + for coord in ["latitude", "longitude"]: + old_c = old_anomaly[coord].to_numpy() + new_c = v_anomaly[coord].to_numpy() + tol = 1e-5 + + if np.abs(old_c - new_c).max() < tol: + v_anomaly = v_anomaly.assign({coord: old_c}) + else: + msg = f"{coord} does not match despite having the same subdivision" + raise ValueError(msg) + source_anomalies[key] = old_count + 1, old_anomaly + v_anomaly + else: + source_anomalies[key] 
= 1, v_anomaly + if source_anomalies: + anomalies[source] = source_anomalies + + ensemble_anomaly = xr.Dataset() + for i, (source, source_anomalies) in enumerate(anomalies.items()): + sid = f"Source {i+1}/{len(source_paths)}: {source}" + print(f"Downscaling {i+1}/{len(anomalies)}: {source}") + + source_ensemble_anomaly = xr.Dataset() + total_count = 0 + for j, (res, (count, v_anomaly)) in enumerate(source_anomalies.items()): + res_id = f"{sid}, Resolution {j} / {len(source_anomalies)}: {res}" + print(f"Downscaling {res_id}") + + if source_ensemble_anomaly.nbytes: + source_ensemble_anomaly += utils.interpolate_to_target_latlon(v_anomaly, method="linear") + else: + source_ensemble_anomaly = utils.interpolate_to_target_latlon(v_anomaly, method="linear") + total_count += count + source_ensemble_anomaly /= total_count - anomaly = xr.Dataset() - for i, (k, v) in enumerate(anomalies.items()): - print(f"Downscaling {i+1}/{len(anomalies)}: {k}") - if anomaly.nbytes: - anomaly += utils.interpolate_to_target_latlon(v, method="linear") + if ensemble_anomaly.nbytes: + ensemble_anomaly += source_ensemble_anomaly else: - anomaly = utils.interpolate_to_target_latlon(v, method="linear") - anomaly /= len(source_paths) + ensemble_anomaly = source_ensemble_anomaly + + ensemble_anomaly /= len(anomalies) print("Computing scenario data") if anomaly_type == "additive": - scenario_data = historical_reference + anomaly.groupby("date.month") + scenario_data = historical_reference + ensemble_anomaly.groupby("date.month") else: - scenario_data = historical_reference * anomaly.groupby("date.month") + scenario_data = historical_reference * ensemble_anomaly.groupby("date.month") scenario_data = scenario_data.drop_vars("month") print("Saving") cd_data.save_daily_results( diff --git a/src/climate_downscale/generate/scenario_inclusion.py b/src/climate_downscale/generate/scenario_inclusion.py index bf5a0e7..a04a277 100644 --- a/src/climate_downscale/generate/scenario_inclusion.py +++ b/src/climate_downscale/generate/scenario_inclusion.py @@ -55,6 +55,12 @@ def generate_scenario_inclusion_main( ) inclusion_df = pd.concat([year_range, valid_scenarios], axis=1).reset_index() inclusion_df["include"] = inclusion_df.valid_scenarios == 5 # noqa: PLR2004 + inclusion_df = ( + inclusion_df.loc[inclusion_df.include] + .set_index(['source', 'variant', 'variable']).include + .unstack() + .fillna(False) + ) cd_data.save_scenario_metadata(meta_df) cd_data.save_scenario_inclusion_metadata(inclusion_df)
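As a closing illustration of the inclusion rule the last two patches converge on: a (source, variant) pair is kept for a variable only when every experiment in the valid set has a file with full temporal coverage, i.e. spanning at least 2020–2099. Below is a toy pandas example with made-up model and experiment names; the real code additionally pivots the result into a wide (source, variant) × variable table and persists it via `save_scenario_inclusion_metadata`.

```python
import pandas as pd

# One row per extracted CMIP6 file; names are illustrative only.
meta = pd.DataFrame(
    [
        ("tas", "ModelA", "r1i1p1f1", "ssp119", 2015, 2100),
        ("tas", "ModelA", "r1i1p1f1", "ssp126", 2015, 2100),
        ("tas", "ModelA", "r1i1p1f1", "ssp245", 2015, 2100),
        ("tas", "ModelA", "r1i1p1f1", "ssp370", 2015, 2100),
        ("tas", "ModelA", "r1i1p1f1", "ssp585", 2015, 2100),
        ("tas", "ModelB", "r1i1p1f1", "ssp126", 2015, 2100),
        ("tas", "ModelB", "r1i1p1f1", "ssp585", 2015, 2055),  # truncated run
    ],
    columns=["variable", "source", "variant", "scenario", "year_start", "year_end"],
)
meta["all_years"] = (meta.year_start <= 2020) & (meta.year_end >= 2099)

n_required = meta.scenario.nunique()  # 5 experiments in this toy set
valid_scenarios = (
    meta.set_index(["variable", "source", "variant", "scenario"])
    .all_years.unstack()
    .fillna(value=False)
    .sum(axis=1)
)
include = (valid_scenarios == n_required).rename("include")
print(include)
# (tas, ModelA, r1i1p1f1) -> True; (tas, ModelB, r1i1p1f1) -> False,
# because ModelB is missing experiments and its ssp585 run is truncated.
```

Requiring the count to hit an exact total (the hard-coded `== 5` in the patch) guarantees that every ensemble member contributes to all experiments, rather than a shifting mix of models per scenario.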