From 3ef021fbf44d1e4ff7a4f8a58366fd9c9813a1a8 Mon Sep 17 00:00:00 2001 From: Ehren Foss Date: Thu, 14 Nov 2024 14:11:24 -0800 Subject: [PATCH 1/4] running against Google Cloud Storage copy of Copenicus data --- atlite/datasets/era5.py | 93 ++++++++++++++++++++++++++--------------- wind_cf.py | 65 ++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+), 33 deletions(-) create mode 100644 wind_cf.py diff --git a/atlite/datasets/era5.py b/atlite/datasets/era5.py index 04e88015..cfcc8e96 100644 --- a/atlite/datasets/era5.py +++ b/atlite/datasets/era5.py @@ -21,6 +21,8 @@ from dask import compute, delayed from dask.array import arctan2, sqrt from numpy import atleast_1d +from dask.diagnostics import ProgressBar + from atlite.gis import maybe_swap_spatial_dims from atlite.pv.solar_position import SolarPosition @@ -71,7 +73,7 @@ def _add_height(ds): """ g0 = 9.80665 - z = ds["z"] + z = ds["geopotential"] if "time" in z.coords: z = z.isel(time=0, drop=True) ds["height"] = z / g0 @@ -118,19 +120,19 @@ def get_data_wind(retrieval_params): ds = _rename_and_clean_coords(ds) for h in [10, 100]: - ds[f"wnd{h}m"] = sqrt(ds[f"u{h}"] ** 2 + ds[f"v{h}"] ** 2).assign_attrs( - units=ds[f"u{h}"].attrs["units"], long_name=f"{h} metre wind speed" + ds[f"wnd{h}m"] = sqrt(ds[f"{h}m_u_component_of_wind"] ** 2 + ds[f"{h}m_v_component_of_wind"] ** 2).assign_attrs( + units=ds[f"{h}m_u_component_of_wind"].attrs["units"], long_name=f"{h} metre wind speed" ) ds["wnd_shear_exp"] = ( np.log(ds["wnd10m"] / ds["wnd100m"]) / np.log(10 / 100) ).assign_attrs(units="", long_name="wind shear exponent") # span the whole circle: 0 is north, π/2 is east, -π is south, 3π/2 is west - azimuth = arctan2(ds["u100"], ds["v100"]) + azimuth = arctan2(ds["100m_u_component_of_wind"], ds["100m_v_component_of_wind"]) ds["wnd_azimuth"] = azimuth.where(azimuth >= 0, azimuth + 2 * np.pi) - ds = ds.drop_vars(["u100", "v100", "u10", "v10", "wnd10m"]) - ds = ds.rename({"fsr": "roughness"}) + ds = ds.drop_vars(["10m_u_component_of_wind", "10m_v_component_of_wind", "100m_u_component_of_wind", "100m_v_component_of_wind", "wnd10m"]) + ds = ds.rename({"forecast_surface_roughness": "roughness"}) return ds @@ -159,7 +161,10 @@ def get_data_influx(retrieval_params): ds = _rename_and_clean_coords(ds) - ds = ds.rename({"fdir": "influx_direct", "tisr": "influx_toa"}) + ds = ds.rename({"total_sky_direct_solar_radiation_at_surface": "influx_direct" + , "toa_incident_solar_radiation": "influx_toa" + , "surface_solar_radiation_downwards": "ssrd" + , "surface_net_solar_radiation": "ssr"}) ds["albedo"] = ( ((ds["ssrd"] - ds["ssr"]) / ds["ssrd"].where(ds["ssrd"] != 0)) .fillna(0.0) @@ -215,9 +220,9 @@ def get_data_temperature(retrieval_params): ds = _rename_and_clean_coords(ds) ds = ds.rename( { - "t2m": "temperature", - "stl4": "soil temperature", - "d2m": "dewpoint temperature", + "2m_temperature": "temperature", + "soil_temperature_level_4": "soil temperature", + "2m_dewpoint_temperature": "dewpoint temperature", } ) @@ -231,7 +236,7 @@ def get_data_runoff(retrieval_params): ds = retrieve_data(variable=["runoff"], **retrieval_params) ds = _rename_and_clean_coords(ds) - ds = ds.rename({"ro": "runoff"}) + #ds = ds.rename({"ro": "runoff"}) return ds @@ -341,33 +346,55 @@ def retrieve_data(product, chunks=None, tmpdir=None, lock=None, **updates): request = {"product_type": "reanalysis", "format": "netcdf"} request.update(updates) + print(request) + assert {"year", "month", "variable"}.issubset( request ), "Need to specify at least 'variable', 'year' and 'month'" - client = cdsapi.Client( - info_callback=logger.debug, debug=logging.DEBUG >= logging.root.level + data_arrays = {} + bucket = 'gs://gcp-public-data-arco-era5/ar/full_37-1h-0p25deg-chunk-1.zarr-v3/' + + ProgressBar().register() + + # Open the Zarr store as a dataset + ds = xr.open_zarr( + bucket, + chunks={}, + storage_options=dict(token='anon') + ) + + # Select specific variables of interest + variables = atleast_1d(request['variable']) # e.g., ['100m_u_component_of_wind', '100m_v_component_of_wind'] + ds_selected_vars = ds[variables] + + # Define the time range and spatial bounding box + year = request['year'] + month = str(request['month'][0]).zfill(2) # Convert month to zero-padded format + day_start = str(request['day'][0]).zfill(2) + day_end = str(request['day'][-1]).zfill(2) + + # Create start and end time strings + time_start = f"{year}-{month}-{day_start}T00:00" + time_end = f"{year}-{month}-{day_end}T23:00" + + # Define the time range slice + time_range = slice(time_start, time_end) + + #time_range = slice("2020-01-01", "2020-01-31") + bbox = request['area'] + lat_lon_bbox = { + 'latitude': slice(bbox[0], bbox[2]), + 'longitude': slice(bbox[1] + 180, bbox[3] + 180) + } + print(lat_lon_bbox) + + # Apply selections + ds = ds_selected_vars.sel( + time=time_range, + latitude=lat_lon_bbox['latitude'], + longitude=lat_lon_bbox['longitude'] ) - result = client.retrieve(product, request) - - if lock is None: - lock = nullcontext() - - with lock: - fd, target = mkstemp(suffix=".nc", dir=tmpdir) - os.close(fd) - - # Inform user about data being downloaded as "* variable (year-month)" - timestr = f"{request['year']}-{request['month']}" - variables = atleast_1d(request["variable"]) - varstr = "\n\t".join([f"{v} ({timestr})" for v in variables]) - logger.info(f"CDS: Downloading variables\n\t{varstr}\n") - result.download(target) - - ds = xr.open_dataset(target, chunks=chunks or {}) - if tmpdir is None: - logger.debug(f"Adding finalizer for {target}") - weakref.finalize(ds._file_obj._manager, noisy_unlink, target) return ds diff --git a/wind_cf.py b/wind_cf.py new file mode 100644 index 00000000..842c03c6 --- /dev/null +++ b/wind_cf.py @@ -0,0 +1,65 @@ +import atlite +from math import cos, radians +import logging +import xarray as xr +from datetime import datetime + +logging.basicConfig(level=logging.INFO) + +limon_co_lat = 39.26719230055635 +limon_co_lon = -103.69300728924804 + +center_lon = limon_co_lon +center_lat = limon_co_lat + +#center_lat = miso_lat +#center_lon = miso_lon + +# Define a roughly 30x30km box around the MISO coordinates +# 1 degree of latitude is approximately 111 km +# 1 degree of longitude varies, but at this latitude it's about 95 km +lat_offset = 250 / 111 / 2 # Half of 30km in degrees latitude +lon_offset = 250 / (95 * cos(radians(center_lat))) / 2 # Half of 30km in degrees longitude + +min_lat = center_lat - lat_offset +max_lat = center_lat + lat_offset +min_lon = center_lon - lon_offset +max_lon = center_lon + lon_offset + +print(f"Bounding box coordinates:") +print(f"Min Latitude: {min_lat:.6f}") +print(f"Max Latitude: {max_lat:.6f}") +print(f"Min Longitude: {min_lon:.6f}") +print(f"Max Longitude: {max_lon:.6f}") + +cutout = atlite.Cutout( + #path="limon_co_wind.nc", + path="limon_co_2023.nc", + module="era5", + x=slice(min_lon, max_lon), + y=slice(min_lat, max_lat), + time="2023-01", + dt='h' +) +cutout.prepare() + +# Print out the current time +current_time = datetime.now() +print(f"Post prepare time: {current_time}") + +cap_factors = cutout.wind(turbine="NREL_ReferenceTurbine_2020ATB_5.5MW", capacity_factor_timeseries=True) + +print(cap_factors) + +# Convert the DataArray to a DataFrame +df = cap_factors.to_dataframe().reset_index() + +# Preview the DataFrame +print(df.head()) + +# Save the DataFrame to a CSV file +df.to_csv('capacity_factors.csv', index=False) + +# Print out the current time +current_time = datetime.now() +print(f"Post model time: {current_time}") From cbc108c99fb3579f20a69aa9826762a457d02e82 Mon Sep 17 00:00:00 2001 From: Ehren Foss Date: Fri, 15 Nov 2024 09:08:30 -0800 Subject: [PATCH 2/4] VM setup steps --- vm_setup.sh | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 vm_setup.sh diff --git a/vm_setup.sh b/vm_setup.sh new file mode 100644 index 00000000..9e4a92a8 --- /dev/null +++ b/vm_setup.sh @@ -0,0 +1,67 @@ + sudo apt update + 2 sudo apt install -y python3-pip git + 3 git clone https://github.com/carbondirect/cd_atlite + 4 cd cd_atlite/ + 5 ls + 6 pip3 install -r requirements.txt + 7 python3 -m venv venv + 8 apt install python3.11-venv + 9 sudo apt install python3.11-venv + 10 apt install python3.11-venv + 11 sudo apt install python3.11-venv + 12 python3 -m venv venv + 13 source venv/bin/activate + 14 pip3 install -r requirements.txt + 15 vi requirements.txt + 16 pip3 install -r requirements.txt + 17 rm requirements.txt + 18 ls + 19 curl -sSL https://install.python-poetry.org | python3 - + 20 poetry install + 21 poetry shell + 22 exit + 23 history + 24 poetry install + 25 cd cd_atlite/ + 26 poetry install + 27 ls + 28 pip install pip-tools + 29 source venv/bin/activate + 30 pip install pip-tools + 31 pip-compile pyproject.toml + 32 pip install -r requirements.txt + 33 python wind_cf.py + 34 vi wind_cf.py + 35 pip install -e . + 36 vi wind_cf.py + 37 python wind_cf.py + 38 git status + 39 PYTHONPATH=$(pwd) python wind_df + 40 PYTHONPATH=$(pwd) python wind_cf.py + 41 pwd + 42 pip install -e . + 43 history + 44 PYTHONPATH=$(pwd) python wind_cf.py + 45 exit + 46 cd cd_atlite/ + 47 ls + 48 source venv/bin/activate + 49 python wind_cf.py + 50 ls + 51 vi atlite/__init__.py + 52 python wind_cf.py + 53 pip install gcsfs + 54 python wind_cf.py + 55 lsblk + 56 sudo mkfs.ext4 -m 0 -F -E lazy_itable_init=0,lazy_journal_init=0,discard /dev/sda + 57 sudo mkdir -p /mnt/disks/era5-disk + 58 sudo mount -o discard,defaults /dev/sda /mnt/disks/era5-disk + 59 sudo nano /etc/fstab + 60 ls -al /mnt/disks/era5-disk/ + 61 ls + 62 export DASK_TEMPORARY_DIRECTORY="/mnt/disks/era5-disk/tmp" + 63 mkdir -p /mnt/disks/era5-disk/tmp + 64 sudo mkdir -p /mnt/disks/era5-disk/tmp + 65 chmod a+w /mnt/disks/era5-disk/tmp + 66 sudo chmod a+w /mnt/disks/era5-disk/tmp + 67 history From a70f544d365c2e9e0f2a62fc155c03842dbd02d7 Mon Sep 17 00:00:00 2001 From: Ben Bogart Date: Fri, 15 Nov 2024 12:31:46 -0800 Subject: [PATCH 3/4] fix: fix chunking for ERA5 data --- atlite/data.py | 1 + atlite/datasets/era5.py | 32 +++++++++++++++++++++++++++----- wind_cf.py | 20 ++++++++++++-------- 3 files changed, 40 insertions(+), 13 deletions(-) diff --git a/atlite/data.py b/atlite/data.py index b027509e..aa84a2ba 100644 --- a/atlite/data.py +++ b/atlite/data.py @@ -225,6 +225,7 @@ def cutout_prepare( ds[v].encoding.update(compression) ds = cutout.data.merge(ds[missing_vars.values]).assign_attrs(**attrs) + ds.unify_chunks() # not strictly necessary, but should speed things up. # write data to tmp file, copy it to original data, this is much safer # than appending variables diff --git a/atlite/datasets/era5.py b/atlite/datasets/era5.py index cfcc8e96..4e3c086e 100644 --- a/atlite/datasets/era5.py +++ b/atlite/datasets/era5.py @@ -77,7 +77,7 @@ def _add_height(ds): if "time" in z.coords: z = z.isel(time=0, drop=True) ds["height"] = z / g0 - ds = ds.drop_vars("z") + ds = ds.drop_vars("geopotential") return ds @@ -343,10 +343,29 @@ def retrieve_data(product, chunks=None, tmpdir=None, lock=None, **updates): If you want to track the state of your request go to https://cds-beta.climate.copernicus.eu/requests?tab=all """ + + # started creating a var map to simplify the changes. + variable_map = { + "geopotential": "z", + "10m_u_component_of_wind": "", + "100m_u_component_of_wind": "", + "10m_v_component_of_wind": "", + "100m_v_component_of_wind": "", + "forecast_surface_roughness": "", + "surface_net_solar_radiation": "", + "surface_solar_radiation_downwards": "", + "toa_incident_solar_radiation": "", + "total_sky_direct_solar_radiation_at_surface": "", + "runoff": "", + "2m_temperature": "", + "soil_temperature_level_4": "", + "2m_dewpoint_temperature": "", + } + request = {"product_type": "reanalysis", "format": "netcdf"} request.update(updates) - print(request) + logger.info(f"request: {request}") assert {"year", "month", "variable"}.issubset( request @@ -360,14 +379,17 @@ def retrieve_data(product, chunks=None, tmpdir=None, lock=None, **updates): # Open the Zarr store as a dataset ds = xr.open_zarr( bucket, - chunks={}, - storage_options=dict(token='anon') + chunks=chunks, # the chunks are not aligned, we deal with this later + consolidated=True, + storage_options=dict(token="anon"), ) # Select specific variables of interest variables = atleast_1d(request['variable']) # e.g., ['100m_u_component_of_wind', '100m_v_component_of_wind'] ds_selected_vars = ds[variables] + ds_selected_vars.unify_chunks() + # Define the time range and spatial bounding box year = request['year'] month = str(request['month'][0]).zfill(2) # Convert month to zero-padded format @@ -387,7 +409,7 @@ def retrieve_data(product, chunks=None, tmpdir=None, lock=None, **updates): 'latitude': slice(bbox[0], bbox[2]), 'longitude': slice(bbox[1] + 180, bbox[3] + 180) } - print(lat_lon_bbox) + logger.info(f"lat_lon_bbox: {lat_lon_bbox}") # Apply selections ds = ds_selected_vars.sel( diff --git a/wind_cf.py b/wind_cf.py index 842c03c6..35320861 100644 --- a/wind_cf.py +++ b/wind_cf.py @@ -12,14 +12,16 @@ center_lon = limon_co_lon center_lat = limon_co_lat -#center_lat = miso_lat -#center_lon = miso_lon +# center_lat = miso_lat +# center_lon = miso_lon # Define a roughly 30x30km box around the MISO coordinates # 1 degree of latitude is approximately 111 km # 1 degree of longitude varies, but at this latitude it's about 95 km lat_offset = 250 / 111 / 2 # Half of 30km in degrees latitude -lon_offset = 250 / (95 * cos(radians(center_lat))) / 2 # Half of 30km in degrees longitude +lon_offset = ( + 250 / (95 * cos(radians(center_lat))) / 2 +) # Half of 30km in degrees longitude min_lat = center_lat - lat_offset max_lat = center_lat + lat_offset @@ -33,21 +35,23 @@ print(f"Max Longitude: {max_lon:.6f}") cutout = atlite.Cutout( - #path="limon_co_wind.nc", + # path="limon_co_wind.nc", path="limon_co_2023.nc", module="era5", x=slice(min_lon, max_lon), y=slice(min_lat, max_lat), time="2023-01", - dt='h' + dt="h", ) -cutout.prepare() +cutout.prepare(show_progress=True) # Print out the current time current_time = datetime.now() print(f"Post prepare time: {current_time}") -cap_factors = cutout.wind(turbine="NREL_ReferenceTurbine_2020ATB_5.5MW", capacity_factor_timeseries=True) +cap_factors = cutout.wind( + turbine="NREL_ReferenceTurbine_2020ATB_5.5MW", capacity_factor_timeseries=True +) print(cap_factors) @@ -58,7 +62,7 @@ print(df.head()) # Save the DataFrame to a CSV file -df.to_csv('capacity_factors.csv', index=False) +df.to_csv("capacity_factors.csv", index=False) # Print out the current time current_time = datetime.now() From 0237ab8a4c2b20a3b4daecc84a945761da2375ea Mon Sep 17 00:00:00 2001 From: Ehren Foss Date: Mon, 18 Nov 2024 14:39:52 -0800 Subject: [PATCH 4/4] more time range info prints --- atlite/__init__.py | 2 +- atlite/datasets/era5.py | 14 +++++++++----- wind_cf.py | 24 ++++++++++++++++-------- 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/atlite/__init__.py b/atlite/__init__.py index 20099d30..2305fedc 100644 --- a/atlite/__init__.py +++ b/atlite/__init__.py @@ -35,7 +35,7 @@ match = re.match(r"(\d+\.\d+(\.\d+)?)", __version__) assert match, f"Could not determine release_version of pypsa: {__version__}" release_version = match.group(0) -assert not __version__.startswith("0.0"), "Could not determine version of atlite." +#assert not __version__.startswith("0.0"), "Could not determine version of atlite." __all__ = [ Cutout, diff --git a/atlite/datasets/era5.py b/atlite/datasets/era5.py index 4e3c086e..07d24d84 100644 --- a/atlite/datasets/era5.py +++ b/atlite/datasets/era5.py @@ -391,23 +391,27 @@ def retrieve_data(product, chunks=None, tmpdir=None, lock=None, **updates): ds_selected_vars.unify_chunks() # Define the time range and spatial bounding box + # TODO multiple years if needed? year = request['year'] - month = str(request['month'][0]).zfill(2) # Convert month to zero-padded format + month_start = str(request['month'][0]).zfill(2) # Convert month to zero-padded format + month_end = str(request['month'][-1]).zfill(2) day_start = str(request['day'][0]).zfill(2) day_end = str(request['day'][-1]).zfill(2) # Create start and end time strings - time_start = f"{year}-{month}-{day_start}T00:00" - time_end = f"{year}-{month}-{day_end}T23:00" + time_start = f"{year}-{month_start}-{day_start}T00:00" + time_end = f"{year}-{month_end}-{day_end}T23:00" # Define the time range slice time_range = slice(time_start, time_end) + logger.info("Time Range:") + logger.info(time_start) + logger.info(time_end) - #time_range = slice("2020-01-01", "2020-01-31") bbox = request['area'] lat_lon_bbox = { 'latitude': slice(bbox[0], bbox[2]), - 'longitude': slice(bbox[1] + 180, bbox[3] + 180) + 'longitude': slice(bbox[1], bbox[3]) } logger.info(f"lat_lon_bbox: {lat_lon_bbox}") diff --git a/wind_cf.py b/wind_cf.py index 35320861..e6a34cca 100644 --- a/wind_cf.py +++ b/wind_cf.py @@ -9,18 +9,22 @@ limon_co_lat = 39.26719230055635 limon_co_lon = -103.69300728924804 +# much windier spot... +ne_wind_lat = 42.0 +ne_wind_lon = -102.7 + center_lon = limon_co_lon center_lat = limon_co_lat -# center_lat = miso_lat -# center_lon = miso_lon +center_lat = ne_wind_lat +center_lon = ne_wind_lon # Define a roughly 30x30km box around the MISO coordinates # 1 degree of latitude is approximately 111 km # 1 degree of longitude varies, but at this latitude it's about 95 km -lat_offset = 250 / 111 / 2 # Half of 30km in degrees latitude +lat_offset = 100 / 111 / 2 # Half of 30km in degrees latitude lon_offset = ( - 250 / (95 * cos(radians(center_lat))) / 2 + 100 / (95 * cos(radians(center_lat))) / 2 ) # Half of 30km in degrees longitude min_lat = center_lat - lat_offset @@ -36,11 +40,11 @@ cutout = atlite.Cutout( # path="limon_co_wind.nc", - path="limon_co_2023.nc", + path="ne_wind.nc", module="era5", - x=slice(min_lon, max_lon), + x=slice(min_lon + 180, max_lon + 180), # convert to 360 here, avoid differences between fetching & existing .nc paths y=slice(min_lat, max_lat), - time="2023-01", + time="2023", dt="h", ) cutout.prepare(show_progress=True) @@ -61,8 +65,12 @@ # Preview the DataFrame print(df.head()) +# get just a single time series from the grid area +df_filtered = df[(df['lon'] == 76.75) & (df['lat'] == 41.75)] +print(df_filtered.head()) + # Save the DataFrame to a CSV file -df.to_csv("capacity_factors.csv", index=False) +df_filtered.to_csv("capacity_factors.csv", index=False) # Print out the current time current_time = datetime.now()