feat: Bb/fix chunking #408

Draft · wants to merge 4 commits into base: master
2 changes: 1 addition & 1 deletion atlite/__init__.py
@@ -35,7 +35,7 @@
 match = re.match(r"(\d+\.\d+(\.\d+)?)", __version__)
 assert match, f"Could not determine release_version of pypsa: {__version__}"
 release_version = match.group(0)
-assert not __version__.startswith("0.0"), "Could not determine version of atlite."
+#assert not __version__.startswith("0.0"), "Could not determine version of atlite."

 __all__ = [
     Cutout,
1 change: 1 addition & 0 deletions atlite/data.py
@@ -225,6 +225,7 @@ def cutout_prepare(
         ds[v].encoding.update(compression)

     ds = cutout.data.merge(ds[missing_vars.values]).assign_attrs(**attrs)
+    ds = ds.unify_chunks()  # not strictly necessary, but should speed things up

     # write data to tmp file, copy it to original data, this is much safer
     # than appending variables
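Note: xarray's `unify_chunks` returns a new dataset with the dask chunks of all variables aligned; it does not modify the dataset in place, which is why the result is re-assigned above. A minimal, self-contained sketch of that behaviour (the sizes are made up):

```python
import numpy as np
import xarray as xr

# Two variables over the same dimension, deliberately chunked differently.
a = xr.DataArray(np.arange(100), dims="time").chunk({"time": 10})
b = xr.DataArray(np.arange(100), dims="time").chunk({"time": 25})
ds = xr.Dataset({"a": a, "b": b})

# unify_chunks returns a new dataset whose variables share a common chunking;
# the original `ds` is left untouched, so the result must be re-assigned.
ds = ds.unify_chunks()
print(ds.chunks)  # one chunk layout valid for both variables
```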
121 changes: 87 additions & 34 deletions atlite/datasets/era5.py
@@ -21,6 +21,8 @@
 from dask import compute, delayed
 from dask.array import arctan2, sqrt
 from numpy import atleast_1d
+from dask.diagnostics import ProgressBar
+

 from atlite.gis import maybe_swap_spatial_dims
 from atlite.pv.solar_position import SolarPosition
@@ -71,11 +73,11 @@ def _add_height(ds):

     """
     g0 = 9.80665
-    z = ds["z"]
+    z = ds["geopotential"]
     if "time" in z.coords:
         z = z.isel(time=0, drop=True)
     ds["height"] = z / g0
-    ds = ds.drop_vars("z")
+    ds = ds.drop_vars("geopotential")
     return ds

@@ -118,19 +120,19 @@ def get_data_wind(retrieval_params):
     ds = _rename_and_clean_coords(ds)

     for h in [10, 100]:
-        ds[f"wnd{h}m"] = sqrt(ds[f"u{h}"] ** 2 + ds[f"v{h}"] ** 2).assign_attrs(
-            units=ds[f"u{h}"].attrs["units"], long_name=f"{h} metre wind speed"
+        ds[f"wnd{h}m"] = sqrt(ds[f"{h}m_u_component_of_wind"] ** 2 + ds[f"{h}m_v_component_of_wind"] ** 2).assign_attrs(
+            units=ds[f"{h}m_u_component_of_wind"].attrs["units"], long_name=f"{h} metre wind speed"
         )
     ds["wnd_shear_exp"] = (
         np.log(ds["wnd10m"] / ds["wnd100m"]) / np.log(10 / 100)
     ).assign_attrs(units="", long_name="wind shear exponent")

     # span the whole circle: 0 is north, π/2 is east, π is south, 3π/2 is west
-    azimuth = arctan2(ds["u100"], ds["v100"])
+    azimuth = arctan2(ds["100m_u_component_of_wind"], ds["100m_v_component_of_wind"])
     ds["wnd_azimuth"] = azimuth.where(azimuth >= 0, azimuth + 2 * np.pi)

-    ds = ds.drop_vars(["u100", "v100", "u10", "v10", "wnd10m"])
-    ds = ds.rename({"fsr": "roughness"})
+    ds = ds.drop_vars(["10m_u_component_of_wind", "10m_v_component_of_wind", "100m_u_component_of_wind", "100m_v_component_of_wind", "wnd10m"])
+    ds = ds.rename({"forecast_surface_roughness": "roughness"})

     return ds

@@ -159,7 +161,10 @@ def get_data_influx(retrieval_params):

     ds = _rename_and_clean_coords(ds)

-    ds = ds.rename({"fdir": "influx_direct", "tisr": "influx_toa"})
+    ds = ds.rename({"total_sky_direct_solar_radiation_at_surface": "influx_direct",
+                    "toa_incident_solar_radiation": "influx_toa",
+                    "surface_solar_radiation_downwards": "ssrd",
+                    "surface_net_solar_radiation": "ssr"})
     ds["albedo"] = (
         ((ds["ssrd"] - ds["ssr"]) / ds["ssrd"].where(ds["ssrd"] != 0))
         .fillna(0.0)
@@ -215,9 +220,9 @@ def get_data_temperature(retrieval_params):
     ds = _rename_and_clean_coords(ds)
     ds = ds.rename(
         {
-            "t2m": "temperature",
-            "stl4": "soil temperature",
-            "d2m": "dewpoint temperature",
+            "2m_temperature": "temperature",
+            "soil_temperature_level_4": "soil temperature",
+            "2m_dewpoint_temperature": "dewpoint temperature",
         }
     )

@@ -231,7 +236,7 @@ def get_data_runoff(retrieval_params):
     ds = retrieve_data(variable=["runoff"], **retrieval_params)

     ds = _rename_and_clean_coords(ds)
-    ds = ds.rename({"ro": "runoff"})
+    #ds = ds.rename({"ro": "runoff"})

     return ds

@@ -338,36 +343,84 @@ def retrieve_data(product, chunks=None, tmpdir=None, lock=None, **updates):
     If you want to track the state of your request go to
     https://cds-beta.climate.copernicus.eu/requests?tab=all
     """
+
+    # started creating a var map to simplify the changes.
+    variable_map = {
+        "geopotential": "z",
+        "10m_u_component_of_wind": "",
+        "100m_u_component_of_wind": "",
+        "10m_v_component_of_wind": "",
+        "100m_v_component_of_wind": "",
+        "forecast_surface_roughness": "",
+        "surface_net_solar_radiation": "",
+        "surface_solar_radiation_downwards": "",
+        "toa_incident_solar_radiation": "",
+        "total_sky_direct_solar_radiation_at_surface": "",
+        "runoff": "",
+        "2m_temperature": "",
+        "soil_temperature_level_4": "",
+        "2m_dewpoint_temperature": "",
+    }
+
     request = {"product_type": "reanalysis", "format": "netcdf"}
     request.update(updates)

+    logger.info(f"request: {request}")
+
     assert {"year", "month", "variable"}.issubset(
         request
     ), "Need to specify at least 'variable', 'year' and 'month'"

-    client = cdsapi.Client(
-        info_callback=logger.debug, debug=logging.DEBUG >= logging.root.level
-    )
+    data_arrays = {}
+    bucket = 'gs://gcp-public-data-arco-era5/ar/full_37-1h-0p25deg-chunk-1.zarr-v3/'
+
+    ProgressBar().register()
+
+    # Open the Zarr store as a dataset
+    ds = xr.open_zarr(
+        bucket,
+        chunks=chunks,  # the chunks are not aligned, we deal with this later
+        consolidated=True,
+        storage_options=dict(token="anon"),
+    )
+
+    # Select specific variables of interest
+    variables = atleast_1d(request['variable'])  # e.g., ['100m_u_component_of_wind', '100m_v_component_of_wind']
+    ds_selected_vars = ds[variables]
+
+    # unify_chunks returns a new dataset, so re-assign the result
+    ds_selected_vars = ds_selected_vars.unify_chunks()
+
+    # Define the time range and spatial bounding box
+    # TODO multiple years if needed?
+    year = request['year']
+    month_start = str(request['month'][0]).zfill(2)  # convert month to zero-padded format
+    month_end = str(request['month'][-1]).zfill(2)
+    day_start = str(request['day'][0]).zfill(2)
+    day_end = str(request['day'][-1]).zfill(2)
+
+    # Create start and end time strings
+    time_start = f"{year}-{month_start}-{day_start}T00:00"
+    time_end = f"{year}-{month_end}-{day_end}T23:00"
+
+    # Define the time range slice
+    time_range = slice(time_start, time_end)
+    logger.info("Time Range:")
+    logger.info(time_start)
+    logger.info(time_end)
+
+    bbox = request['area']  # CDS convention: [north, west, south, east]
+    lat_lon_bbox = {
+        'latitude': slice(bbox[0], bbox[2]),
+        'longitude': slice(bbox[1], bbox[3])
+    }
+    logger.info(f"lat_lon_bbox: {lat_lon_bbox}")
+
+    # Apply selections
+    ds = ds_selected_vars.sel(
+        time=time_range,
+        latitude=lat_lon_bbox['latitude'],
+        longitude=lat_lon_bbox['longitude']
+    )
-    result = client.retrieve(product, request)
-
-    if lock is None:
-        lock = nullcontext()
-
-    with lock:
-        fd, target = mkstemp(suffix=".nc", dir=tmpdir)
-        os.close(fd)
-
-        # Inform user about data being downloaded as "* variable (year-month)"
-        timestr = f"{request['year']}-{request['month']}"
-        variables = atleast_1d(request["variable"])
-        varstr = "\n\t".join([f"{v} ({timestr})" for v in variables])
-        logger.info(f"CDS: Downloading variables\n\t{varstr}\n")
-        result.download(target)
-
-    ds = xr.open_dataset(target, chunks=chunks or {})
-    if tmpdir is None:
-        logger.debug(f"Adding finalizer for {target}")
-        weakref.finalize(ds._file_obj._manager, noisy_unlink, target)

     return ds
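For readers skimming the diff: the hunk above replaces the CDS API download path with reads from the public ARCO-ERA5 Zarr store on Google Cloud Storage. A minimal sketch of that access pattern, assuming anonymous access via gcsfs and ERA5's native grid conventions (descending latitudes, 0-360 longitudes); the variable names come from the diff, while the time range, bounding box, and chunk sizes are purely illustrative:

```python
import xarray as xr

bucket = "gs://gcp-public-data-arco-era5/ar/full_37-1h-0p25deg-chunk-1.zarr-v3/"

# Lazily open the store; nothing is downloaded until values are computed.
ds = xr.open_zarr(
    bucket,
    chunks={"time": 24},  # illustrative; the PR passes the cutout's chunks through
    consolidated=True,
    storage_options={"token": "anon"},  # anonymous GCS access (needs gcsfs)
)

# Subset variables, time, and space before computing anything.
subset = ds[["100m_u_component_of_wind", "100m_v_component_of_wind"]].sel(
    time=slice("2023-01-01T00:00", "2023-01-31T23:00"),
    latitude=slice(42.5, 41.5),  # assumed descending latitude coordinate
    longitude=slice(76.5, 78.0),
)
print(subset)
```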
67 changes: 67 additions & 0 deletions vm_setup.sh
@@ -0,0 +1,67 @@
+sudo apt update
+sudo apt install -y python3-pip git
+git clone https://github.com/carbondirect/cd_atlite
+cd cd_atlite/
+ls
+pip3 install -r requirements.txt
+python3 -m venv venv
+apt install python3.11-venv
+sudo apt install python3.11-venv
+apt install python3.11-venv
+sudo apt install python3.11-venv
+python3 -m venv venv
+source venv/bin/activate
+pip3 install -r requirements.txt
+vi requirements.txt
+pip3 install -r requirements.txt
+rm requirements.txt
+ls
+curl -sSL https://install.python-poetry.org | python3 -
+poetry install
+poetry shell
+exit
+history
+poetry install
+cd cd_atlite/
+poetry install
+ls
+pip install pip-tools
+source venv/bin/activate
+pip install pip-tools
+pip-compile pyproject.toml
+pip install -r requirements.txt
+python wind_cf.py
+vi wind_cf.py
+pip install -e .
+vi wind_cf.py
+python wind_cf.py
+git status
+PYTHONPATH=$(pwd) python wind_df
+PYTHONPATH=$(pwd) python wind_cf.py
+pwd
+pip install -e .
+history
+PYTHONPATH=$(pwd) python wind_cf.py
+exit
+cd cd_atlite/
+ls
+source venv/bin/activate
+python wind_cf.py
+ls
+vi atlite/__init__.py
+python wind_cf.py
+pip install gcsfs
+python wind_cf.py
+lsblk
+sudo mkfs.ext4 -m 0 -F -E lazy_itable_init=0,lazy_journal_init=0,discard /dev/sda
+sudo mkdir -p /mnt/disks/era5-disk
+sudo mount -o discard,defaults /dev/sda /mnt/disks/era5-disk
+sudo nano /etc/fstab
+ls -al /mnt/disks/era5-disk/
+ls
+export DASK_TEMPORARY_DIRECTORY="/mnt/disks/era5-disk/tmp"
+mkdir -p /mnt/disks/era5-disk/tmp
+sudo mkdir -p /mnt/disks/era5-disk/tmp
+chmod a+w /mnt/disks/era5-disk/tmp
+sudo chmod a+w /mnt/disks/era5-disk/tmp
+history
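The tail of this history mounts a larger disk and points dask's spill space at it via the `DASK_TEMPORARY_DIRECTORY` environment variable. For reference, the same setting can be applied in-process; a sketch, with the path taken from the history above:

```python
import dask

# Equivalent to `export DASK_TEMPORARY_DIRECTORY=...` above: send dask's
# temporary/spill files to the large mounted disk instead of the boot disk.
dask.config.set({"temporary_directory": "/mnt/disks/era5-disk/tmp"})
print(dask.config.get("temporary_directory"))
```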
77 changes: 77 additions & 0 deletions wind_cf.py
@@ -0,0 +1,77 @@
+import atlite
+from math import cos, radians
+import logging
+import xarray as xr
+from datetime import datetime
+
+logging.basicConfig(level=logging.INFO)
+
+limon_co_lat = 39.26719230055635
+limon_co_lon = -103.69300728924804
+
+# much windier spot...
+ne_wind_lat = 42.0
+ne_wind_lon = -102.7
+
+center_lon = limon_co_lon
+center_lat = limon_co_lat
+
+# override with the windier Nebraska site
+center_lat = ne_wind_lat
+center_lon = ne_wind_lon
+
+# Define a roughly 100x100 km box around the chosen coordinates.
+# 1 degree of latitude is approximately 111 km; 1 degree of longitude is
+# about 111 km at the equator, scaled by cos(latitude).
+lat_offset = 100 / 111 / 2  # Half of 100 km in degrees latitude
+lon_offset = (
+    100 / (111 * cos(radians(center_lat))) / 2
+)  # Half of 100 km in degrees longitude
+
+min_lat = center_lat - lat_offset
+max_lat = center_lat + lat_offset
+min_lon = center_lon - lon_offset
+max_lon = center_lon + lon_offset
+
+print("Bounding box coordinates:")
+print(f"Min Latitude: {min_lat:.6f}")
+print(f"Max Latitude: {max_lat:.6f}")
+print(f"Min Longitude: {min_lon:.6f}")
+print(f"Max Longitude: {max_lon:.6f}")
+
+cutout = atlite.Cutout(
+    # path="limon_co_wind.nc",
+    path="ne_wind.nc",
+    module="era5",
+    x=slice(min_lon + 180, max_lon + 180),  # convert to 360 here, avoid differences between fetching & existing .nc paths
+    y=slice(min_lat, max_lat),
+    time="2023",
+    dt="h",
+)
+cutout.prepare(show_progress=True)
+
+# Print out the current time
+current_time = datetime.now()
+print(f"Post prepare time: {current_time}")
+
+cap_factors = cutout.wind(
+    turbine="NREL_ReferenceTurbine_2020ATB_5.5MW", capacity_factor_timeseries=True
+)
+
+print(cap_factors)
+
+# Convert the DataArray to a DataFrame
+df = cap_factors.to_dataframe().reset_index()
+
+# Preview the DataFrame
+print(df.head())
+
+# Get just a single time series from the grid area
+df_filtered = df[(df['lon'] == 76.75) & (df['lat'] == 41.75)]
+print(df_filtered.head())
+
+# Save the filtered DataFrame to a CSV file
+df_filtered.to_csv("capacity_factors.csv", index=False)
+
+# Print out the current time
+current_time = datetime.now()
+print(f"Post model time: {current_time}")