Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Download, process and run model on hindcasts #91

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
813 changes: 437 additions & 376 deletions IBF-Typhoon-model/.Rhistory

Large diffs are not rendered by default.

Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -1229,7 +1229,7 @@
"hash": "c56dc72bed4e123c21144b1399355bbd07b8d587f6360d6b6ea22ee2ab335a35"
},
"kernelspec": {
"display_name": "Python 3.8.10 ('geo_env')",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand Down
14 changes: 4 additions & 10 deletions IBF-Typhoon-model/run_model_V2.R
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ wind_grid <- read.csv(windfield_data) %>%
)

rainfall_ <- Read_rainfall_v2(wshade)
rainfall_[,2] <- rainfall_[,2] - 1000


typhoon_hazard <- wind_grid %>%
Expand Down Expand Up @@ -230,19 +231,12 @@ df_impact_forecast_CERF <- get_total_impact_forecast(
# ------------------------ calculate and plot probability National -----------------------------------

dref_damage_thresholds <- c(100000, 80000, 70000, 50000, 30000)
dref_probabilities <- c(0.95, 0.80, 0.70, 0.60, 0.50)
dref_probabilities <- c(0.95, 0.80, 0.70, 0.60, 0.50)

df_impact_forecast_DREF <- get_total_impact_forecast(df_impact_forecast,
dref_damage_thresholds,
dref_probabilities, "DREF")%>%
dplyr::mutate(trigger = ifelse('100k' >= 50,1,
ifelse('80k' >= 60,1,
ifelse('70k' >= 50,1,
ifelse('50k' >= 80,1,
ifelse('30k' >= 95,1, 0))))))

#write trigger to file
write.csv(df_impact_forecast_DREF,file = paste0(Output_folder, "trigger", "_", forecast_time, "_",Typhoon_stormname, ".csv"))
dref_probabilities, "DREF")


# ------------------------ calculate average impact vs probability -----------------------------------

Expand Down
17 changes: 14 additions & 3 deletions IBF-Typhoon-model/src/typhoonmodel/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,16 @@
@click.command()
@click.option('--path', default='./', help='main directory')
@click.option('--remote_directory', default=None,
help='remote directory for ECMWF forecast data, format: YYYYMMDDhhmmss')
help='remote directory for ECMWF forecast data, format: YYYYMMDDhhmmss. In the case of hindcasts,'
'the target timestamp of the forecast run.')
@click.option('--use-hindcast', is_flag=True, help='Use hindcast instead of latest ECMWF. Need to specify'
'local directory and use remote directory for timestamp')
@click.option('--local-directory', default=None,
help='local directory containing with ECMWF hindcasts')
@click.option('--typhoonname', default=None, help='name for active typhoon')
@click.option('--no-azure', is_flag=True, help="Don't push to Azure lake")
@click.option('--debug', is_flag=True, help='setting for DEBUG option')
def main(path, remote_directory, typhoonname, no_azure, debug):
def main(path, remote_directory, use_hindcast, local_directory, typhoonname, no_azure, debug):
initialize.setup_cartopy()
start_time = datetime.now()
print('---------------------AUTOMATION SCRIPT STARTED---------------------------------')
Expand All @@ -37,7 +42,13 @@ def main(path, remote_directory, typhoonname, no_azure, debug):
typhoonname = 'MEGI'
remote_dir = '20220412000000'
logger.info(f"DEBUGGING piepline for typhoon {typhoonname}")
Forecast(path, remote_dir, typhoonname, countryCodeISO3='PHP', admin_level=3, no_azure=no_azure)
if use_hindcast:
if not remote_directory or not local_directory or not typhoonname:
logger.error("If you want to use a hindcast, you need to specify remote directory"
"(for the forecast timestamp), a local directory, and the typhoon name")
logger.info(f"Running on hindcast {typhoonname}")
Forecast(path, remote_dir, typhoonname, countryCodeISO3='PHP', admin_level=3, no_azure=no_azure,
use_hindcast=use_hindcast, local_directory=local_directory)
print('---------------------AUTOMATION SCRIPT FINISHED---------------------------------')
print(str(datetime.now()))

Expand Down
26 changes: 25 additions & 1 deletion IBF-Typhoon-model/src/typhoonmodel/utility_fun/Rainfall_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,4 +165,28 @@ def zonal_stat_rain(filepath,admin):
# Obtain list with maximum 6h rainfall
maximum_6h = [max(x.values()) for x in final]
return pd.DataFrame(maximum_6h)



def create_synthetic_rainfall(Input_folder):
    """Write a synthetic rainfall dataset into ``<Input_folder>/rainfall/``.

    Used on the hindcast path, where the NOAA rainfall data for the original
    forecast date can no longer be downloaded.  Produces:

    * ``rainfall_24.nc`` — a constant-rainfall NetCDF copied from
      ``data-raw/rainfall_synthetic_1000.nc``
    * ``rain_data.csv`` — per-municipality maxima (250 for 6 h, 1000 for
      24 h) for every admin-3 unit read from the Philippines geojson

    :param Input_folder: run-specific input directory the pipeline reads from
    """
    # TODO: Why is only rainfall 24 used?
    # TODO: turn this into actual data from the past
    logger.info("Creating synthetic rainfall dataset")
    # Make the 24 h netcdf only
    ds = xr.load_dataset('./data-raw/rainfall_synthetic_1000.nc')
    rainfall_path = os.path.join(Input_folder, 'rainfall/')
    os.makedirs(rainfall_path, exist_ok=True)
    nc_path = os.path.join(rainfall_path, "rainfall_24.nc")
    try:
        # Remove any stale file first: to_netcdf() can raise PermissionError
        # when the target of a previous run already exists.
        if os.path.exists(nc_path):
            os.remove(nc_path)
        ds.to_netcdf(nc_path)
    except PermissionError:
        logger.warning("Can't overwrite old file, skipping")
    # NOTE(review): unclear whether this csv is still consumed downstream
    ADMIN_PATH = 'data-raw/gis_data/phl_admin3_simpl2.geojson'
    admin = gpd.read_file(ADMIN_PATH)
    # Scalar values are broadcast by pandas to the length of Mun_Code
    df_rain = pd.DataFrame({
        "max_06h_rain": 250,
        "max_24h_rain": 1000,
        "Mun_Code": admin['adm3_pcode'].values,
    })
    df_rain.to_csv(os.path.join(rainfall_path, "rain_data.csv"), index=False)
120 changes: 64 additions & 56 deletions IBF-Typhoon-model/src/typhoonmodel/utility_fun/forecast_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from climada.hazard.tc_tracks_forecast import TCForecast
from typhoonmodel.utility_fun.settings import get_settings
from typhoonmodel.utility_fun import track_data_clean, Check_for_active_typhoon, Sendemail, \
ucl_data, plot_intensity, initialize
ucl_data, plot_intensity, initialize, read_in_hindcast

if platform == "linux" or platform == "linux2": #check if running on linux or windows os
from typhoonmodel.utility_fun import Rainfall_data
Expand All @@ -33,7 +33,8 @@


class Forecast:
def __init__(self,main_path, remote_dir,typhoonname, countryCodeISO3, admin_level, no_azure):
def __init__(self,main_path, remote_dir,typhoonname, countryCodeISO3, admin_level, no_azure,
use_hindcast, local_directory):
self.TyphoonName = typhoonname
self.admin_level = admin_level
#self.db = DatabaseManager(leadTimeLabel, countryCodeISO3,admin_level)
Expand Down Expand Up @@ -75,29 +76,34 @@ def __init__(self,main_path, remote_dir,typhoonname, countryCodeISO3, admin_leve
self.Output_folder = Output_folder

#download NOAA rainfall
try:
#Rainfall_data_window.download_rainfall_nomads(Input_folder,path,Alternative_data_point)
Rainfall_data.download_rainfall_nomads(self.Input_folder,self.main_path,self.Alternative_data_point)
rainfall_error=False
self.rainfall_data=pd.read_csv(os.path.join(self.Input_folder, "rainfall/rain_data.csv"))
except:
traceback.print_exc()
#logger.warning(f'Rainfall download failed, performing download in R script')
logger.info(f'Rainfall download failed, performing download in R script')
rainfall_error=True
self.rainfall_data=[]
###### download UCL data

try:
ucl_data.create_ucl_metadata(self.main_path,
self.UCL_USERNAME,self.UCL_PASSWORD)
ucl_data.process_ucl_data(self.main_path,
self.Input_folder,self.UCL_USERNAME,self.UCL_PASSWORD)
except:
logger.info(f'UCL download failed')
if use_hindcast:
Rainfall_data.create_synthetic_rainfall(self.Input_folder)
rainfall_error = False
else:
try:
#Rainfall_data_window.download_rainfall_nomads(Input_folder,path,Alternative_data_point)
Rainfall_data.download_rainfall_nomads(self.Input_folder,self.main_path,self.Alternative_data_point)
rainfall_error=False
self.rainfall_data=pd.read_csv(os.path.join(self.Input_folder, "rainfall/rain_data.csv"))
except:
traceback.print_exc()
#logger.warning(f'Rainfall download failed, performing download in R script')
logger.info(f'Rainfall download failed, performing download in R script')
rainfall_error=True
self.rainfall_data=[]
###### download UCL data

try:
ucl_data.create_ucl_metadata(self.main_path,
self.UCL_USERNAME,self.UCL_PASSWORD)
ucl_data.process_ucl_data(self.main_path,
self.Input_folder,self.UCL_USERNAME,self.UCL_PASSWORD)
except:
logger.info(f'UCL download failed')


##Create grid points to calculate Winfield
logger.info("Creating windfield")
cent = Centroids()
cent.set_raster_from_pnt_bounds((118,6,127,19), res=0.05)
cent.check()
Expand All @@ -116,29 +122,33 @@ def __init__(self,main_path, remote_dir,typhoonname, countryCodeISO3, admin_leve
self.df_admin=df_admin
# Sometimes the ECMWF ftp server complains about too many requests
# This code allows several retries with some sleep time in between
n_tries = 0
while True:
try:
logger.info("Downloading ECMWF typhoon tracks")
bufr_files = TCForecast.fetch_bufr_ftp(remote_dir=self.remote_dir)
fcast = TCForecast()
fcast.fetch_ecmwf(files=bufr_files)
except ftplib.all_errors as e:
n_tries += 1
if n_tries >= self.ECMWF_MAX_TRIES:
logger.error(f' Data downloading from ECMWF failed: {e}, '
f'reached limit of {self.ECMWF_MAX_TRIES} tries, exiting')
sys.exit()
logger.error(f' Data downloading from ECMWF failed: {e}, retrying after {self.ECMWF_SLEEP} s')
time.sleep(self.ECMWF_SLEEP)
continue
break
if use_hindcast:
logger.info("Reading in hindcast data")
fcast_data = read_in_hindcast.read_in_hindcast(typhoonname, remote_dir, local_directory)
fcast_data = [track_data_clean.track_data_clean(tr) for tr in fcast_data]
else:
n_tries = 0
while True:
try:
logger.info("Downloading ECMWF typhoon tracks")
bufr_files = TCForecast.fetch_bufr_ftp(remote_dir=self.remote_dir)
fcast = TCForecast()
fcast.fetch_ecmwf(files=bufr_files)
except ftplib.all_errors as e:
n_tries += 1
if n_tries >= self.ECMWF_MAX_TRIES:
logger.error(f' Data downloading from ECMWF failed: {e}, '
f'reached limit of {self.ECMWF_MAX_TRIES} tries, exiting')
sys.exit()
logger.error(f' Data downloading from ECMWF failed: {e}, retrying after {self.ECMWF_SLEEP} s')
time.sleep(self.ECMWF_SLEEP)
continue
break
fcast_data = [track_data_clean.track_data_clean(tr) for tr in fcast.data if (tr.time.size>1 and tr.name in Activetyphoon)]

#%% filter data downloaded in the above step for active typhoons in PAR
# filter tracks with name of current typhoons and drop tracks with only one timestep
fcast_data = [track_data_clean.track_data_clean(tr) for tr in fcast.data if (tr.time.size>1 and tr.name in Activetyphoon)]
self.fcast_data=fcast_data
fcast.data =fcast_data # [tr for tr in fcast.data if tr.name in Activetyphoon]
self.data_filenames_list={}
self.image_filenames_list={}
self.typhhon_wind_data={}
Expand All @@ -160,11 +170,11 @@ def __init__(self,main_path, remote_dir,typhoonname, countryCodeISO3, admin_leve
#typhoons='SURIGAE' # to run it manually for any typhoon
# select windspeed for HRS model

fcast.data=[tr for tr in fcast.data if tr.name==typhoons]
tr_HRS=[tr for tr in fcast.data if (tr.is_ensemble=='False')]
fcast_data=[tr for tr in fcast_data if tr.name==typhoons]
tr_HRS=[tr for tr in fcast_data if (tr.is_ensemble=='False')]

if tr_HRS !=[]:
HRS_SPEED=(tr_HRS[0].max_sustained_wind.values/0.84).tolist() ############# 0.84 is conversion factor for ECMWF 10MIN TO 1MIN AVERAGE
# Make HRS track same length as the others
dfff=tr_HRS[0].to_dataframe()
dfff[['VMAX','LAT','LON']]=dfff[['max_sustained_wind','lat','lon']]
dfff['YYYYMMDDHH']=dfff.index.values
Expand All @@ -177,18 +187,16 @@ def __init__(self,main_path, remote_dir,typhoonname, countryCodeISO3, admin_leve
#line_='Rainfall,'+'%sRainfall/' % Input_folder +','+ typhoons + ',' + date_dir #StormName #
fname.write(line_+'\n')
# Adjust track time step
data_forced=[tr.where(tr.time <= max(tr_HRS[0].time.values),drop=True) for tr in fcast.data]
# data_forced = [track_data_clean.track_data_force_HRS(tr,HRS_SPEED) for tr in data_forced] # forced with HRS windspeed

#data_forced= [track_data_clean.track_data_clean(tr) for tr in fcast.data] # taking speed of ENS
# interpolate to 3h steps from the original 6h
#fcast.equal_timestep(3)
data_forced = fcast_data
# data_forced=[tr.where(tr.time <= max(tr_HRS[0].time.values),drop=True) for tr in fcast_data]
# TODO: Use pressure from high res
# Complicated because all tracks are diff length, some more or less than HRS
else:
len_ar=np.min([ len(var.lat.values) for var in fcast.data ])
lat_ = np.ma.mean( [ var.lat.values[:len_ar] for var in fcast.data ], axis=0 )
lon_ = np.ma.mean( [ var.lon.values[:len_ar] for var in fcast.data ], axis=0 )
YYYYMMDDHH =pd.date_range(fcast.data[0].time.values[0], periods=len_ar, freq="H")
vmax_ = np.ma.mean( [ var.max_sustained_wind.values[:len_ar] for var in fcast.data ], axis=0 )
len_ar=np.min([ len(var.lat.values) for var in fcast_data ])
lat_ = np.ma.mean( [ var.lat.values[:len_ar] for var in fcast_data ], axis=0 )
lon_ = np.ma.mean( [ var.lon.values[:len_ar] for var in fcast_data ], axis=0 )
YYYYMMDDHH =pd.date_range(fcast_data[0].time.values[0], periods=len_ar, freq="H")
vmax_ = np.ma.mean( [ var.max_sustained_wind.values[:len_ar] for var in fcast_data ], axis=0 )
d = {'YYYYMMDDHH':YYYYMMDDHH,
"VMAX":vmax_,
"LAT": lat_,
Expand All @@ -202,7 +210,7 @@ def __init__(self,main_path, remote_dir,typhoonname, countryCodeISO3, admin_leve
line_='ecmwf,'+'%secmwf_hrs_track.csv' % self.Input_folder+ ',' +typhoons+','+ self.date_dir #StormName #
#line_='Rainfall,'+'%sRainfall/' % Input_folder +','+ typhoons + ',' + date_dir #StormName #
fname.write(line_+'\n')
data_forced=fcast.data
data_forced=fcast_data
landfall_location_={}
check_flag=0
for row,data in hrs_track_data.iterrows():
Expand Down
59 changes: 59 additions & 0 deletions IBF-Typhoon-model/src/typhoonmodel/utility_fun/read_in_hindcast.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from datetime import datetime
from pathlib import Path

import numpy as np
import xarray as xr
import pandas as pd

ONE_MIN_TO_TEN_MIN = 0.84


def read_in_hindcast(typhoon_name: str, remote_dir: str, local_directory: str):
    """Read stored ECMWF hindcast tracks for one typhoon and one forecast run.

    Reads ``<local_directory>/<typhoon_name>_all.csv`` and returns the tracks
    of the forecast run issued at ``remote_dir``, shaped like the
    ``xarray.Dataset`` tracks produced by climada's ``TCForecast`` on the
    live-forecast path.

    :param typhoon_name: typhoon name, lowercased to build the file name
    :param remote_dir: forecast issue time, format ``YYYYMMDDhhmmss``
    :param local_directory: directory holding the ``*_all.csv`` hindcast files
    :return: list of track datasets (HRS track plus ensemble members)
    """
    # Read in the hindcast csv
    filename = Path(local_directory) / f"{typhoon_name.lower()}_all.csv"
    forecast_time = datetime.strptime(remote_dir, "%Y%m%d%H%M%S")

    df = pd.read_csv(filename)
    for cname in ["time", "forecast_time"]:
        df[cname] = pd.to_datetime(df[cname])
    # Keep only actual forecast rows for the requested issue time
    df = df[df["mtype"].isin(["ensembleforecast", "forecast"])]
    df = df[df["forecast_time"] == forecast_time]

    # Format into a list of tracks, one per ensemble member
    tracks = []
    for ensemble, group in df.groupby("ensemble"):
        # CSV row order is not guaranteed; the time axis must be monotonic
        group = group.sort_values("time")
        # A track needs >= 2 timesteps to define a time step; the
        # live-forecast path also drops such tracks (tr.time.size > 1)
        if len(group) < 2:
            continue

        is_ensemble = 'False' if ensemble == 'none' else 'True'

        # Time step in hours, assumed constant along the track
        time_step = (group["time"].values[1] - group["time"].values[0]).astype('timedelta64[h]')
        time_step = pd.to_timedelta(time_step).total_seconds() / 3600

        coords = dict(
            time=(["time"], group["time"]),
        )
        data_vars = dict(
            # ECMWF reports 10-min sustained wind; convert to 1-min average
            max_sustained_wind=(["time"], group['speed'] / ONE_MIN_TO_TEN_MIN),
            central_pressure=(["time"], group['pressure']),
            lat=(["time"], group["lat"]),
            lon=(["time"], group["lon"]),
            time_step=(["time"], [time_step] * len(group)),
            radius_max_wind=(["time"], [np.nan] * len(group)),
            environmental_pressure=(["time"], [1010.] * len(group)),
        )
        attrs = dict(
            max_sustained_wind_unit="m/s",
            central_pressure_unit="mb",
            name=typhoon_name.upper(),
            data_provider="ECMWF",
            ensemble_number=ensemble,
            is_ensemble=is_ensemble,
            forecast_time=forecast_time,
            basin="W - North West Pacific",
            sid=typhoon_name,
            orig_event_flag=False,
            id_no=None,
            category=None,
        )
        ds = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs).set_coords(["lat", "lon"])
        tracks.append(ds)
    return tracks
49 changes: 49 additions & 0 deletions analysis/hindcasts/01_download_hindcasts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""Download ECMWF TIGGE cyclone-track XML files (hindcasts) from UCAR RDA.

Fetches every 12-hourly forecast issue time from 2006-10-01 to today from
dataset ds330.3 into ``<save_dir>/xml``, skipping files already downloaded.
Requires an RDA account; credentials are prompted for at startup.
"""
import os
from datetime import datetime
from getpass import getpass
from pathlib import Path

import requests
from dateutil import rrule

from constants import save_dir

save_dir = Path(save_dir)

email = input('email: ')
# getpass avoids echoing the password to the terminal
pswd = getpass('password: ')

# Authenticate once; the session cookies are reused for every file request
values = {'email': email, 'passwd': pswd, 'action': 'login'}
login_url = 'https://rda.ucar.edu/cgi-bin/login'

ret = requests.post(login_url, data=values)
if ret.status_code != 200:
    print('Bad Authentication')
    print(ret.text)
    raise SystemExit(1)

start_date = datetime(2006, 10, 1, 0, 0, 0)
dspath = 'https://rda.ucar.edu/data/ds330.3/'
# Forecasts are issued every 12 hours
date_list = rrule.rrule(rrule.HOURLY, dtstart=start_date, until=datetime.utcnow().date(), interval=12)
verbose = True

# Make sure the target directory exists before the first download
outdir = save_dir / "xml"
outdir.mkdir(parents=True, exist_ok=True)

for date in date_list:
    ymd = date.strftime("%Y%m%d")
    ymdhms = date.strftime("%Y%m%d%H%M%S")
    # Files issued before 2008-08-01 live under the "test" server prefix
    server = "test" if date < datetime(2008, 8, 1) else "prod"
    file = f'ecmf/{date.year}/{ymd}/z_tigge_c_ecmf_{ymdhms}_ifs_glob_{server}_all_glo.xml'
    filename = dspath + file
    outfile = outdir / os.path.basename(filename)
    # Don't download if exists already
    if outfile.exists():
        if verbose:
            print(f'{file} already exists')
        continue
    req = requests.get(filename, cookies=ret.cookies, allow_redirects=True)
    if req.status_code != 200:
        if verbose:
            print(f'{file} invalid URL')
        continue
    if verbose:
        print(f'{file} downloading')
    # write_bytes opens and closes the file properly (no leaked handle)
    outfile.write_bytes(req.content)
Loading