Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Download, process and run model on hindcasts #91

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
813 changes: 437 additions & 376 deletions IBF-Typhoon-model/.Rhistory

Large diffs are not rendered by default.

Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -1229,7 +1229,7 @@
"hash": "c56dc72bed4e123c21144b1399355bbd07b8d587f6360d6b6ea22ee2ab335a35"
},
"kernelspec": {
"display_name": "Python 3.8.10 ('geo_env')",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand Down
14 changes: 4 additions & 10 deletions IBF-Typhoon-model/run_model_V2.R
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ wind_grid <- read.csv(windfield_data) %>%
)

rainfall_ <- Read_rainfall_v2(wshade)
rainfall_[,2] <- rainfall_[,2] - 1000


typhoon_hazard <- wind_grid %>%
Expand Down Expand Up @@ -230,19 +231,12 @@ df_impact_forecast_CERF <- get_total_impact_forecast(
# ------------------------ calculate and plot probability National -----------------------------------

dref_damage_thresholds <- c(100000, 80000, 70000, 50000, 30000)
dref_probabilities <- c(0.95, 0.80, 0.70, 0.60, 0.50)
dref_probabilities <- c(0.95, 0.80, 0.70, 0.60, 0.50)

df_impact_forecast_DREF <- get_total_impact_forecast(df_impact_forecast,
dref_damage_thresholds,
dref_probabilities, "DREF")%>%
dplyr::mutate(trigger = ifelse('100k' >= 50,1,
ifelse('80k' >= 60,1,
ifelse('70k' >= 50,1,
ifelse('50k' >= 80,1,
ifelse('30k' >= 95,1, 0))))))

#write trigger to file
write.csv(df_impact_forecast_DREF,file = paste0(Output_folder, "trigger", "_", forecast_time, "_",Typhoon_stormname, ".csv"))
dref_probabilities, "DREF")


# ------------------------ calculate average impact vs probability -----------------------------------

Expand Down
17 changes: 14 additions & 3 deletions IBF-Typhoon-model/src/typhoonmodel/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,16 @@
@click.command()
@click.option('--path', default='./', help='main directory')
@click.option('--remote_directory', default=None,
help='remote directory for ECMWF forecast data, format: YYYYMMDDhhmmss')
help='remote directory for ECMWF forecast data, format: YYYYMMDDhhmmss. In the case of hindcasts,'
'the target timestamp of the forecast run.')
@click.option('--use-hindcast', is_flag=True, help='Use hindcast instead of latest ECMWF. Need to specify'
'local directory and use remote directory for timestamp')
@click.option('--local-directory', default=None,
help='local directory containing with ECMWF hindcasts')
@click.option('--typhoonname', default=None, help='name for active typhoon')
@click.option('--no-azure', is_flag=True, help="Don't push to Azure lake")
@click.option('--debug', is_flag=True, help='setting for DEBUG option')
def main(path, remote_directory, typhoonname, no_azure, debug):
def main(path, remote_directory, use_hindcast, local_directory, typhoonname, no_azure, debug):
initialize.setup_cartopy()
start_time = datetime.now()
print('---------------------AUTOMATION SCRIPT STARTED---------------------------------')
Expand All @@ -37,7 +42,13 @@ def main(path, remote_directory, typhoonname, no_azure, debug):
typhoonname = 'MEGI'
remote_dir = '20220412000000'
logger.info(f"DEBUGGING piepline for typhoon {typhoonname}")
Forecast(path, remote_dir, typhoonname, countryCodeISO3='PHP', admin_level=3, no_azure=no_azure)
if use_hindcast:
if not remote_directory or not local_directory or not typhoonname:
logger.error("If you want to use a hindcast, you need to specify remote directory"
"(for the forecast timestamp), a local directory, and the typhoon name")
logger.info(f"Running on hindcast {typhoonname}")
Forecast(path, remote_dir, typhoonname, countryCodeISO3='PHP', admin_level=3, no_azure=no_azure,
use_hindcast=use_hindcast, local_directory=local_directory)
print('---------------------AUTOMATION SCRIPT FINISHED---------------------------------')
print(str(datetime.now()))

Expand Down
26 changes: 25 additions & 1 deletion IBF-Typhoon-model/src/typhoonmodel/utility_fun/Rainfall_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,4 +165,28 @@ def zonal_stat_rain(filepath,admin):
# Obtain list with maximum 6h rainfall
maximum_6h = [max(x.values()) for x in final]
return pd.DataFrame(maximum_6h)



def create_synthetic_rainfall(Input_folder):
    """Write a synthetic rainfall dataset into ``<Input_folder>/rainfall/``.

    Used on the hindcast path, where the NOAA rainfall data for the original
    forecast date can no longer be downloaded.  Produces:

    * ``rainfall_24.nc`` — a constant-rainfall NetCDF copied from
      ``data-raw/rainfall_synthetic_1000.nc``
    * ``rain_data.csv`` — per-municipality maxima (250 for 6 h, 1000 for
      24 h) for every admin-3 unit read from the Philippines geojson

    :param Input_folder: run-specific input directory the pipeline reads from
    """
    # TODO: Why is only rainfall 24 used?
    # TODO: turn this into actual data from the past
    logger.info("Creating synthetic rainfall dataset")
    # Make the 24 h netcdf only
    ds = xr.load_dataset('./data-raw/rainfall_synthetic_1000.nc')
    rainfall_path = os.path.join(Input_folder, 'rainfall/')
    os.makedirs(rainfall_path, exist_ok=True)
    nc_path = os.path.join(rainfall_path, "rainfall_24.nc")
    try:
        # Remove any stale file first: to_netcdf() can raise PermissionError
        # when the target of a previous run already exists.
        if os.path.exists(nc_path):
            os.remove(nc_path)
        ds.to_netcdf(nc_path)
    except PermissionError:
        logger.warning("Can't overwrite old file, skipping")
    # NOTE(review): unclear whether this csv is still consumed downstream
    ADMIN_PATH = 'data-raw/gis_data/phl_admin3_simpl2.geojson'
    admin = gpd.read_file(ADMIN_PATH)
    # Scalar values are broadcast by pandas to the length of Mun_Code
    df_rain = pd.DataFrame({
        "max_06h_rain": 250,
        "max_24h_rain": 1000,
        "Mun_Code": admin['adm3_pcode'].values,
    })
    df_rain.to_csv(os.path.join(rainfall_path, "rain_data.csv"), index=False)
120 changes: 64 additions & 56 deletions IBF-Typhoon-model/src/typhoonmodel/utility_fun/forecast_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from climada.hazard.tc_tracks_forecast import TCForecast
from typhoonmodel.utility_fun.settings import get_settings
from typhoonmodel.utility_fun import track_data_clean, Check_for_active_typhoon, Sendemail, \
ucl_data, plot_intensity, initialize
ucl_data, plot_intensity, initialize, read_in_hindcast

if platform == "linux" or platform == "linux2": #check if running on linux or windows os
from typhoonmodel.utility_fun import Rainfall_data
Expand All @@ -33,7 +33,8 @@


class Forecast:
def __init__(self,main_path, remote_dir,typhoonname, countryCodeISO3, admin_level, no_azure):
def __init__(self,main_path, remote_dir,typhoonname, countryCodeISO3, admin_level, no_azure,
use_hindcast, local_directory):
self.TyphoonName = typhoonname
self.admin_level = admin_level
#self.db = DatabaseManager(leadTimeLabel, countryCodeISO3,admin_level)
Expand Down Expand Up @@ -75,29 +76,34 @@ def __init__(self,main_path, remote_dir,typhoonname, countryCodeISO3, admin_leve
self.Output_folder = Output_folder

#download NOAA rainfall
try:
#Rainfall_data_window.download_rainfall_nomads(Input_folder,path,Alternative_data_point)
Rainfall_data.download_rainfall_nomads(self.Input_folder,self.main_path,self.Alternative_data_point)
rainfall_error=False
self.rainfall_data=pd.read_csv(os.path.join(self.Input_folder, "rainfall/rain_data.csv"))
except:
traceback.print_exc()
#logger.warning(f'Rainfall download failed, performing download in R script')
logger.info(f'Rainfall download failed, performing download in R script')
rainfall_error=True
self.rainfall_data=[]
###### download UCL data

try:
ucl_data.create_ucl_metadata(self.main_path,
self.UCL_USERNAME,self.UCL_PASSWORD)
ucl_data.process_ucl_data(self.main_path,
self.Input_folder,self.UCL_USERNAME,self.UCL_PASSWORD)
except:
logger.info(f'UCL download failed')
if use_hindcast:
Rainfall_data.create_synthetic_rainfall(self.Input_folder)
rainfall_error = False
else:
try:
#Rainfall_data_window.download_rainfall_nomads(Input_folder,path,Alternative_data_point)
Rainfall_data.download_rainfall_nomads(self.Input_folder,self.main_path,self.Alternative_data_point)
rainfall_error=False
self.rainfall_data=pd.read_csv(os.path.join(self.Input_folder, "rainfall/rain_data.csv"))
except:
traceback.print_exc()
#logger.warning(f'Rainfall download failed, performing download in R script')
logger.info(f'Rainfall download failed, performing download in R script')
rainfall_error=True
self.rainfall_data=[]
###### download UCL data

try:
ucl_data.create_ucl_metadata(self.main_path,
self.UCL_USERNAME,self.UCL_PASSWORD)
ucl_data.process_ucl_data(self.main_path,
self.Input_folder,self.UCL_USERNAME,self.UCL_PASSWORD)
except:
logger.info(f'UCL download failed')


##Create grid points to calculate Winfield
logger.info("Creating windfield")
cent = Centroids()
cent.set_raster_from_pnt_bounds((118,6,127,19), res=0.05)
cent.check()
Expand All @@ -116,29 +122,33 @@ def __init__(self,main_path, remote_dir,typhoonname, countryCodeISO3, admin_leve
self.df_admin=df_admin
# Sometimes the ECMWF ftp server complains about too many requests
# This code allows several retries with some sleep time in between
n_tries = 0
while True:
try:
logger.info("Downloading ECMWF typhoon tracks")
bufr_files = TCForecast.fetch_bufr_ftp(remote_dir=self.remote_dir)
fcast = TCForecast()
fcast.fetch_ecmwf(files=bufr_files)
except ftplib.all_errors as e:
n_tries += 1
if n_tries >= self.ECMWF_MAX_TRIES:
logger.error(f' Data downloading from ECMWF failed: {e}, '
f'reached limit of {self.ECMWF_MAX_TRIES} tries, exiting')
sys.exit()
logger.error(f' Data downloading from ECMWF failed: {e}, retrying after {self.ECMWF_SLEEP} s')
time.sleep(self.ECMWF_SLEEP)
continue
break
if use_hindcast:
logger.info("Reading in hindcast data")
fcast_data = read_in_hindcast.read_in_hindcast(typhoonname, remote_dir, local_directory)
fcast_data = [track_data_clean.track_data_clean(tr) for tr in fcast_data]
else:
n_tries = 0
while True:
try:
logger.info("Downloading ECMWF typhoon tracks")
bufr_files = TCForecast.fetch_bufr_ftp(remote_dir=self.remote_dir)
fcast = TCForecast()
fcast.fetch_ecmwf(files=bufr_files)
except ftplib.all_errors as e:
n_tries += 1
if n_tries >= self.ECMWF_MAX_TRIES:
logger.error(f' Data downloading from ECMWF failed: {e}, '
f'reached limit of {self.ECMWF_MAX_TRIES} tries, exiting')
sys.exit()
logger.error(f' Data downloading from ECMWF failed: {e}, retrying after {self.ECMWF_SLEEP} s')
time.sleep(self.ECMWF_SLEEP)
continue
break
fcast_data = [track_data_clean.track_data_clean(tr) for tr in fcast.data if (tr.time.size>1 and tr.name in Activetyphoon)]

#%% filter data downloaded in the above step for active typhoons in PAR
# filter tracks with name of current typhoons and drop tracks with only one timestep
fcast_data = [track_data_clean.track_data_clean(tr) for tr in fcast.data if (tr.time.size>1 and tr.name in Activetyphoon)]
self.fcast_data=fcast_data
fcast.data =fcast_data # [tr for tr in fcast.data if tr.name in Activetyphoon]
self.data_filenames_list={}
self.image_filenames_list={}
self.typhhon_wind_data={}
Expand All @@ -160,11 +170,11 @@ def __init__(self,main_path, remote_dir,typhoonname, countryCodeISO3, admin_leve
#typhoons='SURIGAE' # to run it manually for any typhoon
# select windspeed for HRS model

fcast.data=[tr for tr in fcast.data if tr.name==typhoons]
tr_HRS=[tr for tr in fcast.data if (tr.is_ensemble=='False')]
fcast_data=[tr for tr in fcast_data if tr.name==typhoons]
tr_HRS=[tr for tr in fcast_data if (tr.is_ensemble=='False')]

if tr_HRS !=[]:
HRS_SPEED=(tr_HRS[0].max_sustained_wind.values/0.84).tolist() ############# 0.84 is conversion factor for ECMWF 10MIN TO 1MIN AVERAGE
# Make HRS track same length as the others
dfff=tr_HRS[0].to_dataframe()
dfff[['VMAX','LAT','LON']]=dfff[['max_sustained_wind','lat','lon']]
dfff['YYYYMMDDHH']=dfff.index.values
Expand All @@ -177,18 +187,16 @@ def __init__(self,main_path, remote_dir,typhoonname, countryCodeISO3, admin_leve
#line_='Rainfall,'+'%sRainfall/' % Input_folder +','+ typhoons + ',' + date_dir #StormName #
fname.write(line_+'\n')
# Adjust track time step
data_forced=[tr.where(tr.time <= max(tr_HRS[0].time.values),drop=True) for tr in fcast.data]
# data_forced = [track_data_clean.track_data_force_HRS(tr,HRS_SPEED) for tr in data_forced] # forced with HRS windspeed

#data_forced= [track_data_clean.track_data_clean(tr) for tr in fcast.data] # taking speed of ENS
# interpolate to 3h steps from the original 6h
#fcast.equal_timestep(3)
data_forced = fcast_data
# data_forced=[tr.where(tr.time <= max(tr_HRS[0].time.values),drop=True) for tr in fcast_data]
# TODO: Use pressure from high res
# Complicated because all tracks are diff length, some more or less than HRS
else:
len_ar=np.min([ len(var.lat.values) for var in fcast.data ])
lat_ = np.ma.mean( [ var.lat.values[:len_ar] for var in fcast.data ], axis=0 )
lon_ = np.ma.mean( [ var.lon.values[:len_ar] for var in fcast.data ], axis=0 )
YYYYMMDDHH =pd.date_range(fcast.data[0].time.values[0], periods=len_ar, freq="H")
vmax_ = np.ma.mean( [ var.max_sustained_wind.values[:len_ar] for var in fcast.data ], axis=0 )
len_ar=np.min([ len(var.lat.values) for var in fcast_data ])
lat_ = np.ma.mean( [ var.lat.values[:len_ar] for var in fcast_data ], axis=0 )
lon_ = np.ma.mean( [ var.lon.values[:len_ar] for var in fcast_data ], axis=0 )
YYYYMMDDHH =pd.date_range(fcast_data[0].time.values[0], periods=len_ar, freq="H")
vmax_ = np.ma.mean( [ var.max_sustained_wind.values[:len_ar] for var in fcast_data ], axis=0 )
d = {'YYYYMMDDHH':YYYYMMDDHH,
"VMAX":vmax_,
"LAT": lat_,
Expand All @@ -202,7 +210,7 @@ def __init__(self,main_path, remote_dir,typhoonname, countryCodeISO3, admin_leve
line_='ecmwf,'+'%secmwf_hrs_track.csv' % self.Input_folder+ ',' +typhoons+','+ self.date_dir #StormName #
#line_='Rainfall,'+'%sRainfall/' % Input_folder +','+ typhoons + ',' + date_dir #StormName #
fname.write(line_+'\n')
data_forced=fcast.data
data_forced=fcast_data
landfall_location_={}
check_flag=0
for row,data in hrs_track_data.iterrows():
Expand Down
59 changes: 59 additions & 0 deletions IBF-Typhoon-model/src/typhoonmodel/utility_fun/read_in_hindcast.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from datetime import datetime
from pathlib import Path

import numpy as np
import xarray as xr
import pandas as pd

ONE_MIN_TO_TEN_MIN = 0.84


def read_in_hindcast(typhoon_name: str, remote_dir: str, local_directory: str):
    """Read stored ECMWF hindcast tracks for one typhoon and one forecast run.

    Reads ``<local_directory>/<typhoon_name>_all.csv`` and returns the tracks
    of the forecast run issued at ``remote_dir``, shaped like the
    ``xarray.Dataset`` tracks produced by climada's ``TCForecast`` on the
    live-forecast path.

    :param typhoon_name: typhoon name, lowercased to build the file name
    :param remote_dir: forecast issue time, format ``YYYYMMDDhhmmss``
    :param local_directory: directory holding the ``*_all.csv`` hindcast files
    :return: list of track datasets (HRS track plus ensemble members)
    """
    # Read in the hindcast csv
    filename = Path(local_directory) / f"{typhoon_name.lower()}_all.csv"
    forecast_time = datetime.strptime(remote_dir, "%Y%m%d%H%M%S")

    df = pd.read_csv(filename)
    for cname in ["time", "forecast_time"]:
        df[cname] = pd.to_datetime(df[cname])
    # Keep only actual forecast rows for the requested issue time
    df = df[df["mtype"].isin(["ensembleforecast", "forecast"])]
    df = df[df["forecast_time"] == forecast_time]

    # Format into a list of tracks, one per ensemble member
    tracks = []
    for ensemble, group in df.groupby("ensemble"):
        # CSV row order is not guaranteed; the time axis must be monotonic
        group = group.sort_values("time")
        # A track needs >= 2 timesteps to define a time step; the
        # live-forecast path also drops such tracks (tr.time.size > 1)
        if len(group) < 2:
            continue

        is_ensemble = 'False' if ensemble == 'none' else 'True'

        # Time step in hours, assumed constant along the track
        time_step = (group["time"].values[1] - group["time"].values[0]).astype('timedelta64[h]')
        time_step = pd.to_timedelta(time_step).total_seconds() / 3600

        coords = dict(
            time=(["time"], group["time"]),
        )
        data_vars = dict(
            # ECMWF reports 10-min sustained wind; convert to 1-min average
            max_sustained_wind=(["time"], group['speed'] / ONE_MIN_TO_TEN_MIN),
            central_pressure=(["time"], group['pressure']),
            lat=(["time"], group["lat"]),
            lon=(["time"], group["lon"]),
            time_step=(["time"], [time_step] * len(group)),
            radius_max_wind=(["time"], [np.nan] * len(group)),
            environmental_pressure=(["time"], [1010.] * len(group)),
        )
        attrs = dict(
            max_sustained_wind_unit="m/s",
            central_pressure_unit="mb",
            name=typhoon_name.upper(),
            data_provider="ECMWF",
            ensemble_number=ensemble,
            is_ensemble=is_ensemble,
            forecast_time=forecast_time,
            basin="W - North West Pacific",
            sid=typhoon_name,
            orig_event_flag=False,
            id_no=None,
            category=None,
        )
        ds = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs).set_coords(["lat", "lon"])
        tracks.append(ds)
    return tracks
49 changes: 49 additions & 0 deletions analysis/hindcasts/01_download_hindcasts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""Download ECMWF TIGGE cyclone-track XML files (hindcasts) from UCAR RDA.

Fetches every 12-hourly forecast issue time from 2006-10-01 to today from
dataset ds330.3 into ``<save_dir>/xml``, skipping files already downloaded.
Requires an RDA account; credentials are prompted for at startup.
"""
import os
from datetime import datetime
from getpass import getpass
from pathlib import Path

import requests
from dateutil import rrule

from constants import save_dir

save_dir = Path(save_dir)

email = input('email: ')
# getpass avoids echoing the password to the terminal
pswd = getpass('password: ')

# Authenticate once; the session cookies are reused for every file request
values = {'email': email, 'passwd': pswd, 'action': 'login'}
login_url = 'https://rda.ucar.edu/cgi-bin/login'

ret = requests.post(login_url, data=values)
if ret.status_code != 200:
    print('Bad Authentication')
    print(ret.text)
    raise SystemExit(1)

start_date = datetime(2006, 10, 1, 0, 0, 0)
dspath = 'https://rda.ucar.edu/data/ds330.3/'
# Forecasts are issued every 12 hours
date_list = rrule.rrule(rrule.HOURLY, dtstart=start_date, until=datetime.utcnow().date(), interval=12)
verbose = True

# Make sure the target directory exists before the first download
outdir = save_dir / "xml"
outdir.mkdir(parents=True, exist_ok=True)

for date in date_list:
    ymd = date.strftime("%Y%m%d")
    ymdhms = date.strftime("%Y%m%d%H%M%S")
    # Files issued before 2008-08-01 live under the "test" server prefix
    server = "test" if date < datetime(2008, 8, 1) else "prod"
    file = f'ecmf/{date.year}/{ymd}/z_tigge_c_ecmf_{ymdhms}_ifs_glob_{server}_all_glo.xml'
    filename = dspath + file
    outfile = outdir / os.path.basename(filename)
    # Don't download if exists already
    if outfile.exists():
        if verbose:
            print(f'{file} already exists')
        continue
    req = requests.get(filename, cookies=ret.cookies, allow_redirects=True)
    if req.status_code != 200:
        if verbose:
            print(f'{file} invalid URL')
        continue
    if verbose:
        print(f'{file} downloading')
    # write_bytes opens and closes the file properly (no leaked handle)
    outfile.write_bytes(req.content)
Loading