Commit 5799f03
ERKuipers committed Nov 12, 2024
1 parent afdd131
Showing 8 changed files with 177 additions and 75 deletions.
@@ -1,60 +1,89 @@
-from aggregation import aggregation
+import os
-import numpy as np
-import pandas as pd
-import configuration as cfg
+from aggregation import aggregation
 from open_extract import unzipGloFAS, openGloFAS
-from vectorCheck import checkVectorFormat
+from vectorCheck import checkVectorFormat  # this is correctly imported
 from forecast_dataFetch import compute_dates_range
+import numpy as np
+import pandas as pd
+import batch_configuration as cfg

-# Define constants
-START_DATE = '2024-06-26'
-END_DATE = '2024-11-01'
-leadtime = 168
-IDhead = 'StationName'
-
-# Check vector format and retrieve point IDs
-pointvectorMODEL_gdf = checkVectorFormat(cfg.GloFASstations, shapeType='point', crs=cfg.crs, placement='model')
-pointIDs = pointvectorMODEL_gdf[IDhead]
-
-# Define probability percentiles
-probabilities = np.arange(0, 101, 10)  # From 0% to 100% in steps of 10
-
-# Define column headers for the output dataframes
-indexheads = []
-for pointID in pointIDs:
-    for perc in probabilities:
-        indexheads.append(f'{pointID}_{perc}')  # Combine station ID and probability for unique column names
-
-# Generate date range
-dates = compute_dates_range(START_DATE, END_DATE)
-
-# Initialize DataFrames
-# Use np.nan as the initial fill value, with proper indexing and column names
-aggregated_df = pd.DataFrame(np.nan, index=dates, columns=pointIDs)
-ensemblemembers_aggregated_df = pd.DataFrame(np.nan, index=dates, columns=indexheads)
-
-# Process data for each date
-for date in dates:
-    # Extract year, month, and day for file path formatting
-    year = date.strftime('%Y')
-    month = date.strftime('%m')
-    day = date.strftime('%d')
-
-    # Unzip and open raster data for the current date
-    rasterPath = unzipGloFAS(cfg.DataDir, leadtime, month, day, year)
-    Q_da = openGloFAS(rasterPath, cfg.lakesPath)
-
+def aggregate_forecasted(
+        START_DATE,
+        END_DATE,
+        pointvector,
+        leadtime=168,            # 7 days, default
+        DataDir=os.getcwd(),
+        IDhead='StationName',
+        probability=False,
+        probabilityInterval=10,  # in percentiles
+        output_filename='aggregated.csv',
+        lakesPath=None,
+        crs='EPSG:4326'
+        ):
+    """
+    Aggregate forecasted data from GloFAS for the given date range and station IDs.
-    # Determine ensemble members based on the number of probabilities
-    totalEnsembleMembers = len(Q_da.number)
-    step = int(totalEnsembleMembers / 10)  # Ensure the step is an integer
-    relevantMembers = np.arange(1, totalEnsembleMembers, step)
-
-    # Aggregate data for each ensemble member at each point
-    for nrEnsemble in relevantMembers:
-        # Perform the aggregation and assign to the respective date and column in `ensemblemembers_aggregated_df`
-        agg_results = aggregation(Q_da, pointvectorMODEL_gdf, 'point', nrEnsemble=int(nrEnsemble), timestamp=date)
-        for idx, (pointID, perc) in enumerate(zip(pointIDs, probabilities)):
-            ensemblemembers_aggregated_df.loc[date, f'{pointID}_{perc}'] = agg_results.get((pointID, int(perc)), np.nan)
-
-    # Write the final aggregated DataFrame to CSV
-    ensemblemembers_aggregated_df.to_csv(f'{cfg.DataDir}/aggregated.csv')
+
+    Arguments:
+    - START_DATE (str): The start date for aggregation.
+    - END_DATE (str): The end date for aggregation.
+    - pointvector (str or GeoDataFrame): Point vector of the stations to aggregate to.
+    - DataDir (str): Directory in which to save the output CSV.
+    - leadtime (int): Lead time of the forecast, in hours.
+    - IDhead (str): Column name for the station ID.
+    - probability (bool): Whether to aggregate based on probabilities.
+    - probabilityInterval (int): The interval between probability percentiles (default 10).
+    - output_filename (str): Name of the output file (default 'aggregated.csv').
+    - lakesPath (str): Optional path to a waterbodies shapefile, passed to openGloFAS.
+    - crs (str): Coordinate reference system (default 'EPSG:4326').
+    Returns:
+    - None: Writes the aggregated data to a CSV file.
+    """
+
+    # Check vector format and retrieve point IDs
+    pointvectorMODEL_gdf = checkVectorFormat(pointvector, shapeType='point', crs=crs, placement='real')
+    pointIDs = pointvectorMODEL_gdf[IDhead]
+    # Generate date range
+    dates = compute_dates_range(START_DATE, END_DATE)
+
+    # Initialize DataFrame for aggregation
+    if probability:
+        probabilities = np.arange(0, 101, probabilityInterval)  # Probability percentiles
+        indexheads = [f'{pointID}_{perc}' for pointID in pointIDs for perc in probabilities]
+        aggregated_df = pd.DataFrame(np.nan, index=dates, columns=indexheads)
+    else:
+        aggregated_df = pd.DataFrame(np.nan, index=dates, columns=pointIDs)
+
+    # Process data for each date
+    for date in dates:
+        # Extract year, month, and day for file path formatting
+        year = date.strftime('%Y')
+        month = date.strftime('%m')
+        day = date.strftime('%d')
+
+        # Unzip and open raster data for the current date
+        rasterPath = unzipGloFAS(DataDir, leadtime, month, day, year)
+        Q_da = openGloFAS(rasterPath, lakesPath, crs)
+
+        # Aggregate data for the probability case
+        if probability:
+            totalEnsembleMembers = len(Q_da.number)
+            step = int(totalEnsembleMembers / probabilityInterval)  # Ensure an integer step
+            relevantMembers = np.arange(1, totalEnsembleMembers, step)
+
+            # Aggregate data for each ensemble member at each point
+            for nrEnsemble in relevantMembers:
+                agg_results = aggregation(Q_da, pointvectorMODEL_gdf, 'point', nrEnsemble=int(nrEnsemble), timestamp=date)
+                for perc in probabilities:
+                    for pointID in pointIDs:
+                        aggregated_df.loc[date, f'{pointID}_{perc}'] = agg_results.get((pointID, int(perc)), np.nan)
+
+        # Aggregate data for the non-probability case
+        else:
+            aggregation_Q_gdf = aggregation(Q_da, pointvectorMODEL_gdf, 'point', timestamp=date)
+            aggregated_df.loc[date, :] = aggregation_Q_gdf['rastervalue'].values
+
+    # Write the final aggregated DataFrame to CSV
+    aggregated_df.to_csv(f'{DataDir}/{output_filename}')
+    print(f'Aggregation complete! Data saved to {DataDir}/{output_filename}')
+
+if __name__ == '__main__':
+    aggregate_forecasted('2024-06-26', '2024-11-01', pointvector=cfg.GloFASstations, DataDir=cfg.DataDir)
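
For context, a minimal driver sketch for the new function in probability mode; the importing module name is illustrative and the reuse of the batch_configuration attributes is an assumption, not part of the commit:

# Hypothetical usage; the module name 'aggregate_forecast' is an assumption.
import batch_configuration as cfg
from aggregate_forecast import aggregate_forecasted

# Aggregate ensemble percentiles (0-100% in steps of 20) per station
aggregate_forecasted(
    START_DATE='2024-06-26',
    END_DATE='2024-11-01',
    pointvector=cfg.GloFASstations,
    DataDir=cfg.DataDir,
    leadtime=168,                 # hours, i.e. a 7-day forecast
    probability=True,             # switch on ensemble-percentile aggregation
    probabilityInterval=20,
    output_filename='aggregated_prob.csv',
    lakesPath=cfg.lakesPath,
    crs=cfg.crs,
)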
@@ -0,0 +1,10 @@
+import sys
+import os
+from pathlib import Path
+
+cur = Path(os.getcwd())
+parent_dir = cur.parent
+working_dir = 'c:\\Users\\els-2\\MaliGloFAS\\river_flood_data_analysis\\GloFAS_prep'
+os.chdir(working_dir)
+sys.path.append(working_dir)
+print(sys.path)
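
The hard-coded Windows path ties this setup to one machine; a more portable sketch (assuming the directory to put on sys.path is the one containing this script itself) derives it from the file location instead:

import sys
from pathlib import Path

# Resolve the directory containing this script, independent of machine or current working directory
working_dir = Path(__file__).resolve().parent
sys.path.append(str(working_dir))  # sys.path entries should be strings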
@@ -0,0 +1,32 @@
+from pathlib import Path
+import numpy as np
+import math
+import os
+
+os.chdir('C:\\Users\\els-2\\')
+cur = Path.cwd()
+DataDir = cur / 'MaliGloFAS\\data'
+# Mali area coordinates in the notation used by GloFAS (which is very weird):
+# north, west, south, east
+MaliArea = [25, -12.25, 10, 4.25]  # Mali area [lat_max, lon_min, lat_min, lon_max] (upper-left corner, lower-right corner)
+regionPath = DataDir / 'Visualization/ADM1_Affected_SHP.shp'  # 'région' corresponds to ADM1, the larger districts
+communePath = DataDir / 'Visualization/ADM3_Affected_SHP.shp'
+cerclePath = DataDir / 'Visualization/mli_admbnda_adm2_1m_gov_20211220.shp'
+adminPaths = [regionPath, cerclePath, communePath]
+lakesPath = DataDir / 'Visualization/waterbodies/waterbodies_merged.shp'
+stationsDir = DataDir / 'stations'
+
+googlestations = stationsDir / 'coords_google_gauges_Mali.csv'
+GloFASstations = stationsDir / 'GloFAS_MaliStations_v4.csv'
+impact_csvPath = DataDir / 'impact/MergedImpactData.csv'
+crs = 'EPSG:4326'
+RPsyr = [1.5, 2.0, 5.0, 10.0]  # return-period thresholds in years
+leadtimes = 168  # hours
+startYear = 2004
+endYear = 2023  # 00:00 on 1 January of that year, so up to but not including
+triggerProb = 0.6
+actionLifetime = 10  # days
+adminLevel = 2  # choose the level on which to aggregate: 1, 2, or 3
+years = np.arange(startYear, endYear, 1)
+admPath = adminPaths[adminLevel - 1]  # select the administrative-unit path for the chosen level
+nrCores = 6  # number of CPU cores to use
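
Because the GloFAS [north, west, south, east] area notation differs from the (lon_min, lat_min, lon_max, lat_max) bounds order most geospatial tools expect, a small conversion sketch may help; the function name is illustrative and not part of the config:

# Convert a GloFAS-style [north, west, south, east] area into the
# (lon_min, lat_min, lon_max, lat_max) bounds order used by shapely/geopandas.
def glofas_area_to_bounds(area):
    north, west, south, east = area
    return (west, south, east, north)

print(glofas_area_to_bounds([25, -12.25, 10, 4.25]))  # (-12.25, 10, 4.25, 25)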
@@ -0,0 +1,10 @@
+
+import os
+from pathlib import Path
+import sys
+cur = Path(os.getcwd())
+parent_dir = cur.parent
+working_dir = parent_dir  # / 'MaliGloFAS/river_flood_data_analysis'
+os.chdir(working_dir)
+sys.path.append(str(working_dir))  # cast to str: sys.path entries are expected to be strings, not Path objects
+print(os.getcwd())