Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Script to generate validation material #22

Merged
merged 3 commits into from
Jun 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 142 additions & 31 deletions run_forecast_plots.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,75 @@
source ENVS

# Print the help text and stop when fewer than the two required positional
# arguments (<forecast_name> <hemisphere>) are given, or when the first
# argument is -h. The exit status is 1 in both cases.
#
# A here-document is used instead of a run of echo calls: bash's builtin echo
# does not interpret "\n" unless -e is given, so the section headings used to
# be printed with a literal backslash-n in front of them.
if [[ $# -lt 2 || "$1" == "-h" ]]; then
  cat <<EOF
Usage $0 [-m <metrics>] [-r <region>] [-e] [-l] [-v] [-t <threshold>] [-g <grid_area_size>] [-o <output_dir>] <forecast_name> <hemisphere>

Arguments
<forecast_name> name of forecast
<hemisphere> hemisphere to use

Options
-m <metrics> string of metrics separated by commas, by default "binacc,sic". Options: "binacc", "sie", "mae", "mse", "rmse", "sic"
-r <region> region arguments, by default uses full hemisphere
-e compare forecast performance with ECMWF
-l produce leadtime averaged plots
-v produce video using the individual metric plots by stitching them together with ffmpeg
-t <threshold> SIC threshold to use (must be between 0 and 1), by default 0.15
-g <grid_area_size> grid area resolution to use - i.e. the length of the sides in km, by default 25 (i.e. 25km^2)
-o <output_dir> output directory path to store plots, by default "plot/<forecast_name>"

List of outputs generated
* If "binacc" is included in the requested metrics, will generate all binary accuracy plots for dates in <forecast_name>_<hemisphere>.csv
- these will be saved in the format "<output_dir>/binacc.t_<threshold>.<date>.png"
If "-l" is passed, leadtime averaged plots for binary accuracy will be generated too:
 - averaging over all: "<output_dir>/binacc.t_<threshold>_leadtime_avg_all.png"
 - averaging by month and for initialisation date: "<output_dir>/binacc.t_<threshold>_leadtime_avg_init_month.png"
 - averaging by day and for initialisation date: "<output_dir>/binacc.t_<threshold>_leadtime_avg_init_day.png"
 - averaging by month and for target date: "<output_dir>/binacc.t_<threshold>_leadtime_avg_target_month.png"
 - averaging by day and for target date: "<output_dir>/binacc.t_<threshold>_leadtime_avg_target_day.png"
If "-v" is passed, a video will be produced to stitch all these plots together and saved in "<output_dir>/binacc.t_<threshold>.mp4"
* If "sie" is included in the requested metrics, will generate all SIE error plots for dates in <forecast_name>_<hemisphere>.csv
(these will be saved in the format "<output_dir>/sie.t_<threshold>.ga_<grid_area_size>.<date>.png")
If "-l" is passed, leadtime averaged plots for SIE error will be generated too:
 - averaging over all: "<output_dir>/sie.t_<threshold>.ga_<grid_area_size>_leadtime_avg_all.png"
 - averaging by month and for initialisation date: "<output_dir>/sie.t_<threshold>.ga_<grid_area_size>_leadtime_avg_init_month.png"
 - averaging by day and for initialisation date: "<output_dir>/sie.t_<threshold>.ga_<grid_area_size>_leadtime_avg_init_day.png"
 - averaging by month and for target date: "<output_dir>/sie.t_<threshold>.ga_<grid_area_size>_leadtime_avg_target_month.png"
 - averaging by day and for target date: "<output_dir>/sie.t_<threshold>.ga_<grid_area_size>_leadtime_avg_target_day.png"
If "-v" is passed, a video will be produced to stitch all these plots together and saved in "<output_dir>/sie.t_<threshold>.ga_<grid_area_size>.mp4"
* If "mae", "mse", or "rmse" is included in the requested metrics, will generate all MAE, MSE, or RMSE plots for dates in <forecast_name>_<hemisphere>.csv
the names for the plots follow a similar convention as above but without the threshold or grid-area-size being saved in the name...
for instance, for a given <metric>, these will be saved in the format "<output_dir>/<metric>.<date>.png"
If "-l" is passed, leadtime averaged plots for <metric> will be generated too:
 - averaging over all: "<output_dir>/<metric>_leadtime_avg_all.png"
 - averaging by month and for initialisation date: "<output_dir>/<metric>_leadtime_avg_init_month.png"
 - averaging by day and for initialisation date: "<output_dir>/<metric>_leadtime_avg_init_day.png"
 - averaging by month and for target date: "<output_dir>/<metric>_leadtime_avg_target_month.png"
 - averaging by day and for target date: "<output_dir>/<metric>_leadtime_avg_target_day.png"
Note that if "-e" is passed, all of these will have "_comp" after "avg"
The plot of the standard deviation of the metric for each forecast will also be generated
If "-v" is passed, a video will be produced to stitch all these plots together and saved in "<output_dir>/<metric>.mp4"
* If "sic" is included in the requested metrics, will generate all SIC error videos for dates in <forecast_name>_<hemisphere>.csv
(these will be saved in the format "<output_dir>/sic.<date>.mp4")
EOF
  exit 1
fi

# Default option values.
# THRESHOLD and GRID_AREA_SIZE hold the complete "<flag> <value>" pair so they
# can be handed, unquoted, to the plotting commands; the bare value is later
# recovered by slicing off the flag prefix (${THRESHOLD:3}, ${GRID_AREA_SIZE:4}).
# REGION follows the same "<flag> <value>" convention and is empty by default.
METRICS="binacc,sic"
REGION=""
ECMWF="false"
LEADTIME_AVG="false"
# No option sets ROLLING any more (-r now selects a region and -v controls
# video stitching). It is still initialised because a "${ROLLING}" test is
# visible further down in this view - TODO confirm it can be removed.
ROLLING="false"
VIDEO="false"
THRESHOLD="-t 0.15"
GRID_AREA_SIZE="-ga 25"
REQUESTED_OUTPUT_DIR=""

# Parse the options. m, r, t, g and o take an argument; e, l and v are
# switches. Anything getopts rejects (unknown option or missing argument)
# stops the script instead of being silently ignored.
OPTIND=1
while getopts "m:r:elvt:g:o:" opt; do
  case "$opt" in
    m) METRICS=${OPTARG} ;;
    r) REGION="-r ${OPTARG}" ;;
    e) ECMWF="true" ;;
    l) LEADTIME_AVG="true" ;;
    v) VIDEO="true" ;;
    t) THRESHOLD="-t ${OPTARG}" ;;
    g) GRID_AREA_SIZE="-ga ${OPTARG}" ;;
    o) REQUESTED_OUTPUT_DIR=${OPTARG} ;;
    *)
      # getopts has already printed the reason on stderr
      echo "Run \"$0 -h\" for usage" >&2
      exit 1
      ;;
  esac
done

Expand Down Expand Up @@ -50,7 +104,7 @@ fi

shift $((OPTIND-1))

echo "Leftovers from getopt: $@"
# echo "Leftovers from getopt: $@"

FORECAST="$1"
HEMI="$2"
Expand All @@ -64,14 +118,20 @@ MAE_LOG="${LOG_PREFIX}_mae.log"
MSE_LOG="${LOG_PREFIX}_mse.log"
RMSE_LOG="${LOG_PREFIX}_rmse.log"
SICERR_LOG="${LOG_PREFIX}_sic.log"
OUTPUT_DIR="plot/$FORECAST_NAME"

if [ -d $OUTPUT_DIR ]; then
# remove existing log files if they exist
rm -v -f $BINACC_LOG $SIE_LOG $MAE_LOG $MSE_LOG $RMSE_LOG $SICERR_LOG
# Use the requested output directory when one was supplied; otherwise fall
# back to plot/<FORECAST_NAME>. ":-" treats an empty value like an unset one.
OUTPUT_DIR="${REQUESTED_OUTPUT_DIR:-plot/${FORECAST_NAME}}"

# if [ -d $OUTPUT_DIR ]; then
# # remove existing log files if they exist
# rm -v -f $BINACC_LOG $SIE_LOG $MAE_LOG $MSE_LOG $RMSE_LOG $SICERR_LOG
# fi
mkdir -p $OUTPUT_DIR

echo "Saving plots in ${OUTPUT_DIR}"
echo "Reading ${FORECAST_NAME}.csv"

# create metric plots for each forecast date
Expand All @@ -80,29 +140,31 @@ cat ${FORECAST_NAME}.csv | while read -r FORECAST_DATE; do
do
OUTPUT="${OUTPUT_DIR}/${element}.${FORECAST_DATE}.png"
if [ "${element}" == "binacc" ]; then
OUTPUT="${OUTPUT_DIR}/${element}.t_${THRESHOLD:3}.${FORECAST_DATE}.png"
echo "Producing binary accuracy plot for $FORECAST_DATE (${OUTPUT})"
icenet_plot_bin_accuracy -b $E_FLAG -v -o $OUTPUT \
icenet_plot_bin_accuracy -b $E_FLAG -v $REGION -o $OUTPUT $THRESHOLD \
$HEMI $FORECAST_FILE $FORECAST_DATE >> $BINACC_LOG 2>&1
elif [ "${element}" == "sie" ]; then
OUTPUT="${OUTPUT_DIR}/${element}.t_${THRESHOLD:3}.ga_${GRID_AREA_SIZE:4}.${FORECAST_DATE}.png"
echo "Producing sea ice extent error plot for $FORECAST_DATE (${OUTPUT})"
icenet_plot_sie_error -b $E_FLAG -v -o $OUTPUT \
icenet_plot_sie_error -b $E_FLAG -v $REGION -o $OUTPUT $THRESHOLD $GRID_AREA_SIZE \
$HEMI $FORECAST_FILE $FORECAST_DATE >> $SIE_LOG 2>&1
elif [ "${element}" == "mae" ]; then
echo "Producing MAE plot for $FORECAST_DATE (${OUTPUT})"
icenet_plot_metrics -b $E_FLAG -v -m "MAE" -o $OUTPUT \
icenet_plot_metrics -b $E_FLAG -v $REGION -m $element -o $OUTPUT \
$HEMI $FORECAST_FILE $FORECAST_DATE >> $MAE_LOG 2>&1
elif [ "${element}" == "mse" ]; then
echo "Producing MSE plot for $FORECAST_DATE (${OUTPUT})"
icenet_plot_metrics -b $E_FLAG -v -m "MSE" -o $OUTPUT \
icenet_plot_metrics -b $E_FLAG -v $REGION -m $element -o $OUTPUT \
$HEMI $FORECAST_FILE $FORECAST_DATE >> $MSE_LOG 2>&1
elif [ "${element}" == "rmse" ]; then
echo "Producing RMSE plot for $FORECAST_DATE (${OUTPUT})"
icenet_plot_metrics -b $E_FLAG -v -m "RMSE" -o $OUTPUT \
icenet_plot_metrics -b $E_FLAG -v $REGION -m $element -o $OUTPUT \
$HEMI $FORECAST_FILE $FORECAST_DATE >> $RMSE_LOG 2>&1
elif [ "${element}" == "sic" ]; then
OUTPUT="${OUTPUT_DIR}/${element}.${FORECAST_DATE}.mp4"
echo "Producing SIC error video for $FORECAST_DATE (${OUTPUT})"
icenet_plot_sic_error -v -o $OUTPUT \
icenet_plot_sic_error -v $REGION -o $OUTPUT \
$HEMI $FORECAST_FILE $FORECAST_DATE >> $SICERR_LOG 2>&1
fi
done
Expand All @@ -115,37 +177,86 @@ if [[ "${LEADTIME_AVG}" == true ]]; then
if [ "${element}" == "sic" ]; then
continue
fi
OUTPUT="${OUTPUT_DIR}/${element}_leadtime_avg.png"
PATH_START="${OUTPUT_DIR}/${element}"
if [ "${element}" == "binacc" ]; then
echo "Producing leadtime averaged binary accuracy plot (${OUTPUT})"
echo "Producing leadtime averaged binary accuracy plots..."
PATH_START="${PATH_START}.t_${THRESHOLD:3}"
LOGFILE="${BINACC_LOG}"
elif [ "${element}" == "sie" ]; then
echo "Producing leadtime averaged sea ice extent error plot (${OUTPUT})"
echo "Producing leadtime averaged sea ice extent error plots..."
PATH_START="${PATH_START}.t_${THRESHOLD:3}.ga_${GRID_AREA_SIZE:4}"
LOGFILE="${SIE_LOG}"
elif [ "${element}" == "mae" ]; then
echo "Producing leadtime averaged MAE plot (${OUTPUT})"
echo "Producing leadtime averaged MAE plots..."
LOGFILE="${MAE_LOG}"
elif [ "${element}" == "mse" ]; then
echo "Producing leadtime averaged MSE plot (${OUTPUT})"
echo "Producing leadtime averaged MSE plots..."
LOGFILE="${MSE_LOG}"
elif [ "${element}" == "rmse" ]; then
echo "Producing leadtime averaged RMSE plot (${OUTPUT})"
echo "Producing leadtime averaged RMSE plots..."
LOGFILE="${RMSE_LOG}"
fi
icenet_plot_leadtime_avg $HEMI $FORECAST_FILE \
-m $element -ao "all" -s -sm 1 $E_FLAG \
-o $OUTPUT >> $LOGFILE
# determining the path to save metrics dataframe and the beginning of the output paths
if [[ "${ECMWF}" == true ]]; then
DATA_PATH="${PATH_START}_leadtime_avg_df_comp.csv"
OUTPUT_PATH_START="${PATH_START}_leadtime_avg_comp"
else
DATA_PATH="${PATH_START}_leadtime_avg_df.csv"
OUTPUT_PATH_START="${PATH_START}_leadtime_avg"
fi
echo "Will produce metrics dataframe in ${DATA_PATH}"
echo "Plots produced:"
# averaging over all
OUTPUT="${OUTPUT_PATH_START}_all.png"
icenet_plot_leadtime_avg $HEMI $FORECAST_FILE $REGION \
-m $element -ao "all" -s -sm 1 $E_FLAG $THRESHOLD $GRID_AREA_SIZE \
-dp $DATA_PATH -o $OUTPUT >> $LOGFILE 2>&1
echo "* ${OUTPUT}"
##### initialisation day
# averaging over monthly
OUTPUT="${OUTPUT_PATH_START}_init_month.png"
icenet_plot_leadtime_avg $HEMI $FORECAST_FILE $REGION \
-m $element -ao "month" -s $E_FLAG $THRESHOLD $GRID_AREA_SIZE \
-dp $DATA_PATH -o $OUTPUT >> $LOGFILE 2>&1
echo "* ${OUTPUT}"
# averaging over daily
OUTPUT="${OUTPUT_PATH_START}_init_day.png"
icenet_plot_leadtime_avg $HEMI $FORECAST_FILE $REGION \
-m $element -ao "day" -s $E_FLAG $THRESHOLD $GRID_AREA_SIZE \
-dp $DATA_PATH -o $OUTPUT >> $LOGFILE 2>&1
echo "* ${OUTPUT}"
##### target day
# averaging over monthly
OUTPUT="${OUTPUT_PATH_START}_target_month.png"
icenet_plot_leadtime_avg $HEMI $FORECAST_FILE $REGION \
-m $element -ao "month" -s -td $E_FLAG $THRESHOLD $GRID_AREA_SIZE \
-dp $DATA_PATH -o $OUTPUT >> $LOGFILE 2>&1
echo "* ${OUTPUT}"
# averaging over daily
OUTPUT="${OUTPUT_PATH_START}_target_day.png"
icenet_plot_leadtime_avg $HEMI $FORECAST_FILE $REGION \
-m $element -ao "day" -s -td $E_FLAG $THRESHOLD $GRID_AREA_SIZE \
-dp $DATA_PATH -o $OUTPUT >> $LOGFILE 2>&1
echo "* ${OUTPUT}"
done
fi

# stitch together metric plots if requested
if [[ "${ROLLING}" == true ]]; then
if [[ "${VIDEO}" == true ]]; then
for element in "${METRICS[@]}"
do
if [ "${element}" == "sic" ]; then
continue
fi
OUTPUT="${OUTPUT_DIR}/${element}.mp4"
PATH_START="${OUTPUT_DIR}/${element}"
# add to PATH_START if we're working with binacc or SIE
if [ "${element}" == "binacc" ]; then
PATH_START="${PATH_START}.t_${THRESHOLD:3}"
elif [ "${element}" == "sie" ]; then
PATH_START="${PATH_START}.t_${THRESHOLD:3}.ga_${GRID_AREA_SIZE:4}"
fi
# print out where the plot will be saved
OUTPUT="${PATH_START}.mp4"
if [ "${element}" == "binacc" ]; then
echo "Producing rolling binary accuracy plot (${OUTPUT})"
LOGFILE="${BINACC_LOG}"
Expand All @@ -162,13 +273,13 @@ if [[ "${ROLLING}" == true ]]; then
echo "Producing rolling RMSE plot (${OUTPUT})"
LOGFILE="${RMSE_LOG}"
fi
# determine whether or not to stitch the leadtime averaged plot
# determine whether or not to stitch the leadtime averaged plots
if [[ "${LEADTIME_AVG}" == true ]]; then
INPUTS="${OUTPUT_DIR}/${element}*.png"
INPUTS="${PATH_START}*.png"
else
INPUTS="${OUTPUT_DIR}/${element}.*.png"
INPUTS="${PATH_START}.*.png"
fi
ffmpeg -framerate 10 -y -pattern_type glob -i "${INPUTS}" \
-vcodec libx264 -pix_fmt yuv420p $OUTPUT >> $LOGFILE
-vcodec libx264 -pix_fmt yuv420p $OUTPUT >> $LOGFILE 2>&1
done
fi
56 changes: 33 additions & 23 deletions run_prediction.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,30 +7,40 @@ set -e -o pipefail
conda activate $ICENET_CONDA

# Print the help text and stop when fewer than the three required positional
# arguments (<forecast_name> <model> <hemisphere>) are given, or when the
# first argument is -h. The exit status is 1 in both cases.
if [[ $# -lt 3 || "$1" == "-h" ]]; then
  cat <<EOF
Usage $0 [-m <metrics>] [-e] [-l] [-v] <forecast_name> <model> <hemisphere> [date_vars] [train_data_name]
<forecast_name> name of forecast
<model> model name
<hemisphere> hemisphere to use
[date_vars] variables for defining start and end dates to forecast
[train_data_name] name of data used to train the model
Options
-m <metrics> pass in a string of metrics separated by commas (to pass into ./run_forecast_plots.sh), by default ""
-e compare forecast performance with ECMWF
-l produce leadtime averaged plots
-v produce video using the individual metric plots by stitching them together with ffmpeg
EOF
  exit 1
fi

# obtaining any arguments that should be passed onto run_forecast_plots.sh
# Options that are collected here only to be forwarded to
# run_forecast_plots.sh. Each variable holds the literal flag text (plus its
# value for -m) or stays empty when the option was not given.
METRICS_FLAG=""
E_FLAG=""
V_FLAG=""
L_FLAG=""
# No option sets R_FLAG any more; it is kept empty because "$R_FLAG" is still
# referenced by an invocation visible further down - TODO confirm and remove.
R_FLAG=""

# m takes an argument; e, l and v are switches. Anything getopts rejects
# (unknown option or missing argument) stops the script.
OPTIND=1
while getopts "m:elv" opt; do
  case "$opt" in
    m) METRICS_FLAG="-m ${OPTARG}" ;;
    e) E_FLAG="-e" ;;
    l) L_FLAG="-l" ;;
    v) V_FLAG="-v" ;;
    *)
      # getopts has already printed the reason on stderr
      echo "Run \"$0 -h\" for usage" >&2
      exit 1
      ;;
  esac
done

echo "Passing on the following arguments to run_forecast_plots.sh: ${METRICS_FLAG} ${E_FLAG} ${L_FLAG} ${V_FLAG}"

# Drop the parsed options so that $1.. are the positional arguments
shift $((OPTIND-1))

# echo "Leftovers from getopt: $@"

FORECAST="$1"
MODEL="$2"
Expand All @@ -40,13 +50,13 @@ DATA_PROC="${5:-${TRAIN_DATA_NAME}}_${HEMI}"

# This assumes you're not retraining using the same model name, eek
# Locate the first saved network file for $MODEL and derive, from its file
# name, the dataset name embedded after ".network_".
# Sets SAVEFILE and DATASET; exits with status 1 when the model directory or
# its .h5 files are missing.
if [[ -d "results/networks/${MODEL}" ]]; then
  # Expand the glob directly rather than parsing ls output. Without nullglob
  # an unmatched pattern is left as literal text, which the -e test rejects.
  model_files=( "results/networks/${MODEL}/${MODEL}".*.h5 )
  if [[ ! -e "${model_files[0]}" ]]; then
    echo "No ${MODEL}.*.h5 files found in results/networks/${MODEL}" >&2
    exit 1
  fi
  SAVEFILE="${model_files[0]}"
  # Keep only the text captured between ".network_" and ".<digits>.h5"
  DATASET=$(printf '%s\n' "$SAVEFILE" | perl -lpe 's/.+\.network_(.+)\.[0-9]+\.h5/$1/')
  echo "First model file: $SAVEFILE"
  echo "Dataset model was trained on: $DATASET"
else
  echo "Model $MODEL doesn't exist"
  exit 1
fi

NAME_START="${DATE_VARS^^}_START"
Expand All @@ -56,11 +66,11 @@ PREDICTION_START=${!NAME_START}
PREDICTION_END=${!NAME_END}

# Both prediction bounds must be non-empty before continuing. The variables
# are quoted inside [[ ]] so that an empty value, or one containing
# whitespace, is tested as a single word.
if [[ -z "$PREDICTION_START" || -z "$PREDICTION_END" ]]; then
  echo "Prediction date args not set correctly: \"$PREDICTION_START\" to \"$PREDICTION_END\""
  exit 1
else
  echo "Prediction start arg: $PREDICTION_START"
  echo "Prediction end arg: $PREDICTION_END"
fi

[ ! -z "$PROC_ARGS_ERA5" ] && \
Expand All @@ -85,4 +95,4 @@ icenet_dataset_create -l $LAG -c ${FORECAST}_${HEMI} $HEMI
./run_predict_ensemble.sh -i $DATASET -f $FILTER_FACTOR -p $PREP_SCRIPT \
$MODEL ${FORECAST}_${HEMI} ${FORECAST}_${HEMI} ${FORECAST}_${HEMI}.csv

# Hand over to the plotting script exactly once. The *_FLAG variables are left
# unquoted on purpose: each is either empty (must vanish from the command
# line) or "<flag>"/"<flag> <value>" (must split into separate words). The
# positional arguments are quoted so they are always passed as single words.
./run_forecast_plots.sh ${METRICS_FLAG} ${E_FLAG} ${V_FLAG} ${L_FLAG} "$FORECAST" "$HEMI"
Loading