From 55329d7c002c7d084fdf5a0e2044b1793f08ea3e Mon Sep 17 00:00:00 2001
From: Konrad Mayer
Date: Tue, 12 Sep 2023 11:37:15 +0000
Subject: [PATCH] DEV: perform validation with DeepR code

Ref: https://github.com/ECMWFCode4Earth/tesserugged/issues/11#issuecomment-1713829586
---
 dev/verification/DeepR/README.md              | 17 +++++++
 .../DeepR/configuration_validation_netcdf.yml | 49 +++++++++++++++++++
 dev/verification/DeepR/wrangle.sh             | 42 ++++++++++++++++
 3 files changed, 108 insertions(+)
 create mode 100644 dev/verification/DeepR/README.md
 create mode 100755 dev/verification/DeepR/configuration_validation_netcdf.yml
 create mode 100755 dev/verification/DeepR/wrangle.sh

diff --git a/dev/verification/DeepR/README.md b/dev/verification/DeepR/README.md
new file mode 100644
index 0000000..6a4f5e8
--- /dev/null
+++ b/dev/verification/DeepR/README.md
@@ -0,0 +1,17 @@
+For comparability we use the same validation code as the `DeepR` project.
+
+To run the validation, clone https://github.com/ECMWFCode4Earth/DeepR,
+create a conda environment using the environment.yml contained in the repo,
+or a reduced version only holding the packages needed for the validation scripts.
+
+The `tesserugged` output needs to be wrangled to be used as an input to the
+`DeepR` code using the script `wrangle.sh` within this folder.
+
+Place the config file `configuration_validation_netcdf.yml` into the `resources`
+directory in the `DeepR` project directory.
+
+Move to `./deepr/validation/netcdf` and start the validation with
+`python validation.py`.
+
+Be aware that you might need to change paths within `wrangle.sh` and
+`configuration_validation_netcdf.yml`, depending on your setup.
\ No newline at end of file
diff --git a/dev/verification/DeepR/configuration_validation_netcdf.yml b/dev/verification/DeepR/configuration_validation_netcdf.yml
new file mode 100755
index 0000000..a7ad30c
--- /dev/null
+++ b/dev/verification/DeepR/configuration_validation_netcdf.yml
@@ -0,0 +1,49 @@
+# Define the anchor for the 'locations' list
+locations_anchor: &locations
+  [
+    { "name": "Ibiza-Baleares", "lat": 38.9067, "lon": 1.4206 },
+    { "name": "Mallorca-Baleares", "lat": 39.6953, "lon": 3.0176 },
+    { "name": "Pyrenees-Spain", "lat": 42.5751, "lon": 1.6536 },
+    { "name": "Madrid-Spain", "lat": 40.4168, "lon": -3.7038 },
+    { "name": "Barcelona-Spain", "lat": 41.3851, "lon": 2.1734 },
+    { "name": "Picos_de_Europa-Spain", "lat": 43.1963, "lon": -4.7461 },
+    { "name": "Alicante-Spain", "lat": 38.3452, "lon": -0.4810 },
+    { "name": "Valencia-Spain", "lat": 39.4699, "lon": -0.3763 },
+    { "name": "Malaga-Spain", "lat": 36.7213, "lon": -4.4213 },
+    { "name": "Almeria-Spain", "lat": 36.8381, "lon": -2.4597 },
+    { "name": "Alboran_Sea-Spain", "lat": 35.9393, "lon": -3.2231 },
+    { "name": "Balearic_Sea-Spain", "lat": 39.8223, "lon": 2.6480 }
+  ]
+
+validation:
+  model_name: "samos"
+  model_predictions_location: /scratch/klifol/kmayer/tmp/code4earth/datasets/model/ #/ssea/SSEA/C4E/DATA/TESTING/SAMOS/postprocessed
+  baseline_name: "bicubic"
+  baseline_predictions_location: /scratch/klifol/kmayer/tmp/code4earth/datasets/baseline/ #/ssea/SSEA/C4E/DATA/TESTING/PREPROCESSED/ERA5_regridded
+  observations_name: "cerra"
+  observations_location: /scratch/klifol/kmayer/tmp/code4earth/datasets/cerra/ #/ssea/SSEA/C4E/DATA/TESTING/PREPROCESSED/CERRA
+  visualization_types:
+    metrics_global_map:
+      show_baseline: True
+      color_palette: None
+    sample_observation_vs_prediction:
+      number_of_samples: 10
+    time_series_for_a_single_site:
+      locations: *locations
+      temporal_subset: None
+      aggregate_by: ["1D", "7D", "15D", "1M"]
+      color_palette: None
+    error_time_series_for_a_single_site:
+      locations: *locations
+      temporal_subset: None
+      aggregate_by: [ "1D", "7D", "15D", "1M" ]
+      color_palette: None
+    error_distribution_for_a_single_site:
+      locations: *locations
+      temporal_subset: None
+      color_palette: None
+    boxplot_for_a_single_site:
+      locations: *locations
+      group_by: ["hour", "month", "season"]
+      color_palette: None
+  validation_dir: /scratch/klifol/kmayer/tmp/code4earth/validation/
\ No newline at end of file
diff --git a/dev/verification/DeepR/wrangle.sh b/dev/verification/DeepR/wrangle.sh
new file mode 100755
index 0000000..cc4f0cb
--- /dev/null
+++ b/dev/verification/DeepR/wrangle.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+
+##################################################################
+#Description   : wrangle tesserugged output to be used by the
+#                DeepR validation procedure
+#Creation Date : 2023-09-12
+#Author        : Konrad Mayer
+##################################################################
+
+set -euo pipefail  # stop at the first failing step so a failed tool never gets its tmp.nc moved over a dataset
+validation_dir='/scratch/klifol/kmayer/tmp/code4earth'
+projectroot=$(git rev-parse --show-toplevel)
+
+# create folders (-p creates missing parents and tolerates re-runs; brace expansion must stay outside the quotes)
+mkdir -p "$validation_dir"/datasets/{baseline,model,cerra} "$validation_dir"/validation
+
+# merge output for individual lead times to common dataset
+cdo mergetime "$projectroot"/dat/TESTING/SAMOS/postprocessed/*.nc "$validation_dir/datasets/model/samos.nc"
+cdo mergetime "$projectroot"/dat/TESTING/PREPROCESSED/ERA5_regridded/*.nc "$validation_dir/datasets/baseline/era5_regridded.nc"
+cdo mergetime "$projectroot"/dat/TESTING/PREPROCESSED/CERRA/*.nc "$validation_dir/datasets/cerra/cerra.nc"
+
+# rename variables for prediction datasets (cdo writes to tmp.nc, which then replaces the input file)
+cdo chname,mu_samos,prediction "$validation_dir/datasets/model/samos.nc" "$validation_dir/datasets/model/tmp.nc"; mv "$validation_dir/datasets/model/tmp.nc" "$validation_dir/datasets/model/samos.nc"
+cdo chname,t2m,prediction "$validation_dir/datasets/baseline/era5_regridded.nc" "$validation_dir/datasets/baseline/tmp.nc"; mv "$validation_dir/datasets/baseline/tmp.nc" "$validation_dir/datasets/baseline/era5_regridded.nc"
+
+# removing of the grid_mapping attribute in the samos data and selection of the prediction variable
+ncatted -a grid_mapping,prediction,d,, "$validation_dir/datasets/model/samos.nc"
+cdo select,name=prediction, "$validation_dir/datasets/model/samos.nc" "$validation_dir/datasets/model/tmp.nc"; mv "$validation_dir/datasets/model/tmp.nc" "$validation_dir/datasets/model/samos.nc"
+
+# rename dimensions in baseline and model data to match the cerra dimension names
+ncrename -d x,longitude "$validation_dir/datasets/baseline/era5_regridded.nc"
+ncrename -d y,latitude "$validation_dir/datasets/baseline/era5_regridded.nc"
+cdo chname,x,longitude,y,latitude "$validation_dir/datasets/baseline/era5_regridded.nc" "$validation_dir/datasets/baseline/tmp.nc"; mv "$validation_dir/datasets/baseline/tmp.nc" "$validation_dir/datasets/baseline/era5_regridded.nc"
+
+ncrename -d x,longitude "$validation_dir/datasets/model/samos.nc"
+ncrename -d y,latitude "$validation_dir/datasets/model/samos.nc"
+cdo chname,x,longitude,y,latitude "$validation_dir/datasets/model/samos.nc" "$validation_dir/datasets/model/tmp.nc"; mv "$validation_dir/datasets/model/tmp.nc" "$validation_dir/datasets/model/samos.nc"
+
+# the validation script cannot handle na values - there are some present on the edges due to the cdo bilinear interpolation in the baseline as well as samos output. clip all data on the edges
+ncks -d latitude,5,159 -d longitude,2,237 "$validation_dir/datasets/model/samos.nc" -O "$validation_dir/datasets/model/tmp.nc"; mv "$validation_dir/datasets/model/tmp.nc" "$validation_dir/datasets/model/samos.nc"
+ncks -d latitude,5,159 -d longitude,2,237 "$validation_dir/datasets/baseline/era5_regridded.nc" -O "$validation_dir/datasets/baseline/tmp.nc"; mv "$validation_dir/datasets/baseline/tmp.nc" "$validation_dir/datasets/baseline/era5_regridded.nc"
+ncks -d latitude,5,159 -d longitude,2,237 "$validation_dir/datasets/cerra/cerra.nc" -O "$validation_dir/datasets/cerra/tmp.nc"; mv "$validation_dir/datasets/cerra/tmp.nc" "$validation_dir/datasets/cerra/cerra.nc"