diff --git a/dev/postprocessing/plot_results.R b/dev/postprocessing/plot_results.R index f9fc250..4b76253 100644 --- a/dev/postprocessing/plot_results.R +++ b/dev/postprocessing/plot_results.R @@ -1,3 +1,11 @@ +#!/usr/bin/env Rscript + +################################################################## +#Description : plot fields (era5, cerra, downscaled and +# difference) and PSD for individual timesteps +#Author : Konrad Mayer +################################################################## + library(here) library(lubridate) library(stars) @@ -8,6 +16,31 @@ library(dplyr) library(patchwork) library(metR) library(stringr) +library(radialpsd) + +# helpers +truncate_to_square <- function(m) { + dim_m <- dim(m) + mindim <- min(dim_m) + start <- floor((dim_m - mindim) / 2) + end <- start + mindim + start <- start + 1 + m[start[1]:end[1], start[2]:end[2]] +} + +replace_na <- function(m, replacement = 0) { + m[is.na(m)] <- replacement + m +} + +do_psd <- function(x) { + tmp <- replace_na(truncate_to_square(as.matrix(x[[1]]))) + psd <- radial.psd(tmp, plot = FALSE, scaled = FALSE, normalized = FALSE) + max_dist <- sqrt(sum((ceiling(dim(tmp)/2)-1)^2)) + mutate(psd, wavenumber = wavenumber / max_dist) +} + +# main per_lead_time <- function(lead_time) { @@ -20,11 +53,14 @@ per_lead_time <- function(lead_time) { downscaled <- read_stars(here(glue("dat/TESTING/SAMOS/postprocessed/samos-postprocessed_{lead_time}.nc"))) %>% setNames("samos") + era5_regridded <- era5 <- read_stars(here(glue("dat/TESTING/PREPROCESSED/ERA5_regridded/t2m_era5_{lead_time}.nc"))) %>% # only needed for PSD + setNames("era5") + diff_field <- downscaled[0] diff_field[["diff_samos_cerra"]] <- downscaled[[1]] - units::drop_units(cerra[[1]]) dothis <- function(eval_datetime) { - plts <- map2( + plts <- c(map2( list(era5, cerra, downscaled, diff_field) %>% map(~filter(.x, time == eval_datetime)), list(scale_fill_viridis_c, scale_fill_viridis_c, scale_fill_viridis_c, scale_fill_divergent), ~ggplot() + @@ -37,10 +73,23 @@ per_lead_time <- function(lead_time) { ) + .y() + theme_minimal() - ) + ), + list(map( + list(era5_regridded, cerra, downscaled) %>% map(~filter(.x %>%.[1, 3:238,6:160], time == eval_datetime)[drop = TRUE]), + ~{do_psd(st_apply(.x, c("x", "y"), mean))} + ) %>% + setNames(c("era5", "cerra", "samos")) %>% + bind_rows(.id = "model") %>% + ggplot(aes(wavenumber, r_spectrum, lty = model)) + + geom_line() + + theme_minimal(20) + + scale_y_log10() + annotation_logticks() + scale_x_log10(sec.axis = sec_axis(trans = ~ (.^-1), name = "wavelength")) + + labs(y = "Power") + )) + - wrap_plots(plts, ncol = 2) + plot_annotation(title = eval_datetime) - ggsave(here(glue("plt/SAMOS/{format(eval_datetime, '%Y%m%dT%H%M%S')}_testset.pdf")), height = 7, width = 10) + wrap_plots(plts[1:4], ncol = 2) | plts[[5]] + plot_annotation(title = eval_datetime) + ggsave(here(glue("plt/SAMOS/{format(eval_datetime, '%Y%m%dT%H%M%S')}_testset.pdf")), height = 6, width = 20) } diff --git a/dev/preprocessing/regrid_era5.sh b/dev/preprocessing/regrid_era5.sh index 6e4671c..8d5aa8d 100755 --- a/dev/preprocessing/regrid_era5.sh +++ b/dev/preprocessing/regrid_era5.sh @@ -23,4 +23,6 @@ for lt in "${leadtime[@]}"; do stdbuf -oL cdo griddes dat/TRAINING/RESIDUALS/CERRA/t2m_cerra_${lt}_residuals.nc > /tmp/targetgrid.txt # bilinear interpolation cdo remapbil,/tmp/targetgrid.txt ${projectroot}/dat/${dataset}/RESIDUALS/ERA5/t2m_era5_${lt}_residuals.nc ${projectroot}/dat/${dataset}/RESIDUALS/ERA5_regridded/t2m_era5_${lt}_residuals.nc + # bilinear interpolation of raw data + cdo remapbil,/tmp/targetgrid.txt ${projectroot}/dat/${dataset}/PREPROCESSED/ERA5/t2m_era5_${lt}.nc ${projectroot}/dat/${dataset}/PREPROCESSED/ERA5_regridded/t2m_era5_${lt}.nc done diff --git a/dev/verification/spatial_structures/compare_pds.R b/dev/verification/spatial_structures/compare_pds.R new file mode 100644 index 0000000..b11cc66 --- /dev/null +++ b/dev/verification/spatial_structures/compare_pds.R @@ -0,0 +1,133 @@ +#!/usr/bin/env Rscript + +################################################################## +#Description : power spectral density analysis +#Author : Konrad Mayer +################################################################## + +library(stars) +library(purrr) +library(here) +library(glue) +library(stringr) +library(tidyverse) +library(radialpsd) +library(ggfan) + +# helpers ---------------------------------------------------------------------- +# pad_to_square <- function(m, pad = 0) { +# dim_m <- dim(m) +# maxdim <- max(dim_m) +# out <- matrix(pad, nrow = maxdim, ncol = maxdim) +# start <- floor((maxdim - dim_m) / 2) +# end <- start + dim_m +# start <- start + 1 +# out[start[1]:end[1], start[2]:end[2]] <- m +# out +# } + +truncate_to_square <- function(m) { + dim_m <- dim(m) + mindim <- min(dim_m) + start <- floor((dim_m - mindim) / 2) + end <- start + mindim + start <- start + 1 + m[start[1]:end[1], start[2]:end[2]] +} + +replace_na <- function(m, replacement = 0) { + m[is.na(m)] <- replacement + m +} + +do_psd <- function(x) { + tmp <- replace_na(truncate_to_square(as.matrix(x[[1]]))) + psd <- radial.psd(tmp, plot = FALSE, scaled = FALSE, normalized = FALSE) + max_dist <- sqrt(sum((ceiling(dim(tmp)/2)-1)^2)) + mutate(psd, wavenumber = wavenumber / max_dist) +} + +drop_units <- function(x) { + tryCatch( + units::drop_units(x), + error = function(e) { + return(x) + } + ) +} +# main ------------------------------------------------------------------------- + +dothis <- function(lead_time) { + lead_time <- str_pad(lead_time, 2, pad = "0") + + # load datasets (crop a bit as SAMOS baseline has some values NA around) + datasets <- c( + cerra = "dat/TESTING/PREPROCESSED/CERRA/t2m_cerra_{lead_time}.nc", + downscaled = "dat/TESTING/SAMOS/postprocessed/samos-postprocessed_{lead_time}.nc", + era5 = "dat/TESTING/PREPROCESSED/ERA5_regridded/t2m_era5_{lead_time}.nc" + ) + dat <- map2(datasets, names(datasets), + ~setNames(drop_units(read_stars(here(glue(.x)))), .y)[1, 3:238,6:160]) # data gets truncated to get rid of NA values on two of the edges caused by CDO bilinear interpolation + + timesteps <- seq_len(dim(dat[[1]])["time"]) + + #overall + + psd_overall <- map(dat, + ~do_psd(st_apply(.x, c("x", "y"), mean))) %>% + bind_rows(.id = "model") + + psd_overall %>% + ggplot(aes(wavenumber, r_spectrum, lty = model)) + + geom_line() + + theme_minimal(20) + + scale_y_log10() + scale_x_log10(sec.axis = sec_axis(trans = ~ (.^-1), name = "wavelength")) + annotation_logticks() + + labs(y = "Power") + + ggsave(here(glue("plt/PSD/radialPSD_samos_leadtime{lead_time}_overall.pdf")), width = 10, height = 6) + + + # loop over timesteps and calculate mean psd + psd_timesteps <- map(dat, function(ds) { + map(timesteps, function(ts) do_psd(ds[,,,ts, drop = TRUE])) + }) + # common dataframe + plotdat <- psd_timesteps %>% + map(~bind_rows(setNames(.x, timesteps), .id = "timesteps")) %>% + bind_rows(.id = "dataset") + + # plot + plotdat %>% + ggplot(aes(x = wavenumber, y = r_spectrum, color = dataset, group = dataset)) + + geom_interval() + + theme_minimal(20) + + scale_y_log10() + scale_x_log10(sec.axis = sec_axis(trans = ~ (.^-1), name = "wavelength")) + annotation_logticks() + + labs(y = "Power") + + ggsave(here(glue("plt/PSD/radialPSD_samos_leadtime{lead_time}_distribution.pdf")), width = 10, height = 6) + + # aggregate to seasons and calculate PSD per season + seasons <- list(DJF = c(12, 1, 2), MAM = 3:5, JJA = 6:8, SON = 9:11) + + per_season <- function(months) { + map(dat, ~do_psd(st_apply(.x %>% filter(lubridate::month(time) %in% months), c("x", "y"), mean))) %>% + bind_rows(.id = "model") + } + + psd_season <- map(seasons, per_season) + + psd_season %>% + bind_rows(.id = "season") %>% + ggplot(aes(wavenumber, r_spectrum, color = season, lty = model)) + + geom_line() + + theme_minimal(20) + + scale_y_log10() + scale_x_log10(sec.axis = sec_axis(trans = ~ (.^-1), name = "wavelength")) + annotation_logticks() + + labs(y = "Power") + + ggsave(here(glue("plt/PSD/radialPSD_samos_leadtime{lead_time}_season.pdf")), width = 10, height = 6) +} + +lead_times <- seq(0, 21, by = 3) +walk(lead_times, dothis) + +# TODO: PSD needs to be done on projected data, otherwise distance is not valid \ No newline at end of file diff --git a/dev/verification/spatial_structures/compare_variogram.R b/dev/verification/spatial_structures/compare_variogram.R new file mode 100644 index 0000000..26ae994 --- /dev/null +++ b/dev/verification/spatial_structures/compare_variogram.R @@ -0,0 +1,55 @@ +library(tidyverse) +library(stringr) +library(here) +library(stars) +library(glue) +library(geoR) + +dothis <- function(lead_time) { + lead_time = 12# for testing + lead_time <- str_pad(lead_time, 2, pad = "0") + + # load datasets (crop a bit as SAMOS baseline has some values NA around) + cerra <- read_stars(here(glue("dat/TESTING/PREPROCESSED/CERRA/t2m_cerra_{lead_time}.nc"))) %>% + setNames("cerra") %>% + .[1, 3:238,6:160] + downscaled <- read_stars(here(glue("dat/TESTING/SAMOS/postprocessed/samos-postprocessed_{lead_time}.nc"))) %>% + setNames("samos") %>% + .[1, 3:238,6:160] + + + +# cerra_tibble <- as_tibble(cerra[,,,10]) # how to deal with timesteps? including more results in too big ram use +# downscaled_tibble <- as_tibble(downscaled[,,,10]) +# variog_cerra = variog(data = cerra_tibble[,4], coords = as.matrix(cerra_tibble[1:2])) +# variog_downscaled = variog(data = downscaled_tibble[,4], coords = as.matrix(downscaled_tibble[1:2])) + +# plot(variog_cerra) +# points(variog_downscaled$u, variog_downscaled$v, col = "red") + +# plot(variog_cerra) +# points(variog_downscaled$u, variog_downscaled$v, col = "red") + + seasons <- list(DJF = c(12, 1, 2), MAM = 3:5, JJA = 6:8, SON = 9:11) + + + # TODO: variogram needs to be done on projected data, otherwise distance is not valid + per_season <- function(months) { + cerra_tibble <- as_tibble(st_apply(cerra %>% filter(lubridate::month(time) %in% months), c("x", "y"), mean)) + downscaled_tibble <- as_tibble(st_apply(downscaled %>% filter(lubridate::month(time) %in% months), c("x", "y"), mean)) + variog_cerra = variog(data = cerra_tibble[,3], coords = as.matrix(cerra_tibble[1:2])) + variog_downscaled = variog(data = downscaled_tibble[,3], coords = as.matrix(downscaled_tibble[1:2])) + tibble(distance = variog_cerra$u, semivar_cerra = variog_cerra$v, semivar_downscaled = variog_downscaled$v) + } + + variog_seasons <- purrr::map(seasons, per_season) %>% setNames(names(seasons)) + + variog_seasons %>% + bind_rows(.id = "season") %>% + pivot_longer(-c(1:2), names_to = "model") %>% + ggplot(aes(distance, value, lty = model, color = season)) + + geom_line() + + theme_minimal() + ggsave(here(glue("plt/seasonal_variogram_samos_leadtime{lead_time}.pdf"))) +} +