From 9d2e0ffcb371e510b62be190bffed942f0c8b9c9 Mon Sep 17 00:00:00 2001 From: Konrad Mayer Date: Mon, 28 Aug 2023 14:46:11 +0000 Subject: [PATCH 1/6] DEV: playing around with PSD Ref: #6 --- dev/verification/PSD/compare_pds.R | 87 ++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 dev/verification/PSD/compare_pds.R diff --git a/dev/verification/PSD/compare_pds.R b/dev/verification/PSD/compare_pds.R new file mode 100644 index 0000000..656c8de --- /dev/null +++ b/dev/verification/PSD/compare_pds.R @@ -0,0 +1,87 @@ +library(stars) +library(purrr) +library(here) +library(glue) +library(stringr) +library(tidyverse) +library(radialpsd) + + +# helpers ---------------------------------------------------------------------- +# pad_to_square <- function(m, pad = 0) { +# dim_m <- dim(m) +# maxdim <- max(dim_m) +# out <- matrix(pad, nrow = maxdim, ncol = maxdim) +# start <- floor((maxdim - dim_m) / 2) +# end <- start + dim_m +# start <- start + 1 +# out[start[1]:end[1], start[2]:end[2]] <- m +# out +# } + +truncate_to_square <- function(m) { + dim_m <- dim(m) + mindim <- min(dim_m) + start <- floor((dim_m - mindim) / 2) + end <- start + mindim + start <- start + 1 + m[start[1]:end[1], start[2]:end[2]] +} + +replace_na <- function(m, replacement = 0) { + m[is.na(m)] <- replacement + m +} + +# main ------------------------------------------------------------------------- + +dothis <- function(lead_time) { + lead_time <- str_pad(lead_time, 2, pad = "0") + + # load datasets (crop a bit as SAMOS baseline has some values NA around) + cerra <- read_stars(here(glue("dat/TESTING/PREPROCESSED/CERRA/t2m_cerra_{lead_time}.nc"))) %>% + setNames("cerra") %>% + .[1, 3:238,6:160] + downscaled <- read_stars(here(glue("dat/TESTING/SAMOS/postprocessed/samos-postprocessed_{lead_time}.nc"))) %>% + setNames("samos") %>% + .[1, 3:238,6:160] + + timesteps <- seq_len(dim(cerra)["time"]) + + # loop over timesteps and calculate psd + psd_cerra <- list() + psd_ds <- list() + + for (i in seq_along(timesteps)){ + psd_cerra[[i]] <- radial.psd(replace_na(truncate_to_square(as.matrix(units::drop_units(cerra[[1]][,,timesteps[i]])))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) + psd_ds[[i]] <- radial.psd(replace_na(truncate_to_square(as.matrix(downscaled[[1]][,,timesteps[i]]))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) + } + + # common dataframe + plotdat <- bind_rows(list( + cerra = psd_cerra |> + setNames(timesteps) |> + bind_rows(.id = "timesteps"), + samos = psd_ds|> + setNames(timesteps) |> + bind_rows(.id = "timesteps") + ), .id = "dataset") + + # plot + plotdat %>% + ggplot(aes(x = log(wavenumber), y = log(r_spectrum), color = dataset)) + + stat_smooth( + geom="ribbon", + aes(ymax = after_stat(y) + 1, + ymin = after_stat(y) - 1, + fill = dataset), + alpha = 0.1, linetype = 0 + ) + + geom_smooth(se = FALSE)+ + theme_minimal() + + ggsave(here(glue("plt/PSD/radialPSD_samos_leadtime{lead_time}.pdf"))) +} + +lead_times <- seq(0, 21, by = 3) +walk(lead_times, dothis) From 4388e29ec16d567356bcdd8333e6e37a5faa7f52 Mon Sep 17 00:00:00 2001 From: Konrad Mayer Date: Thu, 31 Aug 2023 07:15:39 +0000 Subject: [PATCH 2/6] DEV: seasonal PSD and PSD distribution among images --- dev/verification/PSD/compare_pds.R | 87 ------------ .../spatial_structures/compare_pds.R | 126 ++++++++++++++++++ 2 files changed, 126 insertions(+), 87 deletions(-) delete mode 100644 dev/verification/PSD/compare_pds.R create mode 100644 dev/verification/spatial_structures/compare_pds.R diff --git a/dev/verification/PSD/compare_pds.R b/dev/verification/PSD/compare_pds.R deleted file mode 100644 index 656c8de..0000000 --- a/dev/verification/PSD/compare_pds.R +++ /dev/null @@ -1,87 +0,0 @@ -library(stars) -library(purrr) -library(here) -library(glue) -library(stringr) -library(tidyverse) -library(radialpsd) - - -# helpers ---------------------------------------------------------------------- -# pad_to_square <- function(m, pad = 0) { -# dim_m <- dim(m) -# maxdim <- max(dim_m) -# out <- matrix(pad, nrow = maxdim, ncol = maxdim) -# start <- floor((maxdim - dim_m) / 2) -# end <- start + dim_m -# start <- start + 1 -# out[start[1]:end[1], start[2]:end[2]] <- m -# out -# } - -truncate_to_square <- function(m) { - dim_m <- dim(m) - mindim <- min(dim_m) - start <- floor((dim_m - mindim) / 2) - end <- start + mindim - start <- start + 1 - m[start[1]:end[1], start[2]:end[2]] -} - -replace_na <- function(m, replacement = 0) { - m[is.na(m)] <- replacement - m -} - -# main ------------------------------------------------------------------------- - -dothis <- function(lead_time) { - lead_time <- str_pad(lead_time, 2, pad = "0") - - # load datasets (crop a bit as SAMOS baseline has some values NA around) - cerra <- read_stars(here(glue("dat/TESTING/PREPROCESSED/CERRA/t2m_cerra_{lead_time}.nc"))) %>% - setNames("cerra") %>% - .[1, 3:238,6:160] - downscaled <- read_stars(here(glue("dat/TESTING/SAMOS/postprocessed/samos-postprocessed_{lead_time}.nc"))) %>% - setNames("samos") %>% - .[1, 3:238,6:160] - - timesteps <- seq_len(dim(cerra)["time"]) - - # loop over timesteps and calculate psd - psd_cerra <- list() - psd_ds <- list() - - for (i in seq_along(timesteps)){ - psd_cerra[[i]] <- radial.psd(replace_na(truncate_to_square(as.matrix(units::drop_units(cerra[[1]][,,timesteps[i]])))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) - psd_ds[[i]] <- radial.psd(replace_na(truncate_to_square(as.matrix(downscaled[[1]][,,timesteps[i]]))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) - } - - # common dataframe - plotdat <- bind_rows(list( - cerra = psd_cerra |> - setNames(timesteps) |> - bind_rows(.id = "timesteps"), - samos = psd_ds|> - setNames(timesteps) |> - bind_rows(.id = "timesteps") - ), .id = "dataset") - - # plot - plotdat %>% - ggplot(aes(x = log(wavenumber), y = log(r_spectrum), color = dataset)) + - stat_smooth( - geom="ribbon", - aes(ymax = after_stat(y) + 1, - ymin = after_stat(y) - 1, - fill = dataset), - alpha = 0.1, linetype = 0 - ) + - geom_smooth(se = FALSE)+ - theme_minimal() - - ggsave(here(glue("plt/PSD/radialPSD_samos_leadtime{lead_time}.pdf"))) -} - -lead_times <- seq(0, 21, by = 3) -walk(lead_times, dothis) diff --git a/dev/verification/spatial_structures/compare_pds.R b/dev/verification/spatial_structures/compare_pds.R new file mode 100644 index 0000000..56ce7c2 --- /dev/null +++ b/dev/verification/spatial_structures/compare_pds.R @@ -0,0 +1,126 @@ +library(stars) +library(purrr) +library(here) +library(glue) +library(stringr) +library(tidyverse) +library(radialpsd) +library(ggfan) + +# helpers ---------------------------------------------------------------------- +# pad_to_square <- function(m, pad = 0) { +# dim_m <- dim(m) +# maxdim <- max(dim_m) +# out <- matrix(pad, nrow = maxdim, ncol = maxdim) +# start <- floor((maxdim - dim_m) / 2) +# end <- start + dim_m +# start <- start + 1 +# out[start[1]:end[1], start[2]:end[2]] <- m +# out +# } + +truncate_to_square <- function(m) { + dim_m <- dim(m) + mindim <- min(dim_m) + start <- floor((dim_m - mindim) / 2) + end <- start + mindim + start <- start + 1 + m[start[1]:end[1], start[2]:end[2]] +} + +replace_na <- function(m, replacement = 0) { + m[is.na(m)] <- replacement + m +} + +# main ------------------------------------------------------------------------- + +dothis <- function(lead_time) { + lead_time <- str_pad(lead_time, 2, pad = "0") + + # load datasets (crop a bit as SAMOS baseline has some values NA around) + cerra <- read_stars(here(glue("dat/TESTING/PREPROCESSED/CERRA/t2m_cerra_{lead_time}.nc"))) %>% + setNames("cerra") %>% + .[1, 3:238,6:160] + downscaled <- read_stars(here(glue("dat/TESTING/SAMOS/postprocessed/samos-postprocessed_{lead_time}.nc"))) %>% + setNames("samos") %>% + .[1, 3:238,6:160] + + timesteps <- seq_len(dim(cerra)["time"]) + + #overall + + psd_cerra_overall <- radial.psd(replace_na(truncate_to_square(as.matrix((st_apply(cerra, c("x", "y"), mean)[[1]])))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) + psd_downscaled_overall <- radial.psd(replace_na(truncate_to_square(as.matrix((st_apply(downscaled, c("x", "y"), mean)[[1]])))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) + psd_overall <- bind_rows(list(cerra = psd_cerra_overall, downscaled = psd_downscaled_overall), .id = "model") + + + psd_overall %>% + ggplot(aes(wavenumber, r_spectrum, lty = model)) + + geom_line() + + theme_minimal() + + scale_y_log10() + scale_x_log10() + annotation_logticks() + ggsave(here(glue("plt/PSD/radialPSD_samos_leadtime{lead_time}_overall.pdf"))) + + + # loop over timesteps and calculate mean psd + psd_cerra <- list() + psd_ds <- list() + + for (i in seq_along(timesteps)){ + psd_cerra[[i]] <- radial.psd(replace_na(truncate_to_square(as.matrix(units::drop_units(cerra[[1]][,,timesteps[i]])))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) + psd_ds[[i]] <- radial.psd(replace_na(truncate_to_square(as.matrix(downscaled[[1]][,,timesteps[i]]))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) + } + + # common dataframe + plotdat <- bind_rows(list( + cerra = psd_cerra |> + setNames(timesteps) |> + bind_rows(.id = "timesteps"), + samos = psd_ds|> + setNames(timesteps) |> + bind_rows(.id = "timesteps") + ), .id = "dataset") + + # plot + plotdat %>% + ggplot(aes(x = wavenumber, y = r_spectrum, color = dataset, group = dataset)) + + # stat_smooth( + # geom="ribbon", + # aes(ymax = after_stat(y) + 1, + # ymin = after_stat(y) - 1, + # fill = dataset), + # alpha = 0.1, linetype = 0 + # ) + + # geom_smooth(se = FALSE)+ + geom_interval() + + theme_minimal() + + scale_y_log10() + scale_x_log10() + annotation_logticks() + + ggsave(here(glue("plt/PSD/radialPSD_samos_leadtime{lead_time}_distribution.pdf"))) + + # aggregate to seasons and calculate PSD per season + seasons <- list(DJF = c(12, 1, 2), MAM = 3:5, JJA = 6:8, SON = 9:11) + + per_season <- function(months) { + cerra_season <- st_apply(cerra %>% filter(lubridate::month(time) %in% months), c("x", "y"), mean) + downscaled_season <- st_apply(downscaled %>% filter(lubridate::month(time) %in% months), c("x", "y"), mean) + psd_cerra_season <- radial.psd(replace_na(truncate_to_square(as.matrix((cerra_season[[1]])))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) + psd_downscaled_season <- radial.psd(replace_na(truncate_to_square(as.matrix((downscaled_season[[1]])))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) + bind_rows(list(cerra = psd_cerra_season, downscaled = psd_downscaled_season), .id = "model") + } + psd_season <- map(seasons, per_season) + + psd_season %>% + bind_rows(.id = "season") %>% + ggplot(aes(wavenumber, r_spectrum, color = season, lty = model)) + + geom_line() + + theme_minimal() + + scale_y_log10() + scale_x_log10() + annotation_logticks() + ggsave(here(glue("plt/PSD/radialPSD_samos_leadtime{lead_time}_season.pdf"))) +} + +lead_times <- seq(0, 21, by = 3) +walk(lead_times, dothis) + +# TODO: PSD needs to be done on projected data, otherwise distance is not valid \ No newline at end of file From 61fa862d51845b4c44840dc9b9169b58525aadaf Mon Sep 17 00:00:00 2001 From: Konrad Mayer Date: Thu, 31 Aug 2023 07:16:24 +0000 Subject: [PATCH 3/6] DEV: variogram --- .../spatial_structures/compare_variogram.R | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 dev/verification/spatial_structures/compare_variogram.R diff --git a/dev/verification/spatial_structures/compare_variogram.R b/dev/verification/spatial_structures/compare_variogram.R new file mode 100644 index 0000000..26ae994 --- /dev/null +++ b/dev/verification/spatial_structures/compare_variogram.R @@ -0,0 +1,55 @@ +library(tidyverse) +library(stringr) +library(here) +library(stars) +library(glue) +library(geoR) + +dothis <- function(lead_time) { + lead_time = 12# for testing + lead_time <- str_pad(lead_time, 2, pad = "0") + + # load datasets (crop a bit as SAMOS baseline has some values NA around) + cerra <- read_stars(here(glue("dat/TESTING/PREPROCESSED/CERRA/t2m_cerra_{lead_time}.nc"))) %>% + setNames("cerra") %>% + .[1, 3:238,6:160] + downscaled <- read_stars(here(glue("dat/TESTING/SAMOS/postprocessed/samos-postprocessed_{lead_time}.nc"))) %>% + setNames("samos") %>% + .[1, 3:238,6:160] + + + +# cerra_tibble <- as_tibble(cerra[,,,10]) # how to deal with timesteps? including more results in too big ram use +# downscaled_tibble <- as_tibble(downscaled[,,,10]) +# variog_cerra = variog(data = cerra_tibble[,4], coords = as.matrix(cerra_tibble[1:2])) +# variog_downscaled = variog(data = downscaled_tibble[,4], coords = as.matrix(downscaled_tibble[1:2])) + +# plot(variog_cerra) +# points(variog_downscaled$u, variog_downscaled$v, col = "red") + +# plot(variog_cerra) +# points(variog_downscaled$u, variog_downscaled$v, col = "red") + + seasons <- list(DJF = c(12, 1, 2), MAM = 3:5, JJA = 6:8, SON = 9:11) + + + # TODO: variogram needs to be done on projected data, otherwise distance is not valid + per_season <- function(months) { + cerra_tibble <- as_tibble(st_apply(cerra %>% filter(lubridate::month(time) %in% months), c("x", "y"), mean)) + downscaled_tibble <- as_tibble(st_apply(downscaled %>% filter(lubridate::month(time) %in% months), c("x", "y"), mean)) + variog_cerra = variog(data = cerra_tibble[,3], coords = as.matrix(cerra_tibble[1:2])) + variog_downscaled = variog(data = downscaled_tibble[,3], coords = as.matrix(downscaled_tibble[1:2])) + tibble(distance = variog_cerra$u, semivar_cerra = variog_cerra$v, semivar_downscaled = variog_downscaled$v) + } + + variog_seasons <- purrr::map(seasons, per_season) %>% setNames(names(seasons)) + + variog_seasons %>% + bind_rows(.id = "season") %>% + pivot_longer(-c(1:2), names_to = "model") %>% + ggplot(aes(distance, value, lty = model, color = season)) + + geom_line() + + theme_minimal() + ggsave(here(glue("plt/seasonal_variogram_samos_leadtime{lead_time}.pdf"))) +} + From 4513e43dbdef3a05ad4a45e5d2d2a80fd1284ba9 Mon Sep 17 00:00:00 2001 From: Konrad Mayer Date: Thu, 31 Aug 2023 09:38:04 +0000 Subject: [PATCH 4/6] DEV: add ERA5 to PSD --- dev/postprocessing/plot_results.R | 40 +++++++++++++++++-- dev/preprocessing/regrid_era5.sh | 2 + .../spatial_structures/compare_pds.R | 23 ++++++++--- 3 files changed, 55 insertions(+), 10 deletions(-) diff --git a/dev/postprocessing/plot_results.R b/dev/postprocessing/plot_results.R index f9fc250..13684c3 100644 --- a/dev/postprocessing/plot_results.R +++ b/dev/postprocessing/plot_results.R @@ -8,6 +8,23 @@ library(dplyr) library(patchwork) library(metR) library(stringr) +library(radialpsd) + +# helpers +truncate_to_square <- function(m) { + dim_m <- dim(m) + mindim <- min(dim_m) + start <- floor((dim_m - mindim) / 2) + end <- start + mindim + start <- start + 1 + m[start[1]:end[1], start[2]:end[2]] +} + +replace_na <- function(m, replacement = 0) { + m[is.na(m)] <- replacement + m +} +# main per_lead_time <- function(lead_time) { @@ -20,11 +37,14 @@ per_lead_time <- function(lead_time) { downscaled <- read_stars(here(glue("dat/TESTING/SAMOS/postprocessed/samos-postprocessed_{lead_time}.nc"))) %>% setNames("samos") + era5_regridded <- era5 <- read_stars(here(glue("dat/TESTING/PREPROCESSED/ERA5_regridded/t2m_era5_{lead_time}.nc"))) %>% # only needed for PSD + setNames("era5") + diff_field <- downscaled[0] diff_field[["diff_samos_cerra"]] <- downscaled[[1]] - units::drop_units(cerra[[1]]) dothis <- function(eval_datetime) { - plts <- map2( + plts <- c(map2( list(era5, cerra, downscaled, diff_field) %>% map(~filter(.x, time == eval_datetime)), list(scale_fill_viridis_c, scale_fill_viridis_c, scale_fill_viridis_c, scale_fill_divergent), ~ggplot() + @@ -37,10 +57,22 @@ per_lead_time <- function(lead_time) { ) + .y() + theme_minimal() - ) + ), + list(map( + list(era5_regridded, cerra, downscaled) %>% map(~filter(.x %>%.[1, 3:238,6:160], time == eval_datetime)), + ~{radial.psd(replace_na(truncate_to_square(as.matrix((st_apply(.x, c("x", "y"), mean)[[1]])))), plot = FALSE)} + ) %>% + setNames(c("era5", "cerra", "samos")) %>% + bind_rows(.id = "model") %>% + ggplot(aes(wavenumber, r_spectrum, lty = model)) + + geom_line() + + theme_minimal() + + scale_y_log10() + annotation_logticks() + scale_x_log10() + )) + - wrap_plots(plts, ncol = 2) + plot_annotation(title = eval_datetime) - ggsave(here(glue("plt/SAMOS/{format(eval_datetime, '%Y%m%dT%H%M%S')}_testset.pdf")), height = 7, width = 10) + wrap_plots(plts[1:4], ncol = 2) / plts[[5]] + plot_annotation(title = eval_datetime) + ggsave(here(glue("plt/SAMOS/{format(eval_datetime, '%Y%m%dT%H%M%S')}_testset.pdf")), height = 14, width = 10) } diff --git a/dev/preprocessing/regrid_era5.sh b/dev/preprocessing/regrid_era5.sh index 6e4671c..8d5aa8d 100755 --- a/dev/preprocessing/regrid_era5.sh +++ b/dev/preprocessing/regrid_era5.sh @@ -23,4 +23,6 @@ for lt in "${leadtime[@]}"; do stdbuf -oL cdo griddes dat/TRAINING/RESIDUALS/CERRA/t2m_cerra_${lt}_residuals.nc > /tmp/targetgrid.txt # bilinear interpolation cdo remapbil,/tmp/targetgrid.txt ${projectroot}/dat/${dataset}/RESIDUALS/ERA5/t2m_era5_${lt}_residuals.nc ${projectroot}/dat/${dataset}/RESIDUALS/ERA5_regridded/t2m_era5_${lt}_residuals.nc + # bilinear interpolation of raw data + cdo remapbil,/tmp/targetgrid.txt ${projectroot}/dat/${dataset}/PREPROCESSED/ERA5/t2m_era5_${lt}.nc ${projectroot}/dat/${dataset}/PREPROCESSED/ERA5_regridded/t2m_era5_${lt}.nc done diff --git a/dev/verification/spatial_structures/compare_pds.R b/dev/verification/spatial_structures/compare_pds.R index 56ce7c2..d3ec819 100644 --- a/dev/verification/spatial_structures/compare_pds.R +++ b/dev/verification/spatial_structures/compare_pds.R @@ -45,14 +45,18 @@ dothis <- function(lead_time) { downscaled <- read_stars(here(glue("dat/TESTING/SAMOS/postprocessed/samos-postprocessed_{lead_time}.nc"))) %>% setNames("samos") %>% .[1, 3:238,6:160] + era5 <- read_stars(here(glue("dat/TESTING/PREPROCESSED/ERA5_regridded/t2m_era5_{lead_time}.nc"))) %>% + setNames("era5") %>% + .[1, 3:238,6:160] timesteps <- seq_len(dim(cerra)["time"]) #overall - psd_cerra_overall <- radial.psd(replace_na(truncate_to_square(as.matrix((st_apply(cerra, c("x", "y"), mean)[[1]])))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) - psd_downscaled_overall <- radial.psd(replace_na(truncate_to_square(as.matrix((st_apply(downscaled, c("x", "y"), mean)[[1]])))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) - psd_overall <- bind_rows(list(cerra = psd_cerra_overall, downscaled = psd_downscaled_overall), .id = "model") + psd_cerra_overall <- radial.psd(replace_na(truncate_to_square(as.matrix((st_apply(cerra, c("x", "y"), mean)[[1]])))), plot = FALSE) + psd_downscaled_overall <- radial.psd(replace_na(truncate_to_square(as.matrix((st_apply(downscaled, c("x", "y"), mean)[[1]])))), plot = FALSE) + psd_era5_overall <- radial.psd(replace_na(truncate_to_square(as.matrix((st_apply(era5, c("x", "y"), mean)[[1]])))), plot = FALSE) + psd_overall <- bind_rows(list(cerra = psd_cerra_overall, downscaled = psd_downscaled_overall, era5 = psd_era5_overall), .id = "model") psd_overall %>% @@ -60,16 +64,18 @@ dothis <- function(lead_time) { geom_line() + theme_minimal() + scale_y_log10() + scale_x_log10() + annotation_logticks() - ggsave(here(glue("plt/PSD/radialPSD_samos_leadtime{lead_time}_overall.pdf"))) +ggsave(here(glue("plt/PSD/radialPSD_samos_leadtime{lead_time}_overall.pdf"))) # loop over timesteps and calculate mean psd psd_cerra <- list() psd_ds <- list() + psd_era5 <- list() for (i in seq_along(timesteps)){ psd_cerra[[i]] <- radial.psd(replace_na(truncate_to_square(as.matrix(units::drop_units(cerra[[1]][,,timesteps[i]])))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) psd_ds[[i]] <- radial.psd(replace_na(truncate_to_square(as.matrix(downscaled[[1]][,,timesteps[i]]))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) + psd_era5[[i]] <- radial.psd(replace_na(truncate_to_square(as.matrix(era5[[1]][,,timesteps[i]]))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) } # common dataframe @@ -77,7 +83,10 @@ dothis <- function(lead_time) { cerra = psd_cerra |> setNames(timesteps) |> bind_rows(.id = "timesteps"), - samos = psd_ds|> + samos = psd_ds |> + setNames(timesteps) |> + bind_rows(.id = "timesteps"), + era5 = psd_era5 |> setNames(timesteps) |> bind_rows(.id = "timesteps") ), .id = "dataset") @@ -105,9 +114,11 @@ dothis <- function(lead_time) { per_season <- function(months) { cerra_season <- st_apply(cerra %>% filter(lubridate::month(time) %in% months), c("x", "y"), mean) downscaled_season <- st_apply(downscaled %>% filter(lubridate::month(time) %in% months), c("x", "y"), mean) + era5_season <- st_apply(era5 %>% filter(lubridate::month(time) %in% months), c("x", "y"), mean) psd_cerra_season <- radial.psd(replace_na(truncate_to_square(as.matrix((cerra_season[[1]])))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) psd_downscaled_season <- radial.psd(replace_na(truncate_to_square(as.matrix((downscaled_season[[1]])))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) - bind_rows(list(cerra = psd_cerra_season, downscaled = psd_downscaled_season), .id = "model") + psd_era5_season <- radial.psd(replace_na(truncate_to_square(as.matrix((era5_season[[1]])))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) + bind_rows(list(cerra = psd_cerra_season, downscaled = psd_downscaled_season, era5 = psd_era5_season), .id = "model") } psd_season <- map(seasons, per_season) From fa2a4ed4e5a1df3aa5ad73de42df43524db08040 Mon Sep 17 00:00:00 2001 From: Konrad Mayer Date: Tue, 5 Sep 2023 13:55:24 +0000 Subject: [PATCH 5/6] DEV: dont standardize at calculating PSD --- dev/postprocessing/plot_results.R | 2 +- dev/verification/spatial_structures/compare_pds.R | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dev/postprocessing/plot_results.R b/dev/postprocessing/plot_results.R index 13684c3..32ad185 100644 --- a/dev/postprocessing/plot_results.R +++ b/dev/postprocessing/plot_results.R @@ -60,7 +60,7 @@ per_lead_time <- function(lead_time) { ), list(map( list(era5_regridded, cerra, downscaled) %>% map(~filter(.x %>%.[1, 3:238,6:160], time == eval_datetime)), - ~{radial.psd(replace_na(truncate_to_square(as.matrix((st_apply(.x, c("x", "y"), mean)[[1]])))), plot = FALSE)} + ~{radial.psd(replace_na(truncate_to_square(as.matrix((st_apply(.x, c("x", "y"), mean)[[1]])))), plot = FALSE, normalized = FALSE, scaled = FALSE)} ) %>% setNames(c("era5", "cerra", "samos")) %>% bind_rows(.id = "model") %>% diff --git a/dev/verification/spatial_structures/compare_pds.R b/dev/verification/spatial_structures/compare_pds.R index d3ec819..bed8ae4 100644 --- a/dev/verification/spatial_structures/compare_pds.R +++ b/dev/verification/spatial_structures/compare_pds.R @@ -53,9 +53,9 @@ dothis <- function(lead_time) { #overall - psd_cerra_overall <- radial.psd(replace_na(truncate_to_square(as.matrix((st_apply(cerra, c("x", "y"), mean)[[1]])))), plot = FALSE) - psd_downscaled_overall <- radial.psd(replace_na(truncate_to_square(as.matrix((st_apply(downscaled, c("x", "y"), mean)[[1]])))), plot = FALSE) - psd_era5_overall <- radial.psd(replace_na(truncate_to_square(as.matrix((st_apply(era5, c("x", "y"), mean)[[1]])))), plot = FALSE) + psd_cerra_overall <- radial.psd(replace_na(truncate_to_square(as.matrix((st_apply(cerra, c("x", "y"), mean)[[1]])))), plot = FALSE, scaled = FALSE, normalized = FALSE) + psd_downscaled_overall <- radial.psd(replace_na(truncate_to_square(as.matrix((st_apply(downscaled, c("x", "y"), mean)[[1]])))), plot = FALSE, scaled = FALSE, normalized = FALSE) + psd_era5_overall <- radial.psd(replace_na(truncate_to_square(as.matrix((st_apply(era5, c("x", "y"), mean)[[1]])))), plot = FALSE, scaled = FALSE, normalized = FALSE) psd_overall <- bind_rows(list(cerra = psd_cerra_overall, downscaled = psd_downscaled_overall, era5 = psd_era5_overall), .id = "model") From c36136b7e8b09cee34373d084ff1e6e3a0497eb8 Mon Sep 17 00:00:00 2001 From: Konrad Mayer Date: Tue, 12 Sep 2023 10:04:57 +0000 Subject: [PATCH 6/6] REF: generalize psd code --- dev/postprocessing/plot_results.R | 31 ++++- .../spatial_structures/compare_pds.R | 120 +++++++++--------- 2 files changed, 82 insertions(+), 69 deletions(-) diff --git a/dev/postprocessing/plot_results.R b/dev/postprocessing/plot_results.R index 32ad185..4b76253 100644 --- a/dev/postprocessing/plot_results.R +++ b/dev/postprocessing/plot_results.R @@ -1,3 +1,11 @@ +#!/usr/bin/env Rscript + +################################################################## +#Description : plot fields (era5, cerra, downscaled and +# difference) and PSD for individual timesteps +#Author : Konrad Mayer +################################################################## + library(here) library(lubridate) library(stars) @@ -24,6 +32,14 @@ replace_na <- function(m, replacement = 0) { m[is.na(m)] <- replacement m } + +do_psd <- function(x) { + tmp <- replace_na(truncate_to_square(as.matrix(x[[1]]))) + psd <- radial.psd(tmp, plot = FALSE, scaled = FALSE, normalized = FALSE) + max_dist <- sqrt(sum((ceiling(dim(tmp)/2)-1)^2)) + mutate(psd, wavenumber = wavenumber / max_dist) +} + # main per_lead_time <- function(lead_time) { @@ -59,20 +75,21 @@ per_lead_time <- function(lead_time) { theme_minimal() ), list(map( - list(era5_regridded, cerra, downscaled) %>% map(~filter(.x %>%.[1, 3:238,6:160], time == eval_datetime)), - ~{radial.psd(replace_na(truncate_to_square(as.matrix((st_apply(.x, c("x", "y"), mean)[[1]])))), plot = FALSE, normalized = FALSE, scaled = FALSE)} + list(era5_regridded, cerra, downscaled) %>% map(~filter(.x %>%.[1, 3:238,6:160], time == eval_datetime)[drop = TRUE]), + ~{do_psd(st_apply(.x, c("x", "y"), mean))} ) %>% setNames(c("era5", "cerra", "samos")) %>% bind_rows(.id = "model") %>% ggplot(aes(wavenumber, r_spectrum, lty = model)) + - geom_line() + - theme_minimal() + - scale_y_log10() + annotation_logticks() + scale_x_log10() + geom_line() + + theme_minimal(20) + + scale_y_log10() + annotation_logticks() + scale_x_log10(sec.axis = sec_axis(trans = ~ (.^-1), name = "wavelength")) + + labs(y = "Power") )) - wrap_plots(plts[1:4], ncol = 2) / plts[[5]] + plot_annotation(title = eval_datetime) - ggsave(here(glue("plt/SAMOS/{format(eval_datetime, '%Y%m%dT%H%M%S')}_testset.pdf")), height = 14, width = 10) + wrap_plots(plts[1:4], ncol = 2) | plts[[5]] + plot_annotation(title = eval_datetime) + ggsave(here(glue("plt/SAMOS/{format(eval_datetime, '%Y%m%dT%H%M%S')}_testset.pdf")), height = 6, width = 20) } diff --git a/dev/verification/spatial_structures/compare_pds.R b/dev/verification/spatial_structures/compare_pds.R index bed8ae4..b11cc66 100644 --- a/dev/verification/spatial_structures/compare_pds.R +++ b/dev/verification/spatial_structures/compare_pds.R @@ -1,3 +1,10 @@ +#!/usr/bin/env Rscript + +################################################################## +#Description : power spectral density analysis +#Author : Konrad Mayer +################################################################## + library(stars) library(purrr) library(here) @@ -33,102 +40,91 @@ replace_na <- function(m, replacement = 0) { m } +do_psd <- function(x) { + tmp <- replace_na(truncate_to_square(as.matrix(x[[1]]))) + psd <- radial.psd(tmp, plot = FALSE, scaled = FALSE, normalized = FALSE) + max_dist <- sqrt(sum((ceiling(dim(tmp)/2)-1)^2)) + mutate(psd, wavenumber = wavenumber / max_dist) +} + +drop_units <- function(x) { + tryCatch( + units::drop_units(x), + error = function(e) { + return(x) + } + ) +} # main ------------------------------------------------------------------------- dothis <- function(lead_time) { lead_time <- str_pad(lead_time, 2, pad = "0") # load datasets (crop a bit as SAMOS baseline has some values NA around) - cerra <- read_stars(here(glue("dat/TESTING/PREPROCESSED/CERRA/t2m_cerra_{lead_time}.nc"))) %>% - setNames("cerra") %>% - .[1, 3:238,6:160] - downscaled <- read_stars(here(glue("dat/TESTING/SAMOS/postprocessed/samos-postprocessed_{lead_time}.nc"))) %>% - setNames("samos") %>% - .[1, 3:238,6:160] - era5 <- read_stars(here(glue("dat/TESTING/PREPROCESSED/ERA5_regridded/t2m_era5_{lead_time}.nc"))) %>% - setNames("era5") %>% - .[1, 3:238,6:160] - - timesteps <- seq_len(dim(cerra)["time"]) + datasets <- c( + cerra = "dat/TESTING/PREPROCESSED/CERRA/t2m_cerra_{lead_time}.nc", + downscaled = "dat/TESTING/SAMOS/postprocessed/samos-postprocessed_{lead_time}.nc", + era5 = "dat/TESTING/PREPROCESSED/ERA5_regridded/t2m_era5_{lead_time}.nc" + ) + dat <- map2(datasets, names(datasets), + ~setNames(drop_units(read_stars(here(glue(.x)))), .y)[1, 3:238,6:160]) # data gets truncated to get rid of NA values on two of the edges caused by CDO bilinear interpolation + + timesteps <- seq_len(dim(dat[[1]])["time"]) #overall - psd_cerra_overall <- radial.psd(replace_na(truncate_to_square(as.matrix((st_apply(cerra, c("x", "y"), mean)[[1]])))), plot = FALSE, scaled = FALSE, normalized = FALSE) - psd_downscaled_overall <- radial.psd(replace_na(truncate_to_square(as.matrix((st_apply(downscaled, c("x", "y"), mean)[[1]])))), plot = FALSE, scaled = FALSE, normalized = FALSE) - psd_era5_overall <- radial.psd(replace_na(truncate_to_square(as.matrix((st_apply(era5, c("x", "y"), mean)[[1]])))), plot = FALSE, scaled = FALSE, normalized = FALSE) - psd_overall <- bind_rows(list(cerra = psd_cerra_overall, downscaled = psd_downscaled_overall, era5 = psd_era5_overall), .id = "model") - + psd_overall <- map(dat, + ~do_psd(st_apply(.x, c("x", "y"), mean))) %>% + bind_rows(.id = "model") psd_overall %>% ggplot(aes(wavenumber, r_spectrum, lty = model)) + geom_line() + - theme_minimal() + - scale_y_log10() + scale_x_log10() + annotation_logticks() -ggsave(here(glue("plt/PSD/radialPSD_samos_leadtime{lead_time}_overall.pdf"))) + theme_minimal(20) + + scale_y_log10() + scale_x_log10(sec.axis = sec_axis(trans = ~ (.^-1), name = "wavelength")) + annotation_logticks() + + labs(y = "Power") + ggsave(here(glue("plt/PSD/radialPSD_samos_leadtime{lead_time}_overall.pdf")), width = 10, height = 6) - # loop over timesteps and calculate mean psd - psd_cerra <- list() - psd_ds <- list() - psd_era5 <- list() - - for (i in seq_along(timesteps)){ - psd_cerra[[i]] <- radial.psd(replace_na(truncate_to_square(as.matrix(units::drop_units(cerra[[1]][,,timesteps[i]])))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) - psd_ds[[i]] <- radial.psd(replace_na(truncate_to_square(as.matrix(downscaled[[1]][,,timesteps[i]]))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) - psd_era5[[i]] <- radial.psd(replace_na(truncate_to_square(as.matrix(era5[[1]][,,timesteps[i]]))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) - } + # loop over timesteps and calculate mean psd + psd_timesteps <- map(dat, function(ds) { + map(timesteps, function(ts) do_psd(ds[,,,ts, drop = TRUE])) + }) # common dataframe - plotdat <- bind_rows(list( - cerra = psd_cerra |> - setNames(timesteps) |> - bind_rows(.id = "timesteps"), - samos = psd_ds |> - setNames(timesteps) |> - bind_rows(.id = "timesteps"), - era5 = psd_era5 |> - setNames(timesteps) |> - bind_rows(.id = "timesteps") - ), .id = "dataset") - + plotdat <- psd_timesteps %>% + map(~bind_rows(setNames(.x, timesteps), .id = "timesteps")) %>% + bind_rows(.id = "dataset") + # plot plotdat %>% ggplot(aes(x = wavenumber, y = r_spectrum, color = dataset, group = dataset)) + - # stat_smooth( - # geom="ribbon", - # aes(ymax = after_stat(y) + 1, - # ymin = after_stat(y) - 1, - # fill = dataset), - # alpha = 0.1, linetype = 0 - # ) + - # geom_smooth(se = FALSE)+ geom_interval() + - theme_minimal() + - scale_y_log10() + scale_x_log10() + annotation_logticks() + theme_minimal(20) + + scale_y_log10() + scale_x_log10(sec.axis = sec_axis(trans = ~ (.^-1), name = "wavelength")) + annotation_logticks() + + labs(y = "Power") - ggsave(here(glue("plt/PSD/radialPSD_samos_leadtime{lead_time}_distribution.pdf"))) + ggsave(here(glue("plt/PSD/radialPSD_samos_leadtime{lead_time}_distribution.pdf")), width = 10, height = 6) # aggregate to seasons and calculate PSD per season seasons <- list(DJF = c(12, 1, 2), MAM = 3:5, JJA = 6:8, SON = 9:11) per_season <- function(months) { - cerra_season <- st_apply(cerra %>% filter(lubridate::month(time) %in% months), c("x", "y"), mean) - downscaled_season <- st_apply(downscaled %>% filter(lubridate::month(time) %in% months), c("x", "y"), mean) - era5_season <- st_apply(era5 %>% filter(lubridate::month(time) %in% months), c("x", "y"), mean) - psd_cerra_season <- radial.psd(replace_na(truncate_to_square(as.matrix((cerra_season[[1]])))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) - psd_downscaled_season <- radial.psd(replace_na(truncate_to_square(as.matrix((downscaled_season[[1]])))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) - psd_era5_season <- radial.psd(replace_na(truncate_to_square(as.matrix((era5_season[[1]])))), plot = FALSE)#, scaled = FALSE, normalized = FALSE) - bind_rows(list(cerra = psd_cerra_season, downscaled = psd_downscaled_season, era5 = psd_era5_season), .id = "model") + map(dat, ~do_psd(st_apply(.x %>% filter(lubridate::month(time) %in% months), c("x", "y"), mean))) %>% + bind_rows(.id = "model") } + psd_season <- map(seasons, per_season) psd_season %>% bind_rows(.id = "season") %>% ggplot(aes(wavenumber, r_spectrum, color = season, lty = model)) + geom_line() + - theme_minimal() + - scale_y_log10() + scale_x_log10() + annotation_logticks() - ggsave(here(glue("plt/PSD/radialPSD_samos_leadtime{lead_time}_season.pdf"))) + theme_minimal(20) + + scale_y_log10() + scale_x_log10(sec.axis = sec_axis(trans = ~ (.^-1), name = "wavelength")) + annotation_logticks() + + labs(y = "Power") + + ggsave(here(glue("plt/PSD/radialPSD_samos_leadtime{lead_time}_season.pdf")), width = 10, height = 6) } lead_times <- seq(0, 21, by = 3)