Skip to content

Commit

Permalink
Merge pull request #12 from ECMWFCode4Earth/psd
Browse files Browse the repository at this point in the history
power spectral density analysis
  • Loading branch information
konradmayer authored Sep 12, 2023
2 parents e80c881 + c36136b commit 4eea2ee
Show file tree
Hide file tree
Showing 4 changed files with 243 additions and 4 deletions.
57 changes: 53 additions & 4 deletions dev/postprocessing/plot_results.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
#!/usr/bin/env Rscript

##################################################################
#Description : plot fields (era5, cerra, downscaled and
# difference) and PSD for individual timesteps
#Author : Konrad Mayer
##################################################################

library(here)
library(lubridate)
library(stars)
Expand All @@ -8,6 +16,31 @@ library(dplyr)
library(patchwork)
library(metR)
library(stringr)
library(radialpsd)

# helpers
truncate_to_square <- function(m) {
dim_m <- dim(m)
mindim <- min(dim_m)
start <- floor((dim_m - mindim) / 2)
end <- start + mindim
start <- start + 1
m[start[1]:end[1], start[2]:end[2]]
}

replace_na <- function(m, replacement = 0) {
m[is.na(m)] <- replacement
m
}

do_psd <- function(x) {
tmp <- replace_na(truncate_to_square(as.matrix(x[[1]])))
psd <- radial.psd(tmp, plot = FALSE, scaled = FALSE, normalized = FALSE)
max_dist <- sqrt(sum((ceiling(dim(tmp)/2)-1)^2))
mutate(psd, wavenumber = wavenumber / max_dist)
}

# main

per_lead_time <- function(lead_time) {

Expand All @@ -20,11 +53,14 @@ per_lead_time <- function(lead_time) {
downscaled <- read_stars(here(glue("dat/TESTING/SAMOS/postprocessed/samos-postprocessed_{lead_time}.nc"))) %>%
setNames("samos")

era5_regridded <- era5 <- read_stars(here(glue("dat/TESTING/PREPROCESSED/ERA5_regridded/t2m_era5_{lead_time}.nc"))) %>% # only needed for PSD
setNames("era5")

diff_field <- downscaled[0]
diff_field[["diff_samos_cerra"]] <- downscaled[[1]] - units::drop_units(cerra[[1]])

dothis <- function(eval_datetime) {
plts <- map2(
plts <- c(map2(
list(era5, cerra, downscaled, diff_field) %>% map(~filter(.x, time == eval_datetime)),
list(scale_fill_viridis_c, scale_fill_viridis_c, scale_fill_viridis_c, scale_fill_divergent),
~ggplot() +
Expand All @@ -37,10 +73,23 @@ per_lead_time <- function(lead_time) {
) +
.y() +
theme_minimal()
)
),
list(map(
list(era5_regridded, cerra, downscaled) %>% map(~filter(.x %>%.[1, 3:238,6:160], time == eval_datetime)[drop = TRUE]),
~{do_psd(st_apply(.x, c("x", "y"), mean))}
) %>%
setNames(c("era5", "cerra", "samos")) %>%
bind_rows(.id = "model") %>%
ggplot(aes(wavenumber, r_spectrum, lty = model)) +
geom_line() +
theme_minimal(20) +
scale_y_log10() + annotation_logticks() + scale_x_log10(sec.axis = sec_axis(trans = ~ (.^-1), name = "wavelength")) +
labs(y = "Power")
))


wrap_plots(plts, ncol = 2) + plot_annotation(title = eval_datetime)
ggsave(here(glue("plt/SAMOS/{format(eval_datetime, '%Y%m%dT%H%M%S')}_testset.pdf")), height = 7, width = 10)
wrap_plots(plts[1:4], ncol = 2) | plts[[5]] + plot_annotation(title = eval_datetime)
ggsave(here(glue("plt/SAMOS/{format(eval_datetime, '%Y%m%dT%H%M%S')}_testset.pdf")), height = 6, width = 20)

}

Expand Down
2 changes: 2 additions & 0 deletions dev/preprocessing/regrid_era5.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,6 @@ for lt in "${leadtime[@]}"; do
stdbuf -oL cdo griddes dat/TRAINING/RESIDUALS/CERRA/t2m_cerra_${lt}_residuals.nc > /tmp/targetgrid.txt
# bilinear interpolation
cdo remapbil,/tmp/targetgrid.txt ${projectroot}/dat/${dataset}/RESIDUALS/ERA5/t2m_era5_${lt}_residuals.nc ${projectroot}/dat/${dataset}/RESIDUALS/ERA5_regridded/t2m_era5_${lt}_residuals.nc
# bilinear interpolation of raw data
cdo remapbil,/tmp/targetgrid.txt ${projectroot}/dat/${dataset}/PREPROCESSED/ERA5/t2m_era5_${lt}.nc ${projectroot}/dat/${dataset}/PREPROCESSED/ERA5_regridded/t2m_era5_${lt}.nc
done
133 changes: 133 additions & 0 deletions dev/verification/spatial_structures/compare_pds.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#!/usr/bin/env Rscript

##################################################################
#Description : power spectral density analysis
#Author : Konrad Mayer
##################################################################

library(stars)
library(purrr)
library(here)
library(glue)
library(stringr)
library(tidyverse)
library(radialpsd)
library(ggfan)

# helpers ----------------------------------------------------------------------
# pad_to_square <- function(m, pad = 0) {
# dim_m <- dim(m)
# maxdim <- max(dim_m)
# out <- matrix(pad, nrow = maxdim, ncol = maxdim)
# start <- floor((maxdim - dim_m) / 2)
# end <- start + dim_m
# start <- start + 1
# out[start[1]:end[1], start[2]:end[2]] <- m
# out
# }

truncate_to_square <- function(m) {
dim_m <- dim(m)
mindim <- min(dim_m)
start <- floor((dim_m - mindim) / 2)
end <- start + mindim
start <- start + 1
m[start[1]:end[1], start[2]:end[2]]
}

replace_na <- function(m, replacement = 0) {
m[is.na(m)] <- replacement
m
}

do_psd <- function(x) {
tmp <- replace_na(truncate_to_square(as.matrix(x[[1]])))
psd <- radial.psd(tmp, plot = FALSE, scaled = FALSE, normalized = FALSE)
max_dist <- sqrt(sum((ceiling(dim(tmp)/2)-1)^2))
mutate(psd, wavenumber = wavenumber / max_dist)
}

drop_units <- function(x) {
tryCatch(
units::drop_units(x),
error = function(e) {
return(x)
}
)
}
# main -------------------------------------------------------------------------

dothis <- function(lead_time) {
lead_time <- str_pad(lead_time, 2, pad = "0")

# load datasets (crop a bit as SAMOS baseline has some values NA around)
datasets <- c(
cerra = "dat/TESTING/PREPROCESSED/CERRA/t2m_cerra_{lead_time}.nc",
downscaled = "dat/TESTING/SAMOS/postprocessed/samos-postprocessed_{lead_time}.nc",
era5 = "dat/TESTING/PREPROCESSED/ERA5_regridded/t2m_era5_{lead_time}.nc"
)
dat <- map2(datasets, names(datasets),
~setNames(drop_units(read_stars(here(glue(.x)))), .y)[1, 3:238,6:160]) # data gets truncated to get rid of NA values on two of the edges caused by CDO bilinear interpolation

timesteps <- seq_len(dim(dat[[1]])["time"])

#overall

psd_overall <- map(dat,
~do_psd(st_apply(.x, c("x", "y"), mean))) %>%
bind_rows(.id = "model")

psd_overall %>%
ggplot(aes(wavenumber, r_spectrum, lty = model)) +
geom_line() +
theme_minimal(20) +
scale_y_log10() + scale_x_log10(sec.axis = sec_axis(trans = ~ (.^-1), name = "wavelength")) + annotation_logticks() +
labs(y = "Power")

ggsave(here(glue("plt/PSD/radialPSD_samos_leadtime{lead_time}_overall.pdf")), width = 10, height = 6)


# loop over timesteps and calculate mean psd
psd_timesteps <- map(dat, function(ds) {
map(timesteps, function(ts) do_psd(ds[,,,ts, drop = TRUE]))
})
# common dataframe
plotdat <- psd_timesteps %>%
map(~bind_rows(setNames(.x, timesteps), .id = "timesteps")) %>%
bind_rows(.id = "dataset")

# plot
plotdat %>%
ggplot(aes(x = wavenumber, y = r_spectrum, color = dataset, group = dataset)) +
geom_interval() +
theme_minimal(20) +
scale_y_log10() + scale_x_log10(sec.axis = sec_axis(trans = ~ (.^-1), name = "wavelength")) + annotation_logticks() +
labs(y = "Power")

ggsave(here(glue("plt/PSD/radialPSD_samos_leadtime{lead_time}_distribution.pdf")), width = 10, height = 6)

# aggregate to seasons and calculate PSD per season
seasons <- list(DJF = c(12, 1, 2), MAM = 3:5, JJA = 6:8, SON = 9:11)

per_season <- function(months) {
map(dat, ~do_psd(st_apply(.x %>% filter(lubridate::month(time) %in% months), c("x", "y"), mean))) %>%
bind_rows(.id = "model")
}

psd_season <- map(seasons, per_season)

psd_season %>%
bind_rows(.id = "season") %>%
ggplot(aes(wavenumber, r_spectrum, color = season, lty = model)) +
geom_line() +
theme_minimal(20) +
scale_y_log10() + scale_x_log10(sec.axis = sec_axis(trans = ~ (.^-1), name = "wavelength")) + annotation_logticks() +
labs(y = "Power")

ggsave(here(glue("plt/PSD/radialPSD_samos_leadtime{lead_time}_season.pdf")), width = 10, height = 6)
}

lead_times <- seq(0, 21, by = 3)
walk(lead_times, dothis)

# TODO: PSD needs to be done on projected data, otherwise distance is not valid
55 changes: 55 additions & 0 deletions dev/verification/spatial_structures/compare_variogram.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
library(tidyverse)
library(stringr)
library(here)
library(stars)
library(glue)
library(geoR)

dothis <- function(lead_time) {
lead_time = 12# for testing
lead_time <- str_pad(lead_time, 2, pad = "0")

# load datasets (crop a bit as SAMOS baseline has some values NA around)
cerra <- read_stars(here(glue("dat/TESTING/PREPROCESSED/CERRA/t2m_cerra_{lead_time}.nc"))) %>%
setNames("cerra") %>%
.[1, 3:238,6:160]
downscaled <- read_stars(here(glue("dat/TESTING/SAMOS/postprocessed/samos-postprocessed_{lead_time}.nc"))) %>%
setNames("samos") %>%
.[1, 3:238,6:160]



# cerra_tibble <- as_tibble(cerra[,,,10]) # how to deal with timesteps? including more results in too big ram use
# downscaled_tibble <- as_tibble(downscaled[,,,10])
# variog_cerra = variog(data = cerra_tibble[,4], coords = as.matrix(cerra_tibble[1:2]))
# variog_downscaled = variog(data = downscaled_tibble[,4], coords = as.matrix(downscaled_tibble[1:2]))

# plot(variog_cerra)
# points(variog_downscaled$u, variog_downscaled$v, col = "red")

# plot(variog_cerra)
# points(variog_downscaled$u, variog_downscaled$v, col = "red")

seasons <- list(DJF = c(12, 1, 2), MAM = 3:5, JJA = 6:8, SON = 9:11)


# TODO: variogram needs to be done on projected data, otherwise distance is not valid
per_season <- function(months) {
cerra_tibble <- as_tibble(st_apply(cerra %>% filter(lubridate::month(time) %in% months), c("x", "y"), mean))
downscaled_tibble <- as_tibble(st_apply(downscaled %>% filter(lubridate::month(time) %in% months), c("x", "y"), mean))
variog_cerra = variog(data = cerra_tibble[,3], coords = as.matrix(cerra_tibble[1:2]))
variog_downscaled = variog(data = downscaled_tibble[,3], coords = as.matrix(downscaled_tibble[1:2]))
tibble(distance = variog_cerra$u, semivar_cerra = variog_cerra$v, semivar_downscaled = variog_downscaled$v)
}

variog_seasons <- purrr::map(seasons, per_season) %>% setNames(names(seasons))

variog_seasons %>%
bind_rows(.id = "season") %>%
pivot_longer(-c(1:2), names_to = "model") %>%
ggplot(aes(distance, value, lty = model, color = season)) +
geom_line() +
theme_minimal()
ggsave(here(glue("plt/seasonal_variogram_samos_leadtime{lead_time}.pdf")))
}

0 comments on commit 4eea2ee

Please sign in to comment.