diff --git a/.gitignore b/.gitignore index 6760f31..dacab48 100644 --- a/.gitignore +++ b/.gitignore @@ -52,9 +52,13 @@ rsconnect/ codes/drafts/ data/ +results/ functions/drafts/ *.zip -temp/ \ No newline at end of file +temp/ + +*_storr +*.parquet diff --git a/README.md b/README.md index 14f7687..ec9cc88 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ __Temperature by species__ - `get_edna_marinespecies.R`: aggregate species list from the eDNA project downloaded from the repository https://github.com/iobis/edna-species-lists - `temperature_database.R`: aggregate temperature/environmental layers to the OBIS/GBIF database (see details for the database here https://github.com/iobis/protectedseas-statistics) - `get_sst_marinespecies.R`: extract temperature information for all species listed on the sites -- `plot_sst_marinespecies.R`: plot temperature information for a selected species +- `get_sst_sites.R`: extract temperature information for each site (collection site, not _higherGeography_) There is also a function on the `functions` folder, `query_sst_marinespecies.R`, that can be used to query full data or summaries of temperature for a species (using the database). @@ -41,7 +41,13 @@ __Others__ - `get_depthprofiles.R`: see which is the actual depth that was used when obtaining the data from Copernicus - `generate_quarto_sites.R`: generate quarto pages by site +- `plot_sst_marinespecies.R`: plot temperature information for a selected species + +## Data analysis + +After retrieving temperature information for each species occurrence from OBIS/GBIF (aggregated by H3 cells on a resolution of 7), we generate a table with the different quantiles for each species. This is done still on the `get_sst_marinespecies.R` script. The same data is obtained for each site. +We join both tables to analyse the thermal limits for each species and to assess how species in each site might be at risk by climate change. This steps are done on the `analysis_sst_species.R` ---- diff --git a/codes/analysis_sst_species.R b/codes/analysis_sst_species.R new file mode 100644 index 0000000..44841e3 --- /dev/null +++ b/codes/analysis_sst_species.R @@ -0,0 +1,140 @@ +#################### eDNA expeditions - scientific analysis #################### +########################## Environmental data analysis ######################### +# January of 2024 +# Author: Silas C. Principe +# Contact: s.principe@unesco.org +# +############################# Species thermal limits ########################### + +# Load packages ---- +library(arrow) +library(dplyr) +library(tidyr) +library(ggplot2) +library(patchwork) + + +# Load files ---- +# Temperature per species dataset +temp_sp <- open_dataset("results/species_tsummaries.parquet") + +temp_sp_filt <- temp_sp %>% + filter(depth == "depthsurf") %>% + filter(variant == "mean") %>% + collect() + +# Temperature on sites +temp_sites <- read_parquet("results/sites_tsummaries.parquet") + +# Get sites list +sites_f <- list.files("data/species_list_full", full.names = T) + +sites_list <- lapply(sites_f, read.csv) +names(sites_list) <- gsub("_", " ", gsub("\\.csv", "", basename(sites_f))) + +sites_list <- bind_rows( + sites_list, .id = "higherGeography" +) + +sites_list_distinct <- sites_list %>% + select(species, AphiaID, source_obis, source_gbif, source_dna, higherGeography) %>% + group_by(higherGeography) %>% + distinct(species, .keep_all = T) + +# Get number of records +species_rec <- read_parquet("results/records_by_sp.parquet") + + +# Bind information ---- +# Bind species temperature +temp_sp_wide <- temp_sp_filt %>% + pivot_wider(names_from = metric, values_from = value) %>% + select(-depth, -variant) + +sites_list_binded <- sites_list_distinct %>% + left_join(temp_sp_wide) %>% + filter(!is.na(q_0.25)) + +# Bind temperature on sites +temp_sites_filt <- temp_sites %>% + filter(depth == "depthsurf") %>% + filter(variant == "mean") %>% + group_by(higherGeography, period) %>% + summarise(temperature = mean(temperature)) %>% + pivot_wider(names_from = period, values_from = temperature) %>% + mutate(higherGeography = tolower(higherGeography)) %>% + mutate(higherGeography = gsub("[']", " ", higherGeography)) %>% + mutate(higherGeography = gsub("[:,]", "", higherGeography)) + +colnames(temp_sites_filt)[2:length(temp_sites_filt)] <- paste0("site_", colnames(temp_sites_filt)[2:length(temp_sites_filt)]) + +sites_list_binded <- sites_list_binded %>% + left_join(temp_sites_filt) %>% + filter(!is.na(site_current)) + +# Bind number of records +sites_list_binded <- sites_list_binded %>% + left_join(species_rec) + +# Create tag for from where it comes +sites_list_binded <- sites_list_binded %>% + mutate(source_obis = ifelse(as.numeric(source_obis) == 1, 1, 0)) %>% + mutate(source_gbif = ifelse(as.numeric(source_gbif) == 1, 2, 0)) %>% + mutate(source_dna = ifelse(as.numeric(source_dna) == 1, 4, 0)) %>% + mutate(where = source_obis + source_gbif + source_dna) %>% + mutate(where = case_when( + where == 4 ~ "eDNA", + where %in% c(3, 2, 1) ~ "OBIS/GBIF", + where %in% c(6, 5, 7) ~ "Both" + )) + +# Save a copy +write_parquet(sites_list_binded, "results/tsummaries_aggregated.parquet") + + +# Get thermal ranges ---- +species_thermal_range <- sites_list_binded %>% + filter(records >= 10) %>% + mutate(loc_range = (site_current - q_0.01)/(q_0.99 - q_0.01)) + +# Plot to verify +ggplot(species_thermal_range) + + geom_boxplot(aes(x = loc_range)) + + +# Create a function to plot for just one site +plot_site <- function(site) { + + if (is.numeric(site)) { + site <- unique(sites_list_binded$higherGeography)[site] + } + + sel_site <- sites_list_binded %>% + filter(higherGeography == site) %>% + mutate(where = ifelse(source_obis & source_gbif & !source_dna, "OBIS/GBIF", + ifelse(source_dna & !source_obis & !source_gbif, "eDNA", "Both"))) + + plot_a <- sel_site %>% + mutate(status = ifelse(q_0.5 > min(sel_site$site_current), "Above", "Below")) %>% + ggplot() + + geom_jitter(aes(y = higherGeography, x = q_0.5, color = status), alpha = .2, height = 0.2) + + geom_vline(xintercept = min(sel_site$site_current), linetype = 2) + + theme_minimal() + + ylab(NULL) + xlab (NULL) + ggtitle("All species", site) + + theme(legend.title = element_blank(), + axis.text.y = element_blank()) + + plot_b <- sel_site %>% + mutate(status = ifelse(q_0.5 > min(sel_site$site_current), "Above", "Below")) %>% + ggplot() + + geom_jitter(aes(y = where, x = q_0.5, color = status), alpha = .2, height = 0.2) + + geom_vline(xintercept = min(sel_site$site_current), linetype = 2) + + theme_minimal() + + ylab(NULL) + ggtitle("By type") + + theme(legend.title = element_blank()) + + plot_a / plot_b + plot_layout(guides = "collect", heights = c(1,2)) & theme(legend.position = "bottom") + +} + +plot_site(3) diff --git a/codes/download_temperature_toolbox.R b/codes/download_temperature_toolbox.R index edc15f6..2fde7d8 100644 --- a/codes/download_temperature_toolbox.R +++ b/codes/download_temperature_toolbox.R @@ -9,6 +9,10 @@ # Load packages ---- library(terra) library(reticulate) +library(arrow) +library(parallel) +library(dplyr) +library(tidyr) cm <- import("copernicus_marine_client") @@ -113,36 +117,70 @@ for (site_idx in mhs$code) { # # # Print loaded dataset information # print(sst_l3s) - - -# Convert to parquet -proc <- job::job({ - lapply(list.files(outfolder, full.name = T, pattern = "\\.nc"), - function(fname, redo = T) { - - outfile <- gsub("\\.nc", ".parquet", fname) - - if (file.exists(outfile) & !redo) { - cat("File already processed, skipping.\n") - } else { - r <- rast(fname) - r_dat <- as.data.frame(r, xy = TRUE, time = TRUE) - r_dat <- r_dat %>% - pivot_longer(3:length(.), names_to = "time", values_to = "values") %>% - mutate(layer = gsub("\\..*", "", names(r))[1]) - arrow::write_parquet(r_dat, outfile, compression = "gzip") - } - - return(invisible(NULL)) - }) - - lf <- list.files(outfolder, full.name = T, pattern = "\\.parquet") - base <- list.files(outfolder, full.name = T, pattern = "\\.nc")[1] - - zip("temperature_dayly.zip", c(lf, base)) - - job::export("none") -}) +# +# +# # Convert to parquet +# proc <- job::job({ +# lapply(list.files(outfolder, full.name = T, pattern = "\\.nc"), +# function(fname, redo = F) { +# +# outfile <- gsub("\\.nc", ".parquet", fname) +# +# if (file.exists(outfile) & !redo) { +# cat("File already processed, skipping.\n") +# } else { +# cat("Processing", outfile, "\n") +# r <- rast(fname) +# +# if (file.size(fname)/1000/1000 > 1000) { +# +# cat("Splitting in pieces (large file) \n") +# +# fs::dir_create("temp") +# +# grid <- sf::st_make_grid(sf::st_bbox(r), cellsize = 1) +# +# mclapply(1:length(grid), function(x){ +# r_b <- r +# r_b <- crop(r, grid[x]) +# r_b <- as.data.frame(r_b, xy = TRUE, time = TRUE) +# r_b <- r_b %>% +# pivot_longer(3:length(.), names_to = "time", values_to = "values") %>% +# mutate(layer = gsub("\\..*", "", names(r)[1])) +# arrow::write_parquet(r_b, sprintf("temp/temp_%06d.parquet", x), +# compression = "gzip") +# }, mc.cores = 3) +# +# gen_files <- open_dataset("temp") +# +# gen_files %>% +# write_parquet(., outfile, compression = "gzip") +# +# fs::dir_delete("temp") +# } else { +# +# r_dat <- as.data.frame(r, xy = TRUE, time = TRUE) +# +# r_dat <- r_dat %>% +# pivot_longer(3:length(.), names_to = "time", values_to = "values") %>% +# mutate(layer = gsub("\\..*", "", names(r)[1])) +# +# arrow::write_parquet(r_dat, outfile, compression = "gzip") +# } +# +# +# } +# +# return(invisible(NULL)) +# }) +# +# lf <- list.files(outfolder, full.name = T, pattern = "\\.parquet") +# base <- list.files(outfolder, full.name = T, pattern = "\\.nc")[1] +# +# zip("temperature_dayly.zip", c(lf, base)) +# +# job::export("none") +# }) diff --git a/codes/get_point_sst.R b/codes/get_point_sst.R new file mode 100644 index 0000000..2b579c8 --- /dev/null +++ b/codes/get_point_sst.R @@ -0,0 +1,141 @@ +#################### eDNA expeditions - scientific analysis #################### +########################## Environmental data download ######################### +# January of 2024 +# Author: Silas C. Principe +# Contact: s.principe@unesco.org +# +######################## Sea temperature from Copernicus ####################### + +# Load packages ---- +library(dplyr) +library(stringr) +library(purrr) +library(parallel) +library(terra) +library(glue) +library(arrow) +fs::dir_create("data/agg_temperature") + + +# Get list of temperature files ---- +outfolder <- "data/temperature" + +fl <- list.files(outfolder, full.name = T, pattern = "\\.nc") + + +# Load occurrence info ---- +# Bind all occurrence files +occurrence_files <- list.files("data/species_lists/output", "*Occurrence*", full.names = TRUE) + +occurrence <- map(occurrence_files, read.table, sep = "\t", quote = "", header = TRUE) %>% + bind_rows() %>% + mutate_if(is.character, na_if, "") %>% + mutate( + species = ifelse(taxonRank == "species", scientificName, NA), + aphiaid = as.numeric(str_replace(scientificNameID, "urn:lsid:marinespecies.org:taxname:", "")) + ) + +# Get unique sites / get the cell of site ---- +base <- rast(res = 1/12) + +sites <- occurrence %>% + group_by(higherGeography, locality, decimalLongitude, decimalLatitude) %>% + distinct(decimalLongitude, decimalLatitude) %>% + ungroup() %>% + filter(!is.na(decimalLongitude)) %>% + mutate(unsid = 1:nrow(.)) %>% + rowwise() %>% + mutate(cell = cellFromXY(base, data.frame(decimalLongitude, decimalLatitude))) + + +sites_unique <- sites %>% + ungroup() %>% + distinct(cell, .keep_all = T) + + +# Load MHS shapefile ---- +mhs <- vect("data/shapefiles/marine_world_heritage.gpkg") +# Add an index - we put from 0 to keep consistency with the JupytherHub approaches +mhs$code <- 0:(length(mhs)-1) + +# Remove special strings +mhs$name <- stringi::stri_trans_general(str = mhs$name, id = "Latin-ASCII") +sites$higherGeography <- stringi::stri_trans_general(str = sites$higherGeography, id = "Latin-ASCII") +sites_unique$higherGeography <- stringi::stri_trans_general(str = sites_unique$higherGeography, id = "Latin-ASCII") + + +# Extract info for each site ---- +# Define selected depths +depths <- c(0, 25, 50, 75, 100) + +for (i in 1:nrow(sites_unique)) { + + cli::cli_h1(glue("Running site {i} out of {nrow(sites_unique)}")) + # Get site code + site_code <- mhs$code[mhs$name == sites_unique$higherGeography[i]] + + tgt <- as.data.frame(sites_unique[i, c("decimalLongitude", "decimalLatitude")]) + + outfile <- glue("data/agg_temperature/site={site_code[1]}_locality={sites_unique$locality[i]}_unsid={sites_unique$unsid[i]}.parquet") + + if (!file.exists(outfile)) { + + # Get for the selected depths + for (d in depths) { + cli::cli_progress_message(d) + # Load raster + # We select the first because for some there is with and without buffer + # But for this purpose they are the same + sst <- rast(glue("data/temperature/var=thetao_site={site_code}_depth={d}_product=glorys.nc")[1]) + + if (d == 0) { + # Test if is NA + trast <- sst[[1]] + tval <- trast[cellFromXY(trast, tgt)] + + if (is.na(tval[1,1])) { + st <- 1 + while (all(is.na(tval[,1])) & st < 5) { + mat <- c(3, 5, 7, 9)[st] + adj_cells <- adjacent(trast, cellFromXY(trast, tgt), + directions = matrix(1, nrow = mat, ncol = mat)) + tval <- trast[as.vector(adj_cells)] + st <- st+1 + } + tgc <- as.vector(adj_cells)[which(!is.na(tval[,1]))][1] + } else { + tgc <- cellFromXY(trast, tgt) + } + } + + values <- sst[tgc] + + values <- tidyr::pivot_longer(values, 1:length(values), + names_to = "time", values_to = "temperature") + + values$time <- gsub(" UTC", "", time(sst)) + values$site <- sites_unique$higherGeography[i] + values$depth <- d + + if (d == 0) { + all_values <- values + } else { + if (nrow(values) > 0 & !is.na(values$temperature[1])) { + all_values <- bind_rows(all_values, values) + } + } + + cli::cli_progress_done() + } + + write_parquet(all_values, outfile) + + + } else { + cli::cat_line("Already done, skipping.") + } + +} + +write.csv(sites, "data/agg_temperature/all_sites.csv", row.names = F) +write.csv(sites_unique, "data/agg_temperature/unique_sites.csv", row.names = F) diff --git a/codes/get_sst_marinespecies.R b/codes/get_sst_marinespecies.R index 22f068c..3e5c01e 100644 --- a/codes/get_sst_marinespecies.R +++ b/codes/get_sst_marinespecies.R @@ -4,26 +4,24 @@ # Author: Silas C. Principe # Contact: s.principe@unesco.org # -###################### Retrieve SST summaries for species ###################### +############# Retrieve SST summaries for species (full version) ################ # Load packages ---- library(DBI) -library(data.table) library(dplyr) library(tidyr) -library(storr) - - -# Load files ---- -species_list <- readRDS("data/marine_species.rds") +library(parquet) +fs::dir_create("results") +# Load files/settings ---- +# Database database <- "~/Downloads/protectedseas/database.sqlite" +con <- dbConnect(RSQLite::SQLite(), database) +# Settings gbif_occurrence_table <- "gbif_occurrence" obis_occurrence_table <- "obis_occurrence" - -# Select environmental variables ---- env_vars <- c( "thetao_baseline_depthmax_max", "thetao_baseline_depthmax_mean", "thetao_baseline_depthmax_min", "thetao_baseline_depthmean_max", @@ -32,99 +30,87 @@ env_vars <- c( "thetao_baseline_depthsurf_min" ) +# Load species list +species_list <- bind_rows( + lapply(list.files("data/species_list_full", full.names = T), read.csv) +) +species_list <- species_list %>% + select(species, AphiaID, source_obis, source_gbif, source_dna, group) %>% + distinct(species, .keep_all = T) -st <- storr_rds("data/speciestemp_storr") - +sel_species <- species_list$species +sel_species <- paste0("'", sel_species, "'", collapse = ", ") -# Get temperature for each species ---- -job::job( - { - con <- dbConnect(RSQLite::SQLite(), database) - - for (sp in 1:nrow(species_list)) { - tsp <- species_list[sp,] - cli::cat_line("Running species ", cli::col_cyan(tsp$scientificName)) - - cli::cli_progress_step("Querying OBIS") - - sel_species <- tsp$scientificName - - obis_query <- glue::glue(" +obis_query <- glue::glue(" with filtered_data as ( - select species, h3 + select species, h3, records from {obis_occurrence_table} - where {obis_occurrence_table}.species = '{sel_species}' + where {obis_occurrence_table}.species in ({sel_species}) ) - select species, filtered_data.h3, {paste(env_vars, collapse = ', ')} + select species, filtered_data.h3, filtered_data.records, {paste(env_vars, collapse = ', ')} from filtered_data inner join env_current on filtered_data.h3 = env_current.h3; ") - - obis_res <- dbSendQuery(con, obis_query) - obis_species <- dbFetch(obis_res) - - cli::cli_progress_step("Querying GBIF") - sel_species <- ifelse(!is.na(tsp$scientificName_gbif), - ifelse( - tsp$scientificName != tsp$scientificName_gbif, - tsp$scientificName_gbif, - tsp$scientificName - ), tsp$scientificName) - - gbif_query <- glue::glue(" + +obis_res <- dbSendQuery(con, obis_query) +obis_species <- dbFetch(obis_res) + +gbif_query <- glue::glue(" with filtered_data as ( - select species, h3 + select species, h3, records from {gbif_occurrence_table} - where {gbif_occurrence_table}.species = '{sel_species}' + where {gbif_occurrence_table}.species in ({sel_species}) ) - select species, filtered_data.h3, {paste(env_vars, collapse = ', ')} + select species, filtered_data.h3, filtered_data.records, {paste(env_vars, collapse = ', ')} from filtered_data inner join env_current on filtered_data.h3 = env_current.h3; ") - - gbif_res <- dbSendQuery(con, gbif_query) - gbif_species <- dbFetch(gbif_res) - - all_result <- bind_rows(obis_species, gbif_species) - - if (nrow(all_result) == 0) { - all_result <- NULL - } else { - q25_fun <- function(x) quantile(x, .25) - q75_fun <- function(x) quantile(x, .75) - all_result <- all_result %>% - select(-h3, -species) %>% - summarise(across(starts_with("thetao"), - list(max = max, min = min, - mean = mean, sd = sd, median = median, - q25 = q25_fun, q75 = q75_fun), - .names = "{.col}${.fn}")) %>% - pivot_longer(1:length(.), names_to = "variable", values_to = "value") %>% - separate_wider_delim(cols = "variable", names = c("variable", "variant"), - delim = "$") %>% - pivot_wider(names_from = variant, values_from = value) %>% - mutate(scientificName = tsp$scientificName, - AphiaID = tsp$AphiaID) - - - } - - st$set(sp, all_result) - - cli::cli_progress_done() - - } - - dbDisconnect(con) - job::export("none") - } -) +gbif_res <- dbSendQuery(con, gbif_query) +gbif_species <- dbFetch(gbif_res) + +all_result <- bind_rows(obis_species, gbif_species) + +# Get summaries +rec_by_sp <- all_result %>% + group_by(species) %>% + summarise(records = sum(records)) + +rec_by_h3 <- all_result %>% + group_by(h3) %>% + summarise(records = sum(records)) + +species_by_h3 <- all_result %>% + group_by(h3) %>% + distinct(species, .keep_all = T) %>% + summarise(n_species = n()) + +quantile_df <- function(x, probs = c(0, 0.01, 0.05, 0.1, 0.25, + 0.5, + 0.75, 0.9, 0.95, 0.99, 1)) { + res <- tibble(metric = c(paste0("q_", probs), "mean", "sd"), + value = c(quantile(x, probs), mean(x), sd(x))) + res +} + +final_result <- all_result %>% + select(-h3, -records) %>% + pivot_longer(2:length(.), names_to = "variable", values_to = "values") %>% + group_by(variable, species) %>% + reframe(quantile_df(values)) %>% + separate_wider_delim(cols = "variable", + names = c("variable", "baseline", "depth", "variant"), + delim = "_") %>% + select(-baseline, -variable) + +write_parquet(final_result, "results/species_tsummaries.parquet") +write_parquet(rec_by_sp, "results/records_by_sp.parquet") +write_parquet(rec_by_h3, "results/records_by_h3.parquet") +write_parquet(species_by_h3, "results/species_by_h3.parquet") +write_parquet(species_list, "results/species_list.parquet") + +dbDisconnect(con) -# Get ecological information of species -job::job({ - obissdm::mp_get_ecoinfo(species_list = species_list$AphiaID) - job::export("none") -}) +rm(list = ls());gc() ### END \ No newline at end of file diff --git a/codes/get_sst_sites.R b/codes/get_sst_sites.R new file mode 100644 index 0000000..15ad2ad --- /dev/null +++ b/codes/get_sst_sites.R @@ -0,0 +1,132 @@ +#################### eDNA expeditions - scientific analysis #################### +########################## Environmental data download ######################### +# January of 2024 +# Author: Silas C. Principe +# Contact: s.principe@unesco.org +# +##################### Retrieve SST summaries for the sites ##################### + +# Load packages ---- +library(terra) +library(dplyr) +# Folder where environmental files are located, from another project +ffold <- "../../../mpa_europe/mpaeu_sdm/" + +# Load sites list +sites <- read.csv("data/agg_temperature/unique_sites.csv") + +# Load a base raster to check which cell is valid +base <- rast(paste0(ffold, "data/env/current/thetao_baseline_depthsurf_mean.tif")) + +cells <- terra::extract(base, sites[,c("decimalLongitude", "decimalLatitude")], xy = T, cells = T) + +valid_cells <- lapply(1:nrow(cells), function(id){ + if (is.na(cells[id, 2])) { + + tval <- cells[id, 2] + + starter <- cells$cell[id] + + st <- 1 + while (all(is.na(tval)) & st < 5) { + mat <- c(3, 5, 7, 9)[st] + adj_cells <- adjacent(base, starter, + directions = matrix(1, nrow = mat, ncol = mat)) + tval <- unname(unlist(base[as.vector(adj_cells)])) + st <- st+1 + } + + tgc <- as.vector(adj_cells)[which(!is.na(tval))][1] + + return(tgc) + + } else { + return(cells$cell[id]) + } +}) + +cells$valid <- unlist(valid_cells) + +# Do last check +any(is.na(base[cells$valid])) + +cells_xy <- xyFromCell(base, cells$valid) + + +# Get polygons for the h3 sites +sites_pol <- h3jsr::cell_to_polygon(h3jsr::point_to_cell(cells_xy, res = 7)) +sites_pol <- vect(sites_pol) + +# Get temperature summaries for each period +all_vars <- list.files(paste0(ffold, "data/env"), recursive = T, full.names = T) +all_vars <- all_vars[!grepl("aux", all_vars)] +all_vars <- all_vars[grepl("thetao", all_vars)] +all_vars <- all_vars[!grepl("range", all_vars)] + +# List scenarios and decades +scenarios <- c("current", paste0("ssp", c(126, 245, 370, 460, 585))) +dec <- c("dec50", "dec100") + +# Get summaries +summaries <- lapply(scenarios, function(sc) { + + layers <- all_vars[grepl(sc, all_vars)] + + if (sc == "current") { + r <- rast(layers) + names(r) <- gsub("thetao_baseline_", "", gsub(".tif", "", basename(layers))) + + r_vals <- terra::extract(r, sites_pol, fun = mean, na.rm = T) + + r_vals <- r_vals %>% + select(-ID) %>% + mutate(unsid = sites$unsid) %>% + left_join(sites %>% select(higherGeography, locality, unsid)) %>% + mutate(period = sc) %>% + pivot_longer(1:9, names_to = "variable", values_to = "temperature") + + } else { + + r <- rast(layers[grepl(dec[1], layers)]) + names(r) <- gsub("thetao_", "", gsub(".tif", "", basename(layers[grepl(dec[1], layers)]))) + + r_vals_a <- terra::extract(r, sites_pol, fun = mean, na.rm = T) + + r_vals_a <- r_vals_a %>% + select(-ID) %>% + mutate(unsid = sites$unsid) %>% + left_join(sites %>% select(higherGeography, locality, unsid)) %>% + mutate(period = paste0(sc, "_", dec[1])) %>% + pivot_longer(1:9, names_to = "variable", values_to = "temperature") + + rm(r) + + r <- rast(layers[grepl(dec[2], layers)]) + names(r) <- gsub("thetao_", "", gsub(".tif", "", basename(layers[grepl(dec[2], layers)]))) + + r_vals_b <- terra::extract(r, sites_pol, fun = mean, na.rm = T) + + r_vals_b <- r_vals_b %>% + select(-ID) %>% + mutate(unsid = sites$unsid) %>% + left_join(sites %>% select(higherGeography, locality, unsid)) %>% + mutate(period = paste0(sc, "_", dec[2])) %>% + pivot_longer(1:9, names_to = "variable", values_to = "temperature") + + r_vals <- bind_rows(r_vals_a, r_vals_b) + + } + + return(r_vals) + +}) + +final_list <- bind_rows(summaries) + +final_list <- final_list %>% + mutate(depth = ifelse(grepl("depthmax", variable), "depthmax", + ifelse(grepl("depthsurf", variable), "depthsurf", "depthmean"))) %>% + mutate(variant = ifelse(grepl("_max", variable), "max", + ifelse(grepl("_mean", variable), "mean", "min"))) + +arrow::write_parquet(final_list, "results/sites_tsummaries.parquet") diff --git a/functions/query_sst_marinespecies.R b/functions/query_sst_marinespecies.R index e6220ef..f50e540 100644 --- a/functions/query_sst_marinespecies.R +++ b/functions/query_sst_marinespecies.R @@ -32,6 +32,9 @@ query_sst <- function(species, depth = NULL, variant = NULL){ library(DBI) + library(dplyr) + library(tidyr) + library(data.table) database <- "~/Downloads/protectedseas/database.sqlite" con <- dbConnect(RSQLite::SQLite(), database) @@ -107,20 +110,40 @@ query_sst <- function(species, warning("No results found for the species") } else { if (return_summary) { - q25_fun <- function(x) quantile(x, .25) - q75_fun <- function(x) quantile(x, .75) + # q25_fun <- function(x) quantile(x, .25) + # q75_fun <- function(x) quantile(x, .75) + # final_result <- all_result %>% + # select(-h3, -species) %>% + # summarise(across(starts_with("thetao"), + # list(max = max, min = min, + # mean = mean, sd = sd, median = median, + # q25 = q25_fun, q75 = q75_fun), + # .names = "{.col}${.fn}")) %>% + # pivot_longer(1:length(.), names_to = "variable", values_to = "value") %>% + # separate_wider_delim(cols = "variable", names = c("variable", "variant"), + # delim = "$") %>% + # pivot_wider(names_from = variant, values_from = value) %>% + # mutate(scientificName = species) + + quantile_df <- function(x, probs = c(0, 0.01, 0.05, 0.1, 0.25, + 0.5, + 0.75, 0.9, 0.95, 0.99, 1)) { + res <- tibble(metric = c(paste0("q_", probs), "mean", "sd"), + value = c(quantile(x, probs), mean(x), sd(x))) + res + } + final_result <- all_result %>% select(-h3, -species) %>% - summarise(across(starts_with("thetao"), - list(max = max, min = min, - mean = mean, sd = sd, median = median, - q25 = q25_fun, q75 = q75_fun), - .names = "{.col}${.fn}")) %>% - pivot_longer(1:length(.), names_to = "variable", values_to = "value") %>% - separate_wider_delim(cols = "variable", names = c("variable", "variant"), - delim = "$") %>% - pivot_wider(names_from = variant, values_from = value) %>% + pivot_longer(1:length(.), names_to = "variable", values_to = "values") %>% + group_by(variable) %>% + reframe(quantile_df(values)) %>% + separate_wider_delim(cols = "variable", + names = c("variable", "baseline", "depth", "variant"), + delim = "_") %>% + select(-baseline, -variable) %>% mutate(scientificName = species) + } else { final_result <- all_result }