From 6d819e1d0af6a35f8c685294e5edbc30654aae90 Mon Sep 17 00:00:00 2001 From: silasprincipe <53846571+silasprincipe@users.noreply.github.com> Date: Tue, 19 Nov 2024 15:15:08 +0100 Subject: [PATCH] Update codes for glossy report Reflect new lists on nov2024 --- codes/get_sst_summaries.R | 265 +++++++++++++++++++++++++++++ codes/nearby_from_nc.R | 188 +++++++++++++++++++++ codes/nearby_from_nc_terra.R | 70 ++++++++ codes/occurrence.R | 56 +++++++ codes/plot_paf_newlist.R | 315 +++++++++++++++++++++++++++++++++++ 5 files changed, 894 insertions(+) create mode 100644 codes/get_sst_summaries.R create mode 100644 codes/nearby_from_nc.R create mode 100644 codes/nearby_from_nc_terra.R create mode 100644 codes/occurrence.R create mode 100644 codes/plot_paf_newlist.R diff --git a/codes/get_sst_summaries.R b/codes/get_sst_summaries.R new file mode 100644 index 0000000..b5cf16c --- /dev/null +++ b/codes/get_sst_summaries.R @@ -0,0 +1,265 @@ +#################### eDNA expeditions - scientific analysis #################### +########################## Environmental data download ######################### +# November of 2024 +# Author: Silas C. Principe +# Contact: s.principe@unesco.org +# +############### Retrieve SST summaries for species and sites ################### + +# Dependencies note +# Part of this code can be executed with Python, using Xarray and Dask +# You need to install those libraries +# using reticulate::py_install() +# To use that option, you can source the code "nearby_from_nc.R" +# Here we use the R terra package instead. The functions syntax are exactly the same + + +# Load packages ----- +library(arrow) +library(dplyr) +library(tidyr) +library(biooracler) +library(obissdm) +library(h3jsr) +library(terra) + + + +# Settings ----- +datafolder <- "data" +fs::dir_create(datafolder) +outfolder <- "results" + + + +# Download data ------ +# Species grids +gridf <- file.path(datafolder, 'speciesgrids') +fs::dir_create(gridf) +system(glue::glue("aws s3 cp --recursive s3://obis-products/speciesgrids/h3_7 {gridf} --no-sign-request")) + +# Environmental data +datasets <- c( + "thetao_baseline_2000_2019_depthsurf" +) + +datasets <- c(datasets, + gsub("depthsurf", "depthmean", datasets)) + +future_scenarios <- c("ssp126", "ssp245", "ssp370", "ssp585") + +time_steps <- list( + current = c("2000-01-01T00:00:00Z", "2010-01-01T00:00:00Z"), #2000-2010/2010-2020 + dec50 = c("2030-01-01", "2040-01-01"), #2030-2040/2040-2050 + dec100 = c("2080-01-01", "2090-01-01") #2080-2090/2090-2100 +) + +variables <- c("mean") + +get_env_data(datasets = datasets, future_scenarios = future_scenarios, + time_steps = time_steps, variables = variables, + terrain_vars = NULL, average_time = T, outdir = datafolder) + + + +# Prepare environmental layers ------- +fpaths <- file.path(datafolder, paste0( + "future/", future_scenarios, "/thetao_", future_scenarios, "_depthsurf_mean.tif" +)) +all_paths <- c( + file.path(datafolder, "current/thetao_baseline_depthsurf_mean.tif"), + gsub("mean", "dec50_mean", fpaths), + gsub("mean", "dec100_mean", fpaths) +) +surface <- rast(all_paths) +bottom <- rast(gsub("depthsurf", "depthmean", all_paths)) + +names(surface) <- gsub("thetao_", "", gsub(".tif", "", basename(sources(surface)))) +names(bottom) <- gsub("thetao_", "", gsub(".tif", "", basename(sources(bottom)))) + +# Save as a single netcdf, as this will be used by the R or Python functions +writeCDF(sds(surface[[1]], bottom[[1]]), file.path(datafolder, "biooracle_data.nc"), + overwrite = T) + + +# Get list of species ------ +source("codes/occurrence.R") +download_occurrence() + +occ <- read_occurrence_data() + +occ <- occ %>% + filter(taxonRank == "species") %>% + select(scientificName, higherGeography) %>% + group_by(scientificName, higherGeography) %>% + distinct(scientificName, .keep_all = T) %>% + ungroup() + +species_list <- occ %>% distinct(scientificName) + + + +# Extract data and get nearby for those invalid ------- +source("codes/nearby_from_nc_terra.R") + +# Uncomment and use lines below to use Python version +# library(reticulate) +# source("codes/nearby_from_nc.R") +# da <- import("dask") +# dd <- import("dask.distributed") +# client <- dd$Client() +# browseURL("http://localhost:8787/status") +# xr <- import("xarray") + +target_nc <- file.path(datafolder, "biooracle_data.nc") + +quantile_df <- function(x, probs = c(0.05, 0.95)) { + res <- tibble(metric = c(paste0("q_", probs), "mean", "sd", "no_data", "with_data"), + value = c(quantile(x, probs, na.rm = T), + mean(x, na.rm = T), + sd(x, na.rm = T), + sum(is.na(x)), + sum(!is.na(x)))) + res +} + +grids_ds <- open_dataset(gridf) + +grids_ds <- grids_ds %>% + select(species, AphiaID, min_year, cell) + +number_recs <- grids_ds %>% + filter(species %in% species_list$scientificName) %>% + group_by(species) %>% + count() %>% + collect() + +number_recs <- number_recs %>% + ungroup() %>% + mutate(cumulative_sum = cumsum(n), + batch = cumsum(cumulative_sum >= lag(cumulative_sum, default = 0) + 100000)) %>% + select(-cumulative_sum) + +number_recs$batch <- number_recs$batch + 1 + +species_batch <- unique(number_recs$batch) +results_batch <- lapply(seq_along(species_batch), function(x) NULL) + +for (spi in species_batch) { + cat("\nRunning batch", spi, "out of", length(species_batch), "\n\n") + sel_species <- number_recs$species[number_recs$batch == spi] + + sel_data <- grids_ds %>% + filter(species %in% sel_species) %>% + collect() + + sel_data_crds <- as.data.frame(sf::st_coordinates(h3jsr::cell_to_point(sel_data$cell))) + sel_data_crds <- bind_cols(sel_data_crds, sel_data) + colnames(sel_data_crds)[1:2] <- c("decimalLongitude", "decimalLatitude") + + grids_result <- extract_from_nc(target_nc, sel_data_crds[,1:2]) + var_col <- which(grepl("thetao", colnames(grids_result)))[1] + + grids_result <- bind_cols(grids_result, sel_data) + + coords_na <- which(is.na(grids_result[, var_col])) + + if (length(coords_na) > 1) { + new_coords <- get_nearby(target_nc, names(grids_result)[var_col], + grids_result[coords_na, 1:2], mode = "25") + na_approx <- which(!is.na(new_coords[, "value"])) + new_coords$decimalLongitude[na_approx] <- new_coords$new_lon[na_approx] + new_coords$decimalLatitude[na_approx] <- new_coords$new_lat[na_approx] + + new_info <- grids_result[coords_na, colnames(sel_data)] + grids_result <- grids_result[-coords_na,] + new_result <- extract_from_nc(target_nc, new_coords[,1:2]) + grids_result <- bind_rows(grids_result, bind_cols(new_result, new_info)) + } + + grids_result <- grids_result %>% + select(cell, species, starts_with("thetao")) %>% + pivot_longer(starts_with("thetao"), names_to = "variable", values_to = "values") %>% + group_by(variable, species) %>% + distinct(cell, .keep_all = T) %>% + reframe(quantile_df(values)) %>% + separate_wider_delim(cols = "variable", + names = c("variable", "baseline", "depth", "variant"), + delim = "_") %>% + select(-variable, -variant, scenario = baseline) + + results_batch[[spi]] <- grids_result +} + +results_batch <- do.call("rbind", results_batch) + +write_parquet(results_batch, file.path(datafolder, "species_thermal_lims.parquet")) + + + +# Get temperature on sites on all scenarios ------ +# sites <- jsonlite::read_json("https://raw.githubusercontent.com/iobis/edna-tracker-data/data/generated.json") +# sites_samples <- sites$samples %>% bind_rows() +sites_shape <- sf::read_sf("https://samples.ednaexpeditions.org/sites.geojson") +sites_shape <- terra::vect(sites_shape) + +sites_values <- terra::extract(c(surface, bottom), sites_shape, fun = mean, na.rm = T) +sites_values$higherGeography <- sites_shape$name +sites_values <- sites_values %>% filter(!is.na(baseline_depthsurf_mean)) + +sites_values$higherGeography[sites_values$higherGeography == "Península Valdés"] <- "Peninsula Valdès" +sites_values$higherGeography[sites_values$higherGeography == "The Wadden Sea"] <- "Wadden Sea" + +write_parquet(sites_values, file.path(datafolder, "sites_sst_all.parquet")) + +species_site_temp <- left_join(occ, sites_values) %>% select(-ID) + +species_lims <- results_batch %>% + select(species, metric, value, scenario, depth) %>% + pivot_wider(names_from = metric, values_from = value, id_cols = c("species", "scenario", "depth")) + +species_lims_dsurf <- species_lims %>% + filter(depth == "depthsurf") %>% + rename(scientificName = species) +species_lims_dmean <- species_lims %>% + filter(depth == "depthmean") %>% + rename(scientificName = species) + +colnames(species_lims_dsurf)[4:9] <- paste0("dsurf_", colnames(species_lims_dsurf)[4:9]) +colnames(species_lims_dmean)[4:9] <- paste0("dmean_", colnames(species_lims_dmean)[4:9]) + +species_site_temp <- left_join(species_site_temp, species_lims_dsurf[,c("scientificName", "dsurf_q_0.95")]) +species_site_temp <- left_join(species_site_temp, species_lims_dmean[,c("scientificName", "dmean_q_0.95")]) + +sites_species_risk <- species_site_temp %>% + mutate(across(contains("depthsurf"), ~ .x - dsurf_q_0.95)) %>% + mutate(across(contains("depthmean"), ~ .x - dmean_q_0.95)) %>% + rename(limit_dsurf = dsurf_q_0.95, limit_dmean = dmean_q_0.95) %>% + mutate(across(3:length(.), ~round(.x, 2))) + +# View results +sites_species_risk %>% + filter(higherGeography == 'Aldabra Atoll') %>% + select(scientificName, + current = baseline_depthsurf_mean, + ssp1 = ssp126_depthsurf_dec100_mean, + ssp2 = ssp245_depthsurf_dec100_mean, + ssp3 = ssp370_depthsurf_dec100_mean, + ssp5 = ssp585_depthsurf_dec100_mean) + +write_parquet(sites_species_risk, file.path(datafolder, "species_sites_risk.parquet")) + +# Example application +sites_species_risk %>% + filter(higherGeography == 'Aldabra Atoll') %>% + select(scientificName, + current = baseline_depthsurf_mean, + ssp1 = ssp126_depthsurf_dec100_mean, + ssp2 = ssp245_depthsurf_dec100_mean, + ssp3 = ssp370_depthsurf_dec100_mean) %>% + mutate(across(2:length(.), ~ ifelse(.x <= 0, 0, 1))) %>% + summarise(current = sum(current, na.rm = T), ssp1 = sum(ssp1, na.rm = T), ssp2 = sum(ssp2, na.rm = T), ssp3 = sum(ssp3, na.rm = T)) %>% + pivot_longer(1:4, names_to = "scenario", values_to = "species") %>% + ggplot2::ggplot() + + ggplot2::geom_bar(ggplot2::aes(x = scenario, y = species), stat = "identity") + diff --git a/codes/nearby_from_nc.R b/codes/nearby_from_nc.R new file mode 100644 index 0000000..52c77de --- /dev/null +++ b/codes/nearby_from_nc.R @@ -0,0 +1,188 @@ +extract_from_nc <- function(netcdf, coordinates, variable = NULL) { + if (!exists("xr")) { + stop("Xarray is not loaded. Load it using `xr <- reticulate::import('xarray')`") + } + + ds <- xr$open_dataset(netcdf, chunks = "auto") + + if (!is.null(variable)) { + ds <- ds[variable] + } + + nams_coords <- unlist(ds$coords$dims) + + lons <- coordinates$decimalLongitude + lats <- coordinates$decimalLatitude + + if (length(lons) < 2) { + lons <- list(lons) + lats <- list(lats) + } + + lats <- xr$DataArray(lats, dims = "z") + lons <- xr$DataArray(lons, dims = "z") + + if (any(grepl("latitude", nams_coords))) { + temp_res <- ds$sel(latitude = lats, longitude = lons, method = "nearest") + } else { + temp_res <- ds$sel(lat = lats, lon = lons, method = "nearest") + } + + results <- temp_res$to_dataframe() + + results_final <- cbind(coordinates, results) + results_final <- apply(results_final, 2, function(x) ifelse(is.nan(x), NA, x)) + + return(as.data.frame(results_final)) +} + + +get_nearby <- function(netcdf, variable, coordinates, mode = "queen", verbose = TRUE) { + + if (verbose) cat("Getting nearby valid cells for", nrow(coordinates), "records\n") + + if (mode == "queen") { + fadj = 1 + } else { + fadj = 2 + } + + ds <- xr$open_dataset(netcdf, chunks = "auto") + ds <- ds[variable] + + nams_coords <- unlist(ds$coords$dims) + + if ("longitude" %in% nams_coords) { + layer_x <- ds$longitude$to_dataframe()[,"longitude"] + layer_y <- ds$latitude$to_dataframe()[,"latitude"] + } else { + layer_x <- ds$lon$to_dataframe()[,"lon"] + layer_y <- ds$lat$to_dataframe()[,"lat"] + } + lim_x <- range(seq_along(layer_x)) + lim_y <- range(seq_along(layer_y)) + + coordinates_idx <- coordinates + + lons <- coordinates$decimalLongitude + lats <- coordinates$decimalLatitude + + if (length(lons) < 2) { + lons <- list(lons) + lats <- list(lats) + } + + lons <- xr$DataArray(lons, dims = "z") + lats <- xr$DataArray(lats, dims = "z") + + if (any(grepl("latitude", nams_coords))) { + temp_res <- ds$sel(latitude = lats, longitude = lons, method = "nearest") + } else { + temp_res <- ds$sel(lat = lats, lon = lons, method = "nearest") + } + + temp_res <- temp_res$to_dataframe() + + extra_coords <- lapply(seq_len(nrow(temp_res)), function(x) NULL) + + if (verbose && nrow(coordinates) > 1) { + pb <- progress::progress_bar$new(total = nrow(temp_res)) + pbs <- TRUE + } else { + pbs <- FALSE + } + + for (id in 1:nrow(coordinates)) { + if (pbs) pb$tick() + + id_dim <- temp_res[id,] + + adj_x <- id_dim[,grep("lon", colnames(id_dim))] + adj_y <- id_dim[,grep("lat", colnames(id_dim))] + + adj_x <- which(adj_x == layer_x) + adj_y <- which(adj_y == layer_y) + + adj_x <- seq(adj_x - fadj, adj_x + fadj) + adj_y <- seq(adj_y - fadj, adj_y + fadj) + + adj_x[adj_x < lim_x[1]] <- lim_x[2] + adj_x[adj_x < lim_x[1]] + adj_x[adj_x > lim_x[2]] <- adj_x[adj_x > lim_x[2]] - lim_x[2] + + adj_x <- adj_x[adj_x >= lim_x[1] & adj_x <= lim_x[2]] + adj_y <- adj_y[adj_y >= lim_y[1] & adj_y <= lim_y[2]] + + vals_grid <- expand.grid(x = adj_x, y = adj_y) + vals_grid$value <- vals_grid$cy <- vals_grid$cx <- NA + vals_grid$ID <- id + extra_coords[[id]] <- vals_grid + } + + if (verbose) cat("Processing adjacent points...\n") + extra_coords <- do.call("rbind", extra_coords) + + x_ids <- xr$DataArray(as.integer(extra_coords$x - 1), dims = "z") + y_ids <- xr$DataArray(as.integer(extra_coords$y - 1), dims = "z") + + if ("longitude" %in% nams_coords) { + vg_pt <- ds$isel( + longitude = x_ids, + latitude = y_ids + ) + vg_pt <- vg_pt$to_dataframe() + } else { + vg_pt <- ds$isel( + lon = x_ids, + lat = y_ids + ) + vg_pt <- vg_pt$to_dataframe() + } + + if (nrow(extra_coords) != nrow(vg_pt)) stop("Error on data extraction.") + + extra_coords$cx <- vg_pt[,grep("lon", colnames(vg_pt))] + extra_coords$cy <- vg_pt[,grep("lat", colnames(vg_pt))] + extra_coords$value <- vg_pt[[variable]] + extra_coords$value[is.nan(extra_coords$value)] <- NA + + coordinates_idx$ID <- seq_len(nrow(coordinates_idx)) + extra_coords <- dplyr::left_join(extra_coords, + coordinates_idx[,c("decimalLongitude", "decimalLatitude", "ID")], by = "ID") + + tfun <- function(cx, cy, value, decimalLongitude, decimalLatitude) { + if (all(is.na(value))) { + df <- data.frame(new_lon = NA, new_lat = NA, value = NA) + } else { + if (sum(!is.na(value)) == 1) { + df <- data.frame(new_lon = cx[!is.na(value)], + new_lat = cy[!is.na(value)], + value = value[!is.na(value)]) + } else { + cx <- cx[!is.na(value)] + cy <- cy[!is.na(value)] + value <- value[!is.na(value)] + + true_x <- decimalLongitude[1] + true_y <- decimalLatitude[1] + diff <- abs(cy - true_y) + abs(cx - true_x) + diff[diff >= 360] <- diff[diff >= 360] - 360 + + df <- data.frame(new_lon = cx[order(diff)][1], + new_lat = cy[order(diff)][1], + value = value[order(diff)][1]) + } + } + return(dplyr::as_tibble(df)) + } + + extra_coords <- extra_coords %>% + group_by(ID) %>% + summarise(tfun(cx, cy, value, decimalLongitude, decimalLatitude)) + + coordinates_idx <- bind_cols( + coordinates_idx[,c("decimalLongitude", "decimalLatitude")], + extra_coords[,c("value", "new_lon", "new_lat", "ID")] + ) + + return(coordinates_idx) +} diff --git a/codes/nearby_from_nc_terra.R b/codes/nearby_from_nc_terra.R new file mode 100644 index 0000000..f9e13ff --- /dev/null +++ b/codes/nearby_from_nc_terra.R @@ -0,0 +1,70 @@ +extract_from_nc <- function(netcdf, coordinates, variable = NULL) { + + layer <- terra::rast(netcdf) + + results <- terra::extract(layer, coordinates, ID = F) + + results_final <- cbind(coordinates, results) + + return(as.data.frame(results_final)) +} + + +get_nearby <- function(netcdf, variable, coordinates, mode = "queen", verbose = TRUE) { + + if (verbose) cat("Getting nearby valid cells for", nrow(coordinates), "records\n") + + if (mode == "queen") { + fadj = 4 + } else { + fadj = matrix(nrow = 5, ncol = 5) + fadj[] <- c(rep(1, 12), 0, rep(1, 12)) + } + + layer <- terra::rast(netcdf) + layer <- terra::subset(layer, variable) + + adj_cells <- terra::adjacent(layer, terra::cellFromXY(layer, coordinates), + directions = fadj) + adj_cells <- as.data.frame(adj_cells) + + adj_cells$ID <- seq_len(nrow(adj_cells)) + adj_cells <- adj_cells |> + tidyr::pivot_longer(1:(ncol(adj_cells)-1), names_to = "index", values_to = "cell") |> + dplyr::select(-index) + + xy_adj <- terra::extract(layer[[1]], adj_cells$cell, xy = T) + xy_adj <- cbind(xy_adj, adj_cells) + + adj_cells_result <- tapply(seq_len(nrow(xy_adj)), xy_adj$ID, function(idx){ + target <- xy_adj[idx,] + values_c <- target[,3] + id <- target$ID[1] + if (all(is.na(values_c))) { + to_return <- data.frame(new_lon = coordinates$decimalLongitude[id], + new_lat = coordinates$decimalLatitude[id], + value = NA) + } else if (sum(!is.na(values_c)) == 1) { + to_return <- target[!is.na(values_c),1:3] + colnames(to_return) <- c("new_lon", "new_lat", "value") + } else { + focalcell <- coordinates[id,1:2] + possible <- target[!is.na(values_c),1:3] + true_x <- focalcell[1,1] + true_y <- focalcell[1,2] + diff <- abs(possible$y - true_y) + abs(possible$x - true_x) + diff[diff >= 360] <- diff[diff >= 360] - 360 + to_return <- possible[order(diff)[1],] + colnames(to_return) <- c("new_lon", "new_lat", "value") + } + return(to_return) + }) + adj_cells_result <- do.call("rbind", adj_cells_result) + + coordinates_idx <- bind_cols( + coordinates[,c("decimalLongitude", "decimalLatitude")], + adj_cells_result[,c("value", "new_lon", "new_lat")] + ) + + return(coordinates_idx) +} diff --git a/codes/occurrence.R b/codes/occurrence.R new file mode 100644 index 0000000..5e6e16d --- /dev/null +++ b/codes/occurrence.R @@ -0,0 +1,56 @@ +library(dplyr) +library(stringr) +library(purrr) +library(arrow) + +#' Download occurrence data +download_occurrence <- function(force = FALSE) { + if (!dir.exists("data/output") || force) { + cat("Downloading occurrence data ======\n") + system("rm -r output output.zip") + download.file( + "https://obis-edna-results.s3.amazonaws.com/output.zip", + "output.zip" + ) + unzip("output.zip", exdir = "data/") + file.remove("output.zip") + } else { + cat("Occurrence data already downloaded ======\n") + } +} + +#' Reads the full occurrence dataset from TSV. +read_occurrence_tsv <- function() { + occurrence_files <- list.files("data/output", "*Occurrence*", full.names = TRUE) + + dna_files <- list.files("data/output", "*DNADerivedData*", full.names = TRUE) + + dna <- map(dna_files, read.table, sep = "\t", quote = "", header = TRUE) %>% + bind_rows() %>% + mutate_if(is.character, na_if, "") + + occurrence <- map(occurrence_files, read.table, sep = "\t", quote = "", header = TRUE, comment.char = "") %>% + bind_rows() %>% + mutate_if(is.character, na_if, "")%>% + left_join(dna, by = "occurrenceID") + + return(occurrence) +} + +#' Converts the occurrence dataset to parquet. +convert_occurrence_data <- function() { + message("Converting TSV to parquet...") + occurrence <- read_occurrence_tsv() + write_parquet(occurrence, "data/output/occurrence.parquet") + return(occurrence) +} + +#' Reads the full occurrence dataset from parquet, converts TSV to parquet if the parquet file does not exist. +read_occurrence_data <- function() { + if (!file.exists("data/output/occurrence.parquet")) { + occurrence <- convert_occurrence_data() + } else { + occurrence <- read_parquet("data/output/occurrence.parquet") + } + return(occurrence) +} diff --git a/codes/plot_paf_newlist.R b/codes/plot_paf_newlist.R new file mode 100644 index 0000000..9fccadd --- /dev/null +++ b/codes/plot_paf_newlist.R @@ -0,0 +1,315 @@ +#################### eDNA expeditions - scientific analysis #################### +########################## Environmental data download ######################### +# January of 2024 +# Author: Silas Principe, Mike Burrows +# Contact: s.principe@unesco.org; michael.burrows@sams.ac.uk +# +############## Plot eDNA data from UNESCO World Heritage Sites ################# + +# Load packages ---- +library(dplyr) +library(terra) +library(tidyr) +library(stringr) +library(purrr) +library(arrow) +library(data.table) +library(patchwork) +library(ggplot2) +library(ggdist) +library(see) +fs::dir_create("figures") + +# Load data ---- +# Load temperature summaries by site +speciesthermsite <- read_parquet("data/species_sites_risk.parquet") +colnames(speciesthermsite)[1] <- "species" + +# Process the data ---- +# Convert to data.table +speciesthermsitedt <- data.table(speciesthermsite) + +# Load species list +specieslist <- open_dataset("data/output/occurrence.parquet") +groups <- read.csv("data/groups.csv") +group_lookup <- setNames(groups$group, paste(groups$rank, groups$taxon, sep = "_")) + +# Add a new column for group +specieslist <- specieslist %>% + filter(taxonRank == "species") %>% + collect() %>% + distinct(scientificName, .keep_all = T) %>% + mutate(group = coalesce( + group_lookup[paste("class", class, sep = "_")], + group_lookup[paste("order", order, sep = "_")], + group_lookup[paste("phylum", phylum, sep = "_")] + )) %>% + select(species = scientificName, group) + + +# Merge group information +speciesthermsitedt_gp <- merge(speciesthermsitedt, specieslist[,c("species", "group")], by = "species") + +# Filter for fish only +fishthermsitedt <- speciesthermsitedt_gp[group %in% c("fish", "sharks"), , ] +fishthermsitedt <- fishthermsitedt[!is.na(fishthermsitedt$baseline_depthsurf_mean),] + +# Get functional traits data +funcdiv <- rfishbase::species(unique(fishthermsitedt$species)) +funcdiv$to_include <- NA +funcdiv$to_include[funcdiv$DepthRangeDeep <= 200] <- TRUE + +table(funcdiv$DemersPelag[is.na(funcdiv$to_include)]) + +funcdiv$to_include[is.na(funcdiv$to_include) & + funcdiv$DemersPelag %in% c("reef-associated", "pelagic-neritic")] <- TRUE + +table(funcdiv$DemersPelag[is.na(funcdiv$to_include)]) + +sel_species <- funcdiv %>% + filter(to_include) %>% + select(species = Species, DepthRangeDeep, habitat = DemersPelag) + +# Convert to longer format +fishthermsitedt_long <- fishthermsitedt %>% + select(species, higherGeography, baseline_depthsurf_mean, contains("depthsurf_dec50"), limit_dsurf) %>% + filter(species %in% unique(sel_species$species)) %>% # Remove this line to not filter by the traits + pivot_longer(cols = contains("depthsurf"), names_to = "scenario", + values_to = "sst") %>% + mutate(scenario = case_when( + scenario == "baseline_depthsurf_mean" ~ "site_current", + scenario == "ssp126_depthsurf_dec50_mean" ~ "site_ssp126_dec50", + scenario == "ssp245_depthsurf_dec50_mean" ~ "site_ssp245_dec50", + scenario == "ssp370_depthsurf_dec50_mean" ~ "site_ssp370_dec50", + scenario == "ssp585_depthsurf_dec50_mean" ~ "site_ssp585_dec50", + )) + + +# Create a function to see at which scenario the species become at risk +scenario_order <- c("site_current", "site_ssp126_dec50", + "site_ssp585_dec50") +get_class <- function(x) { + if(all(x < 0)) { + return("not_risk") + } else { + return(scenario_order[min(which(x >= 0))]) + } +} + +# Retrieve sites names and latitudes +sites <- vect("data/shapefiles/marine_world_heritage.gpkg") +sites$latitude <- unlist(sapply(1:length(sites), function(x){ + unname(terra::geom(centroids(sites[x,]))[,4]) +})) +sites_org <- data.frame(sites = sites$name, latitude = sites$latitude) + +# Change names of sites +sites_org <- sites_org %>% + rename(higherGeography = sites) %>% + mutate(higherGeography = case_when( + higherGeography == "Wadden Sea" ~ "Wadden Sea", + higherGeography == "Archipiélago de Revillagigedo" ~ "Revillagigedo", + higherGeography == "iSimangaliso Wetland Park" ~ "iSimangaliso", + higherGeography == "Ningaloo Coast" ~ "Ningaloo coast", + higherGeography == "Socotra Archipelago" ~ "Socotra Archipelago", + higherGeography == "Belize Barrier Reef Reserve System" ~ "Belize Barrier Reef", + higherGeography == "Cocos Island National Park" ~ "Cocos Island", + higherGeography == "Lord Howe Island Group" ~ "Lord Howe Island", + higherGeography == "Sundarbans National Park" ~ "Sundarbans", + higherGeography == "Península Valdés" ~ "Peninsula Valdès", + higherGeography == "Gulf of Porto: Calanche of Piana, Gulf of Girolata, Scandola Reserve" ~ "Scandola", + higherGeography == "Coiba National Park and its Special Zone of Marine Protection" ~ "Coiba", + higherGeography == "Lagoons of New Caledonia: Reef Diversity and Associated Ecosystems" ~ "Lagoons of New Caledonia", + higherGeography == "Shark Bay, Western Australia" ~ "Shark Bay", + higherGeography == "Tubbataha Reefs Natural Park" ~ "Tubbataha Reefs", + higherGeography == "Brazilian Atlantic Islands: Fernando de Noronha and Atol das Rocas Reserves" ~ "Fernando de Noronha", + higherGeography == "Everglades National Park" ~ "Everglades", + higherGeography == "Aldabra Atoll" ~ "Aldabra Atoll", + higherGeography == "Banc d'Arguin National Park" ~ "Banc d'Arguin", + higherGeography == "French Austral Lands and Seas" ~ "French Austral Lands", + )) %>% + filter(!is.na(higherGeography)) %>% + arrange(desc(latitude)) %>% + distinct(higherGeography, .keep_all = T) + +# Update names on the longer object +fishthermsitedt_long <- fishthermsitedt_long %>% + mutate(higherGeography = case_when( + higherGeography == "Wadden Sea" ~ "Wadden Sea", + higherGeography == "Archipiélago de Revillagigedo" ~ "Revillagigedo", + higherGeography == "iSimangaliso Wetland Park" ~ "iSimangaliso", + higherGeography == "Ningaloo Coast" ~ "Ningaloo coast", + higherGeography == "Socotra Archipelago" ~ "Socotra Archipelago", + higherGeography == "Belize Barrier Reef Reserve System" ~ "Belize Barrier Reef", + higherGeography == "Cocos Island National Park" ~ "Cocos Island", + higherGeography == "Lord Howe Island Group" ~ "Lord Howe Island", + higherGeography == "The Sundarbans" ~ "Sundarbans", + higherGeography == "Peninsula Valdès" ~ "Peninsula Valdès", + higherGeography == "Gulf of Porto: Calanche of Piana, Gulf of Girolata, Scandola Reserve" ~ "Scandola", + higherGeography == "Coiba National Park and its Special Zone of Marine Protection" ~ "Coiba", + higherGeography == "Lagoons of New Caledonia: Reef Diversity and Associated Ecosystems" ~ "Lagoons of New Caledonia", + higherGeography == "Shark Bay, Western Australia" ~ "Shark Bay", + higherGeography == "Tubbataha Reefs Natural Park" ~ "Tubbataha Reefs", + higherGeography == "Brazilian Atlantic Islands: Fernando de Noronha and Atol das Rocas Reserves" ~ "Fernando de Noronha", + higherGeography == "Everglades National Park" ~ "Everglades", + higherGeography == "Aldabra Atoll" ~ "Aldabra Atoll", + higherGeography == "Banc d'Arguin National Park" ~ "Banc d'Arguin", + higherGeography == "French Austral Lands and Seas" ~ "French Austral Lands", + )) + +# Add means +species_lims_full <- read_parquet(file.path("data", "species_thermal_lims.parquet")) +species_lims_full <- species_lims_full %>% + filter(metric == "q_0.95") %>% + filter(depth == "depthsurf") %>% + select(species, q_0.95 = value) + +fishthermsitedt_long <- left_join(fishthermsitedt_long, species_lims_full) + +# Get the number of species in each site +sites_species <- fishthermsitedt_long %>% + group_by(higherGeography, species) %>% + summarise(n = n()) %>% + group_by(higherGeography) %>% + summarise(n = n()) + +# Join with the sites object +sites_org <- left_join(sites_org, sites_species) + +# Create the plot object with all information +plot_obj <- fishthermsitedt_long %>% + filter(scenario %in% c("site_current", "site_ssp126_dec50", "site_ssp585_dec50")) %>% + select(species, higherGeography, scenario, differences = sst, q_0.95) %>% + filter(!is.na(differences)) %>% + group_by(species, higherGeography) %>% + mutate(point_class = get_class(differences)) %>% + mutate(higherGeography = factor(higherGeography, levels = rev(sites_org$higherGeography), + labels = rev(paste(sites_org$higherGeography, paste0("(", sites_org$n, ")"))))) +plot_obj$point_class <- factor(plot_obj$point_class, levels = c("not_risk", "site_current", "site_ssp126_dec50", "site_ssp585_dec50")) + +# Extract sites SST +sites_sst <- read_parquet("data/sites_sst_all.parquet") + +sites_sst <- sites_sst %>% + mutate(higherGeography = case_when( + higherGeography == "Wadden Sea" ~ "Wadden Sea", + higherGeography == "Archipiélago de Revillagigedo" ~ "Revillagigedo", + higherGeography == "iSimangaliso Wetland Park" ~ "iSimangaliso", + higherGeography == "Ningaloo Coast" ~ "Ningaloo coast", + higherGeography == "Socotra Archipelago" ~ "Socotra Archipelago", + higherGeography == "Belize Barrier Reef Reserve System" ~ "Belize Barrier Reef", + higherGeography == "Cocos Island National Park" ~ "Cocos Island", + higherGeography == "Lord Howe Island Group" ~ "Lord Howe Island", + higherGeography == "The Sundarbans" ~ "Sundarbans", + higherGeography == "Peninsula Valdès" ~ "Peninsula Valdès", + higherGeography == "Gulf of Porto: Calanche of Piana, Gulf of Girolata, Scandola Reserve" ~ "Scandola", + higherGeography == "Coiba National Park and its Special Zone of Marine Protection" ~ "Coiba", + higherGeography == "Lagoons of New Caledonia: Reef Diversity and Associated Ecosystems" ~ "Lagoons of New Caledonia", + higherGeography == "Shark Bay, Western Australia" ~ "Shark Bay", + higherGeography == "Tubbataha Reefs Natural Park" ~ "Tubbataha Reefs", + higherGeography == "Brazilian Atlantic Islands: Fernando de Noronha and Atol das Rocas Reserves" ~ "Fernando de Noronha", + higherGeography == "Everglades National Park" ~ "Everglades", + higherGeography == "Aldabra Atoll" ~ "Aldabra Atoll", + higherGeography == "Banc d'Arguin National Park" ~ "Banc d'Arguin", + higherGeography == "French Austral Lands and Seas" ~ "French Austral Lands", + )) %>% + filter(!is.na(higherGeography)) + +sites_sst <- sites_sst %>% + select(higherGeography, baseline_depthsurf_mean, contains("depthsurf_dec50")) %>% + pivot_longer(cols = contains("depthsurf"), names_to = "scenario", + values_to = "sst") %>% + mutate(scenario = case_when( + scenario == "baseline_depthsurf_mean" ~ "site_current", + scenario == "ssp126_depthsurf_dec50_mean" ~ "site_ssp126_dec50", + scenario == "ssp245_depthsurf_dec50_mean" ~ "site_ssp245_dec50", + scenario == "ssp370_depthsurf_dec50_mean" ~ "site_ssp370_dec50", + scenario == "ssp585_depthsurf_dec50_mean" ~ "site_ssp585_dec50", + )) %>% + filter(scenario %in% c("site_current", "site_ssp126_dec50", "site_ssp585_dec50")) %>% + filter(higherGeography %in% unique(fishthermsitedt_long$higherGeography)) %>% + mutate(point_class = scenario) %>% + left_join(sites_org[,c("higherGeography", "n")]) %>% + mutate(higherGeography = paste(higherGeography, paste0("(", n, ")"))) +sites_sst$point_class <- factor(sites_sst$point_class, levels = c("not_risk", "site_current", "site_ssp126_dec50", "site_ssp585_dec50")) + + +# Make plots ---- +# Left plot - dots +plot_a <- plot_obj %>% + group_by(higherGeography) %>% + distinct(species, .keep_all = T) %>% + ggplot() + + geom_violinhalf(aes(x = higherGeography, y = q_0.95), + position = position_nudge(x = 0.3)) + + geom_jitter(aes(x = higherGeography, y = q_0.95, color = point_class), alpha = .3, + width = 0.12, shape = 16, show.legend = F) + + geom_linerange(data = sites_sst, aes(ymin = sst-0.1, ymax = sst+0.1, x = higherGeography, color = point_class), + linewidth = 7, show.legend = T) + + scale_color_manual(values = c("grey90", "#d4b9da", "#df65b0", "#980043"), + labels = c("Not at risk", "Current", "SSP1", "SSP5")) + + coord_flip() + + ylab("Temperature (°C)") + xlab(NULL) + + theme_light() + + scale_y_continuous(breaks = c(0, 10, 20, 30), limits = c(0, 32)) + + theme(panel.border = element_blank(), + panel.grid.minor.x = element_blank(), + panel.grid.major.y = element_blank(), + legend.title = element_blank(), + legend.position = "bottom", + strip.background = element_blank(), + strip.text = element_text(color = "black"), + plot.margin = unit(c(0.1,0.1,0.1,0), "cm"));plot_a + +# Right plot - bars +plot_b <- plot_obj %>% + group_by(higherGeography, scenario) %>% + summarise(percentage = sum(differences > 0)/length(differences)) %>% + ggplot() + + geom_bar(aes(x = higherGeography, y = percentage, fill = scenario), stat = "identity", position="dodge", width = .6) + + scale_fill_manual(values = c("#d4b9da", "#df65b0", "#980043")) + + scale_y_continuous(labels = c("0%", "", "50%", "", "100%")) + + coord_flip() + + ylab("Potentially Affected Fraction") + xlab(NULL) + + theme_light() + + theme(panel.border = element_blank(), + panel.grid.minor.x = element_blank(), + panel.grid.major.y = element_blank(), + axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + legend.title = element_blank(), + legend.position = "none", + strip.background = element_blank(), + strip.text = element_text(color = "black"), + plot.margin = unit(c(0,0,0,0), "cm"));plot_b + +# Put together +plot_a + plot_b + plot_layout(widths = c(4, 1)) + +# Save +ggsave("figures/paf_fishes_filtered_v2_nl.png", width = 12, height = 7) +ggsave("figures/paf_fishes_filtered_v2_nl.svg", width = 12, height = 7) + +plot_c <- plot_obj %>% + group_by(higherGeography, scenario) %>% + summarise(percentage = sum(differences > 0)/length(differences)) %>% + ggplot() + + geom_bar(aes(x = higherGeography, y = percentage, fill = scenario), stat = "identity", position="dodge", width = .6) + + scale_fill_manual(values = c("#d4b9da", "#df65b0", "#980043")) + + scale_y_continuous(labels = c("0%", "", "50%", "", "100%")) + + coord_flip() + + ylab("Potentially Affected Fraction") + xlab(NULL) + + theme_light() + + theme(panel.border = element_blank(), + panel.grid.minor.x = element_blank(), + panel.grid.major.y = element_blank(), + #axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + legend.title = element_blank(), + legend.position = "none", + strip.background = element_blank(), + strip.text = element_text(color = "black"), + plot.margin = unit(c(0,0,0,0), "cm"));plot_c + +ggsave("figures/paf_fishes_filtered_onlybars_v2_nl.png", width = 8, height = 7) +ggsave("figures/paf_fishes_filtered_onlybars_v2_nl.svg", width = 8, height = 7)