First results
silasprincipe committed Jan 24, 2024
1 parent 5abb363 commit 6562244
Showing 8 changed files with 600 additions and 130 deletions.
6 changes: 5 additions & 1 deletion .gitignore
@@ -52,9 +52,13 @@ rsconnect/
codes/drafts/

data/
results/

functions/drafts/

*.zip

temp/
temp/

*_storr
*.parquet
8 changes: 7 additions & 1 deletion README.md
@@ -33,15 +33,21 @@ __Temperature by species__
- `get_edna_marinespecies.R`: aggregate the species lists from the eDNA project, downloaded from the repository https://github.com/iobis/edna-species-lists
- `temperature_database.R`: aggregate temperature/environmental layers into the OBIS/GBIF database (see details of the database at https://github.com/iobis/protectedseas-statistics)
- `get_sst_marinespecies.R`: extract temperature information for all species listed for the sites
- `plot_sst_marinespecies.R`: plot temperature information for a selected species
- `get_sst_sites.R`: extract temperature information for each site (collection site, not _higherGeography_)

There is also a function in the `functions` folder, `query_sst_marinespecies.R`, that can be used to query the full data or temperature summaries for a species (using the database).
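
For reference, a hypothetical call could look like the sketch below. The function name comes from this repository, but the argument names, the example species, and the returned structure are assumptions, not the documented interface:

```r
# Hypothetical usage sketch -- arguments and outputs are assumptions
source("functions/query_sst_marinespecies.R")

# Summarised temperature quantiles for one (illustrative) species
summary_tbl <- query_sst_marinespecies("Acropora cervicornis", summary = TRUE)

# Full temperature records instead of the summary
full_tbl <- query_sst_marinespecies("Acropora cervicornis", summary = FALSE)
```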

__Others__

- `get_depthprofiles.R`: check which depth was actually used when obtaining the data from Copernicus
- `generate_quarto_sites.R`: generate Quarto pages by site
- `plot_sst_marinespecies.R`: plot temperature information for a selected species

## Data analysis

After retrieving temperature information for each species occurrence from OBIS/GBIF (aggregated by H3 cells at resolution 7), we generate a table with temperature quantiles for each species. This is also done in the `get_sst_marinespecies.R` script. The same data is obtained for each site.
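
Conceptually, the quantile step is equivalent to the sketch below. The input table name and its columns (`species`, `temperature`) are assumptions for illustration; the actual script may organise the data differently:

```r
library(dplyr)

# Minimal sketch, assuming a long table `occurrence_temps` with one row per
# occurrence/H3 cell and columns `species` and `temperature`
quantile_table <- occurrence_temps %>%
  group_by(species) %>%
  summarise(q_0.01 = quantile(temperature, 0.01, na.rm = TRUE),
            q_0.25 = quantile(temperature, 0.25, na.rm = TRUE),
            q_0.5  = quantile(temperature, 0.5,  na.rm = TRUE),
            q_0.75 = quantile(temperature, 0.75, na.rm = TRUE),
            q_0.99 = quantile(temperature, 0.99, na.rm = TRUE))
```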

We join both tables to analyse the thermal limits of each species and to assess how species at each site might be at risk from climate change. These steps are done in the `analysis_sst_species.R` script.

----

140 changes: 140 additions & 0 deletions codes/analysis_sst_species.R
@@ -0,0 +1,140 @@
#################### eDNA expeditions - scientific analysis ####################
########################## Environmental data analysis #########################
# January of 2024
# Author: Silas C. Principe
# Contact: [email protected]
#
############################# Species thermal limits ###########################

# Load packages ----
library(arrow)
library(dplyr)
library(tidyr)
library(ggplot2)
library(patchwork)


# Load files ----
# Temperature per species dataset
temp_sp <- open_dataset("results/species_tsummaries.parquet")

temp_sp_filt <- temp_sp %>%
filter(depth == "depthsurf") %>%
filter(variant == "mean") %>%
collect()

# Temperature on sites
temp_sites <- read_parquet("results/sites_tsummaries.parquet")

# Get sites list
sites_f <- list.files("data/species_list_full", full.names = T)

sites_list <- lapply(sites_f, read.csv)
names(sites_list) <- gsub("_", " ", gsub("\\.csv", "", basename(sites_f)))

sites_list <- bind_rows(
sites_list, .id = "higherGeography"
)

sites_list_distinct <- sites_list %>%
select(species, AphiaID, source_obis, source_gbif, source_dna, higherGeography) %>%
group_by(higherGeography) %>%
distinct(species, .keep_all = T)

# Get number of records
species_rec <- read_parquet("results/records_by_sp.parquet")


# Bind information ----
# Bind species temperature
temp_sp_wide <- temp_sp_filt %>%
pivot_wider(names_from = metric, values_from = value) %>%
select(-depth, -variant)

sites_list_binded <- sites_list_distinct %>%
left_join(temp_sp_wide) %>%
filter(!is.na(q_0.25))

# Bind temperature on sites
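# Site names in the temperature table carry capitalisation and punctuation;
# lowercase them and clean punctuation so they match the higherGeography keys
# built from the species list file names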
temp_sites_filt <- temp_sites %>%
filter(depth == "depthsurf") %>%
filter(variant == "mean") %>%
group_by(higherGeography, period) %>%
summarise(temperature = mean(temperature)) %>%
pivot_wider(names_from = period, values_from = temperature) %>%
mutate(higherGeography = tolower(higherGeography)) %>%
mutate(higherGeography = gsub("[']", " ", higherGeography)) %>%
mutate(higherGeography = gsub("[:,]", "", higherGeography))

colnames(temp_sites_filt)[2:length(temp_sites_filt)] <- paste0("site_", colnames(temp_sites_filt)[2:length(temp_sites_filt)])

sites_list_binded <- sites_list_binded %>%
left_join(temp_sites_filt) %>%
filter(!is.na(site_current))

# Bind number of records
sites_list_binded <- sites_list_binded %>%
left_join(species_rec)

# Create a tag indicating the data source of each record
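# Each source gets a power-of-two flag (OBIS = 1, GBIF = 2, eDNA = 4), so the
# summed value uniquely identifies the combination: 4 = eDNA only,
# 1-3 = OBIS/GBIF only, 5-7 = present in both eDNA and OBIS/GBIF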
sites_list_binded <- sites_list_binded %>%
mutate(source_obis = ifelse(as.numeric(source_obis) == 1, 1, 0)) %>%
mutate(source_gbif = ifelse(as.numeric(source_gbif) == 1, 2, 0)) %>%
mutate(source_dna = ifelse(as.numeric(source_dna) == 1, 4, 0)) %>%
mutate(where = source_obis + source_gbif + source_dna) %>%
mutate(where = case_when(
where == 4 ~ "eDNA",
where %in% c(3, 2, 1) ~ "OBIS/GBIF",
where %in% c(6, 5, 7) ~ "Both"
))

# Save a copy
write_parquet(sites_list_binded, "results/tsummaries_aggregated.parquet")


# Get thermal ranges ----
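# Keep species with at least 10 records and compute loc_range: the position of
# the site's current temperature within the species' thermal envelope
# (0 = cold limit q_0.01, 1 = warm limit q_0.99; values above 1 mean the site
# is already warmer than the species' upper quantile)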
species_thermal_range <- sites_list_binded %>%
filter(records >= 10) %>%
mutate(loc_range = (site_current - q_0.01)/(q_0.99 - q_0.01))

# Plot to verify
ggplot(species_thermal_range) +
geom_boxplot(aes(x = loc_range))


# Create a function to plot for just one site
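# `site` can be a higherGeography name or a numeric index into the unique site
# names; the output is two stacked jitter panels (all species, and split by
# data source), with a dashed line at the site's current surface temperature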
plot_site <- function(site) {

if (is.numeric(site)) {
site <- unique(sites_list_binded$higherGeography)[site]
}

sel_site <- sites_list_binded %>%
filter(higherGeography == site) %>%
# Recreate the source tag consistently with the earlier case_when
mutate(where = ifelse((source_obis | source_gbif) & !source_dna, "OBIS/GBIF",
ifelse(source_dna & !source_obis & !source_gbif, "eDNA", "Both")))

plot_a <- sel_site %>%
mutate(status = ifelse(q_0.5 > min(sel_site$site_current), "Above", "Below")) %>%
ggplot() +
geom_jitter(aes(y = higherGeography, x = q_0.5, color = status), alpha = .2, height = 0.2) +
geom_vline(xintercept = min(sel_site$site_current), linetype = 2) +
theme_minimal() +
ylab(NULL) + xlab(NULL) + ggtitle("All species", site) +
theme(legend.title = element_blank(),
axis.text.y = element_blank())

plot_b <- sel_site %>%
mutate(status = ifelse(q_0.5 > min(sel_site$site_current), "Above", "Below")) %>%
ggplot() +
geom_jitter(aes(y = where, x = q_0.5, color = status), alpha = .2, height = 0.2) +
geom_vline(xintercept = min(sel_site$site_current), linetype = 2) +
theme_minimal() +
ylab(NULL) + ggtitle("By type") +
theme(legend.title = element_blank())

plot_a / plot_b + plot_layout(guides = "collect", heights = c(1,2)) & theme(legend.position = "bottom")

}

plot_site(3)
98 changes: 68 additions & 30 deletions codes/download_temperature_toolbox.R
@@ -9,6 +9,10 @@
# Load packages ----
library(terra)
library(reticulate)
library(arrow)
library(parallel)
library(dplyr)
library(tidyr)
cm <- import("copernicus_marine_client")


@@ -113,36 +113,70 @@ for (site_idx in mhs$code) {
#
# # Print loaded dataset information
# print(sst_l3s)


# Convert to parquet
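# Each NetCDF file is reshaped to a long table (x, y, time, value), tagged with
# its variable name, and written as a gzip-compressed parquet file; job::job()
# runs the conversion in a background session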
proc <- job::job({
lapply(list.files(outfolder, full.names = TRUE, pattern = "\\.nc"),
function(fname, redo = TRUE) {

outfile <- gsub("\\.nc", ".parquet", fname)

if (file.exists(outfile) & !redo) {
cat("File already processed, skipping.\n")
} else {
r <- rast(fname)
r_dat <- as.data.frame(r, xy = TRUE, time = TRUE)
r_dat <- r_dat %>%
pivot_longer(3:length(.), names_to = "time", values_to = "values") %>%
mutate(layer = gsub("\\..*", "", names(r))[1])
arrow::write_parquet(r_dat, outfile, compression = "gzip")
}

return(invisible(NULL))
})

lf <- list.files(outfolder, full.names = TRUE, pattern = "\\.parquet")
base <- list.files(outfolder, full.names = TRUE, pattern = "\\.nc")[1]

zip("temperature_dayly.zip", c(lf, base))

job::export("none")
})
#
#
# # Convert to parquet
# proc <- job::job({
# lapply(list.files(outfolder, full.name = T, pattern = "\\.nc"),
# function(fname, redo = F) {
#
# outfile <- gsub("\\.nc", ".parquet", fname)
#
# if (file.exists(outfile) & !redo) {
# cat("File already processed, skipping.\n")
# } else {
# cat("Processing", outfile, "\n")
# r <- rast(fname)
#
# if (file.size(fname)/1000/1000 > 1000) {
#
# cat("Splitting in pieces (large file) \n")
#
# fs::dir_create("temp")
#
# grid <- sf::st_make_grid(sf::st_bbox(r), cellsize = 1)
#
# mclapply(1:length(grid), function(x){
# r_b <- r
# r_b <- crop(r, grid[x])
# r_b <- as.data.frame(r_b, xy = TRUE, time = TRUE)
# r_b <- r_b %>%
# pivot_longer(3:length(.), names_to = "time", values_to = "values") %>%
# mutate(layer = gsub("\\..*", "", names(r)[1]))
# arrow::write_parquet(r_b, sprintf("temp/temp_%06d.parquet", x),
# compression = "gzip")
# }, mc.cores = 3)
#
# gen_files <- open_dataset("temp")
#
# gen_files %>%
# write_parquet(., outfile, compression = "gzip")
#
# fs::dir_delete("temp")
# } else {
#
# r_dat <- as.data.frame(r, xy = TRUE, time = TRUE)
#
# r_dat <- r_dat %>%
# pivot_longer(3:length(.), names_to = "time", values_to = "values") %>%
# mutate(layer = gsub("\\..*", "", names(r)[1]))
#
# arrow::write_parquet(r_dat, outfile, compression = "gzip")
# }
#
#
# }
#
# return(invisible(NULL))
# })
#
# lf <- list.files(outfolder, full.name = T, pattern = "\\.parquet")
# base <- list.files(outfolder, full.name = T, pattern = "\\.nc")[1]
#
# zip("temperature_dayly.zip", c(lf, base))
#
# job::export("none")
# })



