From 523ee9ee94abb8e26c240ffcce707219e655791c Mon Sep 17 00:00:00 2001 From: doserjef Date: Thu, 7 Dec 2023 20:42:54 -0500 Subject: [PATCH] updates --- .gitignore | 2 +- README.md | 6 +- code/archived/bbs-predict/bbs-data-prep.R | 103 ------ code/archived/bbs-predict/get-covs.R | 58 ---- code/archived/bbs-predict/main.R | 29 -- code/archived/main-figure.R | 318 ------------------ code/archived/testing.R | 257 -------------- code/archived/tree/archived/data-prep.R | 78 ----- code/archived/tree/archived/get-ndvi.R | 12 - .../tree/archived/main-full-SVI-stage-2.R | 51 --- .../tree/archived/main-full-stage-1.R | 95 ------ .../tree/archived/main-full-stage-2.R | 51 --- .../tree/archived/main-state-stage-1.R | 96 ------ .../tree/archived/pred-state-stage-1.R | 63 ---- code/archived/tree/archived/summary-stage-1.R | 68 ---- code/archived/tree/archived/test.R | 33 -- code/archived/tree/archived/total-data-prep.R | 62 ---- code/archived/tree/extract-data.R | 74 ---- code/archived/tree/format-data.R | 10 - code/archived/tree/get-covariate.R | 95 ------ code/archived/tree/get-pred-indx.R | 11 - code/archived/tree/get-prediction-data.R | 57 ---- code/archived/tree/main-hold-out-vt-SVC.R | 101 ------ code/archived/tree/main-hold-out-vt-SVI.R | 81 ----- .../tree/main-hold-out-vt-non-spatial.R | 78 ----- code/archived/tree/main-state-SVC-stage-2.R | 87 ----- code/archived/tree/main-state-SVI-stage-2.R | 86 ----- code/archived/tree/main-vt-SVC.R | 58 ---- code/archived/tree/main-vt-SVI.R | 57 ---- code/archived/tree/pred-state-SVC-stage-2.R | 71 ---- code/archived/tree/pred-state-SVI-stage-2.R | 69 ---- code/archived/tree/scratch.R | 10 - code/archived/tree/summary-hold-out.R | 61 ---- code/archived/tree/summary-scratch.R | 70 ---- code/archived/tree/summary-stage-2.R | 65 ---- code/archived/tree/summary.R | 91 ----- 36 files changed, 3 insertions(+), 2611 deletions(-) delete mode 100644 code/archived/bbs-predict/bbs-data-prep.R delete mode 100644 code/archived/bbs-predict/get-covs.R delete mode 100644 code/archived/bbs-predict/main.R delete mode 100644 code/archived/main-figure.R delete mode 100644 code/archived/testing.R delete mode 100644 code/archived/tree/archived/data-prep.R delete mode 100644 code/archived/tree/archived/get-ndvi.R delete mode 100644 code/archived/tree/archived/main-full-SVI-stage-2.R delete mode 100644 code/archived/tree/archived/main-full-stage-1.R delete mode 100644 code/archived/tree/archived/main-full-stage-2.R delete mode 100644 code/archived/tree/archived/main-state-stage-1.R delete mode 100644 code/archived/tree/archived/pred-state-stage-1.R delete mode 100644 code/archived/tree/archived/summary-stage-1.R delete mode 100644 code/archived/tree/archived/test.R delete mode 100644 code/archived/tree/archived/total-data-prep.R delete mode 100644 code/archived/tree/extract-data.R delete mode 100644 code/archived/tree/format-data.R delete mode 100644 code/archived/tree/get-covariate.R delete mode 100644 code/archived/tree/get-pred-indx.R delete mode 100644 code/archived/tree/get-prediction-data.R delete mode 100644 code/archived/tree/main-hold-out-vt-SVC.R delete mode 100644 code/archived/tree/main-hold-out-vt-SVI.R delete mode 100644 code/archived/tree/main-hold-out-vt-non-spatial.R delete mode 100644 code/archived/tree/main-state-SVC-stage-2.R delete mode 100644 code/archived/tree/main-state-SVI-stage-2.R delete mode 100644 code/archived/tree/main-vt-SVC.R delete mode 100644 code/archived/tree/main-vt-SVI.R delete mode 100644 code/archived/tree/pred-state-SVC-stage-2.R delete mode 100644 code/archived/tree/pred-state-SVI-stage-2.R delete mode 100644 code/archived/tree/scratch.R delete mode 100644 code/archived/tree/summary-hold-out.R delete mode 100644 code/archived/tree/summary-scratch.R delete mode 100644 code/archived/tree/summary-stage-2.R delete mode 100644 code/archived/tree/summary.R diff --git a/.gitignore b/.gitignore index 81cf1c0..488cc37 100644 --- a/.gitignore +++ b/.gitignore @@ -25,4 +25,4 @@ data/case-study-1/bird-life-processed.rda data/case-study-2/yearly-lulc data/case-study-2/species-BirdLife-ranges.rda data/archived/ - +code/archived/ diff --git a/README.md b/README.md index c20732d..8493813 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,8 @@ # Guidelines for the use of spatially-varying coefficients in species distribution models -### In Review - ### Jeffrey W. Doser, Marc Kéry, Sarah P. Saunders, Andrew O. Finley, Brooke L. Bateman, Joanna Grand, Shannon Reault, Aaron S. Weed, Elise F. Zipkin -### spOccupancy Package [Website](https://www.jeffdoser.com/files/spoccupancy-web/) and [Repository](https://github.com/doserjef/spOccupancy/) +### Global Ecology and Biogeography ### Please contact the first author for questions about the code or data used in the manuscript: Jeffrey W. Doser (doserjef@msu.edu) @@ -14,7 +12,7 @@ ### Aim -Species distribution models (SDMs) are increasingly applied across macroscales using detection-nondetection data. Such models typically assume that a single set of regression coefficients can adequately describe species-environment relationships and/or population trends. However, such relationships often show nonlinear and/or spatially-varying patterns that arise from complex interactions with abiotic and biotic processes that operate at different scales. Spatially-varying coefficient (SVC) models can readily account for variability in the effects of environmental covariates. Yet, their use in ecology is relatively scarce due to gaps in understanding the inferential benefits that SVC models can provide compared to simpler frameworks. +Species distribution models (SDMs) are increasingly applied across macroscales using detection-nondetection data. These models typically assume that a single set of regression coefficients can adequately describe species-environment relationships and/or population trends. However, such relationships often show nonlinear and/or spatially-varying patterns that arise from complex interactions with abiotic and biotic processes that operate at different scales. Spatially-varying coefficient (SVC) models can readily account for variability in the effects of environmental covariates. Yet, their use in ecology is relatively scarce due to gaps in understanding the inferential benefits that SVC models can provide compared to simpler frameworks. ### Innovation diff --git a/code/archived/bbs-predict/bbs-data-prep.R b/code/archived/bbs-predict/bbs-data-prep.R deleted file mode 100644 index 16b1301..0000000 --- a/code/archived/bbs-predict/bbs-data-prep.R +++ /dev/null @@ -1,103 +0,0 @@ -# bbs-data-prep.R: this script takes the raw BBS data and preps it for analysis. Note -# that this script does not generate the covariate data. -# Author: Jeffrey W. Doser - -rm(list = ls()) -library(tidyverse) -library(lubridate) -library(sp) -library(raster) -library(sf) -library(stars) -# For linking bird codes with species info. -library(wildlifeR) - -# Read in BBS Data -------------------------------------------------------- -# This code extracts the BBS data directly from BBS downloaded data. It is -# memory intensive, and so it is commented out and I read in the saved object -# bbs.dat below that contains the data for the species of interest across the -# entire continental US from 1972-present. -# These are the 10-stop summary data -# bbs.dat <- list.files(path = "data/BBS/States/", full.names = TRUE) %>% -# lapply(read.csv) %>% -# bind_rows() -# # Get associated route data -# route.dat <- read.csv("data/BBS/routes.csv") -# # Note that route id is nested within state. -# # Join BBS data with route data -# bbs.dat <- left_join(bbs.dat, route.dat, by = c('Route', 'CountryNum', 'StateNum')) -# # Grab BBS data from 1970 and later, as well as routes with their starting points -# # east of the 100th meridian. -# bbs.dat <- bbs.dat %>% -# filter(Year == 2019, RPID == 101) -# # Select columns of interest -# bbs.dat <- bbs.dat %>% -# dplyr::select(RouteDataID, RouteName, Latitude, Longitude, AOU, starts_with("Count")) %>% -# # Data set only include US records, so can remove CountryNum -# dplyr::select(-CountryNum) -# # Fill in implicit zeros. For this situation, gives a value for each species in -# # each existing combination of RouteDataID, Latitude, Longitude, and Year -# # RouteDataID is a unique identifier for each combo of CountryNum, StateNum, Route, RPID, and Year -# bbs.dat <- bbs.dat %>% -# complete(AOU, nesting(RouteDataID, RouteName, Latitude, Longitude)) -# # Replace NAs with 0s for all columns at once. -# bbs.dat <- bbs.dat %>% -# replace(is.na(.), 0) -# # Save the BBS data to avoid having to run the above memory intensive code a bunch. -# save(bbs.dat, file = "data/bbs-predict/bbs-raw-data.rda") - -# Load in the above BBS data ---------------------------------------------- -load("data/bbs-predict/bbs-raw-data.rda") -# Get associated weather data -weather.dat <- read.csv("data/BBS/weather.csv") - -# Join data with Weather data -# Get date in proper format -weather.dat <- weather.dat %>% - unite('date', sep = '-', Year, Month, Day, remove = FALSE) -weather.dat$date <- as.Date(weather.dat$date, tz = "America/New_York") -# Get julian date of each survey -weather.dat$julian <- as.numeric(format(weather.dat$date, '%j')) -weather.covs <- weather.dat %>% - filter(RouteDataID %in% unique(bbs.dat$RouteDataID)) - -# Get data in format for spAbundance --------------------------------------- -bbs.dat <- left_join(bbs.dat, weather.covs, by = c('RouteDataID')) -# Sort data by species, Year, Longitude, Latitude -bbs.dat <- bbs.dat %>% - arrange(AOU, Longitude, Latitude) -# bbs.dat %>% -# group_by(RouteDataID) %>% -# summarize(n.coords = n_distinct(Longitude)) %>% -# arrange(desc(n.coords)) -coords <- unique(bbs.dat[, c('Longitude', 'Latitude')]) -# Number of routes -J <- nrow(coords) -# Create a site index for easy linking. -coords$site.index <- 1:J -# Join the site index with the full data -bbs.dat <- bbs.dat %>% - left_join(coords, by = c('Longitude', 'Latitude')) - -# Detection Covariates ---------------- -J <- nrow(coords) -day <- rep(NA, J) -obs <- rep(NA, J) -# Unique site indices for linking -site.ordered.indx <- sort(unique(bbs.dat$site.index)) -# Calculate -for (j in 1:J) { - tmp <- bbs.dat %>% - filter(site.index == site.ordered.indx[j]) - day[j] <- first(tmp$julian) - obs[j] <- first(tmp$ObsN) -} - -# Get observed species richness ------- -y <- bbs.dat %>% - mutate(totalCount = ifelse(Count10 + Count20 + Count30 + Count40 + Count50 > 0, 1, 0)) %>% - group_by(site.index) %>% - summarize(obs.rich = sum(totalCount)) - -# Save the data ----------------------------------------------------------- -save(coords, y, obs, day, file = "data/bbs-predict/bbs-rich-data.rda") diff --git a/code/archived/bbs-predict/get-covs.R b/code/archived/bbs-predict/get-covs.R deleted file mode 100644 index 5fb6e11..0000000 --- a/code/archived/bbs-predict/get-covs.R +++ /dev/null @@ -1,58 +0,0 @@ -library(AOI) -library(climateR) -library(sf) -library(raster) -library(rasterVis) -library(elevatr) -library(ggplot2) -library(cowplot) - -load("data/bbs-predict-bbs-rich-data.rda") -coords.sf <- st_as_sf(coords, - coords = c('Longitude', 'Latitude'), - crs = 4326) - -period <- "19812010" -cvars <- c("tmax","tmin","ppt") -nc.vars <- length(cvars) -J <- nrow(coords.sf) -X <- matrix(NA, J, nc.vars + 1) - -for (i in 1:nc.vars){ - cdat = getTerraClimNormals(coords.sf, cvars[i], period, 1:12)[[1]] - plt_dat = extract(cdat, coords.sf) - if (cvars[i] == "tmax"){ - val = apply(plt_dat[,2:ncol(plt_dat)], 1, max) - }else if (cvars[i] == "tmin"){ - val = apply(plt_dat[,2:ncol(plt_dat)], 1, min) - }else if (cvars[i] == "ppt"){ - val = apply(plt_dat[,2:ncol(plt_dat)], 1, sum) - }else if (cvars[i] %in% c("pet","aet","def")){ - val = apply(plt_dat[,5:9], 1, sum) - }else { - val = apply(plt_dat[,5:9], 1, mean) - } - X[,i] = val -} -#### Download elevation data -## Source: Amazon Web Services (AWS) Terrain Tiles (https://registry.opendata.aws/terrain-tiles/) -## Citation: Terrain Tiles was accessed on INSERT DATE from https://registry.opendata.aws/terrain-tiles. -## Data accessed on: August 17, 2022 - -elev = get_elev_point(coords.sf, src = "aws") -X[,nc.vars + 1] = elev$elevation - -# Format data for spAbundance --------------------------------------------- -my.proj <- "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=km +no_defs" -coords.aea <- coords.sf %>% - st_transform(my.proj) -coords <- st_coordinates(coords.aea) -data.list <- list(y = y$obs.rich, - covs = data.frame(tmax = X[, 1], - tmin = X[, 2], - ppt = X[, 3], - elev = X[, 4], - day = day, - obs = obs), - coords = coords) -save(data.list, file = 'data/bbs-predict/spAbundance-data.rda') diff --git a/code/archived/bbs-predict/main.R b/code/archived/bbs-predict/main.R deleted file mode 100644 index 19885b0..0000000 --- a/code/archived/bbs-predict/main.R +++ /dev/null @@ -1,29 +0,0 @@ -rm(list = ls()) -library(spAbundance) - -load('data/bbs-predict/spAbundance-data.rda') - -out <- svcAbund(formula = ~ scale(tmax) + scale(ppt), - data = data.list, - n.batch = 400, - batch.length = 25, - NNGP = TRUE, - n.neighbors = 5, - svc.cols = c(1, 2, 3), - n.report = 1) - - -# Exploratory variogram analysis -library(MBA) -library(fields) -library(geoR) -par(mfrow=c(1,2)) -out.lm <- lm(data.list$y ~ scale(data.list$covs$tmax) + scale(data.list$covs$ppt)) -curr.res <- out.lm$residuals -vario.1.raw <- variog(coords = data.list$coords, data = data.list$y) -par(mfrow = c(1, 2)) -plot(vario.1.raw, pch=16) -vario.1.res <- variog(coords = data.list$coords, data = curr.res) -plot(vario.1.res, pch = 16) -par(mfrow = c(1, 1)) - diff --git a/code/archived/main-figure.R b/code/archived/main-figure.R deleted file mode 100644 index cf7977a..0000000 --- a/code/archived/main-figure.R +++ /dev/null @@ -1,318 +0,0 @@ -rm(list = ls()) -library(spOccupancy) -library(tidyverse) -library(ggpubr) -library(MASS) -library(spBayes) -library(viridis) - -# Simulate the occupancy process ------------------------------------------ -set.seed(500) -# Intercept + linear + quadratic + stratum + interaction + missing interaction -J.x <- 50 -J.y <- 50 -J <- J.x * J.y -# Subroutines ----------------------------------------------------------- -logit <- function(theta, a = 0, b = 1){log((theta-a)/(b-theta))} -logit.inv <- function(z, a = 0, b = 1){b-(b-a)/(1+exp(z))} - -# Matrix of spatial locations -s.x <- seq(0, 1, length.out = J.x) -s.y <- seq(0, 1, length.out = J.y) -coords <- as.matrix(expand.grid(s.x, s.y)) - -# Get strata for each cell -strata <- ifelse(coords[, 1] <= .33 & coords[, 2] <= .33, 1, - ifelse(coords[, 1] <= .33 & coords[, 2] <= .67, 2, - ifelse(coords[, 1] <= .33 & coords[, 2] <= 1, 3, - ifelse(coords[, 1] <= .67 & coords[, 2] <= .33, 4, - ifelse(coords[, 1] <= .67 & coords[, 2] <= .67, 5, - ifelse(coords[, 1] <= .67 & coords[, 2] <= 1, 6, - ifelse(coords[, 1] <= 1 & coords[, 2] <= .33, 7, - ifelse(coords[, 1] <= 1 & coords[, 2] <= .67, 8, - 9)))))))) - -# Main covariate -beta.0 <- 0 -beta.linear <- 0.5 -beta.quadratic <- -0.5 -beta.strata <- rnorm(n_distinct(strata)) -beta.interaction <- 0.5 -beta.miss.int <- 0.5 -# Main covariate -x.1 <- seq(from = -5, to = 5, length.out = J) -x.interaction <- c(t(matrix(x.1, J.x, J.y))) -sigma.sq <- 3 -phi <- 3 / .8 -Sigma <- mkSpCov(coords, as.matrix(sigma.sq), as.matrix(0), as.matrix(phi), 'exponential') -x.miss.int <- mvrnorm(1, rep(0, J), Sigma) - -# Form detection covariate (if any) ------------------------------------- -n.rep <- rep(4, J) -n.rep.max <- max(n.rep) -alpha <- c(0.5, -0.3) -n.alpha <- length(alpha) -X.p <- array(NA, dim = c(J, n.rep.max, n.alpha)) -# Get index of surveyed replicates for each site. -rep.indx <- list() -for (j in 1:J) { - rep.indx[[j]] <- sample(1:n.rep.max, n.rep[j], replace = FALSE) -} -X.p[, , 1] <- 1 -if (n.alpha > 1) { - for (i in 2:n.alpha) { - for (j in 1:J) { - X.p[j, rep.indx[[j]], i] <- rnorm(n.rep[j]) - } # j - } # i -} - -# Simulate spatial random effect ---------------------------------------- -svc.cols <- c(1) -p.svc <- length(svc.cols) -cov.model <- 'exponential' -w.mat <- matrix(NA, J, p.svc) -phi <- c(3 / 0.5) -sigma.sq <- 0.8 -theta <- as.matrix(phi) -for (i in 1:p.svc) { - Sigma <- mkSpCov(coords, as.matrix(sigma.sq[i]), as.matrix(0), theta[i, ], cov.model) - # Random spatial process - w.mat[, i] <- mvrnorm(1, rep(0, J), Sigma) -} -X.w <- matrix(1, J, 1) -w <- c(t(w.mat)) - -# Latent Occupancy Process ---------------------------------------------- -psi <- logit.inv(beta.0 + w + beta.linear * x.1 + beta.quadratic * x.1^2 + - beta.strata[strata] * x.1 + beta.interaction * x.1 * x.interaction + - beta.miss.int * x.1 * x.miss.int) -z <- rbinom(J, 1, psi) - -# Data Formation -------------------------------------------------------- -p <- matrix(NA, nrow = J, ncol = n.rep.max) -y <- matrix(NA, nrow = J, ncol = n.rep.max) -for (j in 1:J) { - p[j, rep.indx[[j]]] <- logit.inv(X.p[j, rep.indx[[j]], ] %*% as.matrix(alpha)) - y[j, rep.indx[[j]]] <- rbinom(n.rep[j], 1, p[j, rep.indx[[j]]] * z[j]) -} # j - -# Plot of the full covariate effect -------------------------------------- -full.cov.effect <- beta.linear + beta.quadratic * x.1 + beta.strata[strata] + - beta.interaction * x.interaction + beta.miss.int * x.miss.int -plot.df <- data.frame(easting = coords[, 1], - northing = coords[, 2], - beta.linear = beta.linear, - x.1 = x.1, - x.interaction = x.interaction, - x.miss.int = x.miss.int, - beta.quadratic = beta.quadratic * x.1, - beta.strata = beta.strata[strata], - beta.interaction = beta.interaction * x.interaction, - beta.miss.int = beta.miss.int * x.miss.int, - beta.full = full.cov.effect) -linear.map <- ggplot(data = plot.df, aes(x = easting, y = northing, fill = beta.linear)) + - geom_raster() + - scale_fill_gradient2(midpoint = 0, low = '#B2182B', mid = 'white', high = '#2166AC', - na.value = NA, limits = c(-5, 5)) + - scale_x_continuous(expand = c(0, 0)) + - scale_y_continuous(expand = c(0, 0)) + - theme_bw(base_size = 10) + - labs(x = 'Easting', y = 'Northing', fill = '', title = 'Linear') + - guides(fill = 'none') + - theme(text = element_text(family="LM Roman 10"), - axis.ticks.x = element_blank(), - axis.text.x = element_blank(), - #plot.background = element_rect(fill = 'cornsilk', color = 'black'), - axis.text.y = element_blank(), - axis.ticks.y = element_blank()) -ggsave(plot = linear.map, file = 'figures/main-fig-linear.png', device = 'png', - units = 'in', width = 2, height = 2) -quadratic.map <- ggplot(data = plot.df, aes(x = easting, y = northing, fill = beta.quadratic)) + - geom_raster() + - scale_fill_gradient2(midpoint = 0, low = '#B2182B', mid = 'white', high = '#2166AC', - na.value = NA) + - theme_bw(base_size = 10) + - scale_x_continuous(expand = c(0, 0)) + - scale_y_continuous(expand = c(0, 0)) + - labs(x = 'Easting', y = 'Northing', title = 'Quadratic', fill = '') + - guides(fill = 'none') + - theme(text = element_text(family="LM Roman 10"), - axis.ticks.x = element_blank(), - axis.text.x = element_blank(), - axis.text.y = element_blank(), - axis.ticks.y = element_blank()) -ggsave(plot = quadratic.map, file = 'figures/main-fig-quadratic.png', device = 'png', - units = 'in', width = 2, height = 2) - -strata.map <- ggplot(data = plot.df, aes(x = easting, y = northing, fill = beta.strata)) + - geom_raster() + - scale_fill_gradient2(midpoint = 0, low = '#B2182B', mid = 'white', high = '#2166AC', - na.value = NA) + - theme_bw(base_size = 10) + - scale_x_continuous(expand = c(0, 0)) + - scale_y_continuous(expand = c(0, 0)) + - guides(fill = 'none') + - labs(x = 'Easting', y = 'Northing', title = 'Stratum', fill = '') + - theme(text = element_text(family="LM Roman 10"), - axis.ticks.x = element_blank(), - axis.text.x = element_blank(), - axis.text.y = element_blank(), - axis.ticks.y = element_blank()) -ggsave(plot = strata.map, file = 'figures/main-fig-strata.png', device = 'png', - units = 'in', width = 2, height = 2) -interaction.map <- ggplot(data = plot.df, aes(x = easting, y = northing, fill = beta.interaction)) + - geom_raster() + - scale_fill_gradient2(midpoint = 0, low = '#B2182B', mid = 'white', high = '#2166AC', - na.value = NA) + - theme_bw(base_size = 10) + - scale_x_continuous(expand = c(0, 0)) + - scale_y_continuous(expand = c(0, 0)) + - guides(fill = 'none') + - labs(x = 'Easting', y = 'Northing', fill = '', title = 'Interaction') + - theme(text = element_text(family="LM Roman 10"), - axis.text.x = element_blank(), - axis.text.y = element_blank(), - axis.ticks.x = element_blank(), - axis.ticks.y = element_blank()) -ggsave(plot = interaction.map, file = 'figures/main-fig-interaction.png', device = 'png', - units = 'in', width = 2, height = 2) -miss.int.map <- ggplot(data = plot.df, aes(x = easting, y = northing, fill = beta.miss.int)) + - geom_raster() + - scale_fill_gradient2(midpoint = 0, low = '#B2182B', mid = 'white', high = '#2166AC', - na.value = NA) + - theme_bw(base_size = 10) + - scale_x_continuous(expand = c(0, 0)) + - scale_y_continuous(expand = c(0, 0)) + - guides(fill = 'none') + - labs(x = 'Easting', y = 'Northing', fill = '', title = 'Unknown interactions') + - theme(text = element_text(family="LM Roman 10"), - axis.text.x = element_blank(), - axis.text.y = element_blank(), - axis.ticks.x = element_blank(), - axis.ticks.y = element_blank()) -ggsave(plot = miss.int.map, file = 'figures/main-fig-miss-int.png', device = 'png', - units = 'in', width = 2, height = 2) -full.effect.map <- ggplot(data = plot.df, aes(x = easting, y = northing, fill = beta.full)) + - geom_raster() + - scale_fill_gradient2(midpoint = 0, low = '#B2182B', mid = 'white', high = '#2166AC', - na.value = NA) + - theme_bw(base_size = 10) + - scale_x_continuous(expand = c(0, 0)) + - scale_y_continuous(expand = c(0, 0)) + - labs(x = 'Easting', y = 'Northing', title = 'Full Effect', fill = '') + - guides(fill = 'none') + - theme(text = element_text(family="LM Roman 10"), - axis.ticks.x = element_blank(), - axis.text.x = element_blank(), - axis.text.y = element_blank(), - axis.ticks.y = element_blank()) -ggsave(plot = full.effect.map, file = 'figures/main-fig-full-effect.png', device = 'png', - units = 'in', width = 2, height = 2) - -cov.1.map <- ggplot(data = plot.df, aes(x = easting, y = northing, fill = x.1)) + - geom_raster() + - scale_fill_gradient2(midpoint = 0, low = '#B2182B', mid = 'white', high = '#2166AC', - na.value = NA) + - scale_x_continuous(expand = c(0, 0)) + - scale_y_continuous(expand = c(0, 0)) + - theme_bw(base_size = 10) + - labs(x = 'Easting', y = 'Northing', fill = '', title = 'Covariate') + - guides(fill = 'none') + - theme(text = element_text(family="LM Roman 10"), - axis.ticks.x = element_blank(), - # plot.background = element_rect(fill = 'gray', color = 'black'), - axis.text.x = element_blank(), - axis.text.y = element_blank(), - axis.ticks.y = element_blank()) - -cov.int.map <- ggplot(data = plot.df, aes(x = easting, y = northing, fill = x.interaction)) + - geom_raster() + - scale_fill_gradient2(midpoint = 0, low = '#B2182B', mid = 'white', high = '#2166AC', - na.value = NA) + - scale_x_continuous(expand = c(0, 0)) + - scale_y_continuous(expand = c(0, 0)) + - theme_bw(base_size = 10) + - labs(x = 'Easting', y = 'Northing', fill = '', title = 'Interacting Effect') + - guides(fill = 'none') + - theme(text = element_text(family="LM Roman 10"), - axis.ticks.x = element_blank(), - # plot.background = element_rect(fill = 'gray', color = 'black'), - axis.text.x = element_blank(), - axis.text.y = element_blank(), - axis.ticks.y = element_blank()) - -cov.miss.int.map <- ggplot(data = plot.df, aes(x = easting, y = northing, fill = x.miss.int)) + - geom_raster() + - scale_fill_gradient2(midpoint = 0, low = '#B2182B', mid = 'white', high = '#2166AC', - na.value = NA) + - scale_x_continuous(expand = c(0, 0)) + - scale_y_continuous(expand = c(0, 0)) + - theme_bw(base_size = 10) + - labs(x = 'Easting', y = 'Northing', fill = '', title = 'Missing interactions') + - guides(fill = 'none') + - theme(text = element_text(family="LM Roman 10"), - axis.ticks.x = element_blank(), - # plot.background = element_rect(fill = 'gray', color = 'black'), - axis.text.x = element_blank(), - axis.text.y = element_blank(), - axis.ticks.y = element_blank()) - -# Fit an SVC model -------------------------------------------------------- -data.list <- list(y = y, - occ.covs = data.frame(x.1 = x.1), - det.covs = list(det.cov.1 = X.p[, , 2]), - coords = coords) -priors <- list(phi.unif = list(a = 3 / .9, b = 3 / .1)) - -n.batch <- 800 -batch.length <- 25 -n.chains <- 1 -n.burn <- 10000 -n.thin <- 10 -out <- svcPGOcc(occ.formula = ~ x.1, - det.formula = ~ det.cov.1, - data = data.list, - priors = priors, - svc.cols = c(1, 2), - tuning = list(phi = 0.5), - n.batch = n.batch, - batch.length = batch.length, - n.chains = n.chains, - n.neighbors = 10, - n.burn = n.burn, - n.thin = n.thin, - n.report = 10) -svc.samples <- getSVCSamples(out) -svc.means <- apply(svc.samples$x.1, 2, mean) - -plot.df$svc.est <- svc.means -svc.est.map <- ggplot(data = plot.df, aes(x = easting, y = northing, fill = svc.est)) + - geom_raster() + - scale_fill_gradient2(midpoint = 0, low = '#B2182B', mid = 'white', high = '#2166AC', - na.value = NA) + - theme_bw(base_size = 10) + - scale_x_continuous(expand = c(0, 0)) + - scale_y_continuous(expand = c(0, 0)) + - labs(x = 'Easting', y = 'Northing', title = 'SVC Estimate', fill = '') + - guides(fill = 'none') + - theme(text = element_text(family="LM Roman 10"), - axis.ticks.x = element_blank(), - plot.background = element_rect(fill = 'transparent', color = NA), - axis.text.x = element_blank(), - axis.text.y = element_blank(), - axis.ticks.y = element_blank()) -ggsave(plot = svc.est.map, file = 'figures/main-fig-svc-est.png', device = 'png', - units = 'in', width = 2, height = 2, bg = NULL) - -# Put it all together ----------------------------------------------------- -# ggarrange(linear.curve, quadratic.curve, strata.curve, interaction.curve, -# svc.curve, linear.map, quadratic.map, strata.map, interaction.map, -# svc.map, ncol = 5, nrow = 2) -# ggsave(file = 'figures/main-figure.png', device = 'png', units = 'in', width = 10, height = 5) -# ggarrange(linear.curve, linear.map, -# quadratic.curve, quadratic.map, -# strata.curve, strata.map, -# interaction.curve, interaction.map, -# svc.curve, svc.map, ncol = 2, nrow = 5) -# ggsave(file = 'figures/main-figure.png', device = 'png', units = 'in', height = 12, width = 5) diff --git a/code/archived/testing.R b/code/archived/testing.R deleted file mode 100644 index daae82d..0000000 --- a/code/archived/testing.R +++ /dev/null @@ -1,257 +0,0 @@ -rm(list = ls()) -library(MASS) -library(spBayes) -library(tidyverse) -library(viridis) -library(ggpubr) -library(devtools) -load_all("~/Dropbox/DFKZ21/spOccupancy") -# Simulate the occupancy process ------------------------------------------ -# set.seed(500) -# Intercept + linear + quadratic + stratum + interaction + missing interaction -# Subroutines ----------------------------------------------------------- -logit <- function(theta, a = 0, b = 1){log((theta-a)/(b-theta))} -logit.inv <- function(z, a = 0, b = 1){b-(b-a)/(1+exp(z))} -# Simulate a data set based on the actual covariate data that you have. - -# Main covariate -beta.0 <- 0 -beta.linear <- 0.5 -beta.quadratic <- -0.5 -beta.int <- -0.5 -# sigma.sq <- 3 -# phi <- 3 / .8 -# Sigma <- mkSpCov(coords, as.matrix(sigma.sq), as.matrix(0), as.matrix(phi), 'exponential') -# x <- mvrnorm(1, rep(0, J), Sigma) -load("data/rel-import/EATO-spOccupancy-data.rda") -coords <- data.EATO$coords -# coord.x <- runif(nrow(data.EATO$coords)) -# coord.y <- runif(nrow(data.EATO$coords)) -# coords <- cbind(coord.x, coord.y) -# coords <- coords -J <- nrow(coords) -curr.points <- sample(1:J, 500, replace = FALSE) -coords <- coords[curr.points, ] -forest <- data.EATO$occ.covs$forest[curr.points] -tmax <- data.EATO$occ.covs$tmax[curr.points] -J <- length(curr.points) -# forest <- rnorm(J) -# tmax <- rnorm(J) - -# Form detection covariate (if any) ------------------------------------- -n.rep <- rep(4, J) -n.rep.max <- max(n.rep) -alpha <- c(0.5, -0.3) -n.alpha <- length(alpha) -X.p <- array(NA, dim = c(J, n.rep.max, n.alpha)) -# Get index of surveyed replicates for each site. -rep.indx <- list() -for (j in 1:J) { - rep.indx[[j]] <- sample(1:n.rep.max, n.rep[j], replace = FALSE) -} -X.p[, , 1] <- 1 -if (n.alpha > 1) { - for (i in 2:n.alpha) { - for (j in 1:J) { - X.p[j, rep.indx[[j]], i] <- rnorm(n.rep[j]) - } # j - } # i -} - -# Simulate spatial random effect ---------------------------------------- -svc.cols <- c(1) -p.svc <- length(svc.cols) -cov.model <- 'exponential' -w.mat <- matrix(NA, J, p.svc) -# phi <- c(3 / mean(dist(coords))) -phi <- c(3 / quantile(dist(coords), 0.7)) -sigma.sq <- 1 -theta <- as.matrix(phi) -for (i in 1:p.svc) { - Sigma <- mkSpCov(coords, as.matrix(sigma.sq[i]), as.matrix(0), theta[i, ], cov.model) - # Random spatial process - w.mat[, i] <- mvrnorm(1, rep(0, J), Sigma) -} -w <- c(t(w.mat)) - -# Latent Occupancy Process ---------------------------------------------- -# TODO: change for different forms -psi <- logit.inv(beta.0 + w + beta.linear * scale(tmax) + beta.quadratic * I(scale(tmax)^2) + - beta.linear * scale(forest) + beta.quadratic * I(scale(forest)^2)) -# psi <- logit.inv(beta.0 + w + beta.linear * scale(forest) + beta.quadratic * scale(forest)^2) -weights <- rep(1, J) -z <- rbinom(J, weights, psi) - -# Data Formation -------------------------------------------------------- -p <- matrix(NA, nrow = J, ncol = n.rep.max) -y <- matrix(NA, nrow = J, ncol = n.rep.max) -for (j in 1:J) { - p[j, rep.indx[[j]]] <- logit.inv(X.p[j, rep.indx[[j]], ] %*% as.matrix(alpha)) - y[j, rep.indx[[j]]] <- rbinom(n.rep[j], 1, p[j, rep.indx[[j]]] * z[j]) -} # j - - -data.list <- list(y = y, - occ.covs = data.frame(forest = forest, - tmax = tmax), - det.covs = list(det.cov.1 = X.p[, , 2]), - coords = coords, - weights = weights) -dist.coords <- dist(coords) -priors <- list(phi.unif = list(a = c(3 / max(dist.coords), 3 / quantile(dist.coords, 0.5), - 3 / quantile(dist.coords, 0.5)), - b = c(3 / quantile(dist.coords, 0.6), 3 / quantile(dist.coords, 0.5), - 3 / quantile(dist.coords, 0.5)))) -priors <- list(phi.unif = list(a = 3 / quantile(dist.coords, 0.95), b = 3 / quantile(dist.coords, 0.1))) -Sigma <- matrix(c(1, 0.8, 0.1, 0.8, 1, 0.1, 0.1, 0.1, 1), 3, 3) -# lambda <- matrix(c(1, 0.8, 0, 1), 2, 2) -lambda <- t(chol(Sigma)) -inits <- list(lambda = lambda) - -n.batch <- 800 -batch.length <- 25 -n.chains <- 1 -n.burn <- 10000 -n.thin <- 10 - -n.batch <- 400 -batch.length <- 25 -n.chains <- 1 -n.burn <- 5000 -n.thin <-5 -# This works, can easily recover all parameters here, so there -# clearly isn't a fundamental problem with the data locations. -# out.2 <- spPGOcc(occ.formula = ~ scale(tmax) + I(scale(tmax)^2), -# det.formula = ~ det.cov.1, -# data = data.list, -# # priors = priors, -# tuning = list(phi = 0.5), -# n.batch = n.batch, -# batch.length = batch.length, -# n.chains = n.chains, -# n.neighbors = 5, -# n.burn = n.burn, -# n.thin = n.thin, -# n.report = 10) - -# psi.means.2 <- apply(out.2$psi.samples, 2, mean) -# plot(psi, psi.means.2, pch = 19) -# abline(0, 1) - -# n.batch <- 1600 -# batch.length <- 25 -# n.chains <- 1 -# n.burn <- 20000 -# n.thin <- 10 - -out <- svcPGOcc(occ.formula = ~ scale(tmax) + scale(forest), - det.formula = ~ det.cov.1, - data = data.list, - priors = priors, - inits = inits, - svc.cols = c(1, 2, 3), - tuning = list(phi = 0.5, sigma.sq = 0.1, lambda = 0.5), - n.batch = n.batch, - batch.length = batch.length, - n.chains = n.chains, - n.neighbors = 5, - n.burn = n.burn, - n.thin = n.thin, - n.report = 10) -svc.samples <- getSVCSamples(out) -int.means <- apply(svc.samples[[1]], 2, mean) -tmax.means <- apply(svc.samples[[2]], 2, mean) -forest.means <- apply(svc.samples[[3]], 2, mean) -# svc.means <- svc.samples$x[10, ] - -plot(w, int.means, pch = 19) -abline(0, 1) - -# Tmax -svc.true <- beta.linear + 2 * beta.quadratic * scale(tmax) - -plot(svc.true, tmax.means, pch = 19) -abline(0, 1) - -# Forest -forest.svc.true <- beta.linear + 2 * beta.quadratic * scale(forest) - -plot(forest.svc.true, forest.means, pch = 19) -abline(0, 1) - -psi.means <- apply(out$psi.samples, 2, mean) -plot(psi, psi.means, pch = 19) -abline(0, 1) - -plot.df <- data.frame(x = coords[, 1], - y = coords[, 2], - true = svc.true, - int = int.means, - psi = psi, - int.true = w, - svc.est = tmax.means) -svc.est.map <- ggplot(data = plot.df, aes(x = x, y = y, color = svc.est)) + - geom_point(size = 5) + - scale_color_gradient2(midpoint = 0, low = '#B2182B', mid = 'white', high = '#2166AC', - na.value = NA) + - theme_bw(base_size = 10) + - scale_x_continuous(expand = c(0, 0)) + - scale_y_continuous(expand = c(0, 0)) + - labs(x = 'Easting', y = 'Northing', title = 'SVC Estimate', color = '') + - guides(color = 'none') + - theme(text = element_text(family="LM Roman 10"), - axis.ticks.x = element_blank(), - plot.background = element_rect(color = 'transparent', fill = NA), - axis.text.x = element_blank(), - axis.text.y = element_blank(), - axis.ticks.y = element_blank()) - -svc.true.map <- ggplot(data = plot.df, aes(x = x, y = y, color = svc.true)) + - geom_point(size = 5) + - scale_color_gradient2(midpoint = 0, low = '#B2182B', mid = 'white', high = '#2166AC', - na.value = NA) + - theme_bw(base_size = 10) + - scale_x_continuous(expand = c(0, 0)) + - scale_y_continuous(expand = c(0, 0)) + - labs(x = 'Easting', y = 'Northing', title = 'SVC True', color = '') + - guides(color = 'none') + - theme(text = element_text(family="LM Roman 10"), - axis.ticks.x = element_blank(), - plot.background = element_rect(color = 'transparent', fill = NA), - axis.text.x = element_blank(), - axis.text.y = element_blank(), - axis.ticks.y = element_blank()) -ggarrange(svc.est.map, svc.true.map) - -int.est.map <- ggplot(data = plot.df, aes(x = x, y = y, color = int)) + - geom_point(size = 5) + - scale_color_gradient2(midpoint = 0, low = '#B2182B', mid = 'white', high = '#2166AC', - na.value = NA) + - theme_bw(base_size = 10) + - scale_x_continuous(expand = c(0, 0)) + - scale_y_continuous(expand = c(0, 0)) + - labs(x = 'Easting', y = 'Northing', title = 'Intercept Estimate', color = '') + - guides(color = 'none') + - theme(text = element_text(family="LM Roman 10"), - axis.ticks.x = element_blank(), - plot.background = element_rect(color = 'transparent', fill = NA), - axis.text.x = element_blank(), - axis.text.y = element_blank(), - axis.ticks.y = element_blank()) - -int.true.map <- ggplot(data = plot.df, aes(x = x, y = y, color = int.true)) + - geom_point(size = 5) + - scale_color_gradient2(midpoint = 0, low = '#B2182B', mid = 'white', high = '#2166AC', - na.value = NA) + - theme_bw(base_size = 10) + - scale_x_continuous(expand = c(0, 0)) + - scale_y_continuous(expand = c(0, 0)) + - labs(x = 'Easting', y = 'Northing', title = 'Intercept Truth', color = '') + - guides(color = 'none') + - theme(text = element_text(family="LM Roman 10"), - axis.ticks.x = element_blank(), - plot.background = element_rect(color = 'transparent', fill = NA), - axis.text.x = element_blank(), - axis.text.y = element_blank(), - axis.ticks.y = element_blank()) -ggarrange(int.est.map, int.true.map) diff --git a/code/archived/tree/archived/data-prep.R b/code/archived/tree/archived/data-prep.R deleted file mode 100644 index b566ae6..0000000 --- a/code/archived/tree/archived/data-prep.R +++ /dev/null @@ -1,78 +0,0 @@ -rm(list = ls()) -library(tidyverse) -library(sf) - -# Load data --------------------------------------------------------------- -# Don't move these somewhere else -load("~/../andy/data_actual/actual_forest_plot_spp_data.RData") -# Spatial coordinates -load("~/../andy/data_actual/actual_forest_plot_coords.RData") -# Covariates -load("~/../andy/data_actual/X_vars.rda") - -coords <- actual_forest_plot_coords %>% - select(x = LON, y = LAT) %>% - as.matrix() -# Convert to matrix -# Number of sites -J <- nrow(coords) -# Number of species -N <- n_distinct(actual_forest_plot_spp_data$SPCD) -# Species ids -sp.id <- unique(actual_forest_plot_spp_data$SPCD) -# Get data in species x site matrix. -y <- actual_forest_plot_spp_data %>% - pull(SP_PRESENT) -y <- matrix(y, N, J) -rownames(y) <- sp.id -y.bio <- actual_forest_plot_spp_data %>% - pull(BIO_ACRE) -y.bio <- matrix(y.bio, N, J) -rownames(y.bio) <- sp.id - -# Format data for spOccupancy --------------------------------------------- -usa <- st_as_sf(maps::map("state", fill = TRUE, plot = FALSE)) -usa <- usa %>% - st_transform(crs = "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs") -coords.sf <- st_as_sf(data.frame(coords), - coords = c('x', 'y'), - crs = st_crs(usa)) - -# Reset all the variables -J <- nrow(coords) -# 318 is sugar maple. That is the species we're going to focus on. -curr.sp.indx <- which(sp.id == 318) -curr.indx <- 1:J -data.list <- list(y = y[curr.sp.indx, ], - coords = coords, - covs = X) -data.list$weights <- rep(1, J) -save(data.list, file = 'data/tree/stage-1-data.rda') - -# Format Stage 2 data for spAbundance ------------------------------------- -# Get ecoregion for splitting up data into smaller sets. -ecr <- st_read(dsn = "~/DFISD22/data/ecoregions/", layer = "us_eco_l3") -coords.sf <- st_as_sf(as.data.frame(data.list$coords), - coords = c("x", "y"), - crs = st_crs(usa)) - -# Get the state that each -ecrs.albers <- ecr %>% - st_transform(crs = st_crs(usa)) - -# Join points toBCRs -tmp <- st_join(coords.sf, ecrs.albers, join = st_intersects) -# Sites where species was observed -# site.indx <- which(data.list$y == 1) -data.list.2 <- list(y = ifelse(y.bio[curr.sp.indx, ] == 0, 0, log(y.bio[curr.sp.indx, ])), - coords = coords, - covs = as.data.frame(X), - z = data.list$y) -# data.list.2$covs$ecoregionL3 <- as.numeric(tmp$US_L3CODE)[site.indx] -# Fill in missing indices for ecoregion -# miss.indx <- which(is.na(data.list.2$covs$ecoregionL3)) -# # NOTE: hardcoded -# data.list.2$covs$ecoregionL3[c(692, 715, 793, 947)] <- 50 -# data.list.2$covs$ecoregionL3[c(6415, 6426)] <- 51 - -save(data.list.2, file = 'data/tree/stage-2-data.rda') diff --git a/code/archived/tree/archived/get-ndvi.R b/code/archived/tree/archived/get-ndvi.R deleted file mode 100644 index 6a49c4e..0000000 --- a/code/archived/tree/archived/get-ndvi.R +++ /dev/null @@ -1,12 +0,0 @@ -rm(list = ls()) -library(tidyverse) -library(sf) -library(MODIStsp) - -# Get boundary of region of interest (aka Vermont) ------------------------ -usa <- st_as_sf(maps::map("state", fill = TRUE, plot = FALSE)) -usa <- usa %>% - st_transform(crs = "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs") -vermont <- usa %>% - dplyr::filter(ID == 'vermont') - diff --git a/code/archived/tree/archived/main-full-SVI-stage-2.R b/code/archived/tree/archived/main-full-SVI-stage-2.R deleted file mode 100644 index 2d07d0f..0000000 --- a/code/archived/tree/archived/main-full-SVI-stage-2.R +++ /dev/null @@ -1,51 +0,0 @@ -# main-uni-gaussian: fits a Gaussian univariate SVC model analogous to spSVC -rm(list = ls()) -library(spAbundance) - -# Load data --------------------------------------------------------------- -load('data/tree/stage-2-data.rda') - -# Prep the model ---------------------------------------------------------- -# Priors -curr.indx <- which(data.list.2$y != 0) -dist.fia <- dist(data.list.2$coords[curr.indx, ]) -mean.dist <- mean(dist.fia) -min.dist <- 5 -max.dist <- max(dist.fia) -prior.list <- list(beta.normal = list(mean = 0, var = 100), - phi.unif = list(a = 3 / max.dist, b = 3 / min.dist), - sigma.sq = list(a = 2, b = 2), - tau.sq = c(2, 1)) -inits.list <- list(beta = 0, sigma.sq = 5, tau.sq = 0.2, phi = 3 / mean.dist) -tuning.list <- list(phi = c(0.3)) - -# Biggest -n.batch <- 2000 -batch.length <- 25 -n.burn <- 30000 -n.thin <- 20 -n.chains <- 1 - -# Big -# n.batch <- 500 -# batch.length <- 25 -# n.burn <- 7500 -# n.thin <- 5 - -# n.batch <- 50 -# batch.length <- 25 -# n.burn <- 500 -# n.thin <- 1 - -out <- svcAbund(formula = ~ scale(vpd) + I(scale(vpd)^2) + - scale(ppt) + I(scale(ppt)^2) + - scale(elev) + I(scale(elev)^2), - data = data.list.2, priors = prior.list, - inits = inits.list, tuning = tuning.list, svc.cols = c(1), - n.neighbors = 5, cov.model = 'exponential', NNGP = TRUE, - n.batch = n.batch, batch.length = batch.length, family = 'Gaussian-hurdle', - n.burn = n.burn, accept.rate = 0.43, n.thin = n.thin, - n.chains = n.chains, n.report = 1, n.omp.threads = 5) - -save(out, file = 'results/tree/stage-2-sugar-maple-full-SVI-samples.rda') - diff --git a/code/archived/tree/archived/main-full-stage-1.R b/code/archived/tree/archived/main-full-stage-1.R deleted file mode 100644 index a389429..0000000 --- a/code/archived/tree/archived/main-full-stage-1.R +++ /dev/null @@ -1,95 +0,0 @@ -# main-full-stage-1.R: this script fits a spatially-explicit GLM for sugar -# maple across the US. -rm(list = ls()) -library(tidyverse) -library(sf) -library(spOccupancy) - -# Load data --------------------------------------------------------------- -load("data/tree/stage-1-data.rda") - -# Prep the model ---------------------------------------------------------- -# Priors -# dist.fia <- dist(data.list$coords) -# mean.dist <- mean(dist.fia) # 1783.395 -# min.dist <- min(dist.fia) # 0.007832157 -# max.dist <- max(dist.fia) # 4642.168 -mean.dist <- 1783.395 -min.dist <- 0.007832157 -max.dist <- 4642.168 -# Might need further restrictions on phi later on. -prior.list <- list(beta.normal = list(mean = 0, var = 2.72), - phi.unif = list(a = 3 / max.dist, b = 3 / min.dist), - sigma.sq.ig = list(2, 5)) -inits.list <- list(beta = 0, phi = 3 / mean.dist, sigma.sq = 1) -tuning.list <- list(phi = 0.05) -n.neighbors <- 5 -# Should probably explore this more thoroughly. -n.factors <- 1 -cov.model <- "exponential" -n.chains <- 1 - -# Biggest -n.batch <- 2000 -batch.length <- 25 -n.burn <- 30000 -n.thin <- 20 - -# Medium -n.batch <- 200 -batch.length <- 25 -n.burn <- 3000 -n.thin <- 5 - -n.batch <- 1 -batch.length <- 25 -n.burn <- 0 -n.thin <- 1 - -out <- svcPGBinom(formula = ~ scale(tmin) + I(scale(tmin)^2) + - scale(ppt) + I(scale(ppt)^2) + - scale(elev) + I(scale(elev)^2), - data = data.list, priors = prior.list, - inits = inits.list, tuning = tuning.list, svc.cols = c(1), - n.neighbors = n.neighbors, cov.model = cov.model, NNGP = TRUE, - n.batch = n.batch, batch.length = batch.length, - n.burn = n.burn, accept.rate = 0.43, n.thin = n.thin, - n.chains = n.chains, n.report = 1, n.omp.threads = 10) - -svc.samples <- getSVCSamples(out) -psi.samples <- out$psi.samples - -save(svc.samples, psi.samples, - file = 'results/stage-1-sugar-maple-full-samples.rda') - -# w.means <- apply(out$w.samples[, 1, ], 2, mean) -# psi.ci.width <- psi.high - psi.low -# y.rep.means <- apply(out$y.rep.samples, 2, mean) -# -# plot.df <- data.frame(w.mean = w.means, -# psi.mean = psi.means, -# psi.ci.width = psi.ci.width, -# x = coords[curr.indx, 1], -# y = coords[curr.indx, 2]) -# -# plot.sf <- st_as_sf(plot.df, -# coords = c("x", "y"), -# crs = "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs") -# -# ggplot(plot.sf) + -# geom_sf(aes(col = w.mean)) + -# scale_color_viridis() + -# theme_bw(base_size = 18) -# -# psi.mean.plot <- ggplot(plot.sf) + -# geom_sf(aes(col = psi.mean)) + -# scale_color_viridis() + -# theme_bw(base_size = 18) -# -# psi.ci.width.plot <- ggplot(plot.sf) + -# geom_sf(aes(col = psi.ci.width)) + -# scale_color_viridis() + -# theme_bw(base_size = 18) -# -# ggarrange(psi.mean.plot, psi.ci.width.plot, labels = c('Mean', 'CI Width')) - diff --git a/code/archived/tree/archived/main-full-stage-2.R b/code/archived/tree/archived/main-full-stage-2.R deleted file mode 100644 index 453c173..0000000 --- a/code/archived/tree/archived/main-full-stage-2.R +++ /dev/null @@ -1,51 +0,0 @@ -# main-uni-gaussian: fits a Gaussian univariate SVC model analogous to spSVC -rm(list = ls()) -library(spAbundance) - -# Load data --------------------------------------------------------------- -load('data/tree/stage-2-data.rda') - -# Prep the model ---------------------------------------------------------- -# Priors -curr.indx <- which(data.list.2$y != 0) -dist.fia <- dist(data.list.2$coords[curr.indx, ]) -mean.dist <- mean(dist.fia) -min.dist <- 5 -max.dist <- max(dist.fia) -prior.list <- list(beta.normal = list(mean = 0, var = 100), - phi.unif = list(a = 3 / max.dist, b = 3 / min.dist), - sigma.sq = list(a = 2, b = 2), - tau.sq = c(2, 1)) -inits.list <- list(beta = 0, sigma.sq = 5, tau.sq = 0.2, phi = 3 / mean.dist) -tuning.list <- list(phi = c(0.3)) - -# Biggest -n.batch <- 2000 -batch.length <- 25 -n.burn <- 30000 -n.thin <- 20 -n.chains <- 1 - -# Big -# n.batch <- 500 -# batch.length <- 25 -# n.burn <- 7500 -# n.thin <- 5 - -# n.batch <- 50 -# batch.length <- 25 -# n.burn <- 500 -# n.thin <- 1 - -data.list.2$covs$vpd <- data.list.2$covs$vpd / sd(data.list.2$covs$vpd) -data.list.2$covs$ppt <- data.list.2$covs$ppt / sd(data.list.2$covs$ppt) - -out <- svcAbund(formula = ~ vpd + ppt + scale(elev) + I(scale(elev)^2), - data = data.list.2, priors = prior.list, - inits = inits.list, tuning = tuning.list, svc.cols = c(1, 2, 3), - n.neighbors = 5, cov.model = 'exponential', NNGP = TRUE, - n.batch = n.batch, batch.length = batch.length, family = 'Gaussian-hurdle', - n.burn = n.burn, accept.rate = 0.43, n.thin = n.thin, - n.chains = n.chains, n.report = 1, n.omp.threads = 5) - -save(out, file = 'results/tree/stage-2-sugar-maple-full-samples.rda') diff --git a/code/archived/tree/archived/main-state-stage-1.R b/code/archived/tree/archived/main-state-stage-1.R deleted file mode 100644 index 2f07ee4..0000000 --- a/code/archived/tree/archived/main-state-stage-1.R +++ /dev/null @@ -1,96 +0,0 @@ -# main-full-stage-1.R: this script fits a spatially-explicit GLM for sugar -# maple across the US. -rm(list = ls()) -library(tidyverse) -library(sf) -library(spOccupancy) - -# Get info from command line ---------------------------------------------- -# This code is to extract the current species name from the command line -# to easily run the script for different species -args <- commandArgs(trailingOnly = TRUE) -# Current species -curr.state <- args[1] -# Alternatively, if not running the script from the command line: -# TODO: -curr.state <- 'new york' -if(length(args) == 0) base::stop('Need to tell spOccupancy the current state') -print(curr.state) - -# Load data --------------------------------------------------------------- -load("data/tree/stage-1-data.rda") - -# Filter to one state of interest ----------------------------------------- -usa <- st_as_sf(maps::map("state", fill = TRUE, plot = FALSE)) -usa <- usa %>% - st_transform(crs = "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs") -state <- usa %>% filter(ID == 'new york') -coords.sf <- st_as_sf(as.data.frame(data.list$coords), - coords = c('x', 'y'), - crs = st_crs(usa)) -site.indx <- which(sapply(st_within(coords.sf, st_make_valid(state)), length) == 1) -data.list$y <- data.list$y[site.indx] -data.list$coords <- data.list$coords[site.indx, ] -data.list$covs <- data.list$covs[site.indx, ] -data.list$weights <- data.list$weights[site.indx] - -# Remove 25% of locations ------------------------------------------------- -set.seed(4040) -J <- nrow(data.list$coords) -pred.indx <- sample(1:J, round(J * .25), replace = FALSE) -save(pred.indx, file = paste0('data/tree/pred-indx-', str_replace_all(curr.state, ' ', '-'), - '.rda')) -data.list$y <- data.list$y[-pred.indx] -data.list$coords <- data.list$coords[-pred.indx, ] -data.list$covs <- data.list$covs[-pred.indx, ] -data.list$weights <- data.list$weights[-pred.indx] - - -# Prep the model ---------------------------------------------------------- -# Priors -dist.fia <- dist(data.list$coords) -mean.dist <- mean(dist.fia) # 1783.395 -# min.dist <- min(dist.fia) # 0.007832157 -min.dist <- 5 -max.dist <- max(dist.fia) # 4642.168 -# Might need further restrictions on phi later on. -prior.list <- list(beta.normal = list(mean = 0, var = 2.72), - phi.unif = list(a = 3 / max.dist, b = 3 / min.dist), - sigma.sq.unif = list(0, 10)) -inits.list <- list(beta = 0, phi = 3 / mean.dist, sigma.sq = 1) -tuning.list <- list(phi = 0.05, sigma.sq = 0.1) -n.neighbors <- 5 - -cov.model <- "exponential" -n.chains <- 1 - -# Biggest -n.batch <- 2000 -batch.length <- 25 -n.burn <- 30000 -n.thin <- 20 - -# Medium -# n.batch <- 200 -# batch.length <- 25 -# n.burn <- 3000 -# n.thin <- 5 -# -# n.batch <- 1 -# batch.length <- 25 -# n.burn <- 0 -# n.thin <- 1 - -out <- svcPGBinom(formula = ~ scale(tmin) + I(scale(tmin)^2) + - scale(ppt) + I(scale(ppt)^2) + - scale(elev) + I(scale(elev)^2), - data = data.list, priors = prior.list, - inits = inits.list, tuning = tuning.list, svc.cols = c(1), - n.neighbors = n.neighbors, cov.model = cov.model, NNGP = TRUE, - n.batch = n.batch, batch.length = batch.length, - n.burn = n.burn, accept.rate = 0.43, n.thin = n.thin, - n.chains = n.chains, n.report = 1, n.omp.threads = 5) - -save(out, file = paste0('results/tree/stage-1-sugar-maple-', - str_replace_all(curr.state, ' ', '-'), '-samples.rda')) - diff --git a/code/archived/tree/archived/pred-state-stage-1.R b/code/archived/tree/archived/pred-state-stage-1.R deleted file mode 100644 index 9be853b..0000000 --- a/code/archived/tree/archived/pred-state-stage-1.R +++ /dev/null @@ -1,63 +0,0 @@ -rm(list = ls()) -library(tidyverse) -library(sf) -library(spOccupancy) - -# Get info from command line ---------------------------------------------- -# This code is to extract the current species name from the command line -# to easily run the script for different species -args <- commandArgs(trailingOnly = TRUE) -# Current species -curr.state <- args[1] -# Alternatively, if not running the script from the command line: -# TODO: -curr.state <- 'new york' -if(length(args) == 0) base::stop('Need to tell spOccupancy the current state') -print(curr.state) - -# Load data --------------------------------------------------------------- -load("data/tree/stage-1-data.rda") - -# Filter to one state of interest ----------------------------------------- -usa <- st_as_sf(maps::map("state", fill = TRUE, plot = FALSE)) -usa <- usa %>% - st_transform(crs = "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs") -state <- usa %>% filter(ID == 'new york') -coords.sf <- st_as_sf(as.data.frame(data.list$coords), - coords = c('x', 'y'), - crs = st_crs(usa)) -site.indx <- which(sapply(st_within(coords.sf, st_make_valid(state)), length) == 1) -data.list$y <- data.list$y[site.indx] -data.list$coords <- data.list$coords[site.indx, ] -data.list$covs <- data.list$covs[site.indx, ] -data.list$weights <- data.list$weights[site.indx] - -# Remove 25% of locations ------------------------------------------------- -load(paste0("data/tree/pred-indx-", str_replace_all(curr.state, ' ', '-'), '.rda')) -covs.0 <- data.list$covs[pred.indx, ] -coords.0 <- data.list$coords[pred.indx, ] -y.0 <- data.list$y[pred.indx] -data.list$y <- data.list$y[-pred.indx] -data.list$coords <- data.list$coords[-pred.indx, ] -data.list$covs <- data.list$covs[-pred.indx, ] -data.list$weights <- data.list$weights[-pred.indx] - -# Get prediction data ----------------------------------------------------- -load(paste0("results/tree/stage-1-sugar-maple-", str_replace_all(curr.state, ' ', '-'), - '-samples.rda')) -X.0 <- matrix(1, length(pred.indx), ncol(out$X)) -colnames(X.0) <- c('intercept', 'tmin', 'tmin.2', 'ppt', 'ppt.2', - 'elev', 'elev.2') -X.0[, 'tmin'] <- (covs.0[, 'tmin'] - mean(data.list$covs[, 'tmin'])) / sd(data.list$covs[, 'tmin']) -X.0[, 'ppt'] <- (covs.0[, 'ppt'] - mean(data.list$covs[, 'ppt'])) / sd(data.list$covs[, 'ppt']) -X.0[, 'elev'] <- (covs.0[, 'elev'] - mean(data.list$covs[, 'elev'])) / sd(data.list$covs[, 'elev']) -X.0[, 'tmin.2'] <- X.0[, 'tmin']^2 -X.0[, 'ppt.2'] <- X.0[, 'ppt']^2 -X.0[, 'elev.2'] <- X.0[, 'elev']^2 - -out.pred <- predict(out, X.0, coords.0) -z.0.samples <- out.pred$y.0.samples -save(z.0.samples, file = paste0('results/tree/pred-stage-1-sugar-maple-', - str_replace_all(curr.state, ' ', '-'), - '-samples.rda')) - diff --git a/code/archived/tree/archived/summary-stage-1.R b/code/archived/tree/archived/summary-stage-1.R deleted file mode 100644 index ca36e6a..0000000 --- a/code/archived/tree/archived/summary-stage-1.R +++ /dev/null @@ -1,68 +0,0 @@ -rm(list = ls()) -library(spOccupancy) -library(sf) -library(viridis) -library(pals) -library(ggpubr) -library(tidyverse) - -# Read in results file ---------------------------------------------------- -load('results/tree/stage-1-sugar-maple-full-samples.rda') - -# Read in data ------------------------------------------------------------ -load('data/tree/stage-1-data.rda') - -svi.means <- apply(svc.samples[[1]], 2, mean) -psi.meds <- apply(psi.samples, 2, median) -psi.ci.width <- apply(psi.samples, 2, quantile, 0.975) - apply(psi.samples, 2, quantile, 0.025) - -plot.df <- data.frame(psi.med = psi.meds, - psi.ci.width = psi.ci.width, - x = data.list$coords[, 1], - y = data.list$coords[, 2]) - -plot.sf <- st_as_sf(plot.df, - coords = c("x", "y"), - crs = "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs") -usa <- st_as_sf(maps::map("state", fill = TRUE, plot = FALSE)) -usa <- usa %>% - st_transform(crs = "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs") - - -# Make a summary plot ----------------------------------------------------- -psi.plot <- ggplot(plot.sf) + - geom_sf(aes(col = psi.med), size = 0.1) + - scale_color_gradientn("", colors = ocean.tempo(1000), limits = c(0, 1), - guide = guide_colourbar(title.position="top", reverse = FALSE), - na.value = NA) + - geom_sf(data = usa, fill = NA, color=alpha("grey", 0.75)) + - theme_bw(base_size = 14) + - theme(text = element_text(family="LM Roman 10", size=10), - axis.title.x=element_blank(), axis.title.y=element_blank(), - axis.text.x = element_text(angle = 45, hjust = 1), - # axis.text.y = element_text(size = 18), - legend.key.size = unit(0.5, 'cm'), - legend.direction="horizontal", - legend.box="vertical", - legend.position=c(0.175,0.12), - legend.background = element_rect(fill = NA)) + - labs(title = 'Sugar Maple Occurrence Probability') -psi.ci.plot <- ggplot(plot.sf) + - geom_sf(aes(col = psi.ci.width), size = 0.1) + - scale_color_gradientn("", colors = ocean.tempo(1000), limits = c(0, 1), - guide = guide_colourbar(title.position="top", reverse = FALSE), - na.value = NA) + - geom_sf(data = usa, fill = NA, color=alpha("grey", 0.75)) + - theme_bw(base_size = 14) + - theme(text = element_text(family="LM Roman 10", size=10), - axis.title.x=element_blank(), axis.title.y=element_blank(), - axis.text.x = element_text(angle = 45, hjust = 1), - # axis.text.y = element_text(size = 18), - legend.key.size = unit(0.5, 'cm'), - legend.direction="horizontal", - legend.box="vertical", - legend.position=c(0.175,0.12), - legend.background = element_rect(fill = NA)) + - labs(title = 'Sugar Maple 95% Credible Interval Width') -plot(ggarrange(psi.plot, psi.ci.plot)) - diff --git a/code/archived/tree/archived/test.R b/code/archived/tree/archived/test.R deleted file mode 100644 index 2fb7e10..0000000 --- a/code/archived/tree/archived/test.R +++ /dev/null @@ -1,33 +0,0 @@ -rm(list = ls()) -library(spBayes) - -data(PM10.dat) -PM10.mod <- PM10.dat[!is.na(PM10.dat$pm10.obs), ] -PM10.pred <- PM10.dat[is.na(PM10.dat$pm10.obs), ] - -d.max <- max(iDist(PM10.mod[, c('x.coord', 'y.coord')])) - -r <- 2 - -priors <- list('phi.Unif' = list(rep(3 / (.75 * d.max), r), rep(3 / (.001 * d.max), r)), - 'sigma.sq.IG' = list(rep(2, r), rep(1, r)), - 'tau.sq.IG' = c(2, 1)) - -starting <- list('phi' = rep(3 / (.1 * d.max), r), 'sigma.sq' = rep(1, r), 'tau.sq' = 1) -tuning <- list(phi = rep(0.1, r), sigma.sq = rep(0.05, r), tau.sq = 0.1) -n.samples <- 40000 -out <- spSVC(pm10.obs ~ pm10.ctm, coords = c('x.coord', 'y.coord'), - data = PM10.mod, starting = starting, svc.cols = c(1, 2), - tuning = tuning, priors = priors, cov.model = 'exponential', - n.samples = n.samples, n.report = 1000, n.omp.threads = 4) - -out.recover <- spRecover(out, start = floor(0.75 * n.samples), thin = 2, - n.omp.threads = 4) - -summary(out.recover$p.beta.recover.samples) -summary(out.recover$p.theta.recover.samples) - -y.rep.means <- apply(out.recover$p.y.samples, 1, mean) - -plot(PM10.mod$pm10.obs, y.rep.means, pch = 19) -abline(0, 1) diff --git a/code/archived/tree/archived/total-data-prep.R b/code/archived/tree/archived/total-data-prep.R deleted file mode 100644 index 5d1911c..0000000 --- a/code/archived/tree/archived/total-data-prep.R +++ /dev/null @@ -1,62 +0,0 @@ -rm(list = ls()) -library(tidyverse) -library(sf) - -# Load data --------------------------------------------------------------- -# Don't move these somewhere else -load("~/../andy/data_actual/actual_forest_plot_spp_data.RData") -# Spatial coordinates -load("~/../andy/data_actual/actual_forest_plot_coords.RData") -# Covariates -load("~/../andy/data_actual/X_vars.rda") - -coords <- actual_forest_plot_coords %>% - select(x = LON, y = LAT) %>% - as.matrix() -# Convert to matrix -# Number of sites -J <- nrow(coords) -# Number of species -N <- n_distinct(actual_forest_plot_spp_data$SPCD) -# Species ids -sp.id <- unique(actual_forest_plot_spp_data$SPCD) -# Get data in species x site matrix. -y <- actual_forest_plot_spp_data %>% - pull(SP_PRESENT) -y <- matrix(y, N, J) -rownames(y) <- sp.id -y.bio <- actual_forest_plot_spp_data %>% - pull(BIO_ACRE) -y.bio <- matrix(y.bio, N, J) -rownames(y.bio) <- sp.id - -usa <- st_as_sf(maps::map("state", fill = TRUE, plot = FALSE)) -usa <- usa %>% - st_transform(crs = "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs") -coords.sf <- st_as_sf(data.frame(coords), - coords = c('x', 'y'), - crs = st_crs(usa)) - -# Reset all the variables -J <- nrow(coords) - -# Get ecoregion for splitting up data into smaller sets. -ecr <- st_read(dsn = "~/DFISD22/data/ecoregions/", layer = "us_eco_l3") - -# Get the state that each -ecrs.albers <- ecr %>% - st_transform(crs = st_crs(usa)) - -# Join points toBCRs -tmp <- st_join(coords.sf, ecrs.albers, join = st_intersects) -data.list <- list(y = apply(y.bio, 2, sum), - coords = coords, - covs = as.data.frame(X)) -# data.list.2$covs$ecoregionL3 <- as.numeric(tmp$US_L3CODE)[site.indx] -# Fill in missing indices for ecoregion -# miss.indx <- which(is.na(data.list.2$covs$ecoregionL3)) -# # NOTE: hardcoded -# data.list.2$covs$ecoregionL3[c(692, 715, 793, 947)] <- 50 -# data.list.2$covs$ecoregionL3[c(6415, 6426)] <- 51 - -save(data.list, file = 'data/tree/all-trees-data.rda') diff --git a/code/archived/tree/extract-data.R b/code/archived/tree/extract-data.R deleted file mode 100644 index 2b5c805..0000000 --- a/code/archived/tree/extract-data.R +++ /dev/null @@ -1,74 +0,0 @@ -rm(list = ls()) -library(tidyverse) -library(sf) - -# Load data --------------------------------------------------------------- -# Don't move these somewhere else -load("~/../andy/data_actual/actual_forest_plot_spp_data.RData") -# Spatial coordinates -load("~/../andy/data_actual/actual_forest_plot_coords.RData") -# Covariates -load("~/../andy/data_actual/X_vars.rda") - -coords <- actual_forest_plot_coords %>% - select(x = LON, y = LAT) %>% - as.matrix() -# Convert to matrix -# Number of sites -J <- nrow(coords) -# Number of species -N <- n_distinct(actual_forest_plot_spp_data$SPCD) -# Species ids -sp.id <- unique(actual_forest_plot_spp_data$SPCD) -# Get data in species x site matrix. -y <- actual_forest_plot_spp_data %>% - pull(SP_PRESENT) -y <- matrix(y, N, J) -rownames(y) <- sp.id -y.bio <- actual_forest_plot_spp_data %>% - pull(BIO_ACRE) -y.bio <- matrix(y.bio, N, J) -rownames(y.bio) <- sp.id - -# Subset data to Vermont -------------------------------------------------- -usa <- st_as_sf(maps::map("state", fill = TRUE, plot = FALSE)) -vermont <- usa -# vermont <- usa %>% filter(ID == 'maine') -# Restrict to east of the 100th meridian -# usa.bbox <- st_bbox(usa) -# usa.bbox[1] <- -100 -# usa.bbox <- as.vector(usa.bbox) -# sf_use_s2(FALSE) -# east.us <- st_crop(st_make_valid(usa), xmin = usa.bbox[1], ymin = usa.bbox[2], -# xmax = usa.bbox[3], ymax = usa.bbox[4]) -vermont <- vermont %>% - st_transform(st_crs("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=km +no_defs")) -coords.sf <- st_as_sf(data.frame(coords), - coords = c('x', 'y'), - crs = st_crs(vermont)) - -# Indexing vector for the coordinates out vermont -vermont.indx <- st_within(coords.sf, vermont) -vermont.indx <- which(sapply(vermont.indx, function(a) ifelse(length(a) == 0, 0, 1)) == 1) -coords.vermont <- coords[vermont.indx, ] -# Reset all the variables -J <- nrow(coords.vermont) -y.bio.vermont <- y.bio[, vermont.indx] -y.vermont <- y[, vermont.indx] -X.vermont <- X[vermont.indx, ] -# Grab the 10 most common species in Vermont -sp.indx.order <- rev(order(apply(y.bio.vermont, 1, sum, na.rm = TRUE))) -curr.sp.indx <- sp.indx.order[1:10] -curr.indx <- 1:J - -y <- apply(y.bio.vermont, 2, sum) -coords <- coords.vermont - -# Randomly subset to a couple thousand data points -curr.indx <- sample(1:length(y), 2000, replace = FALSE) - -y <- y[curr.indx] -coords <- coords[curr.indx, ] - -save(y, coords, file = "data/tree/vermont-bio-coords.rda") - diff --git a/code/archived/tree/format-data.R b/code/archived/tree/format-data.R deleted file mode 100644 index 386bd40..0000000 --- a/code/archived/tree/format-data.R +++ /dev/null @@ -1,10 +0,0 @@ -rm(list = ls()) - -load("data/tree/vermont-bio-coords.rda") -load('data/tree/tree-canopy-cover.rda') - -data.list <- list(y = y[!is.na(tcc)], - covs = data.frame(tcc = tcc[!is.na(tcc)]), - coords = coords[!is.na(tcc), ]) - -save(data.list, file = 'data/tree/spAbundance-data.rda') diff --git a/code/archived/tree/get-covariate.R b/code/archived/tree/get-covariate.R deleted file mode 100644 index 02aa10f..0000000 --- a/code/archived/tree/get-covariate.R +++ /dev/null @@ -1,95 +0,0 @@ -rm(list = ls()) - -library(tidyverse) -library(magrittr) -library(stars) -library(sf) -library(FedData) - -# Load data --------------------------------------------------------------- -load('data/tree/vermont-bio-coords.rda') - -# Change order to help speed things up -ord <- order(coords[, 1]) -# coords <- coords[ord, ] - -# Filter to NE, area of interest ------------------------------------------ -usa <- st_as_sf(maps::map("state", fill = TRUE, plot = FALSE)) -usa <- usa %>% - st_transform(crs = "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs") -coords.sf <- st_as_sf(as.data.frame(coords), - coords = c('x', 'y'), - crs = st_crs(usa)) - -coords.sf.albers <- coords.sf - -# Get percent forest cover from NLCD -------------------------------------- -# Create matrix to store all values for grassland and cropland variables -J <- nrow(coords.sf.albers) -# Loop through all the sites -vals <- split(1:J, ceiling(seq_along(1:J)/100)) -# # Function to get proportion forest cover -# props <- function(a, na.rm = TRUE) { -# my.sum <- sum(!is.na(a)) -# prop.for <- sum(a %in% c(41, 42, 43), na.rm = na.rm) / my.sum -# return(prop.for) -# } -my.mean <- function(a) { - mean(a, na.rm = TRUE) -} -tcc <- rep(NA, J) -for (j in 1:length(vals)) { - print(paste0("Currently on ", j, " out of ", length(vals))) - coords.curr <- coords.sf.albers[vals[[j]], ] - nlcd.dat <- get_nlcd(template = coords.curr, label = paste0('canopy-', j), year = 2016, - dataset = 'canopy') - # Calculating forest cover in 1km. - # forest[vals[[j]]] <- raster::extract(nlcd.dat, coords.curr, buffer = 50, fun = props) - tcc[vals[[j]]] <- raster::extract(nlcd.dat, coords.curr, buffer = 50, fun = my.mean) -} - -# Save results ------------------------------------------------------------ -# Extract year from the file name -tcc <- tcc[order(ord)] -tcc <- ifelse(is.na(tcc), 0, tcc) -save(tcc, file = 'data/tree/tree-canopy-cover.rda') - - -# Exploratory variogram analysis -library(MBA) -library(fields) -library(geoR) -par(mfrow=c(1,2)) -out.lm <- lm(sqrt(y) ~ tcc) -curr.res <- out.lm$residuals -vario.1.raw <- variog(coords = coords, data = sqrt(y)) -par(mfrow = c(1, 2)) -plot(vario.1.raw, pch=16) -vario.1.res <- variog(coords = coords[which(!is.na(tcc)), ], data = curr.res) -plot(vario.1.res, pch = 16) -par(mfrow = c(1, 1)) - - - -# An exploratory plot, motivated by May et al. 2022 -library(MASS) -library(ggplot2) -library(viridis) -#> Loading required package: viridisLite -theme_set(theme_bw(base_size = 16)) - -# Get density of points in 2 dimensions. -# @param x A numeric vector. -# @param y A numeric vector. -# @param n Create a square n by n grid to compute density. -# @return The density within each square. -get_density <- function(x, y, ...) { - dens <- MASS::kde2d(x, y, ...) - ix <- findInterval(x, dens$x) - iy <- findInterval(y, dens$y) - ii <- cbind(ix, iy) - return(dens$z[ii]) -} -dat <- data.frame(tcc = tcc, y = y) -dat$density <- get_density(dat$tcc, dat$y, n = 100) -ggplot(dat) + geom_point(aes(tcc, sqrt(y), color = density)) + scale_color_viridis() diff --git a/code/archived/tree/get-pred-indx.R b/code/archived/tree/get-pred-indx.R deleted file mode 100644 index 5c2ee37..0000000 --- a/code/archived/tree/get-pred-indx.R +++ /dev/null @@ -1,11 +0,0 @@ -rm(list = ls()) - -# Load data --------------------------------------------------------------- -load('data/tree/spAbundance-data.rda') - -# Generate a hold-out set and remove 25% of locations --------------------- -set.seed(19191) -J <- nrow(data.list$coords) -pred.indx <- sample(1:J, round(J * .25), replace = FALSE) -save(pred.indx, file = 'data/tree/pred-indx.rda') - diff --git a/code/archived/tree/get-prediction-data.R b/code/archived/tree/get-prediction-data.R deleted file mode 100644 index 5d3ac80..0000000 --- a/code/archived/tree/get-prediction-data.R +++ /dev/null @@ -1,57 +0,0 @@ -# get-prediction-data.R: this script extracts the grid for prediction across -# Vermont. -# Author: Jeffrey W. Doser -rm(list = ls()) -library(tidyverse) -library(sf) -library(sp) -library(FedData) -library(stars) - -# Get prediction coordinates ---------------------------------------------- -usa <- st_as_sf(maps::map("state", fill = TRUE, plot = FALSE)) -usa <- usa %>% - st_transform(crs = "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs") -vermont <- usa %>% - dplyr::filter(ID == 'michigan') - -# Grid the area for predictions. -# The dx x dy indicates the resolution in terms of km -grid.pred <- st_as_stars(st_bbox(vermont), dx = 1, dy = 1) -# Convert to data frame -coords.pred <- as.data.frame(grid.pred, center = TRUE) -# Convert coordinates to an sf object -coords.pred.sf <- st_as_sf(coords.pred, - coords = c('x', 'y'), - crs = st_crs(vermont)) - -# Intersect with region of interest -coords.pred.sf <- st_intersection(coords.pred.sf, st_make_valid(vermont)) -coords.0 <- as.data.frame(st_coordinates(coords.pred.sf)) - -# Get percent forest data ------------------------------------------------- -# TODO: there's a problem here that's causing an error with the calculation of forest -# cover in certain locations... Need to figure out what the deal is with that. -# Create matrix to store all values for grassland and cropland variables -J <- nrow(coords.pred.sf) -# Loop through all the sites -vals <- split(1:J, ceiling(seq_along(1:J)/100)) -# Function to get proportion forest cover -props <- function(a, na.rm = TRUE) { - my.sum <- sum(!is.na(a)) - prop.for <- sum(a %in% c(41, 42, 43), na.rm = na.rm) / my.sum - return(prop.for) -} -forest.0 <- rep(NA, J) -for (j in 1:length(vals)) { - print(paste0("Currently on ", j, " out of ", length(vals))) - coords.curr <- coords.pred.sf[vals[[j]], ] - nlcd.dat <- tryCatch({get_nlcd(template = coords.curr, label = paste0('biomass-', j), year = 2019)}, error = function(e) NULL) - # Calculating forest cover in 1km. - if (!is.null(nlcd.dat)) { - forest.0[vals[[j]]] <- raster::extract(nlcd.dat, coords.curr, buffer = 1000, fun = props) - } -} - -# Save prediction coordinates -save(coords.0, file = "data/pred-coordinates.rda") diff --git a/code/archived/tree/main-hold-out-vt-SVC.R b/code/archived/tree/main-hold-out-vt-SVC.R deleted file mode 100644 index 05475df..0000000 --- a/code/archived/tree/main-hold-out-vt-SVC.R +++ /dev/null @@ -1,101 +0,0 @@ -# main-uni-gaussian: fits a Gaussian univariate SVC model analogous to spSVC -rm(list = ls()) -library(tidyverse) -library(sf) -library(spAbundance) - -# Load data --------------------------------------------------------------- -load('data/tree/spAbundance-data.rda') - -# Generate a hold-out set and remove 25% of locations --------------------- -load('data/tree/pred-indx.rda') -tcc.pred <- data.list$covs[pred.indx, ] -coords.0 <- data.list$coords[pred.indx, ] -y.0 <- data.list$y[pred.indx] -data.list$y <- data.list$y[-pred.indx] -data.list$coords <- data.list$coords[-pred.indx, ] -data.list$covs <- data.list$covs[-pred.indx, , drop = FALSE] - - -library(MBA) -library(fields) -library(geoR) -par(mfrow=c(1,2)) -out.lm <- lm(sqrt(data.list$y) ~ data.list$covs$tcc) -curr.res <- out.lm$residuals -vario.1.raw <- variog(coords = data.list$coords, data = sqrt(data.list$y)) -par(mfrow = c(1, 2)) -plot(vario.1.raw, pch=16) -vario.1.res <- variog(coords = data.list$coords, data = curr.res) -plot(vario.1.res, pch = 16) -par(mfrow = c(1, 1)) - - -# Prep the model ---------------------------------------------------------- -dist.fia <- dist(data.list$coords) -# Priors -# bound.dists <- quantile(dist.fia, c(0.05, 0.95)) -# 0.05: 236.6716 -# 0.95: 2132.5627 -# min.dist <- bound.dists[1] -# max.dist <- bound.dists[2] -min.dist <- min(dist.fia) -max.dist <- max(dist.fia) -prior.list <- list(beta.normal = list(mean = 0, var = 100), - phi.unif = list(a = 3 / max.dist, b = 3 / min.dist), - sigma.sq = list(a = 2, b = 2), - tau.sq = c(2, 1)) -tuning.list <- list(phi = c(0.3)) - -# Biggest -n.batch <- 4000 -batch.length <- 25 -n.burn <- 50000 -n.thin <- 50 -n.chains <- 1 - -# Big -n.batch <- 500 -batch.length <- 25 -n.burn <- 7500 -n.thin <- 5 -n.chains <- 1 - -# n.batch <- 100 -# batch.length <- 25 -# n.burn <- 500 -# n.thin <- 2 -# n.chains <- 1 - -data.list$y <- sqrt(data.list$y) -data.list$covs$tcc <- data.list$covs$tcc / 100 - -out <- svcAbund(formula = ~ tcc, - data = data.list, priors = prior.list, - tuning = tuning.list, svc.cols = c(1, 2), - n.neighbors = 5, cov.model = 'exponential', NNGP = TRUE, - n.batch = n.batch, batch.length = batch.length, family = 'Gaussian', - n.burn = n.burn, accept.rate = 0.43, n.thin = n.thin, - n.chains = n.chains, n.report = 1, n.omp.threads = 1) - -summary(out) -y.rep.means <- apply(out$y.rep.samples, 2, mean) -plot(data.list$y, y.rep.means, pch = 19) -abline(0, 1) - -# Predict at hold-out locations ------------------------------------------- -J.0 <- nrow(coords.0) -X.0 <- matrix(1, J.0, ncol(out$X)) -X.0[, 2] <- tcc.pred / 100 -# X.0[, 2] <- (tcc.pred / 100 - mean(data.list$covs$tcc)) / sd(data.list$covs$tcc) -colnames(X.0) <- c("(Intercept)", "tcc") -out.pred <- predict(out, X.0, coords.0, n.omp.threads = 1) - -# png("tmp.png") -y.0.means <- apply(out.pred$y.0.samples^2, 2, mean) -plot(y.0, y.0.means, pch = 19) -abline(0, 1) -# dev.off() - -save(out, out.pred, file = 'results/tree/hold-out-vermont-SVC-results.rda') - diff --git a/code/archived/tree/main-hold-out-vt-SVI.R b/code/archived/tree/main-hold-out-vt-SVI.R deleted file mode 100644 index 4d9ad03..0000000 --- a/code/archived/tree/main-hold-out-vt-SVI.R +++ /dev/null @@ -1,81 +0,0 @@ -# main-uni-gaussian: fits a Gaussian univariate SVC model analogous to spSVC -rm(list = ls()) -library(tidyverse) -library(sf) -library(spAbundance) - -# Load data --------------------------------------------------------------- -load('data/tree/spAbundance-data.rda') - -# Generate a hold-out set and remove 25% of locations --------------------- -set.seed(19191) -J <- nrow(data.list$coords) -load('data/tree/pred-indx.rda') -tcc.pred <- data.list$covs[pred.indx, ] -coords.0 <- data.list$coords[pred.indx, ] -y.0 <- data.list$y[pred.indx] -data.list$y <- data.list$y[-pred.indx] -data.list$coords <- data.list$coords[-pred.indx, ] -data.list$covs <- data.list$covs[-pred.indx, , drop = FALSE] - -# Prep the model ---------------------------------------------------------- -# Priors -dist.fia <- dist(data.list$coords) -min.dist <- min(dist.fia) -max.dist <- max(dist.fia) -prior.list <- list(beta.normal = list(mean = 0, var = 100), - phi.unif = c(a = 3 / max.dist, b = 3 / min.dist), - sigma.sq = c(a = 2, b = 2), - tau.sq = c(2, 1)) -# Based on an initial run with 12,500 iterations. -# inits.list <- list(beta = c(6, 0.04), -# sigma.sq = c(.5), -# tau.sq = 0.6, -# phi = c(.5)) -inits.list <- list(sigma.sq = 1) -tuning.list <- list(phi = c(0.5)) - -# Biggest -n.batch <- 4000 -batch.length <- 25 -n.burn <- 50000 -n.thin <- 50 -n.chains <- 1 - -# Big -n.batch <- 500 -batch.length <- 25 -n.burn <- 7500 -n.thin <- 5 -n.chains <- 1 - -data.list$y <- sqrt(data.list$y) -data.list$covs$tcc <- data.list$covs$tcc / 100 - -out <- spAbund(formula = ~ tcc, - data = data.list, priors = prior.list, - inits = inits.list, tuning = tuning.list, - n.neighbors = 10, cov.model = 'exponential', NNGP = TRUE, - n.batch = n.batch, batch.length = batch.length, family = 'Gaussian', - n.burn = n.burn, accept.rate = 0.43, n.thin = n.thin, - n.chains = n.chains, n.report = 1, n.omp.threads = 1) - -summary(out) -y.rep.means <- apply(out$y.rep.samples, 2, mean) -plot(data.list$y, y.rep.means, pch = 19) -abline(0, 1) - -# Predict at hold-out locations ------------------------------------------- -J.0 <- nrow(coords.0) -X.0 <- matrix(1, J.0, ncol(out$X)) -colnames(X.0) <- c("(Intercept)", "tcc") -X.0[, 2] <- tcc.pred / 100 -# X.0[, 2] <- (tcc.pred / 100 - mean(data.list$covs$tcc)) / sd(data.list$covs$tcc) -out.pred <- predict(out, X.0, coords.0, n.omp.threads = 1) - -y.0.means <- apply(out.pred$y.0.samples^2, 2, mean) -plot(y.0, y.0.means, pch = 19) -abline(0, 1) - -save(out, out.pred, file = 'results/tree/hold-out-vermont-SVI-results.rda') - diff --git a/code/archived/tree/main-hold-out-vt-non-spatial.R b/code/archived/tree/main-hold-out-vt-non-spatial.R deleted file mode 100644 index da4660a..0000000 --- a/code/archived/tree/main-hold-out-vt-non-spatial.R +++ /dev/null @@ -1,78 +0,0 @@ -# main-uni-gaussian: fits a Gaussian univariate SVC model analogous to spSVC -rm(list = ls()) -library(tidyverse) -library(sf) -library(spAbundance) - -# Load data --------------------------------------------------------------- -load('data/tree/spAbundance-data.rda') - -# Generate a hold-out set and remove 25% of locations --------------------- -set.seed(19191) -J <- nrow(data.list$coords) -load('data/tree/pred-indx.rda') -tcc.pred <- data.list$covs[pred.indx, ] -coords.0 <- data.list$coords[pred.indx, ] -y.0 <- data.list$y[pred.indx] -data.list$y <- data.list$y[-pred.indx] -data.list$coords <- data.list$coords[-pred.indx, ] -data.list$covs <- data.list$covs[-pred.indx, , drop = FALSE] - -# Prep the model ---------------------------------------------------------- -# Priors -min.dist <- 236.6716 -max.dist <- 2132.5627 -prior.list <- list(beta.normal = list(mean = 0, var = 100), - phi.unif = c(a = 3 / max.dist, b = 3 / min.dist), - sigma.sq = c(a = 2, b = 2), - tau.sq = c(2, 1)) -# Based on an initial run with 12,500 iterations. -# inits.list <- list(beta = c(6, 0.04), -# sigma.sq = c(.5), -# tau.sq = 0.6, -# phi = c(.5)) -inits.list <- list(sigma.sq = 1) -tuning.list <- list(phi = c(0.5)) - -# Biggest -n.batch <- 5000 -batch.length <- 25 -n.burn <- 50000 -n.thin <- 75 -n.chains <- 1 - -# Big -# n.batch <- 500 -# batch.length <- 25 -# n.burn <- 7500 -# n.thin <- 5 -# n.chains <- 1 - -data.list$y <- sqrt(data.list$y) -data.list$covs$tcc <- data.list$covs$tcc / 100 - -out <- abund(formula = ~ tcc, - data = data.list, priors = prior.list, - inits = inits.list, tuning = tuning.list, - n.batch = n.batch, batch.length = batch.length, family = 'Gaussian', - n.burn = n.burn, accept.rate = 0.43, n.thin = n.thin, - n.chains = n.chains, n.report = 1) - -summary(out) -# y.rep.means <- apply(out$y.rep.samples, 2, mean) -# plot(data.list$y, y.rep.means, pch = 19) -# abline(0, 1) - -# Predict at hold-out locations ------------------------------------------- -J.0 <- nrow(coords.0) -X.0 <- matrix(1, J.0, ncol(out$X)) -colnames(X.0) <- c("(Intercept)", "tcc") -X.0[, 2] <- tcc.pred -out.pred <- predict(out, X.0) - -# y.0.means <- apply(out.pred$y.0.samples^2, 2, mean) -# plot(y.0, y.0.means, pch = 19) -# abline(0, 1) - -save(out, out.pred, file = 'results/tree/hold-out-east-us-non-spatial-results.rda') - diff --git a/code/archived/tree/main-state-SVC-stage-2.R b/code/archived/tree/main-state-SVC-stage-2.R deleted file mode 100644 index 0bc0042..0000000 --- a/code/archived/tree/main-state-SVC-stage-2.R +++ /dev/null @@ -1,87 +0,0 @@ -# main-uni-gaussian: fits a Gaussian univariate SVC model analogous to spSVC -rm(list = ls()) -library(tidyverse) -library(sf) -library(spAbundance) - -# Get info from command line ---------------------------------------------- -# This code is to extract the current species name from the command line -# to easily run the script for different species -args <- commandArgs(trailingOnly = TRUE) -# Current species -curr.state <- args[1] -# Alternatively, if not running the script from the command line: -curr.state <- 'new york' -if(length(args) == 0) base::stop('Need to tell spAbundance the current state') -print(curr.state) - -# Load data --------------------------------------------------------------- -load('data/tree/stage-2-data.rda') - -# Filter to one state of interest ----------------------------------------- -usa <- st_as_sf(maps::map("state", fill = TRUE, plot = FALSE)) -usa <- usa %>% - st_transform(crs = "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs") -state <- usa %>% filter(ID == 'new york') -coords.sf <- st_as_sf(as.data.frame(data.list.2$coords), - coords = c('x', 'y'), - crs = st_crs(usa)) -site.indx <- which(sapply(st_within(coords.sf, st_make_valid(state)), length) == 1) -data.list.2$y <- data.list.2$y[site.indx] -data.list.2$coords <- data.list.2$coords[site.indx, ] -data.list.2$covs <- data.list.2$covs[site.indx, ] -data.list.2$z <- data.list.2$z[site.indx] - -# Remove 25% of locations ------------------------------------------------- -load(paste0("data/tree/pred-indx-", str_replace_all(curr.state, ' ', '-'), '.rda')) -data.list.2$y <- data.list.2$y[-pred.indx] -data.list.2$coords <- data.list.2$coords[-pred.indx, ] -data.list.2$covs <- data.list.2$covs[-pred.indx, ] -data.list.2$z <- data.list.2$z[-pred.indx] - -# Prep the model ---------------------------------------------------------- -# Priors -curr.indx <- which(data.list.2$y != 0) -dist.fia <- dist(data.list.2$coords[curr.indx, ]) -mean.dist <- mean(dist.fia) -min.dist <- 5 -max.dist <- max(dist.fia) -prior.list <- list(beta.normal = list(mean = 0, var = 100), - phi.unif = list(a = 3 / max.dist, b = 3 / min.dist), - sigma.sq.ig = list(a = 2, b = 2), - tau.sq = c(2, 1)) -inits.list <- list(beta = 0, sigma.sq = 5, tau.sq = 0.2, phi = 3 / mean.dist) -tuning.list <- list(phi = c(0.3)) - -# Biggest -n.batch <- 2000 -batch.length <- 25 -n.burn <- 30000 -n.thin <- 20 -n.chains <- 1 - -# Big -# n.batch <- 500 -# batch.length <- 25 -# n.burn <- 7500 -# n.thin <- 5 - -# n.batch <- 50 -# batch.length <- 25 -# n.burn <- 500 -# n.thin <- 1 - -data.list.2$covs$vpd <- data.list.2$covs$vpd / sd(data.list.2$covs$vpd) -data.list.2$covs$ppt <- data.list.2$covs$ppt / sd(data.list.2$covs$ppt) - -out <- svcAbund(formula = ~ vpd + ppt + scale(elev) + I(scale(elev)^2), - data = data.list.2, priors = prior.list, - inits = inits.list, tuning = tuning.list, svc.cols = c(1, 2, 3), - n.neighbors = 5, cov.model = 'exponential', NNGP = TRUE, - n.batch = n.batch, batch.length = batch.length, family = 'Gaussian-hurdle', - n.burn = n.burn, accept.rate = 0.43, n.thin = n.thin, - n.chains = n.chains, n.report = 1, n.omp.threads = 5) - -save(out, file = paste('results/tree/stage-2-sugar-maple-', str_replace(curr.state, ' ', '-'), - '-SVC-samples.rda', sep = '')) - diff --git a/code/archived/tree/main-state-SVI-stage-2.R b/code/archived/tree/main-state-SVI-stage-2.R deleted file mode 100644 index a9d3a3e..0000000 --- a/code/archived/tree/main-state-SVI-stage-2.R +++ /dev/null @@ -1,86 +0,0 @@ -# main-uni-gaussian: fits a Gaussian univariate SVC model analogous to spSVC -rm(list = ls()) -library(tidyverse) -library(sf) -library(spAbundance) - -# Get info from command line ---------------------------------------------- -# This code is to extract the current species name from the command line -# to easily run the script for different species -args <- commandArgs(trailingOnly = TRUE) -# Current species -curr.state <- args[1] -# Alternatively, if not running the script from the command line: -curr.state <- 'new york' -if(length(args) == 0) base::stop('Need to tell spAbundance the current state') -print(curr.state) - -# Load data --------------------------------------------------------------- -load('data/tree/stage-2-data.rda') - -# Filter to one state of interest ----------------------------------------- -usa <- st_as_sf(maps::map("state", fill = TRUE, plot = FALSE)) -usa <- usa %>% - st_transform(crs = "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs") -state <- usa %>% filter(ID == 'new york') -coords.sf <- st_as_sf(as.data.frame(data.list.2$coords), - coords = c('x', 'y'), - crs = st_crs(usa)) -site.indx <- which(sapply(st_within(coords.sf, st_make_valid(state)), length) == 1) -data.list.2$y <- data.list.2$y[site.indx] -data.list.2$coords <- data.list.2$coords[site.indx, ] -data.list.2$covs <- data.list.2$covs[site.indx, ] -data.list.2$z <- data.list.2$z[site.indx] - -# Remove 25% of locations ------------------------------------------------- -load(paste0("data/tree/pred-indx-", str_replace_all(curr.state, ' ', '-'), '.rda')) -data.list.2$y <- data.list.2$y[-pred.indx] -data.list.2$coords <- data.list.2$coords[-pred.indx, ] -data.list.2$covs <- data.list.2$covs[-pred.indx, ] -data.list.2$z <- data.list.2$z[-pred.indx] - -# Prep the model ---------------------------------------------------------- -# Priors -curr.indx <- which(data.list.2$y != 0) -dist.fia <- dist(data.list.2$coords[curr.indx, ]) -mean.dist <- mean(dist.fia) -min.dist <- 5 -max.dist <- max(dist.fia) -prior.list <- list(beta.normal = list(mean = 0, var = 100), - phi.unif = list(a = 3 / max.dist, b = 3 / min.dist), - sigma.sq = list(a = 2, b = 2), - tau.sq = c(2, 1)) -inits.list <- list(beta = 0, sigma.sq = 5, tau.sq = 0.2, phi = 3 / mean.dist) -tuning.list <- list(phi = c(0.3)) - -# Biggest -n.batch <- 2000 -batch.length <- 25 -n.burn <- 30000 -n.thin <- 20 -n.chains <- 1 - -# Big -# n.batch <- 500 -# batch.length <- 25 -# n.burn <- 7500 -# n.thin <- 5 - -# n.batch <- 50 -# batch.length <- 25 -# n.burn <- 500 -# n.thin <- 1 - -out <- svcAbund(formula = ~ scale(vpd) + I(scale(vpd)^2) + - scale(ppt) + I(scale(ppt)^2) + - scale(elev) + I(scale(elev)^2), - data = data.list.2, priors = prior.list, - inits = inits.list, tuning = tuning.list, svc.cols = c(1), - n.neighbors = 5, cov.model = 'exponential', NNGP = TRUE, - n.batch = n.batch, batch.length = batch.length, family = 'Gaussian-hurdle', - n.burn = n.burn, accept.rate = 0.43, n.thin = n.thin, - n.chains = n.chains, n.report = 1, n.omp.threads = 5) # TODO: - -save(out, file = paste('results/tree/stage-2-sugar-maple-', str_replace(curr.state, ' ', '-'), - '-SVI-samples.rda', sep = '')) - diff --git a/code/archived/tree/main-vt-SVC.R b/code/archived/tree/main-vt-SVC.R deleted file mode 100644 index f44179b..0000000 --- a/code/archived/tree/main-vt-SVC.R +++ /dev/null @@ -1,58 +0,0 @@ -# main-uni-gaussian: fits a Gaussian univariate SVC model analogous to spSVC -rm(list = ls()) -library(tidyverse) -library(sf) -library(spAbundance) - -# Load data --------------------------------------------------------------- -load('data/tree/spAbundance-data.rda') - -# Prep the model ---------------------------------------------------------- -# Priors -dist.fia <- dist(data.list$coords) -mean.dist <- mean(dist.fia) -min.dist <- min(dist.fia) -max.dist <- max(dist.fia) -prior.list <- list(beta.normal = list(mean = 0, var = 100), - phi.unif = list(a = 3 / max.dist, b = 3 / min.dist), - sigma.sq = list(a = 2, b = 2), - tau.sq = c(2, 1)) -# Based on an initial run with 12,500 iterations. -inits.list <- list(beta = c(0.4, 0.04), - sigma.sq = c(.5, .009), - tau.sq = 0.6, - phi = c(.94, .02)) -tuning.list <- list(phi = c(1)) - -# Biggest -n.batch <- 4000 -batch.length <- 25 -n.burn <- 50000 -n.thin <- 25 -n.chains <- 1 - -# Big -# n.batch <- 500 -# batch.length <- 25 -# n.burn <- 7500 -# n.thin <- 5 -# n.chains <- 1 - -# data.list$covs$tcc <- data.list$covs$tcc / 100 -data.list$y <- sqrt(data.list$y) - -out <- svcAbund(formula = ~ tcc, - data = data.list, priors = prior.list, - inits = inits.list, tuning = tuning.list, svc.cols = c(1, 2), - n.neighbors = 5, cov.model = 'exponential', NNGP = TRUE, - n.batch = n.batch, batch.length = batch.length, family = 'Gaussian', - n.burn = n.burn, accept.rate = 0.43, n.thin = n.thin, - n.chains = n.chains, n.report = 1, n.omp.threads = 1) # TODO: - -summary(out) -y.rep.means <- apply(out$y.rep.samples, 2, mean) -plot(data.list$y, y.rep.means, pch = 19) -abline(0, 1) - -save(out, file = 'results/tree/vermont-SVC-results.rda') - diff --git a/code/archived/tree/main-vt-SVI.R b/code/archived/tree/main-vt-SVI.R deleted file mode 100644 index 507b61e..0000000 --- a/code/archived/tree/main-vt-SVI.R +++ /dev/null @@ -1,57 +0,0 @@ -# main-uni-gaussian: fits a Gaussian univariate SVC model analogous to spSVC -rm(list = ls()) -library(tidyverse) -library(sf) -library(spAbundance) - -# Load data --------------------------------------------------------------- -load('data/tree/spAbundance-data.rda') - -# Prep the model ---------------------------------------------------------- -# Priors -dist.fia <- dist(data.list$coords) -mean.dist <- mean(dist.fia) -min.dist <- min(dist.fia) -# min.dist <- 5 -max.dist <- max(dist.fia) -prior.list <- list(beta.normal = list(mean = 0, var = 100), - phi.unif = c(a = 3 / max.dist, b = 3 / min.dist), - sigma.sq = c(a = 2, b = 2), - tau.sq = c(2, 1)) -inits.list <- list(beta = c(6, 1), - sigma.sq = .5, - tau.sq = 0.6, - phi = .5) -tuning.list <- list(phi = c(0.5)) - -# Biggest -n.batch <- 4000 -batch.length <- 25 -n.burn <- 50000 -n.thin <- 25 -n.chains <- 1 - -# Big -# n.batch <- 500 -# batch.length <- 25 -# n.burn <- 7500 -# n.thin <- 5 -# n.chains <- 1 - -data.list$y <- sqrt(data.list$y) - -out <- spAbund(formula = ~ tcc, - data = data.list, priors = prior.list, - inits = inits.list, tuning = tuning.list, - n.neighbors = 10, cov.model = 'exponential', NNGP = TRUE, - n.batch = n.batch, batch.length = batch.length, family = 'Gaussian', - n.burn = n.burn, accept.rate = 0.43, n.thin = n.thin, - n.chains = n.chains, n.report = 1, n.omp.threads = 1) # TODO: - -summary(out) -y.rep.means <- apply(out$y.rep.samples, 2, mean) -plot(data.list$y, y.rep.means, pch = 19) -abline(0, 1) - -save(out, file = 'results/tree/vermont-SVI-results.rda') - diff --git a/code/archived/tree/pred-state-SVC-stage-2.R b/code/archived/tree/pred-state-SVC-stage-2.R deleted file mode 100644 index aa43805..0000000 --- a/code/archived/tree/pred-state-SVC-stage-2.R +++ /dev/null @@ -1,71 +0,0 @@ -rm(list = ls()) -library(tidyverse) -library(sf) -library(spOccupancy) -library(spAbundance) - -# Get info from command line ---------------------------------------------- -# This code is to extract the current species name from the command line -# to easily run the script for different species -args <- commandArgs(trailingOnly = TRUE) -# Current species -curr.state <- args[1] -# Alternatively, if not running the script from the command line: -# TODO: -curr.state <- 'new york' -if(length(args) == 0) base::stop('Need to tell spOccupancy the current state') -print(curr.state) - -# Load data --------------------------------------------------------------- -load("data/tree/stage-2-data.rda") - -# Filter to one state of interest ----------------------------------------- -usa <- st_as_sf(maps::map("state", fill = TRUE, plot = FALSE)) -usa <- usa %>% - st_transform(crs = "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs") -state <- usa %>% filter(ID == 'new york') -coords.sf <- st_as_sf(as.data.frame(data.list.2$coords), - coords = c('x', 'y'), - crs = st_crs(usa)) -site.indx <- which(sapply(st_within(coords.sf, st_make_valid(state)), length) == 1) -data.list.2$y <- data.list.2$y[site.indx] -data.list.2$coords <- data.list.2$coords[site.indx, ] -data.list.2$covs <- data.list.2$covs[site.indx, ] -data.list.2$z <- data.list.2$z[site.indx] - -# Remove 25% of locations ------------------------------------------------- -load(paste0("data/tree/pred-indx-", str_replace_all(curr.state, ' ', '-'), '.rda')) -covs.0 <- data.list.2$covs[pred.indx, ] -coords.0 <- data.list.2$coords[pred.indx, ] -y.0 <- data.list.2$y[pred.indx] -z.0 <- data.list.2$z[pred.indx] -data.list.2$y <- data.list.2$y[-pred.indx] -data.list.2$coords <- data.list.2$coords[-pred.indx, ] -data.list.2$covs <- data.list.2$covs[-pred.indx, ] -data.list.2$z <- data.list.2$z[-pred.indx] - -# Get prediction data ----------------------------------------------------- -load(paste0("results/tree/stage-2-sugar-maple-", str_replace_all(curr.state, ' ', '-'), - '-SVC-samples.rda')) -X.0 <- matrix(1, length(pred.indx), ncol(out$X)) -colnames(X.0) <- c('intercept', 'vpd', 'ppt', 'elev', 'elev.2') -X.0[, 'vpd'] <- (covs.0[, 'vpd']) / sd(data.list.2$covs[, 'vpd']) -X.0[, 'ppt'] <- (covs.0[, 'ppt']) / sd(data.list.2$covs[, 'ppt']) -X.0[, 'elev'] <- (covs.0[, 'elev'] - mean(data.list.2$covs[, 'elev'])) / sd(data.list.2$covs[, 'elev']) -X.0[, 'elev.2'] <- X.0[, 'elev']^2 - -# Load predicted first stage samples -# TODO: testing -# load(paste0('results/tree/pred-stage-1-sugar-maple-', -# str_replace_all(curr.state, ' ', '-'), -# '-samples.rda')) -z.0.samples <- matrix(z.0, nrow(out$beta.samples), length(y.0), byrow = TRUE) -out.pred <- predict(out, X.0, coords.0, z.0.samples = z.0.samples) -save(out.pred, file = paste0('results/tree/pred-stage-2-sugar-maple-', - str_replace_all(curr.state, ' ', '-'), - '-SVC-samples.rda')) - -# TODO: temporary -# y.0.means <- apply(out.pred$y.0.samples, 2, mean) -# plot(y.0, y.0.means, pch = 19) -# abline(0, 1) diff --git a/code/archived/tree/pred-state-SVI-stage-2.R b/code/archived/tree/pred-state-SVI-stage-2.R deleted file mode 100644 index 10a03e2..0000000 --- a/code/archived/tree/pred-state-SVI-stage-2.R +++ /dev/null @@ -1,69 +0,0 @@ -rm(list = ls()) -library(tidyverse) -library(sf) -library(spOccupancy) -library(spAbundance) - -# Get info from command line ---------------------------------------------- -# This code is to extract the current species name from the command line -# to easily run the script for different species -args <- commandArgs(trailingOnly = TRUE) -# Current species -curr.state <- args[1] -# Alternatively, if not running the script from the command line: -# TODO: -curr.state <- 'new york' -if(length(args) == 0) base::stop('Need to tell spOccupancy the current state') -print(curr.state) - -# Load data --------------------------------------------------------------- -load("data/tree/stage-2-data.rda") - -# Filter to one state of interest ----------------------------------------- -usa <- st_as_sf(maps::map("state", fill = TRUE, plot = FALSE)) -usa <- usa %>% - st_transform(crs = "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs") -state <- usa %>% filter(ID == 'new york') -coords.sf <- st_as_sf(as.data.frame(data.list.2$coords), - coords = c('x', 'y'), - crs = st_crs(usa)) -site.indx <- which(sapply(st_within(coords.sf, st_make_valid(state)), length) == 1) -data.list.2$y <- data.list.2$y[site.indx] -data.list.2$coords <- data.list.2$coords[site.indx, ] -data.list.2$covs <- data.list.2$covs[site.indx, ] -data.list.2$z <- data.list.2$z[site.indx] - -# Remove 25% of locations ------------------------------------------------- -load(paste0("data/tree/pred-indx-", str_replace_all(curr.state, ' ', '-'), '.rda')) -covs.0 <- data.list.2$covs[pred.indx, ] -coords.0 <- data.list.2$coords[pred.indx, ] -y.0 <- data.list.2$y[pred.indx] -z.0 <- data.list.2$z[pred.indx] -data.list.2$y <- data.list.2$y[-pred.indx] -data.list.2$coords <- data.list.2$coords[-pred.indx, ] -data.list.2$covs <- data.list.2$covs[-pred.indx, ] -data.list.2$z <- data.list.2$z[-pred.indx] - -# Get prediction data ----------------------------------------------------- -load(paste0("results/tree/stage-2-sugar-maple-", str_replace_all(curr.state, ' ', '-'), - '-SVI-samples.rda')) -X.0 <- matrix(1, length(pred.indx), ncol(out$X)) -colnames(X.0) <- c('intercept', 'vpd', 'vpd.2', 'ppt', 'ppt.2', 'elev', 'elev.2') -X.0[, 'vpd'] <- (covs.0[, 'vpd'] - mean(data.list.2$covs[, 'vpd'])) / sd(data.list.2$covs[, 'vpd']) -X.0[, 'ppt'] <- (covs.0[, 'ppt'] - mean(data.list.2$covs[, 'ppt'])) / sd(data.list.2$covs[, 'ppt']) -X.0[, 'elev'] <- (covs.0[, 'elev'] - mean(data.list.2$covs[, 'elev'])) / sd(data.list.2$covs[, 'elev']) -X.0[, 'vpd.2'] <- X.0[, 'vpd']^2 -X.0[, 'ppt.2'] <- X.0[, 'ppt']^2 -X.0[, 'elev.2'] <- X.0[, 'elev']^2 - -# Load predicted first stage samples -# TODO: testing -# load(paste0('results/tree/pred-stage-1-sugar-maple-', -# str_replace_all(curr.state, ' ', '-'), -# '-samples.rda')) -z.0.samples <- matrix(z.0, nrow(out$beta.samples), length(y.0), byrow = TRUE) -out.pred <- predict(out, X.0, coords.0, z.0.samples = z.0.samples) -save(out.pred, file = paste0('results/tree/pred-stage-2-sugar-maple-', - str_replace_all(curr.state, ' ', '-'), - '-SVI-samples.rda')) - diff --git a/code/archived/tree/scratch.R b/code/archived/tree/scratch.R deleted file mode 100644 index 23a81e2..0000000 --- a/code/archived/tree/scratch.R +++ /dev/null @@ -1,10 +0,0 @@ -library(MBA) -library(fields) -library(geoR) -par(mfrow=c(1,2)) - -load("data/tree/vermont-bio-coords.rda") -vario.1.raw <- variog(coords = coords, data = sqrt(y)) -# par(mfrow = c(1, 2)) -plot(vario.1.raw, pch=16) - diff --git a/code/archived/tree/summary-hold-out.R b/code/archived/tree/summary-hold-out.R deleted file mode 100644 index 0bdb274..0000000 --- a/code/archived/tree/summary-hold-out.R +++ /dev/null @@ -1,61 +0,0 @@ -rm(list = ls()) -library(tidyverse) -library(sf) -library(spOccupancy) -library(spAbundance) - -# Get info from command line ---------------------------------------------- -# This code is to extract the current species name from the command line -# to easily run the script for different species -args <- commandArgs(trailingOnly = TRUE) -# Current species -curr.state <- args[1] -# Alternatively, if not running the script from the command line: -# TODO: -curr.state <- 'new york' -if(length(args) == 0) base::stop('Need to tell spOccupancy the current state') -print(curr.state) - -# Load data --------------------------------------------------------------- -load("data/tree/stage-2-data.rda") - -# Filter to one state of interest ----------------------------------------- -usa <- st_as_sf(maps::map("state", fill = TRUE, plot = FALSE)) -usa <- usa %>% - st_transform(crs = "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs") -state <- usa %>% filter(ID == 'new york') -coords.sf <- st_as_sf(as.data.frame(data.list.2$coords), - coords = c('x', 'y'), - crs = st_crs(usa)) -site.indx <- which(sapply(st_within(coords.sf, st_make_valid(state)), length) == 1) -data.list.2$y <- data.list.2$y[site.indx] -data.list.2$coords <- data.list.2$coords[site.indx, ] -data.list.2$covs <- data.list.2$covs[site.indx, ] -data.list.2$z <- data.list.2$z[site.indx] - -# Remove 25% of locations ------------------------------------------------- -load(paste0("data/tree/pred-indx-", str_replace_all(curr.state, ' ', '-'), '.rda')) -covs.0 <- data.list.2$covs[pred.indx, ] -coords.0 <- data.list.2$coords[pred.indx, ] -y.0 <- data.list.2$y[pred.indx] -data.list.2$y <- data.list.2$y[-pred.indx] -data.list.2$coords <- data.list.2$coords[-pred.indx, ] -data.list.2$covs <- data.list.2$covs[-pred.indx, ] -data.list.2$z <- data.list.2$z[-pred.indx] - -# Read in prediction results for SVC and SVI model ------------------------ -load(paste0('results/tree/pred-stage-2-sugar-maple-', - str_replace_all(curr.state, ' ', '-'), - '-SVI-samples.rda')) -out.pred.SVI <- out.pred -load(paste0('results/tree/pred-stage-2-sugar-maple-', - str_replace_all(curr.state, ' ', '-'), - '-SVC-samples.rda')) -out.pred.SVC <- out.pred - -y.rep.means.SVI <- apply(out.pred.SVI$y.0.samples, 2, mean) -y.rep.means.SVC <- apply(out.pred.SVC$y.0.samples, 2, mean) - -plot(y.0, y.rep.means.SVI, pch = 19) -plot(y.0, y.rep.means.SVC, pch = 19) - diff --git a/code/archived/tree/summary-scratch.R b/code/archived/tree/summary-scratch.R deleted file mode 100644 index b390701..0000000 --- a/code/archived/tree/summary-scratch.R +++ /dev/null @@ -1,70 +0,0 @@ -rm(list = ls()) -library(tidyverse) -library(sf) -library(spAbundance) - -# Get info from command line ---------------------------------------------- -# This code is to extract the current species name from the command line -# to easily run the script for different species -args <- commandArgs(trailingOnly = TRUE) -# Current species -curr.state <- args[1] -# Alternatively, if not running the script from the command line: -curr.state <- 'new york' -if(length(args) == 0) base::stop('Need to tell spAbundance the current state') -print(curr.state) - -# Load data --------------------------------------------------------------- -load('data/tree/all-trees-data.rda') - -# Filter to one state of interest ----------------------------------------- -usa <- st_as_sf(maps::map("state", fill = TRUE, plot = FALSE)) -usa <- usa %>% - st_transform(crs = "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs") -state <- usa %>% filter(ID == 'new york') -coords.sf <- st_as_sf(as.data.frame(data.list$coords), - coords = c('x', 'y'), - crs = st_crs(usa)) -site.indx <- which(sapply(st_within(coords.sf, st_make_valid(state)), length) == 1) -data.list$y <- data.list$y[site.indx] -data.list$coords <- data.list$coords[site.indx, ] -data.list$covs <- data.list$covs[site.indx, ] - -# Remove 25% of locations ------------------------------------------------- -load(paste0("data/tree/pred-indx-", str_replace_all(curr.state, ' ', '-'), '.rda')) -data.list$y <- data.list$y[-pred.indx] -data.list$coords <- data.list$coords[-pred.indx, ] -data.list$covs <- data.list$covs[-pred.indx, ] -data.list$z <- data.list$z[-pred.indx] - -# TODO: transform the y's -data.list$y <- log(data.list$y) - -load("results/tree/stage-2-biomasss-new-york-SVI-samples.rda") - - -y.rep.means <- apply(out$y.rep.samples, 2, mean) - - -curr.indx <- which(data.list$y != 0) -dist.fia <- dist(data.list$coords[curr.indx, ]) -mean.dist <- mean(dist.fia) -min.dist <- 5 -max.dist <- max(dist.fia) -prior.list <- list(beta.normal = list(mean = 0, var = 100), - phi.unif = list(a = 3 / max.dist, b = 3 / min.dist), - sigma.sq = list(a = 2, b = 2), - tau.sq = c(2, 1)) -inits.list <- list(beta = 0, sigma.sq = 5, tau.sq = 0.2, phi = 3 / mean.dist) -tuning.list <- list(phi = c(0.3)) - -# Compare with spNNGP -# library(spNNGP) -# starting <- list(phi = 3 / mean.dist, sigma.sq = 5, tau.sq = 0.2) -# tuning <- list(phi = 0.3, sigma.sq = 0.5) -# priors <- list(phi.Unif = c(3 / max.dist, 3 / min.dist), sigma.sq.IG = c(2, 2), -# tau.sq.IG = c(2, 1)) -# out.spNNGP <- spNNGP(data.list$y ~ 1, coords = data.list$coords, starting = starting, -# method = 'latent', n.neighbors = 5, tuning = tuning, priors = priors, -# cov.model = 'exponential', n.samples = 12500) -# y.quants <- fitted(out.spNNGP)$y.rep.quants diff --git a/code/archived/tree/summary-stage-2.R b/code/archived/tree/summary-stage-2.R deleted file mode 100644 index 40fad87..0000000 --- a/code/archived/tree/summary-stage-2.R +++ /dev/null @@ -1,65 +0,0 @@ -rm(list = ls()) -library(spAbundance) -library(tidyverse) -library(sf) - -# Get info from command line ---------------------------------------------- -# This code is to extract the current species name from the command line -# to easily run the script for different species -args <- commandArgs(trailingOnly = TRUE) -# Current species -curr.state <- args[1] -# Alternatively, if not running the script from the command line: -curr.state <- 'new york' -if(length(args) == 0) base::stop('Need to tell spAbundance the current state') -print(curr.state) - -# Load data --------------------------------------------------------------- -load('data/tree/stage-2-data.rda') - -# Filter to one state of interest ----------------------------------------- -usa <- st_as_sf(maps::map("state", fill = TRUE, plot = FALSE)) -usa <- usa %>% - st_transform(crs = "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=km +no_defs") -state <- usa %>% filter(ID == 'new york') -coords.sf <- st_as_sf(as.data.frame(data.list.2$coords), - coords = c('x', 'y'), - crs = st_crs(usa)) -site.indx <- which(sapply(st_within(coords.sf, st_make_valid(state)), length) == 1) -data.list.2$y <- data.list.2$y[site.indx] -data.list.2$coords <- data.list.2$coords[site.indx, ] -data.list.2$covs <- data.list.2$covs[site.indx, ] -data.list.2$z <- data.list.2$z[site.indx] - -# Remove 25% of locations ------------------------------------------------- -load(paste0("data/tree/pred-indx-", str_replace_all(curr.state, ' ', '-'), '.rda')) -data.list.2$y <- data.list.2$y[-pred.indx] -data.list.2$coords <- data.list.2$coords[-pred.indx, ] -data.list.2$covs <- data.list.2$covs[-pred.indx, ] -data.list.2$z <- data.list.2$z[-pred.indx] - -# Load results ------------------------------------------------------------ -# SVC model -load(paste0("results/tree/stage-2-sugar-maple-", - str_replace_all(curr.state, ' ', '-'), "-SVC-samples.rda")) -out.svc <- out -summary(out.svc) -# SVI model -load(paste0("results/tree/stage-2-sugar-maple-", - str_replace_all(curr.state, ' ', '-'), "-SVI-samples.rda")) -out.svi <- out -summary(out.svi) - -y.rep.means.svc <- apply(out.svc$y.rep.samples, 2, mean) -y.rep.means.svi <- apply(out.svi$y.rep.samples, 2, mean) -y.true <- data.list.2$y - -par(mfrow = c(1, 2)) -plot(y.true, y.rep.means.svc, pch = 19, main = 'SVC') -abline(0, 1) -plot(y.true, y.rep.means.svi, pch = 19, main = 'SVI') -abline(0, 1) -par(mfrow = c(1, 1)) - - - diff --git a/code/archived/tree/summary.R b/code/archived/tree/summary.R deleted file mode 100644 index b589024..0000000 --- a/code/archived/tree/summary.R +++ /dev/null @@ -1,91 +0,0 @@ -rm(list = ls()) -library(spAbundance) -library(tidyverse) -library(sf) -library(scoringRules) - -# Load data --------------------------------------------------------------- -load('data/tree/spAbundance-data.rda') - -# Filter to one state of interest ----------------------------------------- -usa <- st_as_sf(maps::map("state", fill = TRUE, plot = FALSE)) -# Restrict to east of the 100th meridian -usa.bbox <- st_bbox(usa) -usa.bbox[1] <- -100 -usa.bbox <- as.vector(usa.bbox) -sf_use_s2(FALSE) -east.us <- st_crop(st_make_valid(usa), xmin = usa.bbox[1], ymin = usa.bbox[2], - xmax = usa.bbox[3], ymax = usa.bbox[4]) -east.us <- east.us %>% - st_transform(st_crs("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=km +no_defs")) -coords.sf <- st_as_sf(data.frame(data.list$coords), - coords = c('x', 'y'), - crs = st_crs(east.us)) - -# Load results ------------------------------------------------------------ -# SVC model -# load('results/tree/ne-SVC-results.rda') -# out.svc <- out -# summary(out.svc) -# # SVI model -# load('results/tree/ne-SVI-results.rda') -# out.svi <- out -# summary(out.svi) -# -# y.rep.means.svc <- apply(out.svc$y.rep.samples^2, 2, mean) -# y.rep.means.svi <- apply(out.svi$y.rep.samples^2, 2, mean) -# y.true <- data.list$y -# -# par(mfrow = c(1, 2)) -# plot(y.true, y.rep.means.svc, pch = 19, main = 'SVC') -# abline(0, 1) -# plot(y.true, y.rep.means.svi, pch = 19, main = 'SVI') -# abline(0, 1) -# par(mfrow = c(1, 1)) - -# Compare prediction results ---------------------------------------------- -load('results/tree/hold-out-vermont-SVC-results.rda') -out.pred.svc <- out.pred -load('results/tree/hold-out-vermont-SVI-results.rda') -out.pred.svi <- out.pred -load('data/tree/pred-indx.rda') -y.0 <- data.list$y[pred.indx] - -# Calculate CRPS ---------------------- -crps.svc <- crps_sample(sqrt(y.0), t(out.pred.svc$y.0.samples)) -crps.svi <- crps_sample(sqrt(y.0), t(out.pred.svi$y.0.samples)) - -mean(crps.svc) -mean(crps.svi) - -# Covergage --------------------------- -y.rep.svc.quants <- apply(out.pred.svc$y.0.samples, 2, quantile, c(0.025, 0.975)) -y.rep.svi.quants <- apply(out.pred.svi$y.0.samples, 2, quantile, c(0.025, 0.975)) -svc.coverage <- mean(sqrt(y.0) >= y.rep.svc.quants[1, ] & sqrt(y.0) <= y.rep.svc.quants[2, ]) -svi.coverage <- mean(sqrt(y.0) >= y.rep.svi.quants[1, ] & sqrt(y.0) <= y.rep.svi.quants[2, ]) -svc.coverage -svi.coverage - -# Calculate RMSPE --------------------- -y.0.med.svc <- apply(out.pred.svc$y.0.samples, 2, median) -y.0.med.svi <- apply(out.pred.svi$y.0.samples, 2, median) -rmspe.svc <- sqrt(mean((y.0.med.svc - sqrt(y.0))^2)) -rmspe.svi <- sqrt(mean((y.0.med.svi - sqrt(y.0))^2)) -rmspe.svc -rmspe.svi - -# Plot the estimates ------------------ -par(mfrow = c(1, 2)) -plot(sqrt(y.0), y.0.med.svc, pch = 19) -abline(0, 1) -plot(sqrt(y.0), y.0.med.svi, pch = 19) -abline(0, 1) -par(mfrow = c(1, 1)) - -# Credible interval widhts ------------ -mu.0.quants.svc <- apply(out.pred.svc$mu.0.samples, 2, quantile, c(0.025, 0.5, 0.975)) -mu.0.quants.svi <- apply(out.pred.svi$mu.0.samples, 2, quantile, c(0.025, 0.5, 0.975)) -mu.0.svc.width <- mu.0.quants.svc[3, ] - mu.0.quants.svc[1, ] -mu.0.svi.width <- mu.0.quants.svi[3, ] - mu.0.quants.svi[1, ] -plot(mu.0.svc.width, mu.0.svi.width, pch = 19) -abline(0, 1)