-
Notifications
You must be signed in to change notification settings - Fork 1
/
Data-simulation_IU.R
392 lines (324 loc) · 14.6 KB
/
Data-simulation_IU.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
## Author: A.D. Wright
## Project: NCRN Amphibians - Monitoring Optimization
## Code: Data simulation and analysis for the Indicator Unit case study
#rm(list = ls())
#options(max.print = 1000)
## TABLE OF CONTENTS
## Packages, working directory, and data 15
## Global parameters for simulation 29
## General f()'s for simulation 103
## Run and analyze simulations 279
#########
## Part - Packages, working directory, and data
#########
##
#### Install Packages
##
#tidyverse
if(!require(tidyverse)) {install.packages('tidyverse');require(tidyverse)}
#jagsUI
if(!require(jagsUI)) {install.packages('jagsUI');require(jagsUI)}
#########
## Part - Global parameters for simulation
#########
set.seed(25)
##
#### Sampling dimensions
##
#Years
Y <- 10
#Sampling Occassions per Year
K <- 6
Kmed <- 4
Klow <- 2
#Units
R <- 10
#Sites per unit
JMax <- 100
JMin <- 10
Jr <- as.integer(runif(n = R, min = JMin, max = JMax))
#Species Total (will vary by park)
I <- 25
M <- 25
#Datasets per scenario
#5 sampling scenarios:
scenarios <- c('stratified','indicator','rotating','split','weighted')
#5 sampling efforts: #(10%, 20%, 30%, 40%, 50%)
effort <- c(0.1, 0.2, 0.3, 0.4, 0.5)
##
#### Global, regional, and species parameters
##
#Global
#Unit Occupancy
mean.c0 <- 0.4
sd.c0 <- 0.25
#Site Occupancy
#Intercept
mean.a0.global <- 0.3
sd.a0.global <- 0.5
sd.a0 <- 0.5
#Site effect
mu.a1.global <- 0.8
sd.a1.global <- 0.5
sd.a1 <- 0.5
#Year effect
mu.a2.global <- -0.4
sd.a2.global <- 0.5
sd.a2 <- 0.5
#Autologistic effect
mu.a3.global <- 0.4
sd.a3.global <- 0.5
sd.a3 <- 0.5
#Detection
mean.b0.global <- 0.3
sd.b0.global <- 0.5
sd.b0 <- 0.5
##
#### Generate covaraite data
##
Site_effect_a1 <- array(rnorm(JMax*R, 0, 1), dim = c(100,10))
Year <- 1:10
Year_effect_a2 <- (Year - mean(Year))/sd(Year)
#########
## Part - General f()'s for simulation
#########
##
#### Simulation f() - A blend of Sutherland et al. 2016 S2 & simDCM
##
#Basic Function
sim_community <- function(R. = R, # of spatial units
I. = I, # of species
M. = M, # of augmented species
Y. = Y, # of years
K. = K, # of sampling occassions per year
JMax = max(Jr), # sites max per unit
Jr. = Jr, # of sites per spatial unit
mean.c0. = mean.c0, sd.c0. = sd.c0, #unit occupancy
mean.a0.global. = mean.a0.global, sd.a0.global. = sd.a0.global, sd.a0. = sd.a0, #mu.a0. = mu.a0, a0. = a0, #site occupancy
mu.a1.global. = mu.a1.global, sd.a1.global. = sd.a1.global, sd.a1. = sd.a1, #mu.a1. = mu.a1, a1. = a1, #site effect on occupancy
mu.a2.global. = mu.a2.global, sd.a2.global. = sd.a2.global, sd.a2. = sd.a2, #mu.a2. = mu.a2, a2. = a2, #year effect on occupancy
mu.a3.global. = mu.a3.global, sd.a3.global. = sd.a3.global, sd.a3. = sd.a3, #mu.a3. = mu.a3, a3. = a3, #auto effect on occupancy
mean.b0.global. = mean.b0.global, sd.b0.global. = sd.b0.global, sd.bo. = sd.b0, #mu.b0. = mu.b0, b0. = b0,#detection
Site_effect_a1. = Site_effect_a1, Year_effect_a2. = Year_effect_a2 #covariates
){
#Create empty objects for loops
#Region (and species - in 2nd loop)
mu.a0 <- mu.a1 <- mu.a2 <- mu.a3 <- mu.b0 <- omega <- N_unit <- array(NA, dim = R)
a0 <- a1 <- a2 <- a3 <- b0 <- array(NA, dim = c(I+M,R))
W_mat <- array(NA, c(I+M,R))
Z_mat <- logit_psi <- array(NA, c(JMax, Y, I+M, R))
X_mat <- array(NA, c(JMax, K, Y, I+M, R))
#Generate park and species parameters
for(r in 1:R){
omega[r] <- plogis(rnorm(1,qlogis(mean.c0),sd.c0))
#Occupancy
mu.a0[r] <- rnorm(1, mean = qlogis(mean.a0.global), sd = sd.a0.global)
mu.a1[r] <- rnorm(1, mean = mu.a1.global, sd = sd.a1.global)
mu.a2[r] <- rnorm(1, mean = mu.a2.global, sd = sd.a2.global)
mu.a3[r] <- rnorm(1, mean = mu.a3.global, sd = sd.a3.global)
#Detection
mu.b0[r] <- rnorm(1, mean = qlogis(mean.b0.global), sd = sd.b0.global)
for(i in 1:(I+M)){
W_mat[i,r] <- rbinom(1, 1, omega[r])
#Species
#Occupancy
a0[i,r] <- rnorm(1, mean = mu.a0[r], sd = sd.a0)
a1[i,r] <- rnorm(1, mean = mu.a1[r], sd = sd.a1)
a2[i,r] <- rnorm(1, mean = mu.a2[r], sd = sd.a2)
a3[i,r] <- rnorm(1, mean = mu.a3[r], sd = sd.a3)
#Detection
b0[i,r] <- rnorm(1, mean = mu.b0[r], sd = sd.b0)
}
N_unit[r] <- sum(W_mat[,r])
}
#Generate data
for(r in 1:R){
for(i in 1:(I+M)){
for(j in 1:Jr[r]) {
logit_psi[j,1,i,r] <- a0[i,r] + a1[i,r]*Site_effect_a1[j,r] + a2[i,r]*Year_effect_a2[1]
Z_mat[j,1,i,r] <- rbinom(1, 1, plogis(logit_psi[j,1,i,r])*W_mat[i,r])
for(y in 2:Y){
logit_psi[j,y,i,r] <- a0[i,r] + a1[i,r]*Site_effect_a1[j,r] + a2[i,r]*Year_effect_a2[y] + a3[i,r]*Z_mat[j,y-1,i,r]
Z_mat[j,y,i,r] <- rbinom(1, 1, plogis(logit_psi[j,y,i,r])*W_mat[i,r])
} #y
for(y in 1:Y){
for(k in 1:K){
X_mat[j,k,y,i,r] <- rbinom(1, 1, Z_mat[j,y,i,r]*plogis(b0[i,r]))
} #k
} #y
} #j
} #i
} #r
return(list(X_mat = X_mat, logit_psi = logit_psi, Z_mat = Z_mat, #the simulated data
R = R, I = I, M = M, Y = Y, K = K, JMax = JMax, Jr = Jr, #the dimensions used to simulate the data
Site_effect_a1 = Site_effect_a1, Year_effect_a2 = Year_effect_a2, #the covariates used to simulate the data
mean.c0 = mean.c0, sd.c0 = sd.c0, omega = omega, W_mat = W_mat, N_unit = N_unit, #the parameters used to simulate data - unit occupancy
mean.a0.global = mean.a0.global, sd.a0.global = sd.a0.global, sd.a0 = sd.a0, mu.a0 = mu.a0, a0 = a0, #the parameters used to simulate data - site occupancy (intercept)
mu.a1.global = mu.a1.global, sd.a1.global = sd.a1.global, sd.a1 = sd.a1, mu.a1 = mu.a1, a1 = a1, #the parameters used to simulate data - site occupancy (slope - site)
mu.a2.global = mu.a2.global, sd.a2.global = sd.a2.global, sd.a2 = sd.a2, mu.a2 = mu.a2, a2 = a2, #the parameters used to simulate data - site occupancy (slope - year)
mu.a3.global = mu.a3.global, sd.a3.global = sd.a3.global, sd.a3 = sd.a3, mu.a3 = mu.a3, a3 = a3, #the parameters used to simulate data - site occupancy (slope - auto)
mean.b0.global = mean.b0.global, sd.b0.global = sd.b0.global, sd.b0 = sd.b0, mu.b0 = mu.b0, b0= b0 #the parameters used to simulate data - detection (intercept)
))
} #f() - sim_community
##
#### Base function to create tables of results for plotting purposes
##
#This should work for both vary vs non-vary scenarios
org_results <- function(jagsOut, td){
nPark <- td$R
nSpp <- dim(td$W_mat)[1]*dim(td$W_mat)[2]
#Global
simTab_g <- data.frame(mean.c0 = NA,
sd.c0 = NA,
mu.a0.global = NA,
sd.a0.global = NA,
sd.a0 = NA,
mu.b0.global = NA,
sd.b0.global = NA,
sd.b0 = NA,
mu.a1.global = NA,
sd.a1.global = NA,
sd.a1 = NA,
mu.a2.global = NA,
sd.a2.global = NA,
sd.a2 = NA,
mu.a3.global = NA,
sd.a3.global = NA,
sd.a3 = NA
)
simTab_g$mean.c0 <- jagsOut$mean$mean.c0 - td$mean.c0
simTab_g$sd.c0 <- jagsOut$mean$sd.c0 - td$sd.c0
simTab_g$mu.a0.global <- jagsOut$mean$mu.a0.global - mean(td$mu.a0)
simTab_g$sd.a0.global <- jagsOut$mean$sd.a0.global - td$sd.a0.global
simTab_g$sd.a0 <- jagsOut$mean$sd.a0 - td$sd.a0
simTab_g$mu.a1.global <- jagsOut$mean$mu.a1.global - mean(td$mu.a1)
simTab_g$sd.a1.global <- jagsOut$mean$sd.a1.global - td$sd.a1.global
simTab_g$sd.a1 <- jagsOut$mean$sd.a1 - td$sd.a1
simTab_g$mu.a2.global <- jagsOut$mean$mu.a2.global - mean(td$mu.a2)
simTab_g$sd.a2.global <- jagsOut$mean$sd.a2.global - td$sd.a2.global
simTab_g$sd.a2 <- jagsOut$mean$sd.a2 - td$sd.a2
simTab_g$mu.a3.global <- jagsOut$mean$mu.a3.global - mean(td$mu.a3)
simTab_g$sd.a3.global <- jagsOut$mean$sd.a3.global - td$sd.a3.global
simTab_g$sd.a3 <- jagsOut$mean$sd.a3 - td$sd.a3
simTab_g$mu.b0.global <- jagsOut$mean$mu.b0.global - mean(td$mu.b0)
simTab_g$sd.b0.global <- jagsOut$mean$sd.b0.global - td$sd.b0.global
simTab_g$sd.b0 <- jagsOut$mean$sd.b0 - td$sd.b0
#Park
simTab_p <- data.frame(mu.a0 = rep(NA,nPark),
mu.b0 = rep(NA,nPark),
mu.a1 = rep(NA,nPark),
mu.a2 = rep(NA,nPark),
mu.a3 = rep(NA,nPark)
)
simTab_p$mu.a0[1:nPark] <- (jagsOut$mean$mu.a0 - apply(td$a0,2,mean))
simTab_p$mu.a1[1:nPark] <- (jagsOut$mean$mu.a1 - apply(td$a1,2,mean))
simTab_p$mu.a2[1:nPark] <- (jagsOut$mean$mu.a2 - apply(td$a2,2,mean))
simTab_p$mu.a3[1:nPark] <- (jagsOut$mean$mu.a3 - apply(td$a3,2,mean))
simTab_p$mu.b0[1:nPark] <- (jagsOut$mean$mu.b0 - apply(td$b0,2,mean))
#Species
simTab_s <- data.frame(a0 = rep(NA,nSpp),
b0 = rep(NA,nSpp),
a1 = rep(NA,nSpp),
a2 = rep(NA,nSpp),
a3 = rep(NA,nSpp)
)
simTab_s$a0[1:nSpp] <- as.vector(jagsOut$mean$a0*na_if(td$W_mat, 0)) - (td$a0*na_if(td$W_mat, 0))
simTab_s$b0[1:nSpp] <- as.vector(jagsOut$mean$b0*na_if(td$W_mat, 0)) - (td$b0*na_if(td$W_mat, 0))
simTab_s$a1[1:nSpp] <- as.vector(jagsOut$mean$a1*na_if(td$W_mat, 0)) - (td$a1*na_if(td$W_mat, 0))
simTab_s$a2[1:nSpp] <- as.vector(jagsOut$mean$a2*na_if(td$W_mat, 0)) - (td$a2*na_if(td$W_mat, 0))
simTab_s$a3[1:nSpp] <- as.vector(jagsOut$mean$a3*na_if(td$W_mat, 0)) - (td$a3*na_if(td$W_mat, 0))
#Put results all together
x <- list(global = simTab_g, park = simTab_p, species = simTab_s)
return(x)
}
#########
## Part - Run and analyze simulations
#########
## Looping Variables
start <- 1
end <- 10 #Run this script 10 times in HPCC for a total of 650 sims: for i in {1..65}; do sbatch amphibianIU.sb; done
results <- Jr_temp <- maxJr_temp <- K_temp <- list()
converge <- vector()
##Loop
for(i in start:end){
#Remove seed so simulations in parallel are all different
set.seed(NULL)
#Simulate a data set
td <- sim_community()
## Simulaiton specific variable
strategy <- "indUnit"
effort <- 0.1
Jr_temp[[i]] <- round(td$Jr*(effort*2))
Jr_temp[[i]] <- c(Jr_temp[[i]][1], Jr_temp[[i]][2], 2, 2, Jr_temp[[i]][5], Jr_temp[[i]][6], 2, 2, Jr_temp[[i]][9], 2) # Parks sampled c(1,2,5,6,9)
maxJr_temp[[i]] <- max(Jr_temp[[i]])
K_temp[[i]] <- 4
# #Need to run when effort = 0.5
# #Need to rewrite Jr_temp[[6]] for loop to work
# Jr_temp[[i]][6] <- 96
# X_mat_placeholder <- td$X_mat[97:98,,,,6]
# Z_mat_placeholder <- td$Z_mat[97:98,,,6]
#Need to rewrite Z_mat and W_mat based on effort for initial values
for(r in 1:td$R){
for(m in 1:(td$I+td$M)){
for(j in (Jr_temp[[i]][r]+1):td$JMax) {
for(y in 1:td$Y){
td$Z_mat[j,y,m,r] <- NA
for(k in 1:6){
td$X_mat[j,k,y,m,r] <- NA
} #k
} #y
} #j
} #i
} #r
# #Need to run when effort = 0.5
# #Add X_mat and Z_mat values back in for the sites that were taken out
# td$X_mat[97:98,,,,6] <- X_mat_placeholder
# td$Z_mat[97:98,,,6] <- Z_mat_placeholder
# Jr_temp[[i]][6] <- 98
td$X_mat <- td$X_mat[1:maxJr_temp[[i]],1:K,1:10,1:50,1:10]
td$X_mat[1:maxJr_temp[[i]],1:K,1:10,1:50,c(3,4,7,8,10)] <- NA #Remove data from parks that didn't collect data
#Test td$X_mat[1:(maxJr_temp[[i]]),1,1,1,]
td$Z_mat <- td$Z_mat[1:maxJr_temp[[i]],1:10,1:50,1:10]
td$Z_mat[1:maxJr_temp[[i]],1:10,1:50,c(3,4,7,8,10)] <- NA #Remove data from parks that didn't collect data
#Need to also remove site-specific covariate data - added an imputation model to create fake covaraite data in the model for missing covariate data
td$Site_effect_a1[,c(3,4,7,8,10)] <- NA
# Organize data for jags
jagsDat <- list(X = td$X_mat, #Detection data
R = td$R, I = td$I, M = td$M, Y = td$Y, K = K_temp[[i]], Jr = Jr_temp[[i]], #Looping variables
Site_effect_a1 = td$Site_effect_a1, Year_effect_a2 = td$Year_effect_a2 #Covariates
)
# Compile inititial values for jags
jagsIni <- function(){
list(Z=td$Z_mat, W=td$W_mat)
}
# Paramaters to monitor for jags
jagsPar <- c('mean.c0', 'sd.c0', #unit occupancy
'mu.a0.global', 'sd.a0.global', 'sd.a0', 'mu.a0', 'a0', #site occupancy (intercept)
'mu.a1.global', 'sd.a1.global', 'sd.a1', 'mu.a1', 'a1', #site occupancy (slope)
'mu.a2.global', 'sd.a2.global', 'sd.a2', 'mu.a2', 'a2', #site occupancy (slope)
'mu.a3.global', 'sd.a3.global', 'sd.a3', 'mu.a3', 'a3', #site occupancy (slope)
'mu.b0.global', 'sd.b0.global', 'sd.b0', 'mu.b0', 'b0' #detection (intercept)
)
#Run jags()
jagsFit <- autojags(data = jagsDat,
inits = jagsIni,
parameters.to.save = jagsPar,
model.file = "mrcm_jags.txt",
parallel=T,
n.chains=3,
n.adapt=1000,
iter.increment=10000,
max.iter=50000,
n.burnin=5000,
n.thin=10,
Rhat.limit = 1.1
)
# Append this run to one full results object
results[[i]] <- org_results(jagsOut = jagsFit, td = td)
converge[i] <- max(unlist(jagsFit$Rhat))
}# END OF LOOP
##Save results file
date <- gsub(pattern = c(":| "), replacement = "-", x = as.character(Sys.time()))
file_str <- paste("jagsFit_","Simul_", effort*100, strategy,"_",date,".R",sep="")
#Save
save(results, converge, file=file_str)