Skip to content

Commit

Permalink
Dev (#91)
Browse files Browse the repository at this point in the history
* change paste0(.) to file.path(.) in file naming for plots

* another fix for `plot_factors`

* fix bug in switchover from paste0 to file.path for plots

* fix another bug in switchover from paste0 to file.path

* fix marginaleffects

* adding sample-based calculation of approx. sample-size

* small expansion of docs

* update paste0 to file.path

* change default so fit_model runs in tempdir() ...

... to decrease change of user-error in re-using existing extrapolation-grid or Kmeans objects

* separate plot_legend from plot_variable

* adding new summary.fit_model option ...

... to output dataframe of expanded values

* fix bug in amend_output when plotting EOF factors

* small updates

* switch from INLA to fmesher dependency

* eliminate rgdal

* bug fix for rotate_factors given sum-to-zero constraint

* fix bug in updates for fmesher package

* fix error from rnaturalearth using "sf" output

* remove warning from inla.barrier.fem.copy

* Update DESCRIPTION

* run `document`

---------

Co-authored-by: Jim Thorson <[email protected]>
  • Loading branch information
James-Thorson-NOAA and James-Thorson authored Jan 10, 2024
1 parent 0e8c1e8 commit 619acb7
Show file tree
Hide file tree
Showing 52 changed files with 542 additions and 217 deletions.
10 changes: 5 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: FishStatsUtils
Type: Package
Title: Utilities (shared code and data) for FishStats spatio-temporal modeling toolbox
Version: 2.12.1
Date: 2022-11-18
Version: 2.13.0
Date: 2024-01-10
Authors@R:
c(person(given = "James",
family = "Thorson",
Expand All @@ -21,13 +21,12 @@ Imports:
DHARMa,
ecodist,
fastcluster,
INLA,
fmesher,
plotrix,
RANN,
sf,
raster,
reshape2,
rgdal,
rnaturalearth,
rnaturalearthdata,
seriation,
Expand All @@ -45,7 +44,8 @@ Depends:
units,
marginaleffects
Enhances:
tidyr
tidyr,
INLA,
Suggests:
testthat
Remotes:
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ export(plot_data)
export(plot_dharma)
export(plot_factors)
export(plot_index)
export(plot_legend)
export(plot_loadings)
export(plot_maps)
export(plot_overdispersion)
Expand All @@ -76,3 +77,6 @@ export(rotate_factors)
export(sample_variable)
export(simulate_data)
export(strip_units)
importFrom(fmesher,fm_evaluator)
importFrom(fmesher,fm_fem)
importFrom(fmesher,fm_mesh_2d)
1 change: 1 addition & 0 deletions R/FishStatsUtils.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@
#'
#' @docType package
#' @name FishStatsUtils
#' @importFrom fmesher fm_mesh_2d fm_fem fm_evaluator
NULL
8 changes: 4 additions & 4 deletions R/amend_output.R
Original file line number Diff line number Diff line change
Expand Up @@ -91,15 +91,15 @@ function( fit = NULL,
Num_ctm = abind::adrop(TmbData$Options_list$metadata_ctz[,,'num_notna',drop=FALSE], drop=3) %o% rep(1,TmbData$n_m)
if( treat_missing_as_zero==TRUE ){
# if treat_missing_as_zero==TRUE, then switch density from year-categories with no data to zero
if("D_gct"%in%names(Report)) Report$D_gct = ifelse(Num_gct==0, 0, Report$D_gct)
if("D_gcy"%in%names(Report)) Report$D_gcy = ifelse(Num_gct==0, 0, Report$D_gcy)
if("D_gct"%in%names(Report) & all(dim(Report$D_gct)==dim(Num_gct))) Report$D_gct = ifelse(Num_gct==0, 0, Report$D_gct)
if("D_gcy"%in%names(Report) & all(dim(Report$D_gct)==dim(Num_gct))) Report$D_gcy = ifelse(Num_gct==0, 0, Report$D_gcy)
if("Index_ctl"%in%names(Report)) Report$Index_ctl = ifelse(Num_ctl==0, 0, Report$Index_ctl)
if("Index_cyl"%in%names(Report)) Report$Index_cyl = ifelse(Num_ctl==0, 0, Report$Index_cyl)
}else{
# If some intercepts are mapped off, then switch density from year-categories with no data to NA
if( any(is.na(Map$beta2_ft)) | any(is.na(Map$beta2_ft)) ){
if("D_gct"%in%names(Report)) Report$D_gct = ifelse(Num_gct==0, NA, Report$D_gct)
if("D_gcy"%in%names(Report)) Report$D_gcy = ifelse(Num_gct==0, NA, Report$D_gcy)
if("D_gct"%in%names(Report) & all(dim(Report$D_gct)==dim(Num_gct))) Report$D_gct = ifelse(Num_gct==0, NA, Report$D_gct)
if("D_gcy"%in%names(Report) & all(dim(Report$D_gct)==dim(Num_gct))) Report$D_gcy = ifelse(Num_gct==0, NA, Report$D_gcy)
if("Index_ctl"%in%names(Report)) Report$Index_ctl = ifelse(Num_ctl==0, NA, Report$Index_ctl)
if("Index_cyl"%in%names(Report)) Report$Index_cyl = ifelse(Num_ctl==0, NA, Report$Index_cyl)
}
Expand Down
52 changes: 37 additions & 15 deletions R/calculate_proportion.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
#'
#' @inheritParams plot_biomass_index
#' @inheritParams VAST::make_data
#' @param sample_size_method Method used to calculate the variance in proportions, which is then converted to an approximately equivalent multinomial sample size that can be used as input-sample-size in a subsequent stock assessment model. Options are:
#' \describe{
#' \item{\code{sample_size_method="Taylor_series"}}{a Taylor-series approximation to the ratio of X/(X+Y) where X is the category-specific index and Y is the index for for all other categories}
#' \item{\code{sample_size_method="sample_based"}}{Taking samples from the joint precision of fixed and random effects, calculating proportions for each sample, and then computing the variance across those samples}
#' }
#' The sample-based approximation is expected to have higher variance and therefore lower approximate sample size. However, it may also have poor performance in cases when variance estimates are imprecise (such that the multivariate-normal approximation to joint precision is poor), and has not been thoroughly groundtested in real-world cases.
#' @param Index output from \code{FishStatsUtils::plot_biomass_index}
#' @param ... list of arguments to pass to \code{plot_index}
#'
Expand All @@ -18,22 +24,25 @@
#' @references For details regarding multivariate index standardization and expansion see \url{https://cdnsciencepub.com/doi/full/10.1139/cjfas-2018-0015}
#' @export
calculate_proportion <-
function( TmbData,
function( fit,
TmbData,
Index,
Expansion_cz = NULL,
year_labels = NULL,
years_to_plot = NULL,
strata_names = NULL,
category_names = NULL,
sample_size_method = c("Taylor_series","sample_based"),
plot_legend = ifelse(TmbData$n_l>1,TRUE,FALSE),
DirName = paste0(getwd(),"/"),
DirName = getwd(),
PlotName = "Proportion.png",
PlotName2 = "Average.png",
interval_width = 1,
width = 6,
height = 6,
xlab = "Category",
ylab = "Proportion",
n_samples = 250,
... ){

# Warnings and errors
Expand All @@ -55,19 +64,32 @@ function( TmbData,
Index_tl = apply(Index_ctl,MARGIN=2:3,FUN=sum)
SE_Index_tl = sqrt(apply(SE_Index_ctl^2,MARGIN=2:3,FUN=sum,na.rm=TRUE))

# Approximate variance for proportions, and effective sample size
Neff_ctl = var_Prop_ctl = array(NA,dim=dim(Prop_ctl))
for( cI in 1:dim(var_Prop_ctl)[1]){
for( tI in 1:dim(var_Prop_ctl)[2]){
for( lI in 1:dim(var_Prop_ctl)[3]){
# Original version
#var_Prop_ctl[cI,tI,lI] = Index_ctl[cI,tI,lI]^2/Index_tl[tI,lI]^2 * (SE_Index_ctl[cI,tI,lI]^2/Index_ctl[cI,tI,lI]^2 + SE_Index_tl[tI,lI]^2/Index_tl[tI,lI]^2 )
# Slightly extended version
var_Prop_ctl[cI,tI,lI] = Index_ctl[cI,tI,lI]^2/Index_tl[tI,lI]^2 * (SE_Index_ctl[cI,tI,lI]^2/Index_ctl[cI,tI,lI]^2 - 2*SE_Index_ctl[cI,tI,lI]^2/(Index_ctl[cI,tI,lI]*Index_tl[tI,lI]) + SE_Index_tl[tI,lI]^2/Index_tl[tI,lI]^2 )
var_Prop_ctl[cI,tI,lI] = ifelse( Index_ctl[cI,tI,lI]==0, 0, var_Prop_ctl[cI,tI,lI] ) # If dividing by zero, replace with 0
# Covert to effective sample size
Neff_ctl[cI,tI,lI] = Prop_ctl[cI,tI,lI] * (1-Prop_ctl[cI,tI,lI]) / var_Prop_ctl[cI,tI,lI]
}}}
if( tolower(sample_size_method[1]) == "taylor_series" ){
# Approximate variance for proportions, and effective sample size
Neff_ctl = var_Prop_ctl = array(NA,dim=dim(Prop_ctl))
for( cI in 1:dim(var_Prop_ctl)[1]){
for( tI in 1:dim(var_Prop_ctl)[2]){
for( lI in 1:dim(var_Prop_ctl)[3]){
# Original version
#var_Prop_ctl[cI,tI,lI] = Index_ctl[cI,tI,lI]^2/Index_tl[tI,lI]^2 * (SE_Index_ctl[cI,tI,lI]^2/Index_ctl[cI,tI,lI]^2 + SE_Index_tl[tI,lI]^2/Index_tl[tI,lI]^2 )
# Slightly extended version
var_Prop_ctl[cI,tI,lI] = Index_ctl[cI,tI,lI]^2/Index_tl[tI,lI]^2 * (SE_Index_ctl[cI,tI,lI]^2/Index_ctl[cI,tI,lI]^2 - 2*SE_Index_ctl[cI,tI,lI]^2/(Index_ctl[cI,tI,lI]*Index_tl[tI,lI]) + SE_Index_tl[tI,lI]^2/Index_tl[tI,lI]^2 )
var_Prop_ctl[cI,tI,lI] = ifelse( Index_ctl[cI,tI,lI]==0, 0, var_Prop_ctl[cI,tI,lI] ) # If dividing by zero, replace with 0
# Covert to effective sample size
#Neff_ctl[cI,tI,lI] = Prop_ctl[cI,tI,lI] * (1-Prop_ctl[cI,tI,lI]) / var_Prop_ctl[cI,tI,lI]
}}}
}else{
Index_ctlr = sample_variable( Sdreport = fit$parameter_estimates$SD,
Obj = fit$tmb_list$Obj,
variable_name = "Index_ctl",
n_samples = n_samples,
sample_fixed = TRUE )
Prop_ctlr = Index_ctlr / outer( rep(1,dim(Index_ctlr)[1]), apply(Index_ctlr,MARGIN=2:4,FUN=sum) )
var_Prop_ctl = apply(Prop_ctlr, MARGIN=1:3, FUN=var)
}

# Covert to effective sample size
Neff_ctl = Prop_ctl * (1-Prop_ctl) / var_Prop_ctl

# Median effective sample size across categories
Neff_tl = apply(Neff_ctl, MARGIN=2:3, FUN=median, na.rm=TRUE)
Expand Down
4 changes: 3 additions & 1 deletion R/convert_shapefile.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ convert_shapefile = function( file_path,
area_tolerance = 0.05,
... ){

shapefile_input = rgdal::readOGR( file_path, verbose=FALSE, p4s=projargs_for_shapefile )
#shapefile_input = rgdal::readOGR( file_path, verbose=FALSE, p4s=projargs_for_shapefile )
shapefile_input = sf::st_read( file_path )
st_crs(shapefile_input) = st_crs(projargs_for_shapefile)
# rgdal::writeOGR
message("Reading shapefile with projargs: ", print(shapefile_input@proj4string))
# raster::shapefile(.) has simplified read-write interface for future reference
Expand Down
6 changes: 4 additions & 2 deletions R/deprecated.R
Original file line number Diff line number Diff line change
Expand Up @@ -1049,8 +1049,10 @@ function( Lon, Lat, crs=NA ){
# SEE: https://github.com/nwfsc-assess/geostatistical_delta-GLMM/issues/25#issuecomment-345825230

# Attach package
require(rgdal)
on.exit( detach("package:rgdal") )
#require(rgdal)
#on.exit( detach("package:rgdal") )
require(sp)
on.exit( detach("package:sp") )

# Transform
dstart<-data.frame(lon=Lon, lat=Lat) # that's the object
Expand Down
76 changes: 42 additions & 34 deletions R/fit_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
#' @param year_labels character vector specifying names for labeling times \code{t_i}
#' @param ... additional arguments to pass to \code{\link{make_extrapolation_info}}, \code{\link{make_spatial_info}}, \code{\link[VAST]{make_data}}, \code{\link[VAST]{make_model}}, or \code{\link[TMBhelper]{fit_tmb}},
#' where arguments are matched by name against each function. If an argument doesn't match, it is still passed to \code{\link[VAST]{make_data}}. Note that \code{\link{make_spatial_info}}
#' passes named arguments to \code{\link[INLA]{inla.mesh.create}}.
#' passes named arguments to \code{\link[fmesher]{fm_mesh_2d}}.
#'
#' @return Object of class \code{fit_model}, containing formatted inputs and outputs from VAST
#' \describe{
Expand Down Expand Up @@ -105,7 +105,7 @@ function( settings,
a_i,
c_iz = rep(0,length(b_i)),
v_i = rep(0,length(b_i)),
working_dir = paste0(getwd(),"/"),
working_dir = tempdir(),
X1config_cp = NULL,
X2config_cp = NULL,
covariate_data,
Expand Down Expand Up @@ -180,8 +180,9 @@ function( settings,
DirPath = working_dir,
Save_Results = TRUE,
fine_scale = settings$fine_scale,
knot_method = settings$knot_method)
spatial_args_input = combine_lists( input=extra_args, default=spatial_args_default, args_to_use=c(formalArgs(make_spatial_info),formalArgs(INLA::inla.mesh.create)) )
knot_method = settings$knot_method,
mesh_package = settings$mesh_package )
spatial_args_input = combine_lists( input=extra_args, default=spatial_args_default, args_to_use=c(formalArgs(make_spatial_info),formalArgs(fmesher::fm_mesh_2d)) )
spatial_list = do.call( what=make_spatial_info, args=spatial_args_input )
}else{
spatial_args_input = NULL
Expand Down Expand Up @@ -507,7 +508,7 @@ plot.fit_model <- function(x, what="results", ...)
#' @inheritParams DHARMa::plotResiduals
#'
#' @param x Output from \code{\link{fit_model}}
#' @param what String indicating what to summarize; options are `density` or `residuals`
#' @param what String indicating what to summarize; options are `density`, `index` or `residuals`
#' @param n_samples Number of samples used when \code{what="residuals"}
#' @param ... additional arguments passed to \code{\link[DHARMa]{plotResiduals}} when \code{what="residuals"}
#'
Expand All @@ -534,34 +535,41 @@ function( x,
year_labels = year_labels,
category_names = category_names )

if( tolower(what) == "density" ){
if( tolower(what) %in% c("density","index") ){
# Load location of extrapolation-grid
ans[["extrapolation_grid"]] = print( x$extrapolation_list, quiet=TRUE )

# Load density estimates
if( any(c("D_gct","D_gcy") %in% names(x$Report)) ){
if("D_gcy" %in% names(x$Report)) Dvar_name = "D_gcy"
if("D_gct" %in% names(x$Report)) Dvar_name = "D_gct"
ans[["Density_array"]] = x$Report[[Dvar_name]]
if( !( x$settings$fine_scale==TRUE | x$spatial_list$Method=="Stream_network" ) ){
index_tmp = x$spatial_list$NN_Extrap$nn.idx[ which(x$extrapolation_list[["Area_km2_x"]]>0), 1 ]
ans[["Density_array"]] = ans[["Density_array"]][ index_tmp,,,drop=FALSE]
}
if( any(sapply(dimnames(ans[["Density_array"]]),FUN=is.null)) ){
dimnames(ans[["Density_array"]]) = list( rownames(ans[["extrapolation_grid"]]), paste0("Category_",1:dim(ans[["Density_array"]])[[2]]), x$year_labels )
}
# Expand as grid
Density_dataframe = expand.grid("Grid"=1:dim(ans[["Density_array"]])[[1]], "Category"=dimnames(ans[["Density_array"]])[[2]], "Year"=dimnames(ans[["Density_array"]])[[3]])
Density_dataframe = cbind( Density_dataframe, ans[["extrapolation_grid"]][Density_dataframe[,'Grid'],], "Density"=as.vector(ans[["Density_array"]]) )
ans[["Density_dataframe"]] = Density_dataframe
ans[['year_labels']] = x[['year_labels']]
rownames(Density_dataframe) = NULL
cat("\n### Printing head of and tail `Density_dataframe`, and returning data frame in output object\n")
print(head(Density_dataframe))
print(tail(Density_dataframe))
}else{
stop( "`summary.fit_model` not implemented for the version of `VAST` being used" )
if( tolower(what) == "density" ){
ans[["Density_array"]] = x$Report[["D_gct"]]
}else if( tolower(what) == "index" ){
ans[["Density_array"]] = x$Report[["Index_gctl"]]
}

# Exclude boundary knots
if( !( x$settings$fine_scale==TRUE | x$spatial_list$Method=="Stream_network" ) ){
index_tmp = x$spatial_list$NN_Extrap$nn.idx[ which(x$extrapolation_list[["Area_km2_x"]]>0), 1 ]
ans[["Density_array"]] = ans[["Density_array"]][ index_tmp,,,drop=FALSE]
}
# Error check
if( any(sapply(dimnames(ans[["Density_array"]]),FUN=is.null)) ){
stop("`summary.fit_model` assumes that arrays are labeled")
}

# Expand as grid
Density_dataframe = expand.grid( dimnames(ans[["Density_array"]]) )
Density_dataframe = cbind( Density_dataframe, ans[["extrapolation_grid"]][Density_dataframe[,'Site'],], as.vector(ans[["Density_array"]]) )
colnames(Density_dataframe)[ncol(Density_dataframe)] = tolower(what)

# Save output
ans[["Density_dataframe"]] = Density_dataframe
ans[['year_labels']] = x[['year_labels']]
rownames(Density_dataframe) = NULL

# Print to terminal
cat("\n### Printing head of and tail `Density_dataframe`, and returning data frame in output object\n")
print(head(Density_dataframe))
print(tail(Density_dataframe))
}

# Residuals
Expand Down Expand Up @@ -601,10 +609,10 @@ function( x,

# Run DHARMa
# Adding jitters because DHARMa version 0.3.2.0 sometimes still throws an error method="traditional" and integer=FALSE without jitters
dharmaRes = DHARMa::createDHARMa(simulatedResponse=strip_units(b_iz) + 1e-10*array(rnorm(prod(dim(b_iz))),dim=dim(b_iz)),
observedResponse=strip_units(x$data_list$b_i) + 1e-10*rnorm(length(x$data_list$b_i)),
fittedPredictedResponse=strip_units(x$Report$D_i),
integer=FALSE)
dharmaRes = DHARMa::createDHARMa( simulatedResponse = strip_units(b_iz) + 1e-10*array(rnorm(prod(dim(b_iz))),dim=dim(b_iz)),
observedResponse = strip_units(x$data_list$b_i) + 1e-10*rnorm(length(x$data_list$b_i)),
fittedPredictedResponse = strip_units(x$Report$D_i),
integer = FALSE)
#dharmaRes = DHARMa::createDHARMa(simulatedResponse=strip_units(b_iz),
# observedResponse=strip_units(x$data_list$b_i),
# fittedPredictedResponse=strip_units(x$Report$D_i),
Expand Down Expand Up @@ -660,7 +668,7 @@ function( x,
if( is.null(working_dir) ){
plot_dharma(dharmaRes, ...)
}else if(!is.na(working_dir) ){
png(file=paste0(working_dir,"quantile_residuals.png"), width=8, height=4, res=200, units='in')
png(file=file.path(working_dir,"quantile_residuals.png"), width=8, height=4, res=200, units='in')
plot_dharma(dharmaRes, form=form, ...)
dev.off()
}
Expand Down Expand Up @@ -750,7 +758,7 @@ predict.fit_model <- function(x,
new_covariate_data = NULL,
new_catchability_data = NULL,
do_checks = TRUE,
working_dir = paste0(getwd(),"/") )
working_dir = getwd() )
{
message("`predict.fit_model(.)` is in beta-testing, and please explore results carefully prior to using")

Expand Down
Loading

0 comments on commit 619acb7

Please sign in to comment.