Dev (#91)

* change paste0(.) to file.path(.) in file naming for plots * another fix for `plot_factors` * fix bug in switchover from paste0 to file.path for plots * fix another bug in switchover from paste0 to file.path * fix marginaleffects * adding sample-based calculation of approx. sample-size * small expansion of docs * update paste0 to file.path * change default so fit_model runs in tempdir() ... ... to decrease change of user-error in re-using existing extrapolation-grid or Kmeans objects * separate plot_legend from plot_variable * adding new summary.fit_model option ... ... to output dataframe of expanded values * fix bug in amend_output when plotting EOF factors * small updates * switch from INLA to fmesher dependency * eliminate rgdal * bug fix for rotate_factors given sum-to-zero constraint * fix bug in updates for fmesher package * fix error from rnaturalearth using "sf" output * remove warning from inla.barrier.fem.copy * Update DESCRIPTION * run `document` --------- Co-authored-by: Jim Thorson <[email protected]>
James-Thorson-NOAA · Jan 10, 2024 · 619acb7 · 619acb7
1 parent 0e8c1e8
commit 619acb7
Show file tree

Hide file tree

Showing 52 changed files with 542 additions and 217 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: FishStatsUtils
 Type: Package
 Title: Utilities (shared code and data) for FishStats spatio-temporal modeling toolbox
-Version: 2.12.1
-Date: 2022-11-18
+Version: 2.13.0
+Date: 2024-01-10
 Authors@R: 
     c(person(given = "James",
              family = "Thorson",
@@ -21,13 +21,12 @@ Imports:
     DHARMa,
     ecodist,
     fastcluster,
-    INLA,
+    fmesher,
     plotrix,
     RANN,
     sf,
     raster,
     reshape2,
-    rgdal,
     rnaturalearth,
     rnaturalearthdata,
     seriation,
@@ -45,7 +44,8 @@ Depends:
     units,
     marginaleffects
 Enhances:
-    tidyr
+    tidyr,
+    INLA,
 Suggests:
     testthat
 Remotes: 

diff --git a/NAMESPACE b/NAMESPACE
@@ -60,6 +60,7 @@ export(plot_data)
 export(plot_dharma)
 export(plot_factors)
 export(plot_index)
+export(plot_legend)
 export(plot_loadings)
 export(plot_maps)
 export(plot_overdispersion)
@@ -76,3 +77,6 @@ export(rotate_factors)
 export(sample_variable)
 export(simulate_data)
 export(strip_units)
+importFrom(fmesher,fm_evaluator)
+importFrom(fmesher,fm_fem)
+importFrom(fmesher,fm_mesh_2d)
diff --git a/R/FishStatsUtils.R b/R/FishStatsUtils.R
@@ -6,4 +6,5 @@
 #'
 #' @docType package
 #' @name FishStatsUtils
+#' @importFrom fmesher fm_mesh_2d fm_fem fm_evaluator
 NULL
diff --git a/R/amend_output.R b/R/amend_output.R
@@ -91,15 +91,15 @@ function( fit = NULL,
     Num_ctm = abind::adrop(TmbData$Options_list$metadata_ctz[,,'num_notna',drop=FALSE], drop=3) %o% rep(1,TmbData$n_m)
     if( treat_missing_as_zero==TRUE ){
       # if treat_missing_as_zero==TRUE, then switch density from year-categories with no data to zero
-      if("D_gct"%in%names(Report)) Report$D_gct = ifelse(Num_gct==0, 0, Report$D_gct)
-      if("D_gcy"%in%names(Report)) Report$D_gcy = ifelse(Num_gct==0, 0, Report$D_gcy)
+      if("D_gct"%in%names(Report) & all(dim(Report$D_gct)==dim(Num_gct))) Report$D_gct = ifelse(Num_gct==0, 0, Report$D_gct)
+      if("D_gcy"%in%names(Report) & all(dim(Report$D_gct)==dim(Num_gct))) Report$D_gcy = ifelse(Num_gct==0, 0, Report$D_gcy)
       if("Index_ctl"%in%names(Report)) Report$Index_ctl = ifelse(Num_ctl==0, 0, Report$Index_ctl)
       if("Index_cyl"%in%names(Report)) Report$Index_cyl = ifelse(Num_ctl==0, 0, Report$Index_cyl)
     }else{
       # If some intercepts are mapped off, then switch density from year-categories with no data to NA
       if( any(is.na(Map$beta2_ft)) | any(is.na(Map$beta2_ft)) ){
-        if("D_gct"%in%names(Report)) Report$D_gct = ifelse(Num_gct==0, NA, Report$D_gct)
-        if("D_gcy"%in%names(Report)) Report$D_gcy = ifelse(Num_gct==0, NA, Report$D_gcy)
+        if("D_gct"%in%names(Report) & all(dim(Report$D_gct)==dim(Num_gct))) Report$D_gct = ifelse(Num_gct==0, NA, Report$D_gct)
+        if("D_gcy"%in%names(Report) & all(dim(Report$D_gct)==dim(Num_gct))) Report$D_gcy = ifelse(Num_gct==0, NA, Report$D_gcy)
         if("Index_ctl"%in%names(Report)) Report$Index_ctl = ifelse(Num_ctl==0, NA, Report$Index_ctl)
         if("Index_cyl"%in%names(Report)) Report$Index_cyl = ifelse(Num_ctl==0, NA, Report$Index_cyl)
       }

diff --git a/R/calculate_proportion.R b/R/calculate_proportion.R
@@ -6,6 +6,12 @@
 #'
 #' @inheritParams plot_biomass_index
 #' @inheritParams VAST::make_data
+#' @param sample_size_method Method used to calculate the variance in proportions, which is then converted to an approximately equivalent multinomial sample size that can be used as input-sample-size in a subsequent stock assessment model.  Options are:
+#' \describe{
+#'   \item{\code{sample_size_method="Taylor_series"}}{a Taylor-series approximation to the ratio of X/(X+Y) where X is the category-specific index and Y is the index for for all other categories}
+#'   \item{\code{sample_size_method="sample_based"}}{Taking samples from the joint precision of fixed and random effects, calculating proportions for each sample, and then computing the variance across those samples}
+#' }
+#' The sample-based approximation is expected to have higher variance and therefore lower approximate sample size.  However, it may also have poor performance in cases when variance estimates are imprecise (such that the multivariate-normal approximation to joint precision is poor), and has not been thoroughly groundtested in real-world cases.
 #' @param Index output from \code{FishStatsUtils::plot_biomass_index}
 #' @param ... list of arguments to pass to \code{plot_index}
 #'
@@ -18,22 +24,25 @@
 #' @references For details regarding multivariate index standardization and expansion see \url{https://cdnsciencepub.com/doi/full/10.1139/cjfas-2018-0015}
 #' @export
 calculate_proportion <-
-function( TmbData,
+function( fit,
+          TmbData,
           Index,
           Expansion_cz = NULL,
           year_labels = NULL,
           years_to_plot = NULL,
           strata_names = NULL,
           category_names = NULL,
+          sample_size_method = c("Taylor_series","sample_based"),
           plot_legend = ifelse(TmbData$n_l>1,TRUE,FALSE),
-          DirName = paste0(getwd(),"/"),
+          DirName = getwd(),
           PlotName = "Proportion.png",
           PlotName2 = "Average.png",
           interval_width = 1,
           width = 6,
           height = 6,
           xlab = "Category",
           ylab = "Proportion",
+          n_samples = 250,
           ... ){
 
   # Warnings and errors
@@ -55,19 +64,32 @@ function( TmbData,
   Index_tl = apply(Index_ctl,MARGIN=2:3,FUN=sum)
   SE_Index_tl = sqrt(apply(SE_Index_ctl^2,MARGIN=2:3,FUN=sum,na.rm=TRUE))
 
-  # Approximate variance for proportions, and effective sample size
-  Neff_ctl = var_Prop_ctl = array(NA,dim=dim(Prop_ctl))
-  for( cI in 1:dim(var_Prop_ctl)[1]){
-  for( tI in 1:dim(var_Prop_ctl)[2]){
-  for( lI in 1:dim(var_Prop_ctl)[3]){
-    # Original version
-    #var_Prop_ctl[cI,tI,lI] = Index_ctl[cI,tI,lI]^2/Index_tl[tI,lI]^2 * (SE_Index_ctl[cI,tI,lI]^2/Index_ctl[cI,tI,lI]^2  + SE_Index_tl[tI,lI]^2/Index_tl[tI,lI]^2 )
-    # Slightly extended version
-    var_Prop_ctl[cI,tI,lI] = Index_ctl[cI,tI,lI]^2/Index_tl[tI,lI]^2 * (SE_Index_ctl[cI,tI,lI]^2/Index_ctl[cI,tI,lI]^2 - 2*SE_Index_ctl[cI,tI,lI]^2/(Index_ctl[cI,tI,lI]*Index_tl[tI,lI]) + SE_Index_tl[tI,lI]^2/Index_tl[tI,lI]^2 )
-    var_Prop_ctl[cI,tI,lI] = ifelse( Index_ctl[cI,tI,lI]==0, 0, var_Prop_ctl[cI,tI,lI] )  # If dividing by zero, replace with 0
-    # Covert to effective sample size
-    Neff_ctl[cI,tI,lI] = Prop_ctl[cI,tI,lI] * (1-Prop_ctl[cI,tI,lI]) / var_Prop_ctl[cI,tI,lI]
-  }}}
+  if( tolower(sample_size_method[1]) == "taylor_series" ){
+    # Approximate variance for proportions, and effective sample size
+    Neff_ctl = var_Prop_ctl = array(NA,dim=dim(Prop_ctl))
+    for( cI in 1:dim(var_Prop_ctl)[1]){
+    for( tI in 1:dim(var_Prop_ctl)[2]){
+    for( lI in 1:dim(var_Prop_ctl)[3]){
+      # Original version
+      #var_Prop_ctl[cI,tI,lI] = Index_ctl[cI,tI,lI]^2/Index_tl[tI,lI]^2 * (SE_Index_ctl[cI,tI,lI]^2/Index_ctl[cI,tI,lI]^2  + SE_Index_tl[tI,lI]^2/Index_tl[tI,lI]^2 )
+      # Slightly extended version
+      var_Prop_ctl[cI,tI,lI] = Index_ctl[cI,tI,lI]^2/Index_tl[tI,lI]^2 * (SE_Index_ctl[cI,tI,lI]^2/Index_ctl[cI,tI,lI]^2 - 2*SE_Index_ctl[cI,tI,lI]^2/(Index_ctl[cI,tI,lI]*Index_tl[tI,lI]) + SE_Index_tl[tI,lI]^2/Index_tl[tI,lI]^2 )
+      var_Prop_ctl[cI,tI,lI] = ifelse( Index_ctl[cI,tI,lI]==0, 0, var_Prop_ctl[cI,tI,lI] )  # If dividing by zero, replace with 0
+      # Covert to effective sample size
+      #Neff_ctl[cI,tI,lI] = Prop_ctl[cI,tI,lI] * (1-Prop_ctl[cI,tI,lI]) / var_Prop_ctl[cI,tI,lI]
+    }}}
+  }else{
+    Index_ctlr = sample_variable( Sdreport = fit$parameter_estimates$SD, 
+                            Obj = fit$tmb_list$Obj, 
+                            variable_name = "Index_ctl", 
+                            n_samples = n_samples, 
+                            sample_fixed = TRUE )
+    Prop_ctlr = Index_ctlr / outer( rep(1,dim(Index_ctlr)[1]), apply(Index_ctlr,MARGIN=2:4,FUN=sum) )
+    var_Prop_ctl = apply(Prop_ctlr, MARGIN=1:3, FUN=var)
+  }
+
+  # Covert to effective sample size
+  Neff_ctl = Prop_ctl * (1-Prop_ctl) / var_Prop_ctl
 
   # Median effective sample size across categories
   Neff_tl = apply(Neff_ctl, MARGIN=2:3, FUN=median, na.rm=TRUE)

diff --git a/R/convert_shapefile.R b/R/convert_shapefile.R
@@ -30,7 +30,9 @@ convert_shapefile = function( file_path,
     area_tolerance = 0.05,
     ... ){
 
-  shapefile_input = rgdal::readOGR( file_path, verbose=FALSE, p4s=projargs_for_shapefile )
+  #shapefile_input = rgdal::readOGR( file_path, verbose=FALSE, p4s=projargs_for_shapefile )
+  shapefile_input = sf::st_read( file_path )
+    st_crs(shapefile_input) = st_crs(projargs_for_shapefile)
   # rgdal::writeOGR
   message("Reading shapefile with projargs: ", print(shapefile_input@proj4string))
   # raster::shapefile(.) has simplified read-write interface for future reference

diff --git a/R/deprecated.R b/R/deprecated.R
@@ -1049,8 +1049,10 @@ function( Lon, Lat, crs=NA ){
   # SEE:  https://github.com/nwfsc-assess/geostatistical_delta-GLMM/issues/25#issuecomment-345825230
 
   # Attach package
-  require(rgdal)
-  on.exit( detach("package:rgdal") )
+  #require(rgdal)
+  #on.exit( detach("package:rgdal") )
+  require(sp)
+  on.exit( detach("package:sp") )
 
   # Transform
   dstart<-data.frame(lon=Lon, lat=Lat) # that's the object

diff --git a/R/fit_model.R b/R/fit_model.R
@@ -43,7 +43,7 @@
 #' @param year_labels character vector specifying names for labeling times \code{t_i}
 #' @param ... additional arguments to pass to \code{\link{make_extrapolation_info}}, \code{\link{make_spatial_info}}, \code{\link[VAST]{make_data}}, \code{\link[VAST]{make_model}}, or \code{\link[TMBhelper]{fit_tmb}},
 #' where arguments are matched by name against each function.  If an argument doesn't match, it is still passed to \code{\link[VAST]{make_data}}.  Note that \code{\link{make_spatial_info}}
-#' passes named arguments to \code{\link[INLA]{inla.mesh.create}}.
+#' passes named arguments to \code{\link[fmesher]{fm_mesh_2d}}.
 #'
 #' @return Object of class \code{fit_model}, containing formatted inputs and outputs from VAST
 #' \describe{
@@ -105,7 +105,7 @@ function( settings,
           a_i,
           c_iz = rep(0,length(b_i)),
           v_i = rep(0,length(b_i)),
-          working_dir = paste0(getwd(),"/"),
+          working_dir = tempdir(),
           X1config_cp = NULL,
           X2config_cp = NULL,
           covariate_data,
@@ -180,8 +180,9 @@ function( settings,
                          DirPath = working_dir,
                          Save_Results = TRUE,
                          fine_scale = settings$fine_scale,
-                         knot_method = settings$knot_method)
-    spatial_args_input = combine_lists( input=extra_args, default=spatial_args_default, args_to_use=c(formalArgs(make_spatial_info),formalArgs(INLA::inla.mesh.create)) )
+                         knot_method = settings$knot_method,
+                         mesh_package = settings$mesh_package )
+    spatial_args_input = combine_lists( input=extra_args, default=spatial_args_default, args_to_use=c(formalArgs(make_spatial_info),formalArgs(fmesher::fm_mesh_2d)) )
     spatial_list = do.call( what=make_spatial_info, args=spatial_args_input )
   }else{
     spatial_args_input = NULL
@@ -507,7 +508,7 @@ plot.fit_model <- function(x, what="results", ...)
 #' @inheritParams DHARMa::plotResiduals
 #'
 #' @param x Output from \code{\link{fit_model}}
-#' @param what String indicating what to summarize; options are `density` or `residuals`
+#' @param what String indicating what to summarize; options are `density`, `index` or `residuals`
 #' @param n_samples Number of samples used when \code{what="residuals"}
 #' @param ... additional arguments passed to \code{\link[DHARMa]{plotResiduals}} when \code{what="residuals"}
 #'
@@ -534,34 +535,41 @@ function( x,
                            year_labels = year_labels,
                            category_names = category_names )
 
-  if( tolower(what) == "density" ){
+  if( tolower(what) %in% c("density","index") ){
     # Load location of extrapolation-grid
     ans[["extrapolation_grid"]] = print( x$extrapolation_list, quiet=TRUE )
 
     # Load density estimates
-    if( any(c("D_gct","D_gcy") %in% names(x$Report)) ){
-      if("D_gcy" %in% names(x$Report)) Dvar_name = "D_gcy"
-      if("D_gct" %in% names(x$Report)) Dvar_name = "D_gct"
-      ans[["Density_array"]] =  x$Report[[Dvar_name]]
-      if( !( x$settings$fine_scale==TRUE | x$spatial_list$Method=="Stream_network" ) ){
-        index_tmp = x$spatial_list$NN_Extrap$nn.idx[ which(x$extrapolation_list[["Area_km2_x"]]>0), 1 ]
-        ans[["Density_array"]] = ans[["Density_array"]][ index_tmp,,,drop=FALSE]
-      }
-      if( any(sapply(dimnames(ans[["Density_array"]]),FUN=is.null)) ){
-        dimnames(ans[["Density_array"]]) = list( rownames(ans[["extrapolation_grid"]]), paste0("Category_",1:dim(ans[["Density_array"]])[[2]]), x$year_labels )
-      }
-      # Expand as grid
-      Density_dataframe = expand.grid("Grid"=1:dim(ans[["Density_array"]])[[1]], "Category"=dimnames(ans[["Density_array"]])[[2]], "Year"=dimnames(ans[["Density_array"]])[[3]])
-      Density_dataframe = cbind( Density_dataframe, ans[["extrapolation_grid"]][Density_dataframe[,'Grid'],], "Density"=as.vector(ans[["Density_array"]]) )
-      ans[["Density_dataframe"]] = Density_dataframe
-      ans[['year_labels']] = x[['year_labels']]
-      rownames(Density_dataframe) = NULL
-      cat("\n### Printing head of and tail `Density_dataframe`, and returning data frame in output object\n")
-      print(head(Density_dataframe))
-      print(tail(Density_dataframe))
-    }else{
-      stop( "`summary.fit_model` not implemented for the version of `VAST` being used" )
+    if( tolower(what) == "density" ){
+      ans[["Density_array"]] =  x$Report[["D_gct"]]
+    }else if( tolower(what) == "index" ){
+      ans[["Density_array"]] =  x$Report[["Index_gctl"]]
+    }
+
+    # Exclude boundary knots
+    if( !( x$settings$fine_scale==TRUE | x$spatial_list$Method=="Stream_network" ) ){
+      index_tmp = x$spatial_list$NN_Extrap$nn.idx[ which(x$extrapolation_list[["Area_km2_x"]]>0), 1 ]
+      ans[["Density_array"]] = ans[["Density_array"]][ index_tmp,,,drop=FALSE]
     }
+    # Error check
+    if( any(sapply(dimnames(ans[["Density_array"]]),FUN=is.null)) ){
+      stop("`summary.fit_model` assumes that arrays are labeled")
+    }
+
+    # Expand as grid
+    Density_dataframe = expand.grid( dimnames(ans[["Density_array"]]) )
+    Density_dataframe = cbind( Density_dataframe, ans[["extrapolation_grid"]][Density_dataframe[,'Site'],], as.vector(ans[["Density_array"]]) )
+    colnames(Density_dataframe)[ncol(Density_dataframe)] = tolower(what)
+
+    # Save output
+    ans[["Density_dataframe"]] = Density_dataframe
+    ans[['year_labels']] = x[['year_labels']]
+    rownames(Density_dataframe) = NULL
+
+    # Print to terminal
+    cat("\n### Printing head of and tail `Density_dataframe`, and returning data frame in output object\n")
+    print(head(Density_dataframe))
+    print(tail(Density_dataframe))
   }
 
   # Residuals
@@ -601,10 +609,10 @@ function( x,
 
       # Run DHARMa
       # Adding jitters because DHARMa version 0.3.2.0 sometimes still throws an error method="traditional" and integer=FALSE without jitters
-      dharmaRes = DHARMa::createDHARMa(simulatedResponse=strip_units(b_iz) + 1e-10*array(rnorm(prod(dim(b_iz))),dim=dim(b_iz)),
-        observedResponse=strip_units(x$data_list$b_i) + 1e-10*rnorm(length(x$data_list$b_i)),
-        fittedPredictedResponse=strip_units(x$Report$D_i),
-        integer=FALSE)
+      dharmaRes = DHARMa::createDHARMa( simulatedResponse = strip_units(b_iz) + 1e-10*array(rnorm(prod(dim(b_iz))),dim=dim(b_iz)),
+                                        observedResponse = strip_units(x$data_list$b_i) + 1e-10*rnorm(length(x$data_list$b_i)),
+                                        fittedPredictedResponse = strip_units(x$Report$D_i),
+                                        integer = FALSE)
       #dharmaRes = DHARMa::createDHARMa(simulatedResponse=strip_units(b_iz),
       #  observedResponse=strip_units(x$data_list$b_i),
       #  fittedPredictedResponse=strip_units(x$Report$D_i),
@@ -660,7 +668,7 @@ function( x,
     if( is.null(working_dir) ){
       plot_dharma(dharmaRes, ...)
     }else if(!is.na(working_dir) ){
-      png(file=paste0(working_dir,"quantile_residuals.png"), width=8, height=4, res=200, units='in')
+      png(file=file.path(working_dir,"quantile_residuals.png"), width=8, height=4, res=200, units='in')
         plot_dharma(dharmaRes, form=form, ...)
       dev.off()
     }
@@ -750,7 +758,7 @@ predict.fit_model <- function(x,
                   new_covariate_data = NULL,
                   new_catchability_data = NULL,
                   do_checks = TRUE,
-                  working_dir = paste0(getwd(),"/") )
+                  working_dir = getwd() )
 {
   message("`predict.fit_model(.)` is in beta-testing, and please explore results carefully prior to using")