From fcd5afe8f39ff110381c6472bf8cd7cfa57a629c Mon Sep 17 00:00:00 2001 From: MOshima-PIFSC Date: Fri, 2 Feb 2024 12:17:44 -1000 Subject: [PATCH] clean up vignettes --- vignettes/articles/Jitter.Rmd | 11 ++++++----- vignettes/articles/Retrospective-Analysis.Rmd | 2 +- vignettes/articles/aspm.Rmd | 4 ++-- vignettes/articles/hcxval.Rmd | 6 ++---- vignettes/articles/likelihood.Rmd | 8 ++++---- vignettes/articles/residuals.Rmd | 6 +++--- 6 files changed, 18 insertions(+), 19 deletions(-) diff --git a/vignettes/articles/Jitter.Rmd b/vignettes/articles/Jitter.Rmd index 7d47ed1..4595a68 100644 --- a/vignettes/articles/Jitter.Rmd +++ b/vignettes/articles/Jitter.Rmd @@ -48,7 +48,7 @@ r4ss::run(dir = dir_jitter, exe = "ss3", verbose = FALSE) ``` ## Jitter -For this example, we will run 50 jitters. The `jitter()` function automates the entire process so you only need to give it a few arguments and it will run and produce the total likelihoods for each run. +For this example, we will run 50 jitters. The `jitter()` function automates the entire process so you only need to give it a few arguments and it will run and produce the total likelihoods for each run. Full documentation of the `jitter()` function can be found at the [r4ss website](https://r4ss.github.io/r4ss/reference/jitter.html). ```{r run-jitter} Njitter <- 50 @@ -87,13 +87,13 @@ converged_grad <- which(jit_summary$maxgrad < 0.0001) converged_ssb <- jit_summary$SpawnBio %>% mutate(across(c(1:(Njitter+1)), - .fns = ~./replist)) %>% # for each column, divide SSB by SSB from the reference run (replist0) + .fns = ~./replist)) %>% # for each column, divide SSB by SSB from the reference run (replist) select(-Label) %>% pivot_longer(col = c(1:(Njitter+1)), names_to = "jitter", values_to = "SSB") %>% pivot_wider(names_from = Yr, values_from = SSB) %>% mutate(rownumber = seq(1, nrow(.))) %>% tibble::column_to_rownames("jitter") %>% - filter_at(vars(1:(ncol(.)-1)), all_vars((.) < 2 & (.) 
> 0.5)) %>% #keep only + filter_at(vars(1:(ncol(.)-1)), all_vars((.) < 2 & (.) > 0.5)) %>% #keep only rows where SSB is a reasonable value select(rownumber) %>% pull(rownumber) converged_mods <- intersect(converged_grad, converged_ssb) #getting which models are in both groups @@ -103,7 +103,7 @@ converged_sum <- SSsummarize(converged_jitters, verbose = FALSE) ## Visualizing Output -To compare the likelihoods of all runs, we plot them as shown below. There are no built in functions (as of writing this recipe) in `r4ss` or `ss3diags` to generate a likelihood plot, therefore we provide code in the tidyverse syntax (using `ggplot2`) to visualize the results. +To compare the likelihoods of all runs, we plot them as shown below. There are no built in functions (as of writing this vignette) in `r4ss` or `ss3diags` to generate a likelihood plot, therefore we provide code in the tidyverse syntax (using `ggplot2`) to visualize the results. ```{r } converged_sum$likelihoods %>% filter(str_detect(Label, "TOTAL")) %>% @@ -124,6 +124,7 @@ We can also use `r4ss::SSplotComparisons()` to compare the spawning biomass traj ```{r } SSplotComparisons(converged_sum, subplots = 2, - ylimAdj=1) + ylimAdj=1, + new = FALSE) ``` diff --git a/vignettes/articles/Retrospective-Analysis.Rmd b/vignettes/articles/Retrospective-Analysis.Rmd index d8ebcbf..5e532e1 100644 --- a/vignettes/articles/Retrospective-Analysis.Rmd +++ b/vignettes/articles/Retrospective-Analysis.Rmd @@ -42,7 +42,7 @@ r4ss::get_ss3_exe(dir = dir_retro, version = "v3.30.21") ``` ## Retrospective Analysis -Once you have the 4 input files and SS executable, you can run retrospective analysis as shown below. We are running it for 5 1-year peels, so with each run, the last year of data is removed and the model is re-run for a total of 5 times. The number of year peels can be adjusted with the `years` argument. If the SS executable file you are using is named something other than "ss" (e.g. 
ss_opt_win.exe), you will need to specify this with the argument `exe = "ss_opt_win"`. +Once you have the 4 input files and SS executable, you can run retrospective analysis as shown below. We are running it for 5 1-year peels, so with each run, *n* years of data are removed from the reference model and the model is re-run for a total of 5 times (i.e. peel 1 removes the last year of data, peel 2 removes the last 2 years of data, etc.). The number of year peels can be adjusted with the `years` argument. If the SS executable file you are using is named something other than "ss3" (e.g. ss_opt_win.exe), you will need to specify this with the argument `exe = "ss_opt_win"`. Full documentation of the `retro()` function can be found on the [r4ss website](https://r4ss.github.io/r4ss/reference/retro.html). ```{r message=FALSE} r4ss::retro(dir = dir_retro, exe = "ss3", years = 0:-5, verbose = FALSE) diff --git a/vignettes/articles/aspm.Rmd b/vignettes/articles/aspm.Rmd index c7b4ce8..7982f7b 100644 --- a/vignettes/articles/aspm.Rmd +++ b/vignettes/articles/aspm.Rmd @@ -84,8 +84,7 @@ starter$init_values_src <- 1 SS_writestarter(starter, dir = dir_aspm, overwrite = TRUE, - verbose = FALSE, - warn = FALSE) + verbose = FALSE) ``` ### Control File @@ -140,6 +139,7 @@ r4ss::run(dir = dir_aspm, exe = "ss3", skipfinished = FALSE, verbose = FALSE) ### Visualize Results +To compare the ASPM model with the age-structured model, you can use `SSplotComparisons()`. Comparing spawning biomass and F estimates between the two models and fits to indices of abundance can help to understand if there is enough information in the indices to inform the production function. 
```{r warning=FALSE} mods <- SSgetoutput(dirvec = c( dir_tmp, diff --git a/vignettes/articles/hcxval.Rmd b/vignettes/articles/hcxval.Rmd index bd73402..8dd3f36 100644 --- a/vignettes/articles/hcxval.Rmd +++ b/vignettes/articles/hcxval.Rmd @@ -17,7 +17,7 @@ Hindcast Cross-Validation is used to evaluate the prediction skill of a model by Implementing the Hindcast Cross-Validation (HCxval) diagnostic in Stock Synthesis requires the same model outputs generated when running a retrospective analysis. Therefore, no additional step is needed for HCxval if conducted in conjunction with retrospective analysis (see how to conduct a [restrospective analysis](https://pifscstockassessments.github.io/ss3diags/articles/Retrospective-Analysis.html)). For this example we will use the same output created in the retrospective example. -```{r echo=FALSE, message=FALSE, results=FALSE} +```{r echo=FALSE, message=FALSE, results=FALSE, warning=FALSE} library(r4ss) files_path <- system.file("extdata", package = "ss3diags") @@ -28,7 +28,7 @@ r4ss::get_ss3_exe(dir = dir_retro, version = "v3.30.21") r4ss::retro(dir = dir_retro, exe = "ss3", years = 0:-5, verbose = FALSE) ``` - +Analyzing HCxval results requires the same first step of summarizing the list of retrospective runs as for the retrospective analysis. ```{r} retro_mods <- r4ss::SSgetoutput(dirvec = file.path(dir_retro, "retrospectives", paste0("retro", seq(0,-5,by=-1))), verbose = F) retroSummary <- r4ss::SSsummarize(retro_mods, verbose = F) @@ -39,8 +39,6 @@ retroSummary <- r4ss::SSsummarize(retro_mods, verbose = F) HCxval is implemented using `ss3diags::SSplotHCxval()`, which produces the novel HCxval diagnostic plot and computes the MASE scores for indices of abundance, mean lengths, or mean ages that have observations falling within the hindcast evaluation period. 
-Plotting HCxval for abundance indices requires the same step of summarizing the list of retrospective runs as for the retrospective analysis, which therefore only needs be done once. In this example we will use `retroSummary` created [here](https://pifscstockassessments.github.io/ss3diags/articles/Retrospective-Analysis.html). - ```{r} r4ss::sspar(mfrow = c(1,2)) SSplotHCxval(retroSummary, subplots = "cpue", add = TRUE) diff --git a/vignettes/articles/likelihood.Rmd b/vignettes/articles/likelihood.Rmd index 4ce45b2..d573590 100644 --- a/vignettes/articles/likelihood.Rmd +++ b/vignettes/articles/likelihood.Rmd @@ -12,7 +12,7 @@ knitr::opts_chunk$set( ```{r setup} library(ss3diags) ``` -Likelihood profiling is a key model diagnostic that helps identify the influence of information sources on model estimates. R_0 is a commonly profiled parameter because it represents a global scaling parameter. To conduct a profile, values of the parameter over a given range are fixed and the model re-run and then changes in total likelihood and data-component likelihoods are examined. It is recommended to run this throughout the development process, particularly after incorporating new data sets to understand how informative each component is on the estimation of R0 and if there is conflict between data sources. +Likelihood profiling is a key model diagnostic that helps identify the influence of information sources on model estimates. R~0~ is a commonly profiled parameter because it represents a global scaling parameter. To conduct a profile, values of the parameter over a given range are fixed and the model re-run and then changes in total likelihood and data-component likelihoods are examined. It is recommended to run this throughout the development process, particularly after incorporating new data sets to understand how informative each component is on the estimation of R~0~ and if there is conflict between data sources. 
## Model inputs To run a stock synthesis model, 4 input files are required: starter, forecast, control, and data. The input files for the example model can be found within the `ss3diags` package and accessed as shown below. Also, if you do not have `r4ss` installed, you will need to install for this tutorial. @@ -39,7 +39,7 @@ You will need to make sure you have the [SS executable](https://github.com/nmfs- r4ss::get_ss3_exe(dir = dir_tmp, version = "v3.30.21") ``` -## R0 Profile +## R~0~ Profile Once you have the 4 input files and SS executable, you can run a likelihood profile as shown below. The first step is to run the model you would like to do a profile for. We will do this in `dir_tmp` and then copy the necessary files to `dir_profile`. It's best to run the profile in a subdirectory of your main model run to keep the output files separate from other diagnostic tests you may run. ```{r} @@ -70,7 +70,7 @@ SS_writestarter(START, dir = dir_profile, overwrite = TRUE, verbose = F) ``` -To run the profile, use `r4ss::profile()` and you will need to specify a partial string of the name of the parameter you are profiling over (in this case "SR_LN" will match with "SR_LN(R0)"), and the vector of values to profile across. The `newctlfile` is the control file that will be adjusted with values from the `profilevec` and can be named anything you prefer, it just needs to match what you put in the starter file for "cltfile". +To run the profile, use `r4ss::profile()` and you will need to specify a partial string of the name of the parameter you are profiling over (in this case "SR_LN" will match with "SR_LN(R0)"), and the vector of values to profile across. The `newctlfile` is the control file that will be adjusted with values from the `profilevec` and can be named anything you prefer, it just needs to match what you put in the starter file for "ctlfile". 
Full documentation of the `profile()` function can be found on the [r4ss website](https://r4ss.github.io/r4ss/reference/profile.html). ```{r} profile(dir = dir_profile, newctlfile = "control_modified.ss", @@ -81,7 +81,7 @@ profile(dir = dir_profile, ``` ## Visualizing Output -Once the profile is finished running, we can visualize the results to determine if there is conflict between the data sources. If all data sources reach a minimum at the same R0 value, this indicates good agreement between them. However, more likely, one or more will be minimized at different R0 values from the global R0 value. This is a sign of conflict between your data sources and may require you to consider data weighting. +Once the profile is finished running, we can visualize the results to determine if there is conflict between the data sources. If all data sources reach a minimum at the same R~0~ value, this indicates good agreement between them. However, more likely, one or more will be minimized at different R~0~ values from the global R~0~ value. This is a sign of conflict between your data sources and may require you to consider data weighting. ```{r} profile_mods <- SSgetoutput(dirvec = dir_profile, keyvec = 1:length(r0_vec), verbose = FALSE) diff --git a/vignettes/articles/residuals.Rmd b/vignettes/articles/residuals.Rmd index fcd43ff..5cd1d2a 100644 --- a/vignettes/articles/residuals.Rmd +++ b/vignettes/articles/residuals.Rmd @@ -34,17 +34,17 @@ SSplotRunstest(report, add = TRUE) ``` 
Green shaded area indicates the residuals are randomly distributed (p-value >= 0.05) whereas red shaded area indicates the residuals are not randomly distributed. Failing the runs test (p-value < 0.05) can be indicative of some misspecification or conflict with the indices or composition data. +The output for `SSplotRunstest()` includes a plot of the residuals by fleet and a table with the results from the runs test and 'three-sigma limit' values. In the plots above, the shaded area represents the 'three-sigma limit', or three residual standard deviations from zero. If any of the individual residual points fall outside of the three-sigma limit, they are colored red. Green shaded area indicates the residuals are randomly distributed (p-value >= 0.05) whereas red shaded area indicates the residuals are not randomly distributed. Failing the runs test (p-value < 0.05) can be indicative of some misspecification or conflict with the indices or composition data. ### Customizing the Plot -Runs test plots can be customized, some common features that you may want to specify are: +Runs test plots can be customized as needed. Some common features that you may want to specify are: * plotting other data types (default is Index of Abundance) * plotting specific fleet(s) * adjusting the y-axis range -To plot other data types, they can be specified with the `subplots` argument, and the options include "cpue", "len", "age", "size", or "con". "con" is for conditional size-at-age data and "size" is for generalized size composition data. Fleets can be specified using the `indexselect()` function, which takes a vector of the fleet numbers to plot. +Examples of each of these changes are shown below, incrementally making each adjustment. To plot other data types, they can be specified with the `subplots` argument, and the options include "cpue", "len", "age", "size", or "con". "con" is for conditional size-at-age data and "size" is for generalized size composition data. 
Fleets can be specified using the `indexselect()` function, which takes a vector of the fleet numbers to plot. ```{r} sspar(mfrow = c(2,2))