From 6944510ca9e62fd33a576580dd227ea94b11d1ea Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 19 May 2022 12:34:17 -0400 Subject: [PATCH 01/10] Tweak doc title for compare_models() --- R/compare_models.R | 7 ++++--- man/compare_models.Rd | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 3 deletions(-) create mode 100644 man/compare_models.Rd diff --git a/R/compare_models.R b/R/compare_models.R index 13ba1a19..e6ef622e 100644 --- a/R/compare_models.R +++ b/R/compare_models.R @@ -124,9 +124,10 @@ permute_p_value <- function(merged_data, metric, group_name, group_1, group_2, n } -#' Compute all pairs of comparisons -#' calculate permuted p-value across all pairs of group variable. -#' wrapper for `permute_p_value` +#' Perform permutation tests to compare the performance metric +#' across all pairs of a group variable. +#' +#' A wrapper for `permute_p_value()`. #' #' @param merged_data the concatenated performance data from `run_ml` #' @param metric metric to compare, must be numeric diff --git a/man/compare_models.Rd b/man/compare_models.Rd new file mode 100644 index 00000000..d2ccab3e --- /dev/null +++ b/man/compare_models.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/compare_models.R +\name{compare_models} +\alias{compare_models} +\title{Perform permutation tests to compare the performance metric +across all pairs of a group variable.} +\usage{ +compare_models(merged_data, metric, group_name, nperm = 10000) +} +\arguments{ +\item{merged_data}{the concatenated performance data from \code{run_ml}} + +\item{metric}{metric to compare, must be numeric} + +\item{group_name}{column with group variables to compare} + +\item{nperm}{number of permutations, default=10000} +} +\value{ +a table of p-values for all pairs of group varible +} +\description{ +A wrapper for \code{permute_p_value()}. +} +\examples{ +df <- dplyr::tibble( + model = c("rf", "rf", "glmnet", "glmnet", "svmRadial", "svmRadial"), + AUC = c(.2, 0.3, 0.8, 0.9, 0.85, 0.95) +) +set.seed(123) +compare_models(df, "AUC", "model", nperm = 10) +} +\author{ +Courtney R Armour, \email{armourc@umich.edu} +} From 81d1605fecf63ed3feb543a784262c4fa8bb8ff3 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 19 May 2022 12:34:37 -0400 Subject: [PATCH 02/10] Prepare to release 1.3.0 --- DESCRIPTION | 6 +++--- NEWS.md | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b94c54f8..b8ea3e32 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: mikropml Title: User-Friendly R Package for Supervised Machine Learning Pipelines -Version: 1.2.2.9000 -Date: 2022-02-03 +Version: 1.3.0 +Date: 2022-05-19 Authors@R: c(person(given = "Begüm", family = "Topçuoğlu", @@ -91,4 +91,4 @@ VignetteBuilder: Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.1.1 +RoxygenNote: 7.1.2 diff --git a/NEWS.md b/NEWS.md index af236574..aebff2c4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# development version +# mikropml 1.3.0 - mikropml now requires R version 4.1.0 or greater due to an update in the randomForest package (#292). - New function `compare_models()` compares the performance of two models with a permutation test (#295, @courtneyarmour). 
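To make the relationship between the two new functions concrete, here is a rough sketch of calling permute_p_value() directly for a single pair of models. The argument names follow the signature visible in the R/compare_models.R hunk above; the performance tibble is made up for illustration, and the permutation-count argument is assumed to be nperm, matching compare_models().

 library(mikropml)
 # concatenated performance results, e.g. row-bound output of several run_ml() calls
 perf_df <- dplyr::tibble(
   model = c("rf", "rf", "glmnet", "glmnet"),
   AUC = c(0.2, 0.3, 0.8, 0.9)
 )
 set.seed(123)
 # permuted p-value for one pair of groups; compare_models() wraps this over all pairs
 permute_p_value(perf_df,
                 metric = "AUC",
                 group_name = "model",
                 group_1 = "rf",
                 group_2 = "glmnet",
                 nperm = 100)

compare_models(perf_df, "AUC", "model", nperm = 100) would apply the same test to every pairwise combination of models, as in the \examples block added above.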
From ca1df249a9b76019f29c479bc69c258da95f8476 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 19 May 2022 12:34:55 -0400 Subject: [PATCH 03/10] document() & build_site() --- NAMESPACE | 4 + docs/404.html | 2 +- docs/CODE_OF_CONDUCT.html | 3 +- docs/CONTRIBUTING.html | 3 +- docs/LICENSE-text.html | 2 +- docs/LICENSE.html | 3 +- docs/SUPPORT.html | 3 +- docs/articles/index.html | 2 +- docs/articles/introduction.html | 359 +++++++++++---- docs/articles/paper.html | 417 +++++++++++++----- docs/articles/parallel.html | 161 ++++--- docs/articles/preprocess.html | 216 ++++++--- docs/articles/tuning.html | 169 +++++-- docs/authors.html | 2 +- docs/index.html | 9 +- docs/news/index.html | 46 +- docs/pkgdown.yml | 4 +- docs/pull_request_template.html | 28 +- docs/reference/calc_perf_metrics.html | 2 +- docs/reference/combine_hp_performance.html | 2 +- docs/reference/compare_models.html | 145 ++++++ docs/reference/define_cv.html | 2 +- docs/reference/get_caret_processed_df.html | 2 +- docs/reference/get_difference.html | 137 ++++++ docs/reference/get_feature_importance.html | 4 +- docs/reference/get_hp_performance.html | 2 +- docs/reference/get_hyperparams_list.html | 2 +- docs/reference/get_outcome_type.html | 2 +- docs/reference/get_partition_indices.html | 2 +- docs/reference/get_perf_metric_fn.html | 8 +- docs/reference/get_perf_metric_name.html | 2 +- docs/reference/get_performance_tbl.html | 2 +- docs/reference/get_tuning_grid.html | 2 +- docs/reference/group_correlated_features.html | 2 +- docs/reference/index.html | 7 +- docs/reference/mikropml.html | 2 +- docs/reference/otu_mini_bin.html | 2 +- .../otu_mini_bin_results_glmnet.html | 2 +- docs/reference/otu_mini_bin_results_rf.html | 2 +- .../otu_mini_bin_results_rpart2.html | 2 +- .../otu_mini_bin_results_svmRadial.html | 2 +- .../otu_mini_bin_results_xgbTree.html | 2 +- .../otu_mini_cont_results_glmnet.html | 2 +- .../reference/otu_mini_cont_results_nocv.html | 2 +- docs/reference/otu_mini_cv.html | 2 +- docs/reference/otu_mini_multi.html | 2 +- docs/reference/otu_mini_multi_group.html | 2 +- .../otu_mini_multi_results_glmnet.html | 2 +- docs/reference/otu_small.html | 2 +- docs/reference/permute_p_value.html | 151 +++++++ docs/reference/plot_hp_performance.html | 2 +- docs/reference/plot_model_performance.html | 2 +- docs/reference/preprocess_data.html | 2 +- docs/reference/randomize_feature_order.html | 10 +- docs/reference/reexports.html | 4 +- docs/reference/remove_singleton_columns.html | 2 +- docs/reference/replace_spaces.html | 2 +- docs/reference/run_ml.html | 2 +- docs/reference/shuffle_group.html | 141 ++++++ docs/reference/tidy_perf_data.html | 2 +- docs/reference/train_model.html | 2 +- docs/sitemap.xml | 12 + man/get_difference.Rd | 32 ++ man/permute_p_value.Rd | 47 ++ man/reexports.Rd | 2 +- man/shuffle_group.Rd | 30 ++ 66 files changed, 1799 insertions(+), 432 deletions(-) create mode 100644 docs/reference/compare_models.html create mode 100644 docs/reference/get_difference.html create mode 100644 docs/reference/permute_p_value.html create mode 100644 docs/reference/shuffle_group.html create mode 100644 man/get_difference.Rd create mode 100644 man/permute_p_value.Rd create mode 100644 man/shuffle_group.Rd diff --git a/NAMESPACE b/NAMESPACE index 81dd1dd4..8410e1c2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,9 +6,11 @@ export(":=") export(.data) export(calc_perf_metrics) export(combine_hp_performance) +export(compare_models) export(contr.ltfr) export(define_cv) export(get_caret_processed_df) +export(get_difference) 
export(get_feature_importance) export(get_hp_performance) export(get_hyperparams_list) @@ -19,6 +21,7 @@ export(get_perf_metric_name) export(get_performance_tbl) export(get_tuning_grid) export(group_correlated_features) +export(permute_p_value) export(plot_hp_performance) export(plot_model_performance) export(preprocess_data) @@ -26,6 +29,7 @@ export(randomize_feature_order) export(remove_singleton_columns) export(replace_spaces) export(run_ml) +export(shuffle_group) export(tidy_perf_data) export(train_model) importFrom(MLmetrics,AUC) diff --git a/docs/404.html b/docs/404.html index 986332d0..50b47da1 100644 --- a/docs/404.html +++ b/docs/404.html @@ -39,7 +39,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/CODE_OF_CONDUCT.html b/docs/CODE_OF_CONDUCT.html index ccf732ce..5bb0afd6 100644 --- a/docs/CODE_OF_CONDUCT.html +++ b/docs/CODE_OF_CONDUCT.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 @@ -73,6 +73,7 @@

Contributor Covenant Code of Conduct

+

This document was adapted from the Tidyverse Code of Conduct.

Our Pledge

diff --git a/docs/CONTRIBUTING.html b/docs/CONTRIBUTING.html index 632efc65..4cc444a7 100644 --- a/docs/CONTRIBUTING.html +++ b/docs/CONTRIBUTING.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0
@@ -73,6 +73,7 @@

Contributing to mikropml

+

This document was adapted from the Tidyverse Contributing guide.

Fixing typos

diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index c35487fd..8c20f191 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0
diff --git a/docs/LICENSE.html b/docs/LICENSE.html index 757d2260..2d2d71ff 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0
@@ -73,6 +73,7 @@

MIT License

+

Copyright (c) 2019-2021 Begüm D. Topçuoğlu, Zena Lapp, Kelly L. Sovacool, Evan Snitkin, Jenna Wiens, and Patrick D. Schloss

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

diff --git a/docs/SUPPORT.html b/docs/SUPPORT.html index a297054e..e80f09e2 100644 --- a/docs/SUPPORT.html +++ b/docs/SUPPORT.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0
@@ -73,6 +73,7 @@

Getting help with mikropml

+

Thanks for using mikropml! Before filing an issue, there are a few places to explore and pieces to put together to make the process as smooth as possible.

Make a reprex

diff --git a/docs/articles/index.html b/docs/articles/index.html index 11263a96..9e13e8a1 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0
diff --git a/docs/articles/introduction.html b/docs/articles/introduction.html index 0b546ce5..57ba736e 100644 --- a/docs/articles/introduction.html +++ b/docs/articles/introduction.html @@ -40,7 +40,7 @@ mikropml - 1.2.2.9000 + 1.3.0
@@ -100,7 +100,7 @@ -
+

Before running ML

-

Before you execute run_ml(), you should consider preprocessing your data, either on your own or with the preprocess_data() function. You can learn more about this in the preprocessing vignette: vignette("preprocess").

+

Before you execute run_ml(), you should consider +preprocessing your data, either on your own or with the +preprocess_data() function. You can learn more about this +in the preprocessing vignette: vignette("preprocess").

The simplest way to run_ml()

-

As mentioned above, the minimal input is your dataset (dataset) and the machine learning model you want to use (method).

+

As mentioned above, the minimal input is your dataset +(dataset) and the machine learning model you want to use +(method).

You may also want to provide:

    -
  • The outcome column name. By default run_ml() will pick the first column, but it’s best practice to specify the column name explicitly.
  • -
  • A seed so that the results will be reproducible, and so that you get the same results as those you see here (i.e have the same train/test split).
  • +
  • The outcome column name. By default run_ml() will pick +the first column, but it’s best practice to specify the column name +explicitly.
  • +
  • A seed so that the results will be reproducible, and so that you get +the same results as those you see here (i.e have the same train/test +split).
-

Say we want to use logistic regression, then the method we will use is glmnet. To do so, run the ML pipeline with:

+

Say we want to use logistic regression, then the method we will use +is glmnet. To do so, run the ML pipeline with:

 results <- run_ml(otu_mini_bin,
                   'glmnet',
@@ -200,16 +253,25 @@ 

The simplest way to run_ml()= 2019)

You’ll notice a few things:

    -
  1. It takes a little while to run. This is because of some of the parameters we use.
  2. -
  3. There is a message stating that ‘dx’ is being used as the outcome column. This is what we want, but it’s a nice sanity check!
  4. -
  5. There was a warning. Don’t worry about this warning right now - it just means that some of the hyperparameters aren’t a good fit - but if you’re interested in learning more, see vignette("tuning").
  6. +
  7. It takes a little while to run. This is because of some of the +parameters we use.
  8. +
  9. There is a message stating that ‘dx’ is being used as the outcome +column. This is what we want, but it’s a nice sanity check!
  10. +
  11. There was a warning. Don’t worry about this warning right now - it +just means that some of the hyperparameters aren’t a good fit - but if +you’re interested in learning more, see +vignette("tuning").
-

Now, let’s dig into the output a bit. The results is a list of 4 things:

+

Now, let’s dig into the output a bit. The results is a list of 4 +things:

 names(results)
 #> [1] "trained_model"      "test_data"          "performance"       
 #> [4] "feature_importance"
-

trained_model is the trained model from caret. There is a bunch of info in this that we won’t get into, because you can learn more from the caret::train() documentation.

+

trained_model is the trained model from +caret. There is a bunch of info in this that we won’t get +into, because you can learn more from the caret::train() +documentation.

 names(results$trained_model)
 #>  [1] "method"       "modelInfo"    "modelType"    "results"      "pred"        
@@ -217,7 +279,13 @@ 

The simplest way to run_ml()#> [11] "finalModel" "preProcess" "trainingData" "ptype" "resample" #> [16] "resampledCM" "perfNames" "maximize" "yLimits" "times" #> [21] "levels" "terms" "coefnames" "xlevels"

-

test_data is the partition of the dataset that was used for testing. In machine learning, it’s always important to have a held-out test dataset that is not used in the training stage. In this pipeline we do that using run_ml() where we split your data into training and testing sets. The training data are used to build the model (e.g. tune hyperparameters, learn the data) and the test data are used to evaluate how well the model performs.

+

test_data is the partition of the dataset that was used +for testing. In machine learning, it’s always important to have a +held-out test dataset that is not used in the training stage. In this +pipeline we do that using run_ml() where we split your data +into training and testing sets. The training data are used to build the +model (e.g. tune hyperparameters, learn the data) and the test data are +used to evaluate how well the model performs.

 head(results$test_data)
 #>        dx Otu00009 Otu00005 Otu00010 Otu00001 Otu00008 Otu00004 Otu00003
@@ -234,7 +302,10 @@ 

The simplest way to run_ml()#> 17 357 253 341 #> 27 25 322 5 #> 30 179 6 30

-

performance is a dataframe of (mainly) performance metrics (1 column for cross-validation performance metric, several for test performance metrics, and 2 columns at the end with ML method and seed):

+

performance is a dataframe of (mainly) performance +metrics (1 column for cross-validation performance metric, several for +test performance metrics, and 2 columns at the end with ML method and +seed):

-

When using logistic regression for binary classification, area under the receiver-operator characteristic curve (AUC) is a useful metric to evaluate model performance. Because of that, it’s the default that we use for mikropml. However, it is crucial to evaluate your model performance using multiple metrics. Below you can find more information about other performance metrics and how to use them in our package.

-

cv_metric_AUC is the AUC for the cross-validation folds for the training data. This gives us a sense of how well the model performs on the training data.

-

Most of the other columns are performance metrics for the test data — the data that wasn’t used to build the model. Here, you can see that the AUC for the test data is not much above 0.5, suggesting that this model does not predict much better than chance, and that the model is overfit because the cross-validation AUC (cv_metric_AUC, measured during training) is much higher than the testing AUC. This isn’t too surprising since we’re using so few features with this example dataset, so don’t be discouraged. The default option also provides a number of other performance metrics that you might be interested in, including area under the precision-recall curve (prAUC).

-

The last columns of results$performance are the method and seed (if you set one) to help with combining results from multiple runs (see vignette("parallel")).

-

feature_importance has information about feature importance values if find_feature_importance = TRUE (the default is FALSE). Since we used the defaults, there’s nothing here:

+

When using logistic regression for binary classification, area under +the receiver-operator characteristic curve (AUC) is a useful metric to +evaluate model performance. Because of that, it’s the default that we +use for mikropml. However, it is crucial to evaluate your +model performance using multiple metrics. Below you can find more +information about other performance metrics and how to use them in our +package.

+

cv_metric_AUC is the AUC for the cross-validation folds +for the training data. This gives us a sense of how well the model +performs on the training data.

+

Most of the other columns are performance metrics for the test data — +the data that wasn’t used to build the model. Here, you can see that the +AUC for the test data is not much above 0.5, suggesting that this model +does not predict much better than chance, and that the model is overfit +because the cross-validation AUC (cv_metric_AUC, measured +during training) is much higher than the testing AUC. This isn’t too +surprising since we’re using so few features with this example dataset, +so don’t be discouraged. The default option also provides a number of +other performance metrics that you might be interested in, including +area under the precision-recall curve (prAUC).

+

The last columns of results$performance are the method +and seed (if you set one) to help with combining results from multiple +runs (see vignette("parallel")).
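As a rough sketch of what combining results from multiple runs can look like (the looping code below is illustrative rather than taken from the package; vignette("parallel") covers this properly):

 # train the same model on a few different train/test splits by varying the seed
 results_list <- lapply(c(1, 2, 3), function(seed) {
   run_ml(otu_mini_bin, "glmnet", outcome_colname = "dx",
          cv_times = 5, seed = seed) # cv_times kept small so the sketch runs quickly
 })
 # pool the performance rows; the method and seed columns identify each run
 perf_df <- dplyr::bind_rows(lapply(results_list, function(x) x[["performance"]]))
 plot_model_performance(perf_df)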

+

feature_importance has information about feature +importance values if find_feature_importance = TRUE (the +default is FALSE). Since we used the defaults, there’s +nothing here:

 results$feature_importance
 #> [1] "Skipped feature importance"
@@ -256,17 +349,24 @@

The simplest way to run_ml()

Customizing parameters

-

There are a few arguments that allow you to change how you execute run_ml(). We’ve chosen reasonable defaults for you, but we encourage you to change these if you think something else would be better for your data.

+

There are a few arguments that allow you to change how you execute +run_ml(). We’ve chosen reasonable defaults for you, but we +encourage you to change these if you think something else would be +better for your data.

-

Changing kfold, cv_times, and training_frac +

Changing kfold, cv_times, and +training_frac

  • -kfold: The number of folds to run for cross-validation (default: 5).
  • +kfold: The number of folds to run for cross-validation +(default: 5).
  • -cv_times: The number of times to run repeated cross-validation (default: 100).
  • +cv_times: The number of times to run repeated +cross-validation (default: 100).
  • -training_frac: The fraction of data for the training set (default: 0.8). The rest of the data is used for testing.
  • +training_frac: The fraction of data for the training +set (default: 0.8). The rest of the data is used for testing.

Here’s an example where we change some of the default parameters:

@@ -280,17 +380,32 @@ 

Changing kfold, #> Training the model... #> Loading required package: ggplot2 #> Loading required package: lattice +#> +#> Attaching package: 'caret' +#> The following object is masked from 'package:mikropml': +#> +#> compare_models #> Warning in (function (w) : `caret::train()` issued the following warning: #> #> simpleWarning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, : There were missing values in resampled performance measures. #> #> This warning usually means that the model didn't converge in some cross-validation folds because it is predicting something close to a constant. As a result, certain performance metrics can't be calculated. This suggests that some of the hyperparameters chosen are doing very poorly. #> Training complete.

-

You might have noticed that this one ran faster — that’s because we reduced kfold and cv_times. This is okay for testing things out and may even be necessary for smaller datasets. But in general it may be better to have larger numbers for these parameters; we think the defaults are a good starting point (Topçuoğlu et al. 2020).

+

You might have noticed that this one ran faster — that’s because we +reduced kfold and cv_times. This is okay for +testing things out and may even be necessary for smaller datasets. But +in general it may be better to have larger numbers for these parameters; +we think the defaults are a good starting point (Topçuoğlu et al. 2020).

Custom training indices

-

When training_frac is a fraction between 0 and 1, a random sample of observations in the dataset are chosen for the training set to satisfy the training_frac. However, in some cases you might wish to control exactly which observations are in the training set. You can instead assign training_frac a vector of indices that correspond to which rows of the dataset should go in the training set (all remaining sequences will go in the testing set).

+

When training_frac is a fraction between 0 and 1, a +random sample of observations in the dataset are chosen for the training +set to satisfy the training_frac. However, in some cases +you might wish to control exactly which observations are in the training +set. You can instead assign training_frac a vector of +indices that correspond to which rows of the dataset should go in the +training set (all remaining sequences will go in the testing set).

 n_obs <- otu_mini_bin %>% nrow()
 training_size <- 0.8 * n_obs
@@ -312,10 +427,20 @@ 

Custom training indices

Changing the performance metric

-

There are two arguments that allow you to change what performance metric to use for model evaluation, and what performance metrics to calculate using the test data.

-

perf_metric_function is the function used to calculate the performance metrics.

-

The default for classification is caret::multiClassSummary() and the default for regression is caret::defaultSummary(). We’d suggest not changing this unless you really know what you’re doing.

-

perf_metric_name is the column name from the output of perf_metric_function. We chose reasonable defaults (AUC for binary, logLoss for multiclass, and RMSE for continuous), but the default functions calculate a bunch of different performance metrics, so you can choose a different one if you’d like.

+

There are two arguments that allow you to change what performance +metric to use for model evaluation, and what performance metrics to +calculate using the test data.

+

perf_metric_function is the function used to calculate +the performance metrics.

+

The default for classification is +caret::multiClassSummary() and the default for regression +is caret::defaultSummary(). We’d suggest not changing this +unless you really know what you’re doing.

+

perf_metric_name is the column name from the output of +perf_metric_function. We chose reasonable defaults (AUC for +binary, logLoss for multiclass, and RMSE for continuous), but the +default functions calculate a bunch of different performance metrics, so +you can choose a different one if you’d like.

The default performance metrics available for classification are:

#>  [1] "logLoss"                "AUC"                    "prAUC"                 
 #>  [4] "Accuracy"               "Kappa"                  "Mean_F1"               
@@ -339,7 +464,8 @@ 

Changing the performance metric#> #> This warning usually means that the model didn't converge in some cross-validation folds because it is predicting something close to a constant. As a result, certain performance metrics can't be calculated. This suggests that some of the hyperparameters chosen are doing very poorly. #> Training complete.

-

You’ll see that the cross-validation metric is prAUC, instead of the default AUC:

+

You’ll see that the cross-validation metric is prAUC, instead of the +default AUC:

 results_pr$performance
 #> # A tibble: 1 × 17
@@ -353,8 +479,16 @@ 

Changing the performance metric

Using groups

-

The optional groups is a vector of groups to keep together when splitting the data into train and test sets and for cross-validation. Sometimes it’s important to split up the data based on a grouping instead of just randomly. This allows you to control for similarities within groups that you don’t want to skew your predictions (i.e. batch effects). For example, with biological data you may have samples collected from multiple hospitals, and you might like to keep observations from the same hospital in the same partition.

-

Here’s an example where we split the data into train/test sets based on groups:

+

The optional groups is a vector of groups to keep +together when splitting the data into train and test sets and for +cross-validation. Sometimes it’s important to split up the data based on +a grouping instead of just randomly. This allows you to control for +similarities within groups that you don’t want to skew your predictions +(i.e. batch effects). For example, with biological data you may have +samples collected from multiple hospitals, and you might like to keep +observations from the same hospital in the same partition.

+

Here’s an example where we split the data into train/test sets based +on groups:
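That example code chunk is unchanged by this patch, so it does not appear in the diff; roughly, it looks like the sketch below, where the grouping vector is made up to stand in for something like hospital of origin.

 # assign each observation to an illustrative group
 set.seed(2019)
 grps <- sample(LETTERS[1:8], nrow(otu_mini_bin), replace = TRUE)
 results_grp <- run_ml(otu_mini_bin,
                       "glmnet",
                       cv_times = 2,
                       training_frac = 0.8,
                       groups = grps,
                       seed = 2019)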

-

The one difference here is run_ml() will report how much of the data is in the training set if you run the above code chunk. This can be a little finicky depending on how many samples and groups you have. This is because it won’t be exactly what you specify with training_frac, since you have to include all of one group in either the training set or the test set.

+

The one difference here is run_ml() will report how much +of the data is in the training set if you run the above code chunk. This +can be a little finicky depending on how many samples and groups you +have. This is because it won’t be exactly what you specify with +training_frac, since you have to include all of one group +in either the training set or the test set.

Controling how groups are assigned to partitions

-

When you use the groups parameter as above, by default run_ml() will assume that you want all of the observations from each group to be placed in the same partition of the train/test split. This makes sense when you want to use groups to control for batch effects. However, in some cases you might prefer to control exactly which groups end up in which partition, and you might even be okay with some observations from the same group being assigned to different partitions.

-

For example, say you want groups A and B to be used for training, C and D for testing, and you don’t have a preference for what happens to the other groups. You can give the group_partitions parameter a named list to specify which groups should go in the training set and which should go in the testing set.

+

When you use the groups parameter as above, by default +run_ml() will assume that you want all of the observations +from each group to be placed in the same partition of the train/test +split. This makes sense when you want to use groups to control for batch +effects. However, in some cases you might prefer to control exactly +which groups end up in which partition, and you might even be okay with +some observations from the same group being assigned to different +partitions.

+

For example, say you want groups A and B to be used for training, C +and D for testing, and you don’t have a preference for what happens to +the other groups. You can give the group_partitions +parameter a named list to specify which groups should go in the training +set and which should go in the testing set.

 results_grp_part <- run_ml(otu_mini_bin, 
                       'glmnet', 
@@ -395,8 +545,13 @@ 

Controling how groups #> Groups will not be kept together in CV partitions because the number of groups in the training set is not larger than `kfold` #> Training the model... #> Training complete.

-

In the above case, all observations from A & B will be used for training, all from C & D will be used for testing, and the remaining groups will be randomly assigned to one or the other to satisfy the training_frac as closely as possible.

-

In another scenario, maybe you want only groups A through F to be used for training, but you also want to allow other observations not selected for training from A through F to be used for testing:

+

In the above case, all observations from A & B will be used for +training, all from C & D will be used for testing, and the remaining +groups will be randomly assigned to one or the other to satisfy the +training_frac as closely as possible.

+

In another scenario, maybe you want only groups A through F to be +used for training, but you also want to allow other observations not +selected for training from A through F to be used for testing:

 results_grp_trainA <- run_ml(otu_mini_bin, 
                       'glmnet', 
@@ -415,14 +570,24 @@ 

Controling how groups #> Groups will be kept together in CV partitions #> Training the model... #> Training complete.

-

If you need even more control than this, take a look at setting custom training indices. You might also prefer to provide your own train control scheme with the cross_val parameter in run_ml().

+

If you need even more control than this, take a look at setting custom training indices. You +might also prefer to provide your own train control scheme with the +cross_val parameter in run_ml().

Finding feature importance

-

To find which features are contributing to predictive power, you can use find_feature_importance = TRUE. How we use permutation importance to determine feature importance is described in (Topçuoğlu et al. 2020). Briefly, it permutes each of the features individually (or correlated ones together) and evaluates how much the performance metric decreases. The more performance decreases when the feature is randomly shuffled, the more important that feature is. The default is FALSE because it takes a while to run and is only useful if you want to know what features are important in predicting your outcome.

+

To find which features are contributing to predictive power, you can +use find_feature_importance = TRUE. How we use permutation +importance to determine feature importance is described in (Topçuoğlu et al. 2020). Briefly, it permutes +each of the features individually (or correlated ones together) and +evaluates how much the performance metric decreases. The more +performance decreases when the feature is randomly shuffled, the more +important that feature is. The default is FALSE because it +takes a while to run and is only useful if you want to know what +features are important in predicting your outcome.

Let’s look at some feature importance results:

 results_imp <- run_ml(otu_mini_bin,
@@ -459,11 +624,16 @@ 

Finding feature importanceThere are several columns:

  1. -perf_metric: The performance value of the permuted feature.
  2. +perf_metric: The performance value of the permuted +feature.
  3. -perf_metric_diff: The difference between the performance for the actual and permuted data (i.e. test performance minus permuted performance). Features with a larger perf_metric_diff are more important.
  4. +perf_metric_diff: The difference between the +performance for the actual and permuted data (i.e. test performance +minus permuted performance). Features with a larger +perf_metric_diff are more important.
  5. -pvalue: the probability of obtaining the actual performance value under the null hypothesis.
  6. +pvalue: the probability of obtaining the actual +performance value under the null hypothesis.
  7. names: The feature that was permuted.
  8. @@ -473,8 +643,15 @@

    Finding feature importance seed: The seed (if set).

-

As you can see here, the differences are negligible (close to zero), which makes sense since our model isn’t great. If you’re interested in feature importance, it’s especially useful to run multiple different train/test splits, as shown in our example snakemake workflow.

-

You can also choose to permute correlated features together using corr_thresh (default: 1). Any features that are above the correlation threshold are permuted together; i.e. perfectly correlated features are permuted together when using the default value.

+

As you can see here, the differences are negligible (close to zero), +which makes sense since our model isn’t great. If you’re interested in +feature importance, it’s especially useful to run multiple different +train/test splits, as shown in our example +snakemake workflow.

+

You can also choose to permute correlated features together using +corr_thresh (default: 1). Any features that are above the +correlation threshold are permuted together; i.e. perfectly correlated +features are permuted together when using the default value.

 results_imp_corr <- run_ml(otu_mini_bin,
                            'glmnet',
@@ -505,19 +682,32 @@ 

Finding feature importance#> 1 glmnet AUC 2019 #> 2 glmnet AUC 2019 #> 3 glmnet AUC 2019

-

You can see which features were permuted together in the names column. Here all 3 features were permuted together (which doesn’t really make sense, but it’s just an example).

-

If you previously executed run_ml() without feature importance but now wish to find feature importance after the fact, see the example code in the get_feature_importance() documentation.

-

get_feature_importance() can show a live progress bar, see vignette("parallel") for examples.

+

You can see which features were permuted together in the +names column. Here all 3 features were permuted together +(which doesn’t really make sense, but it’s just an example).

+

If you previously executed run_ml() without feature +importance but now wish to find feature importance after the fact, see +the example code in the get_feature_importance() +documentation.

+

get_feature_importance() can show a live progress bar, +see vignette("parallel") for examples.
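A minimal sketch of enabling that progress bar, assuming the progressr-based reporting described in vignette("parallel") (the handler setup is an assumption, not part of this diff):

 # turn on a global progress handler so the permutation step reports progress
 progressr::handlers(global = TRUE)
 results_imp <- run_ml(otu_mini_bin,
                       "glmnet",
                       find_feature_importance = TRUE,
                       seed = 2019)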

-

Tuning hyperparameters (using the hyperparameter argument) +

Tuning hyperparameters (using the hyperparameter +argument)

-

This is important, so we have a whole vignette about them. The bottom line is we provide default hyperparameters that you can start with, but it’s important to tune your hyperparameters. For more information about what the default hyperparameters are, and how to tune hyperparameters, see vignette("tuning").

+

This is important, so we have a whole vignette about them. The bottom +line is we provide default hyperparameters that you can start with, but +it’s important to tune your hyperparameters. For more information about +what the default hyperparameters are, and how to tune hyperparameters, +see vignette("tuning").

Other models

-

Here are examples of how to train and evaluate other models. The output for all of them is very similar, so we won’t go into those details.

+

Here are examples of how to train and evaluate other models. The +output for all of them is very similar, so we won’t go into those +details.

Random forest

@@ -526,7 +716,11 @@

Random forest'rf', cv_times = 5, seed = 2019)

-

You can also change the number of trees to use for random forest (ntree; default: 1000). This can’t be tuned using rf package implementation of random forest. Please refer to caret documentation if you are interested in other packages with random forest implementations.

+

You can also change the number of trees to use for random forest +(ntree; default: 1000). This can’t be tuned using +rf package implementation of random forest. Please refer to +caret documentation if you are interested in other packages +with random forest implementations.

 results_rf_nt <- run_ml(otu_mini_bin,
                         'rf',
@@ -551,7 +745,8 @@ 

SVM 'svmRadial', cv_times = 5, seed = 2019)

-

If you get a message “maximum number of iterations reached”, see this issue in caret.

+

If you get a message “maximum number of iterations reached”, see this issue in +caret.

@@ -560,7 +755,8 @@

Other data

Multiclass data

-

We provide otu_mini_multi with a multiclass outcome (three or more outcomes):

+

We provide otu_mini_multi with a multiclass outcome +(three or more outcomes):

 otu_mini_multi %>% dplyr::pull('dx') %>% unique()
 #> [1] "adenoma"   "carcinoma" "normal"
@@ -570,7 +766,8 @@

Multiclass data= "dx", seed = 2019 )

-

The performance metrics are slightly different, but the format of everything else is the same:

+

The performance metrics are slightly different, but the format of +everything else is the same:

 results_multi$performance
 #> # A tibble: 1 × 17
@@ -585,13 +782,15 @@ 

Multiclass data

Continuous data

-

And here’s an example for running continuous data, where the outcome column is numerical:

+

And here’s an example for running continuous data, where the outcome +column is numerical:

 results_cont <- run_ml(otu_mini_bin[, 2:11],
                        'glmnet',
                        outcome_colname = 'Otu00001',
                        seed = 2019)
-

Again, the performance metrics are slightly different, but the format of the rest is the same:

+

Again, the performance metrics are slightly different, but the format +of the rest is the same:

 results_cont$performance
 #> # A tibble: 1 × 6
@@ -601,14 +800,22 @@ 

Continuous data

-

References +

References

-
-
-

Tang, Shengpu, Parmida Davarmanesh, Yanmeng Song, Danai Koutra, Michael W. Sjoding, and Jenna Wiens. 2020. “Democratizing EHR Analyses with FIDDLE: A Flexible Data-Driven Preprocessing Pipeline for Structured Clinical Data.” J Am Med Inform Assoc, October. https://doi.org/10.1093/jamia/ocaa139.

-
-
-

Topçuoğlu, Begüm D., Nicholas A. Lesniak, Mack T. Ruffin, Jenna Wiens, and Patrick D. Schloss. 2020. “A Framework for Effective Application of Machine Learning to Microbiome-Based Classification Problems.” mBio 11 (3). https://doi.org/10.1128/mBio.00434-20.

+
+
+Tang, Shengpu, Parmida Davarmanesh, Yanmeng Song, Danai Koutra, Michael +W. Sjoding, and Jenna Wiens. 2020. “Democratizing EHR +Analyses with FIDDLE: A Flexible Data-Driven Preprocessing +Pipeline for Structured Clinical Data.” J Am Med Inform +Assoc, October. https://doi.org/10.1093/jamia/ocaa139. +
+
+Topçuoğlu, Begüm D., Nicholas A. Lesniak, Mack T. Ruffin, Jenna Wiens, +and Patrick D. Schloss. 2020. “A Framework for +Effective Application of Machine Learning to +Microbiome-Based Classification +Problems.” mBio 11 (3). https://doi.org/10.1128/mBio.00434-20.
diff --git a/docs/articles/paper.html b/docs/articles/paper.html index 4f4af106..22fa9401 100644 --- a/docs/articles/paper.html +++ b/docs/articles/paper.html @@ -40,7 +40,7 @@ mikropml - 1.2.2.9000 + 1.3.0
@@ -100,11 +100,14 @@ -
+

Statement of need

-

Most applications of machine learning (ML) require reproducible steps for data pre-processing, cross-validation, testing, model evaluation, and often interpretation of why the model makes particular predictions. Performing these steps is important, as failure to implement them can result in incorrect and misleading results (Teschendorff 2019; Wiens et al. 2019).

-

Supervised ML is widely used to recognize patterns in large datasets and to make predictions about outcomes of interest. Several packages including caret (Kuhn 2008) and tidymodels (Kuhn, Wickham, and RStudio 2020) in R, scikitlearn (Pedregosa et al. 2011) in Python, and the H2O autoML platform (H2O.ai 2020) allow scientists to train ML models with a variety of algorithms. While these packages provide the tools necessary for each ML step, they do not implement a complete ML pipeline according to good practices in the literature. This makes it difficult for practitioners new to ML to easily begin to perform ML analyses.

-

To enable a broader range of researchers to apply ML to their problem domains, we created mikropml, an easy-to-use R package (R Core Team 2020) that implements the ML pipeline created by Topçuoğlu et al. (Topçuoğlu et al. 2020) in a single function that returns a trained model, model performance metrics and feature importance. mikropml leverages the caret package to support several ML algorithms: linear regression, logistic regression, support vector machines with a radial basis kernel, decision trees, random forest, and gradient boosted trees. It incorporates good practices in ML training, testing, and model evaluation (Topçuoğlu et al. 2020; Teschendorff 2019). Furthermore, it provides data preprocessing steps based on the FIDDLE (FlexIble Data-Driven pipeLinE) framework outlined in Tang et al. (Tang et al. 2020) and post-training permutation importance steps to estimate the importance of each feature in the models trained (Breiman 2001; Fisher, Rudin, and Dominici 2018).

-

mikropml can be used as a starting point in the application of ML to datasets from many different fields. It has already been applied to microbiome data to categorize patients with colorectal cancer (Topçuoğlu et al. 2020), to identify differences in genomic and clinical features associated with bacterial infections (Lapp et al. 2020), and to predict gender-based biases in academic publishing (Hagan et al. 2020).

+

Most applications of machine learning (ML) require reproducible steps +for data pre-processing, cross-validation, testing, model evaluation, +and often interpretation of why the model makes particular predictions. +Performing these steps is important, as failure to implement them can +result in incorrect and misleading results (Teschendorff 2019; Wiens et al. 2019).

+

Supervised ML is widely used to recognize patterns in large datasets +and to make predictions about outcomes of interest. Several packages +including caret (Kuhn 2008) +and tidymodels (Kuhn, Wickham, and +RStudio 2020) in R, scikitlearn (Pedregosa et al. 2011) in Python, and the H2O +autoML platform (H2O.ai 2020) +allow scientists to train ML models with a variety of algorithms. While +these packages provide the tools necessary for each ML step, they do not +implement a complete ML pipeline according to good practices in the +literature. This makes it difficult for practitioners new to ML to +easily begin to perform ML analyses.

+

To enable a broader range of researchers to apply ML to their problem +domains, we created mikropml, +an easy-to-use R package (R Core Team +2020) that implements the ML pipeline created by Topçuoğlu et +al. (Topçuoğlu et al. 2020) in a +single function that returns a trained model, model performance metrics +and feature importance. mikropml leverages the +caret package to support several ML algorithms: linear +regression, logistic regression, support vector machines with a radial +basis kernel, decision trees, random forest, and gradient boosted trees. +It incorporates good practices in ML training, testing, and model +evaluation (Topçuoğlu et al. 2020; Teschendorff +2019). Furthermore, it provides data preprocessing steps based on +the FIDDLE (FlexIble Data-Driven pipeLinE) framework outlined in Tang +et al. (Tang et al. 2020) and +post-training permutation importance steps to estimate the importance of +each feature in the models trained (Breiman 2001; +Fisher, Rudin, and Dominici 2018).

+

mikropml can be used as a starting point in the +application of ML to datasets from many different fields. It has already +been applied to microbiome data to categorize patients with colorectal +cancer (Topçuoğlu et al. 2020), to +identify differences in genomic and clinical features associated with +bacterial infections (Lapp et al. 2020), +and to predict gender-based biases in academic publishing (Hagan et al. 2020).

mikropml package

-

The mikropml package includes functionality to preprocess the data, train ML models, evaluate model performance, and quantify feature importance (Figure 1). We also provide vignettes and an example Snakemake workflow (Köster and Rahmann 2012) to showcase how to run an ideal ML pipeline with multiple different train/test data splits. The results can be visualized using helper functions that use ggplot2 (Wickham 2016).

-

While mikropml allows users to get started quickly and facilitates reproducibility, it is not a replacement for understanding the ML workflow which is still necessary when interpreting results (Pollard et al. 2019). To facilitate understanding and enable one to tailor the code to their application, we have heavily commented the code and have provided supporting documentation which can be read online.

+

The mikropml package includes functionality to +preprocess the data, train ML models, evaluate model performance, and +quantify feature importance (Figure 1). We also provide vignettes +and an example +Snakemake workflow (Köster and Rahmann +2012) to showcase how to run an ideal ML pipeline with multiple +different train/test data splits. The results can be visualized using +helper functions that use ggplot2 (Wickham 2016).

+

While mikropml allows users to get started quickly and facilitates +reproducibility, it is not a replacement for understanding the ML +workflow which is still necessary when interpreting results (Pollard et al. 2019). To facilitate +understanding and enable one to tailor the code to their application, we +have heavily commented the code and have provided supporting +documentation which can be read online.

Preprocessing data

-

We provide the function preprocess_data() to preprocess features using several different functions from the caret package. preprocess_data() takes continuous and categorical data, re-factors categorical data into binary features, and provides options to normalize continuous data, remove features with near-zero variance, and keep only one instance of perfectly correlated features. We set the default options based on those implemented in FIDDLE (Tang et al. 2020). More details on how to use preprocess_data() can be found in the accompanying vignette.

+

We provide the function preprocess_data() to preprocess +features using several different functions from the caret +package. preprocess_data() takes continuous and categorical +data, re-factors categorical data into binary features, and provides +options to normalize continuous data, remove features with near-zero +variance, and keep only one instance of perfectly correlated features. +We set the default options based on those implemented in FIDDLE (Tang et al. 2020). More details on how to use +preprocess_data() can be found in the accompanying vignette.
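As a brief illustration of that interface, a sketch using the bundled otu_small dataset with all preprocessing options left at their defaults:

 library(mikropml)
 # returns a list: the transformed data plus records of grouped and removed features
 prep <- preprocess_data(dataset = otu_small, outcome_colname = "dx")
 dat_ready <- prep$dat_transformed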

Running ML

-

The main function in mikropml, run_ml(), minimally takes in the model choice and a data frame with an outcome column and feature columns. For model choice, mikropml currently supports logistic and linear regression (glmnet: Friedman, Hastie, and Tibshirani 2010), support vector machines with a radial basis kernel (kernlab: Karatzoglou et al. 2004), decision trees (rpart: Therneau et al. 2019), random forest (randomForest: Liaw and Wiener 2002), and gradient-boosted trees (xgboost: Chen et al. 2020). run_ml() randomly splits the data into train and test sets while maintaining the distribution of the outcomes found in the full dataset. It also provides the option to split the data into train and test sets based on categorical variables (e.g. batch, geographic location, etc.). mikropml uses the caret package (Kuhn 2008) to train and evaluate the models, and optionally quantifies feature importance. The output includes the best model built based on tuning hyperparameters in an internal and repeated cross-validation step, model evaluation metrics, and optional feature importances. Feature importances are calculated using a permutation test, which breaks the relationship between the feature and the true outcome in the test data, and measures the change in model performance. This provides an intuitive metric of how individual features influence model performance and is comparable across model types, which is particularly useful for model interpretation (Topçuoğlu et al. 2020). Our introductory vignette contains a comprehensive tutorial on how to use run_ml().

+

The main function in mikropml, run_ml(), minimally takes +in the model choice and a data frame with an outcome column and feature +columns. For model choice, mikropml currently supports +logistic and linear regression (glmnet: Friedman, Hastie, and Tibshirani +2010), support vector machines with a radial basis kernel (kernlab: Karatzoglou et al. 2004), +decision trees (rpart: Therneau et +al. 2019), random forest (randomForest: Liaw and Wiener +2002), and gradient-boosted trees (xgboost: Chen et al. 2020). +run_ml() randomly splits the data into train and test sets +while maintaining the distribution of the outcomes found in the full +dataset. It also provides the option to split the data into train and +test sets based on categorical variables (e.g. batch, geographic +location, etc.). mikropml uses the caret +package (Kuhn 2008) to train and evaluate +the models, and optionally quantifies feature importance. The output +includes the best model built based on tuning hyperparameters in an +internal and repeated cross-validation step, model evaluation metrics, +and optional feature importances. Feature importances are calculated +using a permutation test, which breaks the relationship between the +feature and the true outcome in the test data, and measures the change +in model performance. This provides an intuitive metric of how +individual features influence model performance and is comparable across +model types, which is particularly useful for model interpretation (Topçuoğlu et al. 2020). Our introductory +vignette contains a comprehensive tutorial on how to use +run_ml().
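A minimal call illustrating the interface described above (the dataset and options here are a sketch, not a recommendation):

 library(mikropml)
 # train and evaluate a regularized logistic regression model
 ml_results <- run_ml(otu_small,
                      method = "glmnet",
                      outcome_colname = "dx",
                      find_feature_importance = TRUE,
                      seed = 2019)
 ml_results$performance
 ml_results$feature_importance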

mikropml pipeline

-

Ideal workflow for running mikropml with many different train/test splits +

Ideal workflow for running mikropml with many different train/test +splits

-

To investigate the variation in model performance depending on the train and test set used (Topçuoğlu et al. 2020; Lapp et al. 2020), we provide examples of how to run_ml() many times with different train/test splits and how to get summary information about model performance on a local computer or on a high-performance computing cluster using a Snakemake workflow.

+

To investigate the variation in model performance depending on the +train and test set used (Topçuoğlu et al. 2020; +Lapp et al. 2020), we provide examples of how to +run_ml() many times with different train/test splits and +how to get summary information about model performance on a local +computer or on a high-performance computing cluster using a Snakemake +workflow.

Tuning & visualization

-

One particularly important aspect of ML is hyperparameter tuning. We provide a reasonable range of default hyperparameters for each model type. However practitioners should explore whether that range is appropriate for their data, or if they should customize the hyperparameter range. Therefore, we provide a function plot_hp_performance() to plot the cross-validation performance metric of a single model or models built using different train/test splits. This helps evaluate if the hyperparameter range is being searched exhaustively and allows the user to pick the ideal set. We also provide summary plots of test performance metrics for the many train/test splits with different models using plot_model_performance(). Examples are described in the accompanying vignette on hyperparameter tuning.

+

One particularly important aspect of ML is hyperparameter tuning. We +provide a reasonable range of default hyperparameters for each model +type. However practitioners should explore whether that range is +appropriate for their data, or if they should customize the +hyperparameter range. Therefore, we provide a function +plot_hp_performance() to plot the cross-validation +performance metric of a single model or models built using different +train/test splits. This helps evaluate if the hyperparameter range is +being searched exhaustively and allows the user to pick the ideal set. +We also provide summary plots of test performance metrics for the many +train/test splits with different models using +plot_model_performance(). Examples are described in the +accompanying vignette +on hyperparameter tuning.
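A rough sketch of that workflow; the get_hp_performance() return structure (a list with dat, params, and metric) and the bare-name arguments to plot_hp_performance() are assumptions based on the tuning vignette:

 res <- run_ml(otu_small, "glmnet", outcome_colname = "dx", seed = 2019)
 # per-hyperparameter cross-validation results for the trained model
 hp_metrics <- get_hp_performance(res$trained_model)
 plot_hp_performance(hp_metrics$dat, lambda, AUC)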

Dependencies

-

mikropml is written in R (R Core Team 2020) and depends on several packages: dplyr (Wickham et al. 2020), rlang (Henry, Wickham, and RStudio 2020) and caret (Kuhn 2008). The ML algorithms supported by mikropml require: glmnet (Friedman, Hastie, and Tibshirani 2010), e1071 (Meyer et al. 2020), and MLmetrics (Yan 2016) for logistic regression, rpart2 (Therneau et al. 2019) for decision trees, randomForest (Liaw and Wiener 2002) for random forest, xgboost (Chen et al. 2020) for xgboost, and kernlab (Karatzoglou et al. 2004) for support vector machines. We also allow for parallelization of cross-validation and other steps using the foreach, doFuture, future.apply, and future packages (Bengtsson and Team 2020). Finally, we use ggplot2 for plotting (Wickham 2016).

+

mikropml is written in R (R Core Team +2020) and depends on several packages: dplyr (Wickham et al. 2020), rlang (Henry, Wickham, and RStudio 2020) and +caret (Kuhn 2008). The ML +algorithms supported by mikropml require: +glmnet (Friedman, Hastie, and +Tibshirani 2010), e1071 (Meyer et al. 2020), and MLmetrics +(Yan 2016) for logistic regression, +rpart2 (Therneau et al. 2019) +for decision trees, randomForest (Liaw and Wiener 2002) for random forest, +xgboost (Chen et al. 2020) +for xgboost, and kernlab (Karatzoglou et al. 2004) for support vector +machines. We also allow for parallelization of cross-validation and +other steps using the foreach, doFuture, +future.apply, and future packages (Bengtsson and Team 2020). Finally, we use +ggplot2 for plotting (Wickham +2016).
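For the parallelization mentioned here, a minimal sketch of registering a future backend before calling run_ml(); the plan and worker count are illustrative (see vignette("parallel") for the supported setup):

 # register a parallel backend so the foreach-based steps can use multiple workers
 doFuture::registerDoFuture()
 future::plan(future::multisession, workers = 2)
 results <- run_ml(otu_mini_bin, "glmnet", seed = 2019)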

Acknowledgments

-

We thank members of the Schloss Lab who participated in code clubs related to the initial development of the pipeline, made documentation improvements, and provided general feedback. We also thank Nick Lesniak for designing the mikropml logo.

-

We thank the US Research Software Sustainability Institute (NSF #1743188) for providing training to KLS at the Winter School in Research Software Engineering.

+

We thank members of the Schloss Lab who participated in code clubs +related to the initial development of the pipeline, made documentation +improvements, and provided general feedback. We also thank Nick Lesniak +for designing the mikropml logo.

+

We thank the US Research Software Sustainability Institute (NSF +#1743188) for providing training to KLS at the Winter School in Research +Software Engineering.

Funding

-

Salary support for PDS came from NIH grant 1R01CA215574. KLS received support from the NIH Training Program in Bioinformatics (T32 GM070449). ZL received support from the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE 1256260. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation.

+

Salary support for PDS came from NIH grant 1R01CA215574. KLS received +support from the NIH Training Program in Bioinformatics (T32 GM070449). +ZL received support from the National Science Foundation Graduate +Research Fellowship Program under Grant No. DGE 1256260. Any opinions, +findings, and conclusions or recommendations expressed in this material +are those of the authors and do not necessarily reflect the views of the +National Science Foundation.

Author contributions

-

BDT, ZL, and KLS contributed equally. Author order among the co-first authors was determined by time since joining the project.

-

BDT, ZL, and KLS conceptualized the study and wrote the code. KLS structured the code in R package form. BDT, ZL, JW, and PDS developed methodology. PDS, ES, and JW supervised the project. BDT, ZL, and KLS wrote the original draft. All authors reviewed and edited the manuscript.

+

BDT, ZL, and KLS contributed equally. Author order among the co-first +authors was determined by time since joining the project.

+

BDT, ZL, and KLS conceptualized the study and wrote the code. KLS +structured the code in R package form. BDT, ZL, JW, and PDS developed +methodology. PDS, ES, and JW supervised the project. BDT, ZL, and KLS +wrote the original draft. All authors reviewed and edited the +manuscript.

Conflicts of interest @@ -186,90 +327,156 @@

Conflicts of interestNone.

-

References +

References

-
-
-

Bengtsson, Henrik, and R Core Team. 2020. “Future.Apply: Apply Function to Elements in Parallel Using Futures,” July.

-
-
-

Breiman, Leo. 2001. “Random Forests.” Machine Learning 45 (1): 5–32. https://doi.org/10.1023/A:1010933404324.

-
-
-

Chen, Tianqi, Tong He, Michael Benesty, Vadim Khotilovich, Yuan Tang, Hyunsu Cho, Kailong Chen, et al. 2020. “Xgboost: Extreme Gradient Boosting,” June.

-
-
-

Fisher, Aaron, Cynthia Rudin, and Francesca Dominici. 2018. “All Models Are Wrong, but Many Are Useful: Learning a Variable’s Importance by Studying an Entire Class of Prediction Models Simultaneously.”

-
-
-

Friedman, Jerome H., Trevor Hastie, and Rob Tibshirani. 2010. “Regularization Paths for Generalized Linear Models via Coordinate Descent.” Journal of Statistical Software 33 (1): 1–22. https://doi.org/10.18637/jss.v033.i01.

-
-
-

H2O.ai. 2020. H2O: Scalable Machine Learning Platform. Manual.

-
-
-

Hagan, Ada K., Begüm D. Topçuoğlu, Mia E. Gregory, Hazel A. Barton, and Patrick D. Schloss. 2020. “Women Are Underrepresented and Receive Differential Outcomes at ASM Journals: A Six-Year Retrospective Analysis.” mBio 11 (6). https://doi.org/10.1128/mBio.01680-20.

-
-
-

Henry, Lionel, Hadley Wickham, and RStudio. 2020. “Rlang: Functions for Base Types and Core R and ’Tidyverse’ Features,” July.

-
-
-

Karatzoglou, Alexandros, Alexandros Smola, Kurt Hornik, and Achim Zeileis. 2004. “Kernlab - an S4 Package for Kernel Methods in R.” Journal of Statistical Software 11 (1): 1–20. https://doi.org/10.18637/jss.v011.i09.

-
-
-

Köster, Johannes, and Sven Rahmann. 2012. “Snakemake - a Scalable Bioinformatics Workflow Engine.” Bioinformatics 28 (19): 2520–2. https://doi.org/10.1093/bioinformatics/bts480.

-
-
-

Kuhn, Max. 2008. “Building Predictive Models in R Using the Caret Package.” Journal of Statistical Software 28 (1): 1–26. https://doi.org/10.18637/jss.v028.i05.

-
-
-

Kuhn, Max, Hadley Wickham, and RStudio. 2020. “Tidymodels: Easily Install and Load the ’Tidymodels’ Packages,” July.

-
-
-

Lapp, Zena, Jennifer Han, Jenna Wiens, Ellie JC Goldstein, Ebbing Lautenbach, and Evan Snitkin. 2020. “Machine Learning Models to Identify Patient and Microbial Genetic Factors Associated with Carbapenem-Resistant Klebsiella Pneumoniae Infection.” medRxiv, July, 2020.07.06.20147306. https://doi.org/10.1101/2020.07.06.20147306.

-
-
-

Liaw, Andy, and Matthew Wiener. 2002. “Classification and Regression by randomForest” 2: 5.

-
-
-

Meyer, David, Evgenia Dimitriadou, Kurt Hornik, Andreas Weingessel, Friedrich Leisch, Chih-Chung Chang (libsvm C++-code), and Chih-Chen Lin (libsvm C++-code). 2020. “E1071: Misc Functions of the Department of Statistics, Probability Theory Group (Formerly: E1071), TU Wien.”

-
-
-

Pedregosa, Fabian, Gaël Varoquaux, Alexandre Gramfort, Vincent Michel, Bertrand Thirion, Olivier Grisel, Mathieu Blondel, et al. 2011. “Scikit-Learn: Machine Learning in Python.” Journal of Machine Learning Research 12 (85): 2825–30.

-
-
-

Pollard, Tom J., Irene Chen, Jenna Wiens, Steven Horng, Danny Wong, Marzyeh Ghassemi, Heather Mattie, Emily Lindemer, and Trishan Panch. 2019. “Turning the Crank for Machine Learning: Ease, at What Expense?” The Lancet Digital Health 1 (5): e198–e199. https://doi.org/10.1016/S2589-7500(19)30112-8.

-
-
-

R Core Team. 2020. “R: A Language and Environment for Statistical Computing.”

-
-
-

Tang, Shengpu, Parmida Davarmanesh, Yanmeng Song, Danai Koutra, Michael W. Sjoding, and Jenna Wiens. 2020. “Democratizing EHR Analyses with FIDDLE: A Flexible Data-Driven Preprocessing Pipeline for Structured Clinical Data.” J Am Med Inform Assoc, October. https://doi.org/10.1093/jamia/ocaa139.

-
-
-

Teschendorff, Andrew E. 2019. “Avoiding Common Pitfalls in Machine Learning Omic Data Science.” Nature Materials 18 (5): 422–27. https://doi.org/10.1038/s41563-018-0241-z.

-
-
-

Therneau, Terry, Beth Atkinson, Brian Ripley (producer of the initial R. port, and maintainer 1999-2017). 2019. “Rpart: Recursive Partitioning and Regression Trees,” April.

-
-
-

Topçuoğlu, Begüm D., Nicholas A. Lesniak, Mack T. Ruffin, Jenna Wiens, and Patrick D. Schloss. 2020. “A Framework for Effective Application of Machine Learning to Microbiome-Based Classification Problems.” mBio 11 (3). https://doi.org/10.1128/mBio.00434-20.

-
-
-

Wickham, Hadley. 2016. Ggplot2: Elegant Graphics for Data Analysis. Use R! Cham: Springer International Publishing. https://doi.org/10.1007/978-3-319-24277-4.

-
-
-

Wickham, Hadley, Romain François, Lionel Henry, Kirill Müller, and RStudio. 2020. “Dplyr: A Grammar of Data Manipulation,” August.

-
-
-

Wiens, Jenna, Suchi Saria, Mark Sendak, Marzyeh Ghassemi, Vincent X. Liu, Finale Doshi-Velez, Kenneth Jung, et al. 2019. “Do No Harm: A Roadmap for Responsible Machine Learning for Health Care.” Nat. Med. 25 (9): 1337–40. https://doi.org/10.1038/s41591-019-0548-6.

-
-
-

Yan, Yachen. 2016. “MLmetrics: Machine Learning Evaluation Metrics.”

-
-
-
-
+
+
+Bengtsson, Henrik, and R Core Team. 2020. “Future.apply: +Apply Function to Elements in +Parallel Using Futures,” July. +
+
+Breiman, Leo. 2001. “Random Forests.” Machine +Learning 45 (1): 5–32. https://doi.org/10.1023/A:1010933404324. +
+
+Chen, Tianqi, Tong He, Michael Benesty, Vadim Khotilovich, Yuan Tang, +Hyunsu Cho, Kailong Chen, et al. 2020. “Xgboost: Extreme +Gradient Boosting,” June. +
+
+Fisher, Aaron, Cynthia Rudin, and Francesca Dominici. 2018. “All +Models Are Wrong, but Many Are Useful: Learning a +Variable’s Importance by Studying an Entire Class of Prediction Models +Simultaneously.” +
+
+Friedman, Jerome H., Trevor Hastie, and Rob Tibshirani. 2010. +“Regularization Paths for Generalized Linear +Models via Coordinate Descent.” Journal +of Statistical Software 33 (1): 1–22. https://doi.org/10.18637/jss.v033.i01. +
+
+H2O.ai. 2020. H2o: Scalable Machine +Learning Platform. Manual. +
+
+Hagan, Ada K., Begüm D. Topçuoğlu, Mia E. Gregory, Hazel A. Barton, and +Patrick D. Schloss. 2020. “Women Are Underrepresented +and Receive Differential Outcomes at ASM +Journals: A Six-Year Retrospective +Analysis.” mBio 11 (6). https://doi.org/10.1128/mBio.01680-20. +
+
+Henry, Lionel, Hadley Wickham, and RStudio. 2020. “Rlang: +Functions for Base Types and Core +R and ’TidyverseFeatures,” +July. +
+
+Karatzoglou, Alexandros, Alexandros Smola, Kurt Hornik, and Achim +Zeileis. 2004. “Kernlab - An S4 Package for +Kernel Methods in R.” Journal of +Statistical Software 11 (1): 1–20. https://doi.org/10.18637/jss.v011.i09. +
+
+Köster, Johannes, and Sven Rahmann. 2012. “Snakemake - a Scalable Bioinformatics Workflow Engine.” Bioinformatics 28 (19): 2520–22. https://doi.org/10.1093/bioinformatics/bts480.
+
+Kuhn, Max. 2008. “Building Predictive Models in +R Using the Caret Package.” Journal +of Statistical Software 28 (1): 1–26. https://doi.org/10.18637/jss.v028.i05. +
+
+Kuhn, Max, Hadley Wickham, and RStudio. 2020. “Tidymodels: +Easily Install and Load the +’TidymodelsPackages,” July. +
+
+Lapp, Zena, Jennifer Han, Jenna Wiens, Ellie JC Goldstein, Ebbing +Lautenbach, and Evan Snitkin. 2020. “Machine Learning Models to +Identify Patient and Microbial Genetic Factors Associated with +Carbapenem-Resistant Klebsiella Pneumoniae +Infection.” medRxiv, July, 2020.07.06.20147306. https://doi.org/10.1101/2020.07.06.20147306. +
+
+Liaw, Andy, and Matthew Wiener. 2002. “Classification and +Regression by randomForest 2: 5. +
+
+Meyer, David, Evgenia Dimitriadou, Kurt Hornik, Andreas Weingessel, +Friedrich Leisch, Chih-Chung Chang (libsvm C++-code), and Chih-Chen Lin +(libsvm C++-code). 2020. “E1071: Misc Functions of +the Department of Statistics, +Probability Theory Group (Formerly: +E1071), TU Wien.” +
+
+Pedregosa, Fabian, Gaël Varoquaux, Alexandre Gramfort, Vincent Michel, +Bertrand Thirion, Olivier Grisel, Mathieu Blondel, et al. 2011. +“Scikit-Learn: Machine Learning in +Python.” Journal of Machine Learning +Research 12 (85): 2825–30. +
+
+Pollard, Tom J., Irene Chen, Jenna Wiens, Steven Horng, Danny Wong, +Marzyeh Ghassemi, Heather Mattie, Emily Lindemer, and Trishan Panch. +2019. “Turning the Crank for Machine Learning: Ease, at What +Expense?” The Lancet Digital Health 1 (5): e198–99. https://doi.org/10.1016/S2589-7500(19)30112-8. +
+
+R Core Team. 2020. “R: A Language and +Environment for Statistical Computing.” +
+
+Tang, Shengpu, Parmida Davarmanesh, Yanmeng Song, Danai Koutra, Michael +W. Sjoding, and Jenna Wiens. 2020. “Democratizing EHR +Analyses with FIDDLE: A Flexible Data-Driven Preprocessing +Pipeline for Structured Clinical Data.” J Am Med Inform +Assoc, October. https://doi.org/10.1093/jamia/ocaa139. +
+
+Teschendorff, Andrew E. 2019. “Avoiding Common Pitfalls in Machine +Learning Omic Data Science.” Nature Materials 18 (5): +422–27. https://doi.org/10.1038/s41563-018-0241-z. +
+
+Therneau, Terry, Beth Atkinson, Brian Ripley (producer of the initial R. +port, and maintainer 1999-2017). 2019. “Rpart: Recursive +Partitioning and Regression Trees,” April. +
+
+Topçuoğlu, Begüm D., Nicholas A. Lesniak, Mack T. Ruffin, Jenna Wiens, +and Patrick D. Schloss. 2020. “A Framework for +Effective Application of Machine Learning to +Microbiome-Based Classification +Problems.” mBio 11 (3). https://doi.org/10.1128/mBio.00434-20. +
+
+Wickham, Hadley. 2016. Ggplot2: Elegant Graphics for +Data Analysis. Use R! Cham: +Springer International Publishing. https://doi.org/10.1007/978-3-319-24277-4. +
+
+Wickham, Hadley, Romain François, Lionel Henry, Kirill Müller, and +RStudio. 2020. “Dplyr: A Grammar of Data +Manipulation,” August. +
+
+Wiens, Jenna, Suchi Saria, Mark Sendak, Marzyeh Ghassemi, Vincent X. +Liu, Finale Doshi-Velez, Kenneth Jung, et al. 2019. “Do No Harm: A +Roadmap for Responsible Machine Learning for Health Care.” +Nat. Med. 25 (9): 1337–40. https://doi.org/10.1038/s41591-019-0548-6. +
+
+Yan, Yachen. 2016. MLmetrics: Machine Learning +Evaluation Metrics.” +
+
+
+

  1. co-first author↩︎

  2. diff --git a/docs/articles/parallel.html b/docs/articles/parallel.html index a20fdfa3..e1ac7594 100644 --- a/docs/articles/parallel.html +++ b/docs/articles/parallel.html @@ -40,7 +40,7 @@ mikropml - 1.2.2.9000 + 1.3.0
@@ -100,11 +100,12 @@ -
+

Speed up single runs

-

By default, preprocess_data(), run_ml(), and compare_models() use only one process in series. If you’d like to parallelize various steps of the pipeline to make them run faster, install foreach, future, future.apply, and doFuture. Then, register a future plan prior to calling these functions:

+

By default, preprocess_data(), run_ml(), +and compare_models() use only one process in series. If +you’d like to parallelize various steps of the pipeline to make them run +faster, install foreach, future, +future.apply, and doFuture. Then, register a +future plan prior to calling these functions:

-doFuture::registerDoFuture()
+doFuture::registerDoFuture()
 future::plan(future::multicore, workers = 2)
-

Above, we used the multicore plan to split the work across 2 cores. See the future documentation for more about picking the best plan for your use case. Notably, multicore does not work inside RStudio or on Windows; you will need to use multisession instead in those cases.

-

After registering a future plan, you can call preprocess_data() and run_ml() as usual, and they will run certain tasks in parallel.

+

Above, we used the multicore plan to split the work +across 2 cores. See the future +documentation for more about picking the best plan for your use +case. Notably, multicore does not work inside RStudio or on +Windows; you will need to use multisession instead in those +cases.
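For example, on Windows or inside RStudio you could register a multisession plan instead. This is only a minimal sketch, still assuming you want two workers:

# multisession launches separate background R sessions, so it also works on Windows and in RStudio
doFuture::registerDoFuture()
future::plan(future::multisession, workers = 2)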

+

After registering a future plan, you can call +preprocess_data() and run_ml() as usual, and +they will run certain tasks in parallel.

 otu_data_preproc <- preprocess_data(otu_mini_bin, 'dx')$dat_transformed
-#> Using 'dx' as the outcome column.
-result1 <- run_ml(otu_data_preproc, 'glmnet')
-#> Using 'dx' as the outcome column.
-#> Training the model...
-#> Loading required package: ggplot2
-#> Loading required package: lattice
-#> Training complete.
+result1 <- run_ml(otu_data_preproc, 'glmnet')

Call run_ml() multiple times in parallel in R

-

You can use functions from the future.apply package to call run_ml() multiple times in parallel with different parameters. You will first need to run future::plan() as above if you haven’t already. Then, call run_ml() with multiple seeds using future_lapply():

+

You can use functions from the future.apply package to +call run_ml() multiple times in parallel with different +parameters. You will first need to run future::plan() as +above if you haven’t already. Then, call run_ml() with +multiple seeds using future_lapply():

 # NOTE: use more seeds for real-world data
 results_multi <- future.apply::future_lapply(seq(100, 102), function(seed) {
   run_ml(otu_data_preproc, 'glmnet', seed = seed)
-  }, future.seed = TRUE)
-#> Using 'dx' as the outcome column.
-#> Training the model...
-#> Training complete.
-#> Using 'dx' as the outcome column.
-#> Training the model...
-#> Training complete.
-#> Using 'dx' as the outcome column.
-#> Training the model...
-#> Training complete.
-

Each call to run_ml() with a different seed uses a different random split of the data into training and testing sets. Since we are using seeds, we must set future.seed to TRUE (see the future.apply documentation and this blog post for details on parallel-safe random seeds). This example uses only a few seeds for speed and simplicity, but for real data we recommend using many more seeds to get a better estimate of model performance.

-

In these examples, we used functions from the future.apply package to run_ml() in parallel, but you can accomplish the same thing with parallel versions of the purrr::map() functions using the furrr package (e.g. furrr::future_map_dfr()).

-

Extract the performance results and combine into one dataframe for all seeds:

+ }, future.seed = TRUE)
+

Each call to run_ml() with a different seed uses a +different random split of the data into training and testing sets. Since +we are using seeds, we must set future.seed to +TRUE (see the future.apply +documentation and this +blog post for details on parallel-safe random seeds). This example +uses only a few seeds for speed and simplicity, but for real data we +recommend using many more seeds to get a better estimate of model +performance.

+

In these examples, we used functions from the +future.apply package to run_ml() in parallel, +but you can accomplish the same thing with parallel versions of the +purrr::map() functions using the furrr package +(e.g. furrr::future_map_dfr()).
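For reference, a rough furrr-based sketch of the same loop might look like the following (assuming furrr is installed and a future plan is already registered; the object name perf_df_furrr is just illustrative):

# hypothetical furrr equivalent of the future_lapply() call above
perf_df_furrr <- furrr::future_map_dfr(
  seq(100, 102),
  function(seed) {
    # keep only the performance metrics from each run_ml() result
    run_ml(otu_data_preproc, "glmnet", seed = seed)$performance %>%
      dplyr::select(cv_metric_AUC, AUC, method)
  },
  .options = furrr::furrr_options(seed = TRUE) # parallel-safe random seeds
)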

+

Extract the performance results and combine into one dataframe for +all seeds:

 perf_df <- future.apply::future_lapply(results_multi, 
                                        function(result) {
@@ -173,17 +180,13 @@ 

Call run_ml() }, future.seed = TRUE) %>% dplyr::bind_rows() -perf_df -#> # A tibble: 3 × 3 -#> cv_metric_AUC AUC method -#> <dbl> <dbl> <chr> -#> 1 0.630 0.634 glmnet -#> 2 0.591 0.608 glmnet -#> 3 0.671 0.471 glmnet

+perf_df

Multiple ML methods

-

You may also wish to compare performance for different ML methods. mapply() can iterate over multiple lists or vectors, and future_mapply() works the same way:

+

You may also wish to compare performance for different ML methods. +mapply() can iterate over multiple lists or vectors, and +future_mapply() works the same way:

 # NOTE: use more seeds for real-world data
 param_grid <- expand.grid(seeds = seq(100, 102),
@@ -195,61 +198,39 @@ 

Multiple ML methodsparam_grid$seeds, param_grid$methods %>% as.character(), future.seed = TRUE - ) -#> Using 'dx' as the outcome column. -#> Training the model... -#> Training complete. -#> Using 'dx' as the outcome column. -#> Training the model... -#> Training complete. -#> Using 'dx' as the outcome column. -#> Training the model... -#> Training complete. -#> Using 'dx' as the outcome column. -#> Training the model... -#> Training complete. -#> Using 'dx' as the outcome column. -#> Training the model... -#> Training complete. -#> Using 'dx' as the outcome column. -#> Training the model... -#> Training complete.

-

Extract and combine the performance results for all seeds and methods:

+ )
+

Extract and combine the performance results for all seeds and +methods:

 perf_df2 <- lapply(results_mtx['performance',], 
                    function(x) {
                      x %>% select(cv_metric_AUC, AUC, method)
                    }) %>% 
   dplyr::bind_rows()
-perf_df2
-#> # A tibble: 6 × 3
-#>   cv_metric_AUC   AUC method
-#>           <dbl> <dbl> <chr> 
-#> 1         0.630 0.634 glmnet
-#> 2         0.591 0.608 glmnet
-#> 3         0.671 0.471 glmnet
-#> 4         0.665 0.708 rf    
-#> 5         0.651 0.697 rf    
-#> 6         0.701 0.592 rf
-

Visualize the performance results (ggplot2 is required):

+perf_df2
+

Visualize the performance results (ggplot2 is +required):

 perf_boxplot <- plot_model_performance(perf_df2)
 perf_boxplot
-

-

plot_model_performance() returns a ggplot2 object. You can add layers to customize the plot:

+

plot_model_performance() returns a ggplot2 object. You +can add layers to customize the plot:

 perf_boxplot +
    theme_classic() +
    scale_color_brewer(palette = "Dark2") +
    coord_flip()
-

-

You can also create your own plots however you like using the performance results.

+

You can also create your own plots however you like using the +performance results.

Live progress updates

-

preprocess_data() and get_feature_importance() support reporting live progress updates using the progressr package. The format is up to you, but we recommend using a progress bar like this:

+

preprocess_data() and +get_feature_importance() support reporting live progress +updates using the progressr package. The format is up to +you, but we recommend using a progress bar like this:

 # optionally, specify the progress bar format with the `progress` package.
 progressr::handlers(progressr::handler_progress(
@@ -270,12 +251,26 @@ 

Live progress updates#> Training the model... #> Training complete. #> Feature importance =========================== 100% | elapsed: 37s | eta: 0s

-

Note that some future backends support “near-live” progress updates, meaning the progress may not be reported immediately when parallel processing with futures. Read more on that in the progressr vignette. For more on progressr and how to customize the format of progress updates, see the progressr docs.

+

Note that some future backends support “near-live” progress updates, +meaning the progress may not be reported immediately when parallel +processing with futures. Read more on that in +the progressr vignette. For more on +progressr and how to customize the format of progress +updates, see the progressr +docs.

Parallelizing with Snakemake

-

When parallelizing multiple calls to run_ml() in R as in the examples above, all of the results objects are held in memory. This isn’t a big deal for a small dataset run with only a few seeds. However, for large datasets run in parallel with, say, 100 seeds (recommended), you may run into problems trying to store all of those objects in memory at once. One solution is to write the results files of each run_ml() call, then concatenate them at the end. We show one way to accomplish this with Snakemake in an example Snakemake workflow here.

+

When parallelizing multiple calls to run_ml() in R as in +the examples above, all of the results objects are held in memory. This +isn’t a big deal for a small dataset run with only a few seeds. However, +for large datasets run in parallel with, say, 100 seeds (recommended), +you may run into problems trying to store all of those objects in memory +at once. One solution is to write the results files of each +run_ml() call, then concatenate them at the end. We show +one way to accomplish this with Snakemake in an +example Snakemake workflow here.
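A bare-bones sketch of that idea in plain R is shown below; the results/ directory and file names are just placeholders, and it assumes readr and purrr are available:

# inside each job: keep only the performance table and write it to its own file
result <- run_ml(otu_data_preproc, "glmnet", seed = 100)
readr::write_csv(result$performance, "results/performance_seed_100.csv")

# in a final combining step: read every per-seed file back in and row-bind them
perf_all <- list.files("results", pattern = "performance_seed_.*\\.csv$", full.names = TRUE) %>%
  purrr::map_dfr(readr::read_csv)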

diff --git a/docs/articles/preprocess.html b/docs/articles/preprocess.html index 5ccf1133..d6e21856 100644 --- a/docs/articles/preprocess.html +++ b/docs/articles/preprocess.html @@ -40,7 +40,7 @@ mikropml - 1.2.2.9000 + 1.3.0 @@ -100,7 +100,7 @@ -
+

Categorical data @@ -209,7 +245,9 @@

Categorical data#> #> $removed_feats #> character(0)

-

As you can see, this variable was split into 3 different columns - one for each type (a, b, and c). And again, grp_feats is NULL.

+

As you can see, this variable was split into 3 different columns - +one for each type (a, b, and c). And again, grp_feats is +NULL.

Continuous data @@ -243,12 +281,23 @@

Continuous data#> #> $removed_feats #> character(0)

-

Wow! Why did the numbers change? This is because the default is to normalize the data using "center" and "scale". While this is often best practice, you may not want to normalize the data, or you may want to normalize the data in a different way. If you don’t want to normalize the data, you can use method=NULL:

+

Wow! Why did the numbers change? This is because the default is to +normalize the data using "center" and "scale". +While this is often best practice, you may not want to normalize the +data, or you may want to normalize the data in a different way. If you +don’t want to normalize the data, you can use +method=NULL:

 # preprocess raw continuous data, no normalization
 preprocess_data(dataset = cont_df, outcome_colname = "outcome", method = NULL)
-

You can also normalize the data in different ways. You can choose any method supported by the method argument of caret::preProcess() (see the caret::preProcess() docs for details). Note that these methods are only applied to continuous variables.

-

Another feature of preprocess_data() is that if you provide continuous variables as characters, they will be converted to numeric:

+

You can also normalize the data in different ways. You can choose any +method supported by the method argument of +caret::preProcess() (see the +caret::preProcess() docs for details). Note that these +methods are only applied to continuous variables.
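For example, to rescale each continuous feature to the range 0 to 1 instead of centering and scaling, you could pass "range" (one of the methods caret::preProcess() accepts). This is just a sketch reusing the cont_df example from above:

# rescale continuous features to [0, 1] instead of the default center/scale
preprocess_data(dataset = cont_df, outcome_colname = "outcome", method = c("range"))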

+

Another feature of preprocess_data() is that if you +provide continuous variables as characters, they will be converted to +numeric:

 # raw continuous dataset as characters
 cont_char_df <- data.frame(
@@ -263,7 +312,10 @@ 

Continuous data
 # preprocess raw continuous character data as numeric
 preprocess_data(dataset = cont_char_df, outcome_colname = "outcome")

-

If you don’t want this to happen, and you want character data to remain character data even if it can be converted to numeric, you can use to_numeric=FALSE and they will be kept as categorical:

+

If you don’t want this to happen, and you want character data to +remain character data even if it can be converted to numeric, you can +use to_numeric=FALSE and they will be kept as +categorical:

 # preprocess raw continuous character data as characters
 preprocess_data(dataset = cont_char_df, outcome_colname = "outcome", to_numeric = FALSE)
@@ -281,12 +333,17 @@ 

Continuous data#> #> $removed_feats #> character(0)

-

As you can see from this output, in this case the features are treated as groups rather than numbers (e.g. they are not normalized).

+

As you can see from this output, in this case the features are +treated as groups rather than numbers (e.g. they are not +normalized).

Collapse perfectly correlated features

-

By default, preprocess_data() collapses features that are perfectly positively or negatively correlated. This is because having multiple copies of those features does not add information to machine learning, and it makes run_ml faster.

+

By default, preprocess_data() collapses features that +are perfectly positively or negatively correlated. This is because +having multiple copies of those features does not add information to +machine learning, and it makes run_ml faster.

 # raw correlated dataset
 corr_df <- data.frame(
@@ -319,8 +376,14 @@ 

Collapse perfectly correlated fe #> #> $removed_feats #> [1] "var2"

-

As you can see, we end up with only one variable, as all 3 are grouped together. Also, the second element in the list is no longer NULL. Instead, it tells you that grp1 contains var1, var2, and var3.

-

If you want to group positively correlated features, but not negatively correlated features (e.g. for interpretability, or another downstream application), you can do that by using group_neg_corr=FALSE:

+

As you can see, we end up with only one variable, as all 3 are +grouped together. Also, the second element in the list is no longer +NULL. Instead, it tells you that grp1 contains +var1, var2, and var3.

+

If you want to group positively correlated features, but not +negatively correlated features (e.g. for interpretability, or another +downstream application), you can do that by using +group_neg_corr=FALSE:

 # preprocess raw correlated dataset; don't group negatively correlated features
 preprocess_data(dataset = corr_df, outcome_colname = "outcome", group_neg_corr = FALSE)
@@ -338,7 +401,10 @@ 

Collapse perfectly correlated fe #> #> $removed_feats #> [1] "var2"

-

Here, var3 is kept on its own because it's negatively correlated with var1 and var2. You can also choose to keep all features separate, even if they are perfectly correlated, by using collapse_corr_feats=FALSE:

+

Here, var3 is kept on its own because it's negatively correlated with var1 and var2. You can also choose to keep all features separate, even if they are perfectly correlated, by using collapse_corr_feats=FALSE:

 # preprocess raw correlated dataset; don't group negatively correlated features
 preprocess_data(dataset = corr_df, outcome_colname = "outcome", collapse_corr_feats = FALSE)
@@ -356,12 +422,14 @@ 

Collapse perfectly correlated fe #> #> $removed_feats #> [1] "var2"

-

In this case, grp_feats will always be NULL.

+

In this case, grp_feats will always be +NULL.

Data with near-zero variance

-

What if we have variables that are all zero, or all “no”? Those ones won’t contribute any information, so we remove them:

+

What if we have variables that are all zero, or all “no”? Those ones +won’t contribute any information, so we remove them:

 # raw dataset with non-variable features
 nonvar_df <- data.frame(
@@ -377,7 +445,9 @@ 

Data with near-zero variance#> 1 normal no 0 no 0 12 #> 2 normal yes 1 no 0 12 #> 3 cancer no 1 no 0 12

-

Here, var3, var4, and var5 all have no variability, so these variables are removed during preprocessing:

+

Here, var3, var4, and var5 all +have no variability, so these variables are removed during +preprocessing:

 # remove features with near-zero variance
 preprocess_data(dataset = nonvar_df, outcome_colname = "outcome")
@@ -395,7 +465,13 @@ 

Data with near-zero variance#> #> $removed_feats #> [1] "var4" "var3" "var5"

-

You can read the caret::preProcess() documentation for more information. By default, we remove features with “near-zero variance” (remove_var='nzv'). This uses the default arguments from caret::nearZeroVar(). However, particularly with smaller datasets, you might not want to remove features with near-zero variance. If you want to remove only features with zero variance, you can use remove_var='zv':

+

You can read the caret::preProcess() documentation for +more information. By default, we remove features with “near-zero +variance” (remove_var='nzv'). This uses the default +arguments from caret::nearZeroVar(). However, particularly +with smaller datasets, you might not want to remove features with +near-zero variance. If you want to remove only features with zero +variance, you can use remove_var='zv':

 # remove features with zero variance
 preprocess_data(dataset = nonvar_df, outcome_colname = "outcome", remove_var = 'zv')
@@ -413,7 +489,10 @@ 

Data with near-zero variance#> #> $removed_feats #> [1] "var4" "var3" "var5"

-

If you want to include all features, you can use the argument remove_zv=NULL. For this to work, you cannot collapse correlated features (otherwise it errors out because of the underlying caret function we use).

+

If you want to include all features, you can use the argument +remove_zv=NULL. For this to work, you cannot collapse +correlated features (otherwise it errors out because of the underlying +caret function we use).

 # don't remove features with near-zero or zero variance
 preprocess_data(dataset = nonvar_df, outcome_colname = "outcome", remove_var = NULL, collapse_corr_feats = FALSE)
@@ -431,7 +510,12 @@ 

Data with near-zero variance#> #> $removed_feats #> [1] "var4"

-

If you want to be more nuanced in how you remove near-zero variance features (e.g. change the default 10% cutoff for the percentage of distinct values out of the total number of samples), you can use the caret::preProcess() function after running preprocess_data with remove_var=NULL (see the caret::nearZeroVar() function for more information).

+

If you want to be more nuanced in how you remove near-zero variance +features (e.g. change the default 10% cutoff for the percentage of +distinct values out of the total number of samples), you can use the +caret::preProcess() function after running +preprocess_data with remove_var=NULL (see the +caret::nearZeroVar() function for more information).
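One way to sketch that workflow (the uniqueCut = 20 value below is an arbitrary example cutoff, not a recommendation) is to keep all features during preprocessing and then filter them yourself with caret::nearZeroVar():

# keep everything during preprocessing, then apply a custom near-zero variance filter
dat_proc <- preprocess_data(dataset = nonvar_df, outcome_colname = "outcome",
                            remove_var = NULL, collapse_corr_feats = FALSE)$dat_transformed
nzv_cols <- caret::nearZeroVar(dat_proc, uniqueCut = 20)
dat_filtered <- if (length(nzv_cols) > 0) dat_proc[, -nzv_cols, drop = FALSE] else dat_proc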

Missing data @@ -439,11 +523,16 @@

Missing datapreprocess_data() also deals with missing data. It:

  • Removes missing outcome variables.
  • -
  • Maintains zero variability in a feature if it already has no variability (i.e. the feature is removed if removing features with near-zero variance).
  • -
  • Replaces missing binary and categorical variables with zero (after splitting into multiple columns).
  • -
  • Replaces missing continuous data with the median value of that feature.
  • +
  • Maintains zero variability in a feature if it already has no +variability (i.e. the feature is removed if removing features with +near-zero variance).
  • +
  • Replaces missing binary and categorical variables with zero (after +splitting into multiple columns).
  • +
  • Replaces missing continuous data with the median value of that +feature.
-

If you’d like to deal with missing data in a different way, please do that prior to inputting the data to preprocess_data().

+

If you’d like to deal with missing data in a different way, please do +that prior to inputting the data to preprocess_data().
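For instance, here is a minimal sketch of doing your own imputation first (the data frame and column names are made up for illustration):

# impute a continuous feature with its mean instead of the median, then preprocess
my_raw_data <- data.frame(
  outcome = c("normal", "normal", "cancer"),
  var1 = c(1, NA, 3)
)
my_raw_data$var1[is.na(my_raw_data$var1)] <- mean(my_raw_data$var1, na.rm = TRUE)
preprocess_data(dataset = my_raw_data, outcome_colname = "outcome")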

Remove missing outcome variables

@@ -480,7 +569,8 @@

Remove missing outcome variables#> character(0)

-

Maintain zero variability in a feature if it already has no variability +

Maintain zero variability in a feature if it already has no +variability

 # raw dataset with missing value in non-variable feature
@@ -512,7 +602,9 @@ 

#> #> $removed_feats #> [1] "var2"

-

Here, the non-variable feature with missing data is removed because we removed features with near-zero variance. If we maintained that feature, it’d be all ones:

+

Here, the non-variable feature with missing data is removed because +we removed features with near-zero variance. If we maintained that +feature, it’d be all ones:

 # preprocess raw dataset with missing value in non-variable feature
 preprocess_data(dataset = miss_nonvar_df, outcome_colname = "outcome", remove_var = NULL, collapse_corr_feats = FALSE)
@@ -565,10 +657,12 @@ 

Replace miss #> #> $removed_feats #> [1] "var2"

-

Here each binary variable is split into two, and the missing value is considered zero for both of them.

+

Here each binary variable is split into two, and the missing value is +considered zero for both of them.

-

Replace missing continuous data with the median value of that feature +

Replace missing continuous data with the median value of that +feature

 # raw dataset with missing value in continuous feature
@@ -583,7 +677,8 @@ 

R #> 2 normal 2 2 #> 3 cancer 2 3 #> 4 normal NA NA

-

Here we’re not normalizing continuous features so it’s easier to see what’s going on (i.e. the median value is used):

+

Here we’re not normalizing continuous features so it’s easier to see +what’s going on (i.e. the median value is used):

 # preprocess raw dataset with missing value in continuous feature
 preprocess_data(dataset = miss_cont_df, outcome_colname = "outcome", method = NULL)
@@ -608,7 +703,8 @@ 

R

Putting it all together

-

Here’s some more complicated example raw data that puts everything we discussed together:

+

Here’s some more complicated example raw data that puts everything we +discussed together:

 test_df <- data.frame(
   outcome = c("normal", "normal", "cancer", NA),
@@ -631,7 +727,8 @@ 

Putting it all together#> 2 normal 2 b yes 1 0 no 1 6 x 0 1 2 #> 3 cancer 3 c no 0 0 no 0 NA y NA NA 3 #> 4 <NA> 4 d no 0 0 no 0 7 z NA NA 4

-

Let’s throw this into the preprocessing function with the default values:

+

Let’s throw this into the preprocessing function with the default +values:

 preprocess_data(dataset = test_df, outcome_colname = "outcome")
 #> Using 'outcome' as the outcome column.
@@ -668,20 +765,37 @@ 

Putting it all together#> [1] "var4" "var5" "var10" "var6" "var11"

As you can see, we got several messages:

    -
  • One of the samples (row 4) was removed because the outcome value was missing.
  • -
  • One of the variables in a feature with no variation had a missing value that was replaced with the non-varying value (var11).
  • -
  • Four categorical missing values were replaced with zero (var9). There are 4 missing rather than just 1 (like in the raw data) because we split the categorical variable into 4 different columns first.
  • -
  • One missing continuous value was imputed using the median value of that feature (var8).
  • +
  • One of the samples (row 4) was removed because the outcome value was +missing.
  • +
  • One of the variables in a feature with no variation had a missing value that was replaced with the non-varying value (var11).
  • +
  • Four categorical missing values were replaced with zero +(var9). There are 4 missing rather than just 1 (like in the +raw data) because we split the categorical variable into 4 different +columns first.
  • +
  • One missing continuous value was imputed using the median value of +that feature (var8).
-

Additionally, you can see that the continuous variables were normalized, the categorical variables were all changed to binary, and several features were grouped together. The variables in each group can be found in grp_feats.

+

Additionally, you can see that the continuous variables were +normalized, the categorical variables were all changed to binary, and +several features were grouped together. The variables in each group can +be found in grp_feats.

Next step: train and evaluate your model!

-

After you preprocess your data (either using preprocess_data() or by preprocessing the data on your own), you're ready to train and evaluate machine learning models! Please see run_ml() for information about training models.

-
-
-

Tang, Shengpu, Parmida Davarmanesh, Yanmeng Song, Danai Koutra, Michael W. Sjoding, and Jenna Wiens. 2020. “Democratizing EHR Analyses with FIDDLE: A Flexible Data-Driven Preprocessing Pipeline for Structured Clinical Data.” J Am Med Inform Assoc, October. https://doi.org/10.1093/jamia/ocaa139.

+

After you preprocess your data (either using preprocess_data() or by preprocessing the data on your own), you're ready to train and evaluate machine learning models! Please see run_ml() for information about training models.
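As a minimal sketch (using the otu_mini_bin example dataset that ships with mikropml rather than the toy data above):

# preprocess, then train and evaluate a model with run_ml()
dat_preproc <- preprocess_data(dataset = otu_mini_bin, outcome_colname = "dx")$dat_transformed
model_results <- run_ml(dat_preproc, "glmnet", seed = 2019)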

+
+
+Tang, Shengpu, Parmida Davarmanesh, Yanmeng Song, Danai Koutra, Michael +W. Sjoding, and Jenna Wiens. 2020. “Democratizing EHR +Analyses with FIDDLE: A Flexible Data-Driven Preprocessing +Pipeline for Structured Clinical Data.” J Am Med Inform +Assoc, October. https://doi.org/10.1093/jamia/ocaa139.
diff --git a/docs/articles/tuning.html b/docs/articles/tuning.html index 2092dc92..1cc971d6 100644 --- a/docs/articles/tuning.html +++ b/docs/articles/tuning.html @@ -40,7 +40,7 @@ mikropml - 1.2.2.9000 + 1.3.0
@@ -100,11 +100,12 @@ -
+
+
+ +
  • mikropml now requires R version 4.1.0 or greater due to an update in the randomForest package (#292).
  • +
  • New function compare_models() compares the performance of two models with a permutation test (#295, @courtneyarmour).
  • +
  • Fixed a bug where cv_times did not affect the reported repeats for cross-validation (#291, @kelly-sovacool).
  • +
  • Made minor documentation improvements (#293, @kelly-sovacool)
  • +

This minor patch fixes a test failure on platforms with no long doubles. The actual package code remains unchanged.

-
  • Allow kfold >= length(groups) (#285, @kelly-sovacool).
    • When using the groups parameter, groups are kept together in cross-validation partitions when kfold <= the number of groups in the training set. Previously, an error was thrown if this condition was not met. Now, if there are not enough groups in the training set for groups to be kept together during CV, groups are allowed to be split up across CV partitions.
  • +
    • Allow kfold >= length(groups) (#285, @kelly-sovacool). +
      • When using the groups parameter, groups are kept together in cross-validation partitions when kfold <= the number of groups in the training set. Previously, an error was thrown if this condition was not met. Now, if there are not enough groups in the training set for groups to be kept together during CV, groups are allowed to be split up across CV partitions.
      • +
    • Report p-values for permutation feature importance (#288, @kelly-sovacool).
-
  • New parameter cross_val added to run_ml() allows users to define their own custom cross-validation scheme (#278, @kelly-sovacool).
    • Also added a new parameter calculate_performance, which controls whether performance metrics are calculated (default: TRUE). Users may wish to skip performance calculations when training models with no cross-validation.
  • +
    • New parameter cross_val added to run_ml() allows users to define their own custom cross-validation scheme (#278, @kelly-sovacool). +
      • Also added a new parameter calculate_performance, which controls whether performance metrics are calculated (default: TRUE). Users may wish to skip performance calculations when training models with no cross-validation.
      • +
    • New parameter group_partitions added to run_ml() allows users to control which groups should go to which partition of the train/test split (#281, @kelly-sovacool).
    • -
    • Modified the training_frac parameter in run_ml() (#281, @kelly-sovacool).
      • By default, training_frac is a fraction between 0 and 1 that specifies how much of the dataset should be used in the training fraction of the train/test split.
      • +
      • Modified the training_frac parameter in run_ml() (#281, @kelly-sovacool). +
        • By default, training_frac is a fraction between 0 and 1 that specifies how much of the dataset should be used in the training fraction of the train/test split.
        • Users can instead give training_frac a vector of indices that correspond to which rows of the dataset should go in the training fraction of the train/test split. This gives users direct control over exactly which observations are in the training fraction if desired.
-
+
-
  • New correlation method option for feature importance (#267, @courtneyarmour).
    • The default is still “spearman”, and now you can use other methods supported by stats::cor with the corr_method parameter: get_feature_importance(corr_method = "pearson") -
  • +
@@ -104,10 +121,12 @@
  • mikropml now has a logo created by @NLesniak!
  • Made documentation improvements (#238, #231 @kelly-sovacool; #256 @BTopcuoglu).
  • -
  • New option in preprocess_data(): prefilter_threshold (#240, @kelly-sovacool, @courtneyarmour).
    • Remove any features that appear in N=prefilter_threshold or fewer rows in the data.
    • +
    • New option in preprocess_data(): prefilter_threshold (#240, @kelly-sovacool, @courtneyarmour). +
    • -
    • New option in get_feature_importance(): groups (#246, @kelly-sovacool).
      • Provide custom groups of features to permute together during permutation importance.
      • +
      • New option in get_feature_importance(): groups (#246, @kelly-sovacool). +
        • Provide custom groups of features to permute together during permutation importance.
        • groups is NULL by default; in this case, correlated features above corr_thresh are grouped together.
      • @@ -128,12 +147,14 @@

        This is the first release version of mikropml! 🎉

        • Added a NEWS.md file to track changes to the package.
        • -
        • Major new functions:
          • run_ml()
          • +
          • Major new functions: +
          • -
          • Support for ML methods in run_ml():
            • +
            • Support for ML methods in run_ml(): +
              • glmnet: logistic and linear regression
              • rf: random forest
              • @@ -144,7 +165,8 @@
            • -
            • New vignettes:
              • Introduction
              • +
              • New vignettes: +
                • Introduction
                • Preprocess data
                • Hyperparameter tuning
                • Parallel processing
                • diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index e9094896..43e8f4a9 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,4 +1,4 @@ -pandoc: 2.7.3 +pandoc: 2.17.1.1 pkgdown: 2.0.3 pkgdown_sha: ~ articles: @@ -7,7 +7,7 @@ articles: parallel: parallel.html preprocess: preprocess.html tuning: tuning.html -last_built: 2022-05-18T17:33Z +last_built: 2022-05-19T16:08Z urls: reference: http://www.schlosslab.org/mikropml/reference article: http://www.schlosslab.org/mikropml/articles diff --git a/docs/pull_request_template.html b/docs/pull_request_template.html index cc58413a..1a14b90b 100644 --- a/docs/pull_request_template.html +++ b/docs/pull_request_template.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0
@@ -75,25 +75,33 @@

NA

Issues

-
  • Resolves # .
+
  • Resolves # .
  • +

## Change(s) made

-
+
  • +

Checklist

(Strikethrough any points that are not applicable.)

-
  • -Write unit tests for any new functionality or bug fixes.
  • +
    • + +Write unit tests for any new functionality or bug fixes.
    • -Update docs if there are any API changes:
      • -roxygen comments
      • + +Update docs if there are any API changes:
        • + +roxygen comments
        • -vignettes
        • + +vignettes
      • -Update NEWS.md if this includes any user-facing changes.
      • + +Update NEWS.md if this includes any user-facing changes.
      • -The check workflow succeeds on your most recent commit. This is always required before the PR can be merged. + +The check workflow succeeds on your most recent commit. This is always required before the PR can be merged.
diff --git a/docs/reference/calc_perf_metrics.html b/docs/reference/calc_perf_metrics.html index cb3f3f8e..8fea238d 100644 --- a/docs/reference/calc_perf_metrics.html +++ b/docs/reference/calc_perf_metrics.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0

diff --git a/docs/reference/combine_hp_performance.html b/docs/reference/combine_hp_performance.html index b3c69730..dca720af 100644 --- a/docs/reference/combine_hp_performance.html +++ b/docs/reference/combine_hp_performance.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0
diff --git a/docs/reference/compare_models.html b/docs/reference/compare_models.html new file mode 100644 index 00000000..e5c84f0d --- /dev/null +++ b/docs/reference/compare_models.html @@ -0,0 +1,145 @@ + +Perform permutation tests to compare the performance metric +across all pairs of a group variable. — compare_models • mikropml + + +
+
+ + + +
+
+ + +
+

A wrapper for permute_p_value().

+
+ +
+
compare_models(merged_data, metric, group_name, nperm = 10000)
+
+ +
+

Arguments

+
merged_data
+

the concatenated performance data from run_ml

+
metric
+

metric to compare, must be numeric

+
group_name
+

column with group variables to compare

+
nperm
+

number of permutations, default=10000

+
+
+

Value

+

a table of p-values for all pairs of the group variable

+
+
+

Author

+

Courtney R Armour, armourc@umich.edu

+
+ +
+

Examples

+
df <- dplyr::tibble(
+  model = c("rf", "rf", "glmnet", "glmnet", "svmRadial", "svmRadial"),
+  AUC = c(.2, 0.3, 0.8, 0.9, 0.85, 0.95)
+)
+set.seed(123)
+compare_models(df, "AUC", "model", nperm = 10)
+#>   group1    group2   p_value
+#> 1 glmnet svmRadial 0.7272727
+#> 2     rf    glmnet 0.2727273
+#> 3     rf svmRadial 0.5454545
+
+
+
+ +
+ + +
+ + + + + + + + diff --git a/docs/reference/define_cv.html b/docs/reference/define_cv.html index d88d7eef..c26bc116 100644 --- a/docs/reference/define_cv.html +++ b/docs/reference/define_cv.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0
diff --git a/docs/reference/get_caret_processed_df.html b/docs/reference/get_caret_processed_df.html index 1da76d45..dfbed020 100644 --- a/docs/reference/get_caret_processed_df.html +++ b/docs/reference/get_caret_processed_df.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/get_difference.html b/docs/reference/get_difference.html new file mode 100644 index 00000000..e493205a --- /dev/null +++ b/docs/reference/get_difference.html @@ -0,0 +1,137 @@ + +Average metric difference — get_difference • mikropml + + +
+
+ + + +
+
+ + +
+

Calculate the difference in the mean of the metric for two groups

+
+ +
+
get_difference(sub_data, group_name, metric)
+
+ +
+

Arguments

+
sub_data
+

subset of the merged performance data frame for two groups

+
group_name
+

name of column with group variable

+
metric
+

metric to compare

+
+
+

Value

+

numeric difference in the average metric between the two groups

+
+
+

Author

+

Courtney Armour, armourc@umich.edu

+
+ +
+

Examples

+
df <- dplyr::tibble(
+  condition = c("a", "a", "b", "b"),
+  AUC = c(.2, 0.3, 0.8, 0.9)
+)
+get_difference(df, "condition", "AUC")
+#> [1] 0.6
+
+
+
+
+ +
+ + +
+ + + + + + + + diff --git a/docs/reference/get_feature_importance.html b/docs/reference/get_feature_importance.html index 7b38b2f3..e12f97f8 100644 --- a/docs/reference/get_feature_importance.html +++ b/docs/reference/get_feature_importance.html @@ -18,7 +18,7 @@ mikropml - 1.2.2.9000 + 1.3.0 @@ -202,7 +202,7 @@

Examples

# We strongly recommend providing multiple cores to speed up computation time. # Do this before calling `get_feature_importance()`. -doFuture::registerDoFuture() +doFuture::registerDoFuture() future::plan(future::multicore, workers = 2) # Optionally, you can group features together with a custom grouping diff --git a/docs/reference/get_hp_performance.html b/docs/reference/get_hp_performance.html index 4725b431..d95f2c20 100644 --- a/docs/reference/get_hp_performance.html +++ b/docs/reference/get_hp_performance.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/get_hyperparams_list.html b/docs/reference/get_hyperparams_list.html index cffb9d2c..e073866a 100644 --- a/docs/reference/get_hyperparams_list.html +++ b/docs/reference/get_hyperparams_list.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/get_outcome_type.html b/docs/reference/get_outcome_type.html index 9932003f..ef79172a 100644 --- a/docs/reference/get_outcome_type.html +++ b/docs/reference/get_outcome_type.html @@ -19,7 +19,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/get_partition_indices.html b/docs/reference/get_partition_indices.html index c3c9c641..a33ebb3f 100644 --- a/docs/reference/get_partition_indices.html +++ b/docs/reference/get_partition_indices.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/get_perf_metric_fn.html b/docs/reference/get_perf_metric_fn.html index 2eb2e30c..11b8718c 100644 --- a/docs/reference/get_perf_metric_fn.html +++ b/docs/reference/get_perf_metric_fn.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 @@ -105,7 +105,7 @@

Examples

#> data$obs <- factor(data$obs, levels = lev) #> postResample(data[, "pred"], data[, "obs"]) #> } -#> <bytecode: 0x7fa3774c1120> +#> <bytecode: 0x7fdd8f312490> #> <environment: namespace:caret> get_perf_metric_fn("binary") #> function (data, lev = NULL, model = NULL) @@ -163,7 +163,7 @@

Examples

#> stats <- stats[c(stat_list)] #> return(stats) #> } -#> <bytecode: 0x7fa389e58f88> +#> <bytecode: 0x7fdd88c84980> #> <environment: namespace:caret> get_perf_metric_fn("multiclass") #> function (data, lev = NULL, model = NULL) @@ -221,7 +221,7 @@

Examples

#> stats <- stats[c(stat_list)] #> return(stats) #> } -#> <bytecode: 0x7fa389e58f88> +#> <bytecode: 0x7fdd88c84980> #> <environment: namespace:caret>
diff --git a/docs/reference/get_perf_metric_name.html b/docs/reference/get_perf_metric_name.html index 16afe4eb..d96ec201 100644 --- a/docs/reference/get_perf_metric_name.html +++ b/docs/reference/get_perf_metric_name.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/get_performance_tbl.html b/docs/reference/get_performance_tbl.html index cb071b7b..cce8a298 100644 --- a/docs/reference/get_performance_tbl.html +++ b/docs/reference/get_performance_tbl.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/get_tuning_grid.html b/docs/reference/get_tuning_grid.html index 13497e90..a8d3faee 100644 --- a/docs/reference/get_tuning_grid.html +++ b/docs/reference/get_tuning_grid.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/group_correlated_features.html b/docs/reference/group_correlated_features.html index e91c8f69..ec9f0acb 100644 --- a/docs/reference/group_correlated_features.html +++ b/docs/reference/group_correlated_features.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/index.html b/docs/reference/index.html index 78413d2e..048d1f36 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 @@ -93,6 +93,11 @@

Plotting & evaluation helpers

Visualize & evaluate performance to help you tune hyperparameters and choose model methods.

+

compare_models()

+ +

Perform permutation tests to compare the performance metric +across all pairs of a group variable.

+

plot_hp_performance()

Plot hyperparameter performance metrics

diff --git a/docs/reference/mikropml.html b/docs/reference/mikropml.html index 033b0f39..53a58e4d 100644 --- a/docs/reference/mikropml.html +++ b/docs/reference/mikropml.html @@ -20,7 +20,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/otu_mini_bin.html b/docs/reference/otu_mini_bin.html index 80b5eec8..f839e6da 100644 --- a/docs/reference/otu_mini_bin.html +++ b/docs/reference/otu_mini_bin.html @@ -19,7 +19,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/otu_mini_bin_results_glmnet.html b/docs/reference/otu_mini_bin_results_glmnet.html index ca7233ac..5ae3499d 100644 --- a/docs/reference/otu_mini_bin_results_glmnet.html +++ b/docs/reference/otu_mini_bin_results_glmnet.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/otu_mini_bin_results_rf.html b/docs/reference/otu_mini_bin_results_rf.html index 7400b058..aaf32bab 100644 --- a/docs/reference/otu_mini_bin_results_rf.html +++ b/docs/reference/otu_mini_bin_results_rf.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/otu_mini_bin_results_rpart2.html b/docs/reference/otu_mini_bin_results_rpart2.html index adf384e5..f73bffd5 100644 --- a/docs/reference/otu_mini_bin_results_rpart2.html +++ b/docs/reference/otu_mini_bin_results_rpart2.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/otu_mini_bin_results_svmRadial.html b/docs/reference/otu_mini_bin_results_svmRadial.html index 895b21d9..165e2009 100644 --- a/docs/reference/otu_mini_bin_results_svmRadial.html +++ b/docs/reference/otu_mini_bin_results_svmRadial.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/otu_mini_bin_results_xgbTree.html b/docs/reference/otu_mini_bin_results_xgbTree.html index 4a593f16..bda6634d 100644 --- a/docs/reference/otu_mini_bin_results_xgbTree.html +++ b/docs/reference/otu_mini_bin_results_xgbTree.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/otu_mini_cont_results_glmnet.html b/docs/reference/otu_mini_cont_results_glmnet.html index 36c38de6..5c5dba44 100644 --- a/docs/reference/otu_mini_cont_results_glmnet.html +++ b/docs/reference/otu_mini_cont_results_glmnet.html @@ -20,7 +20,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/otu_mini_cont_results_nocv.html b/docs/reference/otu_mini_cont_results_nocv.html index f7a2e203..dda5c1ff 100644 --- a/docs/reference/otu_mini_cont_results_nocv.html +++ b/docs/reference/otu_mini_cont_results_nocv.html @@ -23,7 +23,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/otu_mini_cv.html b/docs/reference/otu_mini_cv.html index ecf1bba4..d35a6be4 100644 --- a/docs/reference/otu_mini_cv.html +++ b/docs/reference/otu_mini_cv.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/otu_mini_multi.html b/docs/reference/otu_mini_multi.html index 61380779..916e2f6c 100644 --- a/docs/reference/otu_mini_multi.html +++ b/docs/reference/otu_mini_multi.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/otu_mini_multi_group.html b/docs/reference/otu_mini_multi_group.html index 3430838a..627812b4 100644 --- a/docs/reference/otu_mini_multi_group.html +++ b/docs/reference/otu_mini_multi_group.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/otu_mini_multi_results_glmnet.html b/docs/reference/otu_mini_multi_results_glmnet.html index c2cb8ddb..eceaa22e 100644 --- a/docs/reference/otu_mini_multi_results_glmnet.html +++ 
b/docs/reference/otu_mini_multi_results_glmnet.html @@ -20,7 +20,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/otu_small.html b/docs/reference/otu_small.html index c00d3a75..6689a854 100644 --- a/docs/reference/otu_small.html +++ b/docs/reference/otu_small.html @@ -19,7 +19,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/permute_p_value.html b/docs/reference/permute_p_value.html new file mode 100644 index 00000000..74a8c3f0 --- /dev/null +++ b/docs/reference/permute_p_value.html @@ -0,0 +1,151 @@ + +Calculated a permuted p-value comparing two models — permute_p_value • mikropml + + +
+
+ + + +
+
+ + +
+

Calculate a permuted p-value comparing two models

+
+ +
+
permute_p_value(
+  merged_data,
+  metric,
+  group_name,
+  group_1,
+  group_2,
+  nperm = 10000
+)
+
+ +
+

Arguments

+
merged_data
+

the concatenated performance data from run_ml

+
metric
+

metric to compare, must be numeric

+
group_name
+

column with group variables to compare

+
group_1
+

name of one group to compare

+
group_2
+

name of other group to compare

+
nperm
+

number of permutations, default=10000

+
+
+

Value

+

numeric p-value comparing two models

+
+
+

Author

+

Begüm Topçuoğlu, topcuoglu.begum@gmail.com

+

Courtney R Armour, armourc@umich.edu

+
+ +
+

Examples

+
df <- dplyr::tibble(
+  model = c("rf", "rf", "glmnet", "glmnet", "svmRadial", "svmRadial"),
+  AUC = c(.2, 0.3, 0.8, 0.9, 0.85, 0.95)
+)
+set.seed(123)
+permute_p_value(df, "AUC", "model", "rf", "glmnet", nperm = 100)
+#> [1] 0.3663366
+
+
+
+ +
+ + +
+ + + + + + + + diff --git a/docs/reference/plot_hp_performance.html b/docs/reference/plot_hp_performance.html index 3b4cd628..55a03389 100644 --- a/docs/reference/plot_hp_performance.html +++ b/docs/reference/plot_hp_performance.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/plot_model_performance.html b/docs/reference/plot_model_performance.html index 33e8da4f..d60c03be 100644 --- a/docs/reference/plot_model_performance.html +++ b/docs/reference/plot_model_performance.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/preprocess_data.html b/docs/reference/preprocess_data.html index cf768f1b..bd8f45e1 100644 --- a/docs/reference/preprocess_data.html +++ b/docs/reference/preprocess_data.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/randomize_feature_order.html b/docs/reference/randomize_feature_order.html index 6167eb39..ecb5c6dd 100644 --- a/docs/reference/randomize_feature_order.html +++ b/docs/reference/randomize_feature_order.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 @@ -107,10 +107,10 @@

Examples

a = 4:6,
b = 7:9,
c = 10:12,
d = 13:15
)
randomize_feature_order(dat, "outcome")
-#> outcome a d c b
-#> 1 1  4 13 10  7
-#> 2 2  5 14 11  8
-#> 3 3  6 15 12  9
+#> outcome c b a d
+#> 1 1 10  7  4 13
+#> 2 2 11  8  5 14
+#> 3 3 12  9  6 15
diff --git a/docs/reference/reexports.html b/docs/reference/reexports.html index 956d56db..7a0dbcb1 100644 --- a/docs/reference/reexports.html +++ b/docs/reference/reexports.html @@ -32,7 +32,7 @@ mikropml - 1.2.2.9000 + 1.3.0 @@ -101,7 +101,7 @@

dplyr pipe

rlang
-

!!, .data, :=

+

!!, .data, :=

diff --git a/docs/reference/remove_singleton_columns.html b/docs/reference/remove_singleton_columns.html index 4b21208d..778d750d 100644 --- a/docs/reference/remove_singleton_columns.html +++ b/docs/reference/remove_singleton_columns.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/replace_spaces.html b/docs/reference/replace_spaces.html index d6558c95..0cd9e165 100644 --- a/docs/reference/replace_spaces.html +++ b/docs/reference/replace_spaces.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/run_ml.html b/docs/reference/run_ml.html index b47c7a7e..a6df61b5 100644 --- a/docs/reference/run_ml.html +++ b/docs/reference/run_ml.html @@ -23,7 +23,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/shuffle_group.html b/docs/reference/shuffle_group.html new file mode 100644 index 00000000..99aa43ef --- /dev/null +++ b/docs/reference/shuffle_group.html @@ -0,0 +1,141 @@ + +Shuffle the rows in a column — shuffle_group • mikropml + + +
+
+ + + +
+
+ + +
+

Shuffle the rows in a column

+
+ +
+
shuffle_group(dat, col_name)
+
+ +
+

Arguments

+
dat
+

a data frame containing col_name

+
col_name
+

column name to shuffle

+
+
+

Value

+

dat with the rows of col_name shuffled

+
+
+

Author

+

Courtney R Armour, armourc@umich.edu

+
+ +
+

Examples

+
set.seed(123)
+df <- dplyr::tibble(
+  condition = c("a", "a", "b", "b"),
+  AUC = c(.2, 0.3, 0.8, 0.9)
+)
+shuffle_group(df, "condition")
+#> # A tibble: 4 × 2
+#>   condition   AUC
+#>   <chr>     <dbl>
+#> 1 b           0.2
+#> 2 b           0.3
+#> 3 a           0.8
+#> 4 a           0.9
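For intuition (an illustrative sketch, not code from this patch): repeatedly shuffling the group labels is what builds the null distribution that a permuted p-value compares the observed difference against. Continuing from the `df` defined above:

set.seed(123)
null_diffs <- replicate(100, {
  shuffled <- shuffle_group(df, "condition")
  # difference in mean AUC between the two (shuffled) groups
  mean(shuffled$AUC[shuffled$condition == "a"]) -
    mean(shuffled$AUC[shuffled$condition == "b"])
})
obs_diff <- mean(df$AUC[df$condition == "a"]) - mean(df$AUC[df$condition == "b"])
# fraction of shuffles at least as extreme as the observed difference
mean(abs(null_diffs) >= abs(obs_diff))

The package's own p-value calculation may differ in detail (e.g. how the observed statistic and ties are counted).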
+
+
+
+ +
+ + +
+ + + + + + + + diff --git a/docs/reference/tidy_perf_data.html b/docs/reference/tidy_perf_data.html index 73a145fe..3d97b53d 100644 --- a/docs/reference/tidy_perf_data.html +++ b/docs/reference/tidy_perf_data.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/train_model.html b/docs/reference/train_model.html index dd9d4372..8a9766e1 100644 --- a/docs/reference/train_model.html +++ b/docs/reference/train_model.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/sitemap.xml b/docs/sitemap.xml index c203233c..6ed7b302 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -78,6 +78,9 @@ http://www.schlosslab.org/mikropml/reference/combine_hp_performance.html + + http://www.schlosslab.org/mikropml/reference/compare_models.html + http://www.schlosslab.org/mikropml/reference/createGroupedDataPartition.html @@ -108,6 +111,9 @@ http://www.schlosslab.org/mikropml/reference/get_corr_feats.html + + http://www.schlosslab.org/mikropml/reference/get_difference.html + http://www.schlosslab.org/mikropml/reference/get_feature_importance.html @@ -234,6 +240,9 @@ http://www.schlosslab.org/mikropml/reference/otu_small.html + + http://www.schlosslab.org/mikropml/reference/permute_p_value.html + http://www.schlosslab.org/mikropml/reference/plot_hp_performance.html @@ -282,6 +291,9 @@ http://www.schlosslab.org/mikropml/reference/setup_parallel.html + + http://www.schlosslab.org/mikropml/reference/shuffle_group.html + http://www.schlosslab.org/mikropml/reference/split_outcome_features.html diff --git a/man/get_difference.Rd b/man/get_difference.Rd new file mode 100644 index 00000000..158e682f --- /dev/null +++ b/man/get_difference.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/compare_models.R +\name{get_difference} +\alias{get_difference} +\title{Average metric difference} +\usage{ +get_difference(sub_data, group_name, metric) +} +\arguments{ +\item{sub_data}{subset of the merged performance data frame for two groups} + +\item{group_name}{name of column with group variable} + +\item{metric}{metric to compare} +} +\value{ +numeric difference in the average metric between the two groups +} +\description{ +Calculate the difference in the mean of the metric for two groups +} +\examples{ +df <- dplyr::tibble( + condition = c("a", "a", "b", "b"), + AUC = c(.2, 0.3, 0.8, 0.9) +) +get_difference(df, "condition", "AUC") + +} +\author{ +Courtney Armour, \email{armourc@umich.edu} +} diff --git a/man/permute_p_value.Rd b/man/permute_p_value.Rd new file mode 100644 index 00000000..c3bc7350 --- /dev/null +++ b/man/permute_p_value.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/compare_models.R +\name{permute_p_value} +\alias{permute_p_value} +\title{Calculated a permuted p-value comparing two models} +\usage{ +permute_p_value( + merged_data, + metric, + group_name, + group_1, + group_2, + nperm = 10000 +) +} +\arguments{ +\item{merged_data}{the concatenated performance data from \code{run_ml}} + +\item{metric}{metric to compare, must be numeric} + +\item{group_name}{column with group variables to compare} + +\item{group_1}{name of one group to compare} + +\item{group_2}{name of other group to compare} + +\item{nperm}{number of permutations, default=10000} +} +\value{ +numeric p-value comparing two models +} +\description{ +Calculated a permuted p-value comparing two models +} +\examples{ +df <- dplyr::tibble( + model = c("rf", "rf", "glmnet", "glmnet", "svmRadial", 
"svmRadial"), + AUC = c(.2, 0.3, 0.8, 0.9, 0.85, 0.95) +) +set.seed(123) +permute_p_value(df, "AUC", "model", "rf", "glmnet", nperm = 100) +} +\author{ +Begüm Topçuoğlu, \email{topcuoglu.begum@gmail.com} + +Courtney R Armour, \email{armourc@umich.edu} +} diff --git a/man/reexports.Rd b/man/reexports.Rd index 43708a49..b07f3030 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -19,6 +19,6 @@ below to see their documentation. \item{dplyr}{\code{\link[dplyr:reexports]{\%>\%}}} - \item{rlang}{\code{\link[rlang:nse-force]{!!}}, \code{\link[rlang:tidyeval-data]{.data}}, \code{\link[rlang:nse-force]{:=}}} + \item{rlang}{\code{\link[rlang:injection-operator]{!!}}, \code{\link[rlang:dot-data]{.data}}, \code{\link[rlang:dyn-dots]{:=}}} }} diff --git a/man/shuffle_group.Rd b/man/shuffle_group.Rd new file mode 100644 index 00000000..313f4f04 --- /dev/null +++ b/man/shuffle_group.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/compare_models.R +\name{shuffle_group} +\alias{shuffle_group} +\title{Shuffle the rows in a column} +\usage{ +shuffle_group(dat, col_name) +} +\arguments{ +\item{dat}{a data frame containing \code{col_name}} + +\item{col_name}{column name to shuffle} +} +\value{ +\code{dat} with the rows of \code{col_name} shuffled +} +\description{ +Shuffle the rows in a column +} +\examples{ +set.seed(123) +df <- dplyr::tibble( + condition = c("a", "a", "b", "b"), + AUC = c(.2, 0.3, 0.8, 0.9) +) +shuffle_group(df, "condition") +} +\author{ +Courtney R Armour, \email{armourc@umich.edu} +} From e31c72f86f238e477f3abd2927d2baad1798cb2f Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 19 May 2022 12:40:56 -0400 Subject: [PATCH 04/10] Fix typos --- R/compare_models.R | 2 +- R/data.R | 10 +++++----- vignettes/introduction.Rmd | 4 ++-- vignettes/paper.Rmd | 2 +- vignettes/preprocess.Rmd | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/R/compare_models.R b/R/compare_models.R index e6ef622e..8cc66b08 100644 --- a/R/compare_models.R +++ b/R/compare_models.R @@ -134,7 +134,7 @@ permute_p_value <- function(merged_data, metric, group_name, group_1, group_2, n #' @param group_name column with group variables to compare #' @param nperm number of permutations, default=10000 #' -#' @return a table of p-values for all pairs of group varible +#' @return a table of p-values for all pairs of group variable #' @export #' @author Courtney R Armour, \email{armourc@@umich.edu} #' diff --git a/R/data.R b/R/data.R index 8df06165..9f12e9b0 100644 --- a/R/data.R +++ b/R/data.R @@ -32,19 +32,19 @@ #' Cross validation on `train_data_mini` with grouped features. 
"otu_mini_cv" -#' Results from running the pipline with L2 logistic regression on `otu_mini_bin` with feature importance and grouping +#' Results from running the pipeline with L2 logistic regression on `otu_mini_bin` with feature importance and grouping "otu_mini_bin_results_glmnet" -#' Results from running the pipline with random forest on `otu_mini_bin` +#' Results from running the pipeline with random forest on `otu_mini_bin` "otu_mini_bin_results_rf" -#' Results from running the pipline with svmRadial on `otu_mini_bin` +#' Results from running the pipeline with svmRadial on `otu_mini_bin` "otu_mini_bin_results_svmRadial" -#' Results from running the pipline with xbgTree on `otu_mini_bin` +#' Results from running the pipeline with xbgTree on `otu_mini_bin` "otu_mini_bin_results_xgbTree" -#' Results from running the pipline with rpart2 on `otu_mini_bin` +#' Results from running the pipeline with rpart2 on `otu_mini_bin` "otu_mini_bin_results_rpart2" #' Results from running the pipeline with glmnet on `otu_mini_bin` with `Otu00001` diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index 9cb1d8a9..faacf965 100644 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -335,7 +335,7 @@ depending on how many samples and groups you have. This is because it won't be exactly what you specify with `training_frac`, since you have to include all of one group in either the training set _or_ the test set. -### Controling how groups are assigned to partitions +### Controlling how groups are assigned to partitions When you use the `groups` parameter as above, by default `run_ml()` will assume that you want all of the observations from each group to be placed in the same @@ -426,7 +426,7 @@ There are several columns: 1. `pvalue`: the probability of obtaining the actual performance value under the null hypothesis. 1. `names`: The feature that was permuted. 1. `method`: The ML method used. -1. `perf_metric_name`: The peformance metric used. +1. `perf_metric_name`: The performance metric used. 1. `seed`: The seed (if set). As you can see here, the differences are negligible (close to zero), which makes diff --git a/vignettes/paper.Rmd b/vignettes/paper.Rmd index 0aafe5aa..11637300 100644 --- a/vignettes/paper.Rmd +++ b/vignettes/paper.Rmd @@ -72,7 +72,7 @@ Machine learning (ML) for classification and prediction based on a set of features is used to make decisions in healthcare, economics, criminal justice and more. However, implementing an ML pipeline including preprocessing, model selection, and evaluation can be time-consuming, confusing, and difficult. Here, -we present [`mikropml`](http://www.schlosslab.org/mikropml/) (prononced +we present [`mikropml`](http://www.schlosslab.org/mikropml/) (pronounced "meek-ROPE em el"), an easy-to-use R package that implements ML pipelines using regression, support vector machines, decision trees, random forest, or gradient-boosted trees. The package is available on diff --git a/vignettes/preprocess.Rmd b/vignettes/preprocess.Rmd index 3b7051c7..8eebee7a 100644 --- a/vignettes/preprocess.Rmd +++ b/vignettes/preprocess.Rmd @@ -81,7 +81,7 @@ preprocess_data(dataset = bin_df, outcome_colname = "outcome") The output is a list: `dat_transformed` which has the transformed data, `grp_feats` which is a list of grouped features, and `removed_feats` which is a -list of featuures that were removed. Here, `grp_feats` is `NULL` because there +list of features that were removed. 
Here, `grp_feats` is `NULL` because there are no perfectly correlated features (e.g. `c(0,1,0)` and `c(0,1,0)`, or `c(0,1,0)` and `c(1,0,1)` - see below for more details). From f8bcec96f473a234c689c9b28df7503759ff0778 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 19 May 2022 15:19:22 -0400 Subject: [PATCH 05/10] Make shuffle_groups() & get_difference() internal functions --- R/compare_models.R | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/R/compare_models.R b/R/compare_models.R index 8cc66b08..5d4844e7 100644 --- a/R/compare_models.R +++ b/R/compare_models.R @@ -1,5 +1,3 @@ -#' Average metric difference -#' #' Calculate the difference in the mean of the metric for two groups #' #' @param sub_data subset of the merged performance data frame for two groups @@ -8,7 +6,7 @@ #' #' @return numeric difference in the average metric between the two groups #' -#' @export +#' @noRd #' @author Courtney Armour, \email{armourc@@umich.edu} #' #' @examples @@ -38,7 +36,7 @@ get_difference <- function(sub_data, group_name, metric) { #' @param col_name column name to shuffle #' #' @return `dat` with the rows of `col_name` shuffled -#' @export +#' @noRd #' @author Courtney R Armour, \email{armourc@@umich.edu} #' #' @examples @@ -62,7 +60,6 @@ shuffle_group <- function(dat, col_name) { return(data_shuffled) } - #' Calculated a permuted p-value comparing two models #' #' @inheritParams compare_models From d147a9960b550ea090cb11701e7371cc62553ad0 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 19 May 2022 15:19:35 -0400 Subject: [PATCH 06/10] document() --- NAMESPACE | 2 -- man/compare_models.Rd | 2 +- man/get_difference.Rd | 32 --------------------------- man/otu_mini_bin_results_glmnet.Rd | 4 ++-- man/otu_mini_bin_results_rf.Rd | 4 ++-- man/otu_mini_bin_results_rpart2.Rd | 4 ++-- man/otu_mini_bin_results_svmRadial.Rd | 4 ++-- man/otu_mini_bin_results_xgbTree.Rd | 4 ++-- man/shuffle_group.Rd | 30 ------------------------- 9 files changed, 11 insertions(+), 75 deletions(-) delete mode 100644 man/get_difference.Rd delete mode 100644 man/shuffle_group.Rd diff --git a/NAMESPACE b/NAMESPACE index 8410e1c2..68575aad 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,7 +10,6 @@ export(compare_models) export(contr.ltfr) export(define_cv) export(get_caret_processed_df) -export(get_difference) export(get_feature_importance) export(get_hp_performance) export(get_hyperparams_list) @@ -29,7 +28,6 @@ export(randomize_feature_order) export(remove_singleton_columns) export(replace_spaces) export(run_ml) -export(shuffle_group) export(tidy_perf_data) export(train_model) importFrom(MLmetrics,AUC) diff --git a/man/compare_models.Rd b/man/compare_models.Rd index d2ccab3e..3f821954 100644 --- a/man/compare_models.Rd +++ b/man/compare_models.Rd @@ -17,7 +17,7 @@ compare_models(merged_data, metric, group_name, nperm = 10000) \item{nperm}{number of permutations, default=10000} } \value{ -a table of p-values for all pairs of group varible +a table of p-values for all pairs of group variable } \description{ A wrapper for \code{permute_p_value()}. 
diff --git a/man/get_difference.Rd b/man/get_difference.Rd deleted file mode 100644 index 158e682f..00000000 --- a/man/get_difference.Rd +++ /dev/null @@ -1,32 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/compare_models.R -\name{get_difference} -\alias{get_difference} -\title{Average metric difference} -\usage{ -get_difference(sub_data, group_name, metric) -} -\arguments{ -\item{sub_data}{subset of the merged performance data frame for two groups} - -\item{group_name}{name of column with group variable} - -\item{metric}{metric to compare} -} -\value{ -numeric difference in the average metric between the two groups -} -\description{ -Calculate the difference in the mean of the metric for two groups -} -\examples{ -df <- dplyr::tibble( - condition = c("a", "a", "b", "b"), - AUC = c(.2, 0.3, 0.8, 0.9) -) -get_difference(df, "condition", "AUC") - -} -\author{ -Courtney Armour, \email{armourc@umich.edu} -} diff --git a/man/otu_mini_bin_results_glmnet.Rd b/man/otu_mini_bin_results_glmnet.Rd index 41c4eb93..2af95b37 100644 --- a/man/otu_mini_bin_results_glmnet.Rd +++ b/man/otu_mini_bin_results_glmnet.Rd @@ -3,7 +3,7 @@ \docType{data} \name{otu_mini_bin_results_glmnet} \alias{otu_mini_bin_results_glmnet} -\title{Results from running the pipline with L2 logistic regression on \code{otu_mini_bin} with feature importance and grouping} +\title{Results from running the pipeline with L2 logistic regression on \code{otu_mini_bin} with feature importance and grouping} \format{ An object of class \code{list} of length 4. } @@ -11,6 +11,6 @@ An object of class \code{list} of length 4. otu_mini_bin_results_glmnet } \description{ -Results from running the pipline with L2 logistic regression on \code{otu_mini_bin} with feature importance and grouping +Results from running the pipeline with L2 logistic regression on \code{otu_mini_bin} with feature importance and grouping } \keyword{datasets} diff --git a/man/otu_mini_bin_results_rf.Rd b/man/otu_mini_bin_results_rf.Rd index 9ea47cac..302a0d6c 100644 --- a/man/otu_mini_bin_results_rf.Rd +++ b/man/otu_mini_bin_results_rf.Rd @@ -3,7 +3,7 @@ \docType{data} \name{otu_mini_bin_results_rf} \alias{otu_mini_bin_results_rf} -\title{Results from running the pipline with random forest on \code{otu_mini_bin}} +\title{Results from running the pipeline with random forest on \code{otu_mini_bin}} \format{ An object of class \code{list} of length 4. } @@ -11,6 +11,6 @@ An object of class \code{list} of length 4. otu_mini_bin_results_rf } \description{ -Results from running the pipline with random forest on \code{otu_mini_bin} +Results from running the pipeline with random forest on \code{otu_mini_bin} } \keyword{datasets} diff --git a/man/otu_mini_bin_results_rpart2.Rd b/man/otu_mini_bin_results_rpart2.Rd index a4ad66ec..72748945 100644 --- a/man/otu_mini_bin_results_rpart2.Rd +++ b/man/otu_mini_bin_results_rpart2.Rd @@ -3,7 +3,7 @@ \docType{data} \name{otu_mini_bin_results_rpart2} \alias{otu_mini_bin_results_rpart2} -\title{Results from running the pipline with rpart2 on \code{otu_mini_bin}} +\title{Results from running the pipeline with rpart2 on \code{otu_mini_bin}} \format{ An object of class \code{list} of length 4. } @@ -11,6 +11,6 @@ An object of class \code{list} of length 4. 
otu_mini_bin_results_rpart2 } \description{ -Results from running the pipline with rpart2 on \code{otu_mini_bin} +Results from running the pipeline with rpart2 on \code{otu_mini_bin} } \keyword{datasets} diff --git a/man/otu_mini_bin_results_svmRadial.Rd b/man/otu_mini_bin_results_svmRadial.Rd index 1180e950..66194ad5 100644 --- a/man/otu_mini_bin_results_svmRadial.Rd +++ b/man/otu_mini_bin_results_svmRadial.Rd @@ -3,7 +3,7 @@ \docType{data} \name{otu_mini_bin_results_svmRadial} \alias{otu_mini_bin_results_svmRadial} -\title{Results from running the pipline with svmRadial on \code{otu_mini_bin}} +\title{Results from running the pipeline with svmRadial on \code{otu_mini_bin}} \format{ An object of class \code{list} of length 4. } @@ -11,6 +11,6 @@ An object of class \code{list} of length 4. otu_mini_bin_results_svmRadial } \description{ -Results from running the pipline with svmRadial on \code{otu_mini_bin} +Results from running the pipeline with svmRadial on \code{otu_mini_bin} } \keyword{datasets} diff --git a/man/otu_mini_bin_results_xgbTree.Rd b/man/otu_mini_bin_results_xgbTree.Rd index a509b3a2..3b193cd9 100644 --- a/man/otu_mini_bin_results_xgbTree.Rd +++ b/man/otu_mini_bin_results_xgbTree.Rd @@ -3,7 +3,7 @@ \docType{data} \name{otu_mini_bin_results_xgbTree} \alias{otu_mini_bin_results_xgbTree} -\title{Results from running the pipline with xbgTree on \code{otu_mini_bin}} +\title{Results from running the pipeline with xbgTree on \code{otu_mini_bin}} \format{ An object of class \code{list} of length 4. } @@ -11,6 +11,6 @@ An object of class \code{list} of length 4. otu_mini_bin_results_xgbTree } \description{ -Results from running the pipline with xbgTree on \code{otu_mini_bin} +Results from running the pipeline with xbgTree on \code{otu_mini_bin} } \keyword{datasets} diff --git a/man/shuffle_group.Rd b/man/shuffle_group.Rd deleted file mode 100644 index 313f4f04..00000000 --- a/man/shuffle_group.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/compare_models.R -\name{shuffle_group} -\alias{shuffle_group} -\title{Shuffle the rows in a column} -\usage{ -shuffle_group(dat, col_name) -} -\arguments{ -\item{dat}{a data frame containing \code{col_name}} - -\item{col_name}{column name to shuffle} -} -\value{ -\code{dat} with the rows of \code{col_name} shuffled -} -\description{ -Shuffle the rows in a column -} -\examples{ -set.seed(123) -df <- dplyr::tibble( - condition = c("a", "a", "b", "b"), - AUC = c(.2, 0.3, 0.8, 0.9) -) -shuffle_group(df, "condition") -} -\author{ -Courtney R Armour, \email{armourc@umich.edu} -} From 518bbc8228e6f070671ef1e304cd7d9d32dfe8fc Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 19 May 2022 15:24:52 -0400 Subject: [PATCH 07/10] Create model evaluation section of reference page --- _pkgdown.yml | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/_pkgdown.yml b/_pkgdown.yml index b126508f..dc00676f 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -29,15 +29,22 @@ reference: - mikropml - preprocess_data - run_ml -- title: Plotting & evalutation helpers +- title: Plotting helpers desc: > - Visualize & evalutate performance to help you tune hyperparameters and choose model methods. + Visualize results to help you tune hyperparameters and choose model methods. contents: - - compare_models - starts_with('plot') - tidy_perf_data - get_hp_performance - combine_hp_performance +- title: Model evaluation + desc: > + Evaluate and interpret models. 
+ contents: + - get_feature_importance + - get_performance_tbl + - compare_models + - permute_p_value - title: Package Data - subtitle: datasets contents: @@ -54,9 +61,8 @@ reference: - replace_spaces - title: Pipeline customization desc: > - These are functions called by preprocess_data() or run_ml(). - We make them available in case you would like to customize various steps - of the pipeline beyond the arguments provided by the main functions. + Customize various steps of the pipeline beyond the arguments provided by + run_ml() and preprocess_data(). contents: - remove_singleton_columns - get_caret_processed_df @@ -70,6 +76,4 @@ reference: - get_perf_metric_fn - train_model - calc_perf_metrics - - get_performance_tbl - - get_feature_importance - group_correlated_features From 5334183cba7c64e31d0f51676514f6b60edcd21e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 19 May 2022 20:05:52 +0000 Subject: [PATCH 08/10] =?UTF-8?q?=F0=9F=93=91=20Build=20docs=20site?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/CODE_OF_CONDUCT.html | 1 - docs/CONTRIBUTING.html | 1 - docs/LICENSE.html | 1 - docs/SUPPORT.html | 1 - docs/articles/introduction.html | 356 ++++----------- docs/articles/paper.html | 415 +++++------------- docs/articles/parallel.html | 164 +++---- docs/articles/preprocess.html | 214 +++------ docs/articles/tuning.html | 162 ++----- docs/index.html | 7 +- docs/news/index.html | 37 +- docs/pkgdown.yml | 4 +- docs/pull_request_template.html | 26 +- docs/reference/compare_models.html | 2 +- docs/reference/get_feature_importance.html | 2 +- docs/reference/get_perf_metric_fn.html | 6 +- docs/reference/index.html | 50 ++- .../otu_mini_bin_results_glmnet.html | 6 +- docs/reference/otu_mini_bin_results_rf.html | 6 +- .../otu_mini_bin_results_rpart2.html | 6 +- .../otu_mini_bin_results_svmRadial.html | 6 +- .../otu_mini_bin_results_xgbTree.html | 6 +- 22 files changed, 423 insertions(+), 1056 deletions(-) diff --git a/docs/CODE_OF_CONDUCT.html b/docs/CODE_OF_CONDUCT.html index 5bb0afd6..356d405e 100644 --- a/docs/CODE_OF_CONDUCT.html +++ b/docs/CODE_OF_CONDUCT.html @@ -73,7 +73,6 @@

Contributor Covenant Code of Conduct

-

This document was adapted from the Tidyverse Code of Conduct.

Our Pledge

diff --git a/docs/CONTRIBUTING.html b/docs/CONTRIBUTING.html index 4cc444a7..cfcb2766 100644 --- a/docs/CONTRIBUTING.html +++ b/docs/CONTRIBUTING.html @@ -73,7 +73,6 @@

Contributing to mikropml

-

This document was adapted from the Tidyverse Contributing guide.

Fixing typos

diff --git a/docs/LICENSE.html b/docs/LICENSE.html index 2d2d71ff..a38ff1d8 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -73,7 +73,6 @@

MIT License

-

Copyright (c) 2019-2021 Begüm D. Topçuoğlu, Zena Lapp, Kelly L. Sovacool, Evan Snitkin, Jenna Wiens, and Patrick D. Schloss

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

diff --git a/docs/SUPPORT.html b/docs/SUPPORT.html index e80f09e2..31683945 100644 --- a/docs/SUPPORT.html +++ b/docs/SUPPORT.html @@ -73,7 +73,6 @@

Getting help with mikropml

-

Thanks for using mikropml! Before filing an issue, there are a few places to explore and pieces to put together to make the process as smooth as possible.

Make a reprex

diff --git a/docs/articles/introduction.html b/docs/articles/introduction.html index 57ba736e..8466437c 100644 --- a/docs/articles/introduction.html +++ b/docs/articles/introduction.html @@ -100,7 +100,7 @@ -
+

Before running ML

-

Before you execute run_ml(), you should consider -preprocessing your data, either on your own or with the -preprocess_data() function. You can learn more about this -in the preprocessing vignette: vignette("preprocess").

+

Before you execute run_ml(), you should consider preprocessing your data, either on your own or with the preprocess_data() function. You can learn more about this in the preprocessing vignette: vignette("preprocess").
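A hedged sketch of that workflow (the object name `prep` is illustrative; see the preprocessing vignette for the available options):

prep <- preprocess_data(dataset = otu_mini_bin, outcome_colname = "dx")
# dat_transformed holds the processed features; grp_feats and removed_feats
# record any grouped or removed columns
dat_processed <- prep$dat_transformed
results <- run_ml(dat_processed, "glmnet", outcome_colname = "dx", seed = 2019)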

The simplest way to run_ml()

-

As mentioned above, the minimal input is your dataset -(dataset) and the machine learning model you want to use -(method).

+

As mentioned above, the minimal input is your dataset (dataset) and the machine learning model you want to use (method).

You may also want to provide:

    -
  • The outcome column name. By default run_ml() will pick -the first column, but it’s best practice to specify the column name -explicitly.
  • -
  • A seed so that the results will be reproducible, and so that you get -the same results as those you see here (i.e have the same train/test -split).
  • +
  • The outcome column name. By default run_ml() will pick the first column, but it’s best practice to specify the column name explicitly.
  • +
  • A seed so that the results will be reproducible, and so that you get the same results as those you see here (i.e have the same train/test split).
-

Say we want to use logistic regression, then the method we will use -is glmnet. To do so, run the ML pipeline with:

+

Say we want to use logistic regression, then the method we will use is glmnet. To do so, run the ML pipeline with:

 results <- run_ml(otu_mini_bin,
                   'glmnet',
@@ -253,25 +200,16 @@ 

The simplest way to run_ml()= 2019)

You’ll notice a few things:

    -
  1. It takes a little while to run. This is because of some of the -parameters we use.
  2. -
  3. There is a message stating that ‘dx’ is being used as the outcome -column. This is what we want, but it’s a nice sanity check!
  4. -
  5. There was a warning. Don’t worry about this warning right now - it -just means that some of the hyperparameters aren’t a good fit - but if -you’re interested in learning more, see -vignette("tuning").
  6. +
  7. It takes a little while to run. This is because of some of the parameters we use.
  8. +
  9. There is a message stating that ‘dx’ is being used as the outcome column. This is what we want, but it’s a nice sanity check!
  10. +
  11. There was a warning. Don’t worry about this warning right now - it just means that some of the hyperparameters aren’t a good fit - but if you’re interested in learning more, see vignette("tuning").
-

Now, let’s dig into the output a bit. The results is a list of 4 -things:

+

Now, let’s dig into the output a bit. The results is a list of 4 things:

 names(results)
 #> [1] "trained_model"      "test_data"          "performance"       
 #> [4] "feature_importance"
-

trained_model is the trained model from -caret. There is a bunch of info in this that we won’t get -into, because you can learn more from the caret::train() -documentation.

+

trained_model is the trained model from caret. There is a bunch of info in this that we won’t get into, because you can learn more from the caret::train() documentation.

 names(results$trained_model)
 #>  [1] "method"       "modelInfo"    "modelType"    "results"      "pred"        
@@ -279,13 +217,7 @@ 

The simplest way to run_ml()#> [11] "finalModel" "preProcess" "trainingData" "ptype" "resample" #> [16] "resampledCM" "perfNames" "maximize" "yLimits" "times" #> [21] "levels" "terms" "coefnames" "xlevels"

-

test_data is the partition of the dataset that was used -for testing. In machine learning, it’s always important to have a -held-out test dataset that is not used in the training stage. In this -pipeline we do that using run_ml() where we split your data -into training and testing sets. The training data are used to build the -model (e.g. tune hyperparameters, learn the data) and the test data are -used to evaluate how well the model performs.

+

test_data is the partition of the dataset that was used for testing. In machine learning, it’s always important to have a held-out test dataset that is not used in the training stage. In this pipeline we do that using run_ml() where we split your data into training and testing sets. The training data are used to build the model (e.g. tune hyperparameters, learn the data) and the test data are used to evaluate how well the model performs.

 head(results$test_data)
 #>        dx Otu00009 Otu00005 Otu00010 Otu00001 Otu00008 Otu00004 Otu00003
@@ -302,10 +234,7 @@ 

The simplest way to run_ml()#> 17 357 253 341 #> 27 25 322 5 #> 30 179 6 30

-

performance is a dataframe of (mainly) performance -metrics (1 column for cross-validation performance metric, several for -test performance metrics, and 2 columns at the end with ML method and -seed):

+

performance is a dataframe of (mainly) performance metrics (1 column for cross-validation performance metric, several for test performance metrics, and 2 columns at the end with ML method and seed):

-

When using logistic regression for binary classification, area under -the receiver-operator characteristic curve (AUC) is a useful metric to -evaluate model performance. Because of that, it’s the default that we -use for mikropml. However, it is crucial to evaluate your -model performance using multiple metrics. Below you can find more -information about other performance metrics and how to use them in our -package.

-

cv_metric_AUC is the AUC for the cross-validation folds -for the training data. This gives us a sense of how well the model -performs on the training data.

-

Most of the other columns are performance metrics for the test data — -the data that wasn’t used to build the model. Here, you can see that the -AUC for the test data is not much above 0.5, suggesting that this model -does not predict much better than chance, and that the model is overfit -because the cross-validation AUC (cv_metric_AUC, measured -during training) is much higher than the testing AUC. This isn’t too -surprising since we’re using so few features with this example dataset, -so don’t be discouraged. The default option also provides a number of -other performance metrics that you might be interested in, including -area under the precision-recall curve (prAUC).

-

The last columns of results$performance are the method -and seed (if you set one) to help with combining results from multiple -runs (see vignette("parallel")).

-

feature_importance has information about feature -importance values if find_feature_importance = TRUE (the -default is FALSE). Since we used the defaults, there’s -nothing here:

+

When using logistic regression for binary classification, area under the receiver-operator characteristic curve (AUC) is a useful metric to evaluate model performance. Because of that, it’s the default that we use for mikropml. However, it is crucial to evaluate your model performance using multiple metrics. Below you can find more information about other performance metrics and how to use them in our package.

+

cv_metric_AUC is the AUC for the cross-validation folds for the training data. This gives us a sense of how well the model performs on the training data.

+

Most of the other columns are performance metrics for the test data — the data that wasn’t used to build the model. Here, you can see that the AUC for the test data is not much above 0.5, suggesting that this model does not predict much better than chance, and that the model is overfit because the cross-validation AUC (cv_metric_AUC, measured during training) is much higher than the testing AUC. This isn’t too surprising since we’re using so few features with this example dataset, so don’t be discouraged. The default option also provides a number of other performance metrics that you might be interested in, including area under the precision-recall curve (prAUC).

+

The last columns of results$performance are the method and seed (if you set one) to help with combining results from multiple runs (see vignette("parallel")).
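For example (an illustrative sketch, not part of this changeset), performance tables from several seeds can be row-bound before plotting or comparing models:

# repeat the pipeline over several seeds and combine the performance rows
perf_all <- dplyr::bind_rows(
  lapply(1:5, function(s) {
    run_ml(otu_mini_bin, "glmnet", outcome_colname = "dx", seed = s)$performance
  })
)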

+

feature_importance has information about feature importance values if find_feature_importance = TRUE (the default is FALSE). Since we used the defaults, there’s nothing here:

 results$feature_importance
 #> [1] "Skipped feature importance"
@@ -349,24 +256,17 @@

The simplest way to run_ml()

Customizing parameters

-

There are a few arguments that allow you to change how you execute -run_ml(). We’ve chosen reasonable defaults for you, but we -encourage you to change these if you think something else would be -better for your data.

+

There are a few arguments that allow you to change how you execute run_ml(). We’ve chosen reasonable defaults for you, but we encourage you to change these if you think something else would be better for your data.

-

Changing kfold, cv_times, and -training_frac +

Changing kfold, cv_times, and training_frac

  • -kfold: The number of folds to run for cross-validation -(default: 5).
  • +kfold: The number of folds to run for cross-validation (default: 5).
  • -cv_times: The number of times to run repeated -cross-validation (default: 100).
  • +cv_times: The number of times to run repeated cross-validation (default: 100).
  • -training_frac: The fraction of data for the training -set (default: 0.8). The rest of the data is used for testing.
  • +training_frac: The fraction of data for the training set (default: 0.8). The rest of the data is used for testing.

Here’s an example where we change some of the default parameters:

@@ -391,21 +291,11 @@ 

Changing kfold, #> #> This warning usually means that the model didn't converge in some cross-validation folds because it is predicting something close to a constant. As a result, certain performance metrics can't be calculated. This suggests that some of the hyperparameters chosen are doing very poorly. #> Training complete.

-

You might have noticed that this one ran faster — that’s because we -reduced kfold and cv_times. This is okay for -testing things out and may even be necessary for smaller datasets. But -in general it may be better to have larger numbers for these parameters; -we think the defaults are a good starting point (Topçuoğlu et al. 2020).

+

You might have noticed that this one ran faster — that’s because we reduced kfold and cv_times. This is okay for testing things out and may even be necessary for smaller datasets. But in general it may be better to have larger numbers for these parameters; we think the defaults are a good starting point (Topçuoğlu et al. 2020).

Custom training indices

-

When training_frac is a fraction between 0 and 1, a -random sample of observations in the dataset are chosen for the training -set to satisfy the training_frac. However, in some cases -you might wish to control exactly which observations are in the training -set. You can instead assign training_frac a vector of -indices that correspond to which rows of the dataset should go in the -training set (all remaining sequences will go in the testing set).

+

When training_frac is a fraction between 0 and 1, a random sample of observations in the dataset are chosen for the training set to satisfy the training_frac. However, in some cases you might wish to control exactly which observations are in the training set. You can instead assign training_frac a vector of indices that correspond to which rows of the dataset should go in the training set (all remaining sequences will go in the testing set).

 n_obs <- otu_mini_bin %>% nrow()
 training_size <- 0.8 * n_obs
@@ -427,20 +317,10 @@ 

Custom training indices

Changing the performance metric

-

There are two arguments that allow you to change what performance -metric to use for model evaluation, and what performance metrics to -calculate using the test data.

-

perf_metric_function is the function used to calculate -the performance metrics.

-

The default for classification is -caret::multiClassSummary() and the default for regression -is caret::defaultSummary(). We’d suggest not changing this -unless you really know what you’re doing.

-

perf_metric_name is the column name from the output of -perf_metric_function. We chose reasonable defaults (AUC for -binary, logLoss for multiclass, and RMSE for continuous), but the -default functions calculate a bunch of different performance metrics, so -you can choose a different one if you’d like.

+

There are two arguments that allow you to change what performance metric to use for model evaluation, and what performance metrics to calculate using the test data.

+

perf_metric_function is the function used to calculate the performance metrics.

+

The default for classification is caret::multiClassSummary() and the default for regression is caret::defaultSummary(). We’d suggest not changing this unless you really know what you’re doing.

+

perf_metric_name is the column name from the output of perf_metric_function. We chose reasonable defaults (AUC for binary, logLoss for multiclass, and RMSE for continuous), but the default functions calculate a bunch of different performance metrics, so you can choose a different one if you’d like.
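For instance (a hedged sketch; the corresponding code chunk is not shown in this diff), optimizing precision-recall AUC instead of AUC might look like:

results_pr <- run_ml(otu_mini_bin, "glmnet",
                     outcome_colname = "dx",
                     cv_times = 5,
                     perf_metric_name = "prAUC",
                     seed = 2019)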

The default performance metrics available for classification are:

#>  [1] "logLoss"                "AUC"                    "prAUC"                 
 #>  [4] "Accuracy"               "Kappa"                  "Mean_F1"               
@@ -464,8 +344,7 @@ 

Changing the performance metric#> #> This warning usually means that the model didn't converge in some cross-validation folds because it is predicting something close to a constant. As a result, certain performance metrics can't be calculated. This suggests that some of the hyperparameters chosen are doing very poorly. #> Training complete.

-

You’ll see that the cross-validation metric is prAUC, instead of the -default AUC:

+

You’ll see that the cross-validation metric is prAUC, instead of the default AUC:

 results_pr$performance
 #> # A tibble: 1 × 17
@@ -479,16 +358,8 @@ 

Changing the performance metric

Using groups

-

The optional groups is a vector of groups to keep -together when splitting the data into train and test sets and for -cross-validation. Sometimes it’s important to split up the data based on -a grouping instead of just randomly. This allows you to control for -similarities within groups that you don’t want to skew your predictions -(i.e. batch effects). For example, with biological data you may have -samples collected from multiple hospitals, and you might like to keep -observations from the same hospital in the same partition.

-

Here’s an example where we split the data into train/test sets based -on groups:

+

The optional groups is a vector of groups to keep together when splitting the data into train and test sets and for cross-validation. Sometimes it’s important to split up the data based on a grouping instead of just randomly. This allows you to control for similarities within groups that you don’t want to skew your predictions (i.e. batch effects). For example, with biological data you may have samples collected from multiple hospitals, and you might like to keep observations from the same hospital in the same partition.

+

Here’s an example where we split the data into train/test sets based on groups:
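A hedged sketch of such a call (not shown in this diff; the group labels below are made up for illustration):

# assign each observation to one of eight made-up batches, then keep
# observations from the same batch together across the train/test split
set.seed(2019)
grps <- sample(LETTERS[1:8], nrow(otu_mini_bin), replace = TRUE)
results_grp <- run_ml(otu_mini_bin, "glmnet",
                      outcome_colname = "dx",
                      groups = grps,
                      seed = 2019)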

-

The one difference here is run_ml() will report how much -of the data is in the training set if you run the above code chunk. This -can be a little finicky depending on how many samples and groups you -have. This is because it won’t be exactly what you specify with -training_frac, since you have to include all of one group -in either the training set or the test set.

+

The one difference here is run_ml() will report how much of the data is in the training set if you run the above code chunk. This can be a little finicky depending on how many samples and groups you have. This is because it won’t be exactly what you specify with training_frac, since you have to include all of one group in either the training set or the test set.

-

Controling how groups are assigned to partitions +

Controlling how groups are assigned to partitions

-

When you use the groups parameter as above, by default -run_ml() will assume that you want all of the observations -from each group to be placed in the same partition of the train/test -split. This makes sense when you want to use groups to control for batch -effects. However, in some cases you might prefer to control exactly -which groups end up in which partition, and you might even be okay with -some observations from the same group being assigned to different -partitions.

-

For example, say you want groups A and B to be used for training, C -and D for testing, and you don’t have a preference for what happens to -the other groups. You can give the group_partitions -parameter a named list to specify which groups should go in the training -set and which should go in the testing set.

+

When you use the groups parameter as above, by default run_ml() will assume that you want all of the observations from each group to be placed in the same partition of the train/test split. This makes sense when you want to use groups to control for batch effects. However, in some cases you might prefer to control exactly which groups end up in which partition, and you might even be okay with some observations from the same group being assigned to different partitions.

+

For example, say you want groups A and B to be used for training, C and D for testing, and you don’t have a preference for what happens to the other groups. You can give the group_partitions parameter a named list to specify which groups should go in the training set and which should go in the testing set.

 results_grp_part <- run_ml(otu_mini_bin, 
                       'glmnet', 
@@ -545,13 +400,8 @@ 

Controling how groups #> Groups will not be kept together in CV partitions because the number of groups in the training set is not larger than `kfold` #> Training the model... #> Training complete.

-

In the above case, all observations from A & B will be used for -training, all from C & D will be used for testing, and the remaining -groups will be randomly assigned to one or the other to satisfy the -training_frac as closely as possible.

-

In another scenario, maybe you want only groups A through F to be -used for training, but you also want to allow other observations not -selected for training from A through F to be used for testing:

+

In the above case, all observations from A & B will be used for training, all from C & D will be used for testing, and the remaining groups will be randomly assigned to one or the other to satisfy the training_frac as closely as possible.

+

In another scenario, maybe you want only groups A through F to be used for training, but you also want to allow other observations not selected for training from A through F to be used for testing:

 results_grp_trainA <- run_ml(otu_mini_bin, 
                       'glmnet', 
@@ -570,24 +420,14 @@ 

Controling how groups #> Groups will be kept together in CV partitions #> Training the model... #> Training complete.

-

If you need even more control than this, take a look at setting custom training indices. You -might also prefer to provide your own train control scheme with the -cross_val parameter in run_ml().

+

If you need even more control than this, take a look at setting custom training indices. You might also prefer to provide your own train control scheme with the cross_val parameter in run_ml().

Finding feature importance

-

To find which features are contributing to predictive power, you can -use find_feature_importance = TRUE. How we use permutation -importance to determine feature importance is described in (Topçuoğlu et al. 2020). Briefly, it permutes -each of the features individually (or correlated ones together) and -evaluates how much the performance metric decreases. The more -performance decreases when the feature is randomly shuffled, the more -important that feature is. The default is FALSE because it -takes a while to run and is only useful if you want to know what -features are important in predicting your outcome.

+

To find which features are contributing to predictive power, you can use find_feature_importance = TRUE. How we use permutation importance to determine feature importance is described in (Topçuoğlu et al. 2020). Briefly, it permutes each of the features individually (or correlated ones together) and evaluates how much the performance metric decreases. The more performance decreases when the feature is randomly shuffled, the more important that feature is. The default is FALSE because it takes a while to run and is only useful if you want to know what features are important in predicting your outcome.

Let’s look at some feature importance results:

 results_imp <- run_ml(otu_mini_bin,
@@ -624,34 +464,22 @@ 

Finding feature importanceThere are several columns:

  1. -perf_metric: The performance value of the permuted -feature.
  2. +perf_metric: The performance value of the permuted feature.
  3. -perf_metric_diff: The difference between the -performance for the actual and permuted data (i.e. test performance -minus permuted performance). Features with a larger -perf_metric_diff are more important.
  4. +perf_metric_diff: The difference between the performance for the actual and permuted data (i.e. test performance minus permuted performance). Features with a larger perf_metric_diff are more important.
  5. -pvalue: the probability of obtaining the actual -performance value under the null hypothesis.
  6. +pvalue: the probability of obtaining the actual performance value under the null hypothesis.
  7. names: The feature that was permuted.
  8. method: The ML method used.
  9. -perf_metric_name: The peformance metric used.
  10. +perf_metric_name: The performance metric used.
  11. seed: The seed (if set).
-

As you can see here, the differences are negligible (close to zero), -which makes sense since our model isn’t great. If you’re interested in -feature importance, it’s especially useful to run multiple different -train/test splits, as shown in our example -snakemake workflow.

-

You can also choose to permute correlated features together using -corr_thresh (default: 1). Any features that are above the -correlation threshold are permuted together; i.e. perfectly correlated -features are permuted together when using the default value.

+

As you can see here, the differences are negligible (close to zero), which makes sense since our model isn’t great. If you’re interested in feature importance, it’s especially useful to run multiple different train/test splits, as shown in our example snakemake workflow.

+

You can also choose to permute correlated features together using corr_thresh (default: 1). Any features that are above the correlation threshold are permuted together; i.e. perfectly correlated features are permuted together when using the default value.

 results_imp_corr <- run_ml(otu_mini_bin,
                            'glmnet',
@@ -682,32 +510,19 @@ 

Finding feature importance#> 1 glmnet AUC 2019 #> 2 glmnet AUC 2019 #> 3 glmnet AUC 2019

-

You can see which features were permuted together in the -names column. Here all 3 features were permuted together -(which doesn’t really make sense, but it’s just an example).

-

If you previously executed run_ml() without feature -importance but now wish to find feature importance after the fact, see -the example code in the get_feature_importance() -documentation.

-

get_feature_importance() can show a live progress bar, -see vignette("parallel") for examples.

+

You can see which features were permuted together in the names column. Here all 3 features were permuted together (which doesn’t really make sense, but it’s just an example).

+

If you previously executed run_ml() without feature importance but now wish to find feature importance after the fact, see the example code in the get_feature_importance() documentation.

+

get_feature_importance() can show a live progress bar, see vignette("parallel") for examples.

-

Tuning hyperparameters (using the hyperparameter -argument) +

Tuning hyperparameters (using the hyperparameter argument)

-

This is important, so we have a whole vignette about them. The bottom -line is we provide default hyperparameters that you can start with, but -it’s important to tune your hyperparameters. For more information about -what the default hyperparameters are, and how to tune hyperparameters, -see vignette("tuning").

+

This is important, so we have a whole vignette about them. The bottom line is we provide default hyperparameters that you can start with, but it’s important to tune your hyperparameters. For more information about what the default hyperparameters are, and how to tune hyperparameters, see vignette("tuning").
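As a quick orientation before turning to the tuning vignette (a hedged sketch, not part of this diff), a custom grid can be supplied through the hyperparameters argument:

results_hp <- run_ml(otu_mini_bin, "glmnet",
                     outcome_colname = "dx",
                     hyperparameters = list(alpha = 0, lambda = c(1e-3, 1e-2, 1e-1)),
                     seed = 2019)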

Other models

-

Here are examples of how to train and evaluate other models. The -output for all of them is very similar, so we won’t go into those -details.

+

Here are examples of how to train and evaluate other models. The output for all of them is very similar, so we won’t go into those details.

Random forest

@@ -716,11 +531,7 @@

Random forest'rf', cv_times = 5, seed = 2019)

-

You can also change the number of trees to use for random forest -(ntree; default: 1000). This can’t be tuned using -rf package implementation of random forest. Please refer to -caret documentation if you are interested in other packages -with random forest implementations.

+

You can also change the number of trees to use for random forest (ntree; default: 1000). This can’t be tuned using rf package implementation of random forest. Please refer to caret documentation if you are interested in other packages with random forest implementations.

 results_rf_nt <- run_ml(otu_mini_bin,
                         'rf',
@@ -745,8 +556,7 @@ 

SVM 'svmRadial', cv_times = 5, seed = 2019)

-

If you get a message “maximum number of iterations reached”, see this issue in -caret.

+

If you get a message “maximum number of iterations reached”, see this issue in caret.

@@ -755,8 +565,7 @@

Other data

Multiclass data

-

We provide otu_mini_multi with a multiclass outcome -(three or more outcomes):

+

We provide otu_mini_multi with a multiclass outcome (three or more outcomes):

 otu_mini_multi %>% dplyr::pull('dx') %>% unique()
 #> [1] "adenoma"   "carcinoma" "normal"
@@ -766,8 +575,7 @@

Multiclass data= "dx", seed = 2019 )

-

The performance metrics are slightly different, but the format of -everything else is the same:

+

The performance metrics are slightly different, but the format of everything else is the same:

 results_multi$performance
 #> # A tibble: 1 × 17
@@ -782,15 +590,13 @@ 

Multiclass data

Continuous data

-

And here’s an example for running continuous data, where the outcome -column is numerical:

+

And here’s an example for running continuous data, where the outcome column is numerical:

 results_cont <- run_ml(otu_mini_bin[, 2:11],
                        'glmnet',
                        outcome_colname = 'Otu00001',
                        seed = 2019)
-

Again, the performance metrics are slightly different, but the format -of the rest is the same:

+

Again, the performance metrics are slightly different, but the format of the rest is the same:

 results_cont$performance
 #> # A tibble: 1 × 6
@@ -800,22 +606,14 @@ 

Continuous data

-

References +

References

-
-
-Tang, Shengpu, Parmida Davarmanesh, Yanmeng Song, Danai Koutra, Michael -W. Sjoding, and Jenna Wiens. 2020. “Democratizing EHR -Analyses with FIDDLE: A Flexible Data-Driven Preprocessing -Pipeline for Structured Clinical Data.” J Am Med Inform -Assoc, October. https://doi.org/10.1093/jamia/ocaa139. -
-
-Topçuoğlu, Begüm D., Nicholas A. Lesniak, Mack T. Ruffin, Jenna Wiens, -and Patrick D. Schloss. 2020. “A Framework for -Effective Application of Machine Learning to -Microbiome-Based Classification -Problems.” mBio 11 (3). https://doi.org/10.1128/mBio.00434-20. +
+
+

Tang, Shengpu, Parmida Davarmanesh, Yanmeng Song, Danai Koutra, Michael W. Sjoding, and Jenna Wiens. 2020. “Democratizing EHR Analyses with FIDDLE: A Flexible Data-Driven Preprocessing Pipeline for Structured Clinical Data.” J Am Med Inform Assoc, October. https://doi.org/10.1093/jamia/ocaa139.

+
+
+

Topçuoğlu, Begüm D., Nicholas A. Lesniak, Mack T. Ruffin, Jenna Wiens, and Patrick D. Schloss. 2020. “A Framework for Effective Application of Machine Learning to Microbiome-Based Classification Problems.” mBio 11 (3). https://doi.org/10.1128/mBio.00434-20.

diff --git a/docs/articles/paper.html b/docs/articles/paper.html index 22fa9401..3dccf33d 100644 --- a/docs/articles/paper.html +++ b/docs/articles/paper.html @@ -100,14 +100,11 @@ -
+

Statement of need

-

Most applications of machine learning (ML) require reproducible steps -for data pre-processing, cross-validation, testing, model evaluation, -and often interpretation of why the model makes particular predictions. -Performing these steps is important, as failure to implement them can -result in incorrect and misleading results (Teschendorff 2019; Wiens et al. 2019).

-

Supervised ML is widely used to recognize patterns in large datasets -and to make predictions about outcomes of interest. Several packages -including caret (Kuhn 2008) -and tidymodels (Kuhn, Wickham, and -RStudio 2020) in R, scikitlearn (Pedregosa et al. 2011) in Python, and the H2O -autoML platform (H2O.ai 2020) -allow scientists to train ML models with a variety of algorithms. While -these packages provide the tools necessary for each ML step, they do not -implement a complete ML pipeline according to good practices in the -literature. This makes it difficult for practitioners new to ML to -easily begin to perform ML analyses.

-

To enable a broader range of researchers to apply ML to their problem -domains, we created mikropml, -an easy-to-use R package (R Core Team -2020) that implements the ML pipeline created by Topçuoğlu et -al. (Topçuoğlu et al. 2020) in a -single function that returns a trained model, model performance metrics -and feature importance. mikropml leverages the -caret package to support several ML algorithms: linear -regression, logistic regression, support vector machines with a radial -basis kernel, decision trees, random forest, and gradient boosted trees. -It incorporates good practices in ML training, testing, and model -evaluation (Topçuoğlu et al. 2020; Teschendorff -2019). Furthermore, it provides data preprocessing steps based on -the FIDDLE (FlexIble Data-Driven pipeLinE) framework outlined in Tang -et al. (Tang et al. 2020) and -post-training permutation importance steps to estimate the importance of -each feature in the models trained (Breiman 2001; -Fisher, Rudin, and Dominici 2018).

-

mikropml can be used as a starting point in the -application of ML to datasets from many different fields. It has already -been applied to microbiome data to categorize patients with colorectal -cancer (Topçuoğlu et al. 2020), to -identify differences in genomic and clinical features associated with -bacterial infections (Lapp et al. 2020), -and to predict gender-based biases in academic publishing (Hagan et al. 2020).

+

Most applications of machine learning (ML) require reproducible steps for data pre-processing, cross-validation, testing, model evaluation, and often interpretation of why the model makes particular predictions. Performing these steps is important, as failure to implement them can result in incorrect and misleading results (Teschendorff 2019; Wiens et al. 2019).

+

Supervised ML is widely used to recognize patterns in large datasets and to make predictions about outcomes of interest. Several packages including caret (Kuhn 2008) and tidymodels (Kuhn, Wickham, and RStudio 2020) in R, scikitlearn (Pedregosa et al. 2011) in Python, and the H2O autoML platform (H2O.ai 2020) allow scientists to train ML models with a variety of algorithms. While these packages provide the tools necessary for each ML step, they do not implement a complete ML pipeline according to good practices in the literature. This makes it difficult for practitioners new to ML to easily begin to perform ML analyses.

+

To enable a broader range of researchers to apply ML to their problem domains, we created mikropml, an easy-to-use R package (R Core Team 2020) that implements the ML pipeline created by Topçuoğlu et al. (Topçuoğlu et al. 2020) in a single function that returns a trained model, model performance metrics and feature importance. mikropml leverages the caret package to support several ML algorithms: linear regression, logistic regression, support vector machines with a radial basis kernel, decision trees, random forest, and gradient boosted trees. It incorporates good practices in ML training, testing, and model evaluation (Topçuoğlu et al. 2020; Teschendorff 2019). Furthermore, it provides data preprocessing steps based on the FIDDLE (FlexIble Data-Driven pipeLinE) framework outlined in Tang et al. (Tang et al. 2020) and post-training permutation importance steps to estimate the importance of each feature in the models trained (Breiman 2001; Fisher, Rudin, and Dominici 2018).

+

mikropml can be used as a starting point in the application of ML to datasets from many different fields. It has already been applied to microbiome data to categorize patients with colorectal cancer (Topçuoğlu et al. 2020), to identify differences in genomic and clinical features associated with bacterial infections (Lapp et al. 2020), and to predict gender-based biases in academic publishing (Hagan et al. 2020).

mikropml package

-

The mikropml package includes functionality to -preprocess the data, train ML models, evaluate model performance, and -quantify feature importance (Figure 1). We also provide vignettes -and an example -Snakemake workflow (Köster and Rahmann -2012) to showcase how to run an ideal ML pipeline with multiple -different train/test data splits. The results can be visualized using -helper functions that use ggplot2 (Wickham 2016).

-

While mikropml allows users to get started quickly and facilitates -reproducibility, it is not a replacement for understanding the ML -workflow which is still necessary when interpreting results (Pollard et al. 2019). To facilitate -understanding and enable one to tailor the code to their application, we -have heavily commented the code and have provided supporting -documentation which can be read online.

+

The mikropml package includes functionality to preprocess the data, train ML models, evaluate model performance, and quantify feature importance (Figure 1). We also provide vignettes and an example Snakemake workflow (Köster and Rahmann 2012) to showcase how to run an ideal ML pipeline with multiple different train/test data splits. The results can be visualized using helper functions that use ggplot2 (Wickham 2016).

+

While mikropml allows users to get started quickly and facilitates reproducibility, it is not a replacement for understanding the ML workflow which is still necessary when interpreting results (Pollard et al. 2019). To facilitate understanding and enable one to tailor the code to their application, we have heavily commented the code and have provided supporting documentation which can be read online.

Preprocessing data

-

We provide the function preprocess_data() to preprocess -features using several different functions from the caret -package. preprocess_data() takes continuous and categorical -data, re-factors categorical data into binary features, and provides -options to normalize continuous data, remove features with near-zero -variance, and keep only one instance of perfectly correlated features. -We set the default options based on those implemented in FIDDLE (Tang et al. 2020). More details on how to use -preprocess_data() can be found in the accompanying vignette.

+

We provide the function preprocess_data() to preprocess features using several different functions from the caret package. preprocess_data() takes continuous and categorical data, re-factors categorical data into binary features, and provides options to normalize continuous data, remove features with near-zero variance, and keep only one instance of perfectly correlated features. We set the default options based on those implemented in FIDDLE (Tang et al. 2020). More details on how to use preprocess_data() can be found in the accompanying vignette.
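A minimal sketch of a typical call, using the package's small example dataset otu_mini_bin (outcome column dx) as in the vignettes; preprocess_data() returns a list, and the processed features live in the dat_transformed element:

library(mikropml)
# re-factor categorical features, normalize continuous ones, drop near-zero-variance columns
prep <- preprocess_data(dataset = otu_mini_bin, outcome_colname = "dx")
dat_processed <- prep$dat_transformed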

Running ML

-

The main function in mikropml, run_ml(), minimally takes -in the model choice and a data frame with an outcome column and feature -columns. For model choice, mikropml currently supports -logistic and linear regression (glmnet: Friedman, Hastie, and Tibshirani -2010), support vector machines with a radial basis kernel (kernlab: Karatzoglou et al. 2004), -decision trees (rpart: Therneau et -al. 2019), random forest (randomForest: Liaw and Wiener -2002), and gradient-boosted trees (xgboost: Chen et al. 2020). -run_ml() randomly splits the data into train and test sets -while maintaining the distribution of the outcomes found in the full -dataset. It also provides the option to split the data into train and -test sets based on categorical variables (e.g. batch, geographic -location, etc.). mikropml uses the caret -package (Kuhn 2008) to train and evaluate -the models, and optionally quantifies feature importance. The output -includes the best model built based on tuning hyperparameters in an -internal and repeated cross-validation step, model evaluation metrics, -and optional feature importances. Feature importances are calculated -using a permutation test, which breaks the relationship between the -feature and the true outcome in the test data, and measures the change -in model performance. This provides an intuitive metric of how -individual features influence model performance and is comparable across -model types, which is particularly useful for model interpretation (Topçuoğlu et al. 2020). Our introductory -vignette contains a comprehensive tutorial on how to use -run_ml().

+

The main function in mikropml, run_ml(), minimally takes in the model choice and a data frame with an outcome column and feature columns. For model choice, mikropml currently supports logistic and linear regression (glmnet: Friedman, Hastie, and Tibshirani 2010), support vector machines with a radial basis kernel (kernlab: Karatzoglou et al. 2004), decision trees (rpart: Therneau et al. 2019), random forest (randomForest: Liaw and Wiener 2002), and gradient-boosted trees (xgboost: Chen et al. 2020). run_ml() randomly splits the data into train and test sets while maintaining the distribution of the outcomes found in the full dataset. It also provides the option to split the data into train and test sets based on categorical variables (e.g. batch, geographic location, etc.). mikropml uses the caret package (Kuhn 2008) to train and evaluate the models, and optionally quantifies feature importance. The output includes the best model built based on tuning hyperparameters in an internal and repeated cross-validation step, model evaluation metrics, and optional feature importances. Feature importances are calculated using a permutation test, which breaks the relationship between the feature and the true outcome in the test data, and measures the change in model performance. This provides an intuitive metric of how individual features influence model performance and is comparable across model types, which is particularly useful for model interpretation (Topçuoğlu et al. 2020). Our introductory vignette contains a comprehensive tutorial on how to use run_ml().
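As a rough sketch of that minimal interface (using the package's example data and a fixed seed; the element names follow the package documentation):

library(mikropml)
# split the data, tune hyperparameters with cross-validation, and evaluate on the held-out set
results <- run_ml(otu_mini_bin, "glmnet", seed = 2019)
results$performance    # test-set performance metrics as a one-row tibble
results$trained_model  # best model from the internal cross-validation
# feature importances are returned as well when importance is requested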

mikropml pipeline

-

Ideal workflow for running mikropml with many different train/test -splits +

Ideal workflow for running mikropml with many different train/test splits

-

To investigate the variation in model performance depending on the -train and test set used (Topçuoğlu et al. 2020; -Lapp et al. 2020), we provide examples of how to -run_ml() many times with different train/test splits and -how to get summary information about model performance on a local -computer or on a high-performance computing cluster using a Snakemake -workflow.

+

To investigate the variation in model performance depending on the train and test set used (Topçuoğlu et al. 2020; Lapp et al. 2020), we provide examples of how to run_ml() many times with different train/test splits and how to get summary information about model performance on a local computer or on a high-performance computing cluster using a Snakemake workflow.

Tuning & visualization

-

One particularly important aspect of ML is hyperparameter tuning. We -provide a reasonable range of default hyperparameters for each model -type. However practitioners should explore whether that range is -appropriate for their data, or if they should customize the -hyperparameter range. Therefore, we provide a function -plot_hp_performance() to plot the cross-validation -performance metric of a single model or models built using different -train/test splits. This helps evaluate if the hyperparameter range is -being searched exhaustively and allows the user to pick the ideal set. -We also provide summary plots of test performance metrics for the many -train/test splits with different models using -plot_model_performance(). Examples are described in the -accompanying vignette -on hyperparameter tuning.

+

One particularly important aspect of ML is hyperparameter tuning. We provide a reasonable range of default hyperparameters for each model type. However, practitioners should explore whether that range is appropriate for their data, or if they should customize the hyperparameter range. Therefore, we provide a function plot_hp_performance() to plot the cross-validation performance metric of a single model or models built using different train/test splits. This helps evaluate if the hyperparameter range is being searched exhaustively and allows the user to pick the ideal set. We also provide summary plots of test performance metrics for the many train/test splits with different models using plot_model_performance(). Examples are described in the accompanying vignette on hyperparameter tuning.
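For example, a sketch of how these helpers might be called on run_ml() output; the column names passed to plot_hp_performance() (lambda, AUC) are illustrative, and perf_df is assumed to hold one row of performance metrics per train/test split — see the tuning vignette for the exact interface:

# cross-validation AUC across the lambda values tried for one glmnet model
results <- run_ml(otu_mini_bin, "glmnet", seed = 2019)
plot_hp_performance(results$trained_model$results, lambda, AUC)

# test-set performance summarized across many train/test splits
plot_model_performance(perf_df)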

Dependencies

-

mikropml is written in R (R Core Team -2020) and depends on several packages: dplyr (Wickham et al. 2020), rlang (Henry, Wickham, and RStudio 2020) and -caret (Kuhn 2008). The ML -algorithms supported by mikropml require: -glmnet (Friedman, Hastie, and -Tibshirani 2010), e1071 (Meyer et al. 2020), and MLmetrics -(Yan 2016) for logistic regression, -rpart2 (Therneau et al. 2019) -for decision trees, randomForest (Liaw and Wiener 2002) for random forest, -xgboost (Chen et al. 2020) -for xgboost, and kernlab (Karatzoglou et al. 2004) for support vector -machines. We also allow for parallelization of cross-validation and -other steps using the foreach, doFuture, -future.apply, and future packages (Bengtsson and Team 2020). Finally, we use -ggplot2 for plotting (Wickham -2016).

+

mikropml is written in R (R Core Team 2020) and depends on several packages: dplyr (Wickham et al. 2020), rlang (Henry, Wickham, and RStudio 2020) and caret (Kuhn 2008). The ML algorithms supported by mikropml require: glmnet (Friedman, Hastie, and Tibshirani 2010), e1071 (Meyer et al. 2020), and MLmetrics (Yan 2016) for logistic regression, rpart2 (Therneau et al. 2019) for decision trees, randomForest (Liaw and Wiener 2002) for random forest, xgboost (Chen et al. 2020) for xgboost, and kernlab (Karatzoglou et al. 2004) for support vector machines. We also allow for parallelization of cross-validation and other steps using the foreach, doFuture, future.apply, and future packages (Bengtsson and Team 2020). Finally, we use ggplot2 for plotting (Wickham 2016).

Acknowledgments

-

We thank members of the Schloss Lab who participated in code clubs -related to the initial development of the pipeline, made documentation -improvements, and provided general feedback. We also thank Nick Lesniak -for designing the mikropml logo.

-

We thank the US Research Software Sustainability Institute (NSF -#1743188) for providing training to KLS at the Winter School in Research -Software Engineering.

+

We thank members of the Schloss Lab who participated in code clubs related to the initial development of the pipeline, made documentation improvements, and provided general feedback. We also thank Nick Lesniak for designing the mikropml logo.

+

We thank the US Research Software Sustainability Institute (NSF #1743188) for providing training to KLS at the Winter School in Research Software Engineering.

Funding

-

Salary support for PDS came from NIH grant 1R01CA215574. KLS received -support from the NIH Training Program in Bioinformatics (T32 GM070449). -ZL received support from the National Science Foundation Graduate -Research Fellowship Program under Grant No. DGE 1256260. Any opinions, -findings, and conclusions or recommendations expressed in this material -are those of the authors and do not necessarily reflect the views of the -National Science Foundation.

+

Salary support for PDS came from NIH grant 1R01CA215574. KLS received support from the NIH Training Program in Bioinformatics (T32 GM070449). ZL received support from the National Science Foundation Graduate Research Fellowship Program under Grant No. DGE 1256260. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the National Science Foundation.

Author contributions

-

BDT, ZL, and KLS contributed equally. Author order among the co-first -authors was determined by time since joining the project.

-

BDT, ZL, and KLS conceptualized the study and wrote the code. KLS -structured the code in R package form. BDT, ZL, JW, and PDS developed -methodology. PDS, ES, and JW supervised the project. BDT, ZL, and KLS -wrote the original draft. All authors reviewed and edited the -manuscript.

+

BDT, ZL, and KLS contributed equally. Author order among the co-first authors was determined by time since joining the project.

+

BDT, ZL, and KLS conceptualized the study and wrote the code. KLS structured the code in R package form. BDT, ZL, JW, and PDS developed methodology. PDS, ES, and JW supervised the project. BDT, ZL, and KLS wrote the original draft. All authors reviewed and edited the manuscript.

Conflicts of interest @@ -327,156 +186,90 @@

Conflicts of interestNone.

-

References +

References

-
-
-Bengtsson, Henrik, and R Core Team. 2020. “Future.apply: -Apply Function to Elements in -Parallel Using Futures,” July. -
-
-Breiman, Leo. 2001. “Random Forests.” Machine -Learning 45 (1): 5–32. https://doi.org/10.1023/A:1010933404324. -
-
-Chen, Tianqi, Tong He, Michael Benesty, Vadim Khotilovich, Yuan Tang, -Hyunsu Cho, Kailong Chen, et al. 2020. “Xgboost: Extreme -Gradient Boosting,” June. -
-
-Fisher, Aaron, Cynthia Rudin, and Francesca Dominici. 2018. “All -Models Are Wrong, but Many Are Useful: Learning a -Variable’s Importance by Studying an Entire Class of Prediction Models -Simultaneously.” -
-
-Friedman, Jerome H., Trevor Hastie, and Rob Tibshirani. 2010. -“Regularization Paths for Generalized Linear -Models via Coordinate Descent.” Journal -of Statistical Software 33 (1): 1–22. https://doi.org/10.18637/jss.v033.i01. -
-
-H2O.ai. 2020. H2o: Scalable Machine -Learning Platform. Manual. -
-
-Hagan, Ada K., Begüm D. Topçuoğlu, Mia E. Gregory, Hazel A. Barton, and -Patrick D. Schloss. 2020. “Women Are Underrepresented -and Receive Differential Outcomes at ASM -Journals: A Six-Year Retrospective -Analysis.” mBio 11 (6). https://doi.org/10.1128/mBio.01680-20. -
-
-Henry, Lionel, Hadley Wickham, and RStudio. 2020. “Rlang: -Functions for Base Types and Core -R and ’TidyverseFeatures,” -July. -
-
-Karatzoglou, Alexandros, Alexandros Smola, Kurt Hornik, and Achim -Zeileis. 2004. “Kernlab - An S4 Package for -Kernel Methods in R.” Journal of -Statistical Software 11 (1): 1–20. https://doi.org/10.18637/jss.v011.i09. -
-
-Köster, Johannes, and Sven Rahmann. 2012. “Snakemakea Scalable -Bioinformatics Workflow Engine.” Bioinformatics 28 (19): -2520–22. https://doi.org/10.1093/bioinformatics/bts480. -
-
-Kuhn, Max. 2008. “Building Predictive Models in -R Using the Caret Package.” Journal -of Statistical Software 28 (1): 1–26. https://doi.org/10.18637/jss.v028.i05. -
-
-Kuhn, Max, Hadley Wickham, and RStudio. 2020. “Tidymodels: -Easily Install and Load the -’TidymodelsPackages,” July. -
-
-Lapp, Zena, Jennifer Han, Jenna Wiens, Ellie JC Goldstein, Ebbing -Lautenbach, and Evan Snitkin. 2020. “Machine Learning Models to -Identify Patient and Microbial Genetic Factors Associated with -Carbapenem-Resistant Klebsiella Pneumoniae -Infection.” medRxiv, July, 2020.07.06.20147306. https://doi.org/10.1101/2020.07.06.20147306. -
-
-Liaw, Andy, and Matthew Wiener. 2002. “Classification and -Regression by randomForest 2: 5. -
-
-Meyer, David, Evgenia Dimitriadou, Kurt Hornik, Andreas Weingessel, -Friedrich Leisch, Chih-Chung Chang (libsvm C++-code), and Chih-Chen Lin -(libsvm C++-code). 2020. “E1071: Misc Functions of -the Department of Statistics, -Probability Theory Group (Formerly: -E1071), TU Wien.” -
-
-Pedregosa, Fabian, Gaël Varoquaux, Alexandre Gramfort, Vincent Michel, -Bertrand Thirion, Olivier Grisel, Mathieu Blondel, et al. 2011. -“Scikit-Learn: Machine Learning in -Python.” Journal of Machine Learning -Research 12 (85): 2825–30. -
-
-Pollard, Tom J., Irene Chen, Jenna Wiens, Steven Horng, Danny Wong, -Marzyeh Ghassemi, Heather Mattie, Emily Lindemer, and Trishan Panch. -2019. “Turning the Crank for Machine Learning: Ease, at What -Expense?” The Lancet Digital Health 1 (5): e198–99. https://doi.org/10.1016/S2589-7500(19)30112-8. -
-
-R Core Team. 2020. “R: A Language and -Environment for Statistical Computing.” -
-
-Tang, Shengpu, Parmida Davarmanesh, Yanmeng Song, Danai Koutra, Michael -W. Sjoding, and Jenna Wiens. 2020. “Democratizing EHR -Analyses with FIDDLE: A Flexible Data-Driven Preprocessing -Pipeline for Structured Clinical Data.” J Am Med Inform -Assoc, October. https://doi.org/10.1093/jamia/ocaa139. -
-
-Teschendorff, Andrew E. 2019. “Avoiding Common Pitfalls in Machine -Learning Omic Data Science.” Nature Materials 18 (5): -422–27. https://doi.org/10.1038/s41563-018-0241-z. -
-
-Therneau, Terry, Beth Atkinson, Brian Ripley (producer of the initial R. -port, and maintainer 1999-2017). 2019. “Rpart: Recursive -Partitioning and Regression Trees,” April. -
-
-Topçuoğlu, Begüm D., Nicholas A. Lesniak, Mack T. Ruffin, Jenna Wiens, -and Patrick D. Schloss. 2020. “A Framework for -Effective Application of Machine Learning to -Microbiome-Based Classification -Problems.” mBio 11 (3). https://doi.org/10.1128/mBio.00434-20. -
-
-Wickham, Hadley. 2016. Ggplot2: Elegant Graphics for -Data Analysis. Use R! Cham: -Springer International Publishing. https://doi.org/10.1007/978-3-319-24277-4. -
-
-Wickham, Hadley, Romain François, Lionel Henry, Kirill Müller, and -RStudio. 2020. “Dplyr: A Grammar of Data -Manipulation,” August. -
-
-Wiens, Jenna, Suchi Saria, Mark Sendak, Marzyeh Ghassemi, Vincent X. -Liu, Finale Doshi-Velez, Kenneth Jung, et al. 2019. “Do No Harm: A -Roadmap for Responsible Machine Learning for Health Care.” -Nat. Med. 25 (9): 1337–40. https://doi.org/10.1038/s41591-019-0548-6. -
-
-Yan, Yachen. 2016. MLmetrics: Machine Learning -Evaluation Metrics.” -
-
-
-
+
+
+

Bengtsson, Henrik, and R Core Team. 2020. “Future.Apply: Apply Function to Elements in Parallel Using Futures,” July.

+
+
+

Breiman, Leo. 2001. “Random Forests.” Machine Learning 45 (1): 5–32. https://doi.org/10.1023/A:1010933404324.

+
+
+

Chen, Tianqi, Tong He, Michael Benesty, Vadim Khotilovich, Yuan Tang, Hyunsu Cho, Kailong Chen, et al. 2020. “Xgboost: Extreme Gradient Boosting,” June.

+
+
+

Fisher, Aaron, Cynthia Rudin, and Francesca Dominici. 2018. “All Models Are Wrong, but Many Are Useful: Learning a Variable’s Importance by Studying an Entire Class of Prediction Models Simultaneously.”

+
+
+

Friedman, Jerome H., Trevor Hastie, and Rob Tibshirani. 2010. “Regularization Paths for Generalized Linear Models via Coordinate Descent.” Journal of Statistical Software 33 (1): 1–22. https://doi.org/10.18637/jss.v033.i01.

+
+
+

H2O.ai. 2020. H2O: Scalable Machine Learning Platform. Manual.

+
+
+

Hagan, Ada K., Begüm D. Topçuoğlu, Mia E. Gregory, Hazel A. Barton, and Patrick D. Schloss. 2020. “Women Are Underrepresented and Receive Differential Outcomes at ASM Journals: A Six-Year Retrospective Analysis.” mBio 11 (6). https://doi.org/10.1128/mBio.01680-20.

+
+
+

Henry, Lionel, Hadley Wickham, and RStudio. 2020. “Rlang: Functions for Base Types and Core R and ’Tidyverse’ Features,” July.

+
+
+

Karatzoglou, Alexandros, Alexandros Smola, Kurt Hornik, and Achim Zeileis. 2004. “Kernlab - an S4 Package for Kernel Methods in R.” Journal of Statistical Software 11 (1): 1–20. https://doi.org/10.18637/jss.v011.i09.

+
+
+

Köster, Johannes, and Sven Rahmann. 2012. “Snakemakea Scalable Bioinformatics Workflow Engine.” Bioinformatics 28 (19): 2520–2. https://doi.org/10.1093/bioinformatics/bts480.

+
+
+

Kuhn, Max. 2008. “Building Predictive Models in R Using the Caret Package.” Journal of Statistical Software 28 (1): 1–26. https://doi.org/10.18637/jss.v028.i05.

+
+
+

Kuhn, Max, Hadley Wickham, and RStudio. 2020. “Tidymodels: Easily Install and Load the ’Tidymodels’ Packages,” July.

+
+
+

Lapp, Zena, Jennifer Han, Jenna Wiens, Ellie JC Goldstein, Ebbing Lautenbach, and Evan Snitkin. 2020. “Machine Learning Models to Identify Patient and Microbial Genetic Factors Associated with Carbapenem-Resistant Klebsiella Pneumoniae Infection.” medRxiv, July, 2020.07.06.20147306. https://doi.org/10.1101/2020.07.06.20147306.

+
+
+

Liaw, Andy, and Matthew Wiener. 2002. “Classification and Regression by randomForest” 2: 5.

+
+
+

Meyer, David, Evgenia Dimitriadou, Kurt Hornik, Andreas Weingessel, Friedrich Leisch, Chih-Chung Chang (libsvm C++-code), and Chih-Chen Lin (libsvm C++-code). 2020. “E1071: Misc Functions of the Department of Statistics, Probability Theory Group (Formerly: E1071), TU Wien.”

+
+
+

Pedregosa, Fabian, Gaël Varoquaux, Alexandre Gramfort, Vincent Michel, Bertrand Thirion, Olivier Grisel, Mathieu Blondel, et al. 2011. “Scikit-Learn: Machine Learning in Python.” Journal of Machine Learning Research 12 (85): 2825–30.

+
+
+

Pollard, Tom J., Irene Chen, Jenna Wiens, Steven Horng, Danny Wong, Marzyeh Ghassemi, Heather Mattie, Emily Lindemer, and Trishan Panch. 2019. “Turning the Crank for Machine Learning: Ease, at What Expense?” The Lancet Digital Health 1 (5): e198–e199. https://doi.org/10.1016/S2589-7500(19)30112-8.

+
+
+

R Core Team. 2020. “R: A Language and Environment for Statistical Computing.”

+
+
+

Tang, Shengpu, Parmida Davarmanesh, Yanmeng Song, Danai Koutra, Michael W. Sjoding, and Jenna Wiens. 2020. “Democratizing EHR Analyses with FIDDLE: A Flexible Data-Driven Preprocessing Pipeline for Structured Clinical Data.” J Am Med Inform Assoc, October. https://doi.org/10.1093/jamia/ocaa139.

+
+
+

Teschendorff, Andrew E. 2019. “Avoiding Common Pitfalls in Machine Learning Omic Data Science.” Nature Materials 18 (5): 422–27. https://doi.org/10.1038/s41563-018-0241-z.

+
+
+

Therneau, Terry, Beth Atkinson, Brian Ripley (producer of the initial R. port, and maintainer 1999-2017). 2019. “Rpart: Recursive Partitioning and Regression Trees,” April.

+
+
+

Topçuoğlu, Begüm D., Nicholas A. Lesniak, Mack T. Ruffin, Jenna Wiens, and Patrick D. Schloss. 2020. “A Framework for Effective Application of Machine Learning to Microbiome-Based Classification Problems.” mBio 11 (3). https://doi.org/10.1128/mBio.00434-20.

+
+
+

Wickham, Hadley. 2016. Ggplot2: Elegant Graphics for Data Analysis. Use R! Cham: Springer International Publishing. https://doi.org/10.1007/978-3-319-24277-4.

+
+
+

Wickham, Hadley, Romain François, Lionel Henry, Kirill Müller, and RStudio. 2020. “Dplyr: A Grammar of Data Manipulation,” August.

+
+
+

Wiens, Jenna, Suchi Saria, Mark Sendak, Marzyeh Ghassemi, Vincent X. Liu, Finale Doshi-Velez, Kenneth Jung, et al. 2019. “Do No Harm: A Roadmap for Responsible Machine Learning for Health Care.” Nat. Med. 25 (9): 1337–40. https://doi.org/10.1038/s41591-019-0548-6.

+
+
+

Yan, Yachen. 2016. “MLmetrics: Machine Learning Evaluation Metrics.”

+
+
+
+

  1. co-first author↩︎

  2. diff --git a/docs/articles/parallel.html b/docs/articles/parallel.html index e1ac7594..373dfe7e 100644 --- a/docs/articles/parallel.html +++ b/docs/articles/parallel.html @@ -100,12 +100,11 @@ -
    +

Speed up single runs

-

By default, preprocess_data(), run_ml(), -and compare_models() use only one process in series. If -you’d like to parallelize various steps of the pipeline to make them run -faster, install foreach, future, -future.apply, and doFuture. Then, register a -future plan prior to calling these functions:

+

By default, preprocess_data(), run_ml(), and compare_models() use only one process in series. If you’d like to parallelize various steps of the pipeline to make them run faster, install foreach, future, future.apply, and doFuture. Then, register a future plan prior to calling these functions:

-doFuture::registerDoFuture()
+doFuture::registerDoFuture()
 future::plan(future::multicore, workers = 2)
-

Above, we used the multicore plan to split the work -across 2 cores. See the future -documentation for more about picking the best plan for your use -case. Notably, multicore does not work inside RStudio or on -Windows; you will need to use multisession instead in those -cases.

-

After registering a future plan, you can call -preprocess_data() and run_ml() as usual, and -they will run certain tasks in parallel.

+

Above, we used the multicore plan to split the work across 2 cores. See the future documentation for more about picking the best plan for your use case. Notably, multicore does not work inside RStudio or on Windows; you will need to use multisession instead in those cases.
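For instance, on Windows or inside RStudio you might register the plan like this (a sketch; adjust workers to what your machine has available):

doFuture::registerDoFuture()
future::plan(future::multisession, workers = 2)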

+

After registering a future plan, you can call preprocess_data() and run_ml() as usual, and they will run certain tasks in parallel.

 otu_data_preproc <- preprocess_data(otu_mini_bin, 'dx')$dat_transformed
-result1 <- run_ml(otu_data_preproc, 'glmnet')
+#> Using 'dx' as the outcome column. +result1 <- run_ml(otu_data_preproc, 'glmnet') +#> Using 'dx' as the outcome column. +#> Training the model... +#> Loading required package: ggplot2 +#> Loading required package: lattice +#> +#> Attaching package: 'caret' +#> The following object is masked from 'package:mikropml': +#> +#> compare_models +#> Training complete.

Call run_ml() multiple times in parallel in R

-

You can use functions from the future.apply package to -call run_ml() multiple times in parallel with different -parameters. You will first need to run future::plan() as -above if you haven’t already. Then, call run_ml() with -multiple seeds using future_lapply():

+

You can use functions from the future.apply package to call run_ml() multiple times in parallel with different parameters. You will first need to run future::plan() as above if you haven’t already. Then, call run_ml() with multiple seeds using future_lapply():

 # NOTE: use more seeds for real-world data
 results_multi <- future.apply::future_lapply(seq(100, 102), function(seed) {
   run_ml(otu_data_preproc, 'glmnet', seed = seed)
-  }, future.seed = TRUE)
-

Each call to run_ml() with a different seed uses a -different random split of the data into training and testing sets. Since -we are using seeds, we must set future.seed to -TRUE (see the future.apply -documentation and this -blog post for details on parallel-safe random seeds). This example -uses only a few seeds for speed and simplicity, but for real data we -recommend using many more seeds to get a better estimate of model -performance.

-

In these examples, we used functions from the -future.apply package to run_ml() in parallel, -but you can accomplish the same thing with parallel versions of the -purrr::map() functions using the furrr package -(e.g. furrr::future_map_dfr()).

-

Extract the performance results and combine into one dataframe for -all seeds:

+ }, future.seed = TRUE) +#> Using 'dx' as the outcome column. +#> Training the model... +#> Training complete. +#> Using 'dx' as the outcome column. +#> Training the model... +#> Training complete. +#> Using 'dx' as the outcome column. +#> Training the model... +#> Training complete.
+

Each call to run_ml() with a different seed uses a different random split of the data into training and testing sets. Since we are using seeds, we must set future.seed to TRUE (see the future.apply documentation and this blog post for details on parallel-safe random seeds). This example uses only a few seeds for speed and simplicity, but for real data we recommend using many more seeds to get a better estimate of model performance.

+

In these examples, we used functions from the future.apply package to run_ml() in parallel, but you can accomplish the same thing with parallel versions of the purrr::map() functions using the furrr package (e.g. furrr::future_map_dfr()).
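For example, a rough furrr equivalent of the loop above, assuming furrr is installed; here furrr::furrr_options(seed = TRUE) plays the same role as future.seed = TRUE:

# NOTE: use more seeds for real-world data
perf_furrr <- furrr::future_map_dfr(
  seq(100, 102),
  ~ run_ml(otu_data_preproc, "glmnet", seed = .x)$performance,
  .options = furrr::furrr_options(seed = TRUE)
)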

+

Extract the performance results and combine into one dataframe for all seeds:

 perf_df <- future.apply::future_lapply(results_multi, 
                                        function(result) {
@@ -180,13 +178,17 @@ 

Call run_ml() }, future.seed = TRUE) %>% dplyr::bind_rows() -perf_df

+perf_df +#> # A tibble: 3 × 3 +#> cv_metric_AUC AUC method +#> <dbl> <dbl> <chr> +#> 1 0.630 0.634 glmnet +#> 2 0.591 0.608 glmnet +#> 3 0.671 0.471 glmnet

Multiple ML methods

-

You may also wish to compare performance for different ML methods. -mapply() can iterate over multiple lists or vectors, and -future_mapply() works the same way:

+

You may also wish to compare performance for different ML methods. mapply() can iterate over multiple lists or vectors, and future_mapply() works the same way:

 # NOTE: use more seeds for real-world data
 param_grid <- expand.grid(seeds = seq(100, 102),
@@ -198,39 +200,61 @@ 

Multiple ML methodsparam_grid$seeds, param_grid$methods %>% as.character(), future.seed = TRUE - )

-

Extract and combine the performance results for all seeds and -methods:

+ ) +#> Using 'dx' as the outcome column. +#> Training the model... +#> Training complete. +#> Using 'dx' as the outcome column. +#> Training the model... +#> Training complete. +#> Using 'dx' as the outcome column. +#> Training the model... +#> Training complete. +#> Using 'dx' as the outcome column. +#> Training the model... +#> Training complete. +#> Using 'dx' as the outcome column. +#> Training the model... +#> Training complete. +#> Using 'dx' as the outcome column. +#> Training the model... +#> Training complete.
+

Extract and combine the performance results for all seeds and methods:

 perf_df2 <- lapply(results_mtx['performance',], 
                    function(x) {
                      x %>% select(cv_metric_AUC, AUC, method)
                    }) %>% 
   dplyr::bind_rows()
-perf_df2
-

Visualize the performance results (ggplot2 is -required):

+perf_df2 +#> # A tibble: 6 × 3 +#> cv_metric_AUC AUC method +#> <dbl> <dbl> <chr> +#> 1 0.630 0.634 glmnet +#> 2 0.591 0.608 glmnet +#> 3 0.671 0.471 glmnet +#> 4 0.665 0.708 rf +#> 5 0.651 0.697 rf +#> 6 0.701 0.592 rf
+

Visualize the performance results (ggplot2 is required):

 perf_boxplot <- plot_model_performance(perf_df2)
 perf_boxplot
-

plot_model_performance() returns a ggplot2 object. You -can add layers to customize the plot:

+

+

plot_model_performance() returns a ggplot2 object. You can add layers to customize the plot:

 perf_boxplot +
    theme_classic() +
    scale_color_brewer(palette = "Dark2") +
    coord_flip()
-

You can also create your own plots however you like using the -performance results.

+

+

You can also create your own plots however you like using the performance results.

Live progress updates

-

preprocess_data() and -get_feature_importance() support reporting live progress -updates using the progressr package. The format is up to -you, but we recommend using a progress bar like this:

+

preprocess_data() and get_feature_importance() support reporting live progress updates using the progressr package. The format is up to you, but we recommend using a progress bar like this:

 # optionally, specify the progress bar format with the `progress` package.
 progressr::handlers(progressr::handler_progress(
@@ -251,26 +275,12 @@ 

Live progress updates#> Training the model... #> Training complete. #> Feature importance =========================== 100% | elapsed: 37s | eta: 0s

-

Note that some future backends support “near-live” progress updates, -meaning the progress may not be reported immediately when parallel -processing with futures. Read more on that in -the progressr vignette. For more on -progressr and how to customize the format of progress -updates, see the progressr -docs.

+

Note that some future backends support “near-live” progress updates, meaning the progress may not be reported immediately when parallel processing with futures. Read more on that in the progressr vignette. For more on progressr and how to customize the format of progress updates, see the progressr docs.

Parallelizing with Snakemake

-

When parallelizing multiple calls to run_ml() in R as in -the examples above, all of the results objects are held in memory. This -isn’t a big deal for a small dataset run with only a few seeds. However, -for large datasets run in parallel with, say, 100 seeds (recommended), -you may run into problems trying to store all of those objects in memory -at once. One solution is to write the results files of each -run_ml() call, then concatenate them at the end. We show -one way to accomplish this with Snakemake in an -example Snakemake workflow here.

+

When parallelizing multiple calls to run_ml() in R as in the examples above, all of the results objects are held in memory. This isn’t a big deal for a small dataset run with only a few seeds. However, for large datasets run in parallel with, say, 100 seeds (recommended), you may run into problems trying to store all of those objects in memory at once. One solution is to write the results files of each run_ml() call, then concatenate them at the end. We show one way to accomplish this with Snakemake in an example Snakemake workflow here.
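Outside of Snakemake, a bare-bones version of the same idea looks something like this (a sketch; the file names are arbitrary):

# write each run's small performance table to disk instead of keeping every model in memory
for (seed in seq(100, 102)) {
  fit <- run_ml(otu_data_preproc, "glmnet", seed = seed)
  saveRDS(fit$performance, paste0("performance_seed", seed, ".rds"))
  rm(fit)
}
# later, read the performance files back in and combine them
perf_all <- list.files(pattern = "^performance_seed.*\\.rds$") %>%
  lapply(readRDS) %>%
  dplyr::bind_rows()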

diff --git a/docs/articles/preprocess.html b/docs/articles/preprocess.html index d6e21856..7d7bdb8d 100644 --- a/docs/articles/preprocess.html +++ b/docs/articles/preprocess.html @@ -100,7 +100,7 @@ -
+

Categorical data @@ -245,9 +209,7 @@

Categorical data#> #> $removed_feats #> character(0)

-

As you can see, this variable was split into 3 different columns - -one for each type (a, b, and c). And again, grp_feats is -NULL.

+

As you can see, this variable was split into 3 different columns - one for each type (a, b, and c). And again, grp_feats is NULL.

Continuous data @@ -281,23 +243,12 @@

Continuous data#> #> $removed_feats #> character(0)

-

Wow! Why did the numbers change? This is because the default is to -normalize the data using "center" and "scale". -While this is often best practice, you may not want to normalize the -data, or you may want to normalize the data in a different way. If you -don’t want to normalize the data, you can use -method=NULL:

+

Wow! Why did the numbers change? This is because the default is to normalize the data using "center" and "scale". While this is often best practice, you may not want to normalize the data, or you may want to normalize the data in a different way. If you don’t want to normalize the data, you can use method=NULL:

 # preprocess raw continuous data, no normalization
 preprocess_data(dataset = cont_df, outcome_colname = "outcome", method = NULL)
-

You can also normalize the data in different ways. You can choose any -method supported by the method argument of -caret::preProcess() (see the -caret::preProcess() docs for details). Note that these -methods are only applied to continuous variables.

-

Another feature of preprocess_data() is that if you -provide continuous variables as characters, they will be converted to -numeric:

+

You can also normalize the data in different ways. You can choose any method supported by the method argument of caret::preProcess() (see the caret::preProcess() docs for details). Note that these methods are only applied to continuous variables.
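For example, to rescale continuous features to the range 0 to 1 instead of centering and scaling (assuming the method vector is passed through to caret::preProcess() as described above):

# rescale continuous features to [0, 1]
preprocess_data(dataset = cont_df, outcome_colname = "outcome", method = c("range"))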

+

Another feature of preprocess_data() is that if you provide continuous variables as characters, they will be converted to numeric:

 # raw continuous dataset as characters
 cont_char_df <- data.frame(
@@ -312,10 +263,7 @@ 

Continuous data
 # preprocess raw continuous character data as numeric
 preprocess_data(dataset = cont_char_df, outcome_colname = "outcome")

-

If you don’t want this to happen, and you want character data to -remain character data even if it can be converted to numeric, you can -use to_numeric=FALSE and they will be kept as -categorical:

+

If you don’t want this to happen, and you want character data to remain character data even if it can be converted to numeric, you can use to_numeric=FALSE and they will be kept as categorical:

 # preprocess raw continuous character data as characters
 preprocess_data(dataset = cont_char_df, outcome_colname = "outcome", to_numeric = FALSE)
@@ -333,17 +281,12 @@ 

Continuous data#> #> $removed_feats #> character(0)

-

As you can see from this output, in this case the features are -treated as groups rather than numbers (e.g. they are not -normalized).

+

As you can see from this output, in this case the features are treated as groups rather than numbers (e.g. they are not normalized).

Collapse perfectly correlated features

-

By default, preprocess_data() collapses features that -are perfectly positively or negatively correlated. This is because -having multiple copies of those features does not add information to -machine learning, and it makes run_ml faster.

+

By default, preprocess_data() collapses features that are perfectly positively or negatively correlated. This is because having multiple copies of those features does not add information to machine learning, and it makes run_ml faster.

 # raw correlated dataset
 corr_df <- data.frame(
@@ -376,14 +319,8 @@ 

Collapse perfectly correlated fe #> #> $removed_feats #> [1] "var2"

-

As you can see, we end up with only one variable, as all 3 are -grouped together. Also, the second element in the list is no longer -NULL. Instead, it tells you that grp1 contains -var1, var2, and var3.

-

If you want to group positively correlated features, but not -negatively correlated features (e.g. for interpretability, or another -downstream application), you can do that by using -group_neg_corr=FALSE:

+

As you can see, we end up with only one variable, as all 3 are grouped together. Also, the second element in the list is no longer NULL. Instead, it tells you that grp1 contains var1, var2, and var3.

+

If you want to group positively correlated features, but not negatively correlated features (e.g. for interpretability, or another downstream application), you can do that by using group_neg_corr=FALSE:

 # preprocess raw correlated dataset; don't group negatively correlated features
 preprocess_data(dataset = corr_df, outcome_colname = "outcome", group_neg_corr = FALSE)
@@ -401,10 +338,7 @@ 

Collapse perfectly correlated fe #> #> $removed_feats #> [1] "var2"

-

Here, var3 is kept on it’s own because it’s negatively -correlated with var1 and var2. You can also -choose to keep all features separate, even if they are perfectly -correlated, by using collapse_corr_feats=FALSE:

+

Here, var3 is kept on its own because it's negatively correlated with var1 and var2. You can also choose to keep all features separate, even if they are perfectly correlated, by using collapse_corr_feats=FALSE:

 # preprocess raw correlated dataset; don't group negatively correlated features
 preprocess_data(dataset = corr_df, outcome_colname = "outcome", collapse_corr_feats = FALSE)
@@ -422,14 +356,12 @@ 

Collapse perfectly correlated fe #> #> $removed_feats #> [1] "var2"

-

In this case, grp_feats will always be -NULL.

+

In this case, grp_feats will always be NULL.

Data with near-zero variance

-

What if we have variables that are all zero, or all “no”? Those ones -won’t contribute any information, so we remove them:

+

What if we have variables that are all zero, or all “no”? Those ones won’t contribute any information, so we remove them:

 # raw dataset with non-variable features
 nonvar_df <- data.frame(
@@ -445,9 +377,7 @@ 

Data with near-zero variance#> 1 normal no 0 no 0 12 #> 2 normal yes 1 no 0 12 #> 3 cancer no 1 no 0 12

-

Here, var3, var4, and var5 all -have no variability, so these variables are removed during -preprocessing:

+

Here, var3, var4, and var5 all have no variability, so these variables are removed during preprocessing:

 # remove features with near-zero variance
 preprocess_data(dataset = nonvar_df, outcome_colname = "outcome")
@@ -465,13 +395,7 @@ 

Data with near-zero variance#> #> $removed_feats #> [1] "var4" "var3" "var5"

-

You can read the caret::preProcess() documentation for -more information. By default, we remove features with “near-zero -variance” (remove_var='nzv'). This uses the default -arguments from caret::nearZeroVar(). However, particularly -with smaller datasets, you might not want to remove features with -near-zero variance. If you want to remove only features with zero -variance, you can use remove_var='zv':

+

You can read the caret::preProcess() documentation for more information. By default, we remove features with “near-zero variance” (remove_var='nzv'). This uses the default arguments from caret::nearZeroVar(). However, particularly with smaller datasets, you might not want to remove features with near-zero variance. If you want to remove only features with zero variance, you can use remove_var='zv':

 # remove features with zero variance
 preprocess_data(dataset = nonvar_df, outcome_colname = "outcome", remove_var = 'zv')
@@ -489,10 +413,7 @@ 

Data with near-zero variance#> #> $removed_feats #> [1] "var4" "var3" "var5"

-

If you want to include all features, you can use the argument -remove_zv=NULL. For this to work, you cannot collapse -correlated features (otherwise it errors out because of the underlying -caret function we use).

+

If you want to include all features, you can use the argument remove_zv=NULL. For this to work, you cannot collapse correlated features (otherwise it errors out because of the underlying caret function we use).

 # don't remove features with near-zero or zero variance
 preprocess_data(dataset = nonvar_df, outcome_colname = "outcome", remove_var = NULL, collapse_corr_feats = FALSE)
@@ -510,12 +431,7 @@ 

Data with near-zero variance#> #> $removed_feats #> [1] "var4"

-

If you want to be more nuanced in how you remove near-zero variance -features (e.g. change the default 10% cutoff for the percentage of -distinct values out of the total number of samples), you can use the -caret::preProcess() function after running -preprocess_data with remove_var=NULL (see the -caret::nearZeroVar() function for more information).

+

If you want to be more nuanced in how you remove near-zero variance features (e.g. change the default 10% cutoff for the percentage of distinct values out of the total number of samples), you can use the caret::preProcess() function after running preprocess_data with remove_var=NULL (see the caret::nearZeroVar() function for more information).
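A sketch of that approach, where uniqueCut is caret's percent-of-distinct-values cutoff (10 by default):

# keep everything in preprocess_data(), then flag near-zero variance features yourself
dat_proc <- preprocess_data(dataset = nonvar_df, outcome_colname = "outcome",
                            remove_var = NULL, collapse_corr_feats = FALSE)$dat_transformed
# use a stricter cutoff than the default uniqueCut = 10 and inspect the metrics
caret::nearZeroVar(dat_proc, uniqueCut = 5, saveMetrics = TRUE)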

Missing data @@ -523,16 +439,11 @@

Missing datapreprocess_data() also deals with missing data. It:

  • Removes missing outcome variables.
  • -
  • Maintains zero variability in a feature if it already has no -variability (i.e. the feature is removed if removing features with -near-zero variance).
  • -
  • Replaces missing binary and categorical variables with zero (after -splitting into multiple columns).
  • -
  • Replaces missing continuous data with the median value of that -feature.
  • +
  • Maintains zero variability in a feature if it already has no variability (i.e. the feature is removed if removing features with near-zero variance).
  • +
  • Replaces missing binary and categorical variables with zero (after splitting into multiple columns).
  • +
  • Replaces missing continuous data with the median value of that feature.
-

If you’d like to deal with missing data in a different way, please do -that prior to inputting the data to preprocess_data().

+

If you’d like to deal with missing data in a different way, please do that prior to inputting the data to preprocess_data().
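For example, a sketch of imputing a continuous feature with the mean instead of the median before preprocessing (dat and var1 are hypothetical placeholders here):

# impute missing values in a continuous feature with the mean, then preprocess as usual
dat$var1[is.na(dat$var1)] <- mean(dat$var1, na.rm = TRUE)
preprocess_data(dataset = dat, outcome_colname = "outcome")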

Remove missing outcome variables

@@ -569,8 +480,7 @@

Remove missing outcome variables#> character(0)

-

Maintain zero variability in a feature if it already has no -variability +

Maintain zero variability in a feature if it already has no variability

 # raw dataset with missing value in non-variable feature
@@ -602,9 +512,7 @@ 

#> #> $removed_feats #> [1] "var2"

-

Here, the non-variable feature with missing data is removed because -we removed features with near-zero variance. If we maintained that -feature, it’d be all ones:

+

Here, the non-variable feature with missing data is removed because we removed features with near-zero variance. If we maintained that feature, it’d be all ones:

 # preprocess raw dataset with missing value in non-variable feature
 preprocess_data(dataset = miss_nonvar_df, outcome_colname = "outcome", remove_var = NULL, collapse_corr_feats = FALSE)
@@ -657,12 +565,10 @@ 

Replace miss #> #> $removed_feats #> [1] "var2"

-

Here each binary variable is split into two, and the missing value is -considered zero for both of them.

+

Here each binary variable is split into two, and the missing value is considered zero for both of them.

-

Replace missing continuous data with the median value of that -feature +

Replace missing continuous data with the median value of that feature

 # raw dataset with missing value in continuous feature
@@ -677,8 +583,7 @@ 

R #> 2 normal 2 2 #> 3 cancer 2 3 #> 4 normal NA NA

-

Here we’re not normalizing continuous features so it’s easier to see -what’s going on (i.e. the median value is used):

+

Here we’re not normalizing continuous features so it’s easier to see what’s going on (i.e. the median value is used):

 # preprocess raw dataset with missing value in continuous feature
 preprocess_data(dataset = miss_cont_df, outcome_colname = "outcome", method = NULL)
@@ -703,8 +608,7 @@ 

R

Putting it all together

-

Here’s some more complicated example raw data that puts everything we -discussed together:

+

Here’s some more complicated example raw data that puts everything we discussed together:

 test_df <- data.frame(
   outcome = c("normal", "normal", "cancer", NA),
@@ -727,8 +631,7 @@ 

Putting it all together#> 2 normal 2 b yes 1 0 no 1 6 x 0 1 2 #> 3 cancer 3 c no 0 0 no 0 NA y NA NA 3 #> 4 <NA> 4 d no 0 0 no 0 7 z NA NA 4

-

Let’s throw this into the preprocessing function with the default -values:

+

Let’s throw this into the preprocessing function with the default values:

 preprocess_data(dataset = test_df, outcome_colname = "outcome")
 #> Using 'outcome' as the outcome column.
@@ -765,37 +668,20 @@ 

Putting it all together#> [1] "var4" "var5" "var10" "var6" "var11"

As you can see, we got several messages:

    -
  • One of the samples (row 4) was removed because the outcome value was -missing.
  • -
  • One of the variables in a feature with no variation had a missing -value that was replaced with the the non-varying value -(var11).
  • -
  • Four categorical missing values were replaced with zero -(var9). There are 4 missing rather than just 1 (like in the -raw data) because we split the categorical variable into 4 different -columns first.
  • -
  • One missing continuous value was imputed using the median value of -that feature (var8).
  • +
  • One of the samples (row 4) was removed because the outcome value was missing.
  • +
  • One of the variables in a feature with no variation had a missing value that was replaced with the non-varying value (var11).
  • +
  • Four categorical missing values were replaced with zero (var9). There are 4 missing rather than just 1 (like in the raw data) because we split the categorical variable into 4 different columns first.
  • +
  • One missing continuous value was imputed using the median value of that feature (var8).
-

Additionally, you can see that the continuous variables were -normalized, the categorical variables were all changed to binary, and -several features were grouped together. The variables in each group can -be found in grp_feats.

+

Additionally, you can see that the continuous variables were normalized, the categorical variables were all changed to binary, and several features were grouped together. The variables in each group can be found in grp_feats.

Next step: train and evaluate your model!

-

After you preprocess your data (either using -preprocess_data() or by preprocessing the data on your -own), you’re ready to train and evaluate machine learning models! Please -see run_ml() information about training models.

-
-
-Tang, Shengpu, Parmida Davarmanesh, Yanmeng Song, Danai Koutra, Michael -W. Sjoding, and Jenna Wiens. 2020. “Democratizing EHR -Analyses with FIDDLE: A Flexible Data-Driven Preprocessing -Pipeline for Structured Clinical Data.” J Am Med Inform -Assoc, October. https://doi.org/10.1093/jamia/ocaa139. +

After you preprocess your data (either using preprocess_data() or by preprocessing the data on your own), you're ready to train and evaluate machine learning models! Please see run_ml() for information about training models.
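For instance, a minimal hand-off from preprocessing to training might look like this (a sketch; the tiny toy data frame above is far too small to actually train on, so substitute your full dataset):

dat_ready <- preprocess_data(dataset = test_df, outcome_colname = "outcome")$dat_transformed
results <- run_ml(dat_ready, "glmnet", seed = 2019)
results$performance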

+
+
+

Tang, Shengpu, Parmida Davarmanesh, Yanmeng Song, Danai Koutra, Michael W. Sjoding, and Jenna Wiens. 2020. “Democratizing EHR Analyses with FIDDLE: A Flexible Data-Driven Preprocessing Pipeline for Structured Clinical Data.” J Am Med Inform Assoc, October. https://doi.org/10.1093/jamia/ocaa139.

diff --git a/docs/articles/tuning.html b/docs/articles/tuning.html index 1cc971d6..13d99588 100644 --- a/docs/articles/tuning.html +++ b/docs/articles/tuning.html @@ -100,12 +100,11 @@ -
+

diff --git a/docs/reference/index.html b/docs/reference/index.html index 048d1f36..6303fba6 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -89,15 +89,10 @@

Main

Run the machine learning pipeline

-

Plotting & evalutation helpers

-

Visualize & evalutate performance to help you tune hyperparameters and choose model methods.

+

Plotting helpers

+

Visualize results to help you tune hyperparameters and choose model methods.

-

compare_models()

- -

Perform permutation tests to compare the performance metric -across all pairs of a group variable.

-

plot_hp_performance()

Plot hyperparameter performance metrics

@@ -117,6 +112,27 @@

Plotting & evalutation helpers combine_hp_performance()

Combine hyperparameter performance metrics for multiple train/test splits

+ +

Model evaluation

+

Evaluate and interpret models.

+ + +

get_feature_importance()

+ +

Get feature importance using the permutation method

+ +

get_performance_tbl()

+ +

Get model performance metrics as a one-row tibble

+ +

compare_models()

+ +

Perform permutation tests to compare the performance metric +across all pairs of a group variable.

+ +

permute_p_value()

+ +

Calculated a permuted p-value comparing two models

Package Data

@@ -148,23 +164,23 @@

ML results

otu_mini_bin_results_glmnet

-

Results from running the pipline with L2 logistic regression on otu_mini_bin with feature importance and grouping

+

Results from running the pipeline with L2 logistic regression on otu_mini_bin with feature importance and grouping

otu_mini_bin_results_rf

-

Results from running the pipline with random forest on otu_mini_bin

+

Results from running the pipeline with random forest on otu_mini_bin

otu_mini_bin_results_rpart2

-

Results from running the pipline with rpart2 on otu_mini_bin

+

Results from running the pipeline with rpart2 on otu_mini_bin

otu_mini_bin_results_svmRadial

-

Results from running the pipline with svmRadial on otu_mini_bin

+

Results from running the pipeline with svmRadial on otu_mini_bin

otu_mini_bin_results_xgbTree

-

Results from running the pipline with xbgTree on otu_mini_bin

+

Results from running the pipeline with xgbTree on otu_mini_bin

otu_mini_cont_results_glmnet

@@ -195,7 +211,7 @@

misc

Replace spaces in all elements of a character vector with underscores

Pipeline customization

-

These are functions called by preprocess_data() or run_ml(). We make them available in case you would like to customize various steps of the pipeline beyond the arguments provided by the main functions.

+

Customize various steps of the pipeline beyond the arguments provided by run_ml() and preprocess_data().

remove_singleton_columns()

@@ -245,14 +261,6 @@

Pipeline customization calc_perf_metrics()

Get performance metrics for test data

- -

get_performance_tbl()

- -

Get model performance metrics as a one-row tibble

- -

get_feature_importance()

- -

Get feature importance using the permutation method

group_correlated_features()

diff --git a/docs/reference/otu_mini_bin_results_glmnet.html b/docs/reference/otu_mini_bin_results_glmnet.html index 5ae3499d..4e42c52e 100644 --- a/docs/reference/otu_mini_bin_results_glmnet.html +++ b/docs/reference/otu_mini_bin_results_glmnet.html @@ -1,5 +1,5 @@ -Results from running the pipline with L2 logistic regression on otu_mini_bin with feature importance and grouping — otu_mini_bin_results_glmnet • mikropmlResults from running the pipeline with L2 logistic regression on otu_mini_bin with feature importance and grouping — otu_mini_bin_results_glmnet • mikropml @@ -69,13 +69,13 @@
-

Results from running the pipline with L2 logistic regression on otu_mini_bin with feature importance and grouping

+

Results from running the pipeline with L2 logistic regression on otu_mini_bin with feature importance and grouping

diff --git a/docs/reference/otu_mini_bin_results_rf.html b/docs/reference/otu_mini_bin_results_rf.html index aaf32bab..9952d73e 100644 --- a/docs/reference/otu_mini_bin_results_rf.html +++ b/docs/reference/otu_mini_bin_results_rf.html @@ -1,5 +1,5 @@ -Results from running the pipline with random forest on otu_mini_bin — otu_mini_bin_results_rf • mikropmlResults from running the pipeline with random forest on otu_mini_bin — otu_mini_bin_results_rf • mikropml @@ -69,13 +69,13 @@
-

Results from running the pipline with random forest on otu_mini_bin

+

Results from running the pipeline with random forest on otu_mini_bin

diff --git a/docs/reference/otu_mini_bin_results_rpart2.html b/docs/reference/otu_mini_bin_results_rpart2.html index f73bffd5..8e49390e 100644 --- a/docs/reference/otu_mini_bin_results_rpart2.html +++ b/docs/reference/otu_mini_bin_results_rpart2.html @@ -1,5 +1,5 @@ -Results from running the pipline with rpart2 on otu_mini_bin — otu_mini_bin_results_rpart2 • mikropmlResults from running the pipeline with rpart2 on otu_mini_bin — otu_mini_bin_results_rpart2 • mikropml @@ -69,13 +69,13 @@
-

Results from running the pipline with rpart2 on otu_mini_bin

+

Results from running the pipeline with rpart2 on otu_mini_bin

diff --git a/docs/reference/otu_mini_bin_results_svmRadial.html b/docs/reference/otu_mini_bin_results_svmRadial.html index 165e2009..34b5c1b8 100644 --- a/docs/reference/otu_mini_bin_results_svmRadial.html +++ b/docs/reference/otu_mini_bin_results_svmRadial.html @@ -1,5 +1,5 @@ -Results from running the pipline with svmRadial on otu_mini_bin — otu_mini_bin_results_svmRadial • mikropmlResults from running the pipeline with svmRadial on otu_mini_bin — otu_mini_bin_results_svmRadial • mikropml @@ -69,13 +69,13 @@
-

Results from running the pipline with svmRadial on otu_mini_bin

+

Results from running the pipeline with svmRadial on otu_mini_bin

diff --git a/docs/reference/otu_mini_bin_results_xgbTree.html b/docs/reference/otu_mini_bin_results_xgbTree.html index bda6634d..4a8ff944 100644 --- a/docs/reference/otu_mini_bin_results_xgbTree.html +++ b/docs/reference/otu_mini_bin_results_xgbTree.html @@ -1,5 +1,5 @@ -Results from running the pipline with xbgTree on otu_mini_bin — otu_mini_bin_results_xgbTree • mikropmlResults from running the pipeline with xbgTree on otu_mini_bin — otu_mini_bin_results_xgbTree • mikropml @@ -69,13 +69,13 @@
-

Results from running the pipline with xbgTree on otu_mini_bin

+

Results from running the pipeline with xgbTree on otu_mini_bin

From e0f3fbec133f6826bcb51f2622e75a644dea78ee Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 19 May 2022 21:57:38 -0400 Subject: [PATCH 09/10] Update comments for CRAN --- cran-comments.md | 1 - 1 file changed, 1 deletion(-) diff --git a/cran-comments.md b/cran-comments.md index a6e1dfd3..7078366e 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,4 +1,3 @@ -This patch fixes a test failure on the no long doubles platform. ## Test environments From 7c17eb768e3f9832bf3c2b04cda4bfc7d8eb7594 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 19 May 2022 22:07:05 -0400 Subject: [PATCH 10/10] Ignore CRAN-SUBMISSION I guess they changed the name from CRAN-RELEASE --- .Rbuildignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.Rbuildignore b/.Rbuildignore index c0900ea1..96912ad0 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -20,3 +20,4 @@ ^cran-comments\.md$ ^revdep$ ^CRAN-RELEASE$ +^CRAN-SUBMISSION$