diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index f1c204fb..745f3805 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -10,6 +10,8 @@ (~Strikethrough~ any points that are not applicable.) - [ ] Write unit tests for any new functionality or bug fixes. -- [ ] Update roxygen comments & vignettes if there are any API changes. +- [ ] Update docs if there are any API changes: + - [ ] roxygen comments + - [ ] vignettes - [ ] Update `NEWS.md` if this includes any user-facing changes. - [ ] The check workflow succeeds on your most recent commit. **This is always required before the PR can be merged.** diff --git a/.gitignore b/.gitignore index c030667b..c58a0f37 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,3 @@ -Meta/ -doc/ .DS_Store *.Rproj *.Rproj.user @@ -7,5 +5,5 @@ doc/ .Rhistory inst/doc .snakemake -doc -Meta +/doc/ +/Meta/ diff --git a/NEWS.md b/NEWS.md index 360d9264..6f23ac00 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,8 @@ # development version 1.3.0 -- Allow `kfold >= length(groups)`. (#285, @kelly-sovacool) +- Allow `kfold >= length(groups)` (#285, @kelly-sovacool). - When using the groups parameter, groups are kept together in cross-validation partitions when `kfold` <= the number of groups in the training set. Previously, an error was thrown if this condition was not met. Now, if there are not enough groups in the training set for groups to be kept together during CV, groups are allowed to be split up across CV partitions. +- Report p-values for permutation feature importance (#288, @kelly-sovacool) # mikropml 1.2.0 diff --git a/R/feature_importance.R b/R/feature_importance.R index 0692ac08..8e18caa9 100644 --- a/R/feature_importance.R +++ b/R/feature_importance.R @@ -13,12 +13,26 @@ #' character (`|`). If this is `NULL` (default), correlated features will be #' grouped together based on `corr_thresh`. #' -#' @return Dataframe with performance metrics for when each feature (or group of -#' correlated features; `names`) is permuted (`perf_metric`), and differences -#' between test performance metric and permuted performance metric -#' (`perf_metric_diff`; test minus permuted performance). Features with a -#' larger `perf_metric_diff` are more important. The performance metric name -#' (`perf_metric_name`) and seed (`seed`) are also returned. +#' @return Data frame with performance metrics for when each feature (or group +#' of correlated features; `names`) is permuted (`perf_metric`), differences +#' between the actual test performance metric and the permuted performance +#' metric (`perf_metric_diff`; test minus permuted performance), and the +#' p-value (`pvalue`: the probability of obtaining the actual performance +#' value under the null hypothesis). Features with a larger `perf_metric_diff` +#' are more important. The performance metric name (`perf_metric_name`) and +#' seed (`seed`) are also returned. +#' +#' @details +#' For permutation tests, the p-value is the number of permutation statistics +#' that are greater than the test statistic, divided by the number of +#' permutations. In our case, the permutation statistic is the model performance +#' (e.g. AUROC) after randomizing the order of observations for one feature, and +#' the test statistic is the actual performance on the test data. By default we +#' perform 100 permutations per feature; increasing this will increase the +#' precision of estimating the null distribution, but also increases runtime.
+#' The p-value represents the probability of obtaining the actual performance in +#' the event that the null hypothesis is true, where the null hypothesis is that +#' the feature is not important for model performance. #' #' @examples #' \dontrun{ @@ -83,6 +97,7 @@ #' @export #' @author Begüm Topçuoğlu, \email{topcuoglu.begum@@gmail.com} #' @author Zena Lapp, \email{zenalapp@@umich.edu} +#' @author Kelly Sovacool, \email{sovacool@@umich.edu} get_feature_importance <- function(trained_model, train_data, test_data, outcome_colname, perf_metric_function, perf_metric_name, class_probs, method, @@ -195,9 +210,10 @@ find_permuted_perf_metric <- function(test_data, trained_model, outcome_colname, )[[perf_metric_name]] ) }) - mean_perm_perf <- sum(perm_perfs) / nperms + mean_perm_perf <- mean(perm_perfs) return(c( perf_metric = mean_perm_perf, - perf_metric_diff = test_perf_value - mean_perm_perf + perf_metric_diff = test_perf_value - mean_perm_perf, + pvalue = calc_pvalue(perm_perfs, test_perf_value) )) } diff --git a/R/utils.R b/R/utils.R index c1fb9956..1b6bcb98 100644 --- a/R/utils.R +++ b/R/utils.R @@ -231,3 +231,17 @@ radix_sort <- function(...) { is_whole_number <- function(x, tol = .Machine$double.eps^0.5) { abs(x - round(x)) < tol } + +#' Calculate the p-value for a permutation test +#' +#' @param vctr vector of statistics +#' @param test_stat the test statistic +#' +#' @return the number of observations in `vctr` that are greater than +#' `test_stat` divided by the number of observations in `vctr` +#' +#' @noRd +#' @author Kelly Sovacool \email{sovacool@@umich.edu} +calc_pvalue <- function(vctr, test_stat) { + return(sum(vctr > test_stat) / length(vctr)) +} diff --git a/data-raw/otu_mini_bin.R b/data-raw/otu_mini_bin.R index e733cfd9..b48fd017 100644 --- a/data-raw/otu_mini_bin.R +++ b/data-raw/otu_mini_bin.R @@ -74,7 +74,7 @@ otu_mini_bin_results_rf <- mikropml::run_ml(otu_mini_bin, find_feature_importance = TRUE, seed = 2019, cv_times = 2, - group = otu_mini_group + groups = otu_mini_group ) usethis::use_data(otu_mini_bin_results_rf, overwrite = TRUE) diff --git a/data-raw/otu_mini_multi.R b/data-raw/otu_mini_multi.R index 37b43bab..1d13f23d 100644 --- a/data-raw/otu_mini_multi.R +++ b/data-raw/otu_mini_multi.R @@ -12,6 +12,6 @@ otu_mini_multi_results_glmnet <- mikropml::run_ml(otu_mini_multi, # use built-in find_feature_importance = TRUE, seed = 2019, cv_times = 2, - group = otu_mini_multi_group + groups = otu_mini_multi_group ) usethis::use_data(otu_mini_multi_results_glmnet, overwrite = TRUE) diff --git a/data/otu_mini_bin.rda b/data/otu_mini_bin.rda index 84ba2e2a..c8e69ca7 100644 Binary files a/data/otu_mini_bin.rda and b/data/otu_mini_bin.rda differ diff --git a/data/otu_mini_bin_results_rf.rda b/data/otu_mini_bin_results_rf.rda index d0f72c1e..27c0b828 100644 Binary files a/data/otu_mini_bin_results_rf.rda and b/data/otu_mini_bin_results_rf.rda differ diff --git a/data/otu_mini_cont_results_glmnet.rda b/data/otu_mini_cont_results_glmnet.rda index 65c0b6b7..386b81fd 100644 Binary files a/data/otu_mini_cont_results_glmnet.rda and b/data/otu_mini_cont_results_glmnet.rda differ diff --git a/data/otu_mini_multi.rda b/data/otu_mini_multi.rda index b28d66c1..9db7d23e 100644 Binary files a/data/otu_mini_multi.rda and b/data/otu_mini_multi.rda differ diff --git a/data/otu_mini_multi_group.rda b/data/otu_mini_multi_group.rda index 5db10426..747efbcc 100644 Binary files a/data/otu_mini_multi_group.rda and b/data/otu_mini_multi_group.rda differ diff --git 
a/data/otu_mini_multi_results_glmnet.rda b/data/otu_mini_multi_results_glmnet.rda index a0280f12..4109291f 100644 Binary files a/data/otu_mini_multi_results_glmnet.rda and b/data/otu_mini_multi_results_glmnet.rda differ diff --git a/docs/articles/introduction.html b/docs/articles/introduction.html index ab988cf6..769488ae 100644 --- a/docs/articles/introduction.html +++ b/docs/articles/introduction.html @@ -438,23 +438,25 @@
Now, we can check out the feature importances:
results_imp$feature_importance
-#> perf_metric perf_metric_diff names method perf_metric_name seed
-#> 1 0.5542375 0.0082625 Otu00001 rf AUC 2019
-#> 2 0.5731750 -0.0106750 Otu00002 rf AUC 2019
-#> 3 0.5548750 0.0076250 Otu00003 rf AUC 2019
-#> 4 0.6414750 -0.0789750 Otu00004 rf AUC 2019
-#> 5 0.5049625 0.0575375 Otu00005 rf AUC 2019
-#> 6 0.5444500 0.0180500 Otu00006 rf AUC 2019
-#> 7 0.5417125 0.0207875 Otu00007 rf AUC 2019
-#> 8 0.5257750 0.0367250 Otu00008 rf AUC 2019
-#> 9 0.5395750 0.0229250 Otu00009 rf AUC 2019
-#> 10 0.4977625 0.0647375 Otu00010 rf AUC 2019
There are several columns:
`perf_metric`: The performance value of the permuted feature.
-`perf_metric_diff`: The difference between the performance for the true and permuted data (i.e. test performance minus permuted performance). Features with a larger `perf_metric_diff` are more important.
+`perf_metric_diff`: The difference between the performance for the actual and permuted data (i.e. test performance minus permuted performance). Features with a larger `perf_metric_diff` are more important.
+`pvalue`: the probability of obtaining the actual performance value under the null hypothesis.
`names`: The feature that was permuted.
-Dataframe with performance metrics for when each feature (or group of correlated features; `names`) is permuted (`perf_metric`), and differences between test performance metric and permuted performance metric (`perf_metric_diff`; test minus permuted performance). Features with a larger `perf_metric_diff` are more important. The performance metric name (`perf_metric_name`) and seed (`seed`) are also returned.
+Data frame with performance metrics for when each feature (or group of correlated features; `names`) is permuted (`perf_metric`), differences between the actual test performance metric and the permuted performance metric (`perf_metric_diff`; test minus permuted performance), and the p-value (`pvalue`: the probability of obtaining the actual performance value under the null hypothesis). Features with a larger `perf_metric_diff` are more important. The performance metric name (`perf_metric_name`) and seed (`seed`) are also returned.
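To make the new column concrete, here is a minimal usage sketch (assuming `results_imp` was created with `find_feature_importance = TRUE` as in the vignette; the 0.05 cutoff is only an illustration, not a package default):

feat_imp <- results_imp$feature_importance
feat_imp <- feat_imp[order(-feat_imp$perf_metric_diff), ]  # most important features first
subset(feat_imp, pvalue < 0.05)                            # arbitrary illustrative cutoff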
For permutation tests, the p-value is the number of permutation statistics +that are greater than the test statistic, divided by the number of +permutations. In our case, the permutation statistic is the model performance +(e.g. AUROC) after randomizing the order of observations for one feature, and +the test statistic is the actual performance on the test data. By default we +perform 100 permutations per feature; increasing this will increase the +precision of estimating the null distribution, but also increases runtime. +The p-value represents the probability of obtaining the actual performance in +the event that the null hypothesis is true, where the null hypothesis is that +the feature is not important for model performance.
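As a toy illustration of that definition (hypothetical numbers, not package output), the arithmetic mirrors the new `calc_pvalue()` helper in R/utils.R:

perm_perfs <- c(0.61, 0.63, 0.60, 0.66, 0.62)  # e.g. AUROCs from 5 permutations of one feature
test_perf_value <- 0.65                        # actual AUROC on the unpermuted test data
sum(perm_perfs > test_perf_value) / length(perm_perfs)  # 1 of 5 permuted values exceeds it, so p = 0.2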
Begüm Topçuoğlu, topcuoglu.begum@gmail.com
Zena Lapp, zenalapp@umich.edu
+Kelly Sovacool, sovacool@umich.edu
if (FALSE) { diff --git a/docs/reference/get_perf_metric_fn.html b/docs/reference/get_perf_metric_fn.html index e285da89..82c5d2d0 100644 --- a/docs/reference/get_perf_metric_fn.html +++ b/docs/reference/get_perf_metric_fn.html @@ -182,7 +182,7 @@ diff --git a/man/get_feature_importance.Rd b/man/get_feature_importance.Rd index fda505bc..e6b48868 100644 --- a/man/get_feature_importance.Rd +++ b/man/get_feature_importance.Rd @@ -72,17 +72,31 @@ grouped together based on \code{corr_thresh}.} by \code{stats::cor}: spearman, pearson, kendall. (default: spearman)} } \value{ -Dataframe with performance metrics for when each feature (or group of -correlated features; \code{names}) is permuted (\code{perf_metric}), and differences -between test performance metric and permuted performance metric -(\code{perf_metric_diff}; test minus permuted performance). Features with a -larger \code{perf_metric_diff} are more important. The performance metric name -(\code{perf_metric_name}) and seed (\code{seed}) are also returned. +Data frame with performance metrics for when each feature (or group +of correlated features; \code{names}) is permuted (\code{perf_metric}), differences +between the actual test performance metric and the permuted performance +metric (\code{perf_metric_diff}; test minus permuted performance), and the +p-value (\code{pvalue}: the probability of obtaining the actual performance +value under the null hypothesis). Features with a larger \code{perf_metric_diff} +are more important. The performance metric name (\code{perf_metric_name}) and +seed (\code{seed}) are also returned. } \description{ Calculates feature importance using a trained model and test data. Requires the \code{future.apply} package. } +\details{ +For permutation tests, the p-value is the number of permutation statistics +that are greater than the test statistic, divided by the number of +permutations. In our case, the permutation statistic is the model performance +(e.g. AUROC) after randomizing the order of observations for one feature, and +the test statistic is the actual performance on the test data. By default we +perform 100 permutations per feature; increasing this will increase the +precision of estimating the null distribution, but also increases runtime.
+} \examples{ \dontrun{ results <- run_ml(otu_small, "glmnet", kfold = 2, cv_times = 2) @@ -148,4 +162,6 @@ feat_imp <- get_feature_importance(results$trained_model, Begüm Topçuoğlu, \email{topcuoglu.begum@gmail.com} Zena Lapp, \email{zenalapp@umich.edu} + +Kelly Sovacool, \email{sovacool@umich.edu} } diff --git a/tests/testthat/test-feature_importance.R b/tests/testthat/test-feature_importance.R index 748af1f0..7db5805f 100644 --- a/tests/testthat/test-feature_importance.R +++ b/tests/testthat/test-feature_importance.R @@ -23,7 +23,8 @@ test_that("find_permuted_perf_metric works", { ), c( perf_metric = 0.647368421052632, - perf_metric_diff = -2.1052632526164e-08 + perf_metric_diff = -2.1052632526164e-08, + pvalue = 1 ), tol = 10e-5 ) @@ -38,7 +39,8 @@ test_that("find_permuted_perf_metric works", { ), c( perf_metric = 0.647368421052632, - perf_metric_diff = -2.1052632526164e-08 + perf_metric_diff = -2.1052632526164e-08, + pvalue = 1 ), tol = 10e-5 ) @@ -54,39 +56,50 @@ test_that("find_permuted_perf_metric works", { ), c( perf_metric = 0.639315789473684, - perf_metric_diff = 0.00805261052631601 + perf_metric_diff = 0.00805261052631601, + pvalue = 0.18 ), tol = 10e-5 ) }) -feat_imp <- structure(list(perf_metric = c( - 0.629157894736842, 0.605473684210526, - 0.638789473684211, 0.636763157894737, 0.629447368421053, 0.637868421052632, - 0.642552631578947, 0.592157894736842, 0.639684210526316, 0.637526315789474 -), perf_metric_diff = c( - 0.0182105263157896, 0.0418947368421053, - 0.00857894736842102, 0.0106052631578948, 0.0179210526315789, - 0.00950000000000006, 0.00481578947368433, 0.0552105263157895, - 0.00768421052631585, 0.00984210526315799 -), names = structure(1:10, .Label = c( - "Otu00001", - "Otu00002", "Otu00003", "Otu00004", "Otu00005", "Otu00006", "Otu00007", - "Otu00008", "Otu00009", "Otu00010" -), class = "factor"), method = c( - "glmnet", - "glmnet", "glmnet", "glmnet", "glmnet", "glmnet", "glmnet", "glmnet", - "glmnet", "glmnet" -), perf_metric_name = c( - "AUC", "AUC", "AUC", - "AUC", "AUC", "AUC", "AUC", "AUC", "AUC", "AUC" -), seed = c( - 2019, - 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019 -)), row.names = c( - NA, - -10L -), class = "data.frame") +feat_imp <- structure(list( + perf_metric = c( + 0.629157894736842, 0.605473684210526, + 0.63878947368421, 0.636763157894737, 0.629447368421053, 0.637868421052632, + 0.642552631578947, 0.592157894736842, 0.639684210526316, 0.637526315789474 + ), + perf_metric_diff = c( + 0.0182105263157895, 0.0418947368421053, + 0.00857894736842113, 0.0106052631578948, 0.0179210526315789, + 0.00950000000000006, 0.00481578947368422, 0.0552105263157895, + 0.00768421052631585, 0.00984210526315787 + ), + pvalue = c( + 0.17, 0.06, + 0.09, 0.28, 0.37, 0.28, 0.24, 0.09, 0.12, 0.41 + ), + names = structure(1:10, .Label = c( + "Otu00001", + "Otu00002", "Otu00003", "Otu00004", "Otu00005", "Otu00006", "Otu00007", + "Otu00008", "Otu00009", "Otu00010" + ), class = "factor"), + method = c( + "glmnet", + "glmnet", "glmnet", "glmnet", "glmnet", "glmnet", "glmnet", "glmnet", + "glmnet", "glmnet" + ), + perf_metric_name = c( + "AUC", "AUC", "AUC", + "AUC", "AUC", "AUC", "AUC", "AUC", "AUC", "AUC" + ), + seed = c( + 2019, + 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019 + ) +), +row.names = c(NA, -10L), class = "data.frame" +) test_that("feature importances are correct", { set.seed(2019) @@ -158,10 +171,13 @@ test_that("custom grouped features works", { 0.633605263157895, 0.639105263157895, 0.642421052631579, 0.596842105263158, 0.640289473684211, 
0.629868421052632 ), perf_metric_diff = c( - 0.0182105263157896, + 0.0182105263157895, 0.0504473684210527, 0.0137631578947369, 0.00826315789473686, - 0.00494736842105259, 0.0505263157894738, 0.00707894736842096, + 0.0049473684210527, 0.0505263157894738, 0.00707894736842107, 0.0175000000000001 + ), pvalue = c( + 0.17, 0.24, 0.27, 0.3, 0.26, + 0.08, 0.11, 0.3 ), names = structure(1:8, .Label = c( "Otu00001", "Otu00002|Otu00003|Otu00005", "Otu00004", "Otu00006", "Otu00007", diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index c547d401..338b792e 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -104,3 +104,9 @@ test_that("is_whole_number() checks for integer numbers regardless of class", { expect_true(all(is_whole_number(c(1.0, 2.0, 3.0)))) expect_false(is_whole_number(1.2)) }) + +test_that("calc_pvalue() works", { + expect_equal(calc_pvalue(1:20, 10), 0.5) + expect_equal(calc_pvalue(1, 1), 0) + expect_equal(calc_pvalue(1:3, 0), 1) +}) diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index 17e0c5a1..9cb1d8a9 100644 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -422,7 +422,8 @@ results_imp$feature_importance There are several columns: 1. `perf_metric`: The performance value of the permuted feature. -1. `perf_metric_diff`: The difference between the performance for the true and permuted data (i.e. test performance minus permuted performance). Features with a larger `perf_metric_diff` are more important. +1. `perf_metric_diff`: The difference between the performance for the actual and permuted data (i.e. test performance minus permuted performance). Features with a larger `perf_metric_diff` are more important. +1. `pvalue`: the probability of obtaining the actual performance value under the null hypothesis. 1. `names`: The feature that was permuted. 1. `method`: The ML method used. 1. `perf_metric_name`: The peformance metric used.Examp #> data$obs <- factor(data$obs, levels = lev) #> postResample(data[, "pred"], data[, "obs"]) #> } -#> <bytecode: 0x7f7fa0eaf3c0> +#> <bytecode: 0x7fc428237898> #> <environment: namespace:caret> get_perf_metric_fn("binary") #> function (data, lev = NULL, model = NULL) @@ -240,7 +240,7 @@
Examp #> stats <- stats[c(stat_list)] #> return(stats) #> } -#> <bytecode: 0x7f7f9f130158> +#> <bytecode: 0x7fc445fd1c50> #> <environment: namespace:caret> get_perf_metric_fn("multiclass") #> function (data, lev = NULL, model = NULL) @@ -298,7 +298,7 @@
Examp #> stats <- stats[c(stat_list)] #> return(stats) #> } -#> <bytecode: 0x7f7f9f130158> +#> <bytecode: 0x7fc445fd1c50> #> <environment: namespace:caret>
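For context, the permutation scheme this diff implements in `find_permuted_perf_metric()` boils down to the rough sketch below. `perf_fn` and the `predict()` call are hypothetical stand-ins (the real function dispatches through `perf_metric_function` and parallelizes with `future.apply`), but the returned triple matches the columns described above:

permute_importance <- function(trained_model, test_data, outcome_colname, feat,
                               perf_fn, test_perf_value, nperms = 100) {
  perm_perfs <- vapply(seq_len(nperms), function(i) {
    permuted <- test_data
    permuted[[feat]] <- sample(permuted[[feat]])  # shuffle one feature, breaking its link to the outcome
    perf_fn(predict(trained_model, permuted, type = "prob"), permuted[[outcome_colname]])  # e.g. AUROC
  }, numeric(1))
  c(
    perf_metric = mean(perm_perfs),
    perf_metric_diff = test_perf_value - mean(perm_perfs),
    pvalue = sum(perm_perfs > test_perf_value) / length(perm_perfs)
  )
}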