Merge pull request #293 from SchlossLab/feat-imp-doc

Improve feature importance examples
SchlossLab · Mar 11, 2022 · d6b511d · d6b511d
2 parents 60608a5 + 181fa88
commit d6b511d
Show file tree

Hide file tree

Showing 7 changed files with 135 additions and 66 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1,7 +1,8 @@
 # development version
 
 - mikropml now requires R version 4.1.0 or greater due to an update in the randomForest package (#292). 
-- Fix bug where `cv_times` had no effect on repeats for cross-validation (#291, @kelly-sovacool).
+- Fix bug where `cv_times` had no effect on reported repeats for cross-validation (#291, @kelly-sovacool).
+- Made minor documentation improvements (#293, @kelly-sovacool).
 
 # mikropml 1.2.2
 

diff --git a/R/feature_importance.R b/R/feature_importance.R
@@ -34,23 +34,41 @@
 #' the event that the null hypothesis is true, where the null hypothesis is that
 #' the feature is not important for model performance.
 #'
+#' We strongly recommend providing multiple cores to speed up computation time.
+#' See [our vignette on parallel processing](http://www.schlosslab.org/mikropml/articles/parallel.html)
+#' for more details.
+#'
 #' @examples
 #' \dontrun{
+#' # If you called `run_ml()` with `feature_importance = FALSE` (the default),
+#' # you can use `get_feature_importance()` later as long as you have the
+#' # trained model and test data.
 #' results <- run_ml(otu_small, "glmnet", kfold = 2, cv_times = 2)
 #' names(results$trained_model$trainingData)[1] <- "dx"
-#' get_feature_importance(results$trained_model,
-#'   results$trained_model$trainingData, results$test_data,
+#' feat_imp <- get_feature_importance(results$trained_model,
+#'   results$trained_model$trainingData,
+#'   results$test_data,
 #'   "dx",
-#'   multiClassSummary, "AUC",
-#'   class_probs = TRUE, method = "glmnet"
+#'   multiClassSummary,
+#'   "AUC",
+#'   class_probs = TRUE,
+#'   method = "glmnet"
 #' )
 #'
-#' # optionally, you can group features together with a custom grouping
-#' get_feature_importance(results$trained_model,
-#'   results$trained_model$trainingData, results$test_data,
+#' # We strongly recommend providing multiple cores to speed up computation time.
+#' # Do this before calling `get_feature_importance()`.
+#' doFuture::registerDoFuture()
+#' future::plan(future::multicore, workers = 2)
+#'
+#' # Optionally, you can group features together with a custom grouping
+#' feat_imp <- get_feature_importance(results$trained_model,
+#'   results$trained_model$trainingData,
+#'   results$test_data,
 #'   "dx",
-#'   multiClassSummary, "AUC",
-#'   class_probs = TRUE, method = "glmnet",
+#'   multiClassSummary,
+#'   "AUC",
+#'   class_probs = TRUE,
+#'   method = "glmnet",
 #'   groups = c(
 #'     "Otu00007", "Otu00008", "Otu00009", "Otu00011", "Otu00012",
 #'     "Otu00015", "Otu00016", "Otu00018", "Otu00019", "Otu00020", "Otu00022",
@@ -66,9 +84,8 @@
 #'   )
 #' )
 #'
-#' # the function can show a progress bar if you have the progressr package installed
-#' ## optionally, specify the progress bar format
-#'
+#' # The function can show a progress bar if you have the `progressr` package installed.
+#' ## optionally, specify the progress bar format:
 #' progressr::handlers(progressr::handler_progress(
 #'   format = ":message :bar :percent | elapsed: :elapsed | eta: :eta",
 #'   clear = FALSE,
@@ -78,18 +95,24 @@
 #' progressr::handlers(global = TRUE)
 #' ## run the function and watch the live progress updates
 #' feat_imp <- get_feature_importance(results$trained_model,
-#'   results$trained_model$trainingData, results$test_data,
+#'   results$trained_model$trainingData,
+#'   results$test_data,
 #'   "dx",
-#'   multiClassSummary, "AUC",
-#'   class_probs = TRUE, method = "glmnet"
+#'   multiClassSummary,
+#'   "AUC",
+#'   class_probs = TRUE,
+#'   method = "glmnet"
 #' )
 #'
-#' # you can specify any correlation method supported by `stats::cor`:
+#' # You can specify any correlation method supported by `stats::cor`:
 #' feat_imp <- get_feature_importance(results$trained_model,
-#'   results$trained_model$trainingData, results$test_data,
+#'   results$trained_model$trainingData,
+#'   results$test_data,
 #'   "dx",
-#'   multiClassSummary, "AUC",
-#'   class_probs = TRUE, method = "glmnet",
+#'   multiClassSummary,
+#'   "AUC",
+#'   class_probs = TRUE,
+#'   method = "glmnet",
 #'   corr_method = "pearson"
 #' )
 #' }

diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml
@@ -7,7 +7,7 @@ articles:
   parallel: parallel.html
   preprocess: preprocess.html
   tuning: tuning.html
-last_built: 2022-02-16T16:38Z
+last_built: 2022-02-24T19:44Z
 urls:
   reference: http://www.schlosslab.org/mikropml/reference
   article: http://www.schlosslab.org/mikropml/articles

diff --git a/docs/reference/get_feature_importance.html b/docs/reference/get_feature_importance.html
diff --git a/docs/reference/get_perf_metric_fn.html b/docs/reference/get_perf_metric_fn.html