From 5c822f8236b5d1d19f26a49be8ce404899c8d623 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 29 Jan 2022 16:50:57 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=93=91=20Build=20docs=20site?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/404.html | 156 +- docs/CODE_OF_CONDUCT.html | 198 +- docs/CONTRIBUTING.html | 176 +- docs/LICENSE-text.html | 123 +- docs/LICENSE.html | 123 +- docs/SUPPORT.html | 150 +- docs/articles/index.html | 145 +- docs/articles/introduction.html | 272 +- docs/articles/paper.html | 144 +- docs/articles/parallel.html | 140 +- docs/articles/preprocess.html | 402 +- docs/articles/tuning.html | 173 +- docs/authors.html | 212 +- docs/index.html | 157 +- docs/news/index.html | 280 +- docs/pkgdown.css | 83 +- docs/pkgdown.js | 4 +- docs/pkgdown.yml | 4 +- docs/pull_request_template.html | 164 +- docs/reference/calc_perf_metrics.html | 226 +- docs/reference/combine_hp_performance.html | 183 +- docs/reference/define_cv.html | 268 +- docs/reference/get_caret_processed_df.html | 4611 ++++++++--------- docs/reference/get_feature_importance.html | 415 +- docs/reference/get_hp_performance.html | 203 +- docs/reference/get_hyperparams_list.html | 211 +- docs/reference/get_outcome_type.html | 178 +- docs/reference/get_partition_indices.html | 209 +- docs/reference/get_perf_metric_fn.html | 412 +- docs/reference/get_perf_metric_name.html | 176 +- docs/reference/get_performance_tbl.html | 261 +- docs/reference/get_tuning_grid.html | 199 +- docs/reference/group_correlated_features.html | 210 +- docs/reference/index.html | 378 +- docs/reference/mikropml.html | 171 +- docs/reference/otu_mini_bin.html | 138 +- .../otu_mini_bin_results_glmnet.html | 136 +- docs/reference/otu_mini_bin_results_rf.html | 136 +- .../otu_mini_bin_results_rpart2.html | 136 +- .../otu_mini_bin_results_svmRadial.html | 136 +- .../otu_mini_bin_results_xgbTree.html | 136 +- 
.../otu_mini_cont_results_glmnet.html | 142 +- .../reference/otu_mini_cont_results_nocv.html | 142 +- docs/reference/otu_mini_cv.html | 136 +- docs/reference/otu_mini_multi.html | 136 +- docs/reference/otu_mini_multi_group.html | 136 +- .../otu_mini_multi_results_glmnet.html | 142 +- docs/reference/otu_small.html | 140 +- docs/reference/plot_hp_performance.html | 236 +- docs/reference/plot_model_performance.html | 238 +- docs/reference/preprocess_data.html | 342 +- docs/reference/randomize_feature_order.html | 192 +- docs/reference/reexports.html | 149 +- docs/reference/remove_singleton_columns.html | 250 +- docs/reference/replace_spaces.html | 190 +- docs/reference/run_ml.html | 397 +- docs/reference/tidy_perf_data.html | 194 +- docs/reference/train_model.html | 279 +- docs/sitemap.xml | 192 +- 59 files changed, 5909 insertions(+), 9959 deletions(-) diff --git a/docs/404.html b/docs/404.html index 5b1bb82c..7b971dc5 100644 --- a/docs/404.html +++ b/docs/404.html @@ -1,74 +1,34 @@ - - - - + + + + - Page not found (404) • mikropml - - - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + - - - - -
-
- + +
+ + + - - -
+
+
-
+ + - - diff --git a/docs/CODE_OF_CONDUCT.html b/docs/CODE_OF_CONDUCT.html index c1ff535e..f1d8d4dc 100644 --- a/docs/CODE_OF_CONDUCT.html +++ b/docs/CODE_OF_CONDUCT.html @@ -1,74 +1,12 @@ - - - - - - - -Contributor Covenant Code of Conduct • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Contributor Covenant Code of Conduct • mikropml - + + - - - -
-
- -
- -
+
- - + + diff --git a/docs/reference/get_outcome_type.html b/docs/reference/get_outcome_type.html index 001f334a..018bb133 100644 --- a/docs/reference/get_outcome_type.html +++ b/docs/reference/get_outcome_type.html @@ -1,77 +1,14 @@ - - - - - - - -Get outcome type. — get_outcome_type • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Get outcome type. — get_outcome_type • mikropml - - - - - - - - - - + + - - - - -
-
- -
- -
+
@@ -160,57 +82,55 @@

Get outcome type.

multiclass if there are more than two outcomes.

-
get_outcome_type(outcomes_vec)
- -

Arguments

- - - - - - -
outcomes_vec

Vector of outcomes.

- -

Value

+
+
get_outcome_type(outcomes_vec)
+
+
+

Arguments

+
outcomes_vec
+

Vector of outcomes.

+
+
+

Value

Outcome type (continuous, binary, or multiclass).

-

Author

- -

Zena Lapp, zenalapp@umich.edu

- -

Examples

-
get_outcome_type(c(1, 2, 1))
-#> [1] "continuous"
-get_outcome_type(c("a", "b", "b"))
-#> [1] "binary"
-get_outcome_type(c("a", "b", "c"))
-#> [1] "multiclass"
-
+
+
+

Author

+

Zena Lapp, zenalapp@umich.edu

+
+ +
+

Examples

+
get_outcome_type(c(1, 2, 1))
+#> [1] "continuous"
+get_outcome_type(c("a", "b", "b"))
+#> [1] "binary"
+get_outcome_type(c("a", "b", "c"))
+#> [1] "multiclass"
+
+
+
-
- - + + diff --git a/docs/reference/get_partition_indices.html b/docs/reference/get_partition_indices.html index 8b1950dc..6d6cd042 100644 --- a/docs/reference/get_partition_indices.html +++ b/docs/reference/get_partition_indices.html @@ -1,75 +1,12 @@ - - - - - - - -Select indices to partition the data into training & testing sets. — get_partition_indices • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Select indices to partition the data into training & testing sets. — get_partition_indices • mikropml - - + + - - -
-
- -
- -
+
@@ -156,38 +78,32 @@

Select indices to partition the data into training & testing sets.

Use this function to get the row indices for the training set.

-
get_partition_indices(
-  outcomes,
-  training_frac = 0.8,
-  groups = NULL,
-  group_partitions = NULL
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - -
outcomes

vector of outcomes

training_frac

Fraction of data for training set (default: 0.8). Rows +

+
get_partition_indices(
+  outcomes,
+  training_frac = 0.8,
+  groups = NULL,
+  group_partitions = NULL
+)
+
+ +
+

Arguments

+
outcomes
+

vector of outcomes

+
training_frac
+

Fraction of data for training set (default: 0.8). Rows from the dataset will be randomly selected for the training set, and all remaining rows will be used in the testing set. Alternatively, if you provide a vector of integers, these will be used as the row indices for the -training set. All remaining rows will be used in the testing set.

groups

Vector of groups to keep together when splitting the data into +training set. All remaining rows will be used in the testing set.

+
groups
+

Vector of groups to keep together when splitting the data into train and test sets. If the number of groups in the training set is larger than kfold, the groups will also be kept together for cross-validation. -Length matches the number of rows in the dataset (default: NULL).

group_partitions

Specify how to assign groups to the training and +Length matches the number of rows in the dataset (default: NULL).

+
group_partitions
+

Specify how to assign groups to the training and testing partitions (default: NULL). If groups specifies that some samples belong to group "A" and some belong to group "B", then setting group_partitions = list(train = c("A", "B"), test = c("B")) will result @@ -196,53 +112,52 @@

Arg "B" in the testing set. The partition sizes will be as close to training_frac as possible. If the number of groups in the training set is larger than kfold, the groups will also be kept together for -cross-validation.

- -

Value

- +cross-validation.

+
+
+

Value

Vector of row indices for the training set.

-

Details

- -

If groups is NULL, uses createDataPartition. +

+
+

Details

+

If groups is NULL, uses createDataPartition. Otherwise, uses create_grouped_data_partition().

Set the seed prior to calling this function if you would like your data partitions to be reproducible (recommended).

-

Author

- +
+
+

Author

Kelly Sovacool, sovacool@umich.edu

+
-

Examples

-
training_inds <- get_partition_indices(otu_mini_bin$dx)
-train_data <- otu_mini_bin[training_inds, ]
-test_data <- otu_mini_bin[-training_inds, ]
-
+
+

Examples

+
training_inds <- get_partition_indices(otu_mini_bin$dx)
+train_data <- otu_mini_bin[training_inds, ]
+test_data <- otu_mini_bin[-training_inds, ]
+
+
+
-
- - + + diff --git a/docs/reference/get_perf_metric_fn.html b/docs/reference/get_perf_metric_fn.html index 82c5d2d0..48842363 100644 --- a/docs/reference/get_perf_metric_fn.html +++ b/docs/reference/get_perf_metric_fn.html @@ -1,75 +1,12 @@ - - - - - - - -Get default performance metric function — get_perf_metric_fn • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Get default performance metric function — get_perf_metric_fn • mikropml + + - - - - -
-
- -
- -
+
@@ -156,176 +78,174 @@

Get default performance metric function

Get default performance metric function

-
get_perf_metric_fn(outcome_type)
- -

Arguments

- - - - - - -
outcome_type

Type of outcome (one of: "continuous","binary","multiclass").

- -

Value

+
+
get_perf_metric_fn(outcome_type)
+
+
+

Arguments

+
outcome_type
+

Type of outcome (one of: "continuous","binary","multiclass").

+
+
+

Value

Performance metric function.

-

Author

- -

Zena Lapp, zenalapp@umich.edu

+
+
+

Author

+

Zena Lapp, zenalapp@umich.edu

+
-

Examples

-
get_perf_metric_fn("continuous")
-#> function (data, lev = NULL, model = NULL) 
-#> {
-#>     if (is.character(data$obs)) 
-#>         data$obs <- factor(data$obs, levels = lev)
-#>     postResample(data[, "pred"], data[, "obs"])
-#> }
-#> <bytecode: 0x7fc428237898>
-#> <environment: namespace:caret>
-get_perf_metric_fn("binary")
-#> function (data, lev = NULL, model = NULL) 
-#> {
-#>     if (!all(levels(data[, "pred"]) == levels(data[, "obs"]))) 
-#>         stop("levels of observed and predicted data do not match")
-#>     has_class_probs <- all(lev %in% colnames(data))
-#>     if (has_class_probs) {
-#>         lloss <- mnLogLoss(data = data, lev = lev, model = model)
-#>         requireNamespaceQuietStop("pROC")
-#>         requireNamespaceQuietStop("MLmetrics")
-#>         prob_stats <- lapply(levels(data[, "pred"]), function(x) {
-#>             obs <- ifelse(data[, "obs"] == x, 1, 0)
-#>             prob <- data[, x]
-#>             roc_auc <- try(pROC::roc(obs, data[, x], direction = "<", 
-#>                 quiet = TRUE), silent = TRUE)
-#>             roc_auc <- if (inherits(roc_auc, "try-error")) 
-#>                 NA
-#>             else roc_auc$auc
-#>             pr_auc <- try(MLmetrics::PRAUC(y_pred = data[, x], 
-#>                 y_true = obs), silent = TRUE)
-#>             if (inherits(pr_auc, "try-error")) 
-#>                 pr_auc <- NA
-#>             res <- c(ROC = roc_auc, AUC = pr_auc)
-#>             return(res)
-#>         })
-#>         prob_stats <- do.call("rbind", prob_stats)
-#>         prob_stats <- colMeans(prob_stats, na.rm = TRUE)
-#>     }
-#>     CM <- confusionMatrix(data[, "pred"], data[, "obs"], mode = "everything")
-#>     if (length(levels(data[, "pred"])) == 2) {
-#>         class_stats <- CM$byClass
-#>     }
-#>     else {
-#>         class_stats <- colMeans(CM$byClass)
-#>         names(class_stats) <- paste("Mean", names(class_stats))
-#>     }
-#>     overall_stats <- if (has_class_probs) 
-#>         c(CM$overall, logLoss = as.numeric(lloss), AUC = unname(prob_stats["ROC"]), 
-#>             prAUC = unname(prob_stats["AUC"]))
-#>     else CM$overall
-#>     stats <- c(overall_stats, class_stats)
-#>     stats <- stats[!names(stats) %in% c("AccuracyNull", "AccuracyLower", 
-#>         "AccuracyUpper", "AccuracyPValue", "McnemarPValue", "Mean Prevalence", 
-#>         "Mean Detection Prevalence")]
-#>     names(stats) <- gsub("[[:blank:]]+", "_", names(stats))
-#>     stat_list <- c("Accuracy", "Kappa", "Mean_F1", "Mean_Sensitivity", 
-#>         "Mean_Specificity", "Mean_Pos_Pred_Value", "Mean_Neg_Pred_Value", 
-#>         "Mean_Precision", "Mean_Recall", "Mean_Detection_Rate", 
-#>         "Mean_Balanced_Accuracy")
-#>     if (has_class_probs) 
-#>         stat_list <- c("logLoss", "AUC", "prAUC", stat_list)
-#>     if (length(levels(data[, "pred"])) == 2) 
-#>         stat_list <- gsub("^Mean_", "", stat_list)
-#>     stats <- stats[c(stat_list)]
-#>     return(stats)
-#> }
-#> <bytecode: 0x7fc445fd1c50>
-#> <environment: namespace:caret>
-get_perf_metric_fn("multiclass")
-#> function (data, lev = NULL, model = NULL) 
-#> {
-#>     if (!all(levels(data[, "pred"]) == levels(data[, "obs"]))) 
-#>         stop("levels of observed and predicted data do not match")
-#>     has_class_probs <- all(lev %in% colnames(data))
-#>     if (has_class_probs) {
-#>         lloss <- mnLogLoss(data = data, lev = lev, model = model)
-#>         requireNamespaceQuietStop("pROC")
-#>         requireNamespaceQuietStop("MLmetrics")
-#>         prob_stats <- lapply(levels(data[, "pred"]), function(x) {
-#>             obs <- ifelse(data[, "obs"] == x, 1, 0)
-#>             prob <- data[, x]
-#>             roc_auc <- try(pROC::roc(obs, data[, x], direction = "<", 
-#>                 quiet = TRUE), silent = TRUE)
-#>             roc_auc <- if (inherits(roc_auc, "try-error")) 
-#>                 NA
-#>             else roc_auc$auc
-#>             pr_auc <- try(MLmetrics::PRAUC(y_pred = data[, x], 
-#>                 y_true = obs), silent = TRUE)
-#>             if (inherits(pr_auc, "try-error")) 
-#>                 pr_auc <- NA
-#>             res <- c(ROC = roc_auc, AUC = pr_auc)
-#>             return(res)
-#>         })
-#>         prob_stats <- do.call("rbind", prob_stats)
-#>         prob_stats <- colMeans(prob_stats, na.rm = TRUE)
-#>     }
-#>     CM <- confusionMatrix(data[, "pred"], data[, "obs"], mode = "everything")
-#>     if (length(levels(data[, "pred"])) == 2) {
-#>         class_stats <- CM$byClass
-#>     }
-#>     else {
-#>         class_stats <- colMeans(CM$byClass)
-#>         names(class_stats) <- paste("Mean", names(class_stats))
-#>     }
-#>     overall_stats <- if (has_class_probs) 
-#>         c(CM$overall, logLoss = as.numeric(lloss), AUC = unname(prob_stats["ROC"]), 
-#>             prAUC = unname(prob_stats["AUC"]))
-#>     else CM$overall
-#>     stats <- c(overall_stats, class_stats)
-#>     stats <- stats[!names(stats) %in% c("AccuracyNull", "AccuracyLower", 
-#>         "AccuracyUpper", "AccuracyPValue", "McnemarPValue", "Mean Prevalence", 
-#>         "Mean Detection Prevalence")]
-#>     names(stats) <- gsub("[[:blank:]]+", "_", names(stats))
-#>     stat_list <- c("Accuracy", "Kappa", "Mean_F1", "Mean_Sensitivity", 
-#>         "Mean_Specificity", "Mean_Pos_Pred_Value", "Mean_Neg_Pred_Value", 
-#>         "Mean_Precision", "Mean_Recall", "Mean_Detection_Rate", 
-#>         "Mean_Balanced_Accuracy")
-#>     if (has_class_probs) 
-#>         stat_list <- c("logLoss", "AUC", "prAUC", stat_list)
-#>     if (length(levels(data[, "pred"])) == 2) 
-#>         stat_list <- gsub("^Mean_", "", stat_list)
-#>     stats <- stats[c(stat_list)]
-#>     return(stats)
-#> }
-#> <bytecode: 0x7fc445fd1c50>
-#> <environment: namespace:caret>
-
+
+

Examples

+
get_perf_metric_fn("continuous")
+#> function (data, lev = NULL, model = NULL) 
+#> {
+#>     if (is.character(data$obs)) 
+#>         data$obs <- factor(data$obs, levels = lev)
+#>     postResample(data[, "pred"], data[, "obs"])
+#> }
+#> <bytecode: 0x7ff670ada678>
+#> <environment: namespace:caret>
+get_perf_metric_fn("binary")
+#> function (data, lev = NULL, model = NULL) 
+#> {
+#>     if (!all(levels(data[, "pred"]) == levels(data[, "obs"]))) 
+#>         stop("levels of observed and predicted data do not match")
+#>     has_class_probs <- all(lev %in% colnames(data))
+#>     if (has_class_probs) {
+#>         lloss <- mnLogLoss(data = data, lev = lev, model = model)
+#>         requireNamespaceQuietStop("pROC")
+#>         requireNamespaceQuietStop("MLmetrics")
+#>         prob_stats <- lapply(levels(data[, "pred"]), function(x) {
+#>             obs <- ifelse(data[, "obs"] == x, 1, 0)
+#>             prob <- data[, x]
+#>             roc_auc <- try(pROC::roc(obs, data[, x], direction = "<", 
+#>                 quiet = TRUE), silent = TRUE)
+#>             roc_auc <- if (inherits(roc_auc, "try-error")) 
+#>                 NA
+#>             else roc_auc$auc
+#>             pr_auc <- try(MLmetrics::PRAUC(y_pred = data[, x], 
+#>                 y_true = obs), silent = TRUE)
+#>             if (inherits(pr_auc, "try-error")) 
+#>                 pr_auc <- NA
+#>             res <- c(ROC = roc_auc, AUC = pr_auc)
+#>             return(res)
+#>         })
+#>         prob_stats <- do.call("rbind", prob_stats)
+#>         prob_stats <- colMeans(prob_stats, na.rm = TRUE)
+#>     }
+#>     CM <- confusionMatrix(data[, "pred"], data[, "obs"], mode = "everything")
+#>     if (length(levels(data[, "pred"])) == 2) {
+#>         class_stats <- CM$byClass
+#>     }
+#>     else {
+#>         class_stats <- colMeans(CM$byClass)
+#>         names(class_stats) <- paste("Mean", names(class_stats))
+#>     }
+#>     overall_stats <- if (has_class_probs) 
+#>         c(CM$overall, logLoss = as.numeric(lloss), AUC = unname(prob_stats["ROC"]), 
+#>             prAUC = unname(prob_stats["AUC"]))
+#>     else CM$overall
+#>     stats <- c(overall_stats, class_stats)
+#>     stats <- stats[!names(stats) %in% c("AccuracyNull", "AccuracyLower", 
+#>         "AccuracyUpper", "AccuracyPValue", "McnemarPValue", "Mean Prevalence", 
+#>         "Mean Detection Prevalence")]
+#>     names(stats) <- gsub("[[:blank:]]+", "_", names(stats))
+#>     stat_list <- c("Accuracy", "Kappa", "Mean_F1", "Mean_Sensitivity", 
+#>         "Mean_Specificity", "Mean_Pos_Pred_Value", "Mean_Neg_Pred_Value", 
+#>         "Mean_Precision", "Mean_Recall", "Mean_Detection_Rate", 
+#>         "Mean_Balanced_Accuracy")
+#>     if (has_class_probs) 
+#>         stat_list <- c("logLoss", "AUC", "prAUC", stat_list)
+#>     if (length(levels(data[, "pred"])) == 2) 
+#>         stat_list <- gsub("^Mean_", "", stat_list)
+#>     stats <- stats[c(stat_list)]
+#>     return(stats)
+#> }
+#> <bytecode: 0x7ff668067158>
+#> <environment: namespace:caret>
+get_perf_metric_fn("multiclass")
+#> function (data, lev = NULL, model = NULL) 
+#> {
+#>     if (!all(levels(data[, "pred"]) == levels(data[, "obs"]))) 
+#>         stop("levels of observed and predicted data do not match")
+#>     has_class_probs <- all(lev %in% colnames(data))
+#>     if (has_class_probs) {
+#>         lloss <- mnLogLoss(data = data, lev = lev, model = model)
+#>         requireNamespaceQuietStop("pROC")
+#>         requireNamespaceQuietStop("MLmetrics")
+#>         prob_stats <- lapply(levels(data[, "pred"]), function(x) {
+#>             obs <- ifelse(data[, "obs"] == x, 1, 0)
+#>             prob <- data[, x]
+#>             roc_auc <- try(pROC::roc(obs, data[, x], direction = "<", 
+#>                 quiet = TRUE), silent = TRUE)
+#>             roc_auc <- if (inherits(roc_auc, "try-error")) 
+#>                 NA
+#>             else roc_auc$auc
+#>             pr_auc <- try(MLmetrics::PRAUC(y_pred = data[, x], 
+#>                 y_true = obs), silent = TRUE)
+#>             if (inherits(pr_auc, "try-error")) 
+#>                 pr_auc <- NA
+#>             res <- c(ROC = roc_auc, AUC = pr_auc)
+#>             return(res)
+#>         })
+#>         prob_stats <- do.call("rbind", prob_stats)
+#>         prob_stats <- colMeans(prob_stats, na.rm = TRUE)
+#>     }
+#>     CM <- confusionMatrix(data[, "pred"], data[, "obs"], mode = "everything")
+#>     if (length(levels(data[, "pred"])) == 2) {
+#>         class_stats <- CM$byClass
+#>     }
+#>     else {
+#>         class_stats <- colMeans(CM$byClass)
+#>         names(class_stats) <- paste("Mean", names(class_stats))
+#>     }
+#>     overall_stats <- if (has_class_probs) 
+#>         c(CM$overall, logLoss = as.numeric(lloss), AUC = unname(prob_stats["ROC"]), 
+#>             prAUC = unname(prob_stats["AUC"]))
+#>     else CM$overall
+#>     stats <- c(overall_stats, class_stats)
+#>     stats <- stats[!names(stats) %in% c("AccuracyNull", "AccuracyLower", 
+#>         "AccuracyUpper", "AccuracyPValue", "McnemarPValue", "Mean Prevalence", 
+#>         "Mean Detection Prevalence")]
+#>     names(stats) <- gsub("[[:blank:]]+", "_", names(stats))
+#>     stat_list <- c("Accuracy", "Kappa", "Mean_F1", "Mean_Sensitivity", 
+#>         "Mean_Specificity", "Mean_Pos_Pred_Value", "Mean_Neg_Pred_Value", 
+#>         "Mean_Precision", "Mean_Recall", "Mean_Detection_Rate", 
+#>         "Mean_Balanced_Accuracy")
+#>     if (has_class_probs) 
+#>         stat_list <- c("logLoss", "AUC", "prAUC", stat_list)
+#>     if (length(levels(data[, "pred"])) == 2) 
+#>         stat_list <- gsub("^Mean_", "", stat_list)
+#>     stats <- stats[c(stat_list)]
+#>     return(stats)
+#> }
+#> <bytecode: 0x7ff668067158>
+#> <environment: namespace:caret>
+
+
+
-
- - + + diff --git a/docs/reference/get_perf_metric_name.html b/docs/reference/get_perf_metric_name.html index bbb23f22..0ada12bc 100644 --- a/docs/reference/get_perf_metric_name.html +++ b/docs/reference/get_perf_metric_name.html @@ -1,75 +1,12 @@ - - - - - - - -Get default performance metric name — get_perf_metric_name • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Get default performance metric name — get_perf_metric_name • mikropml + + - - - - -
-
- -
- -
+
@@ -156,57 +78,55 @@

Get default performance metric name

Get default performance metric name for cross-validation.

-
get_perf_metric_name(outcome_type)
- -

Arguments

- - - - - - -
outcome_type

Type of outcome (one of: "continuous","binary","multiclass").

- -

Value

+
+
get_perf_metric_name(outcome_type)
+
+
+

Arguments

+
outcome_type
+

Type of outcome (one of: "continuous","binary","multiclass").

+
+
+

Value

Performance metric name.

-

Author

- -

Zena Lapp, zenalapp@umich.edu

- -

Examples

-
get_perf_metric_name("continuous")
-#> [1] "RMSE"
-get_perf_metric_name("binary")
-#> [1] "AUC"
-get_perf_metric_name("multiclass")
-#> [1] "logLoss"
-
+
+
+

Author

+

Zena Lapp, zenalapp@umich.edu

+
+ +
+

Examples

+
get_perf_metric_name("continuous")
+#> [1] "RMSE"
+get_perf_metric_name("binary")
+#> [1] "AUC"
+get_perf_metric_name("multiclass")
+#> [1] "logLoss"
+
+
+
- - - + + diff --git a/docs/reference/get_performance_tbl.html b/docs/reference/get_performance_tbl.html index 2a21af4c..19ec8c50 100644 --- a/docs/reference/get_performance_tbl.html +++ b/docs/reference/get_performance_tbl.html @@ -1,75 +1,12 @@ - - - - - - - -Get model performance metrics as a one-row tibble — get_performance_tbl • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Get model performance metrics as a one-row tibble — get_performance_tbl • mikropml + + - - - - -
-
- -
- -
+
@@ -156,118 +78,101 @@

Get model performance metrics as a one-row tibble

Get model performance metrics as a one-row tibble

-
get_performance_tbl(
-  trained_model,
-  test_data,
-  outcome_colname,
-  perf_metric_function,
-  perf_metric_name,
-  class_probs,
-  method,
-  seed = NA
-)
+
+
get_performance_tbl(
+  trained_model,
+  test_data,
+  outcome_colname,
+  perf_metric_function,
+  perf_metric_name,
+  class_probs,
+  method,
+  seed = NA
+)
+
-

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
trained_model

Trained model from caret::train().

test_data

Held out test data: dataframe of outcome and features.

outcome_colname

Column name as a string of the outcome variable -(default NULL; the first column will be chosen automatically).

perf_metric_function

Function to calculate the performance metric to +

+

Arguments

+
trained_model
+

Trained model from caret::train().

+
test_data
+

Held out test data: dataframe of outcome and features.

+
outcome_colname
+

Column name as a string of the outcome variable +(default NULL; the first column will be chosen automatically).

+
perf_metric_function
+

Function to calculate the performance metric to be used for cross-validation and test performance. Some functions are -provided by caret (see caret::defaultSummary()). +provided by caret (see caret::defaultSummary()). Defaults: binary classification = twoClassSummary, multi-class classification = multiClassSummary, -regression = defaultSummary.

perf_metric_name

The column name from the output of the function +regression = defaultSummary.

+
perf_metric_name
+

The column name from the output of the function provided to perf_metric_function that is to be used as the performance metric. Defaults: binary classification = "ROC", multi-class classification = "logLoss", -regression = "RMSE".

class_probs

Whether to use class probabilities (TRUE for categorical outcomes, FALSE for numeric outcomes).

method

ML method. -Options: c("glmnet", "rf", "rpart2", "svmRadial", "xgbTree").

    -
  • glmnet: linear, logistic, or multiclass regression

  • +regression = "RMSE".

    +
    class_probs
    +

    Whether to use class probabilities (TRUE for categorical outcomes, FALSE for numeric outcomes).

    +
    method
    +

    ML method. +Options: c("glmnet", "rf", "rpart2", "svmRadial", "xgbTree").

    • glmnet: linear, logistic, or multiclass regression

    • rf: random forest

    • rpart2: decision tree

    • svmRadial: support vector machine

    • xgbTree: xgboost

    • -
seed

Random seed (default: NA). -Your results will only be reproducible if you set a seed.

- -

Value

- + +
seed
+

Random seed (default: NA). +Your results will only be reproducible if you set a seed.

+
+
+

Value

A one-row tibble with columns cv_auroc, column for each of the performance metrics for the test data method, and seed.

-

Author

- -

Kelly Sovacool, sovacool@umich.edu

-

Zena Lapp, zenalapp@umich.edu

+
+
+

Author

+

Kelly Sovacool, sovacool@umich.edu

+

Zena Lapp, zenalapp@umich.edu

+
-

Examples

-
if (FALSE) {
-results <- run_ml(otu_small, "glmnet", kfold = 2, cv_times = 2)
-names(results$trained_model$trainingData)[1] <- "dx"
-get_performance_tbl(results$trained_model, results$test_data,
-  "dx",
-  multiClassSummary, "AUC",
-  class_probs = TRUE,
-  method = "glmnet"
-)
-}
-
-
+
+

Examples

+
if (FALSE) {
+results <- run_ml(otu_small, "glmnet", kfold = 2, cv_times = 2)
+names(results$trained_model$trainingData)[1] <- "dx"
+get_performance_tbl(results$trained_model, results$test_data,
+  "dx",
+  multiClassSummary, "AUC",
+  class_probs = TRUE,
+  method = "glmnet"
+)
+}
+
+
+
+
- - - + + diff --git a/docs/reference/get_tuning_grid.html b/docs/reference/get_tuning_grid.html index 722251fd..f15dd71a 100644 --- a/docs/reference/get_tuning_grid.html +++ b/docs/reference/get_tuning_grid.html @@ -1,75 +1,12 @@ - - - - - - - -Generate the tuning grid for tuning hyperparameters — get_tuning_grid • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Generate the tuning grid for tuning hyperparameters — get_tuning_grid • mikropml - + + - - - -
-
- -
- -
+
@@ -156,73 +78,68 @@

Generate the tuning grid for tuning hyperparameters

Generate the tuning grid for tuning hyperparameters

-
get_tuning_grid(hyperparams_list, method)
- -

Arguments

- - - - - - - - - - -
hyperparams_list

Named list of lists of hyperparameters.

method

ML method. -Options: c("glmnet", "rf", "rpart2", "svmRadial", "xgbTree").

    -
  • glmnet: linear, logistic, or multiclass regression

  • +
    +
    get_tuning_grid(hyperparams_list, method)
    +
    + +
    +

    Arguments

    +
    hyperparams_list
    +

    Named list of lists of hyperparameters.

    +
    method
    +

    ML method. +Options: c("glmnet", "rf", "rpart2", "svmRadial", "xgbTree").

    • glmnet: linear, logistic, or multiclass regression

    • rf: random forest

    • rpart2: decision tree

    • svmRadial: support vector machine

    • xgbTree: xgboost

    • -
- -

Value

- + +
+
+

Value

The tuning grid.

-

Author

- -

Begüm Topçuoğlu, topcuoglu.begum@gmail.com

-

Kelly Sovacool, sovacool@umich.edu

- -

Examples

-
ml_method <- "glmnet"
-hparams_list <- get_hyperparams_list(otu_small, ml_method)
-get_tuning_grid(hparams_list, ml_method)
-#>   lambda alpha
-#> 1  1e-04     0
-#> 2  1e-03     0
-#> 3  1e-02     0
-#> 4  1e-01     0
-#> 5  1e+00     0
-#> 6  1e+01     0
-
+
+
+

Author

+

Begüm Topçuoğlu, topcuoglu.begum@gmail.com

+

Kelly Sovacool, sovacool@umich.edu

+
+ +
+

Examples

+
ml_method <- "glmnet"
+hparams_list <- get_hyperparams_list(otu_small, ml_method)
+get_tuning_grid(hparams_list, ml_method)
+#>   lambda alpha
+#> 1  1e-04     0
+#> 2  1e-03     0
+#> 3  1e-02     0
+#> 4  1e-01     0
+#> 5  1e+00     0
+#> 6  1e+01     0
+
+
+
- - - + + diff --git a/docs/reference/group_correlated_features.html b/docs/reference/group_correlated_features.html index 0a8808c0..5712d246 100644 --- a/docs/reference/group_correlated_features.html +++ b/docs/reference/group_correlated_features.html @@ -1,75 +1,12 @@ - - - - - - - -Group correlated features — group_correlated_features • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Group correlated features — group_correlated_features • mikropml - - + + - - -
-
- -
- -
+
@@ -156,78 +78,70 @@

Group correlated features

Group correlated features

-
group_correlated_features(
-  features,
-  corr_thresh = 1,
-  group_neg_corr = TRUE,
-  corr_method = "spearman"
-)
- -

Arguments

- - - - - - - - - - - - - - - - - - -
features

a dataframe with each column as a feature for ML

corr_thresh

For feature importance, group correlations -above or equal to corr_thresh (range 0 to 1; default: 1).

group_neg_corr

Whether to group negatively correlated features -together (e.g. c(0,1) and c(1,0)).

corr_method

correlation method. options or the same as those supported -by stats::cor: spearman, pearson, kendall. (default: spearman)

- -

Value

+
+
group_correlated_features(
+  features,
+  corr_thresh = 1,
+  group_neg_corr = TRUE,
+  corr_method = "spearman"
+)
+
+
+

Arguments

+
features
+

a dataframe with each column as a feature for ML

+
corr_thresh
+

For feature importance, group correlations +above or equal to corr_thresh (range 0 to 1; default: 1).

+
group_neg_corr
+

Whether to group negatively correlated features +together (e.g. c(0,1) and c(1,0)).

+
corr_method
+

correlation method. options are the same as those supported +by stats::cor: spearman, pearson, kendall. (default: spearman)

+
+
+

Value

vector where each element is a group of correlated features separated by pipes (|)

-

Author

- -

Kelly Sovacool, sovacool@umich.edu

- -

Examples

-
features <- data.frame(
-  a = 1:3, b = 2:4, c = c(1, 0, 1),
-  d = (5:7), e = c(5, 1, 4), f = c(-1, 0, -1)
-)
-group_correlated_features(features)
-#> [1] "a|b|d" "c|f"   "e"    
-
+
+
+

Author

+

Kelly Sovacool, sovacool@umich.edu

+
+ +
+

Examples

+
features <- data.frame(
+  a = 1:3, b = 2:4, c = c(1, 0, 1),
+  d = (5:7), e = c(5, 1, 4), f = c(-1, 0, -1)
+)
+group_correlated_features(features)
+#> [1] "a|b|d" "c|f"   "e"    
+
+
+
- - - + + diff --git a/docs/reference/index.html b/docs/reference/index.html index d603d420..3e0c7a89 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -1,74 +1,12 @@ - - - - - - - -Function reference • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Function reference • mikropml + + - - - - -
-
- -
- -
+
- - - - - - - - - - -
-

Main

-

The foundations for training machine learning models.

+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - - - - - - - - - - -
+

Main

+

The foundations for training machine learning models.

+

mikropml

mikropml: User-Friendly R Package for Robust Machine Learning Pipelines

+

preprocess_data()

Preprocess data prior to running machine learning

+

run_ml()

Run the machine learning pipeline

-

Plotting helpers

-

Visualize performance to help you tune hyperparameters and choose model methods.

+
+

Plotting helpers

+

Visualize performance to help you tune hyperparameters and choose model methods.

+

plot_hp_performance()

Plot hyperparameter performance metrics

+

plot_model_performance()

Plot performance metrics for multiple ML runs with different parameters

+

tidy_perf_data()

Tidy the performance dataframe

+

get_hp_performance()

Get hyperparameter performance metrics

+

combine_hp_performance()

Combine hyperparameter performance metrics for multiple train/test splits

-

Package Data

+
+

Package Data

-

datasets

+
+

datasets

+

otu_small

Small OTU abundance dataset

+

otu_mini_bin

Mini OTU abundance dataset

+

otu_mini_multi

Mini OTU abundance dataset with 3 categorical variables

+

otu_mini_multi_group

Groups for otu_mini_multi

-

ML results

+
+

ML results

+

otu_mini_bin_results_glmnet

Results from running the pipeline with L2 logistic regression on otu_mini_bin with feature importance and grouping

+

otu_mini_bin_results_rf

Results from running the pipeline with random forest on otu_mini_bin

+

otu_mini_bin_results_rpart2

Results from running the pipeline with rpart2 on otu_mini_bin

+

otu_mini_bin_results_svmRadial

Results from running the pipeline with svmRadial on otu_mini_bin

+

otu_mini_bin_results_xgbTree

Results from running the pipeline with xgbTree on otu_mini_bin

+

otu_mini_cont_results_glmnet

Results from running the pipeline with glmnet on otu_mini_bin with Otu00001 as the outcome

+

otu_mini_cont_results_nocv

Results from running the pipeline with glmnet on otu_mini_bin with Otu00001 as the outcome column, using a custom train control scheme that does not perform cross-validation

+

otu_mini_multi_results_glmnet

Results from running the pipeline with glmnet on otu_mini_multi for multiclass outcomes

-

misc

+
+

misc

+

otu_mini_cv

Cross validation on train_data_mini with grouped features.

+

replace_spaces()

Replace spaces in all elements of a character vector with underscores

-

Pipeline customization

-

These are functions called by preprocess_data() or run_ml(). We make them available in case you would like to customize various steps of the pipeline beyond the arguments provided by the main functions.

+
+

Pipeline customization

+

These are functions called by preprocess_data() or run_ml(). We make them available in case you would like to customize various steps of the pipeline beyond the arguments provided by the main functions.

+

remove_singleton_columns()

Remove columns appearing in only threshold row(s) or fewer.

+

get_caret_processed_df()

Get preprocessed dataframe for continuous variables

+

randomize_feature_order()

Randomize feature order to eliminate any position-dependent effects

+

get_partition_indices()

Select indices to partition the data into training & testing sets.

+

get_outcome_type()

Get outcome type.

+

get_hyperparams_list()

Set hyperparameters based on ML method and dataset characteristics

+

get_tuning_grid()

Generate the tuning grid for tuning hyperparameters

+

define_cv()

Define cross-validation scheme and training parameters

+

get_perf_metric_name()

Get default performance metric name

+

get_perf_metric_fn()

Get default performance metric function

+

train_model()

Train model using caret::train().

+

Train model using caret::train().

calc_perf_metrics()

Get performance metrics for test data

+

get_performance_tbl()

Get model performance metrics as a one-row tibble

+

get_feature_importance()

Get feature importance using the permutation method

+

group_correlated_features()

Group correlated features

- +
+
-
- - + + diff --git a/docs/reference/mikropml.html b/docs/reference/mikropml.html index 5af7dea0..b4e5acff 100644 --- a/docs/reference/mikropml.html +++ b/docs/reference/mikropml.html @@ -1,78 +1,15 @@ - - - - - - - -mikropml: User-Friendly R Package for Robust Machine Learning Pipelines — mikropml • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -mikropml: User-Friendly R Package for Robust Machine Learning Pipelines — mikropml • mikropml - - - - - - - - - - - - - - + + -
-
- -
- -
+

mikropml implements supervised machine learning pipelines using regression, support vector machines, decision trees, random forest, or gradient-boosted trees. -The main functions are preprocess_data() to process your data prior to -running machine learning, and run_ml() to run machine learning.

+The main functions are preprocess_data() to process your data prior to +running machine learning, and run_ml() to run machine learning.

- -

Authors

- +
+

Authors

-
    -
  • Begüm D. Topçuoğlu (ORCID)

  • -
  • Zena Lapp (ORCID)

  • -
  • Kelly L. Sovacool (ORCID)

  • -
  • Evan Snitkin (ORCID)

  • -
  • Jenna Wiens (ORCID)

  • -
  • Patrick D. Schloss (ORCID)

  • -
- -

See vignettes

- +
+
+
-
- - + + diff --git a/docs/reference/otu_mini_bin.html b/docs/reference/otu_mini_bin.html index de3a0ddd..8ada81b9 100644 --- a/docs/reference/otu_mini_bin.html +++ b/docs/reference/otu_mini_bin.html @@ -1,77 +1,14 @@ - - - - - - - -Mini OTU abundance dataset — otu_mini_bin • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Mini OTU abundance dataset — otu_mini_bin • mikropml - - - - - - - - - - - - + + - - -
-
- -
- -
+
@@ -160,40 +82,38 @@

Mini OTU abundance dataset

This is a subset of otu_small.

-
otu_mini_bin
- - -

Format

+
+
otu_mini_bin
+
+
+

Format

A data frame. The dx column is the diagnosis: healthy or cancerous (colorectal). All other columns are OTU relative abundances.

+
+
- - - + + diff --git a/docs/reference/otu_mini_bin_results_glmnet.html b/docs/reference/otu_mini_bin_results_glmnet.html index 9a06bc14..67af728a 100644 --- a/docs/reference/otu_mini_bin_results_glmnet.html +++ b/docs/reference/otu_mini_bin_results_glmnet.html @@ -1,75 +1,12 @@ - - - - - - - -Results from running the pipline with L2 logistic regression on otu_mini_bin with feature importance and grouping — otu_mini_bin_results_glmnet • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Results from running the pipline with L2 logistic regression on otu_mini_bin with feature importance and grouping — otu_mini_bin_results_glmnet • mikropml - - + + - - -
-
- -
- -
+
@@ -156,38 +78,36 @@

Results from running the pipline with L2 logistic regression on otu_mi

Results from running the pipeline with L2 logistic regression on otu_mini_bin with feature importance and grouping

-
otu_mini_bin_results_glmnet
- - -

Format

+
+
otu_mini_bin_results_glmnet
+
+
+

Format

An object of class list of length 4.

+
+
- - - + + diff --git a/docs/reference/otu_mini_bin_results_rf.html b/docs/reference/otu_mini_bin_results_rf.html index cab3458e..bf017786 100644 --- a/docs/reference/otu_mini_bin_results_rf.html +++ b/docs/reference/otu_mini_bin_results_rf.html @@ -1,75 +1,12 @@ - - - - - - - -Results from running the pipline with random forest on otu_mini_bin — otu_mini_bin_results_rf • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Results from running the pipline with random forest on otu_mini_bin — otu_mini_bin_results_rf • mikropml - - + + - - -
-
- -
- -
+
@@ -156,38 +78,36 @@

Results from running the pipeline with random forest on otu_mini_binResults from running the pipeline with random forest on otu_mini_bin

-
otu_mini_bin_results_rf
- - -

Format

+
+
otu_mini_bin_results_rf
+
+
+

Format

An object of class list of length 4.

+
+
- - - + + diff --git a/docs/reference/otu_mini_bin_results_rpart2.html b/docs/reference/otu_mini_bin_results_rpart2.html index a1272039..02195147 100644 --- a/docs/reference/otu_mini_bin_results_rpart2.html +++ b/docs/reference/otu_mini_bin_results_rpart2.html @@ -1,75 +1,12 @@ - - - - - - - -Results from running the pipline with rpart2 on otu_mini_bin — otu_mini_bin_results_rpart2 • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Results from running the pipline with rpart2 on otu_mini_bin — otu_mini_bin_results_rpart2 • mikropml - - + + - - -
-
- -
- -
+
@@ -156,38 +78,36 @@

Results from running the pipeline with rpart2 on otu_mini_binResults from running the pipeline with rpart2 on otu_mini_bin

-
otu_mini_bin_results_rpart2
- - -

Format

+
+
otu_mini_bin_results_rpart2
+
+
+

Format

An object of class list of length 4.

+
+
- - - + + diff --git a/docs/reference/otu_mini_bin_results_svmRadial.html b/docs/reference/otu_mini_bin_results_svmRadial.html index 61605322..7af4cd27 100644 --- a/docs/reference/otu_mini_bin_results_svmRadial.html +++ b/docs/reference/otu_mini_bin_results_svmRadial.html @@ -1,75 +1,12 @@ - - - - - - - -Results from running the pipline with svmRadial on otu_mini_bin — otu_mini_bin_results_svmRadial • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Results from running the pipline with svmRadial on otu_mini_bin — otu_mini_bin_results_svmRadial • mikropml - - + + - - -
-
- -
- -
+
@@ -156,38 +78,36 @@

Results from running the pipeline with svmRadial on otu_mini_bin

Results from running the pipeline with svmRadial on otu_mini_bin

-
otu_mini_bin_results_svmRadial
- - -

Format

+
+
otu_mini_bin_results_svmRadial
+
+
+

Format

An object of class list of length 4.

+
+
- - - + + diff --git a/docs/reference/otu_mini_bin_results_xgbTree.html b/docs/reference/otu_mini_bin_results_xgbTree.html index 4200eced..93f25984 100644 --- a/docs/reference/otu_mini_bin_results_xgbTree.html +++ b/docs/reference/otu_mini_bin_results_xgbTree.html @@ -1,75 +1,12 @@ - - - - - - - -Results from running the pipline with xbgTree on otu_mini_bin — otu_mini_bin_results_xgbTree • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Results from running the pipline with xbgTree on otu_mini_bin — otu_mini_bin_results_xgbTree • mikropml - - + + - - -
-
- -
- -
+
@@ -156,38 +78,36 @@

Results from running the pipeline with xgbTree on otu_mini_binResults from running the pipeline with xgbTree on otu_mini_bin

-
otu_mini_bin_results_xgbTree
- - -

Format

+
+
otu_mini_bin_results_xgbTree
+
+
+

Format

An object of class list of length 4.

+
+
- - - + + diff --git a/docs/reference/otu_mini_cont_results_glmnet.html b/docs/reference/otu_mini_cont_results_glmnet.html index 84a9d502..d65102cc 100644 --- a/docs/reference/otu_mini_cont_results_glmnet.html +++ b/docs/reference/otu_mini_cont_results_glmnet.html @@ -1,78 +1,15 @@ - - - - - - - -Results from running the pipeline with glmnet on otu_mini_bin with Otu00001 -as the outcome — otu_mini_cont_results_glmnet • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Results from running the pipeline with glmnet on otu_mini_bin with Otu00001 +as the outcome — otu_mini_cont_results_glmnet • mikropml - - + + - - -
-
- -
- -
+
@@ -161,38 +83,36 @@

Results from running the pipeline with glmnet on otu_mini_bin w as the outcome

-
otu_mini_cont_results_glmnet
- - -

Format

+
+
otu_mini_cont_results_glmnet
+
+
+

Format

An object of class list of length 4.

+
+
- - - + + diff --git a/docs/reference/otu_mini_cont_results_nocv.html b/docs/reference/otu_mini_cont_results_nocv.html index 91542f0a..6c6efa8c 100644 --- a/docs/reference/otu_mini_cont_results_nocv.html +++ b/docs/reference/otu_mini_cont_results_nocv.html @@ -1,81 +1,18 @@ - - - - - - - -Results from running the pipeline with glmnet on otu_mini_bin with Otu00001 +<!-- Generated by pkgdown: do not edit by hand --><html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>Results from running the pipeline with glmnet on otu_mini_bin with Otu00001 as the outcome column, -using a custom train control scheme that does not perform cross-validation — otu_mini_cont_results_nocv • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - -
-
- -
- -
+
@@ -166,38 +88,36 @@

Results from running the pipeline with glmnet on otu_mini_bin w using a custom train control scheme that does not perform cross-validation

-
otu_mini_cont_results_nocv
- - -

Format

+
+
otu_mini_cont_results_nocv
+
+
+

Format

An object of class list of length 4.

+
+
- - - + + diff --git a/docs/reference/otu_mini_cv.html b/docs/reference/otu_mini_cv.html index b85db175..e0f8c469 100644 --- a/docs/reference/otu_mini_cv.html +++ b/docs/reference/otu_mini_cv.html @@ -1,75 +1,12 @@ - - - - - - - -Cross validation on train_data_mini with grouped features. — otu_mini_cv • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Cross validation on train_data_mini with grouped features. — otu_mini_cv • mikropml - - + + - - -
-
- -
- -
+
@@ -156,38 +78,36 @@

Cross validation on train_data_mini with grouped features.

Cross validation on train_data_mini with grouped features.

-
otu_mini_cv
- - -

Format

+
+
otu_mini_cv
+
+
+

Format

An object of class list of length 27.

+
+
- - - + + diff --git a/docs/reference/otu_mini_multi.html b/docs/reference/otu_mini_multi.html index 0bfa78a3..5c2d176c 100644 --- a/docs/reference/otu_mini_multi.html +++ b/docs/reference/otu_mini_multi.html @@ -1,75 +1,12 @@ - - - - - - - -Mini OTU abundance dataset with 3 categorical variables — otu_mini_multi • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Mini OTU abundance dataset with 3 categorical variables — otu_mini_multi • mikropml - - + + - - -
-
- -
- -
+
@@ -156,40 +78,38 @@

Mini OTU abundance dataset with 3 categorical variables

A dataset containing relative abundances of OTUs for human stool samples

-
otu_mini_multi
- - -

Format

+
+
otu_mini_multi
+
+
+

Format

A data frame. The dx column is the colorectal cancer diagnosis: adenoma, carcinoma, normal. All other columns are OTU relative abundances.

+
+
- - - + + diff --git a/docs/reference/otu_mini_multi_group.html b/docs/reference/otu_mini_multi_group.html index 1146e5d4..1daaf7c4 100644 --- a/docs/reference/otu_mini_multi_group.html +++ b/docs/reference/otu_mini_multi_group.html @@ -1,75 +1,12 @@ - - - - - - - -Groups for otu_mini_multi — otu_mini_multi_group • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Groups for otu_mini_multi — otu_mini_multi_group • mikropml - - + + - - -
-
- -
- -
+
@@ -156,38 +78,36 @@

Groups for otu_mini_multi

Groups for otu_mini_multi

-
otu_mini_multi_group
- - -

Format

+
+
otu_mini_multi_group
+
+
+

Format

An object of class character of length 490.

+
+
- - - + + diff --git a/docs/reference/otu_mini_multi_results_glmnet.html b/docs/reference/otu_mini_multi_results_glmnet.html index 967c3d07..ad97ae77 100644 --- a/docs/reference/otu_mini_multi_results_glmnet.html +++ b/docs/reference/otu_mini_multi_results_glmnet.html @@ -1,78 +1,15 @@ - - - - - - - -Results from running the pipeline with glmnet on otu_mini_multi for -multiclass outcomes — otu_mini_multi_results_glmnet • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Results from running the pipeline with glmnet on otu_mini_multi for +multiclass outcomes — otu_mini_multi_results_glmnet • mikropml - - + + - - -
-
- -
- -
+
@@ -161,38 +83,36 @@

Results from running the pipeline with glmnet on otu_mini_multi multiclass outcomes

-
otu_mini_multi_results_glmnet
- - -

Format

+
+
otu_mini_multi_results_glmnet
+
+
+

Format

An object of class list of length 4.

+
+
- - - + + diff --git a/docs/reference/otu_small.html b/docs/reference/otu_small.html index d9dc63a7..e3dc4a12 100644 --- a/docs/reference/otu_small.html +++ b/docs/reference/otu_small.html @@ -1,77 +1,14 @@ - - - - - - - -Small OTU abundance dataset — otu_small • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Small OTU abundance dataset — otu_small • mikropml - - - - - - - - - - - - + + - - -
-
- -
- -
+

A dataset containing relative abundances of 60 OTUs for 60 human stool samples. This is a subset of the data provided in extdata/otu_large.csv, which was -used in Topçuoğlu et al. 2020.

+used in Topçuoğlu et al. 2020.

-
otu_small
- - -

Format

+
+
otu_small
+
+
+

Format

A data frame with 60 rows and 61 variables. The dx column is the diagnosis: healthy or cancerous (colorectal). All other columns are OTU relative abundances.

+
+
-
- - + + diff --git a/docs/reference/plot_hp_performance.html b/docs/reference/plot_hp_performance.html index f5658b63..2502fde0 100644 --- a/docs/reference/plot_hp_performance.html +++ b/docs/reference/plot_hp_performance.html @@ -1,75 +1,12 @@ - - - - - - - -Plot hyperparameter performance metrics — plot_hp_performance • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Plot hyperparameter performance metrics — plot_hp_performance • mikropml - - + + - - -
-
- -
- -
+
@@ -156,89 +78,83 @@

Plot hyperparameter performance metrics

Plot hyperparameter performance metrics

-
plot_hp_performance(dat, param_col, metric_col)
- -

Arguments

- - - - - - - - - - - - - - -
dat

dataframe of hyperparameters and performance metric (e.g. from get_hp_performance() or combine_hp_performance())

param_col

hyperparameter to be plotted. must be a column in dat.

metric_col

performance metric. must be a column in dat.

- -

Value

+
+
plot_hp_performance(dat, param_col, metric_col)
+
+
+

Arguments

+
dat
+

dataframe of hyperparameters and performance metric (e.g. from get_hp_performance() or combine_hp_performance())

+
param_col
+

hyperparameter to be plotted. must be a column in dat.

+
metric_col
+

performance metric. must be a column in dat.

+
+
+

Value

ggplot of hyperparameter performance.

-

Author

- -

Zena Lapp, zenalapp@umich.edu

-

Kelly Sovacool sovacool@umich.edu

- -

Examples

-
# plot for a single `run_ml()` call
-hp_metrics <- get_hp_performance(otu_mini_bin_results_glmnet$trained_model)
-hp_metrics
-#> $dat
-#>   alpha lambda       AUC
-#> 1     0  1e-04 0.6082552
-#> 2     0  1e-03 0.6082552
-#> 3     0  1e-02 0.6086458
-#> 4     0  1e-01 0.6166789
-#> 5     0  1e+00 0.6221737
-#> 6     0  1e+01 0.6187408
-#> 
-#> $params
-#> [1] "lambda"
-#> 
-#> $metric
-#> [1] "AUC"
-#> 
-plot_hp_performance(hp_metrics$dat, lambda, AUC)
-
-if (FALSE) {
-# plot for multiple `run_ml()` calls
-results <- lapply(seq(100, 102), function(seed) {
-  run_ml(otu_small, "glmnet", seed = seed)
-})
-models <- lapply(results, function(x) x$trained_model)
-hp_metrics <- combine_hp_performance(models)
-plot_hp_performance(hp_metrics$dat, lambda, AUC)
-}
-
+
+
+

Author

+

Zena Lapp, zenalapp@umich.edu

+

Kelly Sovacool, sovacool@umich.edu

+
+ +
+

Examples

+
# plot for a single `run_ml()` call
+hp_metrics <- get_hp_performance(otu_mini_bin_results_glmnet$trained_model)
+hp_metrics
+#> $dat
+#>   alpha lambda       AUC
+#> 1     0  1e-04 0.6082552
+#> 2     0  1e-03 0.6082552
+#> 3     0  1e-02 0.6086458
+#> 4     0  1e-01 0.6166789
+#> 5     0  1e+00 0.6221737
+#> 6     0  1e+01 0.6187408
+#> 
+#> $params
+#> [1] "lambda"
+#> 
+#> $metric
+#> [1] "AUC"
+#> 
+plot_hp_performance(hp_metrics$dat, lambda, AUC)
+
+if (FALSE) {
+# plot for multiple `run_ml()` calls
+results <- lapply(seq(100, 102), function(seed) {
+  run_ml(otu_small, "glmnet", seed = seed)
+})
+models <- lapply(results, function(x) x$trained_model)
+hp_metrics <- combine_hp_performance(models)
+plot_hp_performance(hp_metrics$dat, lambda, AUC)
+}
+
+
+
- - - + + diff --git a/docs/reference/plot_model_performance.html b/docs/reference/plot_model_performance.html index acd2ae5b..679c23bd 100644 --- a/docs/reference/plot_model_performance.html +++ b/docs/reference/plot_model_performance.html @@ -1,75 +1,12 @@ - - - - - - - -Plot performance metrics for multiple ML runs with different parameters — plot_model_performance • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Plot performance metrics for multiple ML runs with different parameters — plot_model_performance • mikropml + + - - - - -
-
- -
- -
+
@@ -156,88 +78,86 @@

Plot performance metrics for multiple ML runs with different parameters

ggplot2 is required to use this function.

-
plot_model_performance(performance_df)
- -

Arguments

- - - - - - -
performance_df

dataframe of performance results from multiple calls to run_ml()

- -

Value

+
+
plot_model_performance(performance_df)
+
+
+

Arguments

+
performance_df
+

dataframe of performance results from multiple calls to run_ml()

+
+
+

Value

A ggplot2 plot of performance.

-

Author

- -

Begüm Topçuoglu, topcuoglu.begum@gmail.com

-

Kelly Sovacool, sovacool@umich.edu

- -

Examples

-
if (FALSE) {
-# call `run_ml()` multiple times with different seeds
-results_lst <- lapply(seq(100, 104), function(seed) {
-  run_ml(otu_small, "glmnet", seed = seed)
-})
-# extract and combine the performance results
-perf_df <- lapply(results_lst, function(result) {
-  result[["performance"]]
-}) %>%
-  dplyr::bind_rows()
-# plot the performance results
-p <- plot_model_performance(perf_df)
-
-
-# call `run_ml()` with different ML methods
-param_grid <- expand.grid(
-  seeds = seq(100, 104),
-  methods = c("glmnet", "rf")
-)
-results_mtx <- mapply(
-  function(seed, method) {
-    run_ml(otu_mini_bin, method, seed = seed, kfold = 2)
-  },
-  param_grid$seeds, param_grid$methods
-)
-# extract and combine the performance results
-perf_df2 <- dplyr::bind_rows(results_mtx["performance", ])
-# plot the performance results
-p <- plot_model_performance(perf_df2)
-
-# you can continue adding layers to customize the plot
-p +
-  theme_classic() +
-  scale_color_brewer(palette = "Dark2") +
-  coord_flip()
-}
-
+
+
+

Author

+

Begüm Topçuoğlu, topcuoglu.begum@gmail.com

+

Kelly Sovacool, sovacool@umich.edu

+
+ +
+

Examples

+
if (FALSE) {
+# call `run_ml()` multiple times with different seeds
+results_lst <- lapply(seq(100, 104), function(seed) {
+  run_ml(otu_small, "glmnet", seed = seed)
+})
+# extract and combine the performance results
+perf_df <- lapply(results_lst, function(result) {
+  result[["performance"]]
+}) %>%
+  dplyr::bind_rows()
+# plot the performance results
+p <- plot_model_performance(perf_df)
+
+
+# call `run_ml()` with different ML methods
+param_grid <- expand.grid(
+  seeds = seq(100, 104),
+  methods = c("glmnet", "rf")
+)
+results_mtx <- mapply(
+  function(seed, method) {
+    run_ml(otu_mini_bin, method, seed = seed, kfold = 2)
+  },
+  param_grid$seeds, param_grid$methods
+)
+# extract and combine the performance results
+perf_df2 <- dplyr::bind_rows(results_mtx["performance", ])
+# plot the performance results
+p <- plot_model_performance(perf_df2)
+
+# you can continue adding layers to customize the plot
+p +
+  theme_classic() +
+  scale_color_brewer(palette = "Dark2") +
+  coord_flip()
+}
+
+
+
- - - + + diff --git a/docs/reference/preprocess_data.html b/docs/reference/preprocess_data.html index b643f2ab..2a37c6dc 100644 --- a/docs/reference/preprocess_data.html +++ b/docs/reference/preprocess_data.html @@ -1,75 +1,12 @@ - - - - - - - -Preprocess data prior to running machine learning — preprocess_data • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Preprocess data prior to running machine learning — preprocess_data • mikropml - - - - + + -
-
- -
- -
+
-

Function to preprocess your data for input into run_ml().

+

Function to preprocess your data for input into run_ml().

-
preprocess_data(
-  dataset,
-  outcome_colname,
-  method = c("center", "scale"),
-  remove_var = "nzv",
-  collapse_corr_feats = TRUE,
-  to_numeric = TRUE,
-  group_neg_corr = TRUE,
-  prefilter_threshold = 1
-)
+
+
preprocess_data(
+  dataset,
+  outcome_colname,
+  method = c("center", "scale"),
+  remove_var = "nzv",
+  collapse_corr_feats = TRUE,
+  to_numeric = TRUE,
+  group_neg_corr = TRUE,
+  prefilter_threshold = 1
+)
+
-

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
dataset

Dataframe with an outcome variable and other columns as features.

outcome_colname

Column name as a string of the outcome variable -(default NULL; the first column will be chosen automatically).

method

Methods to preprocess the data, described in -caret::preProcess() (default: c("center","scale"), use NULL for -no normalization).

remove_var

Whether to remove variables with near-zero variance -('nzv'; default), zero variance ('zv'), or none (NULL).

collapse_corr_feats

Whether to keep only one of perfectly correlated -features.

to_numeric

Whether to change features to numeric where possible.

group_neg_corr

Whether to group negatively correlated features -together (e.g. c(0,1) and c(1,0)).

prefilter_threshold

Remove features which only have non-zero & non-NA +

+

Arguments

+
dataset
+

Dataframe with an outcome variable and other columns as features.

+
outcome_colname
+

Column name as a string of the outcome variable +(default NULL; the first column will be chosen automatically).

+
method
+

Methods to preprocess the data, described in +caret::preProcess() (default: c("center","scale"), use NULL for +no normalization).

+
remove_var
+

Whether to remove variables with near-zero variance +('nzv'; default), zero variance ('zv'), or none (NULL).

+
collapse_corr_feats
+

Whether to keep only one of perfectly correlated +features.

+
to_numeric
+

Whether to change features to numeric where possible.

+
group_neg_corr
+

Whether to group negatively correlated features +together (e.g. c(0,1) and c(1,0)).

+
prefilter_threshold
+

Remove features which only have non-zero & non-NA values in N rows or fewer (default: 1). Set this to -1 to keep all columns at this step. This step will also be skipped if to_numeric is set to -FALSE.

- -

Value

- -

Named list including:

    -
  • dat_transformed: Preprocessed data.

  • +FALSE.

    +
+
+

Value

+

Named list including:

  • dat_transformed: Preprocessed data.

  • grp_feats: If features were grouped together, a named list of the features corresponding to each group.

  • removed_feats: Any features that were removed during preprocessing (e.g. because there was zero variance or near-zero variance for those features).

  • -
- -

If the progressr package is installed, a progress bar with time elapsed +

If the progressr package is installed, a progress bar with time elapsed and estimated time to completion can be displayed.

-

More details

- +
+
+

More details

-

See the preprocessing vignette +

See the preprocessing vignette for more details.

Note that if any values in outcome_colname contain spaces, they will be converted to underscores for compatibility with caret.

-

Author

- -

Zena Lapp, zenalapp@umich.edu

-

Kelly Sovacool, sovacool@umich.edu

+
+
+

Author

+

Zena Lapp, zenalapp@umich.edu

+

Kelly Sovacool, sovacool@umich.edu

+
-

Examples

-
preprocess_data(mikropml::otu_small, "dx")
-#> Using 'dx' as the outcome column.
-#> $dat_transformed
-#> # A tibble: 200 × 61
-#>    dx    Otu00001 Otu00002 Otu00003 Otu00004 Otu00005 Otu00006 Otu00007 Otu00008
-#>    <chr>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
-#>  1 norm…   -0.420   -0.219   -0.174  -0.591   -0.0488  -0.167    -0.569 -0.0624 
-#>  2 norm…   -0.105    1.75    -0.718   0.0381   1.54    -0.573    -0.643 -0.132  
-#>  3 norm…   -0.708    0.696    1.43    0.604   -0.265   -0.0364   -0.612 -0.207  
-#>  4 norm…   -0.494   -0.665    2.02   -0.593   -0.676   -0.586    -0.552 -0.470  
-#>  5 norm…    1.11    -0.395   -0.754  -0.586   -0.754    2.73      0.191 -0.676  
-#>  6 norm…   -0.685    0.614   -0.174  -0.584    0.376    0.804    -0.337 -0.00608
-#>  7 canc…   -0.770   -0.496   -0.318   0.159   -0.658    2.20     -0.717  0.0636 
-#>  8 norm…   -0.424   -0.478   -0.397  -0.556   -0.391   -0.0620    0.376 -0.0222 
-#>  9 norm…   -0.556    1.14     1.62   -0.352   -0.275   -0.465    -0.804  0.294  
-#> 10 canc…    1.46    -0.451   -0.694  -0.0567  -0.706    0.689    -0.370  1.59   
-#> # … with 190 more rows, and 52 more variables: Otu00009 <dbl>, Otu00010 <dbl>,
-#> #   Otu00011 <dbl>, Otu00012 <dbl>, Otu00013 <dbl>, Otu00014 <dbl>,
-#> #   Otu00015 <dbl>, Otu00016 <dbl>, Otu00017 <dbl>, Otu00018 <dbl>,
-#> #   Otu00019 <dbl>, Otu00020 <dbl>, Otu00021 <dbl>, Otu00022 <dbl>,
-#> #   Otu00023 <dbl>, Otu00024 <dbl>, Otu00025 <dbl>, Otu00026 <dbl>,
-#> #   Otu00027 <dbl>, Otu00028 <dbl>, Otu00029 <dbl>, Otu00030 <dbl>,
-#> #   Otu00031 <dbl>, Otu00032 <dbl>, Otu00033 <dbl>, Otu00034 <dbl>, …
-#> 
-#> $grp_feats
-#> NULL
-#> 
-#> $removed_feats
-#> character(0)
-#> 
-
-# the function can show a progress bar if you have the progressr package installed
-## optionally, specify the progress bar format
-progressr::handlers(progressr::handler_progress(
-  format = ":message :bar :percent | elapsed: :elapsed | eta: :eta",
-  clear = FALSE,
-  show_after = 0
-))
-## tell progressor to always report progress
-if (FALSE) {
-progressr::handlers(global = TRUE)
-## run the function and watch the live progress udpates
-dat_preproc <- preprocess_data(mikropml::otu_small, "dx")
-}
-
+
+

Examples

+
preprocess_data(mikropml::otu_small, "dx")
+#> Using 'dx' as the outcome column.
+#> $dat_transformed
+#> # A tibble: 200 × 61
+#>    dx    Otu00001 Otu00002 Otu00003 Otu00004 Otu00005 Otu00006 Otu00007 Otu00008
+#>    <chr>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
+#>  1 norm…   -0.420   -0.219   -0.174  -0.591   -0.0488  -0.167    -0.569 -0.0624 
+#>  2 norm…   -0.105    1.75    -0.718   0.0381   1.54    -0.573    -0.643 -0.132  
+#>  3 norm…   -0.708    0.696    1.43    0.604   -0.265   -0.0364   -0.612 -0.207  
+#>  4 norm…   -0.494   -0.665    2.02   -0.593   -0.676   -0.586    -0.552 -0.470  
+#>  5 norm…    1.11    -0.395   -0.754  -0.586   -0.754    2.73      0.191 -0.676  
+#>  6 norm…   -0.685    0.614   -0.174  -0.584    0.376    0.804    -0.337 -0.00608
+#>  7 canc…   -0.770   -0.496   -0.318   0.159   -0.658    2.20     -0.717  0.0636 
+#>  8 norm…   -0.424   -0.478   -0.397  -0.556   -0.391   -0.0620    0.376 -0.0222 
+#>  9 norm…   -0.556    1.14     1.62   -0.352   -0.275   -0.465    -0.804  0.294  
+#> 10 canc…    1.46    -0.451   -0.694  -0.0567  -0.706    0.689    -0.370  1.59   
+#> # … with 190 more rows, and 52 more variables: Otu00009 <dbl>, Otu00010 <dbl>,
+#> #   Otu00011 <dbl>, Otu00012 <dbl>, Otu00013 <dbl>, Otu00014 <dbl>,
+#> #   Otu00015 <dbl>, Otu00016 <dbl>, Otu00017 <dbl>, Otu00018 <dbl>,
+#> #   Otu00019 <dbl>, Otu00020 <dbl>, Otu00021 <dbl>, Otu00022 <dbl>,
+#> #   Otu00023 <dbl>, Otu00024 <dbl>, Otu00025 <dbl>, Otu00026 <dbl>,
+#> #   Otu00027 <dbl>, Otu00028 <dbl>, Otu00029 <dbl>, Otu00030 <dbl>,
+#> #   Otu00031 <dbl>, Otu00032 <dbl>, Otu00033 <dbl>, Otu00034 <dbl>, …
+#> 
+#> $grp_feats
+#> NULL
+#> 
+#> $removed_feats
+#> character(0)
+#> 
+
+# the function can show a progress bar if you have the progressr package installed
+## optionally, specify the progress bar format
+progressr::handlers(progressr::handler_progress(
+  format = ":message :bar :percent | elapsed: :elapsed | eta: :eta",
+  clear = FALSE,
+  show_after = 0
+))
+## tell progressr to always report progress
+if (FALSE) {
+progressr::handlers(global = TRUE)
+## run the function and watch the live progress updates
+dat_preproc <- preprocess_data(mikropml::otu_small, "dx")
+}
+
+
+
- - - + + diff --git a/docs/reference/randomize_feature_order.html b/docs/reference/randomize_feature_order.html index 5d3c6399..f30f553d 100644 --- a/docs/reference/randomize_feature_order.html +++ b/docs/reference/randomize_feature_order.html @@ -1,75 +1,12 @@ - - - - - - - -Randomize feature order to eliminate any position-dependent effects — randomize_feature_order • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Randomize feature order to eliminate any position-dependent effects — randomize_feature_order • mikropml + + - - - - -
-
- -
- -
+
@@ -156,66 +78,62 @@

Randomize feature order to eliminate any position-dependent effects

Randomize feature order to eliminate any position-dependent effects

-
randomize_feature_order(dataset, outcome_colname)
- -

Arguments

- - - - - - - - - - -
dataset

Dataframe with an outcome variable and other columns as features.

outcome_colname

Column name as a string of the outcome variable -(default NULL; the first column will be chosen automatically).

- -

Value

+
+
randomize_feature_order(dataset, outcome_colname)
+
+
+

Arguments

+
dataset
+

Dataframe with an outcome variable and other columns as features.

+
outcome_colname
+

Column name as a string of the outcome variable +(default NULL; the first column will be chosen automatically).

+
+
+

Value

Dataset with feature order randomized.

-

Author

- -

Nick Lesniak, nlesniak@umich.edu

-

Kelly Sovacool, sovacool@umich.edu

- -

Examples

-
dat <- data.frame(
-  outcome = c("1", "2", "3"),
-  a = 4:6, b = 7:9, c = 10:12, d = 13:15
-)
-randomize_feature_order(dat, "outcome")
-#>   outcome b  d a  c
-#> 1       1 7 13 4 10
-#> 2       2 8 14 5 11
-#> 3       3 9 15 6 12
-
+
+
+

Author

+

Nick Lesniak, nlesniak@umich.edu

+

Kelly Sovacool, sovacool@umich.edu

+
+ +
+

Examples

+
dat <- data.frame(
+  outcome = c("1", "2", "3"),
+  a = 4:6, b = 7:9, c = 10:12, d = 13:15
+)
+randomize_feature_order(dat, "outcome")
+#>   outcome  d  c b a
+#> 1       1 13 10 7 4
+#> 2       2 14 11 8 5
+#> 3       3 15 12 9 6
+
+
+
- - - + + diff --git a/docs/reference/reexports.html b/docs/reference/reexports.html index ee7794a2..0a55c110 100644 --- a/docs/reference/reexports.html +++ b/docs/reference/reexports.html @@ -1,84 +1,27 @@ - - - - - - - -dplyr pipe — reexports • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -dplyr pipe — reexports • mikropml - + dplyr +%&gt;% + rlang +!!, .data, := - - - - + + - - - - -
-
- -
- -
+

These objects are imported from other packages. Follow the links below to see their documentation.

-
-
caret

contr.ltfr

+
caret
+

contr.ltfr

-
dplyr

%>%

-
rlang

!!, .data, :=

+
dplyr
+

%>%

-
-
+ +
rlang
+

!!, .data, :=

+
+
+
- - - + + diff --git a/docs/reference/remove_singleton_columns.html b/docs/reference/remove_singleton_columns.html index eb90ff56..fe01c284 100644 --- a/docs/reference/remove_singleton_columns.html +++ b/docs/reference/remove_singleton_columns.html @@ -1,75 +1,12 @@ - - - - - - - -Remove columns appearing in only threshold row(s) or fewer. — remove_singleton_columns • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Remove columns appearing in only threshold row(s) or fewer. — remove_singleton_columns • mikropml + + - - - - -
-
- -
- -
+
@@ -156,97 +78,93 @@

Remove columns appearing in only threshold row(s) or fewer.

Removes columns which only have non-zero & non-NA values in threshold row(s) or fewer.

-
remove_singleton_columns(dat, threshold = 1)
- -

Arguments

- - - - - - - - - - -
dat

dataframe

threshold

Number of rows. If a column only has non-zero & non-NA values -in threshold row(s) or fewer, it will be removed.

- -

Value

+
+
remove_singleton_columns(dat, threshold = 1)
+
+
+

Arguments

+
dat
+

dataframe

+
threshold
+

Number of rows. If a column only has non-zero & non-NA values +in threshold row(s) or fewer, it will be removed.

+
+
+

Value

dataframe without singleton columns

-

Author

- -

Kelly Sovacool, sovacool@umich.edu

+
+
+

Author

+

Kelly Sovacool, sovacool@umich.edu

Courtney Armour

+
-

Examples

-
remove_singleton_columns(data.frame(a = 1:3, b = c(0, 1, 0), c = 4:6))
-#> $dat
-#>   a c
-#> 1 1 4
-#> 2 2 5
-#> 3 3 6
-#> 
-#> $removed_feats
-#> [1] "b"
-#> 
-remove_singleton_columns(data.frame(a = 1:3, b = c(0, 1, 0), c = 4:6), threshold = 0)
-#> $dat
-#>   a b c
-#> 1 1 0 4
-#> 2 2 1 5
-#> 3 3 0 6
-#> 
-#> $removed_feats
-#> character(0)
-#> 
-remove_singleton_columns(data.frame(a = 1:3, b = c(0, 1, NA), c = 4:6))
-#> $dat
-#>   a c
-#> 1 1 4
-#> 2 2 5
-#> 3 3 6
-#> 
-#> $removed_feats
-#> [1] "b"
-#> 
-remove_singleton_columns(data.frame(a = 1:3, b = c(1, 1, 1), c = 4:6))
-#> $dat
-#>   a b c
-#> 1 1 1 4
-#> 2 2 1 5
-#> 3 3 1 6
-#> 
-#> $removed_feats
-#> character(0)
-#> 
-
+
+

Examples

+
remove_singleton_columns(data.frame(a = 1:3, b = c(0, 1, 0), c = 4:6))
+#> $dat
+#>   a c
+#> 1 1 4
+#> 2 2 5
+#> 3 3 6
+#> 
+#> $removed_feats
+#> [1] "b"
+#> 
+remove_singleton_columns(data.frame(a = 1:3, b = c(0, 1, 0), c = 4:6), threshold = 0)
+#> $dat
+#>   a b c
+#> 1 1 0 4
+#> 2 2 1 5
+#> 3 3 0 6
+#> 
+#> $removed_feats
+#> character(0)
+#> 
+remove_singleton_columns(data.frame(a = 1:3, b = c(0, 1, NA), c = 4:6))
+#> $dat
+#>   a c
+#> 1 1 4
+#> 2 2 5
+#> 3 3 6
+#> 
+#> $removed_feats
+#> [1] "b"
+#> 
+remove_singleton_columns(data.frame(a = 1:3, b = c(1, 1, 1), c = 4:6))
+#> $dat
+#>   a b c
+#> 1 1 1 4
+#> 2 2 1 5
+#> 3 3 1 6
+#> 
+#> $removed_feats
+#> character(0)
+#> 
+
+
+
- - - + + diff --git a/docs/reference/replace_spaces.html b/docs/reference/replace_spaces.html index 2a60a938..a35d1a46 100644 --- a/docs/reference/replace_spaces.html +++ b/docs/reference/replace_spaces.html @@ -1,75 +1,12 @@ - - - - - - - -Replace spaces in all elements of a character vector with underscores — replace_spaces • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Replace spaces in all elements of a character vector with underscores — replace_spaces • mikropml + + - - - - -
-
- -
- -
+
@@ -156,65 +78,61 @@

Replace spaces in all elements of a character vector with underscores

Replace spaces in all elements of a character vector with underscores

-
replace_spaces(x, new_char = "_")
- -

Arguments

- - - - - - - - - - -
x

a character vector

new_char

the character to replace spaces (default: _)

- -

Value

+
+
replace_spaces(x, new_char = "_")
+
+
+

Arguments

+
x
+

a character vector

+
new_char
+

the character to replace spaces (default: _)

+
+
+

Value

character vector with all spaces replaced with new_char

-

Author

- -

Kelly Sovacool, sovacool@umich.edu

- -

Examples

-
dat <- data.frame(
-  dx = c("outcome 1", "outcome 2", "outcome 1"),
-  a = 1:3, b = c(5, 7, 1)
-)
-dat$dx <- replace_spaces(dat$dx)
-dat
-#>          dx a b
-#> 1 outcome_1 1 5
-#> 2 outcome_2 2 7
-#> 3 outcome_1 3 1
-
+
+
+

Author

+

Kelly Sovacool, sovacool@umich.edu

+
+ +
+

Examples

+
dat <- data.frame(
+  dx = c("outcome 1", "outcome 2", "outcome 1"),
+  a = 1:3, b = c(5, 7, 1)
+)
+dat$dx <- replace_spaces(dat$dx)
+dat
+#>          dx a b
+#> 1 outcome_1 1 5
+#> 2 outcome_2 2 7
+#> 3 outcome_1 3 1
+
+
+
- - - + + diff --git a/docs/reference/run_ml.html b/docs/reference/run_ml.html index 3d264ea6..4f44ab45 100644 --- a/docs/reference/run_ml.html +++ b/docs/reference/run_ml.html @@ -1,81 +1,18 @@ - - - - - - - -Run the machine learning pipeline — run_ml • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Run the machine learning pipeline — run_ml • mikropml - - - - - - - - - - - - - - + + -
-
- -
- -
+

This function runs machine learning (ML), evaluates the best model, and optionally calculates feature importance using the framework -outlined in Topçuoğlu et al. 2020 (doi: 10.1128/mBio.00434-20 +outlined in Topçuoğlu et al. 2020 (doi: 10.1128/mBio.00434-20 ). Required inputs are a dataframe with an outcome variable and other columns as features, as well as the ML method. -See vignette('introduction') for more details.

+See vignette('introduction') for more details.

-
run_ml(
-  dataset,
-  method,
-  outcome_colname = NULL,
-  hyperparameters = NULL,
-  find_feature_importance = FALSE,
-  calculate_performance = TRUE,
-  kfold = 5,
-  cv_times = 100,
-  cross_val = NULL,
-  training_frac = 0.8,
-  perf_metric_function = NULL,
-  perf_metric_name = NULL,
-  groups = NULL,
-  group_partitions = NULL,
-  corr_thresh = 1,
-  ntree = 1000,
-  seed = NA
-)
+
+
run_ml(
+  dataset,
+  method,
+  outcome_colname = NULL,
+  hyperparameters = NULL,
+  find_feature_importance = FALSE,
+  calculate_performance = TRUE,
+  kfold = 5,
+  cv_times = 100,
+  cross_val = NULL,
+  training_frac = 0.8,
+  perf_metric_function = NULL,
+  perf_metric_name = NULL,
+  groups = NULL,
+  group_partitions = NULL,
+  corr_thresh = 1,
+  ntree = 1000,
+  seed = NA
+)
+
-

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
dataset

Dataframe with an outcome variable and other columns as features.

method

ML method. -Options: c("glmnet", "rf", "rpart2", "svmRadial", "xgbTree").

    -
  • glmnet: linear, logistic, or multiclass regression

  • +
    +

    Arguments

    +
    dataset
    +

    Dataframe with an outcome variable and other columns as features.

    +
    method
    +

    ML method. +Options: c("glmnet", "rf", "rpart2", "svmRadial", "xgbTree").

    • glmnet: linear, logistic, or multiclass regression

    • rf: random forest

    • rpart2: decision tree

    • svmRadial: support vector machine

    • xgbTree: xgboost

    • -
outcome_colname

Column name as a string of the outcome variable -(default NULL; the first column will be chosen automatically).

hyperparameters

Dataframe of hyperparameters -(default NULL; sensible defaults will be chosen automatically).

find_feature_importance

Run permutation importance (default: FALSE). + +

outcome_colname
+

Column name as a string of the outcome variable +(default NULL; the first column will be chosen automatically).

+
hyperparameters
+

Dataframe of hyperparameters +(default NULL; sensible defaults will be chosen automatically).

+
find_feature_importance
+

Run permutation importance (default: FALSE). TRUE is recommended if you would like to identify features important for -predicting your outcome, but it is resource-intensive.

calculate_performance

Whether to calculate performance metrics (default: TRUE). -You might choose to skip this if you do not perform cross-validation during model training.

kfold

Fold number for k-fold cross-validation (default: 5).

cv_times

Number of cross-validation partitions to create (default: 100).

cross_val

a custom cross-validation scheme from caret::trainControl() +predicting your outcome, but it is resource-intensive.

+
calculate_performance
+

Whether to calculate performance metrics (default: TRUE). +You might choose to skip this if you do not perform cross-validation during model training.

+
kfold
+

Fold number for k-fold cross-validation (default: 5).

+
cv_times
+

Number of cross-validation partitions to create (default: 100).

+
cross_val
+

a custom cross-validation scheme from caret::trainControl() (default: NULL, uses kfold cross validation repeated cv_times). kfold and cv_times are ignored if the user provides a custom cross-validation scheme. -See the caret::trainControl() docs for information on how to use it.

training_frac

Fraction of data for training set (default: 0.8). Rows +See the caret::trainControl() docs for information on how to use it.

+
training_frac
+

Fraction of data for training set (default: 0.8). Rows from the dataset will be randomly selected for the training set, and all remaining rows will be used in the testing set. Alternatively, if you provide a vector of integers, these will be used as the row indices for the -training set. All remaining rows will be used in the testing set.

perf_metric_function

Function to calculate the performance metric to +training set. All remaining rows will be used in the testing set.

+
perf_metric_function
+

Function to calculate the performance metric to be used for cross-validation and test performance. Some functions are -provided by caret (see caret::defaultSummary()). +provided by caret (see caret::defaultSummary()). Defaults: binary classification = twoClassSummary, multi-class classification = multiClassSummary, -regression = defaultSummary.

perf_metric_name

The column name from the output of the function +regression = defaultSummary.

+
perf_metric_name
+

The column name from the output of the function provided to perf_metric_function that is to be used as the performance metric. Defaults: binary classification = "ROC", multi-class classification = "logLoss", -regression = "RMSE".

groups

Vector of groups to keep together when splitting the data into +regression = "RMSE".

+
groups
+

Vector of groups to keep together when splitting the data into train and test sets. If the number of groups in the training set is larger than kfold, the groups will also be kept together for cross-validation. -Length matches the number of rows in the dataset (default: NULL).

group_partitions

Specify how to assign groups to the training and +Length matches the number of rows in the dataset (default: NULL).

+
group_partitions
+

Specify how to assign groups to the training and testing partitions (default: NULL). If groups specifies that some samples belong to group "A" and some belong to group "B", then setting group_partitions = list(train = c("A", "B"), test = c("B")) will result @@ -285,96 +180,86 @@

Arg "B" in the testing set. The partition sizes will be as close to training_frac as possible. If the number of groups in the training set is larger than kfold, the groups will also be kept together for -cross-validation.

corr_thresh

For feature importance, group correlations -above or equal to corr_thresh (range 0 to 1; default: 1).

ntree

For random forest, how many trees to use (default: 1000). -Note that caret doesn't allow this parameter to be tuned.

seed

Random seed (default: NA). -Your results will only be reproducible if you set a seed.

- -

Value

- -

Named list with results:

    -
  • trained_model: Output of caret::train(), including the best model.

  • +cross-validation.

    +
    corr_thresh
    +

    For feature importance, group correlations +above or equal to corr_thresh (range 0 to 1; default: 1).

    +
    ntree
    +

    For random forest, how many trees to use (default: 1000). +Note that caret doesn't allow this parameter to be tuned.

    +
    seed
    +

    Random seed (default: NA). +Your results will only be reproducible if you set a seed.

    +
+
+

Value

+

Named list with results:

  • trained_model: Output of caret::train(), including the best model.

  • test_data: Part of the data that was used for testing.

  • -
  • performance: Dataframe of performance metrics. The first column is the cross-validation performance metric, and the last two columns are the ML method used and the seed (if one was set), respectively. All other columns are performance metrics calculated on the test data. This contains only one row, so you can easily combine performance dataframes from multiple calls to run_ml() (see vignette("parallel")).

  • +
  • performance: Dataframe of performance metrics. The first column is the cross-validation performance metric, and the last two columns are the ML method used and the seed (if one was set), respectively. All other columns are performance metrics calculated on the test data. This contains only one row, so you can easily combine performance dataframes from multiple calls to run_ml() (see vignette("parallel")).

  • feature_importance: If feature importances were calculated, a dataframe where each row is a feature or correlated group. The columns are the performance metric of the permuted data, the difference between the true performance metric and the performance metric of the permuted data (true - permuted), the feature name, the ML method, the performance metric name, and the seed (if provided). For AUC and RMSE, the higher perf_metric_diff is, the more important that feature is for predicting the outcome. For log loss, the lower perf_metric_diff is, the more important that feature is for predicting the outcome.

  • -
- -

More details

- +
+
+

More details

-

For more details, please see the vignettes.

-

Author

- -

Begüm Topçuoğlu, topcuoglu.begum@gmail.com

-

Zena Lapp, zenalapp@umich.edu

-

Kelly Sovacool, sovacool@umich.edu

+

For more details, please see the vignettes.

+
+
+

Author

+

Begüm Topçuoğlu, topcuoglu.begum@gmail.com

+

Zena Lapp, zenalapp@umich.edu

+

Kelly Sovacool, sovacool@umich.edu

+
-

Examples

-
if (FALSE) {
-
-# regression
-run_ml(otu_small, "glmnet",
-  seed = 2019
-)
-
-# random forest w/ feature importance
-run_ml(otu_small, "rf",
-  outcome_colname = "dx",
-  find_feature_importance = TRUE
-)
-
-# custom cross validation & hyperparameters
-run_ml(otu_mini_bin[, 2:11],
-  "glmnet",
-  outcome_colname = "Otu00001",
-  seed = 2019,
-  hyperparameters = list(lambda = c(1e-04), alpha = 0),
-  cross_val = caret::trainControl(method = "none"),
-  calculate_performance = FALSE
-)
-}
-
+
+

Examples

+
if (FALSE) {
+
+# regression
+run_ml(otu_small, "glmnet",
+  seed = 2019
+)
+
+# random forest w/ feature importance
+run_ml(otu_small, "rf",
+  outcome_colname = "dx",
+  find_feature_importance = TRUE
+)
+
+# custom cross validation & hyperparameters
+run_ml(otu_mini_bin[, 2:11],
+  "glmnet",
+  outcome_colname = "Otu00001",
+  seed = 2019,
+  hyperparameters = list(lambda = c(1e-04), alpha = 0),
+  cross_val = caret::trainControl(method = "none"),
+  calculate_performance = FALSE
+)
+}
+
+
+
- - - + + diff --git a/docs/reference/tidy_perf_data.html b/docs/reference/tidy_perf_data.html index 335d898d..7c39a61a 100644 --- a/docs/reference/tidy_perf_data.html +++ b/docs/reference/tidy_perf_data.html @@ -1,75 +1,12 @@ - - - - - - - -Tidy the performance dataframe — tidy_perf_data • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Tidy the performance dataframe — tidy_perf_data • mikropml + + - - - - -
-
- -
- -
+
-
tidy_perf_data(performance_df)
- -

Arguments

- - - - - - -
performance_df

dataframe of performance results from multiple calls to run_ml()

- -

Value

+
+
tidy_perf_data(performance_df)
+
+
+

Arguments

+
performance_df
+

dataframe of performance results from multiple calls to run_ml()

+
+
+

Value

Tidy dataframe with model performance metrics.

-

Author

- -

Begüm Topçuoglu, topcuoglu.begum@gmail.com

-

Kelly Sovacool, sovacool@umich.edu

- -

Examples

-
if (FALSE) {
-# call `run_ml()` multiple times with different seeds
-results_lst <- lapply(seq(100, 104), function(seed) {
-  run_ml(otu_small, "glmnet", seed = seed)
-})
-# extract and combine the performance results
-perf_df <- lapply(results_lst, function(result) {
-  result[["performance"]]
-}) %>%
-  dplyr::bind_rows()
-# make it pretty!
-tidy_perf_data(perf_df)
-}
-
+
+
+

Author

+

Begüm Topçuoğlu, topcuoglu.begum@gmail.com

+

Kelly Sovacool, sovacool@umich.edu

+
+ +
+

Examples

+
if (FALSE) {
+# call `run_ml()` multiple times with different seeds
+results_lst <- lapply(seq(100, 104), function(seed) {
+  run_ml(otu_small, "glmnet", seed = seed)
+})
+# extract and combine the performance results
+perf_df <- lapply(results_lst, function(result) {
+  result[["performance"]]
+}) %>%
+  dplyr::bind_rows()
+# make it pretty!
+tidy_perf_data(perf_df)
+}
+
+
+
-
- - + + diff --git a/docs/reference/train_model.html b/docs/reference/train_model.html index 965af491..8e6a48e6 100644 --- a/docs/reference/train_model.html +++ b/docs/reference/train_model.html @@ -1,75 +1,12 @@ - - - - - - - -Train model using caret::train(). — train_model • mikropml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Train model using caret::train(). — train_model • mikropml + + - - - - -
-
- -
- -
+
-

Train model using caret::train().

+

Train model using caret::train().

-
train_model(
-  model_formula,
-  train_data,
-  method,
-  cv,
-  perf_metric_name,
-  tune_grid,
-  ntree
-)
+
+
train_model(
+  model_formula,
+  train_data,
+  method,
+  cv,
+  perf_metric_name,
+  tune_grid,
+  ntree
+)
+
-

Arguments

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
model_formula

Model formula, typically created with stats::as.formula().

train_data

Training data. Expected to be a subset of the full dataset.

method

ML method. -Options: c("glmnet", "rf", "rpart2", "svmRadial", "xgbTree").

    -
  • glmnet: linear, logistic, or multiclass regression

  • +
    +

    Arguments

    +
    model_formula
    +

    Model formula, typically created with stats::as.formula().

    +
    train_data
    +

    Training data. Expected to be a subset of the full dataset.

    +
    method
    +

    ML method. +Options: c("glmnet", "rf", "rpart2", "svmRadial", "xgbTree").

    • glmnet: linear, logistic, or multiclass regression

    • rf: random forest

    • rpart2: decision tree

    • svmRadial: support vector machine

    • xgbTree: xgboost

    • -
cv

Cross-validation caret scheme from define_cv().

perf_metric_name

The column name from the output of the function + +

cv
+

Cross-validation caret scheme from define_cv().

+
perf_metric_name
+

The column name from the output of the function provided to perf_metric_function that is to be used as the performance metric. Defaults: binary classification = "ROC", multi-class classification = "logLoss", -regression = "RMSE".

tune_grid

Tuning grid from get_tuning_grid().

ntree

For random forest, how many trees to use (default: 1000). -Note that caret doesn't allow this parameter to be tuned.

- -

Value

- -

Trained model from caret::train().

-

Author

- -

Zena Lapp, zenalapp@umich.edu

+regression = "RMSE".

+
tune_grid
+

Tuning grid from get_tuning_grid().

+
ntree
+

For random forest, how many trees to use (default: 1000). +Note that caret doesn't allow this parameter to be tuned.

+
+
+

Value

+

Trained model from caret::train().

+
+
+

Author

+

Zena Lapp, zenalapp@umich.edu

+
-

Examples

-
if (FALSE) {
-training_data <- otu_mini_bin_results_glmnet$trained_model$trainingData %>%
-  dplyr::rename(dx = .outcome)
-method <- "rf"
-hyperparameters <- get_hyperparams_list(otu_mini_bin, method)
-cross_val <- define_cv(training_data,
-  "dx",
-  hyperparameters,
-  perf_metric_function = caret::multiClassSummary,
-  class_probs = TRUE,
-  cv_times = 2
-)
-tune_grid <- get_tuning_grid(hyperparameters, method)
-
-rf_model <- train_model(
-  stats::as.formula(paste("dx", "~ .")),
-  training_data,
-  method,
-  cross_val,
-  "AUC",
-  tune_grid,
-  1000
-)
-rf_model$results %>% dplyr::select(mtry, AUC, prAUC)
-}
-
+
+

Examples

+
if (FALSE) {
+training_data <- otu_mini_bin_results_glmnet$trained_model$trainingData %>%
+  dplyr::rename(dx = .outcome)
+method <- "rf"
+hyperparameters <- get_hyperparams_list(otu_mini_bin, method)
+cross_val <- define_cv(training_data,
+  "dx",
+  hyperparameters,
+  perf_metric_function = caret::multiClassSummary,
+  class_probs = TRUE,
+  cv_times = 2
+)
+tune_grid <- get_tuning_grid(hyperparameters, method)
+
+rf_model <- train_model(
+  stats::as.formula(paste("dx", "~ .")),
+  training_data,
+  method,
+  cross_val,
+  "AUC",
+  tune_grid,
+  1000
+)
+rf_model$results %>% dplyr::select(mtry, AUC, prAUC)
+}
+
+
+
- - - + + diff --git a/docs/sitemap.xml b/docs/sitemap.xml index caa54a8c..c203233c 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -1,26 +1,122 @@ + + http://www.schlosslab.org/mikropml/404.html + + + http://www.schlosslab.org/mikropml/CODE_OF_CONDUCT.html + + + http://www.schlosslab.org/mikropml/CONTRIBUTING.html + + + http://www.schlosslab.org/mikropml/LICENSE-text.html + + + http://www.schlosslab.org/mikropml/LICENSE.html + + + http://www.schlosslab.org/mikropml/SUPPORT.html + + + http://www.schlosslab.org/mikropml/articles/index.html + + + http://www.schlosslab.org/mikropml/articles/introduction.html + + + http://www.schlosslab.org/mikropml/articles/paper.html + + + http://www.schlosslab.org/mikropml/articles/parallel.html + + + http://www.schlosslab.org/mikropml/articles/preprocess.html + + + http://www.schlosslab.org/mikropml/articles/snakemake.html + + + http://www.schlosslab.org/mikropml/articles/tuning.html + + + http://www.schlosslab.org/mikropml/authors.html + http://www.schlosslab.org/mikropml/index.html + + http://www.schlosslab.org/mikropml/news/index.html + + + http://www.schlosslab.org/mikropml/pull_request_template.html + + + http://www.schlosslab.org/mikropml/reference/calc_aucs.html + http://www.schlosslab.org/mikropml/reference/calc_perf_metrics.html + + http://www.schlosslab.org/mikropml/reference/change_to_num.html + + + http://www.schlosslab.org/mikropml/reference/check_features.html + + + http://www.schlosslab.org/mikropml/reference/check_group.html + + + http://www.schlosslab.org/mikropml/reference/check_groups.html + + + http://www.schlosslab.org/mikropml/reference/check_training_indices.html + + + http://www.schlosslab.org/mikropml/reference/collapse_correlated_features.html + http://www.schlosslab.org/mikropml/reference/combine_hp_performance.html + + http://www.schlosslab.org/mikropml/reference/createGroupedDataPartition.html + + + http://www.schlosslab.org/mikropml/reference/create_grouped_data_partition.html + + + 
http://www.schlosslab.org/mikropml/reference/create_grouped_k_multifolds.html + http://www.schlosslab.org/mikropml/reference/define_cv.html + + http://www.schlosslab.org/mikropml/reference/find_permuted_auc.html + + + http://www.schlosslab.org/mikropml/reference/find_permuted_perf_metric.html + + + http://www.schlosslab.org/mikropml/reference/flatten_corr_mat.html + + + http://www.schlosslab.org/mikropml/reference/get_caret_dummyvars_df.html + http://www.schlosslab.org/mikropml/reference/get_caret_processed_df.html + + http://www.schlosslab.org/mikropml/reference/get_corr_feats.html + http://www.schlosslab.org/mikropml/reference/get_feature_importance.html http://www.schlosslab.org/mikropml/reference/get_hp_performance.html + + http://www.schlosslab.org/mikropml/reference/get_hyperparams_from_df.html + http://www.schlosslab.org/mikropml/reference/get_hyperparams_list.html @@ -39,18 +135,42 @@ http://www.schlosslab.org/mikropml/reference/get_performance_tbl.html + + http://www.schlosslab.org/mikropml/reference/get_seeds_trainControl.html + http://www.schlosslab.org/mikropml/reference/get_tuning_grid.html + + http://www.schlosslab.org/mikropml/reference/groupKMultiFolds.html + http://www.schlosslab.org/mikropml/reference/group_correlated_features.html + + http://www.schlosslab.org/mikropml/reference/index.html + http://www.schlosslab.org/mikropml/reference/keep_groups_in_cv_partitions.html http://www.schlosslab.org/mikropml/reference/mikropml.html + + http://www.schlosslab.org/mikropml/reference/mutate_all_types.html + + + http://www.schlosslab.org/mikropml/reference/otu_large.html + + + http://www.schlosslab.org/mikropml/reference/otu_med_results.html + + + http://www.schlosslab.org/mikropml/reference/otu_medium.html + + + http://www.schlosslab.org/mikropml/reference/otu_mini.html + http://www.schlosslab.org/mikropml/reference/otu_mini_bin.html @@ -69,6 +189,9 @@ http://www.schlosslab.org/mikropml/reference/otu_mini_bin_results_xgbTree.html + + 
http://www.schlosslab.org/mikropml/reference/otu_mini_cont_results1.html + http://www.schlosslab.org/mikropml/reference/otu_mini_cont_results_glmnet.html @@ -78,6 +201,9 @@ http://www.schlosslab.org/mikropml/reference/otu_mini_cv.html + + http://www.schlosslab.org/mikropml/reference/otu_mini_cv2.html + http://www.schlosslab.org/mikropml/reference/otu_mini_multi.html @@ -87,6 +213,24 @@ http://www.schlosslab.org/mikropml/reference/otu_mini_multi_results_glmnet.html + + http://www.schlosslab.org/mikropml/reference/otu_mini_results1.html + + + http://www.schlosslab.org/mikropml/reference/otu_mini_results2.html + + + http://www.schlosslab.org/mikropml/reference/otu_mini_results3.html + + + http://www.schlosslab.org/mikropml/reference/otu_mini_results4.html + + + http://www.schlosslab.org/mikropml/reference/otu_mini_results5.html + + + http://www.schlosslab.org/mikropml/reference/otu_sm_cv5.html + http://www.schlosslab.org/mikropml/reference/otu_small.html @@ -96,9 +240,21 @@ http://www.schlosslab.org/mikropml/reference/plot_model_performance.html + + http://www.schlosslab.org/mikropml/reference/plot_performance.html + http://www.schlosslab.org/mikropml/reference/preprocess_data.html + + http://www.schlosslab.org/mikropml/reference/process_cat_feats.html + + + http://www.schlosslab.org/mikropml/reference/process_cont_feats.html + + + http://www.schlosslab.org/mikropml/reference/process_novar_feats.html + http://www.schlosslab.org/mikropml/reference/randomize_feature_order.html @@ -111,28 +267,52 @@ http://www.schlosslab.org/mikropml/reference/replace_spaces.html + + http://www.schlosslab.org/mikropml/reference/rm_corr_feats.html + + + http://www.schlosslab.org/mikropml/reference/rm_missing_outcome.html + http://www.schlosslab.org/mikropml/reference/run_ml.html + + http://www.schlosslab.org/mikropml/reference/set_hparams_regLogistic.html + + + http://www.schlosslab.org/mikropml/reference/setup_parallel.html + + + 
http://www.schlosslab.org/mikropml/reference/split_outcome_features.html + + + http://www.schlosslab.org/mikropml/reference/stop_parallel.html + + + http://www.schlosslab.org/mikropml/reference/test_data_mini.html + + + http://www.schlosslab.org/mikropml/reference/test_data_sm.html + http://www.schlosslab.org/mikropml/reference/tidy_perf_data.html - http://www.schlosslab.org/mikropml/reference/train_model.html + http://www.schlosslab.org/mikropml/reference/train_data_mini.html - http://www.schlosslab.org/mikropml/articles/introduction.html + http://www.schlosslab.org/mikropml/reference/train_data_sm.html - http://www.schlosslab.org/mikropml/articles/paper.html + http://www.schlosslab.org/mikropml/reference/train_model.html - http://www.schlosslab.org/mikropml/articles/parallel.html + http://www.schlosslab.org/mikropml/reference/train_model_w_warnings.html - http://www.schlosslab.org/mikropml/articles/preprocess.html + http://www.schlosslab.org/mikropml/reference/trained_model_mini.html - http://www.schlosslab.org/mikropml/articles/tuning.html + http://www.schlosslab.org/mikropml/reference/trained_model_sm1.html