diff --git a/.Rbuildignore b/.Rbuildignore index c0900ea1..96912ad0 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -20,3 +20,4 @@ ^cran-comments\.md$ ^revdep$ ^CRAN-RELEASE$ +^CRAN-SUBMISSION$ diff --git a/DESCRIPTION b/DESCRIPTION index b94c54f8..b8ea3e32 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: mikropml Title: User-Friendly R Package for Supervised Machine Learning Pipelines -Version: 1.2.2.9000 -Date: 2022-02-03 +Version: 1.3.0 +Date: 2022-05-19 Authors@R: c(person(given = "Begüm", family = "Topçuoğlu", @@ -91,4 +91,4 @@ VignetteBuilder: Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.1.1 +RoxygenNote: 7.1.2 diff --git a/NAMESPACE b/NAMESPACE index 81dd1dd4..68575aad 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export(":=") export(.data) export(calc_perf_metrics) export(combine_hp_performance) +export(compare_models) export(contr.ltfr) export(define_cv) export(get_caret_processed_df) @@ -19,6 +20,7 @@ export(get_perf_metric_name) export(get_performance_tbl) export(get_tuning_grid) export(group_correlated_features) +export(permute_p_value) export(plot_hp_performance) export(plot_model_performance) export(preprocess_data) diff --git a/NEWS.md b/NEWS.md index af236574..aebff2c4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# development version +# mikropml 1.3.0 - mikropml now requires R version 4.1.0 or greater due to an update in the randomForest package (#292). - New function `compare_models()` compares the performance of two models with a permutation test (#295, @courtneyarmour). 
diff --git a/R/compare_models.R b/R/compare_models.R index 13ba1a19..5d4844e7 100644 --- a/R/compare_models.R +++ b/R/compare_models.R @@ -1,5 +1,3 @@ -#' Average metric difference -#' #' Calculate the difference in the mean of the metric for two groups #' #' @param sub_data subset of the merged performance data frame for two groups @@ -8,7 +6,7 @@ #' #' @return numeric difference in the average metric between the two groups #' -#' @export +#' @noRd #' @author Courtney Armour, \email{armourc@@umich.edu} #' #' @examples @@ -38,7 +36,7 @@ get_difference <- function(sub_data, group_name, metric) { #' @param col_name column name to shuffle #' #' @return `dat` with the rows of `col_name` shuffled -#' @export +#' @noRd #' @author Courtney R Armour, \email{armourc@@umich.edu} #' #' @examples @@ -62,7 +60,6 @@ shuffle_group <- function(dat, col_name) { return(data_shuffled) } - #' Calculated a permuted p-value comparing two models #' #' @inheritParams compare_models @@ -124,16 +121,17 @@ permute_p_value <- function(merged_data, metric, group_name, group_1, group_2, n } -#' Compute all pairs of comparisons -#' calculate permuted p-value across all pairs of group variable. -#' wrapper for `permute_p_value` +#' Perform permutation tests to compare the performance metric +#' across all pairs of a group variable. +#' +#' A wrapper for `permute_p_value()`. #' #' @param merged_data the concatenated performance data from `run_ml` #' @param metric metric to compare, must be numeric #' @param group_name column with group variables to compare #' @param nperm number of permutations, default=10000 #' -#' @return a table of p-values for all pairs of group varible +#' @return a table of p-values for all pairs of group variable #' @export #' @author Courtney R Armour, \email{armourc@@umich.edu} #' diff --git a/R/data.R b/R/data.R index 8df06165..9f12e9b0 100644 --- a/R/data.R +++ b/R/data.R @@ -32,19 +32,19 @@ #' Cross validation on `train_data_mini` with grouped features. 
"otu_mini_cv" -#' Results from running the pipline with L2 logistic regression on `otu_mini_bin` with feature importance and grouping +#' Results from running the pipeline with L2 logistic regression on `otu_mini_bin` with feature importance and grouping "otu_mini_bin_results_glmnet" -#' Results from running the pipline with random forest on `otu_mini_bin` +#' Results from running the pipeline with random forest on `otu_mini_bin` "otu_mini_bin_results_rf" -#' Results from running the pipline with svmRadial on `otu_mini_bin` +#' Results from running the pipeline with svmRadial on `otu_mini_bin` "otu_mini_bin_results_svmRadial" -#' Results from running the pipline with xbgTree on `otu_mini_bin` +#' Results from running the pipeline with xbgTree on `otu_mini_bin` "otu_mini_bin_results_xgbTree" -#' Results from running the pipline with rpart2 on `otu_mini_bin` +#' Results from running the pipeline with rpart2 on `otu_mini_bin` "otu_mini_bin_results_rpart2" #' Results from running the pipeline with glmnet on `otu_mini_bin` with `Otu00001` diff --git a/_pkgdown.yml b/_pkgdown.yml index b126508f..dc00676f 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -29,15 +29,22 @@ reference: - mikropml - preprocess_data - run_ml -- title: Plotting & evalutation helpers +- title: Plotting helpers desc: > - Visualize & evalutate performance to help you tune hyperparameters and choose model methods. + Visualize results to help you tune hyperparameters and choose model methods. contents: - - compare_models - starts_with('plot') - tidy_perf_data - get_hp_performance - combine_hp_performance +- title: Model evaluation + desc: > + Evaluate and interpret models. + contents: + - get_feature_importance + - get_performance_tbl + - compare_models + - permute_p_value - title: Package Data - subtitle: datasets contents: @@ -54,9 +61,8 @@ reference: - replace_spaces - title: Pipeline customization desc: > - These are functions called by preprocess_data() or run_ml(). 
- We make them available in case you would like to customize various steps - of the pipeline beyond the arguments provided by the main functions. + Customize various steps of the pipeline beyond the arguments provided by + run_ml() and preprocess_data(). contents: - remove_singleton_columns - get_caret_processed_df @@ -70,6 +76,4 @@ reference: - get_perf_metric_fn - train_model - calc_perf_metrics - - get_performance_tbl - - get_feature_importance - group_correlated_features diff --git a/cran-comments.md b/cran-comments.md index a6e1dfd3..7078366e 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,4 +1,3 @@ -This patch fixes a test failure on the no long doubles platform. ## Test environments diff --git a/docs/404.html b/docs/404.html index 986332d0..50b47da1 100644 --- a/docs/404.html +++ b/docs/404.html @@ -39,7 +39,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/CODE_OF_CONDUCT.html b/docs/CODE_OF_CONDUCT.html index ccf732ce..356d405e 100644 --- a/docs/CODE_OF_CONDUCT.html +++ b/docs/CODE_OF_CONDUCT.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/CONTRIBUTING.html b/docs/CONTRIBUTING.html index 632efc65..cfcb2766 100644 --- a/docs/CONTRIBUTING.html +++ b/docs/CONTRIBUTING.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index c35487fd..8c20f191 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/LICENSE.html b/docs/LICENSE.html index 757d2260..a38ff1d8 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/SUPPORT.html b/docs/SUPPORT.html index a297054e..31683945 100644 --- a/docs/SUPPORT.html +++ b/docs/SUPPORT.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/articles/index.html b/docs/articles/index.html index 11263a96..9e13e8a1 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html 
@@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/articles/introduction.html b/docs/articles/introduction.html index 0b546ce5..8466437c 100644 --- a/docs/articles/introduction.html +++ b/docs/articles/introduction.html @@ -40,7 +40,7 @@ mikropml - 1.2.2.9000 + 1.3.0 @@ -280,6 +280,11 @@

Changing kfold, #> Training the model... #> Loading required package: ggplot2 #> Loading required package: lattice +#> +#> Attaching package: 'caret' +#> The following object is masked from 'package:mikropml': +#> +#> compare_models #> Warning in (function (w) : `caret::train()` issued the following warning: #> #> simpleWarning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, : There were missing values in resampled performance measures. @@ -374,7 +379,7 @@

Using groups#> Training complete.

The one difference here is run_ml() will report how much of the data is in the training set if you run the above code chunk. This can be a little finicky depending on how many samples and groups you have. This is because it won’t be exactly what you specify with training_frac, since you have to include all of one group in either the training set or the test set.

-

Controling how groups are assigned to partitions +

Controlling how groups are assigned to partitions

When you use the groups parameter as above, by default run_ml() will assume that you want all of the observations from each group to be placed in the same partition of the train/test split. This makes sense when you want to use groups to control for batch effects. However, in some cases you might prefer to control exactly which groups end up in which partition, and you might even be okay with some observations from the same group being assigned to different partitions.

For example, say you want groups A and B to be used for training, C and D for testing, and you don’t have a preference for what happens to the other groups. You can give the group_partitions parameter a named list to specify which groups should go in the training set and which should go in the testing set.

@@ -469,7 +474,7 @@

Finding feature importance method: The ML method used.
  • -perf_metric_name: The peformance metric used.
  • +perf_metric_name: The performance metric used.
  • seed: The seed (if set).
  • diff --git a/docs/articles/paper.html b/docs/articles/paper.html index 4f4af106..3dccf33d 100644 --- a/docs/articles/paper.html +++ b/docs/articles/paper.html @@ -40,7 +40,7 @@
    mikropml - 1.2.2.9000 + 1.3.0

    @@ -119,7 +119,7 @@

    2020

    Summary

    -

    Machine learning (ML) for classification and prediction based on a set of features is used to make decisions in healthcare, economics, criminal justice and more. However, implementing an ML pipeline including preprocessing, model selection, and evaluation can be time-consuming, confusing, and difficult. Here, we present mikropml (prononced “meek-ROPE em el”), an easy-to-use R package that implements ML pipelines using regression, support vector machines, decision trees, random forest, or gradient-boosted trees. The package is available on GitHub, CRAN, and conda.

    +

    Machine learning (ML) for classification and prediction based on a set of features is used to make decisions in healthcare, economics, criminal justice and more. However, implementing an ML pipeline including preprocessing, model selection, and evaluation can be time-consuming, confusing, and difficult. Here, we present mikropml (pronounced “meek-ROPE em el”), an easy-to-use R package that implements ML pipelines using regression, support vector machines, decision trees, random forest, or gradient-boosted trees. The package is available on GitHub, CRAN, and conda.

    Statement of need diff --git a/docs/articles/parallel.html b/docs/articles/parallel.html index a20fdfa3..373dfe7e 100644 --- a/docs/articles/parallel.html +++ b/docs/articles/parallel.html @@ -40,7 +40,7 @@ mikropml - 1.2.2.9000 + 1.3.0

    @@ -128,7 +128,7 @@

    Kelly L. Sovacool

    Speed up single runs

    -

    By default, preprocess_data(), run_ml(), and compare_models() use only one process in series. If you’d like to parallelize various steps of the pipeline to make them run faster, install foreach, future, future.apply, and doFuture. Then, register a future plan prior to calling these functions:

    +

    By default, preprocess_data(), run_ml(), and compare_models() use only one process in series. If you’d like to parallelize various steps of the pipeline to make them run faster, install foreach, future, future.apply, and doFuture. Then, register a future plan prior to calling these functions:

     doFuture::registerDoFuture()
     future::plan(future::multicore, workers = 2)
    @@ -142,6 +142,11 @@

    Speed up single runs#> Training the model... #> Loading required package: ggplot2 #> Loading required package: lattice +#> +#> Attaching package: 'caret' +#> The following object is masked from 'package:mikropml': +#> +#> compare_models #> Training complete.

    diff --git a/docs/articles/preprocess.html b/docs/articles/preprocess.html index 5ccf1133..7d7bdb8d 100644 --- a/docs/articles/preprocess.html +++ b/docs/articles/preprocess.html @@ -40,7 +40,7 @@ mikropml - 1.2.2.9000 + 1.3.0
    @@ -174,7 +174,7 @@

    Binary data#> #> $removed_feats #> character(0) -

    The output is a list: dat_transformed which has the transformed data, grp_feats which is a list of grouped features, and removed_feats which is a list of featuures that were removed. Here, grp_feats is NULL because there are no perfectly correlated features (e.g. c(0,1,0) and c(0,1,0), or c(0,1,0) and c(1,0,1) - see below for more details).

    +

    The output is a list: dat_transformed which has the transformed data, grp_feats which is a list of grouped features, and removed_feats which is a list of features that were removed. Here, grp_feats is NULL because there are no perfectly correlated features (e.g. c(0,1,0) and c(0,1,0), or c(0,1,0) and c(1,0,1) - see below for more details).

    The first column (var1) in dat_transformed is a character and is changed to var1_yes that has zeros (no) and ones (yes). The values in the second column (var2) stay the same because it’s already binary, but the name changes to var2_1. The third column (var3) is a factor and is also changed to binary where b is 1 and a is 0, as denoted by the new column name var3_b.

    diff --git a/docs/articles/tuning.html b/docs/articles/tuning.html index 2092dc92..13d99588 100644 --- a/docs/articles/tuning.html +++ b/docs/articles/tuning.html @@ -40,7 +40,7 @@ mikropml - 1.2.2.9000 + 1.3.0
    @@ -158,6 +158,11 @@

    The simplest way to run_ml()#> Training the model... #> Loading required package: ggplot2 #> Loading required package: lattice +#> +#> Attaching package: 'caret' +#> The following object is masked from 'package:mikropml': +#> +#> compare_models #> Training complete.

    You’ll probably get a warning when you run this because the dataset is very small. If you want to learn more about that, check out the introductory vignette about training and evaluating a ML model: vignette("introduction").

    By default, run_ml() selects hyperparameters depending on the dataset and method used.

    diff --git a/docs/authors.html b/docs/authors.html index b1dd7d92..95618a32 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/index.html b/docs/index.html index eb215a3e..2ba4a5ac 100644 --- a/docs/index.html +++ b/docs/index.html @@ -47,7 +47,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/news/index.html b/docs/news/index.html index 35695dcb..28e93f1e 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 @@ -73,6 +73,13 @@

    Changelog

    Source: NEWS.md +
    + +

    This minor patch fixes a test failure on platforms with no long doubles. The actual package code remains unchanged.

    diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index e9094896..d50db519 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -7,7 +7,7 @@ articles: parallel: parallel.html preprocess: preprocess.html tuning: tuning.html -last_built: 2022-05-18T17:33Z +last_built: 2022-05-19T19:39Z urls: reference: http://www.schlosslab.org/mikropml/reference article: http://www.schlosslab.org/mikropml/articles diff --git a/docs/pull_request_template.html b/docs/pull_request_template.html index cc58413a..4a26cae7 100644 --- a/docs/pull_request_template.html +++ b/docs/pull_request_template.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0
    diff --git a/docs/reference/calc_perf_metrics.html b/docs/reference/calc_perf_metrics.html index cb3f3f8e..8fea238d 100644 --- a/docs/reference/calc_perf_metrics.html +++ b/docs/reference/calc_perf_metrics.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/combine_hp_performance.html b/docs/reference/combine_hp_performance.html index b3c69730..dca720af 100644 --- a/docs/reference/combine_hp_performance.html +++ b/docs/reference/combine_hp_performance.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/compare_models.html b/docs/reference/compare_models.html new file mode 100644 index 00000000..97b190dc --- /dev/null +++ b/docs/reference/compare_models.html @@ -0,0 +1,145 @@ + +Perform permutation tests to compare the performance metric +across all pairs of a group variable. — compare_models • mikropml + + +
    +
    + + + +
    +
    + + +
    +

    A wrapper for permute_p_value().

    +
    + +
    +
    compare_models(merged_data, metric, group_name, nperm = 10000)
    +
    + +
    +

    Arguments

    +
    merged_data
    +

    the concatenated performance data from run_ml

    +
    metric
    +

    metric to compare, must be numeric

    +
    group_name
    +

    column with group variables to compare

    +
    nperm
    +

    number of permutations, default=10000

    +
    +
    +

    Value

    +

    a table of p-values for all pairs of group variable

    +
    +
    +

    Author

    +

    Courtney R Armour, armourc@umich.edu

    +
    + +
    +

    Examples

    +
    df <- dplyr::tibble(
    +  model = c("rf", "rf", "glmnet", "glmnet", "svmRadial", "svmRadial"),
    +  AUC = c(.2, 0.3, 0.8, 0.9, 0.85, 0.95)
    +)
    +set.seed(123)
    +compare_models(df, "AUC", "model", nperm = 10)
    +#>   group1    group2   p_value
    +#> 1 glmnet svmRadial 0.7272727
    +#> 2     rf    glmnet 0.2727273
    +#> 3     rf svmRadial 0.5454545
    +
    +
    +
    + +
    + + +
    + + + + + + + + diff --git a/docs/reference/define_cv.html b/docs/reference/define_cv.html index d88d7eef..c26bc116 100644 --- a/docs/reference/define_cv.html +++ b/docs/reference/define_cv.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/get_caret_processed_df.html b/docs/reference/get_caret_processed_df.html index 1da76d45..dfbed020 100644 --- a/docs/reference/get_caret_processed_df.html +++ b/docs/reference/get_caret_processed_df.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/get_difference.html b/docs/reference/get_difference.html new file mode 100644 index 00000000..e493205a --- /dev/null +++ b/docs/reference/get_difference.html @@ -0,0 +1,137 @@ + +Average metric difference — get_difference • mikropml + + +
    +
    + + + +
    +
    + + +
    +

    Calculate the difference in the mean of the metric for two groups

    +
    + +
    +
    get_difference(sub_data, group_name, metric)
    +
    + +
    +

    Arguments

    +
    sub_data
    +

    subset of the merged performance data frame for two groups

    +
    group_name
    +

    name of column with group variable

    +
    metric
    +

    metric to compare

    +
    +
    +

    Value

    +

    numeric difference in the average metric between the two groups

    +
    +
    +

    Author

    +

    Courtney Armour, armourc@umich.edu

    +
    + +
    +

    Examples

    +
    df <- dplyr::tibble(
    +  condition = c("a", "a", "b", "b"),
    +  AUC = c(.2, 0.3, 0.8, 0.9)
    +)
    +get_difference(df, "condition", "AUC")
    +#> [1] 0.6
    +
    +
    +
    +
    + +
    + + +
    + + + + + + + + diff --git a/docs/reference/get_feature_importance.html b/docs/reference/get_feature_importance.html index 7b38b2f3..b673dff3 100644 --- a/docs/reference/get_feature_importance.html +++ b/docs/reference/get_feature_importance.html @@ -18,7 +18,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/get_hp_performance.html b/docs/reference/get_hp_performance.html index 4725b431..d95f2c20 100644 --- a/docs/reference/get_hp_performance.html +++ b/docs/reference/get_hp_performance.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/get_hyperparams_list.html b/docs/reference/get_hyperparams_list.html index cffb9d2c..e073866a 100644 --- a/docs/reference/get_hyperparams_list.html +++ b/docs/reference/get_hyperparams_list.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/get_outcome_type.html b/docs/reference/get_outcome_type.html index 9932003f..ef79172a 100644 --- a/docs/reference/get_outcome_type.html +++ b/docs/reference/get_outcome_type.html @@ -19,7 +19,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/get_partition_indices.html b/docs/reference/get_partition_indices.html index c3c9c641..a33ebb3f 100644 --- a/docs/reference/get_partition_indices.html +++ b/docs/reference/get_partition_indices.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/get_perf_metric_fn.html b/docs/reference/get_perf_metric_fn.html index 2eb2e30c..f653dac8 100644 --- a/docs/reference/get_perf_metric_fn.html +++ b/docs/reference/get_perf_metric_fn.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 @@ -105,7 +105,7 @@

    Examples

    #> data$obs <- factor(data$obs, levels = lev) #> postResample(data[, "pred"], data[, "obs"]) #> } -#> <bytecode: 0x7fa3774c1120> +#> <bytecode: 0x7fce51a85908> #> <environment: namespace:caret> get_perf_metric_fn("binary") #> function (data, lev = NULL, model = NULL) @@ -163,7 +163,7 @@

    Examples

    #> stats <- stats[c(stat_list)] #> return(stats) #> } -#> <bytecode: 0x7fa389e58f88> +#> <bytecode: 0x7fce50ebe0b8> #> <environment: namespace:caret> get_perf_metric_fn("multiclass") #> function (data, lev = NULL, model = NULL) @@ -221,7 +221,7 @@

    Examples

    #> stats <- stats[c(stat_list)] #> return(stats) #> } -#> <bytecode: 0x7fa389e58f88> +#> <bytecode: 0x7fce50ebe0b8> #> <environment: namespace:caret> diff --git a/docs/reference/get_perf_metric_name.html b/docs/reference/get_perf_metric_name.html index 16afe4eb..d96ec201 100644 --- a/docs/reference/get_perf_metric_name.html +++ b/docs/reference/get_perf_metric_name.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/get_performance_tbl.html b/docs/reference/get_performance_tbl.html index cb071b7b..cce8a298 100644 --- a/docs/reference/get_performance_tbl.html +++ b/docs/reference/get_performance_tbl.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/get_tuning_grid.html b/docs/reference/get_tuning_grid.html index 13497e90..a8d3faee 100644 --- a/docs/reference/get_tuning_grid.html +++ b/docs/reference/get_tuning_grid.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/group_correlated_features.html b/docs/reference/group_correlated_features.html index e91c8f69..ec9f0acb 100644 --- a/docs/reference/group_correlated_features.html +++ b/docs/reference/group_correlated_features.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/index.html b/docs/reference/index.html index 78413d2e..6303fba6 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 @@ -89,8 +89,8 @@

    Main

    Run the machine learning pipeline

    -

    Plotting & evalutation helpers

    -

    Visualize & evalutate performance to help you tune hyperparameters and choose model methods.

    +

    Plotting helpers

    +

    Visualize results to help you tune hyperparameters and choose model methods.

    plot_hp_performance()

    @@ -112,6 +112,27 @@

    Plotting & evalutation helpers combine_hp_performance()

    Combine hyperparameter performance metrics for multiple train/test splits

    + +

    Model evaluation

    +

    Evaluate and interpret models.

    + + +

    get_feature_importance()

    + +

    Get feature importance using the permutation method

    + +

    get_performance_tbl()

    + +

    Get model performance metrics as a one-row tibble

    + +

    compare_models()

    + +

    Perform permutation tests to compare the performance metric +across all pairs of a group variable.

    + +

    permute_p_value()

    + +

    Calculate a permuted p-value comparing two models

    Package Data

    @@ -143,23 +164,23 @@

    ML results

    otu_mini_bin_results_glmnet

    -

    Results from running the pipline with L2 logistic regression on otu_mini_bin with feature importance and grouping

    +

    Results from running the pipeline with L2 logistic regression on otu_mini_bin with feature importance and grouping

    otu_mini_bin_results_rf

    -

    Results from running the pipline with random forest on otu_mini_bin

    +

    Results from running the pipeline with random forest on otu_mini_bin

    otu_mini_bin_results_rpart2

    -

    Results from running the pipline with rpart2 on otu_mini_bin

    +

    Results from running the pipeline with rpart2 on otu_mini_bin

    otu_mini_bin_results_svmRadial

    -

    Results from running the pipline with svmRadial on otu_mini_bin

    +

    Results from running the pipeline with svmRadial on otu_mini_bin

    otu_mini_bin_results_xgbTree

    -

    Results from running the pipline with xbgTree on otu_mini_bin

    +

    Results from running the pipeline with xgbTree on otu_mini_bin

    otu_mini_cont_results_glmnet

    @@ -190,7 +211,7 @@

    misc

    Replace spaces in all elements of a character vector with underscores

    Pipeline customization

    -

    These are functions called by preprocess_data() or run_ml(). We make them available in case you would like to customize various steps of the pipeline beyond the arguments provided by the main functions.

    +

    Customize various steps of the pipeline beyond the arguments provided by run_ml() and preprocess_data().

    remove_singleton_columns()

    @@ -240,14 +261,6 @@

    Pipeline customization calc_perf_metrics()

    Get performance metrics for test data

    - -

    get_performance_tbl()

    - -

    Get model performance metrics as a one-row tibble

    - -

    get_feature_importance()

    - -

    Get feature importance using the permutation method

    group_correlated_features()

    diff --git a/docs/reference/mikropml.html b/docs/reference/mikropml.html index 033b0f39..53a58e4d 100644 --- a/docs/reference/mikropml.html +++ b/docs/reference/mikropml.html @@ -20,7 +20,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/otu_mini_bin.html b/docs/reference/otu_mini_bin.html index 80b5eec8..f839e6da 100644 --- a/docs/reference/otu_mini_bin.html +++ b/docs/reference/otu_mini_bin.html @@ -19,7 +19,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/otu_mini_bin_results_glmnet.html b/docs/reference/otu_mini_bin_results_glmnet.html index ca7233ac..4e42c52e 100644 --- a/docs/reference/otu_mini_bin_results_glmnet.html +++ b/docs/reference/otu_mini_bin_results_glmnet.html @@ -1,5 +1,5 @@ -Results from running the pipline with L2 logistic regression on otu_mini_bin with feature importance and grouping — otu_mini_bin_results_glmnet • mikropmlResults from running the pipeline with L2 logistic regression on otu_mini_bin with feature importance and grouping — otu_mini_bin_results_glmnet • mikropml @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 @@ -69,13 +69,13 @@
    -

    Results from running the pipline with L2 logistic regression on otu_mini_bin with feature importance and grouping

    +

    Results from running the pipeline with L2 logistic regression on otu_mini_bin with feature importance and grouping

    diff --git a/docs/reference/otu_mini_bin_results_rf.html b/docs/reference/otu_mini_bin_results_rf.html index 7400b058..9952d73e 100644 --- a/docs/reference/otu_mini_bin_results_rf.html +++ b/docs/reference/otu_mini_bin_results_rf.html @@ -1,5 +1,5 @@ -Results from running the pipline with random forest on otu_mini_bin — otu_mini_bin_results_rf • mikropmlResults from running the pipeline with random forest on otu_mini_bin — otu_mini_bin_results_rf • mikropml @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0
    @@ -69,13 +69,13 @@
    -

    Results from running the pipline with random forest on otu_mini_bin

    +

    Results from running the pipeline with random forest on otu_mini_bin

    diff --git a/docs/reference/otu_mini_bin_results_rpart2.html b/docs/reference/otu_mini_bin_results_rpart2.html index adf384e5..8e49390e 100644 --- a/docs/reference/otu_mini_bin_results_rpart2.html +++ b/docs/reference/otu_mini_bin_results_rpart2.html @@ -1,5 +1,5 @@ -Results from running the pipline with rpart2 on otu_mini_bin — otu_mini_bin_results_rpart2 • mikropmlResults from running the pipeline with rpart2 on otu_mini_bin — otu_mini_bin_results_rpart2 • mikropml @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0
    @@ -69,13 +69,13 @@
    -

    Results from running the pipline with rpart2 on otu_mini_bin

    +

    Results from running the pipeline with rpart2 on otu_mini_bin

    diff --git a/docs/reference/otu_mini_bin_results_svmRadial.html b/docs/reference/otu_mini_bin_results_svmRadial.html index 895b21d9..34b5c1b8 100644 --- a/docs/reference/otu_mini_bin_results_svmRadial.html +++ b/docs/reference/otu_mini_bin_results_svmRadial.html @@ -1,5 +1,5 @@ -Results from running the pipline with svmRadial on otu_mini_bin — otu_mini_bin_results_svmRadial • mikropmlResults from running the pipeline with svmRadial on otu_mini_bin — otu_mini_bin_results_svmRadial • mikropml @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0
    @@ -69,13 +69,13 @@
    -

    Results from running the pipline with svmRadial on otu_mini_bin

    +

    Results from running the pipeline with svmRadial on otu_mini_bin

    diff --git a/docs/reference/otu_mini_bin_results_xgbTree.html b/docs/reference/otu_mini_bin_results_xgbTree.html index 4a593f16..4a8ff944 100644 --- a/docs/reference/otu_mini_bin_results_xgbTree.html +++ b/docs/reference/otu_mini_bin_results_xgbTree.html @@ -1,5 +1,5 @@ -Results from running the pipline with xbgTree on otu_mini_bin — otu_mini_bin_results_xgbTree • mikropmlResults from running the pipeline with xbgTree on otu_mini_bin — otu_mini_bin_results_xgbTree • mikropml @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0
    @@ -69,13 +69,13 @@
    -

    Results from running the pipline with xbgTree on otu_mini_bin

    +

    Results from running the pipeline with xgbTree on otu_mini_bin

    diff --git a/docs/reference/otu_mini_cont_results_glmnet.html b/docs/reference/otu_mini_cont_results_glmnet.html index 36c38de6..5c5dba44 100644 --- a/docs/reference/otu_mini_cont_results_glmnet.html +++ b/docs/reference/otu_mini_cont_results_glmnet.html @@ -20,7 +20,7 @@ mikropml - 1.2.2.9000 + 1.3.0
    diff --git a/docs/reference/otu_mini_cont_results_nocv.html b/docs/reference/otu_mini_cont_results_nocv.html index f7a2e203..dda5c1ff 100644 --- a/docs/reference/otu_mini_cont_results_nocv.html +++ b/docs/reference/otu_mini_cont_results_nocv.html @@ -23,7 +23,7 @@ mikropml - 1.2.2.9000 + 1.3.0
    diff --git a/docs/reference/otu_mini_cv.html b/docs/reference/otu_mini_cv.html index ecf1bba4..d35a6be4 100644 --- a/docs/reference/otu_mini_cv.html +++ b/docs/reference/otu_mini_cv.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0
    diff --git a/docs/reference/otu_mini_multi.html b/docs/reference/otu_mini_multi.html index 61380779..916e2f6c 100644 --- a/docs/reference/otu_mini_multi.html +++ b/docs/reference/otu_mini_multi.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0
    diff --git a/docs/reference/otu_mini_multi_group.html b/docs/reference/otu_mini_multi_group.html index 3430838a..627812b4 100644 --- a/docs/reference/otu_mini_multi_group.html +++ b/docs/reference/otu_mini_multi_group.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0
    diff --git a/docs/reference/otu_mini_multi_results_glmnet.html b/docs/reference/otu_mini_multi_results_glmnet.html index c2cb8ddb..eceaa22e 100644 --- a/docs/reference/otu_mini_multi_results_glmnet.html +++ b/docs/reference/otu_mini_multi_results_glmnet.html @@ -20,7 +20,7 @@ mikropml - 1.2.2.9000 + 1.3.0
    diff --git a/docs/reference/otu_small.html b/docs/reference/otu_small.html index c00d3a75..6689a854 100644 --- a/docs/reference/otu_small.html +++ b/docs/reference/otu_small.html @@ -19,7 +19,7 @@ mikropml - 1.2.2.9000 + 1.3.0
    diff --git a/docs/reference/permute_p_value.html b/docs/reference/permute_p_value.html new file mode 100644 index 00000000..74a8c3f0 --- /dev/null +++ b/docs/reference/permute_p_value.html @@ -0,0 +1,151 @@ + +Calculated a permuted p-value comparing two models — permute_p_value • mikropml + + +
    +
    + + + +
    +
    + + +
    +

    Calculate a permuted p-value comparing two models

    +
    + +
    +
    permute_p_value(
    +  merged_data,
    +  metric,
    +  group_name,
    +  group_1,
    +  group_2,
    +  nperm = 10000
    +)
    +
    + +
    +

    Arguments

    +
    merged_data
    +

    the concatenated performance data from run_ml

    +
    metric
    +

    metric to compare, must be numeric

    +
    group_name
    +

    column with group variables to compare

    +
    group_1
    +

    name of one group to compare

    +
    group_2
    +

    name of other group to compare

    +
    nperm
    +

    number of permutations, default=10000

    +
    +
    +

    Value

    +

    numeric p-value comparing two models

    +
    +
    +

    Author

    +

    Begüm Topçuoğlu, topcuoglu.begum@gmail.com

    +

    Courtney R Armour, armourc@umich.edu

    +
    + +
    +

    Examples

    +
    df <- dplyr::tibble(
    +  model = c("rf", "rf", "glmnet", "glmnet", "svmRadial", "svmRadial"),
    +  AUC = c(.2, 0.3, 0.8, 0.9, 0.85, 0.95)
    +)
    +set.seed(123)
    +permute_p_value(df, "AUC", "model", "rf", "glmnet", nperm = 100)
    +#> [1] 0.3663366
    +
    +
    +
    + +
    + + +
    + + + + + + + + diff --git a/docs/reference/plot_hp_performance.html b/docs/reference/plot_hp_performance.html index 3b4cd628..55a03389 100644 --- a/docs/reference/plot_hp_performance.html +++ b/docs/reference/plot_hp_performance.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0
    diff --git a/docs/reference/plot_model_performance.html b/docs/reference/plot_model_performance.html index 33e8da4f..d60c03be 100644 --- a/docs/reference/plot_model_performance.html +++ b/docs/reference/plot_model_performance.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0
    diff --git a/docs/reference/preprocess_data.html b/docs/reference/preprocess_data.html index cf768f1b..bd8f45e1 100644 --- a/docs/reference/preprocess_data.html +++ b/docs/reference/preprocess_data.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0
    diff --git a/docs/reference/randomize_feature_order.html b/docs/reference/randomize_feature_order.html index 6167eb39..ecb5c6dd 100644 --- a/docs/reference/randomize_feature_order.html +++ b/docs/reference/randomize_feature_order.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0
    @@ -107,10 +107,10 @@

    Examples

    a = 4:6, b = 7:9, c = 10:12, d = 13:15 ) randomize_feature_order(dat, "outcome") -#> outcome a d c b -#> 1 1 4 13 10 7 -#> 2 2 5 14 11 8 -#> 3 3 6 15 12 9 +#> outcome c b a d +#> 1 1 10 7 4 13 +#> 2 2 11 8 5 14 +#> 3 3 12 9 6 15 diff --git a/docs/reference/reexports.html b/docs/reference/reexports.html index 956d56db..7a0dbcb1 100644 --- a/docs/reference/reexports.html +++ b/docs/reference/reexports.html @@ -32,7 +32,7 @@ mikropml - 1.2.2.9000 + 1.3.0 @@ -101,7 +101,7 @@

    dplyr pipe

    rlang
    -

    !!, .data, :=

    +

    !!, .data, :=

    diff --git a/docs/reference/remove_singleton_columns.html b/docs/reference/remove_singleton_columns.html index 4b21208d..778d750d 100644 --- a/docs/reference/remove_singleton_columns.html +++ b/docs/reference/remove_singleton_columns.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/replace_spaces.html b/docs/reference/replace_spaces.html index d6558c95..0cd9e165 100644 --- a/docs/reference/replace_spaces.html +++ b/docs/reference/replace_spaces.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/run_ml.html b/docs/reference/run_ml.html index b47c7a7e..a6df61b5 100644 --- a/docs/reference/run_ml.html +++ b/docs/reference/run_ml.html @@ -23,7 +23,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/shuffle_group.html b/docs/reference/shuffle_group.html new file mode 100644 index 00000000..99aa43ef --- /dev/null +++ b/docs/reference/shuffle_group.html @@ -0,0 +1,141 @@ + +Shuffle the rows in a column — shuffle_group • mikropml + + +
    +
    + + + +
    +
    + + +
    +

    Shuffle the rows in a column

    +
    + +
    +
    shuffle_group(dat, col_name)
    +
    + +
    +

    Arguments

    +
    dat
    +

    a data frame containing col_name

    +
    col_name
    +

    column name to shuffle

    +
    +
    +

    Value

    +

    dat with the rows of col_name shuffled

    +
    +
    +

    Author

    +

    Courtney R Armour, armourc@umich.edu

    +
    + +
    +

    Examples

    +
    set.seed(123)
    +df <- dplyr::tibble(
    +  condition = c("a", "a", "b", "b"),
    +  AUC = c(.2, 0.3, 0.8, 0.9)
    +)
    +shuffle_group(df, "condition")
    +#> # A tibble: 4 × 2
    +#>   condition   AUC
    +#>   <chr>     <dbl>
    +#> 1 b           0.2
    +#> 2 b           0.3
    +#> 3 a           0.8
    +#> 4 a           0.9
    +
    +
    +
    + +
    + + +
    + + + + + + + + diff --git a/docs/reference/tidy_perf_data.html b/docs/reference/tidy_perf_data.html index 73a145fe..3d97b53d 100644 --- a/docs/reference/tidy_perf_data.html +++ b/docs/reference/tidy_perf_data.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/reference/train_model.html b/docs/reference/train_model.html index dd9d4372..8a9766e1 100644 --- a/docs/reference/train_model.html +++ b/docs/reference/train_model.html @@ -17,7 +17,7 @@ mikropml - 1.2.2.9000 + 1.3.0 diff --git a/docs/sitemap.xml b/docs/sitemap.xml index c203233c..6ed7b302 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -78,6 +78,9 @@ http://www.schlosslab.org/mikropml/reference/combine_hp_performance.html + + http://www.schlosslab.org/mikropml/reference/compare_models.html + http://www.schlosslab.org/mikropml/reference/createGroupedDataPartition.html @@ -108,6 +111,9 @@ http://www.schlosslab.org/mikropml/reference/get_corr_feats.html + + http://www.schlosslab.org/mikropml/reference/get_difference.html + http://www.schlosslab.org/mikropml/reference/get_feature_importance.html @@ -234,6 +240,9 @@ http://www.schlosslab.org/mikropml/reference/otu_small.html + + http://www.schlosslab.org/mikropml/reference/permute_p_value.html + http://www.schlosslab.org/mikropml/reference/plot_hp_performance.html @@ -282,6 +291,9 @@ http://www.schlosslab.org/mikropml/reference/setup_parallel.html + + http://www.schlosslab.org/mikropml/reference/shuffle_group.html + http://www.schlosslab.org/mikropml/reference/split_outcome_features.html diff --git a/man/compare_models.Rd b/man/compare_models.Rd new file mode 100644 index 00000000..3f821954 --- /dev/null +++ b/man/compare_models.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/compare_models.R +\name{compare_models} +\alias{compare_models} +\title{Perform permutation tests to compare the performance metric +across all pairs of a group variable.} +\usage{ 
+compare_models(merged_data, metric, group_name, nperm = 10000) +} +\arguments{ +\item{merged_data}{the concatenated performance data from \code{run_ml}} + +\item{metric}{metric to compare, must be numeric} + +\item{group_name}{column with group variables to compare} + +\item{nperm}{number of permutations, default=10000} +} +\value{ +a table of p-values for all pairs of group variable +} +\description{ +A wrapper for \code{permute_p_value()}. +} +\examples{ +df <- dplyr::tibble( + model = c("rf", "rf", "glmnet", "glmnet", "svmRadial", "svmRadial"), + AUC = c(.2, 0.3, 0.8, 0.9, 0.85, 0.95) +) +set.seed(123) +compare_models(df, "AUC", "model", nperm = 10) +} +\author{ +Courtney R Armour, \email{armourc@umich.edu} +} diff --git a/man/otu_mini_bin_results_glmnet.Rd b/man/otu_mini_bin_results_glmnet.Rd index 41c4eb93..2af95b37 100644 --- a/man/otu_mini_bin_results_glmnet.Rd +++ b/man/otu_mini_bin_results_glmnet.Rd @@ -3,7 +3,7 @@ \docType{data} \name{otu_mini_bin_results_glmnet} \alias{otu_mini_bin_results_glmnet} -\title{Results from running the pipline with L2 logistic regression on \code{otu_mini_bin} with feature importance and grouping} +\title{Results from running the pipeline with L2 logistic regression on \code{otu_mini_bin} with feature importance and grouping} \format{ An object of class \code{list} of length 4. } @@ -11,6 +11,6 @@ An object of class \code{list} of length 4. 
otu_mini_bin_results_glmnet } \description{ -Results from running the pipline with L2 logistic regression on \code{otu_mini_bin} with feature importance and grouping +Results from running the pipeline with L2 logistic regression on \code{otu_mini_bin} with feature importance and grouping } \keyword{datasets} diff --git a/man/otu_mini_bin_results_rf.Rd b/man/otu_mini_bin_results_rf.Rd index 9ea47cac..302a0d6c 100644 --- a/man/otu_mini_bin_results_rf.Rd +++ b/man/otu_mini_bin_results_rf.Rd @@ -3,7 +3,7 @@ \docType{data} \name{otu_mini_bin_results_rf} \alias{otu_mini_bin_results_rf} -\title{Results from running the pipline with random forest on \code{otu_mini_bin}} +\title{Results from running the pipeline with random forest on \code{otu_mini_bin}} \format{ An object of class \code{list} of length 4. } @@ -11,6 +11,6 @@ An object of class \code{list} of length 4. otu_mini_bin_results_rf } \description{ -Results from running the pipline with random forest on \code{otu_mini_bin} +Results from running the pipeline with random forest on \code{otu_mini_bin} } \keyword{datasets} diff --git a/man/otu_mini_bin_results_rpart2.Rd b/man/otu_mini_bin_results_rpart2.Rd index a4ad66ec..72748945 100644 --- a/man/otu_mini_bin_results_rpart2.Rd +++ b/man/otu_mini_bin_results_rpart2.Rd @@ -3,7 +3,7 @@ \docType{data} \name{otu_mini_bin_results_rpart2} \alias{otu_mini_bin_results_rpart2} -\title{Results from running the pipline with rpart2 on \code{otu_mini_bin}} +\title{Results from running the pipeline with rpart2 on \code{otu_mini_bin}} \format{ An object of class \code{list} of length 4. } @@ -11,6 +11,6 @@ An object of class \code{list} of length 4. 
otu_mini_bin_results_rpart2 } \description{ -Results from running the pipline with rpart2 on \code{otu_mini_bin} +Results from running the pipeline with rpart2 on \code{otu_mini_bin} } \keyword{datasets} diff --git a/man/otu_mini_bin_results_svmRadial.Rd b/man/otu_mini_bin_results_svmRadial.Rd index 1180e950..66194ad5 100644 --- a/man/otu_mini_bin_results_svmRadial.Rd +++ b/man/otu_mini_bin_results_svmRadial.Rd @@ -3,7 +3,7 @@ \docType{data} \name{otu_mini_bin_results_svmRadial} \alias{otu_mini_bin_results_svmRadial} -\title{Results from running the pipline with svmRadial on \code{otu_mini_bin}} +\title{Results from running the pipeline with svmRadial on \code{otu_mini_bin}} \format{ An object of class \code{list} of length 4. } @@ -11,6 +11,6 @@ An object of class \code{list} of length 4. otu_mini_bin_results_svmRadial } \description{ -Results from running the pipline with svmRadial on \code{otu_mini_bin} +Results from running the pipeline with svmRadial on \code{otu_mini_bin} } \keyword{datasets} diff --git a/man/otu_mini_bin_results_xgbTree.Rd b/man/otu_mini_bin_results_xgbTree.Rd index a509b3a2..3b193cd9 100644 --- a/man/otu_mini_bin_results_xgbTree.Rd +++ b/man/otu_mini_bin_results_xgbTree.Rd @@ -3,7 +3,7 @@ \docType{data} \name{otu_mini_bin_results_xgbTree} \alias{otu_mini_bin_results_xgbTree} -\title{Results from running the pipline with xbgTree on \code{otu_mini_bin}} +\title{Results from running the pipeline with xbgTree on \code{otu_mini_bin}} \format{ An object of class \code{list} of length 4. } @@ -11,6 +11,6 @@ An object of class \code{list} of length 4. 
otu_mini_bin_results_xgbTree } \description{ -Results from running the pipline with xbgTree on \code{otu_mini_bin} +Results from running the pipeline with xbgTree on \code{otu_mini_bin} } \keyword{datasets} diff --git a/man/permute_p_value.Rd b/man/permute_p_value.Rd new file mode 100644 index 00000000..c3bc7350 --- /dev/null +++ b/man/permute_p_value.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/compare_models.R +\name{permute_p_value} +\alias{permute_p_value} +\title{Calculated a permuted p-value comparing two models} +\usage{ +permute_p_value( + merged_data, + metric, + group_name, + group_1, + group_2, + nperm = 10000 +) +} +\arguments{ +\item{merged_data}{the concatenated performance data from \code{run_ml}} + +\item{metric}{metric to compare, must be numeric} + +\item{group_name}{column with group variables to compare} + +\item{group_1}{name of one group to compare} + +\item{group_2}{name of other group to compare} + +\item{nperm}{number of permutations, default=10000} +} +\value{ +numeric p-value comparing two models +} +\description{ +Calculated a permuted p-value comparing two models +} +\examples{ +df <- dplyr::tibble( + model = c("rf", "rf", "glmnet", "glmnet", "svmRadial", "svmRadial"), + AUC = c(.2, 0.3, 0.8, 0.9, 0.85, 0.95) +) +set.seed(123) +permute_p_value(df, "AUC", "model", "rf", "glmnet", nperm = 100) +} +\author{ +Begüm Topçuoğlu, \email{topcuoglu.begum@gmail.com} + +Courtney R Armour, \email{armourc@umich.edu} +} diff --git a/man/reexports.Rd b/man/reexports.Rd index 43708a49..b07f3030 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -19,6 +19,6 @@ below to see their documentation. 
\item{dplyr}{\code{\link[dplyr:reexports]{\%>\%}}} - \item{rlang}{\code{\link[rlang:nse-force]{!!}}, \code{\link[rlang:tidyeval-data]{.data}}, \code{\link[rlang:nse-force]{:=}}} + \item{rlang}{\code{\link[rlang:injection-operator]{!!}}, \code{\link[rlang:dot-data]{.data}}, \code{\link[rlang:dyn-dots]{:=}}} }} diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index 9cb1d8a9..faacf965 100644 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -335,7 +335,7 @@ depending on how many samples and groups you have. This is because it won't be exactly what you specify with `training_frac`, since you have to include all of one group in either the training set _or_ the test set. -### Controling how groups are assigned to partitions +### Controlling how groups are assigned to partitions When you use the `groups` parameter as above, by default `run_ml()` will assume that you want all of the observations from each group to be placed in the same @@ -426,7 +426,7 @@ There are several columns: 1. `pvalue`: the probability of obtaining the actual performance value under the null hypothesis. 1. `names`: The feature that was permuted. 1. `method`: The ML method used. -1. `perf_metric_name`: The peformance metric used. +1. `perf_metric_name`: The performance metric used. 1. `seed`: The seed (if set). As you can see here, the differences are negligible (close to zero), which makes diff --git a/vignettes/paper.Rmd b/vignettes/paper.Rmd index 0aafe5aa..11637300 100644 --- a/vignettes/paper.Rmd +++ b/vignettes/paper.Rmd @@ -72,7 +72,7 @@ Machine learning (ML) for classification and prediction based on a set of features is used to make decisions in healthcare, economics, criminal justice and more. However, implementing an ML pipeline including preprocessing, model selection, and evaluation can be time-consuming, confusing, and difficult. 
Here, -we present [`mikropml`](http://www.schlosslab.org/mikropml/) (prononced +we present [`mikropml`](http://www.schlosslab.org/mikropml/) (pronounced "meek-ROPE em el"), an easy-to-use R package that implements ML pipelines using regression, support vector machines, decision trees, random forest, or gradient-boosted trees. The package is available on diff --git a/vignettes/preprocess.Rmd b/vignettes/preprocess.Rmd index 3b7051c7..8eebee7a 100644 --- a/vignettes/preprocess.Rmd +++ b/vignettes/preprocess.Rmd @@ -81,7 +81,7 @@ preprocess_data(dataset = bin_df, outcome_colname = "outcome") The output is a list: `dat_transformed` which has the transformed data, `grp_feats` which is a list of grouped features, and `removed_feats` which is a -list of featuures that were removed. Here, `grp_feats` is `NULL` because there +list of features that were removed. Here, `grp_feats` is `NULL` because there are no perfectly correlated features (e.g. `c(0,1,0)` and `c(0,1,0)`, or `c(0,1,0)` and `c(1,0,1)` - see below for more details).