diff --git a/.Rbuildignore b/.Rbuildignore index e698fee..e60ddfd 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -10,3 +10,5 @@ ^CONTRIBUTING.MD$ ^figures$ ^dabestr.Rmd$ +^cran-comments\.md$ +^revdep$ diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 71ae6a0..24ec5d1 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -2,9 +2,9 @@ # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [dev] + branches: [dev, master] pull_request: - branches: [dev] + branches: [dev, master] name: R-CMD-check diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 2aa6ce5..fee0731 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -2,9 +2,9 @@ # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [dev] + branches: [master] pull_request: - branches: [dev] + branches: [master] release: types: [published] workflow_dispatch: diff --git a/.gitignore b/.gitignore index a85f29a..91aff1d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ # Byte-compiled / optimized / DLL files -# Knitted html file -dabestr.html +# Testing Files +dabestr.Rmd # Mac **/.DS_Store @@ -14,3 +14,4 @@ dabestr.html .RProfile docs inst/doc +figures diff --git a/DESCRIPTION b/DESCRIPTION index dec1999..9fd5e0c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,14 +1,16 @@ Package: dabestr Type: Package Title: Data Analysis using Bootstrap-Coupled Estimation -Version: 0.9.9.9 +Version: 2023.9.12 Authors@R: c( - person("Kah Seng", "Lian", email = "kahseng@u.nus.edu", role = c("cre", "aut")), - person("Zhuoyu", "Wang", email = "wzhuoyu21@u.nus,edu", role = c("aut")), - person("Jun Yang", "Liao", email = "name@example.com", role = c("aut")), - person("Joses W.", "Ho", email = "joseshowh@gmail.com", role = c("aut")), + person("Yishan", "Mai", email = "maiyishan@u.duke.nus.edu", role = c("aut", "cre")), + person("Kah Seng", "Lian", email = "kahseng@u.nus.edu", role = c("aut")), + person("Zhuoyu", "Wang", email = "wzhuoyu21@u.nus,edu", role = "aut"), + person("Jun Yang", "Liao", email = "name@example.com", role = "aut"), + person("Joses W.", "Ho", email = "joseshowh@gmail.com", role = "aut"), person("ACCLAB", role = c("cph", "fnd")), - person("Tayfun", "Tumkaya", role = c("ctb"))) + person("Tayfun", "Tumkaya", role = "aut"), + person("Felicia", role = "aut")) Description: Data Analysis using Bootstrap-Coupled ESTimation. Estimation statistics is a simple framework that avoids the pitfalls of significance testing. It uses familiar statistical concepts: means, @@ -22,7 +24,7 @@ Description: Data Analysis using Bootstrap-Coupled ESTimation. separate but aligned axes. Estimation plots are introduced in Ho et al., Nature Methods 2019, 1548-7105. . - The free-to-view PDF is located at . + The free-to-view PDF is located at . License: Apache License (>= 2) Encoding: UTF-8 URL: https://github.com/ACCLAB/dabestr, @@ -42,22 +44,20 @@ Imports: grid, scales, ggsci, - colorspace, cli, boot, stats, stringr, - coin, - mvtnorm, - multcomp, - brunnermunzel + brunnermunzel, + methods Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 Suggests: testthat (>= 3.0.0), vdiffr, knitr, - rmarkdown + rmarkdown, + kableExtra Config/testthat/edition: 3 LazyData: true -VignetteBuilder: knitr +VignetteBuilder: knitr, kableExtra diff --git a/NEWS.md b/NEWS.md index ea873ad..20082c8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,25 @@ -# dabestr v0.9.9.9 -* This is the current pre-official, teaser release version of dabestr. +# dabestr v2023.9.12 +This release is a complete rebuild of dabestr. + +Previous functions from v0.3.0 and before are now depreciated. + +#### Main API +This version of dabestr features a new Main API. The following functions have been sequentially organised for their intended procedural utilisation. + +* `load()` +processes and converts a tidy dataset into the dabestr format. + +* `mean_diff()`, `median_diff()`, `cohens_d()`, `hedges_g()`, `cliffs_delta()`, `cohens_h()` +Computes the effect size for each control-test group pairing in `idx`. + +* `dabest_plot()` produces a Gardner-Altman estimation plot or a Cumming estimation plot depending on whether float_contrast is TRUE. + +#### New features +* Plotting of shared control and repeated measures +* Proportion plots (unpaired and paired) +* Mini-Meta Delta plots +* Delta-Delta plots + # dabestr v0.3.0 * This is a breaking release that includes standardised effect sizes, and a bunch of bugfixes. diff --git a/R/000_README.R b/R/000_README.R index 478f33f..2fb5cf6 100644 --- a/R/000_README.R +++ b/R/000_README.R @@ -1,6 +1,6 @@ -#####################################################' +#####################################################' #' -#' The files in this package are all labelled by numbers. +#' The files in this package are all labelled by numbers. #' #' e.g. #' 000_README.R @@ -10,13 +10,14 @@ #' #' This file serves to give an overview of what each number means in the dabestr package. #' -#####################################################' +#####################################################' #' FILE STRUCTURE -#####################################################' +#####################################################' #' #' 000 - Info #' 001 - Main Api #' 002 - Plotting Tools #' 003 - Stats Tools #' 004 - Main Plots +#' 005 - Printing Tools #' 999 - Plot Kwargs diff --git a/R/000_data.R b/R/000_data.R index 553b03b..a698fa8 100644 --- a/R/000_data.R +++ b/R/000_data.R @@ -1,7 +1,7 @@ #' Non-proportional data for Estimation plots. -#' +#' #' Contains 3 Control Samples and 6 Test Samples. -#' +#' #' @keywords internal #' @format A data frame with 180 rows and 4 variables: #' \describe{ @@ -10,8 +10,8 @@ #' \item{Group}{Which control group or test it is} #' \item{Measurement}{Measurement value} #' } -#' -#' @examples +#' +#' @examples #' data(non_proportional_data) # Lazy loading. Data becomes visible as soon as it is loaded "non_proportional_data" @@ -19,9 +19,9 @@ #' Numerical Binary data for Proportion Plots -#' +#' #' Contains 3 Control Samples and 7 Test Samples. -#' +#' #' @keywords internal #' @format A data frame with 400 rows and 4 variables: #' \describe{ @@ -30,8 +30,8 @@ #' \item{Group}{Which control group or test it is} #' \item{Success}{1 (Success) or 0 (Failure)} #' } -#' -#' @examples +#' +#' @examples #' data(proportional_data) # Lazy loading. Data becomes visible as soon as it is loaded "proportional_data" @@ -39,9 +39,9 @@ #' Data to produce a mini-meta Dabest plot -#' +#' #' Contains 3 Control Samples and 3 Test Samples. -#' +#' #' @keywords internal #' @format A data frame with 120 rows and 5 variables: #' \describe{ @@ -50,8 +50,8 @@ #' \item{Group}{Which control group or test it is} #' \item{Measurement}{Measurement value} #' } -#' -#' @examples +#' +#' @examples #' data(minimeta_data) # Lazy loading. Data becomes visible as soon as it is loaded "minimeta_data" @@ -59,9 +59,9 @@ #' Data to produce a delta2 Dabest plot -#' +#' #' Contains 2 Genotype groups and 2 Treatment groups. -#' +#' #' @keywords internal #' @format A data frame with 40 rows and 5 variables: #' \describe{ @@ -71,7 +71,7 @@ #' \item{Treatment}{Which treatment method was used} #' \item{Measurement}{Measurement value} #' } -#' -#' @examples +#' +#' @examples #' data(deltadelta_data) # Lazy loading. Data becomes visible as soon as it is loaded -"deltadelta_data" \ No newline at end of file +"deltadelta_data" diff --git a/R/000_globals.R b/R/000_globals.R index a108646..aea5bdf 100644 --- a/R/000_globals.R +++ b/R/000_globals.R @@ -1,3 +1,5 @@ -utils::globalVariables(c("N", "n", "plot_extra_yaxis", "proportion_success", "sd", "sf_change", "success_change", - "tag", "x", "x_axis_raw", "x_failure", "x_success", "xend", "y", "y_failure", "y_success", - "failure_change")) \ No newline at end of file +utils::globalVariables(c( + "N", "n", "plot_extra_yaxis", "proportion_success", "sd", "sf_change", "success_change", + "tag", "x", "x_axis_raw", "x_failure", "x_success", "xend", "y", "y_failure", "y_success", + "failure_change", "y_top_start", "y_top_end", "y_bot_start", "y_bot_end" +)) diff --git a/R/000_reexports.R b/R/000_reexports.R index af09ac8..ce0b79e 100644 --- a/R/000_reexports.R +++ b/R/000_reexports.R @@ -1,3 +1,3 @@ #' @importFrom magrittr %>% #' @export -magrittr::`%>%` \ No newline at end of file +magrittr::`%>%` diff --git a/R/001_api.R b/R/001_api.R index 16bc77b..6fa8c64 100644 --- a/R/001_api.R +++ b/R/001_api.R @@ -1,24 +1,24 @@ -#' Loading data with dabestr -#' -#' @description -#' Processes and converts a tidy dataset into the dabestr format. +#' Loading data with dabestr +#' +#' @description +#' Processes and converts a tidy dataset into the dabestr format. #' The output of this function is then used as an input for various procedural -#' functions within dabestr to create estimation plots. -#' +#' functions within dabestr to create estimation plots. +#' #' @param data A tidy dataframe. -#' @param x Column in `data` that contains the treatment groups. +#' @param x Column in `data` that contains the treatment groups. #' @param y Column in `data` that contains the measurement values. -#' @param idx List of control-test groupings for which the +#' @param idx List of control-test groupings for which the #' effect size will be computed for. #' @param paired Paired ("sequential" or "baseline"). Used for plots for experiments #' with repeated-measures designs. -#' -#' If "sequential", comparison happens between each measurement to the one directly +#' +#' If "sequential", comparison happens between each measurement to the one directly #' preceding it. (control vs group i) -#' -#' If "baseline", comparison happens between each group to a shared control. +#' +#' If "baseline", comparison happens between each group to a shared control. #' (group i vs group i+1) -#' +#' #' @param id_col Column in `data` indicating the identity of the #' datapoint if the data is tagged. Compulsory parameter if paired is TRUE. #' @param ci Default 95. Determines the range of the confidence interval for effect size @@ -27,17 +27,17 @@ #' @param colour Column in `data` that determines the groupings for colour of the #' swarmplot as opposed to `x`. #' @param proportional Boolean value determining if proportion plots are being -#' produced. +#' produced. #' @param minimeta Boolean value determining if mini-meta analysis is conducted. -#' @param delta2 Boolean value determining if delta-delta analysis for +#' @param delta2 Boolean value determining if delta-delta analysis for #' 2 by 2 experimental designs is conducted. #' @param experiment Experiment column name for delta-delta analysis. -#' @param experiment_label String specifying the experiment label that is used to +#' @param experiment_label String specifying the experiment label that is used to #' distinguish the experiment and the factors (being used in the plotting labels). -#' @param x1_level String setting the first factor level in +#' @param x1_level String setting the first factor level in #' a 2 by 2 experimental design. #' -#' @return +#' @return #' Returns a `dabest_obj` list with 18 elements. The following are the elements contained within: #' #' - `raw_data` The tidy dataset passed to [load()] that was cleaned and altered for plotting. @@ -47,15 +47,15 @@ #' - `enquo_id_col` Quosure of id_col as initially passed to [load()]. #' - `enquo_colour` Quosure of colour as initially passed to [load()]. #' - `proportional` Boolean value determining if proportion plots are being -#' produced. +#' produced. #' - `minimeta` Boolean value determining if mini-meta analysis is conducted. -#' - `delta2` Boolean value determining if delta-delta analysis for +#' - `delta2` Boolean value determining if delta-delta analysis for #' 2 by 2 experimental designs is conducted. -#' - `idx` List of control-test groupings for which the +#' - `idx` List of control-test groupings for which the #' effect size will be computed for. #' - `resamples` The number of resamples to be used to generate the effect size bootstraps. #' - `is_paired` Boolean value determining if it is a paired plot. -#' - `is_colour` Boolean value determining if there is a specified colour column +#' - `is_colour` Boolean value determining if there is a specified colour column #' for the plot. #' - `paired` Paired ("sequential" or "baseline") as initially passed to [load()]. #' - `ci` Numeric value which determines the range of the confidence interval for effect size @@ -64,15 +64,20 @@ #' - `control_summary` Numeric value for plotting of control summary lines for float_contrast= TRUE. #' - `test_summary` Numeric value for plotting of test summary lines for float_contrast = TRUE. #' - `ylim` Vector containing the y limits for the rawdata swarm plot. -#' -#' @examples +#' +#' @examples #' # Loading in of the dataset #' data(non_proportional_data) -#' +#' #' # Creating a dabest object -#' dabest_obj <- load(data = non_proportional_data, x = Group, y = Measurement, -#' idx = c("Control1 ", "Test 1")) -#' +#' dabest_obj <- load( +#' data = non_proportional_data, x = Group, y = Measurement, +#' idx = c("Control 1", "Test 1") +#' ) +#' +#' # Printing dabest object +#' print(dabest_obj) +#' #' @export load #' load <- function( @@ -83,157 +88,174 @@ load <- function( paired = NULL, id_col = NULL, ci = 95, - resamples =5000, + resamples = 5000, colour = NULL, proportional = FALSE, minimeta = FALSE, delta2 = FALSE, experiment = NULL, experiment_label = NULL, - x1_level = NULL - ){ - + x1_level = NULL) { # Storing plotting params as quosures enquo_x <- rlang::enquo(x) enquo_y <- rlang::enquo(y) enquo_id_col <- rlang::enquo(id_col) enquo_colour <- rlang::enquo(colour) - + is_colour <- isFALSE(rlang::quo_is_null(enquo_colour)) is_id_col <- isFALSE(rlang::quo_is_null(enquo_id_col)) is_paired <- isFALSE(is.null(paired)) - + name_x <- rlang::as_name(enquo_x) name_y <- rlang::as_name(enquo_y) - + #### Checking Validity of params #### if (isFALSE(name_x %in% colnames(data))) { - cli::cli_abort(c("Column {.field x} is {.emph not} in {.field data}.", - "x" = "Please enter a valid entry for {.field x} in {.fun load}.")) + cli::cli_abort(c("Column {.field x} is {.emph not} in {.field data}.", + "x" = "Please enter a valid entry for {.field x} in {.fun load}." + )) } if (isFALSE(name_y %in% colnames(data))) { - cli::cli_abort(c("Column {.field y} is {.strong not} in {.field data}.", - "x" = "Please enter a valid entry for {.field y} in {.fun load}.")) + cli::cli_abort(c("Column {.field y} is {.strong not} in {.field data}.", + "x" = "Please enter a valid entry for {.field y} in {.fun load}." + )) } if (isTRUE(is_id_col)) { if (isFALSE(rlang::as_name(enquo_id_col) %in% colnames(data))) { - cli::cli_abort(c("Column {.field id_col} is {.strong not} in {.field data}.", - "x" = "Please enter a valid entry for {.field id_col} in {.fun load}.")) + cli::cli_abort(c("Column {.field id_col} is {.strong not} in {.field data}.", + "x" = "Please enter a valid entry for {.field id_col} in {.fun load}." + )) } } if (isTRUE(is_colour)) { if (isFALSE(rlang::as_name(enquo_colour) %in% colnames(data))) { - cli::cli_abort(c("Column {.field colour} is {.strong not} in {.field data}.", - "x" = "Please enter a valid entry for {.field colour} in {.fun load}.")) + cli::cli_abort(c("Column {.field colour} is {.strong not} in {.field data}.", + "x" = "Please enter a valid entry for {.field colour} in {.fun load}." + )) } } if (isFALSE(delta2)) { if (is.null(idx)) { - cli::cli_abort(c("Column {.field idx} is currently NULL.", - "x" = "Please enter a valid entry for {.field idx} in {.fun load}.")) + cli::cli_abort(c("Column {.field idx} is currently NULL.", + "x" = "Please enter a valid entry for {.field idx} in {.fun load}." + )) } if (is.list(idx)) { - general_idx_lengths <- sapply(idx,length) - if (any(general_idx_lengths<2)==TRUE) { + general_idx_lengths <- sapply(idx, length) + if (any(general_idx_lengths < 2) == TRUE) { cli::cli_abort(c("Some {.field idx} does not consist of at least 2 groups", - "x" = "Make sure each nested group in {.field idx} has length >=2.")) + "x" = "Make sure each nested group in {.field idx} has length >=2." + )) } } else { general_idx_lengths <- length(idx) - if (any(general_idx_lengths<2)==TRUE) { + if (any(general_idx_lengths < 2) == TRUE) { cli::cli_abort(c("Some {.field idx} does not consist of at least 2 groups", - "x" = "Make sure each nested group in {.field idx} has length >=2.")) + "x" = "Make sure each nested group in {.field idx} has length >=2." + )) } } - } - - + } + + ## Check that data is proportional if (isTRUE(proportional)) { values <- unique(data[[name_y]]) - if (isFALSE(setequal(c(0,1), values))) { - cli::cli_abort(c("{.field proportional} is {.strong TRUE} but {.field data} is not proportional.", - "x" = "{.field y} Column of {.field data} should only contain 1 and 0.")) + if (isFALSE(setequal(c(0, 1), values))) { + cli::cli_abort(c("{.field proportional} is {.strong TRUE} but {.field data} is not proportional.", + "x" = "{.field y} Column of {.field data} should only contain 1 and 0." + )) } } - + ## Check that id_col is not NULL if is_paired is TRUE if (isTRUE(is_paired) & isFALSE(is_id_col)) { - cli::cli_abort(c("{.field paired} is {.strong TRUE} but no {.field id_col} was supplied.", - "x" = "Please enter an entry for {.field id_col} in {.fun load}.")) + cli::cli_abort(c("{.field paired} is {.strong TRUE} but no {.field id_col} was supplied.", + "x" = "Please enter an entry for {.field id_col} in {.fun load}." + )) } - + ## Check that paired must be either "baseline" or "sequential" if (isTRUE(is_paired)) { - if (isFALSE(paired %in% c("baseline","sequential"))) { - cli::cli_abort(c("{.field paired} is not 'baseline' or 'sequential'.", - "x" = "{.field paired} can only be 'baseline' or 'sequential'.")) + if (isFALSE(paired %in% c("baseline", "sequential"))) { + cli::cli_abort(c("{.field paired} is not 'baseline' or 'sequential'.", + "x" = "{.field paired} can only be 'baseline' or 'sequential'." + )) } } - + ## Make idx into a list if it is a vector if (typeof(idx) != "list" && isFALSE(is.null(idx))) { idx <- list(idx) } - - ## Check for valid mini-meta + + ## Check for valid mini-meta if (isTRUE(minimeta)) { if (isTRUE(proportional)) { - cli::cli_abort(c("{.field proportional} is {.strong TRUE} but {.field minimeta} is also {.strong TRUE}.", - "x" = "{.field proportional} and {.field minimeta} cannot be {.strong TRUE} at the same time.")) + cli::cli_abort(c("{.field proportional} is {.strong TRUE} but {.field minimeta} is also {.strong TRUE}.", + "x" = "{.field proportional} and {.field minimeta} cannot be {.strong TRUE} at the same time." + )) } else if (isTRUE(delta2)) { - cli::cli_abort(c("{.field delta2} is {.strong TRUE} but {.field minimeta} is also {.strong TRUE}.", - "x" = "{.field delta2} and {.field minimeta} cannot be {.strong TRUE} at the same time.")) + cli::cli_abort(c("{.field delta2} is {.strong TRUE} but {.field minimeta} is also {.strong TRUE}.", + "x" = "{.field delta2} and {.field minimeta} cannot be {.strong TRUE} at the same time." + )) } - + minimeta_idx_lengths <- sapply(idx, length) - if (any(minimeta_idx_lengths!=2)==TRUE) { + if (any(minimeta_idx_lengths != 2) == TRUE) { cli::cli_abort(c("{.field minimeta} is {.strong TRUE}, but some {.field idx} does not consist of exactly 2 groups", - "x" = "You can only put in exactly 2 groups in {.field idx} when {.field minimeta} is {.strong TRUE}.")) + "x" = "You can only put in exactly 2 groups in {.field idx} when {.field minimeta} is {.strong TRUE}." + )) } } - + if (isTRUE(delta2)) { if (isTRUE(proportional)) { - cli::cli_abort(c("{.field delta2} is {.strong TRUE} but {.field proportional} is also {.strong TRUE}.", - "x" = "{.field delta2} and {.field proportional} cannot be {.strong TRUE} at the same time.")) + cli::cli_abort(c("{.field delta2} is {.strong TRUE} but {.field proportional} is also {.strong TRUE}.", + "x" = "{.field delta2} and {.field proportional} cannot be {.strong TRUE} at the same time." + )) } - + enquo_experiment <- rlang::enquo(experiment) name_experiment <- rlang::as_name(enquo_experiment) - + # Make sure that data is a 2x2 ANOVA case - if (length(unique(data[[name_experiment]]))!=2) { - cli::cli_abort(c("{.field experiment} does not have a length of 2.", - "x" = "There can only be 2 groups in {.field experiment} when {.field delta2} is {.strong TRUE}.")) - } else if (length(unique(data[[name_x]]))!=2) { - cli::cli_abort(c("{.field x} does not have a length of 2.", - "x" = "There can only be 2 groups in {.field x} when {.field delta2} is {.strong TRUE}.")) + if (length(unique(data[[name_experiment]])) != 2) { + cli::cli_abort(c("{.field experiment} does not have a length of 2.", + "x" = "There can only be 2 groups in {.field experiment} when {.field delta2} is {.strong TRUE}." + )) + } else if (length(unique(data[[name_x]])) != 2) { + cli::cli_abort(c("{.field x} does not have a length of 2.", + "x" = "There can only be 2 groups in {.field x} when {.field delta2} is {.strong TRUE}." + )) } - + # Check for idx, experiment_label and x1_level if (isTRUE(is.null(idx))) { # Set levels for experiment and x if they are present if (isFALSE(is.null(experiment_label))) { - data[[name_experiment]] = factor(x = data[[name_experiment]], levels = experiment_label) + data[[name_experiment]] <- factor(x = data[[name_experiment]], levels = experiment_label) } if (isFALSE(is.null(x1_level))) { - data[[name_x]] = factor(x = data[[name_x]], levels = x1_level) + data[[name_x]] <- factor(x = data[[name_x]], levels = x1_level) } data <- data %>% dplyr::arrange(!!enquo_experiment, !!enquo_x) } - + if (is.null(experiment_label)) { + experiment_label <- unique(data[[name_experiment]]) + } + data <- data %>% dplyr::mutate(grouping = !!enquo_x) %>% - tidyr::unite(!!enquo_experiment, c(!!enquo_x,!!enquo_experiment),sep = " ",remove=FALSE) + tidyr::unite(!!enquo_experiment, c(!!enquo_x, !!enquo_experiment), sep = " ", remove = FALSE) if (dplyr::as_label(enquo_colour) == "NULL") { enquo_colour <- enquo_x } enquo_x <- enquo_experiment name_x <- rlang::as_name(enquo_x) is_colour <- TRUE - + # Obtain idx if is null if (isTRUE(is.null(idx))) { spread_idx <- unique(data[[name_experiment]]) @@ -253,59 +275,70 @@ load <- function( idx <- c(idx, list(curr_group_vector)) } } - + unlist_idx <- unlist(idx) - + + ## Check to ensure that each treatment group in idx is present in the x column + unique_x <- unique(data[[name_x]]) + for (i in 1:length(unlist_idx)) { + if (isFALSE(unlist_idx[i] %in% unique_x)) { + cli::cli_abort(c("{.field idx} contains treatment groups not present in {.field x}.", + "x" = "Ensure that idx does not have any treatment groups not present in dataset." + )) + } + } + raw_data <- data %>% dplyr::filter(!!enquo_x %in% unlist_idx) %>% dplyr::mutate(x_axis_raw = 0) - - raw_data[[name_x]] = factor(x = raw_data[[name_x]], levels = unlist_idx) - + + raw_data[[name_x]] <- factor(x = raw_data[[name_x]], levels = unlist_idx) + for (i in 1:length(unlist_idx)) { raw_data <- raw_data %>% dplyr::mutate(x_axis_raw = ifelse( - !!enquo_x == unlist_idx[i], i, x_axis_raw + !!enquo_x == unlist_idx[i], i, x_axis_raw )) } - + # Obtain raw_y_range_vector ylim <- range(raw_data[[name_y]]) - + # Creation of x-axis label Ns <- raw_data %>% dplyr::group_by(!!enquo_x) %>% dplyr::count() Ns$swarmticklabs <- do.call(paste, c(Ns[c(name_x, "n")], sep = "\nN = ")) - + # Extending ylim for plotting - ylim[1] <- ylim[1] - (ylim[2]-ylim[1])/25 - ylim[2] <- ylim[2] + (ylim[2]-ylim[1])/25 - - if(isTRUE(proportional)){ + ylim[1] <- ylim[1] - (ylim[2] - ylim[1]) / 25 + ylim[2] <- ylim[2] + (ylim[2] - ylim[1]) / 25 + + if (isTRUE(proportional)) { proportional_data <- raw_data %>% dplyr::select(!!enquo_x, !!enquo_y, !!enquo_id_col, !!enquo_colour) %>% dplyr::group_by(!!enquo_x) %>% - dplyr::summarise(proportion_success = mean(!!enquo_y), - y_success = proportion_success/2, - y_failure = (1+proportion_success)/2) - + dplyr::summarise( + proportion_success = mean(!!enquo_y), + y_success = proportion_success / 2, + y_failure = (1 + proportion_success) / 2 + ) + control_summary <- proportional_data$proportion_success[1] test_summary <- proportional_data$proportion_success[2] - } else { # Calculation of summary lines summaries <- raw_data %>% dplyr::group_by(!!enquo_x) %>% dplyr::summarise(summary_stats = mean(!!enquo_y)) - + # Only currently works for two-groups, if needed for extended features in future, to be changed control_summary <- summaries$summary_stats[1] test_summary <- summaries$summary_stats[2] - + proportional_data <- NULL } - + dabest_object <- list( raw_data = raw_data, proportional_data = proportional_data, @@ -314,6 +347,7 @@ load <- function( enquo_id_col = enquo_id_col, enquo_colour = enquo_colour, proportional = proportional, + experiment_label = experiment_label, minimeta = minimeta, delta2 = delta2, idx = idx, @@ -327,62 +361,65 @@ load <- function( test_summary = test_summary, ylim = ylim ) - + class(dabest_object) <- c("dabest") return(dabest_object) } #' Print a `dabest` object -#' +#' #' @noRd #' -#' @param dabest_obj a list +#' @param dabest_obj a list #' @param ... S3 signature for generic plot function. #' #' @return A summary of the experimental designs. #' -#' @examples +#' @examples #' # Loading in of the dataset #' data(twogroup_data) -#' +#' #' # Creating a dabest object -#' dabest_obj <- load(data = twogroup_data, x = Group, y = Measurement, -#' idx = c("Control1", "Group1")) -#' +#' dabest_obj <- load( +#' data = twogroup_data, x = Group, y = Measurement, +#' idx = c("Control1", "Group1") +#' ) +#' #' # Display the results in a user-friendly format. #' print(dabest_obj) -#' +#' #' @export print.dabest <- function(x, ...) { if (class(x)[1] != "dabest") { - cli::cli_abort(c("Only dabest class can be used.", - "x" = "Please enter a valid entry into the function.")) + cli::cli_abort(c("Only dabest class can be used.", + "x" = "Please enter a valid entry into the function." + )) } - + dabest_obj <- x print_greeting_header() - + paired <- dabest_obj$paired ci <- dabest_obj$ci - + if (is.null(paired)) { rm_status <- "" - } else if (paired =="sequential") { + } else if (paired == "sequential") { rm_status <- "for the sequential design of repeated-measures experiment \n" - } else if (paired=="baseline") { + } else if (paired == "baseline") { rm_status <- "for repeated measures against baseline \n" } - + if (is.null(paired)) { paired_status <- "E" - } else if (paired =="sequential") { + } else if (paired == "sequential") { paired_status <- "Paired e" - } else if (paired =="baseline") { + } else if (paired == "baseline") { paired_status <- "Paired e" } - line1 <- paste(paired_status,"ffect size(s) ",rm_status,sep="") - line2 <- paste("with ",ci,"% confidence intervals will be computed for:",sep="") + line1 <- paste(paired_status, "ffect size(s) ", rm_status, sep = "") + line2 <- paste("with ", ci, "% confidence intervals will be computed for:", sep = "") cat(line1) cat(line2) cat("\n") diff --git a/R/001_effsize_func.R b/R/001_effsize_func.R index 4fbc090..5a1650e 100644 --- a/R/001_effsize_func.R +++ b/R/001_effsize_func.R @@ -1,20 +1,20 @@ #' Calculating effect sizes -#' +#' #' @name effect_size -#' +#' #' @description #' Computes the effect size for each control-test group pairing in `idx`. #' The resampling bootstrap distribution of the effect size is then subjected #' to Bias-corrected and accelerated bootstrap (BCa) correction. -#' +#' #' The following effect sizes `mean_diff`, `median_diff`, `cohens_d`, `hedges_g` and `cliffs_delta` -#' are used for most plot types. -#' -#' @param dabest_obj A dabest_obj created by loading in dataset along with other +#' are used for most plot types. +#' +#' @param dabest_obj A dabest_obj created by loading in dataset along with other #' specified parameters with the [load()] function. #' @param perm_count The number of reshuffles of control and test labels to be performed for each p-value. -#' -#' @returns +#' +#' @returns #' Returns a `dabest_effectsize_obj` list with 22 elements. The following are the elements contained within: #' - `raw_data` The tidy dataset passed to [load()] that was cleaned and altered for plotting. @@ -38,60 +38,66 @@ #' - `minimeta` Boolean value as initially passed to [load()]. #' - `delta` Boolean value as initially passed to [load()]. #' - `proportional_data` List of calculations related to the plotting of proportion plots. -#' - `boot_result` List containing values related to the calculation of the effect sizes, +#' - `boot_result` List containing values related to the calculation of the effect sizes, #' bootstrapping and BCa correction. -#' - `baseline_ec_boot_result` List containing values related to the calculation +#' - `baseline_ec_boot_result` List containing values related to the calculation #' of the effect sizes, bootstrapping and BCa correction for the baseline error #' curve. -#' - `permtest_pvals` List containing values related to the calculations of permutation -#' t tests and the corresponding p values, and p values for different types of effect sizes +#' - `permtest_pvals` List containing values related to the calculations of permutation +#' t tests and the corresponding p values, and p values for different types of effect sizes #' and different statistical tests. #' #' @details #' The plot types listed under here are limited to use only the following effect sizes. #' * Proportion plots offers only `mean_diff` and `cohens_h`. #' * Mini-Meta Delta plots offers only `mean_diff`. -#' +#' #' The other plots are able to use all given basic effect sizes as listed in the Description. #' #' @examples #' # Loading of the dataset #' data(non_proportional_data) -#' -#' # Preparing the data to be plotted -#' dabest_obj <- load(non_proportional_data, x = Group, y = Measurement, idx = c("Control 1", "Test 1")) +#' +#' # Applying effect size to the dabest object +#' dabest_obj <- load(non_proportional_data, +#' x = Group, y = Measurement, +#' idx = c("Control 1", "Test 1") +#' ) #' dabest_obj.mean_diff <- mean_diff(dabest_obj) -#' -#' @export +#' +#' # Printing dabest effectsize object +#' print(dabest_obj.mean_diff) +#' @export mean_diff <- function(dabest_obj, perm_count = 5000) { - effect_size_type <- "mean_diff" - if (class(dabest_obj)!="dabest") { + if (!methods::is(dabest_obj, "dabest")) { cli::cli_abort(c("{.field dabest_obj} must be a {.cls dabest} object."), - "x" = "Please supply a {.cls dabest} object.") + "x" = "Please supply a {.cls dabest} object." + ) } - + effect_size_func <- function(control, test, paired) { if (identical(paired, FALSE)) { return(mean(test) - mean(control)) } return(mean(test - control)) } - + is_paired <- dabest_obj$is_paired reps <- dabest_obj$resamples - - if (is_paired){ + + if (is_paired) { main_results <- bootstrap(dabest_obj, effect_size_func, boot_labs = "Paired\nmean difference", reps = reps) } else { main_results <- bootstrap(dabest_obj, effect_size_func, boot_labs = "Mean difference", reps = reps) } - permtest_and_pvalues <- Pvalues_statistics(dabest_obj, - ef_size_fn = effect_size_func, - effect_size_type = effect_size_type, - perm_count = perm_count) + permtest_and_pvalues <- Pvalues_statistics(dabest_obj, + ef_size_fn = effect_size_func, + effect_size_type = effect_size_type, + perm_count = perm_count + ) output <- c(main_results, list(effect_size_type = effect_size_type), permtest_and_pvalues) - + class(output) <- c("dabest_effectsize") return(output) @@ -101,196 +107,211 @@ mean_diff <- function(dabest_obj, perm_count = 5000) { #' @export median_diff <- function(dabest_obj, perm_count = 5000) { effect_size_type <- "median_diff" - if (class(dabest_obj)!="dabest") { + if (!methods::is(dabest_obj, "dabest")) { cli::cli_abort(c("{.field dabest_obj} must be a {.cls dabest} object."), - "x" = "Please supply a {.cls dabest} object.") + "x" = "Please supply a {.cls dabest} object." + ) } - + effect_size_func <- function(control, test, paired) { if (identical(paired, FALSE)) { return(stats::median(test) - stats::median(control)) } return(stats::median(test - control)) } - + is_paired <- dabest_obj$is_paired reps <- dabest_obj$resamples - - if (is_paired){ + + if (is_paired) { main_results <- bootstrap(dabest_obj, effect_size_func, boot_labs = "Paired\nmedian difference", reps = reps) } else { main_results <- bootstrap(dabest_obj, effect_size_func, boot_labs = "Median difference", reps = reps) } - - permtest_and_pvalues <- Pvalues_statistics(dabest_obj, - ef_size_fn = effect_size_func, - effect_size_type = effect_size_type, - perm_count = perm_count) + + permtest_and_pvalues <- Pvalues_statistics(dabest_obj, + ef_size_fn = effect_size_func, + effect_size_type = effect_size_type, + perm_count = perm_count + ) output <- c(main_results, list(effect_size_type = effect_size_type), permtest_and_pvalues) - + class(output) <- c("dabest_effectsize") - + return(output) } #' @rdname effect_size -#' @export +#' @export cohens_d <- function(dabest_obj, perm_count = 5000) { effect_size_type <- "cohens_d" - if (class(dabest_obj)!="dabest") { + if (!methods::is(dabest_obj, "dabest")) { cli::cli_abort(c("{.field dabest_obj} must be a {.cls dabest} object."), - "x" = "Please supply a {.cls dabest} object.") + "x" = "Please supply a {.cls dabest} object." + ) } - + effect_size_func <- function(control, test, paired) { return(effsize::cohen.d(test, control, paired = paired)$estimate) } - + reps <- dabest_obj$resamples - - main_results <- bootstrap(dabest_obj, - effect_size_func, - boot_labs = "Cohen's d", - reps = reps) - permtest_and_pvalues <- Pvalues_statistics(dabest_obj, - ef_size_fn = effect_size_func, - effect_size_type = effect_size_type, - perm_count = perm_count) - output <- c(main_results,list(effect_size_type = effect_size_type),permtest_and_pvalues) - + + main_results <- bootstrap(dabest_obj, + effect_size_func, + boot_labs = "Cohen's d", + reps = reps + ) + permtest_and_pvalues <- Pvalues_statistics(dabest_obj, + ef_size_fn = effect_size_func, + effect_size_type = effect_size_type, + perm_count = perm_count + ) + output <- c(main_results, list(effect_size_type = effect_size_type), permtest_and_pvalues) + class(output) <- c("dabest_effectsize") return(output) } #' @rdname effect_size -#' @export +#' @export hedges_g <- function(dabest_obj, perm_count = 5000) { effect_size_type <- "hedges_g" - if (class(dabest_obj)!="dabest") { + if (!methods::is(dabest_obj, "dabest")) { cli::cli_abort(c("{.field dabest_obj} must be a {.cls dabest} object."), - "x" = "Please supply a {.cls dabest} object.") + "x" = "Please supply a {.cls dabest} object." + ) } - + cohens_d_ <- function(control, test, paired) { - return(effsize::cohen.d(test, control, paired=paired)$estimate) + return(effsize::cohen.d(test, control, paired = paired)$estimate) } - + effect_size_func <- function(control, test, paired) { cd <- cohens_d_(test, control, paired = paired) corr.factor <- -hedges_correction(test, control) return(cd * corr.factor) } - + reps <- dabest_obj$resamples - - main_results <- bootstrap(dabest_obj, - effect_size_func, - boot_labs = "Hedges' g", - reps = reps) - permtest_and_pvalues <- Pvalues_statistics(dabest_obj, - ef_size_fn = effect_size_func, - effect_size_type = effect_size_type, - perm_count = perm_count) - output <- c(main_results, list(effect_size_type = effect_size_type),permtest_and_pvalues) - + + main_results <- bootstrap(dabest_obj, + effect_size_func, + boot_labs = "Hedges' g", + reps = reps + ) + permtest_and_pvalues <- Pvalues_statistics(dabest_obj, + ef_size_fn = effect_size_func, + effect_size_type = effect_size_type, + perm_count = perm_count + ) + output <- c(main_results, list(effect_size_type = effect_size_type), permtest_and_pvalues) + class(output) <- c("dabest_effectsize") return(output) } #' @rdname effect_size -#' @export +#' @export cliffs_delta <- function(dabest_obj, perm_count = 5000) { effect_size_type <- "cliffs_delta" - if (class(dabest_obj)!="dabest") { + if (!methods::is(dabest_obj, "dabest")) { cli::cli_abort(c("{.field dabest_obj} must be a {.cls dabest} object."), - "x" = "Please supply a {.cls dabest} object.") + "x" = "Please supply a {.cls dabest} object." + ) } - + effect_size_func <- function(control, test, paired = NA) { return(effsize::cliff.delta(test, control)$estimate) } - + reps <- dabest_obj$resamples - - main_results <- bootstrap(dabest_obj, - effect_size_func, - boot_labs = "Cliffs' delta", - reps = reps) - permtest_and_pvalues <- Pvalues_statistics(dabest_obj, - ef_size_fn = effect_size_func, - effect_size_type = effect_size_type, - perm_count = perm_count) - output <- c(main_results, list(effect_size_type = effect_size_type),permtest_and_pvalues) - + + main_results <- bootstrap(dabest_obj, + effect_size_func, + boot_labs = "Cliffs' delta", + reps = reps + ) + permtest_and_pvalues <- Pvalues_statistics(dabest_obj, + ef_size_fn = effect_size_func, + effect_size_type = effect_size_type, + perm_count = perm_count + ) + output <- c(main_results, list(effect_size_type = effect_size_type), permtest_and_pvalues) + class(output) <- c("dabest_effectsize") return(output) } #' @rdname effect_size -#' @export -cohens_h <- function(dabest_obj, perm_count = 5000){ +#' @export +cohens_h <- function(dabest_obj, perm_count = 5000) { effect_size_type <- "cohens_h" - if (class(dabest_obj)!="dabest") { + if (!methods::is(dabest_obj, "dabest")) { cli::cli_abort(c("{.field dabest_obj} must be a {.cls dabest} object."), - "x" = "Please supply a {.cls dabest} object.") + "x" = "Please supply a {.cls dabest} object." + ) } - + effect_size_func <- function(control, test, paired) { - #remove nas and nulls later on + # remove nas and nulls later on prop_control <- mean(control) prop_test <- mean(test) - + # Arcsine transformation phi_control <- 2 * asin(sqrt(prop_control)) phi_test <- 2 * asin(sqrt(prop_test)) result <- phi_test - phi_control return(result) } - + reps <- dabest_obj$resamples - - main_results <- bootstrap(dabest_obj, - effect_size_func, - boot_labs = "Cohen's h", - reps = reps) - permtest_and_pvalues <- Pvalues_statistics(dabest_obj, - ef_size_fn = effect_size_func, - effect_size_type = effect_size_type, - perm_count = perm_count) - output <- c(main_results, list(effect_size_type = effect_size_type),permtest_and_pvalues) - + + main_results <- bootstrap(dabest_obj, + effect_size_func, + boot_labs = "Cohen's h", + reps = reps + ) + permtest_and_pvalues <- Pvalues_statistics(dabest_obj, + ef_size_fn = effect_size_func, + effect_size_type = effect_size_type, + perm_count = perm_count + ) + output <- c(main_results, list(effect_size_type = effect_size_type), permtest_and_pvalues) + class(output) <- c("dabest_effectsize") return(output) } hedges_correction <- function(x1, x2) { - n1 <- length(x1) n2 <- length(x2) - + deg.freedom <- n1 + n2 - 2 - numer <- gamma(deg.freedom/2) - denom0 <- gamma((deg.freedom - 1) / 2) - denom <- sqrt((deg.freedom / 2)) * denom0 - + numer <- gamma(deg.freedom / 2) + denom0 <- gamma((deg.freedom - 1) / 2) + denom <- sqrt((deg.freedom / 2)) * denom0 + if (is.infinite(numer) | is.infinite(denom)) { # Occurs when df is too large. # Applies Hedges and Olkin's approximation. df.sum <- n1 + n2 denom <- (4 * df.sum) - 9 out <- 1 - (3 / denom) - } else out <- numer / denom - + } else { + out <- numer / denom + } + return(out) } #' Print a `dabest_effectsize_obj` object -#' +#' #' @noRd #' #' @param dabest_effectsize_obj a list object created by `effect_size()` functions @@ -298,48 +319,51 @@ hedges_correction <- function(x1, x2) { #' #' @return A summary of the effect sizes and respective confidence intervals. #' -#' @examples +#' @examples #' # Loading in of the dataset #' data(twogroup_data) -#' +#' #' # Preparing the data to be plotted -#' dabest_obj <- load(twogroup_data, x = Group, y = Measurement, -#' idx = c("Control1", "Group1")) -#' +#' dabest_obj <- load(twogroup_data, +#' x = Group, y = Measurement, +#' idx = c("Control1", "Group1") +#' ) +#' #' dabest_obj.mean_diff <- mean_diff(dabest_obj) -#' +#' #' # Display the results in a user-friendly format. #' print(dabest_obj.mean_diff) -#' +#' #' @export print.dabest_effectsize <- function(x, ...) { - if (class(x) != "dabest_effectsize") { - cli::cli_abort(c("Only dabest_effectsize objects can be used.", - "x" = "Please enter a valid entry into the function.")) + if (!methods::is(x, "dabest_effectsize")) { + cli::cli_abort(c("Only dabest_effectsize objects can be used.", + "x" = "Please enter a valid entry into the function." + )) } - + dabest_effectsize_obj <- x print_greeting_header() - + paired <- dabest_effectsize_obj$paired ci <- dabest_effectsize_obj$ci - + if (is.null(paired)) { rm_status <- "" - } else if (paired =="sequential") { + } else if (paired == "sequential") { rm_status <- "for the sequential design of repeated-measures experiment \n" - } else if (paired=="baseline") { + } else if (paired == "baseline") { rm_status <- "for repeated measures against baseline \n" } - + if (is.null(paired)) { paired_status <- "E" - } else if (paired =="sequential") { + } else if (paired == "sequential") { paired_status <- "Paired e" - } else if (paired =="baseline") { + } else if (paired == "baseline") { paired_status <- "Paired e" } es <- dabest_effectsize_obj$effect_size_type - print_each_comparism_effectsize(dabest_effectsize_obj,es) + print_each_comparism_effectsize(dabest_effectsize_obj, es) print_ending(dabest_effectsize_obj) } diff --git a/R/001_plotter.R b/R/001_plotter.R index 1d221b8..8c18fe1 100644 --- a/R/001_plotter.R +++ b/R/001_plotter.R @@ -1,132 +1,144 @@ #' Producing an estimation plot -#' +#' #' @name dabest_plot -#' -#' @param dabest_effectsize_obj A dabest_effectsize_obj created by loading in a +#' +#' @param dabest_effectsize_obj A dabest_effectsize_obj created by loading in a #' dabest_obj along with other specified parameters with the [effect_size()] function. -#' @param float_contrast Default TRUE. If TRUE, a Gardner-Altman plot will be produced. +#' @param float_contrast Default TRUE. If TRUE, a Gardner-Altman plot will be produced. #' If FALSE, a Cumming estimation plot will be produced. -#' @param ... Adjustment parameters to control and adjust the appearance of the plot. +#' @param ... Adjustment parameters to control and adjust the appearance of the plot. #' (list of all possible adjustment parameters can be found under [plot_kwargs]) -#' +#' #' @description -#' Produces a Gardner-Altman estimation plot or a Cumming estimation plot depending -#' on whether float_contrast is TRUE. The plot presents all datapoints as a swarmplot, -#' which orders each point to display the underlying distribution. It also presents -#' the effect size as a bootstrap 95% confidence interval (95% CI) on a separate +#' Produces a Gardner-Altman estimation plot or a Cumming estimation plot depending +#' on whether float_contrast is TRUE. The plot presents all datapoints as a swarmplot, +#' which orders each point to display the underlying distribution. It also presents +#' the effect size as a bootstrap 95% confidence interval (95% CI) on a separate #' but aligned axes. -#' -#' @usage +#' +#' @usage #' dabest_plot(dabest_effectsize_obj, float_contrast = TRUE, ...) -#' +#' #' @examples #' # Loading of the dataset #' data(twogroup_data) -#' +#' #' # Preparing the data to be plotted -#' dabest_obj <- load(non_proportional_data, x = Group, y = Measurement, idx = c("Control 1", "Test 1")) +#' dabest_obj <- load(non_proportional_data, +#' x = Group, y = Measurement, +#' idx = c("Control 1", "Test 1") +#' ) #' dabest_obj.mean_diff <- mean_diff(dabest_obj) -#' -#' # Plotting of dabest_obj.mean_diff -#' dabest_plot(dabest_obj.mean_diff, TRUE) -#' +#' +#' # Plotting an estimation plot +#' dabest_plot(dabest_obj.mean_diff, TRUE) +#' #' @export dabest_plot <- function(dabest_effectsize_obj, float_contrast = TRUE, ...) { - - if (class(dabest_effectsize_obj)!="dabest_effectsize") { + if (!methods::is(dabest_effectsize_obj, "dabest_effectsize")) { cli::cli_abort(c("{.field dabest_effectsize_obj} must be a {.cls dabest_effectsize} object."), - "x" = "Please supply a {.cls dabest_effectsize} object.") + "x" = "Please supply a {.cls dabest_effectsize} object." + ) } - + plot_kwargs <- list(...) plot_kwargs <- assign_plot_kwargs(dabest_effectsize_obj, plot_kwargs) - + custom_palette <- plot_kwargs$custom_palette - + is_colour <- dabest_effectsize_obj$is_colour is_deltadelta <- plot_kwargs$show_delta2 is_mini_meta <- plot_kwargs$show_mini_meta idx <- dabest_effectsize_obj$idx raw_legend <- NULL - - if(length(unlist(idx)) >= 3) { + + if (length(unlist(idx)) >= 3) { float_contrast <- FALSE } - - if(isFALSE(float_contrast)) { - raw_plot <- plot_raw(dabest_effectsize_obj, float_contrast=FALSE, plot_kwargs) - delta_plot <- plot_delta(dabest_effectsize_obj, float_contrast=FALSE, plot_kwargs) - + + if (isFALSE(float_contrast)) { + raw_plot <- plot_raw(dabest_effectsize_obj, float_contrast = FALSE, plot_kwargs) + delta_plot <- plot_delta(dabest_effectsize_obj, float_contrast = FALSE, plot_kwargs) + delta_range <- delta_plot$delta_range delta_plot <- delta_plot$delta_plot - + raw_plot <- apply_palette(raw_plot, custom_palette) delta_plot <- apply_palette(delta_plot, custom_palette) - - raw_legend <- cowplot::get_legend(raw_plot + - ggplot2::guides(alpha = "none") + - ggplot2::theme(legend.box.margin = ggplot2::margin(0, 0, 0, 0))) - + + raw_legend <- cowplot::get_legend(raw_plot + + ggplot2::guides(alpha = "none") + + ggplot2::theme(legend.box.margin = ggplot2::margin(0, 0, 0, 0))) + plot_margin <- ggplot2::unit(c(0, 0, 0, 0), "pt") - - if(isTRUE(is_mini_meta)) { + + if (isTRUE(is_mini_meta)) { plot_margin <- ggplot2::unit(c(0, 5.5, 0, 0), "pt") } - + final_plot <- cowplot::plot_grid( - plotlist = list(raw_plot + ggplot2::theme(legend.position="none", - plot.margin = plot_margin), - delta_plot + ggplot2::theme(legend.position="none", - plot.margin = plot_margin)), - nrow = 2, - ncol = 1, - axis = "tblr", - align = "vh" + plotlist = list( + raw_plot + ggplot2::theme( + legend.position = "none", + plot.margin = plot_margin + ), + delta_plot + ggplot2::theme( + legend.position = "none", + plot.margin = plot_margin + ) + ), + nrow = 2, + ncol = 1, + axis = "tblr", + align = "vh" ) - - if(isTRUE(is_colour)) { + + if (isTRUE(is_colour)) { legend_plot <- cowplot::plot_grid( plotlist = list(raw_legend, NULL), nrow = 2, ncol = 1, rel_heights = c(0.1, 0.9) ) - + final_plot <- cowplot::plot_grid(final_plot, legend_plot, ncol = 2, nrow = 1, rel_widths = c(0.9, 0.1)) } - + return(final_plot) - } else { - #isTRUE(float_contrast) - raw_plot <- plot_raw(dabest_effectsize_obj, float_contrast=TRUE, plot_kwargs) - delta_plot <- plot_delta(dabest_effectsize_obj, float_contrast=TRUE, plot_kwargs) - + # isTRUE(float_contrast) + raw_plot <- plot_raw(dabest_effectsize_obj, float_contrast = TRUE, plot_kwargs) + delta_plot <- plot_delta(dabest_effectsize_obj, float_contrast = TRUE, plot_kwargs) + delta_plot_range <- delta_plot$delta_range delta_plot <- delta_plot$delta_plot - + raw_plot <- apply_palette(raw_plot, custom_palette) delta_plot <- apply_palette(delta_plot, custom_palette) - + final_plot <- cowplot::plot_grid( - plotlist = list(raw_plot + ggplot2::theme(legend.position="none"), - delta_plot + ggplot2::theme(legend.position="none")), - nrow = 1, - ncol = 2, + plotlist = list( + raw_plot + ggplot2::theme(legend.position = "none"), + delta_plot + ggplot2::theme(legend.position = "none") + ), + nrow = 1, + ncol = 2, rel_widths = c(0.75, 0.25), - axis = "lr", - align = "h" + axis = "lr", + align = "h" ) - - if(isTRUE(is_colour)) { - raw_legend <- cowplot::get_legend(raw_plot + - ggplot2::guides(color = ggplot2::guide_legend(nrow = 1), - alpha = "none") + - ggplot2::theme(legend.position = "bottom")) - + + if (isTRUE(is_colour)) { + raw_legend <- cowplot::get_legend(raw_plot + + ggplot2::guides( + color = ggplot2::guide_legend(nrow = 1), + alpha = "none" + ) + + ggplot2::theme(legend.position = "bottom")) + final_plot <- cowplot::plot_grid(final_plot, raw_legend, ncol = 1, rel_heights = c(0.9, 0.1)) } return(final_plot) } -} \ No newline at end of file +} diff --git a/R/002_df_for_plots.R b/R/002_df_for_plots.R index bb7b4f9..42a41c3 100644 --- a/R/002_df_for_plots.R +++ b/R/002_df_for_plots.R @@ -1,104 +1,145 @@ -# Helper functions that generate dfs for plot -# -# Contains functions `df_for_tufte`, `create_dfs_for_sankey`, `create_dfs_for_xaxis_redraw`, `create_dfs_for_proportion_bar` for generation of dfs. -# - -# Function for creation of df for tuftelines plot -create_df_for_tufte <- function(raw_data, enquo_x, enquo_y, proportional){ +#' Helper functions that generate dfs for tuftelines plot +#' +#' @noRd +#' +#' @param raw_data The tidy dataset passed to [load()] that was cleaned and altered for plotting. +#' @param enquo_x Quosure of x as initially passed to [load()]. +#' @param enquo_y Quosure of y as initially passed to [load()]. +#' @param proportional Boolean value as initially passed to [load()]. +#' @param float_contrast Default TRUE. If TRUE, a Gardner-Altman plot will be produced. +#' If FALSE, a Cumming estimation plot will be produced. +#' @param enquo_id_col Quosure of id_col as initially passed to [load()]. +#' @param idx The list of control-test groupings as initially passed to [load()]. +#' @param tufte_lines_df Dataset for the creation of tufte lines. +#' @param proportional_data List of calculations related to the plotting of proportion plots. +#' @param scale_factor_sig Decimal value ranging from 0 to 1 which determines the significance of the scale factor. +#' @param bar_width Numeric value determining the width of the bar in the sankey diagram or proportion plots. +#' @param gap Numeric value determining the space between the bars in the sankey diagram or proportion plots. +#' @param sankey Boolean value determining if the flows between the bar charts will be plotted. +#' @param flow Boolean value determining whether the bars will be plotted in pairs. +#' @param N The number of rows in the proportional dataset. +#' @param proportion_success The proportion or ratio of values that are 1. +#' @param boots Boot result obtained from boot.ci +#' @param x_idx_position Numeric value determining the position of the plots on the x-axis. +#' +#' @details +#' Contains functions `df_for_tufte`, `create_dfs_for_sankey`, `create_dfs_for_xaxis_redraw`, `create_dfs_for_proportion_bar` for generation of dfs. +#' +#' @return +#' Various dfs which is required for the plotting functions to obtain a complete dabest plot. +create_df_for_tufte <- function(raw_data, enquo_x, enquo_y, proportional, gap, effsize_type) { tufte_lines_df <- raw_data %>% dplyr::group_by(!!enquo_x) %>% - dplyr::summarize(mean = mean(!!enquo_y), - median = stats::median(!!enquo_y), - sd = stats::sd(!!enquo_y), - lower_quartile = stats::quantile(!!enquo_y)[2], - upper_quartile = stats::quantile(!!enquo_y)[4]) - - if(isTRUE(proportional)){ + dplyr::summarize( + mean = mean(!!enquo_y), + median = stats::median(!!enquo_y), + sd = stats::sd(!!enquo_y), + lower_quartile = stats::quantile(!!enquo_y)[2], + upper_quartile = stats::quantile(!!enquo_y)[4] + ) + + if (isTRUE(proportional)) { tufte_lines_df <- tufte_lines_df %>% - dplyr::mutate(sd = sd/7) + dplyr::mutate(sd = sd / 7) } tufte_lines_df <- tufte_lines_df %>% dplyr::mutate(lower_sd = mean - sd, upper_sd = mean + sd) - + + if (isTRUE(stringr::str_detect(effsize_type, "edian"))) { + tufte_lines_df <- tufte_lines_df %>% + dplyr::mutate(no_diff = (sd == 0)) %>% + dplyr::mutate( + y_top_start = dplyr::case_when(no_diff ~ NA, !no_diff ~ median + gap), + y_top_end = dplyr::case_when(no_diff ~ NA, !no_diff ~ upper_quartile), + y_bot_start = dplyr::case_when(no_diff ~ NA, !no_diff ~ median - gap), + y_bot_end = dplyr::case_when(no_diff ~ NA, !no_diff ~ lower_quartile) + ) + } else { + tufte_lines_df <- tufte_lines_df %>% + dplyr::mutate(no_diff = (sd == 0)) %>% + dplyr::mutate( + y_top_start = dplyr::case_when(no_diff ~ NA, !no_diff ~ mean + gap), + y_top_end = dplyr::case_when(no_diff ~ NA, !no_diff ~ upper_sd), + y_bot_start = dplyr::case_when(no_diff ~ NA, !no_diff ~ mean - gap), + y_bot_end = dplyr::case_when(no_diff ~ NA, !no_diff ~ lower_sd) + ) + } + return(tufte_lines_df) } # Function for creation of df for flow = FALSE plot create_dfs_for_nonflow_tufte_lines <- function(idx, tufte_lines_df, - enquo_x){ + enquo_x) { new_tufte_lines_df <- tibble::tibble() total_length <- length(unlist(idx)) temp_idx <- unlist(idx) - for (i in (1: total_length)){ + for (i in (1:total_length)) { group_name <- temp_idx[i] - row <- tufte_lines_df %>% + row <- tufte_lines_df %>% dplyr::filter(!!enquo_x == group_name) new_tufte_lines_df <- dplyr::bind_rows(new_tufte_lines_df, row) } - return (new_tufte_lines_df) + return(new_tufte_lines_df) } # Function for creation of df for sankey plot -create_dfs_for_sankey <- function( +create_dfs_for_sankey <- function( float_contrast = FALSE, raw_data, proportional_data, enquo_x, enquo_y, enquo_id_col, - x_axis_raw, idx, scale_factor_sig = 0.8, bar_width = 0.15, gap, sankey = TRUE, flow = TRUE, - N - ){ - - type <- ifelse(length(unlist(idx)) <= 2, - "single sankey", - "multiple sankeys") - - flow_success_to_failure = tibble::tibble() - flow_success_to_success = tibble::tibble() - flow_failure_to_success = tibble::tibble() - flow_failure_to_failure = tibble::tibble() - + N) { + type <- ifelse(length(unlist(idx)) <= 2, + "single sankey", + "multiple sankeys" + ) + + flow_success_to_failure <- tibble::tibble() + flow_success_to_success <- tibble::tibble() + flow_failure_to_success <- tibble::tibble() + flow_failure_to_failure <- tibble::tibble() + bar_width <- ifelse(float_contrast, 0.15, 0.03) - - if(type == "single sankey" && float_contrast){ + + if (type == "single sankey" && float_contrast) { scale_factor_sig <- 0.72 - } - else if(type == "multiple sankeys"){ + } else if (type == "multiple sankeys") { scale_factor_sig <- 0.92 - } - else{ + } else { scale_factor_sig <- 0.95 } x_padding <- ifelse(float_contrast, 0.008, 0.006) - + prop <- proportional_data ind <- 1 x_start <- 1 - + sankey_bars <- prop - - if (isFALSE(flow)){ + + if (isFALSE(flow)) { sankey_bars <- tibble::tibble() - - for (group in idx){ + + for (group in idx) { group_length <- length(group) - - for (i in 1: (group_length - 1)){ + + for (i in 1:(group_length - 1)) { ctrl <- group[i] - treat <- group[i+1] - temp_row_ctrl <- prop %>% + treat <- group[i + 1] + temp_row_ctrl <- prop %>% dplyr::group_by(!!enquo_x) %>% dplyr::filter(!!enquo_x == ctrl) - - temp_row_treat <- prop %>% + + temp_row_treat <- prop %>% dplyr::group_by(!!enquo_x) %>% dplyr::filter(!!enquo_x == treat) pair_rows <- rbind(temp_row_ctrl, temp_row_treat) @@ -106,139 +147,186 @@ create_dfs_for_sankey <- function( } } } - + means_c_t <- sankey_bars$proportion_success - if (isTRUE(sankey)){ + if (isTRUE(sankey)) { for (group in idx) { group_length <- length(group) - - for (i in 1: (group_length - 1)) { - #redraw_x_axis <- append(redraw_x_axis, x_start) + + for (i in 1:(group_length - 1)) { + # redraw_x_axis <- append(redraw_x_axis, x_start) success_success <- raw_data %>% dplyr::group_by(!!enquo_id_col) %>% - dplyr::summarise(success_change = - any(!!enquo_y == 1 & !!enquo_x == group[i]) & - any(!!enquo_y == 1 & - !!enquo_x == group[i + 1])) %>% + dplyr::summarise( + success_change = + any(!!enquo_y == 1 & !!enquo_x == group[i]) & + any(!!enquo_y == 1 & + !!enquo_x == group[i + 1]) + ) %>% dplyr::filter(success_change) %>% dplyr::summarise(SS = dplyr::n() / N) - + success_failure <- raw_data %>% dplyr::group_by(!!enquo_id_col) %>% - dplyr::summarise(sf_change = - any(!!enquo_y == 1 & !!enquo_x == group[i]) & - any(!!enquo_y == 0 & - !!enquo_x == group[i + 1])) %>% + dplyr::summarise( + sf_change = + any(!!enquo_y == 1 & !!enquo_x == group[i]) & + any(!!enquo_y == 0 & + !!enquo_x == group[i + 1]) + ) %>% dplyr::filter(sf_change) %>% dplyr::summarise(SF = dplyr::n() / N) - - failure_failire <- raw_data %>% + + failure_failure <- raw_data %>% dplyr::group_by(!!enquo_id_col) %>% - dplyr::summarise(failure_change = - any(!!enquo_y == 0 & !!enquo_x == group[i]) & - any(!!enquo_y == 0 & - !!enquo_x == group[i + 1])) %>% + dplyr::summarise( + failure_change = + any(!!enquo_y == 0 & !!enquo_x == group[i]) & + any(!!enquo_y == 0 & + !!enquo_x == group[i + 1]) + ) %>% dplyr::filter(failure_change) %>% dplyr::summarise(FF = dplyr::n() / N) - + failure_success <- raw_data %>% dplyr::group_by(!!enquo_id_col) %>% - dplyr::summarise(failure_change = - any(!!enquo_y == 0 & !!enquo_x == group[i]) & - any(!!enquo_y == 1 & - !!enquo_x == group[i + 1])) %>% + dplyr::summarise( + failure_change = + any(!!enquo_y == 0 & !!enquo_x == group[i]) & + any(!!enquo_y == 1 & + !!enquo_x == group[i + 1]) + ) %>% dplyr::filter(failure_change) %>% dplyr::summarise(FS = dplyr::n() / N) # find values for lower flow success to failure flow ss <- success_success$SS[1] - ff <- failure_failire$FF[1] + ff <- failure_failure$FF[1] sf <- success_failure$SF[1] fs <- failure_success$FS[1] - sf_start1 <- ss - sf_start2 <- means_c_t[ind] - gap/2 - sf_end1 <- means_c_t[ind + 1] + gap/2 - sf_end2 <- 1 - ff - - + sf_start1 <- ss + sf_start2 <- means_c_t[ind] - gap / 2 + sf_end1 <- means_c_t[ind + 1] + gap / 2 + sf_end2 <- 1 - ff + + # find values for upper flppied flow success to failure flow fs_start1 <- 1 - ff - fs_start2 <- means_c_t[ind] + gap/2 - fs_end1 <- means_c_t[ind + 1] - gap/2 + fs_start2 <- means_c_t[ind] + gap / 2 + fs_end1 <- means_c_t[ind + 1] - gap / 2 fs_end2 <- ss - + # form dataframes from sigmoid / flippedSig functions and the rectangles, later fit into sankeyflow - sig_success_failure_bot <- sigmoid(x_start + bar_width - x_padding, - scale_factor_sig, - sf_start1 - 0.002, - sf_end1 + 0.002) - sig_success_failure_top <- sigmoid(x_start + bar_width - x_padding, - scale_factor_sig, - sf_start2 - 0.002, - sf_end2 + 0.002) + sig_success_failure_bot <- sigmoid( + x_start + bar_width - x_padding, + scale_factor_sig, + sf_start1 - 0.002, + sf_end1 + 0.002 + ) + sig_success_failure_top <- sigmoid( + x_start + bar_width - x_padding, + scale_factor_sig, + sf_start2 - 0.002, + sf_end2 + 0.002 + ) sig_success_failure_bot <- dplyr::arrange(sig_success_failure_bot, dplyr::desc(x)) - sig_failure_success_top <- flipped_sig(x_start + bar_width - x_padding, - scale_factor_sig, - fs_start1 + 0.002, - fs_end1 - 0.002) - sig_failure_success_bot <- flipped_sig(x_start + bar_width - x_padding, - scale_factor_sig, - fs_start2 + 0.002, - fs_end2 - 0.002) + sig_failure_success_top <- flipped_sig( + x_start + bar_width - x_padding, + scale_factor_sig, + fs_start1 + 0.002, + fs_end1 - 0.002 + ) + sig_failure_success_bot <- flipped_sig( + x_start + bar_width - x_padding, + scale_factor_sig, + fs_start2 + 0.002, + fs_end2 - 0.002 + ) sig_failure_success_bot <- dplyr::arrange(sig_failure_success_bot, dplyr::desc(x)) - - #number of points of data points + + + # For datasets with purely 1s or 0s + if (sf == 0) { + sig_success_failure_top <- data.frame(x = NaN, y = NaN) + sig_success_failure_bot <- sig_success_failure_top + } + if (fs == 0) { + sig_failure_success_top <- data.frame(x = NaN, y = NaN) + sig_failure_success_bot <- sig_failure_success_top + } + + # number of points of data points N_points <- length(sig_success_failure_bot) + # generate the tag column for all of these tag <- rep(ind, N_points) - sankey_success_failure <- rbind(sig_success_failure_top, - sig_success_failure_bot) + sankey_success_failure <- rbind( + sig_success_failure_top, + sig_success_failure_bot + ) sankey_success_failure <- cbind(sankey_success_failure, tag) - - sankey_failure_success <- rbind(sig_failure_success_top, - sig_failure_success_bot) + + sankey_failure_success <- rbind( + sig_failure_success_top, + sig_failure_success_bot + ) sankey_failure_success <- cbind(sankey_failure_success, tag) - + rect_flow_x <- c(x_start, x_start + 1) - - sankey_failure_failure <- data.frame(x = c(rect_flow_x, rev(rect_flow_x)), - y = c(1, 1, rep(fs_start1, 2)), - tag = c(rep(ind, 4))) - sankey_success_success <- data.frame(x = c(rect_flow_x, rev(rect_flow_x)), - y = c(rep(ss, 2), 0, 0), - tag = c(rep(ind, 4))) - + + sankey_failure_failure <- data.frame( + x = c(rect_flow_x, rev(rect_flow_x)), + y = c(1, 1, rep(fs_start1, 2)), + tag = c(rep(ind, 4)) + ) + sankey_success_success <- data.frame( + x = c(rect_flow_x, rev(rect_flow_x)), + y = c(rep(ss, 2), 0, 0), + tag = c(rep(ind, 4)) + ) + x_start <- x_start + 1 - + ind <- ind + 1 - #` update the 4 sankey flow dfs for plotting - flow_success_to_failure <- dplyr::bind_rows(flow_success_to_failure, - sankey_success_failure) - flow_success_to_success <- dplyr::bind_rows(flow_success_to_success, - sankey_success_success) - flow_failure_to_success <- dplyr::bind_rows(flow_failure_to_success, - sankey_failure_success) - flow_failure_to_failure <- dplyr::bind_rows(flow_failure_to_failure, - sankey_failure_failure) + + # update the 4 sankey flow dfs for plotting + flow_success_to_failure <- dplyr::bind_rows( + flow_success_to_failure, + sankey_success_failure + ) + flow_success_to_success <- dplyr::bind_rows( + flow_success_to_success, + sankey_success_success + ) + flow_failure_to_success <- dplyr::bind_rows( + flow_failure_to_success, + sankey_failure_success + ) + flow_failure_to_failure <- dplyr::bind_rows( + flow_failure_to_failure, + sankey_failure_failure + ) } - + x_start <- x_start + 1 ind <- ind + 1 } } else { - flow_success_to_failure = data.frame(x = NaN, y = NaN, tag = NaN) - flow_failure_to_success = data.frame(x = NaN, y = NaN, tag = NaN) - flow_success_to_success = data.frame(x = NaN, y = NaN, tag = NaN) - flow_failure_to_failure = data.frame(x = NaN, y = NaN, tag = NaN) + flow_success_to_failure <- data.frame(x = NaN, y = NaN, tag = NaN) + flow_failure_to_success <- data.frame(x = NaN, y = NaN, tag = NaN) + flow_success_to_success <- data.frame(x = NaN, y = NaN, tag = NaN) + flow_failure_to_failure <- data.frame(x = NaN, y = NaN, tag = NaN) } - - redraw_x_axis <- c(1 : length(unlist(idx))) - dfs_for_sankeys <- list(flow_success_to_failure = flow_success_to_failure, - flow_failure_to_success = flow_failure_to_success, - flow_success_to_success = flow_success_to_success, - flow_failure_to_failure = flow_failure_to_failure, - sankey_bars = sankey_bars, - redraw_x_axis = redraw_x_axis) - + + redraw_x_axis <- c(1:length(unlist(idx))) + dfs_for_sankeys <- list( + flow_success_to_failure = flow_success_to_failure, + flow_failure_to_success = flow_failure_to_success, + flow_success_to_success = flow_success_to_success, + flow_failure_to_failure = flow_failure_to_failure, + sankey_bars = sankey_bars, + redraw_x_axis = redraw_x_axis + ) + return(dfs_for_sankeys) } @@ -248,14 +336,14 @@ create_dfs_for_xaxis_redraw <- function(idx) { xaxis_line_x_vector <- c() xaxis_line_xend_vector <- c() xaxis_ticks_x_vector <- c() - + for (j in 1:length(idx)) { # calculate xaxis line x coords x_coord <- x_axis_pointer + 1 xaxis_line_x_vector <- append(xaxis_line_x_vector, x_coord) xend_coord <- x_axis_pointer + length(idx[[j]]) xaxis_line_xend_vector <- append(xaxis_line_xend_vector, xend_coord) - + # calculate xaxis ticks x coords for (k in 1:length(idx[[j]])) { x_coord <- x_axis_pointer + k @@ -263,13 +351,15 @@ create_dfs_for_xaxis_redraw <- function(idx) { } x_axis_pointer <- x_axis_pointer + length(idx[[j]]) } - + dfs_for_xaxis_redraw <- list( - df_for_line = data.frame(x = xaxis_line_x_vector, - xend = xaxis_line_xend_vector), + df_for_line = data.frame( + x = xaxis_line_x_vector, + xend = xaxis_line_xend_vector + ), df_for_ticks = data.frame(x = xaxis_ticks_x_vector) ) - + return(dfs_for_xaxis_redraw) } @@ -281,13 +371,25 @@ create_dfs_for_proportion_bar <- function(proportion_success, bar_width = 0.3, g y_success = NA, tag = NA ) - + for (x in 1:length(proportion_success)) { y <- proportion_success[x] - - x_failure_success <- c(x-bar_width/2, x+bar_width/2, x+bar_width/2, x-bar_width/2) - y_success <- c(y-gap/2, y-gap/2, 0, 0) - y_failure <- c(1, 1, y+gap/2, y+gap/2) + + x_failure_success <- c(x - bar_width / 2, x + bar_width / 2, x + bar_width / 2, x - bar_width / 2) + y_success <- c(y - gap / 2, y - gap / 2, 0, 0) + y_failure <- c(1, 1, y + gap / 2, y + gap / 2) + + # For treatment groups with all 1s/all 0s + if (y == 0) { + y_failure <- c(1, 1, 0, 0) + y_success <- c(0, 0, 0, 0) + } + + if (y == 1) { + y_success <- c(1, 1, 0, 0) + y_failure <- c(0, 0, 0, 0) + } + temp_df_proportion_bar <- data.frame( x_failure = x_failure_success, y_failure = y_failure, @@ -295,11 +397,11 @@ create_dfs_for_proportion_bar <- function(proportion_success, bar_width = 0.3, g y_success = y_success, tag = rep(toString(x), 4) ) - + df_for_proportion_bar <- rbind(df_for_proportion_bar, temp_df_proportion_bar) } df_for_proportion_bar <- df_for_proportion_bar %>% stats::na.omit() - + return(df_for_proportion_bar) } @@ -311,33 +413,35 @@ create_dfs_for_baseline_ec_violin <- function(boots, x_idx_position, float_contr ) x_axis_scalar <- ifelse(flow, 0, 0.5) curr_boot_idx <- 1 - + for (i in x_idx_position) { ci_coords <- stats::density(boots[[curr_boot_idx]]) - + x_coords_ci <- ci_coords$x y_coords_ci <- ci_coords$y - + # Standardise y - y_coords_ci <- (y_coords_ci - min(y_coords_ci))/(max(y_coords_ci) - min(y_coords_ci)) - y_coords_ci <- y_coords_ci/6 - + y_coords_ci <- (y_coords_ci - min(y_coords_ci)) / (max(y_coords_ci) - min(y_coords_ci)) + y_coords_ci <- y_coords_ci / 6 + if (isFALSE(float_contrast)) { - y_coords_ci <- y_coords_ci/1.5 + y_coords_ci <- y_coords_ci / 1.5 } - + y_coords_ci <- y_coords_ci + i - x_axis_scalar - - temp_df_violin <- data.frame(x = x_coords_ci, - y = y_coords_ci, - tag = rep(toString(i), 512)) - + + temp_df_violin <- data.frame( + x = x_coords_ci, + y = y_coords_ci, + tag = rep(toString(i), 512) + ) + df_for_violin <- rbind(df_for_violin, temp_df_violin) - + curr_boot_idx <- curr_boot_idx + 1 } df_for_violin <- df_for_violin %>% - dplyr::arrange(tag, x , y) - + dplyr::arrange(tag, x, y) + return(df_for_violin) -} \ No newline at end of file +} diff --git a/R/002_plot_api.R b/R/002_plot_api.R index 5690fb1..2c7a684 100644 --- a/R/002_plot_api.R +++ b/R/002_plot_api.R @@ -1,50 +1,71 @@ -# Contains functions responsible for generation of raw_plot and delta_plot. -# -# Contains main plotting functions `plot_raw` and `plot_delta` for plotting of the rawdata and effectsize parts. - -# Raw plot function +#' Functions responsible for generation of raw_plot and delta_plot. +#' +#' @noRd +#' +#' @param dabest_effectsize_obj A dabest_effectsize_obj created by loading in a +#' dabest_obj along with other specified parameters with the [effect_size()] function. +#' @param float_contrast Boolean value determining if a Gardner-Altman plot or Cumming estimation plot will be produced. +#' @param plot_kwargs Adjustment parameters to control and adjust the appearance of the plot. +#' (list of all possible adjustment parameters can be found under [plot_kwargs]) +#' +#' @details +#' Contains main plotting functions `plot_raw` and `plot_delta` for plotting of the rawdata and effectsize parts. +#' +#' @examples +#' # Loading of the dataset +#' data(twogroup_data) +#' +#' # Preparing the data to be plotted +#' dabest_obj <- load(non_proportional_data, x = Group, y = Measurement, idx = c("Control 1", "Test 1")) +#' dabest_obj.mean_diff <- mean_diff(dabest_obj) +#' +#' # Plotting of dabest_obj.mean_diff (rawplot only) +#' plot_raw(dabest_obj.mean_diff, TRUE) +#' +#' # Plotting of dabest_obj.mean_diff (deltaplot only) +#' plot_delta(dabest_obj.mean_diff, True) plot_raw <- function(dabest_effectsize_obj, float_contrast, plot_kwargs) { enquo_x <- dabest_effectsize_obj$enquo_x enquo_y <- dabest_effectsize_obj$enquo_y enquo_id_col <- dabest_effectsize_obj$enquo_id_col enquo_colour <- dabest_effectsize_obj$enquo_colour proportional <- dabest_effectsize_obj$proportional - + proportional_data <- dabest_effectsize_obj$proportional_data proportion_success <- proportional_data$proportion_success - + raw_y_labels <- plot_kwargs$swarm_label minimeta <- plot_kwargs$show_mini_meta delta2 <- plot_kwargs$show_delta2 - + raw_data <- dabest_effectsize_obj$raw_data Ns <- dabest_effectsize_obj$Ns raw_y_range_vector <- dabest_effectsize_obj$ylim - + test_summary <- dabest_effectsize_obj$test_summary control_summary <- dabest_effectsize_obj$control_summary is_paired <- dabest_effectsize_obj$is_paired is_colour <- dabest_effectsize_obj$is_colour - + paired <- dabest_effectsize_obj$paired - + idx <- dabest_effectsize_obj$idx separated_idx <- idx - raw_x_max = length(unlist(idx)) + raw_x_max <- length(unlist(idx)) x_axis_raw <- c(seq(1, raw_x_max, 1)) - + # Extend x_axis if minimeta/deltadelta is being plotted. - if(isTRUE(minimeta) || isTRUE(delta2)) { + if (isTRUE(minimeta) || isTRUE(delta2)) { raw_x_max <- raw_x_max + 2 } - + effsize_type <- dabest_effectsize_obj$delta_y_labels - + # Check if multiplot. - if(length(unlist(idx)) >= 3) { - float_contrast = FALSE + if (length(unlist(idx)) >= 3) { + float_contrast <- FALSE } - + #### Load in sizes of plot elements #### raw_marker_size <- plot_kwargs$raw_marker_size raw_marker_alpha <- plot_kwargs$raw_marker_alpha @@ -56,331 +77,389 @@ plot_raw <- function(dabest_effectsize_obj, float_contrast, plot_kwargs) { es_line_size <- plot_kwargs$es_line_size sankey <- plot_kwargs$sankey flow <- plot_kwargs$flow - sankey_alpha <- plot_kwargs$sankey_alpha + raw_flow_alpha <- plot_kwargs$raw_flow_alpha swarm_x_text <- plot_kwargs$swarm_x_text swarm_y_text <- plot_kwargs$swarm_y_text asymmetric_side <- plot_kwargs$asymmetric_side - asymmetric_side <- ifelse(asymmetric_side=="right", -1, 1) - + asymmetric_side <- ifelse(asymmetric_side == "right", -1, 1) + #### Rawplot Building #### plot_components <- create_rawplot_components(proportional, is_paired, float_contrast) main_plot_type <- plot_components$main_plot_type is_summary_lines <- plot_components$is_summary_lines is_tufte_lines <- plot_components$is_tufte_lines - + ## Creation of dfs for specific main_plot_types ## - if(main_plot_type == "sankey") { - if(isFALSE(flow)) { + if (main_plot_type == "sankey") { + if (isFALSE(flow)) { separated_idx <- separate_idx(idx, paired) - raw_x_max = length(unlist(separated_idx)) - x_axis_raw <- c(seq(2, raw_x_max, 2)) - 0.5 + raw_x_max <- length(unlist(separated_idx)) + x_axis_raw <- c(seq(2, raw_x_max, 2)) - 0.5 is_tufte_lines <- TRUE } sankey_bar_gap <- 0.025 - sankey_df <- create_dfs_for_sankey(float_contrast = float_contrast, - raw_data = raw_data, - proportional_data = proportional_data, - enquo_x = enquo_x, - enquo_y = enquo_y, - enquo_id_col = enquo_id_col, - gap = sankey_bar_gap, - sankey = sankey, - idx = separated_idx, - flow = flow, - N = Ns$n[1]) + sankey_df <- create_dfs_for_sankey( + float_contrast = float_contrast, + raw_data = raw_data, + proportional_data = proportional_data, + enquo_x = enquo_x, + enquo_y = enquo_y, + enquo_id_col = enquo_id_col, + gap = sankey_bar_gap, + sankey = sankey, + idx = separated_idx, + flow = flow, + N = Ns$n[1] + ) flow_success_to_failure <- sankey_df$flow_success_to_failure flow_failure_to_success <- sankey_df$flow_failure_to_success flow_success_to_success <- sankey_df$flow_success_to_success flow_failure_to_failure <- sankey_df$flow_failure_to_failure sankey_bars <- sankey_df$sankey_bars - sankey_bars <- create_dfs_for_proportion_bar(sankey_bars$proportion_success, - bar_width = raw_bar_width, - gap = sankey_bar_gap) + sankey_bars <- create_dfs_for_proportion_bar(sankey_bars$proportion_success, + bar_width = raw_bar_width, + gap = sankey_bar_gap + ) } - - if(main_plot_type == "unpaired proportions") { - if(isTRUE(float_contrast)) { + + if (main_plot_type == "unpaired proportions") { + if (isTRUE(float_contrast)) { raw_y_max <- 1 raw_y_min <- 0 } df_for_proportion_bar <- create_dfs_for_proportion_bar(proportion_success, bar_width = raw_bar_width) } - + ## Adjustment of labels ## - if(ggplot2::as_label(enquo_colour) == "NULL" && main_plot_type != "slope") { + if (ggplot2::as_label(enquo_colour) == "NULL" && main_plot_type != "slope") { enquo_colour <- enquo_x } - + #### Initialise raw_plot & Add main_plot_type component #### - raw_plot <- switch( - main_plot_type, - + raw_plot <- switch(main_plot_type, "swarmplot" = ggplot2::ggplot() + - ggbeeswarm::geom_beeswarm(data = raw_data, - ggplot2::aes(x = x_axis_raw + asymmetric_side*raw_marker_side_shift, - y = !!enquo_y, - colour = !!enquo_colour), - cex = raw_marker_spread, - method = "swarm", - side = -asymmetric_side*1L, - size = raw_marker_size, - alpha = raw_marker_alpha, - corral = "wrap", - corral.width = 0.35 + raw_marker_spread), - - "slope" = + ggbeeswarm::geom_beeswarm( + data = raw_data, + ggplot2::aes( + x = x_axis_raw + asymmetric_side * raw_marker_side_shift, + y = !!enquo_y, + colour = !!enquo_colour + ), + cex = raw_marker_spread, + method = "swarm", + side = -asymmetric_side * 1L, + size = raw_marker_size, + alpha = raw_marker_alpha, + corral = "wrap", + corral.width = 0.35 + raw_marker_spread + ), + "slope" = plot_slopegraph(dabest_effectsize_obj, plot_kwargs), - - "unpaired proportions" = + "unpaired proportions" = ggplot2::ggplot() + - # failure bar - geom_proportionbar(data = df_for_proportion_bar, - ggplot2::aes(x = x_failure, y = y_failure, colour = tag)) + - # success bar - geom_proportionbar(data = df_for_proportion_bar, - ggplot2::aes(x = x_success, y = y_success, colour = tag, fill = tag)), - + # failure bar + geom_proportionbar( + data = df_for_proportion_bar, + ggplot2::aes(x = x_failure, y = y_failure, colour = tag) + ) + + # success bar + geom_proportionbar( + data = df_for_proportion_bar, + ggplot2::aes(x = x_success, y = y_success, colour = tag, fill = tag) + ), "sankey" = ggplot2::ggplot() + - geom_sankeyflow(data = flow_success_to_failure, na.rm = TRUE, - ggplot2::aes(x = x, y = y, fillcol = "#db6159", group = tag), - alpha = sankey_alpha) + - geom_sankeyflow(data = flow_failure_to_success, na.rm = TRUE, - ggplot2::aes(x = x, y = y, fillcol = "#818181", group = tag), - alpha = sankey_alpha) + - geom_sankeyflow(data = flow_success_to_success, na.rm = TRUE, - ggplot2::aes(x = x, y = y, fillcol = "#db6159", group = tag), - alpha = sankey_alpha) + - geom_sankeyflow(data = flow_failure_to_failure, na.rm = TRUE, - ggplot2::aes(x = x, y = y, fillcol = "#818181", group = tag), - alpha = sankey_alpha) + - geom_proportionbar(data = sankey_bars, - ggplot2::aes(x = x_failure, y = y_failure, group = tag, colour = NULL), fill = "#818181", - alpha = raw_marker_alpha) + - geom_proportionbar(data = sankey_bars, - ggplot2::aes(x = x_success, y = y_success, group = tag, colour = NULL), fill = "#db6159", - alpha = raw_marker_alpha) - + geom_sankeyflow( + data = flow_success_to_failure, na.rm = TRUE, + ggplot2::aes(x = x, y = y, group = tag, colour = NULL), + fill = "#db6159", alpha = raw_flow_alpha + ) + + geom_sankeyflow( + data = flow_failure_to_success, na.rm = TRUE, + ggplot2::aes(x = x, y = y, group = tag, colour = NULL), + fill = "#818181", alpha = raw_flow_alpha + ) + + geom_sankeyflow( + data = flow_success_to_success, na.rm = TRUE, + ggplot2::aes(x = x, y = y, group = tag, colour = NULL), + fill = "#db6159", alpha = raw_flow_alpha + ) + + geom_sankeyflow( + data = flow_failure_to_failure, na.rm = TRUE, + ggplot2::aes(x = x, y = y, group = tag, colour = NULL), + fill = "#818181", alpha = raw_flow_alpha + ) + + geom_proportionbar( + data = sankey_bars, + ggplot2::aes(x = x_failure, y = y_failure, group = tag, colour = NULL), + fill = "#818181", alpha = raw_marker_alpha + ) + + geom_proportionbar( + data = sankey_bars, + ggplot2::aes(x = x_success, y = y_success, group = tag, colour = NULL), + fill = "#db6159", alpha = raw_marker_alpha + ) ) - + #### Add scaling Component #### raw_x_labels <- Ns$swarmticklabs - if(main_plot_type == "sankey" && isFALSE(flow)) { + if (main_plot_type == "sankey" && isFALSE(flow)) { raw_x_labels <- create_xlabs_for_sankey(idx, Ns, enquo_x) } raw_ylim <- plot_kwargs$swarm_ylim - raw_ylim <- if (is.null(raw_ylim)){raw_y_range_vector} else {raw_ylim} - + raw_ylim <- if (is.null(raw_ylim)) { + raw_y_range_vector + } else { + raw_ylim + } + raw_y_max <- raw_ylim[2] raw_y_min <- raw_ylim[1] - if(isFALSE(float_contrast) && isFALSE(proportional)) { - raw_y_min <- raw_y_min - (raw_y_max - raw_y_min)/15 + if (isFALSE(float_contrast) && isFALSE(proportional)) { + raw_y_min <- raw_y_min - (raw_y_max - raw_y_min) / 15 } raw_y_mean <- raw_y_max - raw_y_min - + raw_x_min <- ifelse(float_contrast, 0.6, 0.6) raw_x_scalar <- ifelse(float_contrast, 0.5, 0.3) - + raw_plot <- raw_plot + ggplot2::theme_classic() + - ggplot2::coord_cartesian(ylim = c(raw_y_min, raw_y_max), - xlim = c(raw_x_min, raw_x_max+raw_x_scalar), - expand = FALSE, - clip = "off") + - ggplot2::scale_x_continuous(breaks = c(x_axis_raw), - labels = raw_x_labels) - + ggplot2::coord_cartesian( + ylim = c(raw_y_min, raw_y_max), + xlim = c(raw_x_min, raw_x_max + raw_x_scalar), + expand = FALSE, + clip = "off" + ) + + ggplot2::scale_x_continuous( + breaks = c(x_axis_raw), + labels = raw_x_labels + ) + #### Add summary_lines component #### - if(isTRUE(is_summary_lines)) { + if (isTRUE(is_summary_lines)) { raw_plot <- raw_plot + - ggplot2::geom_segment(colour = "black",linewidth = 0.3, - ggplot2::aes(x = 1, - xend = raw_x_max+raw_x_scalar, - y = control_summary, - yend = control_summary)) + - ggplot2::geom_segment(colour = "black", linewidth = 0.3, - ggplot2::aes(x = 2, - xend = raw_x_max+raw_x_scalar, - y = test_summary, - yend = test_summary)) + ggplot2::geom_segment( + colour = "black", linewidth = 0.3, + ggplot2::aes( + x = 1, + xend = raw_x_max + raw_x_scalar, + y = control_summary, + yend = control_summary + ) + ) + + ggplot2::geom_segment( + colour = "black", linewidth = 0.3, + ggplot2::aes( + x = 2, + xend = raw_x_max + raw_x_scalar, + y = test_summary, + yend = test_summary + ) + ) } - - #### Add tufte_lines component #### - if(isTRUE(is_tufte_lines)) { - tufte_lines_df <- create_df_for_tufte(raw_data, enquo_x, enquo_y, proportional) - if(main_plot_type == "sankey"){ + + #### Add tufte lines component #### + if (isTRUE(is_tufte_lines)) { + if (main_plot_type == "sankey") { tufte_gap_value <- sankey_bar_gap - if(isFALSE(flow)){ - tufte_lines_df <- create_dfs_for_nonflow_tufte_lines(tufte_lines_df, - idx = separated_idx, - enquo_x =enquo_x) + tufte_lines_df <- create_df_for_tufte(raw_data, enquo_x, enquo_y, proportional, tufte_gap_value, effsize_type) + if (isFALSE(flow)) { + tufte_lines_df <- create_dfs_for_nonflow_tufte_lines(tufte_lines_df, + idx = separated_idx, + enquo_x = enquo_x + ) } } else { - tufte_gap_value <- ifelse(proportional, min(tufte_lines_df$mean)/20, min(tufte_lines_df$mean)/50) - tufte_gap_value <- ifelse(float_contrast, tufte_gap_value, tufte_gap_value) + # if (main_plot_type != "sankey") + tufte_lines_df <- create_df_for_tufte(raw_data, enquo_x, enquo_y, proportional, 0, effsize_type) + tufte_gap_value <- ifelse(proportional, min(tufte_lines_df$mean) / 20, min(tufte_lines_df$mean) / 50) + tufte_lines_df <- create_df_for_tufte(raw_data, enquo_x, enquo_y, proportional, tufte_gap_value, effsize_type) } + + ## Adjusting side shifting of tufte lines tufte_side_adjust_value <- ifelse(proportional, 0, 0.05) - row_num <- max(x_axis_raw) - row_ref <- c(seq(1, row_num, 1)) + asymmetric_side*tufte_side_adjust_value + asymmetric_side*raw_marker_side_shift - if (isFALSE(flow)){ - row_ref <- c(seq(1, raw_x_max, 1)) + asymmetric_side*tufte_side_adjust_value + asymmetric_side*raw_marker_side_shift + row_ref <- c(seq(1, row_num, 1)) + asymmetric_side * tufte_side_adjust_value + asymmetric_side * raw_marker_side_shift + if (isFALSE(flow)) { + row_ref <- c(seq(1, raw_x_max, 1)) + asymmetric_side * tufte_side_adjust_value + asymmetric_side * raw_marker_side_shift } - - y_top_t <-list(y = tufte_lines_df$mean + tufte_gap_value, - yend = tufte_lines_df$upper_sd) - y_bot_t <-list(y = tufte_lines_df$mean - tufte_gap_value, - yend = tufte_lines_df$lower_sd) - if (isTRUE(stringr::str_detect(effsize_type, "edian"))) { - y_top_t <-list(y = tufte_lines_df$median + tufte_gap_value, - yend = tufte_lines_df$upper_quartile) - y_bot_t <-list(y = tufte_lines_df$mean - tufte_gap_value, - yend = tufte_lines_df$lower_quartile) - } - + # to change: temporary fix for tufte lines black for proportional graphs - if(isTRUE(proportional) | isTRUE(is_colour)) { + if (isTRUE(proportional) | isTRUE(is_colour)) { raw_plot <- raw_plot + - ggplot2::geom_segment(data = tufte_lines_df, - linewidth = tufte_size, - colour = "black", - ggplot2::aes(x = row_ref, - xend = row_ref, - y = y_bot_t$y, - yend = y_bot_t$yend, - colour = !!enquo_x), - lineend = "square") + - ggplot2::geom_segment(data = tufte_lines_df, - linewidth = tufte_size, - colour = "black", - ggplot2::aes(x = row_ref, - xend = row_ref, - y = y_top_t$y, - yend = y_top_t$yend, - colour = !!enquo_x), - lineend = "square") + ggplot2::geom_segment( + data = tufte_lines_df, + na.rm = TRUE, + linewidth = tufte_size, + colour = "black", + ggplot2::aes( + x = row_ref, + xend = row_ref, + y = y_top_start, + yend = y_top_end, + colour = !!enquo_x + ), + lineend = "square" + ) + + ggplot2::geom_segment( + data = tufte_lines_df, + na.rm = TRUE, + linewidth = tufte_size, + colour = "black", + ggplot2::aes( + x = row_ref, + xend = row_ref, + y = y_bot_start, + yend = y_bot_end, + colour = !!enquo_x + ), + lineend = "square" + ) } else { raw_plot <- raw_plot + - ggplot2::geom_segment(data = tufte_lines_df, linewidth = tufte_size, - ggplot2::aes(x = row_ref, - xend = row_ref, - y = y_bot_t$y, - yend = y_bot_t$yend, - colour = !!enquo_x), - lineend = "square") + - ggplot2::geom_segment(data = tufte_lines_df, linewidth = tufte_size, - ggplot2::aes(x = row_ref, - xend = row_ref, - y = y_top_t$y, - yend = y_top_t$yend, - colour = !!enquo_x), - lineend = "square") + ggplot2::geom_segment( + data = tufte_lines_df, + linewidth = tufte_size, + ggplot2::aes( + x = row_ref, + xend = row_ref, + y = y_top_start, + yend = y_top_end, + colour = !!enquo_x + ), + lineend = "square" + ) + + ggplot2::geom_segment( + data = tufte_lines_df, + linewidth = tufte_size, + ggplot2::aes( + x = row_ref, + xend = row_ref, + y = y_bot_start, + yend = y_bot_end, + colour = !!enquo_x + ), + lineend = "square" + ) } } - + #### Remove x-axis and redraw x_axis component #### - if(isTRUE(float_contrast)) { + if (isTRUE(float_contrast)) { raw_plot <- raw_plot + float_contrast_theme + - ggplot2::geom_segment(linewidth = 0.4, - color = "black", - ggplot2::aes(x = raw_x_min, xend = raw_x_max+0.2, y = raw_y_min, yend = raw_y_min)) - + ggplot2::geom_segment( + linewidth = 0.4, + color = "black", + ggplot2::aes(x = raw_x_min, xend = raw_x_max + 0.2, y = raw_y_min, yend = raw_y_min) + ) } else { # Obtain dfs for xaxis redraw - if(main_plot_type == "sankey" && isFALSE(flow)) { + if (main_plot_type == "sankey" && isFALSE(flow)) { idx_for_xaxis_redraw <- remove_last_ele_from_nested_list(idx) dfs_for_xaxis_redraw <- create_dfs_for_xaxis_redraw(idx_for_xaxis_redraw) df_for_line <- dfs_for_xaxis_redraw$df_for_line df_for_ticks <- dfs_for_xaxis_redraw$df_for_ticks - + df_for_line <- df_for_line %>% - dplyr::mutate(x = x + 0.5 + (x-1), - xend = xend + 0.5 + (xend-1)) - + dplyr::mutate( + x = x + 0.5 + (x - 1), + xend = xend + 0.5 + (xend - 1) + ) + df_for_ticks <- df_for_ticks %>% - dplyr::mutate(x = x + 0.5 + (x-1)) - + dplyr::mutate(x = x + 0.5 + (x - 1)) } else { idx_for_xaxis_redraw <- idx dfs_for_xaxis_redraw <- create_dfs_for_xaxis_redraw(idx_for_xaxis_redraw) df_for_line <- dfs_for_xaxis_redraw$df_for_line df_for_ticks <- dfs_for_xaxis_redraw$df_for_ticks } - + raw_plot <- raw_plot + non_float_contrast_theme + # Redraw xaxis line - ggplot2::geom_segment(data = df_for_line, - linewidth = 0.5, - lineend = "square", - color = "black", - ggplot2::aes(x = x, - xend = xend, - y = raw_y_min + raw_y_mean/40, - yend = raw_y_min + raw_y_mean/40)) + + ggplot2::geom_segment( + data = df_for_line, + linewidth = 0.5, + lineend = "square", + color = "black", + ggplot2::aes( + x = x, + xend = xend, + y = raw_y_min + raw_y_mean / 40, + yend = raw_y_min + raw_y_mean / 40 + ) + ) + # Redraw xaxis ticks - ggplot2::geom_segment(data = df_for_ticks, - linewidth = 0.5, - lineend = "square", - color = "black", - ggplot2::aes(x = x, - xend = x, - y = raw_y_min + raw_y_mean/40, - yend = raw_y_min)) + ggplot2::geom_segment( + data = df_for_ticks, + linewidth = 0.5, + lineend = "square", + color = "black", + ggplot2::aes( + x = x, + xend = x, + y = raw_y_min + raw_y_mean / 40, + yend = raw_y_min + ) + ) } - + #### Add y_labels component #### raw_plot <- raw_plot + ggplot2::labs(y = raw_y_labels) - + #### Adjust font sizes #### raw_plot <- raw_plot + - ggplot2::theme(axis.text.x = ggplot2::element_text(size = swarm_x_text), - axis.title.y = ggplot2::element_text(size = swarm_y_text)) - + ggplot2::theme( + axis.text.x = ggplot2::element_text(size = swarm_x_text), + axis.title.y = ggplot2::element_text(size = swarm_y_text) + ) + return(raw_plot) } # Delta plot function plot_delta <- function(dabest_effectsize_obj, float_contrast, plot_kwargs) { - idx = dabest_effectsize_obj$idx + idx <- dabest_effectsize_obj$idx separated_idx <- idx - bootstraps = dabest_effectsize_obj$bootstraps + bootstraps <- dabest_effectsize_obj$bootstraps proportional <- dabest_effectsize_obj$proportional paired <- dabest_effectsize_obj$paired - - delta_x_labels = unlist(dabest_effectsize_obj$delta_x_labels) - delta_y_labels = plot_kwargs$contrast_label - + + delta_x_labels <- unlist(dabest_effectsize_obj$delta_x_labels) + delta_y_labels <- plot_kwargs$contrast_label + minimeta <- plot_kwargs$show_mini_meta delta2 <- plot_kwargs$show_delta2 - + is_colour <- dabest_effectsize_obj$is_colour is_paired <- dabest_effectsize_obj$is_paired - + raw_y_range_vector <- dabest_effectsize_obj$ylim raw_y_max <- raw_y_range_vector[2] raw_y_min <- raw_y_range_vector[1] - + control_summary <- dabest_effectsize_obj$control_summary test_summary <- dabest_effectsize_obj$test_summary - + # Initialising x & y limits - delta_x_max = length(unlist(idx)) - delta_y_min = .Machine$double.xmax - delta_y_max = .Machine$double.xmin - + delta_x_max <- length(unlist(idx)) + delta_y_min <- .Machine$double.xmax + delta_y_max <- .Machine$double.xmin + # Obtain boot boot_result <- dabest_effectsize_obj$boot_result boots <- boot_result$bootstraps - + # Check if multiplot - if(length(unlist(idx)) >= 3) { + if (length(unlist(idx)) >= 3) { float_contrast <- FALSE } - + #### Load in sizes of plot elements #### raw_marker_size <- plot_kwargs$raw_marker_size raw_marker_alpha <- plot_kwargs$raw_marker_alpha @@ -393,107 +472,119 @@ plot_delta <- function(dabest_effectsize_obj, float_contrast, plot_kwargs) { contrast_y_text <- plot_kwargs$contrast_y_text show_zero_dot <- plot_kwargs$show_zero_dot show_baseline_ec <- plot_kwargs$show_baseline_ec - + #### Deltaplot Building #### - delta_plot_components <- create_deltaplot_components(proportional, - is_paired, - float_contrast, - is_colour, - delta2, - show_zero_dot, - flow, - show_baseline_ec) + delta_plot_components <- create_deltaplot_components( + proportional, + is_paired, + float_contrast, + is_colour, + delta2, + show_zero_dot, + flow, + show_baseline_ec + ) main_violin_type <- delta_plot_components$main_violin_type is_summary_lines <- delta_plot_components$is_summary_lines is_bootci <- delta_plot_components$is_bootci is_deltadelta <- delta_plot_components$is_deltadelta is_zero_dot <- delta_plot_components$is_zero_dot is_baseline_ec <- delta_plot_components$is_baseline_ec - + raw_plot_components <- create_rawplot_components(proportional, is_paired, float_contrast) main_plot_type <- raw_plot_components$main_plot_type - + #### initialise delta_plot & Add main_violin_type component #### # Extend idx and labels if minimeta or deltadelta if (isTRUE(minimeta) || isTRUE(delta2)) { separated_idx <- c(separated_idx, list(c("minimeta", "deltadelta"))) idx <- separated_idx } - if(main_plot_type == "sankey" && isFALSE(flow)) { + if (main_plot_type == "sankey" && isFALSE(flow)) { separated_idx <- separate_idx(idx, paired) - delta_x_max = length(unlist(separated_idx)) + delta_x_max <- length(unlist(separated_idx)) is_tufte_lines <- FALSE } - - violin_plot_components <- create_violinplot_components(boots, - separated_idx, - float_contrast, - delta_y_max, - delta_y_min, - flow, - show_zero_dot) - + + violin_plot_components <- create_violinplot_components( + boots, + separated_idx, + float_contrast, + delta_y_max, + delta_y_min, + flow, + show_zero_dot + ) + df_for_violin <- violin_plot_components$df_for_violin delta_y_min <- violin_plot_components$delta_y_min delta_y_max <- violin_plot_components$delta_y_max - delta_y_mean <- (delta_y_max - delta_y_min)/2 + delta_y_mean <- (delta_y_max - delta_y_min) / 2 x_axis_breaks <- violin_plot_components$x_axis_breaks zero_dot_x_breaks <- violin_plot_components$zero_dot_x_breaks - - if(main_plot_type == "sankey" && isFALSE(flow)) { + + if (main_plot_type == "sankey" && isFALSE(flow)) { x_axis_breaks <- x_axis_breaks - 0.5 } - - delta_plot <- switch( - main_violin_type, - - "multicolour" = + + delta_plot <- switch(main_violin_type, + "multicolour" = ggplot2::ggplot() + - geom_halfviolin(na.rm = TRUE, - data = df_for_violin, - ggplot2::aes(x = y, y = x, fill = tag)), - - "singlecolour" = + geom_halfviolin( + na.rm = TRUE, + data = df_for_violin, + ggplot2::aes(x = y, y = x, fill = tag) + ), + "singlecolour" = ggplot2::ggplot() + - geom_halfviolin(na.rm = TRUE, - data = df_for_violin, - ggplot2::aes(x = y, y = x, group = tag)) + geom_halfviolin( + na.rm = TRUE, + data = df_for_violin, + ggplot2::aes(x = y, y = x, group = tag) + ) ) - + #### Add scaling Component #### raw_ylim <- plot_kwargs$swarm_ylim - raw_ylim <- if (is.null(raw_ylim)){c(raw_y_min, raw_y_max)} else {raw_ylim} - + raw_ylim <- if (is.null(raw_ylim)) { + c(raw_y_min, raw_y_max) + } else { + raw_ylim + } + ## Add labels ## if (isTRUE(minimeta)) { delta_x_labels <- append(delta_x_labels, "Weighted\nDelta") - } + } if (isTRUE(delta2)) { delta_x_labels <- append(delta_x_labels, "delta-delta") } - - if(isTRUE(float_contrast)) { - difference = boot_result$difference - - if(main_plot_type == "unpaired proportions") { + + if (isTRUE(float_contrast)) { + difference <- boot_result$difference + + if (main_plot_type == "unpaired proportions") { raw_y_range_vector <- c(0, 1) } # Calculate new ylims to align summary lines min_raw_y <- raw_ylim[1] max_raw_y <- raw_ylim[2] raw_y_range <- max_raw_y - min_raw_y - min_y_coords <- difference/(1 - (test_summary - min_raw_y)/(control_summary - min_raw_y)) - delta_y_range <- raw_y_range * -min_y_coords/(control_summary - min_raw_y) - + min_y_coords <- difference / (1 - (test_summary - min_raw_y) / (control_summary - min_raw_y)) + delta_y_range <- raw_y_range * -min_y_coords / (control_summary - min_raw_y) + delta_plot <- delta_plot + ggplot2::theme_classic() + - ggplot2::coord_cartesian(ylim = c(min_y_coords, min_y_coords + delta_y_range), - xlim = c(1.8, delta_x_max+0.4), - expand = FALSE) + - ggplot2::scale_x_continuous(breaks = c(2), - labels = delta_x_labels) + - ggplot2::scale_y_continuous(position = "right") - + ggplot2::coord_cartesian( + ylim = c(min_y_coords, min_y_coords + delta_y_range), + xlim = c(1.8, delta_x_max + 0.4), + expand = FALSE + ) + + ggplot2::scale_x_continuous( + breaks = c(2), + labels = delta_x_labels + ) + + ggplot2::scale_y_continuous(position = "right") } else { delta_x_min <- 0.6 delta_x_scalar <- 0.3 @@ -506,157 +597,194 @@ plot_delta <- function(dabest_effectsize_obj, float_contrast, plot_kwargs) { if (isFALSE(is.null(delta_ylim))) { delta_y_min <- delta_ylim[1] delta_y_max <- delta_ylim[2] - delta_y_mean <- (delta_y_max - delta_y_min)/2 + delta_y_mean <- (delta_y_max - delta_y_min) / 2 } - + delta_plot <- delta_plot + ggplot2::theme_classic() + - ggplot2::coord_cartesian(ylim = c(delta_y_min - delta_y_mean/10, - delta_y_max), - xlim = c(delta_x_min, delta_x_max+delta_x_scalar), - expand = FALSE) + - ggplot2::scale_x_continuous(breaks = x_axis_breaks, - labels = delta_x_labels) + ggplot2::coord_cartesian( + ylim = c( + delta_y_min - delta_y_mean / 10, + delta_y_max + ), + xlim = c(delta_x_min, delta_x_max + delta_x_scalar), + expand = FALSE + ) + + ggplot2::scale_x_continuous( + breaks = x_axis_breaks, + labels = delta_x_labels + ) } - + #### Add bootci Component #### # if isFALSE(show_delta = FALSE) || isFALSE(show_mini_meta) if (delta2 != dabest_effectsize_obj$delta2 || minimeta != dabest_effectsize_obj$minimeta) { - boot_result <- boot_result[-nrow(boot_result),] + boot_result <- boot_result[-nrow(boot_result), ] } - ci_low = boot_result$bca_ci_low - ci_high = boot_result$bca_ci_high - difference = boot_result$difference - + ci_low <- boot_result$bca_ci_low + ci_high <- boot_result$bca_ci_high + difference <- boot_result$difference + if (isTRUE(is_bootci)) { delta_plot <- delta_plot + - geom_bootci(ggplot2::aes(x = x_axis_breaks, - ymin = ci_low, - ymax = ci_high, - middle = difference, - dotsize = es_marker_size, - linesize = es_line_size)) + geom_bootci(ggplot2::aes( + x = x_axis_breaks, + ymin = ci_low, + ymax = ci_high, + middle = difference, + dotsize = es_marker_size, + linesize = es_line_size + )) } - + #### Add zero_dot Component #### # removes extra dot if isTRUE(show_delta2) || isTRUE(show_mini_meta) if (isTRUE(delta2) || isTRUE(minimeta)) { zero_dot_x_breaks <- zero_dot_x_breaks[-length(zero_dot_x_breaks)] } - + if (isTRUE(is_zero_dot)) { delta_plot <- delta_plot + - geom_bootci(ggplot2::aes(x = zero_dot_x_breaks, - ymin = 0, - ymax = 0, - middle = 0, - dotsize = es_marker_size, - linesize = es_line_size)) + geom_bootci(ggplot2::aes( + x = zero_dot_x_breaks, + ymin = 0, + ymax = 0, + middle = 0, + dotsize = es_marker_size, + linesize = es_line_size + )) } - + #### Add baseline_error_curve Component #### if (isTRUE(is_baseline_ec)) { # Add violinplot Component baseline_ec_boot_result <- dabest_effectsize_obj$baseline_ec_boot_result - + baseline_boots <- baseline_ec_boot_result$bootstraps - baseline_ci_low = baseline_ec_boot_result$bca_ci_low - baseline_ci_high = baseline_ec_boot_result$bca_ci_high - baseline_difference = baseline_ec_boot_result$difference - - df_for_baseline_ec_violin <- create_dfs_for_baseline_ec_violin(baseline_boots, - zero_dot_x_breaks, - float_contrast, - flow) + baseline_ci_low <- baseline_ec_boot_result$bca_ci_low + baseline_ci_high <- baseline_ec_boot_result$bca_ci_high + baseline_difference <- baseline_ec_boot_result$difference + + df_for_baseline_ec_violin <- create_dfs_for_baseline_ec_violin( + baseline_boots, + zero_dot_x_breaks, + float_contrast, + flow + ) if (main_violin_type == "multicolour") { delta_plot <- delta_plot + - geom_halfviolin(na.rm = TRUE, - data = df_for_baseline_ec_violin, - ggplot2::aes(x = y, y = x, fill = tag)) + geom_halfviolin( + na.rm = TRUE, + data = df_for_baseline_ec_violin, + ggplot2::aes(x = y, y = x, fill = tag) + ) } else { delta_plot <- delta_plot + - geom_halfviolin(na.rm = TRUE, - data = df_for_baseline_ec_violin, - ggplot2::aes(x = y, y = x, group = tag)) + geom_halfviolin( + na.rm = TRUE, + data = df_for_baseline_ec_violin, + ggplot2::aes(x = y, y = x, group = tag) + ) } # Add bootci Component delta_plot <- delta_plot + - geom_bootci(ggplot2::aes(x = zero_dot_x_breaks, - ymin = baseline_ci_low, - ymax = baseline_ci_high, - middle = baseline_difference, - dotsize = es_marker_size, - linesize = es_line_size)) + geom_bootci(ggplot2::aes( + x = zero_dot_x_breaks, + ymin = baseline_ci_low, + ymax = baseline_ci_high, + middle = baseline_difference, + dotsize = es_marker_size, + linesize = es_line_size + )) } - + #### Add summary lines Component #### if (isTRUE(is_summary_lines)) { delta_plot <- delta_plot + - ggplot2::geom_segment(colour = "black", - linewidth = 0.3, - ggplot2::aes(x = 1.8, - xend = delta_x_max+0.4, - y = difference, - yend = difference)) + - ggplot2::geom_segment(colour = "black", - linewidth = 0.3, - ggplot2::aes(x = 1.8, - xend = delta_x_max+0.4, - y = 0, - yend = 0)) + ggplot2::geom_segment( + colour = "black", + linewidth = 0.3, + ggplot2::aes( + x = 1.8, + xend = delta_x_max + 0.4, + y = difference, + yend = difference + ) + ) + + ggplot2::geom_segment( + colour = "black", + linewidth = 0.3, + ggplot2::aes( + x = 1.8, + xend = delta_x_max + 0.4, + y = 0, + yend = 0 + ) + ) } - + #### Remove xaxis and redraw xaxis component #### if (isTRUE(float_contrast)) { delta_plot <- delta_plot + float_contrast_theme + - ggplot2::geom_hline(linewidth = 0.8, - yintercept = min_y_coords) + ggplot2::geom_hline( + linewidth = 0.8, + yintercept = min_y_coords + ) } else { # Obtain xaxis line and ticks elements for xaxis redraw - if(main_plot_type == "sankey" && isFALSE(flow)) { + if (main_plot_type == "sankey" && isFALSE(flow)) { idx_for_xaxis_redraw <- remove_last_ele_from_nested_list(idx) dfs_for_xaxis_redraw <- create_dfs_for_xaxis_redraw(idx_for_xaxis_redraw) df_for_line <- dfs_for_xaxis_redraw$df_for_line df_for_ticks <- dfs_for_xaxis_redraw$df_for_ticks - + df_for_line <- df_for_line %>% - dplyr::mutate(x = x + 0.5 + (x-1), - xend = xend + 0.5 + (xend-1)) - + dplyr::mutate( + x = x + 0.5 + (x - 1), + xend = xend + 0.5 + (xend - 1) + ) + df_for_ticks <- df_for_ticks %>% - dplyr::mutate(x = x + 0.5 + (x-1)) - + dplyr::mutate(x = x + 0.5 + (x - 1)) } else { dfs_for_xaxis_redraw <- create_dfs_for_xaxis_redraw(idx) df_for_line <- dfs_for_xaxis_redraw$df_for_line df_for_ticks <- dfs_for_xaxis_redraw$df_for_ticks } - - delta_plot <- delta_plot + + + delta_plot <- delta_plot + non_float_contrast_theme + - + # Redraw xaxis line - ggplot2::geom_segment(data = df_for_line, - linewidth = 0.5, - lineend = "square", - color = "black", - ggplot2::aes(x = x, - xend = xend, - y = delta_y_min - delta_y_mean/22, - yend = delta_y_min - delta_y_mean/22)) + - + ggplot2::geom_segment( + data = df_for_line, + linewidth = 0.5, + lineend = "square", + color = "black", + ggplot2::aes( + x = x, + xend = xend, + y = delta_y_min - delta_y_mean / 22, + yend = delta_y_min - delta_y_mean / 22 + ) + ) + + # Redraw xaxis ticks - ggplot2::geom_segment(data = df_for_ticks, - linewidth = 0.5, - lineend = "square", - color = "black", - ggplot2::aes(x = x, - xend = x, - y = delta_y_min - delta_y_mean/22, - yend = delta_y_min - delta_y_mean/10)) + ggplot2::geom_segment( + data = df_for_ticks, + linewidth = 0.5, + lineend = "square", + color = "black", + ggplot2::aes( + x = x, + xend = x, + y = delta_y_min - delta_y_mean / 22, + yend = delta_y_min - delta_y_mean / 10 + ) + ) } - + #### Add y = 0 line Component #### if (isFALSE(float_contrast)) { zero_line_xend <- delta_x_max + 0.3 @@ -664,28 +792,34 @@ plot_delta <- function(dabest_effectsize_obj, float_contrast, plot_kwargs) { zero_line_xend <- zero_line_xend + 0.2 } delta_plot <- delta_plot + - ggplot2::geom_segment(colour = "black", - linewidth = 0.3, - ggplot2::aes(x = 0.6, - xend = zero_line_xend, - y = 0, - yend = 0)) + ggplot2::geom_segment( + colour = "black", + linewidth = 0.3, + ggplot2::aes( + x = 0.6, + xend = zero_line_xend, + y = 0, + yend = 0 + ) + ) } - + #### Add y_labels Component #### delta_plot <- delta_plot + ggplot2::labs(y = delta_y_labels) - + #### Add extra_axis Componenet #### - if(isTRUE(is_deltadelta)) { + if (isTRUE(is_deltadelta)) { delta_plot <- delta_plot + ggplot2::scale_y_continuous(sec.axis = ggplot2::dup_axis(name = "delta-delta")) } - + #### Adjust font sizes #### delta_plot <- delta_plot + - ggplot2::theme(axis.text.x = ggplot2::element_text(size = contrast_x_text), - axis.title.y = ggplot2::element_text(size = contrast_y_text)) - - return(list(delta_plot = delta_plot, delta_range = c(delta_y_min - delta_y_mean/10, delta_y_max))) -} \ No newline at end of file + ggplot2::theme( + axis.text.x = ggplot2::element_text(size = contrast_x_text), + axis.title.y = ggplot2::element_text(size = contrast_y_text) + ) + + return(list(delta_plot = delta_plot, delta_range = c(delta_y_min - delta_y_mean / 10, delta_y_max))) +} diff --git a/R/002_plot_components.R b/R/002_plot_components.R index 3a8cf8b..137fb8e 100644 --- a/R/002_plot_components.R +++ b/R/002_plot_components.R @@ -1,19 +1,19 @@ # Helper functions that generates plot components. -# +# # Contains functions `create_rawplot_components`, `create_deltaplot_components` and `create_violinplot_components`. # Function for creation of list of TRUE/FALSE for raw plot components that will be built -create_rawplot_components <- function(proportional, - is_paired, +create_rawplot_components <- function(proportional, + is_paired, float_contrast) { main_plot_type <- "" is_summary_lines <- TRUE is_tufte_lines <- TRUE - + if (isTRUE(proportional)) { if (isFALSE(is_paired)) { main_plot_type <- "unpaired proportions" - if(isTRUE(float_contrast)){ + if (isTRUE(float_contrast)) { is_summary_lines <- TRUE } else { is_summary_lines <- FALSE @@ -25,7 +25,7 @@ create_rawplot_components <- function(proportional, } else { if (isFALSE(is_paired)) { main_plot_type <- "swarmplot" - if(isTRUE(float_contrast)){ + if (isTRUE(float_contrast)) { is_summary_lines <- TRUE } else { is_summary_lines <- FALSE @@ -33,14 +33,14 @@ create_rawplot_components <- function(proportional, } else { main_plot_type <- "slope" is_tufte_lines <- FALSE - if(isTRUE(float_contrast)){ + if (isTRUE(float_contrast)) { is_summary_lines <- TRUE } else { is_summary_lines <- FALSE } } } - + plot_component <- list( main_plot_type = main_plot_type, is_summary_lines = is_summary_lines, @@ -50,8 +50,8 @@ create_rawplot_components <- function(proportional, } # Function for creation of list of TRUE/FALSE for delta plot components that will be built -create_deltaplot_components <- function(proportional, - is_paired, +create_deltaplot_components <- function(proportional, + is_paired, float_contrast, is_colour, delta2, @@ -64,7 +64,7 @@ create_deltaplot_components <- function(proportional, is_deltadelta <- FALSE is_zero_dot <- FALSE is_baseline_ec <- FALSE - + if (isTRUE(is_paired) || isTRUE(is_colour)) { main_violin_type <- "singlecolour" } @@ -80,7 +80,7 @@ create_deltaplot_components <- function(proportional, if (isTRUE(show_baseline_ec)) { is_baseline_ec <- TRUE } - + plot_component <- list( main_violin_type = main_violin_type, is_summary_lines = is_summary_lines, @@ -93,9 +93,9 @@ create_deltaplot_components <- function(proportional, } # Function for creation of list of values of the violin plot components that will be built -create_violinplot_components <- function(boots, - idx, - float_contrast, +create_violinplot_components <- function(boots, + idx, + float_contrast, delta_y_max, delta_y_min, flow = TRUE, @@ -105,66 +105,70 @@ create_violinplot_components <- function(boots, y = NA, tag = NA ) - + x_axis_breaks <- c() zero_dot_x_breaks <- c() curr_boot_idx <- 1 curr_x_idx <- 0 x_axis_scalar <- ifelse(flow, 0, 0.5) - - for(group in idx) { + + for (group in idx) { curr_x_idx <- curr_x_idx + 1 - if(isTRUE(zero_dot)) { + if (isTRUE(zero_dot)) { zero_dot_x_breaks <- append(zero_dot_x_breaks, curr_x_idx) } - temp_df_violin <- data.frame(x = NA, - y = NA, - tag = toString(curr_x_idx)) - + temp_df_violin <- data.frame( + x = NA, + y = NA, + tag = toString(curr_x_idx) + ) + df_for_violin <- rbind(df_for_violin, temp_df_violin) - - for(i in 2:length(group)) { + + for (i in 2:length(group)) { curr_x_idx <- curr_x_idx + 1 x_axis_breaks <- append(x_axis_breaks, curr_x_idx) - + ci_coords <- stats::density(boots[[curr_boot_idx]]) - + x_coords_ci <- ci_coords$x y_coords_ci <- ci_coords$y - + # Standardise y - y_coords_ci <- (y_coords_ci - min(y_coords_ci))/(max(y_coords_ci) - min(y_coords_ci)) - y_coords_ci <- y_coords_ci/6 - + y_coords_ci <- (y_coords_ci - min(y_coords_ci)) / (max(y_coords_ci) - min(y_coords_ci)) + y_coords_ci <- y_coords_ci / 6 + if (isFALSE(float_contrast)) { - y_coords_ci <- y_coords_ci/1.5 + y_coords_ci <- y_coords_ci / 1.5 } - + y_coords_ci <- y_coords_ci + curr_x_idx - x_axis_scalar - + min_x_coords <- min(x_coords_ci) max_x_coords <- max(x_coords_ci) - + # Keeping track of ylim limits - if(min_x_coords < delta_y_min) { + if (min_x_coords < delta_y_min) { delta_y_min <- min_x_coords } - if(max_x_coords > delta_y_max) { + if (max_x_coords > delta_y_max) { delta_y_max <- max_x_coords } - - temp_df_violin <- data.frame(x = x_coords_ci, - y = y_coords_ci, - tag = rep(toString(curr_x_idx), 512)) - + + temp_df_violin <- data.frame( + x = x_coords_ci, + y = y_coords_ci, + tag = rep(toString(curr_x_idx), 512) + ) + df_for_violin <- rbind(df_for_violin, temp_df_violin) - + curr_boot_idx <- curr_boot_idx + 1 } } df_for_violin <- df_for_violin %>% - dplyr::arrange(tag, x , y) - + dplyr::arrange(tag, x, y) + plot_component <- list( df_for_violin = df_for_violin, delta_y_min = delta_y_min, @@ -172,6 +176,6 @@ create_violinplot_components <- function(boots, x_axis_breaks = x_axis_breaks, zero_dot_x_breaks = zero_dot_x_breaks ) - + return(plot_component) -} \ No newline at end of file +} diff --git a/R/002_plot_geoms.R b/R/002_plot_geoms.R index 353ffbf..9d781fb 100644 --- a/R/002_plot_geoms.R +++ b/R/002_plot_geoms.R @@ -1,82 +1,106 @@ -# Contains custom geom_objects for plotting. -# -# Contains main geoms `geom_halfviolin`, `geom_bootci`, `geom_proportionbar`, `geom_sankeybar` and `geom_sankeyflow`. +#' Contains custom geom_objects for plotting. +#' +#' List of geom_*: +#' - `geom_halfviolin` +#' - `geom_bootci`, +#' - `geom_proportionbar` +#' - `geom_sankeyflow`. +#' #' @importFrom ggplot2 .pt +#' @noRd # Halfviolin Geom draw_group_halfviolin <- function(data, panel_scales, coord) { - coords <- coord$transform(data, panel_scales) - + coords <- coord$transform(data, panel_scales) + first_row <- coords[1, , drop = FALSE] - - violin <- grid::polygonGrob(x = coords$x, - y = coords$y, - gp = grid::gpar(col = first_row$colour, - fill = scales::alpha(first_row$fill, first_row$alpha))) - + + violin <- grid::polygonGrob( + x = coords$x, + y = coords$y, + gp = grid::gpar( + col = first_row$colour, + fill = scales::alpha(first_row$fill, first_row$alpha) + ) + ) } GeomHalfViolin <- ggplot2::ggproto("GeomHalfViolin", ggplot2::Geom, - required_aes = c("x", "y"), - default_aes = ggplot2::aes(colour = NA, - fill = "grey35", - alpha = 0.8), - draw_key = ggplot2::draw_key_point, - draw_group = draw_group_halfviolin) - -geom_halfviolin <- function(mapping = NULL, data = NULL, stat = "identity", - position = "identity", show.legend = NA, + required_aes = c("x", "y"), + default_aes = ggplot2::aes( + colour = NA, + fill = "grey35", + alpha = 0.8 + ), + draw_key = ggplot2::draw_key_point, + draw_group = draw_group_halfviolin +) + +geom_halfviolin <- function(mapping = NULL, data = NULL, stat = "identity", + position = "identity", show.legend = NA, na.rm = FALSE, inherit.aes = TRUE, ...) { - ggplot2::layer(data = data, - mapping = mapping, - stat = stat, - geom = GeomHalfViolin, - position = position, - show.legend = show.legend, - inherit.aes = inherit.aes, - params = list(na.rm = na.rm, ...)) + ggplot2::layer( + data = data, + mapping = mapping, + stat = stat, + geom = GeomHalfViolin, + position = position, + show.legend = show.legend, + inherit.aes = inherit.aes, + params = list(na.rm = na.rm, ...) + ) } # Boot_CI Geom draw_panel_boot_ci <- function(data, panel_scales, coord) { - coords <- coord$transform(data, panel_scales) - - ci_line <- grid::segmentsGrob(x0 = coords$x, - x1 = coords$x, - y0 = coords$ymin, - y1 = coords$ymax, - gp = grid::gpar(lwd = coords$linesize * .pt, - lineend = coords$lineend)) - - ci_dot <- grid::pointsGrob(x = coords$x, - y = coords$middle, - pch = coords$shape, - size = grid::unit(coords$dotsize, "char")) - + coords <- coord$transform(data, panel_scales) + + ci_line <- grid::segmentsGrob( + x0 = coords$x, + x1 = coords$x, + y0 = coords$ymin, + y1 = coords$ymax, + gp = grid::gpar( + lwd = coords$linesize * .pt, + lineend = coords$lineend + ) + ) + + ci_dot <- grid::pointsGrob( + x = coords$x, + y = coords$middle, + pch = coords$shape, + size = grid::unit(coords$dotsize, "char") + ) + grid::gTree(children = grid::gList(ci_line, ci_dot)) - } GeomBootCI <- ggplot2::ggproto("GeomBootCI", ggplot2::Geom, - required_aes = c("x", "ymin", "ymax", "middle"), - default_aes = ggplot2::aes(linesize = 0.8, - dotsize = 0.5, - shape = 19, - lwd = 2, - lineend = "square"), - draw_key = ggplot2::draw_key_point, - draw_panel = draw_panel_boot_ci) - -geom_bootci <- function(mapping = NULL, data = NULL, stat = "identity", - position = "identity", show.legend = NA, + required_aes = c("x", "ymin", "ymax", "middle"), + default_aes = ggplot2::aes( + linesize = 0.8, + dotsize = 0.5, + shape = 19, + lwd = 2, + lineend = "square" + ), + draw_key = ggplot2::draw_key_point, + draw_panel = draw_panel_boot_ci +) + +geom_bootci <- function(mapping = NULL, data = NULL, stat = "identity", + position = "identity", show.legend = NA, na.rm = FALSE, inherit.aes = TRUE, ...) { - ggplot2::layer(data = data, - mapping = mapping, - stat = stat, - geom = GeomBootCI, - position = position, - show.legend = show.legend, - inherit.aes = inherit.aes, - params = list(na.rm = na.rm, ...)) + ggplot2::layer( + data = data, + mapping = mapping, + stat = stat, + geom = GeomBootCI, + position = position, + show.legend = show.legend, + inherit.aes = inherit.aes, + params = list(na.rm = na.rm, ...) + ) } # Proportion Bar Geom @@ -85,70 +109,86 @@ geom_bootci <- function(mapping = NULL, data = NULL, stat = "identity", } draw_group_proportion_bar <- function(data, panel_scales, coord) { - coords <- coord$transform(data, panel_scales) - + coords <- coord$transform(data, panel_scales) + first_row <- coords[1, , drop = FALSE] - - failure_bar <- grid::polygonGrob(x = coords$x, - y = coords$y, - gp = grid::gpar(col = first_row$colour, - fill = scales::alpha(first_row$fill, first_row$alpha))) + + failure_bar <- grid::polygonGrob( + x = coords$x, + y = coords$y, + gp = grid::gpar( + col = first_row$colour, + fill = scales::alpha(first_row$fill, first_row$alpha) + ) + ) } GeomProportionBar <- ggplot2::ggproto("GeomProportionBar", ggplot2::Geom, - required_aes = c("x", "y"), - default_aes = ggplot2::aes(colour = NA, - fill = "white", - alpha = NA), - draw_key = ggplot2::draw_key_polygon, - draw_group = draw_group_proportion_bar) - -geom_proportionbar <- function(mapping = NULL, data = NULL, - stat = "identity", position = "identity", + required_aes = c("x", "y"), + default_aes = ggplot2::aes( + colour = NA, + fill = "white", + alpha = NA + ), + draw_key = ggplot2::draw_key_polygon, + draw_group = draw_group_proportion_bar +) + +geom_proportionbar <- function(mapping = NULL, data = NULL, + stat = "identity", position = "identity", ..., - show.legend = NA, - na.rm = FALSE, + show.legend = NA, + na.rm = FALSE, inherit.aes = TRUE) { - ggplot2::layer(data = data, - mapping = mapping, - stat = stat, - geom = GeomProportionBar, - position = position, - show.legend = show.legend, - inherit.aes = inherit.aes, - params = list( - na.rm = na.rm, - ...)) + ggplot2::layer( + data = data, + mapping = mapping, + stat = stat, + geom = GeomProportionBar, + position = position, + show.legend = show.legend, + inherit.aes = inherit.aes, + params = list(na.rm = na.rm, ...) + ) } # SankeyFlow Geom draw_group_sankey_flow <- function(data, panel_scales, coord) { - coords <- coord$transform(data, panel_scales) + coords <- coord$transform(data, panel_scales) first_row <- coords[1, , drop = FALSE] - - flow <- grid::polygonGrob(x = coords$x, - y = coords$y, - gp = grid::gpar(col = first_row$colour, - fill = scales::alpha(first_row$fillcol, first_row$alpha))) + + flow <- grid::polygonGrob( + x = coords$x, + y = coords$y, + gp = grid::gpar( + col = first_row$colour, + fill = scales::alpha(first_row$fill, first_row$alpha) + ) + ) } GeomSankeyFlow <- ggplot2::ggproto("GeomSankeyFlow", ggplot2::Geom, - required_aes = c("x", "y"), - default_aes = ggplot2::aes(colour = NA, - fillcol = "gray50", - alpha = 0.5), - draw_key = ggplot2::draw_key_polygon, - draw_group = draw_group_sankey_flow) - -geom_sankeyflow <- function(mapping = NULL, data = NULL, stat = "identity", - position = "identity", show.legend = NA, + required_aes = c("x", "y"), + default_aes = ggplot2::aes( + colour = NA, + fill = "gray50", + alpha = NA + ), + draw_key = ggplot2::draw_key_polygon, + draw_group = draw_group_sankey_flow +) + +geom_sankeyflow <- function(mapping = NULL, data = NULL, stat = "identity", + position = "identity", show.legend = NA, na.rm = FALSE, inherit.aes = TRUE, ...) { - ggplot2::layer(data = data, - mapping = mapping, - stat = stat, - geom = GeomSankeyFlow, - position = position, - show.legend = show.legend, - inherit.aes = inherit.aes, - params = list(na.rm = na.rm, ...)) -} \ No newline at end of file + ggplot2::layer( + data = data, + mapping = mapping, + stat = stat, + geom = GeomSankeyFlow, + position = position, + show.legend = show.legend, + inherit.aes = inherit.aes, + params = list(na.rm = na.rm, ...) + ) +} diff --git a/R/002_plot_themes.R b/R/002_plot_themes.R index 917516b..0a76235 100644 --- a/R/002_plot_themes.R +++ b/R/002_plot_themes.R @@ -1,26 +1,32 @@ # Contains themes for float_contrast == TRUE | FALSE -# +# # Contains themes `float_contrast_theme` and `non_float_contrast_theme`. # Theme for left-right graph -float_contrast_theme <- ggplot2::theme(plot.margin = ggplot2::unit(c(0, 0, 0, 0), "pt"), - axis.line.x = ggplot2::element_blank(), - axis.title.x = ggplot2::element_blank(), - axis.ticks.x = ggplot2::element_blank(), - axis.title.x.bottom = ggplot2::element_blank(), - legend.title = ggplot2::element_blank()) +float_contrast_theme <- ggplot2::theme( + plot.margin = ggplot2::unit(c(0, 0, 0, 0), "pt"), + axis.line.x = ggplot2::element_blank(), + axis.title.x = ggplot2::element_blank(), + axis.ticks.x = ggplot2::element_blank(), + axis.title.x.bottom = ggplot2::element_blank(), + legend.title = ggplot2::element_blank() +) # Theme for top-down graph -non_float_contrast_theme <- ggplot2::theme(plot.margin = ggplot2::unit(c(0, 0, 0, 0), "pt"), - axis.line.x = ggplot2::element_blank(), - axis.title.x = ggplot2::element_blank(), - axis.ticks.x = ggplot2::element_blank(), - axis.title.x.bottom = ggplot2::element_blank(), - legend.title = ggplot2::element_blank()) +non_float_contrast_theme <- ggplot2::theme( + plot.margin = ggplot2::unit(c(0, 0, 0, 0), "pt"), + axis.line.x = ggplot2::element_blank(), + axis.title.x = ggplot2::element_blank(), + axis.ticks.x = ggplot2::element_blank(), + axis.title.x.bottom = ggplot2::element_blank(), + legend.title = ggplot2::element_blank() +) # Theme for removal of all axes and labels -remove_all_axes_theme <- ggplot2::theme(axis.line = ggplot2::element_blank(), - axis.title = ggplot2::element_blank(), - axis.ticks = ggplot2::element_blank(), - legend.title = ggplot2::element_blank(), - plot.margin = ggplot2::unit(c(0, 0, 0, 0), "pt")) \ No newline at end of file +remove_all_axes_theme <- ggplot2::theme( + axis.line = ggplot2::element_blank(), + axis.title = ggplot2::element_blank(), + axis.ticks = ggplot2::element_blank(), + legend.title = ggplot2::element_blank(), + plot.margin = ggplot2::unit(c(0, 0, 0, 0), "pt") +) diff --git a/R/002_separate_plots.R b/R/002_separate_plots.R index 6d6f81e..12c0f2f 100644 --- a/R/002_separate_plots.R +++ b/R/002_separate_plots.R @@ -1,26 +1,26 @@ # Contains functions responsible for separate plot functionalities via flow = FALSE. -# +# # Contains `separate_idx`, `remove_last_ele_from_nested_list`, `create_xlabs_for_sankey` functions. # Separate idx function separate_idx <- function(idx, paired) { separated_idx <- list() curr_group_vector <- c() - if(paired == "baseline") { - for(group in idx) { - ctrl_grp = group[1] - for(index in 2:length(group)) { - test_grp = group[index] + if (paired == "baseline") { + for (group in idx) { + ctrl_grp <- group[1] + for (index in 2:length(group)) { + test_grp <- group[index] curr_group_vector <- append(ctrl_grp, test_grp) separated_idx <- c(separated_idx, list(curr_group_vector)) curr_group_vector <- c() } } } else { - for(group in idx) { - for(index in 1:(length(group)-1)) { - ctrl_grp = group[index] - test_grp = group[index+1] + for (group in idx) { + for (index in 1:(length(group) - 1)) { + ctrl_grp <- group[index] + test_grp <- group[index + 1] curr_group_vector <- append(ctrl_grp, test_grp) separated_idx <- c(separated_idx, list(curr_group_vector)) curr_group_vector <- c() @@ -33,7 +33,7 @@ separate_idx <- function(idx, paired) { # Function that removes the last element from each subgroup within a list() remove_last_ele_from_nested_list <- function(nested_list) { ## nested_array can be in the form of list[][] or list(vectors[]) - for(index in 1:length(nested_list)) { + for (index in 1:length(nested_list)) { sub_group <- nested_list[[index]] nested_list[[index]] <- sub_group[-length(sub_group)] } @@ -41,22 +41,23 @@ remove_last_ele_from_nested_list <- function(nested_list) { } # Function that creates xlabels for separated sankey diagrams -create_xlabs_for_sankey <- function(idx, +create_xlabs_for_sankey <- function(idx, Ns, enquo_x) { sankey_x_labels <- c() Ns_sankey <- dplyr::ungroup(Ns) - for (group in idx){ + for (group in idx) { group_length <- length(group) - for (i in 1: (group_length - 1)){ + for (i in 1:(group_length - 1)) { ctrl <- group[i] - treat <- group[i+1] - count_for_pair <- Ns_sankey %>% - dplyr::filter(!!enquo_x == treat) %>% - dplyr::select(n) %>% dplyr::pull() + treat <- group[i + 1] + count_for_pair <- Ns_sankey %>% + dplyr::filter(!!enquo_x == treat) %>% + dplyr::select(n) %>% + dplyr::pull() label <- paste(ctrl, "\nv.s.\n", treat, "\nN=", count_for_pair) sankey_x_labels <- c(sankey_x_labels, label) } } return(sankey_x_labels) -} \ No newline at end of file +} diff --git a/R/003_bootstrap_tools.R b/R/003_bootstrap_tools.R index 007aa49..470f704 100644 --- a/R/003_bootstrap_tools.R +++ b/R/003_bootstrap_tools.R @@ -1,24 +1,24 @@ # Obtains bootstraps as well as bca intervals -# -# Contains functions `effsize_boot`, `bootstrap`, `bca` and `boot_weighted_row`. +# +# Contains functions `effsize_boot`, `bootstrap`, `bca` and `boot_weighted_row`. effsize_boot <- function( - data, - effect_size_func, - reps = 5000, - paired = FALSE - ){ - - s <- c(rep(1, length(data$control)), - rep(2, length(data$test))) - + data, + effect_size_func, + reps = 5000, + paired = FALSE) { + s <- c( + rep(1, length(data$control)), + rep(2, length(data$test)) + ) + bootboot <- function(d, indices, paired) { c <- d[indices[s == 1]] t <- d[indices[s == 2]] - + return(effect_size_func(c, t, paired)) } - + b <- boot::boot( c(data$control, data$test), statistic = bootboot, @@ -26,7 +26,7 @@ effsize_boot <- function( strata = s, paired = paired ) - + return(b) } @@ -36,16 +36,14 @@ bootstrap <- function( effect_size_func, seed = 12345, reps = 5000, - boot_labs - ){ - + boot_labs) { boot_result <- tibble::tibble() baseline_ec_boot_result <- tibble::tibble() - + raw_data <- dabest_obj$raw_data idx <- dabest_obj$idx resamples <- dabest_obj$resamples - + if (isFALSE(is.list(idx))) { idx <- list(idx) } @@ -55,84 +53,94 @@ bootstrap <- function( paired <- dabest_obj$paired is_paired <- dabest_obj$is_paired is_colour <- dabest_obj$is_colour - + proportional <- dabest_obj$proportional - + quoname_x <- rlang::as_name(enquo_x) quoname_y <- rlang::as_name(enquo_y) delta_x_labels <- list() delta_y_labels <- boot_labs - + minimeta <- dabest_obj$minimeta delta2 <- dabest_obj$delta2 - + ## Validity Checks if (isTRUE(is_paired) && boot_labs == "Cliffs' delta") { cli::cli_abort(c("{.var Cliffs' delta} cannot be used when {.field paired} is not NULL.", - "x" = "Please change {.var effect_size_func}.")) - } else if (isTRUE(proportional) && !(boot_labs %in% c("Mean difference","Cohen's h","Paired\nmean difference"))) { + "x" = "Please change {.var effect_size_func}." + )) + } else if (isTRUE(proportional) && !(boot_labs %in% c("Mean difference", "Cohen's h", "Paired\nmean difference"))) { cli::cli_abort(c("Other effect sizes besides {.var Cohens h} and {.var Mean difference} cannot be used when {.field proportional} is TRUE.", - "x" = "Please change {.var effect_size_func}.")) + "x" = "Please change {.var effect_size_func}." + )) } ## Check that if delta2 is true, only mean difference can be computed - if (isTRUE(delta2) && !(boot_labs %in% c("Paired\nmean difference","Mean difference"))) { + if (isTRUE(delta2) && !(boot_labs %in% c("Paired\nmean difference", "Mean difference"))) { cli::cli_abort(c("Other effect sizes besides {.var Mean difference} cannot be used when {.field delta2} is TRUE.", - "x" = "Please change {.var effect_size_func}.")) + "x" = "Please change {.var effect_size_func}." + )) } - + ## Getting boot_results if (isFALSE(is_paired) || isTRUE(paired == "baseline")) { for (group in idx) { group_length <- length(group) - - ctrl_tibble <- raw_data %>% + + ctrl_tibble <- raw_data %>% dplyr::filter(!!enquo_x == !!group[1]) ctrl_measurement <- ctrl_tibble[[quoname_y]] - + tests <- group[2:group_length] - + ctrl_size <- length(ctrl_measurement) ctrl_var <- var_w_df(ctrl_measurement, ctrl_size) - - + + for (test_group in tests) { test_tibble <- raw_data %>% dplyr::filter(!!enquo_x == !!test_group) - + test_measurement <- test_tibble[[quoname_y]] - - xlabels <- paste(test_group, group[1], sep="\nminus\n") + + xlabels <- paste(test_group, group[1], sep = "\nminus\n") delta_x_labels <- append(delta_x_labels, xlabels) - - control_test_measurement <- list(control = ctrl_measurement, - test = test_measurement) - - + + control_test_measurement <- list( + control = ctrl_measurement, + test = test_measurement + ) + + test_size <- length(test_measurement) test_var <- var_w_df(test_measurement, test_size) - - grp_var <- calculate_group_variance(ctrl_var = ctrl_var, - ctrl_N = ctrl_size, - test_var = test_var, - test_N = test_size) - - weight <- 1/grp_var - + + grp_var <- calculate_group_variance( + ctrl_var = ctrl_var, + ctrl_N = ctrl_size, + test_var = test_var, + test_N = test_size + ) + + weight <- 1 / grp_var + set.seed(seed) - - boots <- effsize_boot(data = control_test_measurement, - effect_size_func = effect_size_func, - reps = reps, - paired = is_paired) - + + boots <- effsize_boot( + data = control_test_measurement, + effect_size_func = effect_size_func, + reps = reps, + paired = is_paired + ) + if (ci < 0 | ci > 100) { cli::cli_abort(c("{.field ci} is not between 0 and 100.", - "x" = "{.field ci} must be between 0 and 100, not {ci}.")) + "x" = "{.field ci} must be between 0 and 100, not {ci}." + )) } - - bootci <- boot::boot.ci(boots, conf=ci/100, type = c("perc","bca")) - + + bootci <- boot::boot.ci(boots, conf = ci / 100, type = c("perc", "bca")) + boot_row <- list( control_group = group[1], test_group = test_group, @@ -152,49 +160,56 @@ bootstrap <- function( } else { for (group in idx) { group_length <- length(group) - for (i in 1:(group_length-1)) { + for (i in 1:(group_length - 1)) { control_group <- group[i] - test_group <- group[i+1] - - ctrl_tibble <- raw_data %>% + test_group <- group[i + 1] + + ctrl_tibble <- raw_data %>% dplyr::filter(!!enquo_x == !!control_group) ctrl_measurement <- ctrl_tibble[[quoname_y]] - - test_tibble <- raw_data %>% + + test_tibble <- raw_data %>% dplyr::filter(!!enquo_x == !!test_group) test_measurement <- test_tibble[[quoname_y]] - - xlabels <- paste(test_group, control_group, sep="\nminus\n") + + xlabels <- paste(test_group, control_group, sep = "\nminus\n") delta_x_labels <- append(delta_x_labels, xlabels) - - control_test_measurement <- list(control = ctrl_measurement, - test = test_measurement) - #add weights column + + control_test_measurement <- list( + control = ctrl_measurement, + test = test_measurement + ) + # add weights column ctrl_size <- length(ctrl_measurement) ctrl_var <- var_w_df(ctrl_measurement, ctrl_size) test_size <- length(test_measurement) test_var <- var_w_df(test_measurement, test_size) - grp_var <- calculate_group_variance(ctrl_var = ctrl_var, - ctrl_N = ctrl_size, - test_var = test_var, - test_N = test_size) - - weight <- 1/grp_var - + grp_var <- calculate_group_variance( + ctrl_var = ctrl_var, + ctrl_N = ctrl_size, + test_var = test_var, + test_N = test_size + ) + + weight <- 1 / grp_var + set.seed(seed) - - boots <- effsize_boot(data = control_test_measurement, - effect_size_func = effect_size_func, - reps = reps, - paired = is_paired) - + + boots <- effsize_boot( + data = control_test_measurement, + effect_size_func = effect_size_func, + reps = reps, + paired = is_paired + ) + if (ci < 0 | ci > 100) { cli::cli_abort(c("{.field ci} is not between 0 and 100.", - "x" = "{.field ci} must be between 0 and 100, not {ci}.")) + "x" = "{.field ci} must be between 0 and 100, not {ci}." + )) } - - bootci <- boot::boot.ci(boots, conf=ci/100, type = c("perc","bca")) - + + bootci <- boot::boot.ci(boots, conf = ci / 100, type = c("perc", "bca")) + boot_row <- list( control_group = group[1], test_group = test_group, @@ -212,55 +227,62 @@ bootstrap <- function( } } } - if (isTRUE(minimeta)){ + if (isTRUE(minimeta)) { boot_last_row <- boot_weighted_row(boot_result = boot_result, ci) boot_result <- dplyr::bind_rows(boot_result, boot_last_row) } if (isTRUE(delta2)) { - boot_last_row <- boot_delta_delta(boot_result = boot_result,ci) - boot_result <- dplyr::bind_rows(boot_result,boot_last_row) + boot_last_row <- boot_delta_delta(boot_result = boot_result, ci) + boot_result <- dplyr::bind_rows(boot_result, boot_last_row) } - + ## Getting boot_results for baseline_error_curve for (group in idx) { control_group <- group[1] test_group <- control_group - - ctrl_tibble <- raw_data %>% + + ctrl_tibble <- raw_data %>% dplyr::filter(!!enquo_x == !!control_group) ctrl_measurement <- ctrl_tibble[[quoname_y]] test_measurement <- ctrl_measurement - - xlabels <- paste(test_group, control_group, sep="\nminus\n") - - control_test_measurement <- list(control = ctrl_measurement, - test = test_measurement) - #add weights column + + xlabels <- paste(test_group, control_group, sep = "\nminus\n") + + control_test_measurement <- list( + control = ctrl_measurement, + test = test_measurement + ) + # add weights column ctrl_size <- length(ctrl_measurement) ctrl_var <- var_w_df(ctrl_measurement, ctrl_size) test_size <- length(test_measurement) test_var <- var_w_df(test_measurement, test_size) - grp_var <- calculate_group_variance(ctrl_var = ctrl_var, - ctrl_N = ctrl_size, - test_var = test_var, - test_N = test_size) - - weight <- 1/grp_var - + grp_var <- calculate_group_variance( + ctrl_var = ctrl_var, + ctrl_N = ctrl_size, + test_var = test_var, + test_N = test_size + ) + + weight <- 1 / grp_var + set.seed(seed) - - boots <- effsize_boot(data = control_test_measurement, - effect_size_func = effect_size_func, - reps = reps, - paired = is_paired) - + + boots <- effsize_boot( + data = control_test_measurement, + effect_size_func = effect_size_func, + reps = reps, + paired = is_paired + ) + if (ci < 0 | ci > 100) { cli::cli_abort(c("{.field ci} is not between 0 and 100.", - "x" = "{.field ci} must be between 0 and 100, not {ci}.")) + "x" = "{.field ci} must be between 0 and 100, not {ci}." + )) } - - bootci <- boot::boot.ci(boots, conf=ci/100, type = c("perc","bca")) - + + bootci <- boot::boot.ci(boots, conf = ci / 100, type = c("perc", "bca")) + boot_row <- list( control_group = group[1], test_group = test_group, @@ -276,85 +298,93 @@ bootstrap <- function( ) baseline_ec_boot_result <- dplyr::bind_rows(baseline_ec_boot_result, boot_row) } - + raw_y_labels <- ifelse(proportional, "proportion of success", "value") - - out <- list(raw_data = raw_data, - idx = idx, - delta_x_labels = delta_x_labels, - delta_y_labels = delta_y_labels, - raw_y_labels = raw_y_labels, - is_paired = is_paired, - is_colour = is_colour, - paired = paired, - resamples = resamples, - Ns = dabest_obj$Ns, - control_summary = dabest_obj$control_summary, - test_summary = dabest_obj$test_summary, - ylim = dabest_obj$ylim, - enquo_x = dabest_obj$enquo_x, - enquo_y = dabest_obj$enquo_y, - enquo_id_col = dabest_obj$enquo_id_col, - enquo_colour = dabest_obj$enquo_colour, - proportional = proportional, - minimeta = minimeta, - delta2 = dabest_obj$delta2, - proportional_data = dabest_obj$proportional_data, - boot_result = boot_result, - baseline_ec_boot_result = baseline_ec_boot_result) - + + out <- list( + raw_data = raw_data, + idx = idx, + delta_x_labels = delta_x_labels, + delta_y_labels = delta_y_labels, + raw_y_labels = raw_y_labels, + is_paired = is_paired, + is_colour = is_colour, + paired = paired, + resamples = resamples, + Ns = dabest_obj$Ns, + control_summary = dabest_obj$control_summary, + test_summary = dabest_obj$test_summary, + ylim = dabest_obj$ylim, + enquo_x = dabest_obj$enquo_x, + enquo_y = dabest_obj$enquo_y, + enquo_id_col = dabest_obj$enquo_id_col, + enquo_colour = dabest_obj$enquo_colour, + proportional = proportional, + minimeta = minimeta, + delta2 = dabest_obj$delta2, + proportional_data = dabest_obj$proportional_data, + boot_result = boot_result, + baseline_ec_boot_result = baseline_ec_boot_result + ) + class(out) <- c("dabest_effectsize") - + return(out) } # BCA function -bca <- function(bootstraps, conf.level = .95){ +bca <- function(bootstraps, conf.level = .95) { # Inverse Variance Method - if(stats::var(bootstraps)==0){ + if (stats::var(bootstraps) == 0) { lower <- mean(bootstraps) upper <- mean(bootstraps) return(c(lower, upper)) } - - if(max(bootstraps)==Inf | min(bootstraps)==-Inf){ + + if (max(bootstraps) == Inf | min(bootstraps) == -Inf) { stop("bca() function does not work when some values are infinite") } - - low <- (1 - conf.level)/2 + + low <- (1 - conf.level) / 2 high <- 1 - low sims <- length(bootstraps) - z.inv <- length(bootstraps[bootstraps < mean(bootstraps)])/sims + z.inv <- length(bootstraps[bootstraps < mean(bootstraps)]) / sims z <- stats::qnorm(z.inv) - U <- (sims - 1) * (mean(bootstraps, na.rm=TRUE) - bootstraps) + U <- (sims - 1) * (mean(bootstraps, na.rm = TRUE) - bootstraps) top <- sum(U^3) - under <- 6 * (sum(U^2))^{3/2} + under <- 6 * (sum(U^2))^{ + 3 / 2 + } a <- top / under - lower.inv <- stats::pnorm(z + (z + stats::qnorm(low))/(1 - a * (z + stats::qnorm(low)))) - lower <- stats::quantile(bootstraps, lower.inv, names=FALSE) - upper.inv <- stats::pnorm(z + (z + stats::qnorm(high))/(1 - a * (z + stats::qnorm(high)))) - upper <- stats::quantile(bootstraps, upper.inv, names=FALSE) + lower.inv <- stats::pnorm(z + (z + stats::qnorm(low)) / (1 - a * (z + stats::qnorm(low)))) + lower <- stats::quantile(bootstraps, lower.inv, names = FALSE) + upper.inv <- stats::pnorm(z + (z + stats::qnorm(high)) / (1 - a * (z + stats::qnorm(high)))) + upper <- stats::quantile(bootstraps, upper.inv, names = FALSE) return(c(lower, upper)) -} +} # Creates df of values (bca ci, weighted bootstraps) for minimeta -boot_weighted_row <- function(boot_result, ci){ +boot_weighted_row <- function(boot_result, ci) { bootstraps <- boot_result$bootstraps weights <- boot_result$weight - - weighted_result <- Map(function(x, w) x * w, - boot_result$bootstraps, boot_result$weight) + + weighted_result <- Map( + function(x, w) x * w, + boot_result$bootstraps, boot_result$weight + ) weighted_bootstrap <- Reduce("+", weighted_result) - weighted_bootstrap <- weighted_bootstrap/sum(weights) - - - weighted_difference <- calculate_weighted_delta(weight = boot_result$weight, - differences = boot_result$difference) + weighted_bootstrap <- weighted_bootstrap / sum(weights) + + + weighted_difference <- calculate_weighted_delta( + weight = boot_result$weight, + differences = boot_result$difference + ) bca_weighted <- bca(bootstraps = weighted_bootstrap) - pct_interval <- confinterval(weighted_bootstrap, ci/100) + pct_interval <- confinterval(weighted_bootstrap, ci / 100) boot_last_row <- list( - control_group = 'Minimeta Overall Test', - test_group = 'Minimeta Overall Test', + control_group = "Minimeta Overall Test", + test_group = "Minimeta Overall Test", bootstraps = list(as.vector(weighted_bootstrap)), nboots = length(weighted_bootstrap), bca_ci_low = bca_weighted[1], @@ -364,20 +394,20 @@ boot_weighted_row <- function(boot_result, ci){ ci = ci, difference = weighted_difference, weight = 1 - ) + ) return(boot_last_row) } # Creates df of values (bca ci, weighted bootstraps) for deltadelta -boot_delta_delta <- function(boot_result,ci) { - bootstrap_delta_delta_neg <- Reduce("-",boot_result$bootstraps) - bootstrap_delta_delta <- bootstrap_delta_delta_neg *-1 +boot_delta_delta <- function(boot_result, ci) { + bootstrap_delta_delta_neg <- Reduce("-", boot_result$bootstraps) + bootstrap_delta_delta <- bootstrap_delta_delta_neg * -1 difference_delta_delta <- calculate_delta_difference(boot_result$difference) bca_delta_delta <- bca(bootstrap_delta_delta) - pct_interval <- confinterval(bootstrap_delta_delta,ci/100) + pct_interval <- confinterval(bootstrap_delta_delta, ci / 100) boot_last_row <- list( - control_group = 'Delta2 Overall Test', - test_group = 'Delta2 Overall Test', + control_group = "Delta2 Overall Test", + test_group = "Delta2 Overall Test", bootstraps = list(as.vector(bootstrap_delta_delta)), nboots = length(bootstrap_delta_delta), bca_ci_low = bca_delta_delta[1], @@ -387,5 +417,5 @@ boot_delta_delta <- function(boot_result,ci) { ci = ci, difference = difference_delta_delta, weight = 1 - ) + ) } diff --git a/R/003_formulas.R b/R/003_formulas.R index 5dbed74..d60c810 100644 --- a/R/003_formulas.R +++ b/R/003_formulas.R @@ -1,58 +1,64 @@ # Formulas used -# +# # Contains functions `sigmoid`, `flipped_sig`, `calculate_group_variance`, `calculate_weighted_delta`, `confinterval` and `var_w_df`. sigmoid <- function(x_from, factor, y_from, y_to, smooth = 5.5, n = 300) { x <- seq(-smooth, smooth, length = n) y <- exp(x) / (exp(x) + 1) - out <- data.frame(x = (x + smooth) / (smooth * 2) * factor + x_from, - y = y * (y_to - y_from) + y_from) + out <- data.frame( + x = (x + smooth) / (smooth * 2) * factor + x_from, + y = y * (y_to - y_from) + y_from + ) } flipped_sig <- function(x_from, factor, y_from, y_to, smooth = 5.5, n = 300) { x <- seq(-smooth, smooth, length = n) y <- -exp(-x) / (exp(-x) + 1) - out <- data.frame(x = (x + smooth) / (smooth * 2) * factor + x_from, - y = y * (y_to - y_from) + y_to) + out <- data.frame( + x = (x + smooth) / (smooth * 2) * factor + x_from, + y = y * (y_to - y_from) + y_to + ) } -calculate_group_variance <- function(ctrl_var, +calculate_group_variance <- function(ctrl_var, ctrl_N, - test_var, + test_var, test_N) { - num <- (test_N-1)*test_var + (ctrl_N-1)*ctrl_var - denom <- test_N + ctrl_N -2 - return(num/denom) + num <- (test_N - 1) * test_var + (ctrl_N - 1) * ctrl_var + denom <- test_N + ctrl_N - 2 + return(num / denom) # return(ctrl_var/ctrl_N + test_var/test_N) } calculate_weighted_delta <- function(weight, differences) { denom <- sum(weight) - num <- sum(weight*differences) + num <- sum(weight * differences) return(num / denom) } calculate_delta_difference <- function(differences) { - delta_difference <- differences[2]-differences[1] + delta_difference <- differences[2] - differences[1] return(delta_difference) } -confinterval <- function(vector, ci_decimal){ +confinterval <- function(vector, ci_decimal) { sample_mean <- mean(vector) standard_error <- sd(vector) / sqrt(length(vector)) z <- stats::qnorm(1 - (1 - ci_decimal) / 2) - + margin_of_error <- z * standard_error - confidence_interval <- c(sample_mean - margin_of_error, - sample_mean + margin_of_error) - + confidence_interval <- c( + sample_mean - margin_of_error, + sample_mean + margin_of_error + ) + return(confidence_interval) } -var_w_df <- function(measurement, size){ +var_w_df <- function(measurement, size) { df <- size - 1 # Calculate the variance with specified degrees of freedom var <- (sum((measurement - mean(measurement))^2) / df) - + return(var) -} \ No newline at end of file +} diff --git a/R/003_pvalues_and_permutation_test_tools.R b/R/003_pvalues_and_permutation_test_tools.R index 438498c..cd86d4e 100644 --- a/R/003_pvalues_and_permutation_test_tools.R +++ b/R/003_pvalues_and_permutation_test_tools.R @@ -1,217 +1,233 @@ - -#Obtain permutation tests, permutations and p values +# Obtain permutation tests, permutations and p values PermutationTest <- function(control, test, effect_size, is_paired = NULL, - permutation_count=5000, + permutation_count = 5000, random_seed = 12345, ef_size_fn) { - # Check if the arrays have the same length for paired test if (!is.null(is_paired) && length(control) != length(test)) { stop("The two arrays do not have the same length.") } - + # Initialize random number generator set.seed(random_seed) - + # Set required constants and variables control <- as.numeric(control) test <- as.numeric(test) - + control_sample <- control test_sample <- test - + BAG <- c(control, test) CONTROL_LEN <- length(control) EXTREME_COUNT <- 0 THRESHOLD <- abs(ef_size_fn(control, test, is_paired)) permutations <- vector("numeric", length = permutation_count) permutations_var <- vector("numeric", length = permutation_count) - + for (i in 1:permutation_count) { - if (!is.null(is_paired)) { # Select which control-test pairs to swap. - random_idx <- sample(1:CONTROL_LEN, size = sample(1:CONTROL_LEN, 1), - replace = TRUE) - + random_idx <- sample(1:CONTROL_LEN, + size = sample(1:CONTROL_LEN, 1), + replace = TRUE + ) + # Perform swap. for (idx in random_idx) { placeholder <- control_sample[idx] control_sample[idx] <- test_sample[idx] test_sample[idx] <- placeholder } - } else { # Shuffle the bag and assign to control and test groups. shuffled <- sample(BAG) control_sample <- shuffled[1:CONTROL_LEN] test_sample <- shuffled[(CONTROL_LEN + 1):length(shuffled)] } - + es <- ef_size_fn(control_sample, test_sample, is_paired) - + control_var <- stats::var(control_sample, na.rm = TRUE) test_var <- stats::var(test_sample, na.rm = TRUE) control_N <- length(control_sample) test_N <- length(test_sample) var <- calculate_group_variance(control_var, control_N, test_var, test_N) - + permutations[i] <- es permutations_var[i] <- var - + if (abs(es) > THRESHOLD) { EXTREME_COUNT <- EXTREME_COUNT + 1 } } - + pvalue <- EXTREME_COUNT / permutation_count - + perm_results <- list( permutations = permutations, permutations_var = permutations_var, pvalue = pvalue, es = es ) - + return(perm_results) } # p values -pvals_statistics <- function(control, - test, +pvals_statistics <- function(control, + test, is_paired, proportional, - effect_size){ - + effect_size) { pvals_stats <- list() if (!is.null(is_paired) && !proportional) { # Wilcoxon test (non-parametric version of the paired T-test) wilcoxon <- stats::wilcox.test(control, test) pvalue_wilcoxon <- wilcoxon$p.value statistic_wilcoxon <- wilcoxon$statistic - + paired_t <- NA pvalue_paired_students_t <- NA - statistic_paired_students_t <-NA - + statistic_paired_students_t <- NA + if (effect_size != "median_diff") { # Paired Student's t-test paired_t <- stats::t.test(control, test, paired = TRUE, na.rm = TRUE) pvalue_paired_students_t <- paired_t$p.value statistic_paired_students_t <- paired_t$statistic } - pvals_stats <- list(pvalue_wilcoxon = pvalue_wilcoxon, - wilcoxon = wilcoxon, - statistic_wilcoxon = statistic_wilcoxon, - paired_t = paired_t, - pvalue_paired_students_t = pvalue_paired_students_t, - statistic_paired_students_t = statistic_paired_students_t) + pvals_stats <- list( + pvalue_wilcoxon = pvalue_wilcoxon, + wilcoxon = wilcoxon, + statistic_wilcoxon = statistic_wilcoxon, + paired_t = paired_t, + pvalue_paired_students_t = pvalue_paired_students_t, + statistic_paired_students_t = statistic_paired_students_t + ) } else if (!is.null(is_paired) && proportional) { # McNemar's test for binary paired data - table <- matrix(c(sum(control == 0 & test == 0), sum(control == 0 & test == 1), - sum(control == 1 & test == 0), sum(control == 1 & test == 1)), - nrow = 2, byrow = TRUE) + table <- matrix( + c( + sum(control == 0 & test == 0), sum(control == 0 & test == 1), + sum(control == 1 & test == 0), sum(control == 1 & test == 1) + ), + nrow = 2, byrow = TRUE + ) mcnemar_result <- stats::mcnemar.test(table, correct = TRUE) pvalue_mcnemar <- mcnemar_result$p.value statistic_mcnemar <- mcnemar_result$statistic - pvals_stats <- list(pvalue_mcnemar = pvalue_mcnemar, - statistic_mcnemar = statistic_mcnemar) + pvals_stats <- list( + pvalue_mcnemar = pvalue_mcnemar, + statistic_mcnemar = statistic_mcnemar + ) } else if (effect_size == "cliffs_delta") { # Brunner-Munzel test brunner_munzel <- brunnermunzel::brunnermunzel.test(control, test, na.rm = TRUE) pvalue_brunner_munzel <- brunner_munzel$p.value statistic_brunner_munzel <- brunner_munzel$statistic - pvals_stats <- list(pvalue_brunner_munzel = pvalue_brunner_munzel, - statistic_brunner_munzel = statistic_brunner_munzel) + pvals_stats <- list( + pvalue_brunner_munzel = pvalue_brunner_munzel, + statistic_brunner_munzel = statistic_brunner_munzel + ) } else if (effect_size == "median_diff") { # Kruskal-Wallis H-test kruskal <- stats::kruskal.test(list(control, test)) pvalue_kruskal <- kruskal$p.value statistic_kruskal <- kruskal$statistic - pvals_stats <- list(pvalue_kruskal = pvalue_kruskal, - statistic_kruskal = statistic_kruskal) + pvals_stats <- list( + pvalue_kruskal = pvalue_kruskal, + statistic_kruskal = statistic_kruskal + ) } else { # For mean difference, Cohen's d, and Hedges' g # Welch's t-test (equal_var = FALSE) to not assume equal variances welch <- stats::t.test(control, test, equal.var = FALSE, na.rm = TRUE) pvalue_welch <- welch$p.value statistic_welch <- welch$statistic - + # Student's t-test (equal_var = TRUE) to assume equal variances students_t <- stats::t.test(control, test, equal.var = TRUE, na.rm = TRUE) pvalue_students_t <- students_t$p.value statistic_students_t <- students_t$statistic - + # Mann-Whitney test: non-parametric, does not assume normality of distributions - tryCatch({ - mann_whitney <- stats::wilcox.test(control, test, alternative = "two.sided") - pvalue_mann_whitney <- mann_whitney$p.value - statistic_mann_whitney <- mann_whitney$statistic - }, error = function(e) { - # Occurs when the control and test are exactly identical in terms of rank (e.g., all zeros). - pvalue_mann_whitney <- NA - statistic_mann_whitney <- NA - }) - + tryCatch( + { + mann_whitney <- stats::wilcox.test(control, test, alternative = "two.sided") + pvalue_mann_whitney <- mann_whitney$p.value + statistic_mann_whitney <- mann_whitney$statistic + }, + error = function(e) { + # Occurs when the control and test are exactly identical in terms of rank (e.g., all zeros). + pvalue_mann_whitney <- NA + statistic_mann_whitney <- NA + } + ) + standardized_es <- effsize::cohen.d(control, test, is_paired = NULL) - + # Cohen's h calculation for binary categorical data - tryCatch({ - cohens_h_cal <- function(control, test) { - #remove nas and nulls later on - prop_control <- mean(control) - prop_test <- mean(test) - - # Arcsine transformation - phi_control <- 2 * asin(sqrt(prop_control)) - phi_test <- 2 * asin(sqrt(prop_test)) - result <- phi_test - phi_control - return(result) + tryCatch( + { + cohens_h_cal <- function(control, test) { + # remove nas and nulls later on + prop_control <- mean(control) + prop_test <- mean(test) + + # Arcsine transformation + phi_control <- 2 * asin(sqrt(prop_control)) + phi_test <- 2 * asin(sqrt(prop_test)) + result <- phi_test - phi_control + return(result) + } + proportional_difference <- cohens_h_cal(control, test) + }, + error = function(e) { + # Occur only when the data consists not only 0's and 1's. + proportional_difference <- NA } - proportional_difference <- cohens_h_cal(control, test) - }, error = function(e) { - # Occur only when the data consists not only 0's and 1's. - proportional_difference <- NA - }) - - pvals_stats <- list(pvalue_welch = pvalue_welch, - statistic_welch = statistic_welch, - # Student's t-test (equal_var = TRUE) to assume equal variances - students_t = students_t, - pvalue_students_t = pvalue_students_t, - statistic_students_t = statistic_students_t, - # Mann-Whitney test: non-parametric, does not assume normality of distributions - - pvalue_mann_whitney = pvalue_mann_whitney, - statistic_mann_whitney = statistic_mann_whitney) + ) + + pvals_stats <- list( + pvalue_welch = pvalue_welch, + statistic_welch = statistic_welch, + # Student's t-test (equal_var = TRUE) to assume equal variances + students_t = students_t, + pvalue_students_t = pvalue_students_t, + statistic_students_t = statistic_students_t, + # Mann-Whitney test: non-parametric, does not assume normality of distributions + + pvalue_mann_whitney = pvalue_mann_whitney, + statistic_mann_whitney = statistic_mann_whitney + ) } - - return (pvals_stats) + + return(pvals_stats) } # collate permtest and p values with function "Pvalues_statistics" Pvalues_statistics <- function(dabest_object, seed = 12345, - perm_count=5000, + perm_count = 5000, ef_size_fn = NULL, effect_size_type) { - permtest_pvals <- tibble::tibble() - + # check if effect size function is supplied - if (is.null(ef_size_fn)){ + if (is.null(ef_size_fn)) { stop("No effect size calculation methods are supplied.") } - + raw_data <- dabest_object$raw_data idx <- dabest_object$idx - + if (isFALSE(is.list(idx))) { idx <- list(idx) } @@ -220,55 +236,57 @@ Pvalues_statistics <- function(dabest_object, ci <- dabest_object$ci paired <- dabest_object$paired is_paired <- dabest_object$is_paired - + proportional <- dabest_object$proportional - + quoname_x <- rlang::as_name(enquo_x) quoname_y <- rlang::as_name(enquo_y) - + minimeta <- dabest_object$minimeta delta2 <- dabest_object$delta2 - + if (isFALSE(is_paired) || isTRUE(paired == "baseline")) { for (group in idx) { control_group <- group[1] group_length <- length(group) - - ctrl_tibble <- raw_data %>% + + ctrl_tibble <- raw_data %>% dplyr::filter(!!enquo_x == !!group[1]) ctrl_measurement <- ctrl_tibble[[quoname_y]] - + tests <- group[2:group_length] - + for (test_group in tests) { test_group <- test_group test_tibble <- raw_data %>% dplyr::filter(!!enquo_x == !!test_group) - + test_measurement <- test_tibble[[quoname_y]] - - xlabels <- paste(test_group, group[1], sep="\nminus\n") - + + xlabels <- paste(test_group, group[1], sep = "\nminus\n") + test_size <- length(test_measurement) - + es <- ef_size_fn(ctrl_measurement, test_measurement, paired = is_paired) - + # do permutation tests accordingly - PermutationTest_result <- PermutationTest(ctrl_measurement, - test_measurement, - effect_size = effect_size_type, - is_paired = is_paired, - permutation_count = perm_count, - random_seed = 12345, - ef_size_fn = ef_size_fn) - - # calculate p values - pvals_and_stats <- pvals_statistics(ctrl_measurement, - test_measurement, - is_paired = is_paired, - proportional = proportional, - effect_size = effect_size_type) - + PermutationTest_result <- PermutationTest(ctrl_measurement, + test_measurement, + effect_size = effect_size_type, + is_paired = is_paired, + permutation_count = perm_count, + random_seed = 12345, + ef_size_fn = ef_size_fn + ) + + # calculate p values + pvals_and_stats <- pvals_statistics(ctrl_measurement, + test_measurement, + is_paired = is_paired, + proportional = proportional, + effect_size = effect_size_type + ) + pval_row <- list( control_group = control_group, test_group = test_group, @@ -278,57 +296,63 @@ Pvalues_statistics <- function(dabest_object, pval_for_tests = pvals_and_stats[1], pvalues = list(pvals_and_stats) ) - + permtest_pvals <- dplyr::bind_rows(permtest_pvals, pval_row) } } } else { for (group in idx) { group_length <- length(group) - for (i in 1:(group_length-1)) { + for (i in 1:(group_length - 1)) { control_group <- group[i] - test_group <- group[i+1] - - ctrl_tibble <- raw_data %>% + test_group <- group[i + 1] + + ctrl_tibble <- raw_data %>% dplyr::filter(!!enquo_x == !!control_group) ctrl_measurement <- ctrl_tibble[[quoname_y]] - - test_tibble <- raw_data %>% + + test_tibble <- raw_data %>% dplyr::filter(!!enquo_x == !!test_group) test_measurement <- test_tibble[[quoname_y]] - - xlabels <- paste(test_group, control_group, sep="\nminus\n") - - control_test_measurement <- list(control = ctrl_measurement, - test = test_measurement) - #add weights column + + xlabels <- paste(test_group, control_group, sep = "\nminus\n") + + control_test_measurement <- list( + control = ctrl_measurement, + test = test_measurement + ) + # add weights column ctrl_size <- length(ctrl_measurement) ctrl_var <- var_w_df(ctrl_measurement, ctrl_size) test_size <- length(test_measurement) test_var <- var_w_df(test_measurement, test_size) - grp_var <- calculate_group_variance(ctrl_var = ctrl_var, - ctrl_N = ctrl_size, - test_var = test_var, - test_N = test_size) - + grp_var <- calculate_group_variance( + ctrl_var = ctrl_var, + ctrl_N = ctrl_size, + test_var = test_var, + test_N = test_size + ) + es <- ef_size_fn(ctrl_measurement, test_measurement, paired) - + # do permutation tests accordingly - PermutationTest_result <- PermutationTest(ctrl_measurement, - test_measurement, - effect_size = effect_size_type, - is_paired = is_paired, - permutation_count = perm_count, - random_seed = 12345, - ef_size_fn = ef_size_fn) - # calculate p values - pvals_and_stats <- pvals_statistics(ctrl_measurement, - test_measurement, - is_paired = is_paired, - proportional = proportional, - effect_size = effect_size_type) - - + PermutationTest_result <- PermutationTest(ctrl_measurement, + test_measurement, + effect_size = effect_size_type, + is_paired = is_paired, + permutation_count = perm_count, + random_seed = 12345, + ef_size_fn = ef_size_fn + ) + # calculate p values + pvals_and_stats <- pvals_statistics(ctrl_measurement, + test_measurement, + is_paired = is_paired, + proportional = proportional, + effect_size = effect_size_type + ) + + pval_row <- list( control_group = control_group, test_group = test_group, @@ -338,11 +362,11 @@ Pvalues_statistics <- function(dabest_object, pval_for_tests = pvals_and_stats[1], pvalues = list(pvals_and_stats) ) - + permtest_pvals <- dplyr::bind_rows(permtest_pvals, pval_row) } } } - + return(list(permtest_pvals = permtest_pvals)) } diff --git a/R/004_slopegraph.R b/R/004_slopegraph.R index ec73064..23dc053 100644 --- a/R/004_slopegraph.R +++ b/R/004_slopegraph.R @@ -1,55 +1,57 @@ # Functions that generate main_plots based on the main_plot_type of `create_rawplot_components`. -# +# # Contains function `plot_slopegraph`. # Function that plots slopegraph plot_slopegraph <- function(dabest_effectsize_obj, plot_kwargs) { raw_data <- dabest_effectsize_obj$raw_data - + raw_marker_size <- plot_kwargs$raw_marker_size raw_marker_alpha <- plot_kwargs$raw_marker_alpha - - enquo_x = dabest_effectsize_obj$enquo_x - enquo_y = dabest_effectsize_obj$enquo_y - enquo_id_col = dabest_effectsize_obj$enquo_id_col - enquo_colour = dabest_effectsize_obj$enquo_colour - + + enquo_x <- dabest_effectsize_obj$enquo_x + enquo_y <- dabest_effectsize_obj$enquo_y + enquo_id_col <- dabest_effectsize_obj$enquo_id_col + enquo_colour <- dabest_effectsize_obj$enquo_colour + name_x <- rlang::as_name(enquo_x) name_y <- rlang::as_name(enquo_y) - - idx = dabest_effectsize_obj$idx - + + idx <- dabest_effectsize_obj$idx + raw_plot <- ggplot2::ggplot() slopegraph_params <- list(linewidth = raw_marker_size, alpha = raw_marker_alpha) - - for(subplot_groups in idx) { + + for (subplot_groups in idx) { # Assign subplot. subplot <- dplyr::filter(raw_data, !!enquo_x %in% subplot_groups) - + subplot[[name_x]] <- subplot[[name_x]] %>% factor(subplot_groups, ordered = TRUE) - + slopegraph_params[["data"]] <- subplot - + # Assign aesthetic mappings. - if(rlang::quo_is_null(enquo_colour)) { + if (rlang::quo_is_null(enquo_colour)) { slopegraph_aes <- ggplot2::aes(x_axis_raw, !!enquo_y, - group = !!enquo_id_col) + group = !!enquo_id_col + ) } else { slopegraph_aes <- ggplot2::aes(x_axis_raw, !!enquo_y, - group = !!enquo_id_col, - col = !!enquo_colour) + group = !!enquo_id_col, + col = !!enquo_colour + ) } - + slopegraph_params[["mapping"]] <- slopegraph_aes - + # Create slopegraph raw_plot <- raw_plot + do.call(ggplot2::geom_line, slopegraph_params) } - + return(raw_plot) -} \ No newline at end of file +} diff --git a/R/005_printing.R b/R/005_printing.R index 5258e94..f635188 100644 --- a/R/005_printing.R +++ b/R/005_printing.R @@ -1,64 +1,71 @@ print_greeting_header <- function() { - dabest_ver <- utils::packageVersion("dabestr") - line1 <- paste("DABESTR v",dabest_ver,sep="") - - now = Sys.time() - - now_hour <- as.integer(format(now,"%H")) - + line1 <- paste("DABESTR v", dabest_ver, sep = "") + + now <- Sys.time() + + now_hour <- as.integer(format(now, "%H")) + if (now_hour < 12) { greeting <- "Good morning!" - } else if (now_hour >=12 & now_hour <18) { + } else if (now_hour >= 12 & now_hour < 18) { greeting <- "Good afternoon!" } else { greeting <- "Good evening!" } - - curr_time <- paste("The current time is", format(now,"%R %p on %A %B %d, %Y.")) - + + curr_time <- paste("The current time is", format(now, "%R %p on %A %B %d, %Y.")) + cat(line1) cat("\n") - cat(rep("=",nchar(line1)),sep="") + cat(rep("=", nchar(line1)), sep = "") cat("\n\n") - cat(greeting,curr_time,sep="\n") + cat(greeting, curr_time, sep = "\n") cat("\n") } print_each_comparism <- function(dabest_object) { - i <- 1 if (is.list(dabest_object$idx)) { - for (group in dabest_object$idx) { # Get test groups (everything else in group), loop through them and compute # the difference between group[1] and each group. # Test groups are the 2nd element of group onwards. - control_group <- group[1] - test_groups <- group[2: length(group)] - - for (current_test_group in test_groups) { - cat(stringr::str_interp("${i}. ${current_test_group} minus ${control_group}\n")) - i <- i + 1 + test_groups <- group[2:length(group)] + + if (is.null(dabest_object$paired) || dabest_object$paired == "baseline") { + control_group <- group[1] + test_groups <- group[2:length(group)] + for (current_test_group in test_groups) { + cat(stringr::str_interp("${i}. ${current_test_group} minus ${control_group}\n")) + i <- i + 1 + } + } else { + for (n in 1:(length(group) - 1)) { + current_group <- group[n + 1] + previous_group <- group[n] + cat(stringr::str_interp("${i}. ${current_group} minus ${previous_group}\n")) + i <- i + 1 + } } } - + if (isTRUE(dabest_object$minimeta)) { cat(stringr::str_interp("${i}. weighted delta (only for mean difference)\n")) - i <- i+1 + i <- i + 1 } - + if (isTRUE(dabest_object$delta2)) { - experiment1 <- dabest_object$experiment[1] - experiment2 <- dabest_object$experiment[2] - + experiment1 <- dabest_object$experiment_label[2] + experiment2 <- dabest_object$experiment_label[1] + cat(stringr::str_interp("${i}. ${experiment1} minus ${experiment2} (only for mean difference)\n")) } } else { control_group <- dabest_object$idx[1] test_groups <- dabest_object$idx[2:length(dabest_object$idx)] - + for (current_test_group in test_groups) { cat(stringr::str_interp(" ${i}. ${current_test_group} minus ${control_group}\n")) i <- i + 1 @@ -67,82 +74,101 @@ print_each_comparism <- function(dabest_object) { cat("\n") } -print_each_comparism_effectsize <- function(dabest_object,effectsize) { - - if (effectsize=="mean_diff") { +print_each_comparism_effectsize <- function(dabest_object, effectsize) { + if (effectsize == "mean_diff") { es <- "mean difference" - } else if (effectsize =="median_diff") { + } else if (effectsize == "median_diff") { es <- "median difference" } else if (effectsize == "cohens_d") { es <- "Cohen's d" - } else if (effectsize =="hedges_g") { + } else if (effectsize == "hedges_g") { es <- "Hedges'g" } else if (effectsize == "cliffs_delta") { es <- "Cliff's delta" } else { es <- "Cohen's h" } - + i <- 1 paired <- dabest_object$paired - difference <- round(dabest_object$boot_result$difference,3) - bca_low <- round(dabest_object$boot_result$bca_ci_low,3) - bca_high <- round(dabest_object$boot_result$bca_ci_high,3) + difference <- round(dabest_object$boot_result$difference, 3) + bca_low <- round(dabest_object$boot_result$bca_ci_low, 3) + bca_high <- round(dabest_object$boot_result$bca_ci_high, 3) ci <- dabest_object$boot_result$ci - pvalue <- dabest_object$permtest_pvals$pval_permtest - + pvalue <- dabest_object$permtest_pvals$pval_for_tests + if (is.null(paired)) { rm_status <- "" - } else if (paired =="sequential") { + } else if (paired == "sequential") { rm_status <- "for the sequential design of repeated-measures experiment \n" - } else if (paired=="baseline") { + } else if (paired == "baseline") { rm_status <- "for repeated measures against baseline \n" } - + if (is.null(paired)) { paired_status <- "unpaired" - } else if (paired =="sequential") { + } else if (paired == "sequential") { paired_status <- "paired" - } else if (paired =="baseline") { + } else if (paired == "baseline") { paired_status <- "paired" } - + if (is.list(dabest_object$idx)) { - for (group in dabest_object$idx) { # Get test groups (everything else in group), loop through them and compute # the difference between group[1] and each group. # Test groups are the 2nd element of group onwards. - + control_group <- group[1] - test_groups <- group[2: length(group)] - - for (current_test_group in test_groups) { - current_paired <- paired[i] - current_difference <- difference[i] - current_bca_low <- bca_low[i] - current_bca_high <- bca_high[i] - current_ci <- ci[i] - - cat(stringr::str_interp("The ${paired_status} ${es} between ${current_test_group} and ${control_group} is ${current_difference} [${current_ci}%CI ${current_bca_low}, ${current_bca_high}].\n")) - cat(stringr::str_interp("The p-value of the two-sided permutation t-test is ${pvalue}, calculated for legacy purposes only.")) - cat("\n\n") - i <- i+1 + test_groups <- group[2:length(group)] + + if (is.null(dabest_object$paired) || dabest_object$paired == "baseline") { + control_group <- group[1] + test_groups <- group[2:length(group)] + for (current_test_group in test_groups) { + current_paired <- paired[i] + current_difference <- difference[i] + current_bca_low <- bca_low[i] + current_bca_high <- bca_high[i] + current_ci <- ci[i] + current_pval <- pvalue[i] + + cat(stringr::str_interp("The ${paired_status} ${es} between ${current_test_group} and ${control_group} is ${current_difference} [${current_ci}%CI ${current_bca_low}, ${current_bca_high}].\n")) + cat(stringr::str_interp("The p-value of the two-sided permutation t-test is ${sprintf(current_pval, fmt = '%#.4f')}, calculated for legacy purposes only.")) + cat("\n\n") + i <- i + 1 + } + } else { + for (n in 1:(length(group) - 1)) { + current_group <- group[n + 1] + previous_group <- group[n] + current_paired <- paired[i] + current_difference <- difference[i] + current_bca_low <- bca_low[i] + current_bca_high <- bca_high[i] + current_ci <- ci[i] + current_pval <- pvalue[i] + + cat(stringr::str_interp("The ${paired_status} ${es} between ${current_group} and ${previous_group} is ${current_difference} [${current_ci}%CI ${current_bca_low}, ${current_bca_high}].\n")) + cat(stringr::str_interp("The p-value of the two-sided permutation t-test is ${sprintf(current_pval, fmt = '%#.4f')}, calculated for legacy purposes only.")) + cat("\n\n") + i <- i + 1 + } } } } else { control_group <- dabest_object$idx[1] test_groups <- dabest_object$idx[2:length(dabest_object$idx)] - + for (current_test_group in test_groups) { cat(stringr::str_interp("The ${paired_status} ${es} between ${current_test_group} and ${control_group} is ${difference} [${ci}%CI ${bca_low}, ${bca_high}].\n")) - cat(stringr::str_interp("The p-value of the two-sided permutation t-test is ${pvalue}, calculated for legacy purposes only.\n")) + cat(stringr::str_interp("The p-value of the two-sided permutation t-test is ${sprintf(current_pval, fmt = '%#.4f')}, calculated for legacy purposes only.\n")) } } } print_ending <- function(dabest_object) { - if (class(dabest_object)=="dabest") { + if (methods::is(dabest_object, "dabest")) { nboots <- dabest_object$resamples cat(stringr::str_interp("${nboots} resamples will be used to generate the effect size bootstraps.\n\n")) } else { @@ -153,7 +179,5 @@ print_ending <- function(dabest_object) { cat("assuming the null hypothesis of zero difference is true.\n") cat(stringr::str_interp("For each p-value, ${nreshuffles} reshuffles of the control and test labels were performed.\n")) cat("\n") - cat("To get the results of all valid statistical tests, use .mean_diff.statistical_tests\n\n") } } - diff --git a/R/999_plot_kwargs.R b/R/999_plot_kwargs.R index a9f8e59..459b12a 100644 --- a/R/999_plot_kwargs.R +++ b/R/999_plot_kwargs.R @@ -1,27 +1,27 @@ # Helper functions that deal with assignment of plot_kwargs for plot -# +# # Contains function `assign_plot_kwargs`. #' Adjustable Plot Aesthetics #' @name plot_kwargs -#' -#' @description +#' +#' @description #' These are the available plot kwargs for adjusting the plot aesthetics of your #' estimation plot: -#' -#' - `swarm_label` Default "value" or "proportion of success" for proportion plots. +#' +#' - `swarm_label` Default "value" or "proportion of success" for proportion plots. #' Label for the y-axis of the swarm plot. -#' - `contrast_label` Default "effect size", based on the effect sizes as given in [effect_size()]. +#' - `contrast_label` Default "effect size", based on the effect sizes as given in [effect_size()]. #' Label for the y-axis of the contrast plot. -#' - `delta2_label` Default NULL. Label for the y-label for the delta-delta plot. +#' - `delta2_label` Default NULL. Label for the y-label for the delta-delta plot. #' - `swarm_x_text` Default 11. Numeric value determining the font size of the x-axis of the swarm plot. -#' - `swarm_y_text` Default 15. Numeric value determining the font size of the y-axis of the swarm plot. +#' - `swarm_y_text` Default 15. Numeric value determining the font size of the y-axis of the swarm plot. #' - `contrast_x_text` Default 11. Numeric value determining the font size of the x-axis of the delta plot. #' - `contrast_y_text` Default 15. Numeric value determining the font size of the y-axis of the delta plot. #' - `swarm_ylim` Default NULL. Vector containing the y limits for the swarm plot #' - `contrast_ylim` Default NULL. Vector containing the y limits for the delta plot. #' - `delta2_ylim` Default NULL. Vector containing the y limits for the delta-delta plot. -#' - `raw_marker_size` Default 1.5. Numeric value determining the size of the points used in the swarm plot. +#' - `raw_marker_size` Default 1.5. Numeric value determining the size of the points used in the swarm plot. #' - `tufte_size` Default 0.8. Numeric value determining the size of the tufte line in the swarm plot. #' - `es_marker_size` Default 0.5. Numeric value determining the size of the points used in the delta plot. #' - `es_line_size` Default 0.8. Numeric value determining the size of the ci line in the delta plot. @@ -32,139 +32,139 @@ #' direction of the `asymmetric_side`. #' - `asymmetric_side` Default "right". Can be either "right" or "left". Controls which side the swarm points are shown. #' - `show_delta2` Default FALSE. Boolean value determining if the delta-delta plot is shown. -#' - `show_mini_meta` Default FALSE. Boolean value determining if the weighted average plot is shown. +#' - `show_mini_meta` Default FALSE. Boolean value determining if the weighted average plot is shown. #' If False, the resulting graph would be identical to a multiple two-groups plot. -#' - `show_zero_dot` Default TRUE. Boolean value determining if there is a dot on +#' - `show_zero_dot` Default TRUE. Boolean value determining if there is a dot on #' the zero line of the effect size for the control-control group. #' - `show_baseline_ec` Default FALSE. Boolean value determining whether the baseline curve is shown. #' - `sankey` Default TRUE. Boolean value determining if the flows between the bar charts will be plotted. -#' - `sankey_alpha` Default 0.5. Numeric value determining the transparency of the sankey flows in a +#' - `raw_flow_alpha` Default 0.5. Numeric value determining the transparency of the sankey flows in a #' paired proportion plot. #' - `flow` Default TRUE. Boolean value determining whether the bars will be plotted in pairs. #' - `custom_palette` Default "d3". String. The following palettes are available for use: #' npg, aaas, nejm, lancet, jama, jco, ucscgb, d3, locuszoom, igv, cosmic, uchicago, brewer, ordinal, viridis_d. -#' -#' +#' +#' NULL assign_plot_kwargs <- function(dabest_effectsize_obj, plot_kwargs) { custom_palette <- "d3" - + swarm_label <- dabest_effectsize_obj$raw_y_labels contrast_label <- dabest_effectsize_obj$delta_y_labels delta2_label <- NULL - + swarm_ylim <- NULL contrast_ylim <- NULL delta2_ylim <- NULL - + show_delta2 <- dabest_effectsize_obj$delta2 show_mini_meta <- dabest_effectsize_obj$minimeta - + asymmetric_side <- "right" raw_marker_size <- 1.5 raw_marker_alpha <- 1 raw_marker_spread <- 2 raw_marker_side_shift <- 0 + raw_flow_alpha <- 0.5 raw_bar_width <- 0.3 tufte_size <- 0.8 es_marker_size <- 0.5 es_line_size <- 0.8 - + swarm_y_text <- 15 swarm_x_text <- 11 contrast_y_text <- 15 contrast_x_text <- 11 - + show_zero_dot <- TRUE show_baseline_ec <- FALSE - + sankey <- TRUE flow <- TRUE - sankey_alpha <- 0.5 - - if(isFALSE(is.null(plot_kwargs$swarm_label))) { + + if (isFALSE(is.null(plot_kwargs$swarm_label))) { swarm_label <- plot_kwargs$swarm_label } - if(isFALSE(is.null(plot_kwargs$contrast_label))) { + if (isFALSE(is.null(plot_kwargs$contrast_label))) { contrast_label <- plot_kwargs$contrast_label } - if(isFALSE(is.null(plot_kwargs$custom_palette))) { + if (isFALSE(is.null(plot_kwargs$custom_palette))) { custom_palette <- plot_kwargs$custom_palette } - if(isFALSE(is.null(plot_kwargs$swarm_ylim))) { + if (isFALSE(is.null(plot_kwargs$swarm_ylim))) { swarm_ylim <- plot_kwargs$swarm_ylim } - if(isFALSE(is.null(plot_kwargs$contrast_ylim))) { + if (isFALSE(is.null(plot_kwargs$contrast_ylim))) { contrast_ylim <- plot_kwargs$contrast_ylim } - if(isFALSE(is.null(plot_kwargs$delta2_ylim))) { + if (isFALSE(is.null(plot_kwargs$delta2_ylim))) { delta2_ylim <- plot_kwargs$delta2_ylim } - if(isFALSE(is.null(plot_kwargs$delta2_label))) { + if (isFALSE(is.null(plot_kwargs$delta2_label))) { delta2_label <- plot_kwargs$delta2_label } - if(isFALSE(is.null(plot_kwargs$show_delta2))) { + if (isFALSE(is.null(plot_kwargs$show_delta2))) { show_delta2 <- plot_kwargs$show_delta2 } - if(isFALSE(is.null(plot_kwargs$show_mini_meta))) { + if (isFALSE(is.null(plot_kwargs$show_mini_meta))) { show_mini_meta <- plot_kwargs$show_mini_meta } - if(isFALSE(is.null(plot_kwargs$raw_marker_size))) { + if (isFALSE(is.null(plot_kwargs$raw_marker_size))) { raw_marker_size <- plot_kwargs$raw_marker_size } - if(isFALSE(is.null(plot_kwargs$raw_marker_alpha))) { + if (isFALSE(is.null(plot_kwargs$raw_marker_alpha))) { raw_marker_alpha <- plot_kwargs$raw_marker_alpha } - if(isFALSE(is.null(plot_kwargs$raw_marker_side_shift))) { + if (isFALSE(is.null(plot_kwargs$raw_marker_side_shift))) { raw_marker_side_shift <- plot_kwargs$raw_marker_side_shift } - if(isFALSE(is.null(plot_kwargs$tufte_size))) { + if (isFALSE(is.null(plot_kwargs$tufte_size))) { tufte_size <- plot_kwargs$tufte_size } - if(isFALSE(is.null(plot_kwargs$es_marker_size))) { + if (isFALSE(is.null(plot_kwargs$es_marker_size))) { es_marker_size <- plot_kwargs$es_marker_size } - if(isFALSE(is.null(plot_kwargs$es_line_size))) { + if (isFALSE(is.null(plot_kwargs$es_line_size))) { es_line_size <- plot_kwargs$es_line_size } - if(isFALSE(is.null(plot_kwargs$raw_bar_width))) { + if (isFALSE(is.null(plot_kwargs$raw_bar_width))) { raw_bar_width <- plot_kwargs$raw_bar_width } - if(isFALSE(is.null(plot_kwargs$raw_marker_spread))) { + if (isFALSE(is.null(plot_kwargs$raw_marker_spread))) { raw_marker_spread <- plot_kwargs$raw_marker_spread } - if(isFALSE(is.null(plot_kwargs$sankey))) { + if (isFALSE(is.null(plot_kwargs$sankey))) { sankey <- plot_kwargs$sankey } - if(isFALSE(is.null(plot_kwargs$flow))) { + if (isFALSE(is.null(plot_kwargs$flow))) { flow <- plot_kwargs$flow } - if(isFALSE(is.null(plot_kwargs$sankey_alpha))) { - sankey_alpha <- plot_kwargs$sankey_alpha + if (isFALSE(is.null(plot_kwargs$raw_flow_alpha))) { + raw_flow_alpha <- plot_kwargs$raw_flow_alpha } - if(isFALSE(is.null(plot_kwargs$swarm_y_text))) { + if (isFALSE(is.null(plot_kwargs$swarm_y_text))) { swarm_y_text <- plot_kwargs$swarm_y_text } - if(isFALSE(is.null(plot_kwargs$swarm_x_text))) { + if (isFALSE(is.null(plot_kwargs$swarm_x_text))) { swarm_x_text <- plot_kwargs$swarm_x_text } - if(isFALSE(is.null(plot_kwargs$contrast_y_text))) { + if (isFALSE(is.null(plot_kwargs$contrast_y_text))) { contrast_y_text <- plot_kwargs$contrast_y_text } - if(isFALSE(is.null(plot_kwargs$contrast_x_text))) { + if (isFALSE(is.null(plot_kwargs$contrast_x_text))) { contrast_x_text <- plot_kwargs$contrast_x_text } - if(isFALSE(is.null(plot_kwargs$show_zero_dot))) { + if (isFALSE(is.null(plot_kwargs$show_zero_dot))) { show_zero_dot <- plot_kwargs$show_zero_dot } - if(isFALSE(is.null(plot_kwargs$show_baseline_ec))) { + if (isFALSE(is.null(plot_kwargs$show_baseline_ec))) { show_baseline_ec <- plot_kwargs$show_baseline_ec } - if(isFALSE(is.null(plot_kwargs$asymmetric_side))) { + if (isFALSE(is.null(plot_kwargs$asymmetric_side))) { asymmetric_side <- plot_kwargs$asymmetric_side } - + return(list( swarm_label = swarm_label, contrast_label = contrast_label, @@ -184,9 +184,9 @@ assign_plot_kwargs <- function(dabest_effectsize_obj, plot_kwargs) { es_marker_size = es_marker_size, es_line_size = es_line_size, sankey = sankey, - flow = flow, - sankey_alpha = sankey_alpha, - swarm_y_text = swarm_y_text, + flow = flow, + raw_flow_alpha = raw_flow_alpha, + swarm_y_text = swarm_y_text, swarm_x_text = swarm_x_text, contrast_y_text = contrast_y_text, contrast_x_text = contrast_x_text, diff --git a/R/999_plot_palettes.R b/R/999_plot_palettes.R index b1b9b96..5d17c4a 100644 --- a/R/999_plot_palettes.R +++ b/R/999_plot_palettes.R @@ -1,57 +1,41 @@ # Helper functions that deal with assignment of colour palettes for the overall plots -# +# # Contains function `apply_palette`. # Applies palettes to objects apply_palette <- function(ggplot_object, palette_name) { - ggplot_object <- switch( - palette_name, - + ggplot_object <- switch(palette_name, "npg" = ggplot_object + ggsci::scale_color_npg() + ggsci::scale_fill_npg(), - - "aaas" = + "aaas" = ggplot_object + ggsci::scale_color_aaas() + ggsci::scale_fill_aaas(), - - "nejm" = + "nejm" = ggplot_object + ggsci::scale_color_nejm() + ggsci::scale_fill_nejm(), - - "lancet" = + "lancet" = ggplot_object + ggsci::scale_color_lancet() + ggsci::scale_fill_lancet(), - - "jama" = + "jama" = ggplot_object + ggsci::scale_color_jama() + ggsci::scale_fill_jama(), - - "jco" = + "jco" = ggplot_object + ggsci::scale_color_jco() + ggsci::scale_fill_jco(), - - "ucscgb" = + "ucscgb" = ggplot_object + ggsci::scale_color_ucscgb() + ggsci::scale_fill_ucscgb(), - - "d3" = + "d3" = ggplot_object + ggsci::scale_color_d3() + ggsci::scale_fill_d3(), - - "locuszoom" = + "locuszoom" = ggplot_object + ggsci::scale_color_locuszoom() + ggsci::scale_fill_locuszoom(), - - "igv" = + "igv" = ggplot_object + ggsci::scale_color_igv() + ggsci::scale_fill_igv(), - - "cosmic" = + "cosmic" = ggplot_object + ggsci::scale_color_cosmic() + ggsci::scale_fill_cosmic(), - - "uchicago" = + "uchicago" = ggplot_object + ggsci::scale_color_uchicago() + ggsci::scale_fill_uchicago(), - - "brewer" = + "brewer" = ggplot_object + ggplot2::scale_color_brewer() + ggplot2::scale_fill_brewer(), - - "ordinal" = + "ordinal" = ggplot_object + ggplot2::scale_color_ordinal() + ggplot2::scale_fill_ordinal(), - - "viridis_d" = + "viridis_d" = ggplot_object + ggplot2::scale_color_viridis_d() + ggplot2::scale_fill_viridis_d() ) - + return(ggplot_object) -} \ No newline at end of file +} diff --git a/README.Rmd b/README.Rmd index 2a77b3b..4957712 100644 --- a/README.Rmd +++ b/README.Rmd @@ -15,7 +15,7 @@ knitr::opts_chunk$set( # dabestr -[![Travis CI build status](https://img.shields.io/travis/com/ACCLAB/dabestr/master.svg)](https://travis-ci.com/ACCLAB/dabestr/) [![minimal R version](https://img.shields.io/badge/R%3E%3D-2.10-6666ff.svg)](https://cran.r-project.org/) [![CRAN Download Count](https://cranlogs.r-pkg.org/badges/grand-total/dabestr?color=brightgreen)](https://cran.r-project.org/package=dabestr) [![Free-to-view citation](https://zenodo.org/badge/DOI/10.1038/s41592-019-0470-3.svg)](https://rdcu.be/bHhJ4) [![License](https://img.shields.io/badge/License-Apache_2.0-orange.svg)](https://spdx.org/licenses/BSD-3-Clause-Clear.html) +[![minimal R version](https://img.shields.io/badge/R%3E%3D-2.10-6666ff.svg)](https://cran.r-project.org/) [![CRAN Download Count](https://cranlogs.r-pkg.org/badges/grand-total/dabestr?color=brightgreen)](https://cran.r-project.org/package=dabestr) [![Free-to-view citation](https://zenodo.org/badge/DOI/10.1038/s41592-019-0470-3.svg)](https://www.nature.com/articles/s41592-019-0470-3.epdf?author_access_token=Euy6APITxsYA3huBKOFBvNRgN0jAjWel9jnR3ZoTv0Pr6zJiJ3AA5aH4989gOJS_dajtNr1Wt17D0fh-t4GFcvqwMYN03qb8C33na_UrCUcGrt-Z0J9aPL6TPSbOxIC-pbHWKUDo2XsUOr3hQmlRew%3D%3D) [![License](https://img.shields.io/badge/License-Apache_2.0-orange.svg)](https://spdx.org/licenses/BSD-3-Clause-Clear.html) [![R-CMD-check](https://github.com/sunroofgod/dabestr-prototype/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/sunroofgod/dabestr-prototype/actions/workflows/R-CMD-check.yaml) @@ -30,17 +30,15 @@ An estimation plot has two key features. 2. It presents the **effect size** as a **bootstrap 95% confidence interval** on a **separate but aligned axes**. -```{r, echo = FALSE, out.width = "47%", out.height = "20%", fig.cap = " ", fig.show = 'hold', fig.align = 'center', out.extra = 'style="background-colour: #FFFFFF;padding:10px; display: inline-block;"'} -knitr::include_graphics(c("man/figures/2group_float_true.png","man/figures/multigroup_unpaired_proportion.png")) -knitr::include_graphics(c("man/figures/multigroup_unpaired_alpha=0.8.png","man/figures/multigroup_paired_proportion_sequential.png")) -knitr::include_graphics(c("man/figures/multigroup_minimeta.png","man/figures/multigroup_deltadelta_unpaired.png")) -``` - - ## Installation -```{bash, eval = FALSE} -git clone https://github.com/ACCLAB/dabestr +```{r, eval = FALSE} +# Install it from CRAN +install.packages("dabestr") + +# Or the development version from GitHub: +# install.packages("devtools") +devtools::install_github(repo = "ACCLAB/dabestr", ref = "dev") ``` ## Usage @@ -52,11 +50,13 @@ library(dabestr) ```{r, dpi = 500} data("non_proportional_data") -dabest_obj.mean_diff <- dabestr::load(data = non_proportional_data, - x = Group, - y = Measurement, - idx = c("Control 1", "Test 1")) %>% - dabestr::mean_diff() +dabest_obj.mean_diff <- load( + data = non_proportional_data, + x = Group, + y = Measurement, + idx = c("Control 1", "Test 1") +) %>% + mean_diff() dabest_plot(dabest_obj.mean_diff, TRUE) ``` @@ -69,7 +69,7 @@ dabest_plot(dabest_obj.mean_diff, TRUE) Nature Methods 2019, 1548-7105. [10.1038/s41592-019-0470-3](http://dx.doi.org/10.1038/s41592-019-0470-3) -[Paywalled publisher site](https://www.nature.com/articles/s41592-019-0470-3); [Free-to-view PDF](https://rdcu.be/bHhJ4) +[Paywalled publisher site](https://www.nature.com/articles/s41592-019-0470-3); [Free-to-view PDF](https://www.nature.com/articles/s41592-019-0470-3.epdf?author_access_token=Euy6APITxsYA3huBKOFBvNRgN0jAjWel9jnR3ZoTv0Pr6zJiJ3AA5aH4989gOJS_dajtNr1Wt17D0fh-t4GFcvqwMYN03qb8C33na_UrCUcGrt-Z0J9aPL6TPSbOxIC-pbHWKUDo2XsUOr3hQmlRew%3D%3D) ## Contributing diff --git a/README.md b/README.md index 6d87501..a65d231 100644 --- a/README.md +++ b/README.md @@ -5,14 +5,12 @@ -[![Travis CI build -status](https://img.shields.io/travis/com/ACCLAB/dabestr/master.svg)](https://travis-ci.com/ACCLAB/dabestr/) [![minimal R version](https://img.shields.io/badge/R%3E%3D-2.10-6666ff.svg)](https://cran.r-project.org/) [![CRAN Download Count](https://cranlogs.r-pkg.org/badges/grand-total/dabestr?color=brightgreen)](https://cran.r-project.org/package=dabestr) [![Free-to-view -citation](https://zenodo.org/badge/DOI/10.1038/s41592-019-0470-3.svg)](https://rdcu.be/bHhJ4) +citation](https://zenodo.org/badge/DOI/10.1038/s41592-019-0470-3.svg)](https://www.nature.com/articles/s41592-019-0470-3.epdf?author_access_token=Euy6APITxsYA3huBKOFBvNRgN0jAjWel9jnR3ZoTv0Pr6zJiJ3AA5aH4989gOJS_dajtNr1Wt17D0fh-t4GFcvqwMYN03qb8C33na_UrCUcGrt-Z0J9aPL6TPSbOxIC-pbHWKUDo2XsUOr3hQmlRew%3D%3D) [![License](https://img.shields.io/badge/License-Apache_2.0-orange.svg)](https://spdx.org/licenses/BSD-3-Clause-Clear.html) [![R-CMD-check](https://github.com/sunroofgod/dabestr-prototype/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/sunroofgod/dabestr-prototype/actions/workflows/R-CMD-check.yaml) @@ -41,18 +39,15 @@ An estimation plot has two key features. 2. It presents the **effect size** as a **bootstrap 95% confidence interval** on a **separate but aligned axes**. -
- -      -

-

- -
- ## Installation -``` bash -git clone https://github.com/ACCLAB/dabestr +``` r +# Install it from CRAN +install.packages("dabestr") + +# Or the development version from GitHub: +# install.packages("devtools") +devtools::install_github(repo = "ACCLAB/dabestr", ref = "dev") ``` ## Usage @@ -64,16 +59,18 @@ library(dabestr) ``` r data("non_proportional_data") -dabest_obj.mean_diff <- dabestr::load(data = non_proportional_data, - x = Group, - y = Measurement, - idx = c("Control 1", "Test 1")) %>% - dabestr::mean_diff() +dabest_obj.mean_diff <- load( + data = non_proportional_data, + x = Group, + y = Measurement, + idx = c("Control 1", "Test 1") +) %>% + mean_diff() dabest_plot(dabest_obj.mean_diff, TRUE) ``` -![](man/figures/README-unnamed-chunk-5-1.png) +![](man/figures/README-unnamed-chunk-4-1.png) ## Citation @@ -87,7 +84,7 @@ Nature Methods 2019, 1548-7105. [Paywalled publisher site](https://www.nature.com/articles/s41592-019-0470-3); [Free-to-view -PDF](https://rdcu.be/bHhJ4) +PDF](https://www.nature.com/articles/s41592-019-0470-3.epdf?author_access_token=Euy6APITxsYA3huBKOFBvNRgN0jAjWel9jnR3ZoTv0Pr6zJiJ3AA5aH4989gOJS_dajtNr1Wt17D0fh-t4GFcvqwMYN03qb8C33na_UrCUcGrt-Z0J9aPL6TPSbOxIC-pbHWKUDo2XsUOr3hQmlRew%3D%3D) ## Contributing diff --git a/_pkgdown.yml b/_pkgdown.yml index 0773b7c..aa71bef 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -38,7 +38,7 @@ articles: - tutorial_deltadelta - datasets news: - cran_dates: false + cran_dates: true reference: - subtitle: Main API desc: "Functions responsible for producing an estimation plot. They have been sequentially diff --git a/cran-comments.md b/cran-comments.md new file mode 100644 index 0000000..31053cc --- /dev/null +++ b/cran-comments.md @@ -0,0 +1,43 @@ +## R CMD check results + +0 errors | 0 warnings | 3 note + +1. checking CRAN incoming feasibility ... [29s] NOTE +``` +Maintainer: 'Yishan Mai ' +New maintainer: + Yishan Mai +Old maintainer(s): + Joses W. Ho +CRAN repository db overrides: + + License_is_FOSS: yes +``` +Changing of maintainer. + +2. checking for non-standard things in the check directory ... NOTE +``` +Found the following files/directories: + ''NULL'' +``` +As noted in [R-hub issue #560](https://github.com/r-hub/rhub/issues/560), this seems to be an Rhub issue and so can likely be ignored. + +3. checking for detritus in the temp directory ... NOTE +``` +Found the following files/directories: + 'lastMiKTeXException' +``` +As noted in [R-hub issue #503](https://github.com/r-hub/rhub/issues/503), this could be due to a bug/crash in MiKTeX and can likely be ignored. + +## revdepcheck results + +We checked 1 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package. + + * We saw 0 new problems + * We failed to check 1 packages + +Issues with CRAN packages are summarised below. + +### Failed to check + +* permubiome (NA) \ No newline at end of file diff --git a/inst/CITATION b/inst/CITATION index 757aeed..b1643ea 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -1,15 +1,14 @@ -citHeader("To cite dabestr in publications, please use:") - -citEntry(entry = "Misc", - title = "Moving beyond P values: Everyday data analysis with estimation plots.", - author = personList(as.person("Joses Ho"), - as.person("Tayfun Tumkaya"), - as.person("Sameer Aryal"), - as.person("Hyungwon Choi"), - as.person("Adam Claridge-Chang") - ), +bibentry(bibtype = "Misc", + title = "Moving beyond P values: Everyday data analysis with estimation plots.", + author = c( + person("Joses Ho"), + person("Tayfun Tumkaya"), + person("Sameer Aryal"), + person("Hyungwon Choi"), + person("Adam Claridge-Chang") + ), year = "2019", doi = "10.1038/s41592-019-0470-3", - url = "https://rdcu.be/bHhJ4", + url = "https://www.nature.com/articles/s41592-019-0470-3.epdf?author_access_token=Euy6APITxsYA3huBKOFBvNRgN0jAjWel9jnR3ZoTv0Pr6zJiJ3AA5aH4989gOJS_dajtNr1Wt17D0fh-t4GFcvqwMYN03qb8C33na_UrCUcGrt-Z0J9aPL6TPSbOxIC-pbHWKUDo2XsUOr3hQmlRew%3D%3D", textVersion = "Moving beyond P values: Everyday data analysis with estimation plots. (2019) Joses Ho, Tayfun Tumkaya, Sameer Aryal, Hyungwon Choi, Adam Claridge-Chang. Nature Methods 2019, 1548-710. doi: https://doi.org/10.1038/s41592-019-0470-3" ) diff --git a/man/dabest_plot.Rd b/man/dabest_plot.Rd index 59ba8a7..265ac40 100644 --- a/man/dabest_plot.Rd +++ b/man/dabest_plot.Rd @@ -28,10 +28,13 @@ but aligned axes. data(twogroup_data) # Preparing the data to be plotted -dabest_obj <- load(non_proportional_data, x = Group, y = Measurement, idx = c("Control 1", "Test 1")) +dabest_obj <- load(non_proportional_data, + x = Group, y = Measurement, + idx = c("Control 1", "Test 1") +) dabest_obj.mean_diff <- mean_diff(dabest_obj) -# Plotting of dabest_obj.mean_diff -dabest_plot(dabest_obj.mean_diff, TRUE) +# Plotting an estimation plot +dabest_plot(dabest_obj.mean_diff, TRUE) } diff --git a/man/effect_size.Rd b/man/effect_size.Rd index 6073a52..69578d5 100644 --- a/man/effect_size.Rd +++ b/man/effect_size.Rd @@ -83,8 +83,13 @@ The other plots are able to use all given basic effect sizes as listed in the De # Loading of the dataset data(non_proportional_data) -# Preparing the data to be plotted -dabest_obj <- load(non_proportional_data, x = Group, y = Measurement, idx = c("Control 1", "Test 1")) +# Applying effect size to the dabest object +dabest_obj <- load(non_proportional_data, + x = Group, y = Measurement, + idx = c("Control 1", "Test 1") +) dabest_obj.mean_diff <- mean_diff(dabest_obj) +# Printing dabest effectsize object +print(dabest_obj.mean_diff) } diff --git a/man/figures/2group_float_true.png b/man/figures/2group_float_true.png deleted file mode 100644 index 155d1da..0000000 Binary files a/man/figures/2group_float_true.png and /dev/null differ diff --git a/man/figures/ACCLAB.png b/man/figures/ACCLAB.png deleted file mode 100644 index b59a191..0000000 Binary files a/man/figures/ACCLAB.png and /dev/null differ diff --git a/man/figures/README-example-1.png b/man/figures/README-example-1.png deleted file mode 100644 index 3a5680e..0000000 Binary files a/man/figures/README-example-1.png and /dev/null differ diff --git a/man/figures/README-unnamed-chunk-2-1.png b/man/figures/README-unnamed-chunk-2-1.png deleted file mode 100644 index 3a5680e..0000000 Binary files a/man/figures/README-unnamed-chunk-2-1.png and /dev/null differ diff --git a/man/figures/README-unnamed-chunk-3-1.png b/man/figures/README-unnamed-chunk-3-1.png deleted file mode 100644 index e786447..0000000 Binary files a/man/figures/README-unnamed-chunk-3-1.png and /dev/null differ diff --git a/man/figures/README-unnamed-chunk-4-1.png b/man/figures/README-unnamed-chunk-4-1.png index da03078..574b5c5 100644 Binary files a/man/figures/README-unnamed-chunk-4-1.png and b/man/figures/README-unnamed-chunk-4-1.png differ diff --git a/man/figures/README-unnamed-chunk-6-1.png b/man/figures/README-unnamed-chunk-6-1.png deleted file mode 100644 index da03078..0000000 Binary files a/man/figures/README-unnamed-chunk-6-1.png and /dev/null differ diff --git a/man/figures/multigroup_baseline_colour.png b/man/figures/multigroup_baseline_colour.png deleted file mode 100644 index ef47eec..0000000 Binary files a/man/figures/multigroup_baseline_colour.png and /dev/null differ diff --git a/man/figures/multigroup_deltadelta_unpaired.png b/man/figures/multigroup_deltadelta_unpaired.png deleted file mode 100644 index e104932..0000000 Binary files a/man/figures/multigroup_deltadelta_unpaired.png and /dev/null differ diff --git a/man/figures/multigroup_minimeta.png b/man/figures/multigroup_minimeta.png deleted file mode 100644 index ed2a9cd..0000000 Binary files a/man/figures/multigroup_minimeta.png and /dev/null differ diff --git a/man/figures/multigroup_paired_proportion_sequential.png b/man/figures/multigroup_paired_proportion_sequential.png deleted file mode 100644 index 551a467..0000000 Binary files a/man/figures/multigroup_paired_proportion_sequential.png and /dev/null differ diff --git a/man/figures/multigroup_unpaired_alpha=0.8.png b/man/figures/multigroup_unpaired_alpha=0.8.png deleted file mode 100644 index b2ebe1b..0000000 Binary files a/man/figures/multigroup_unpaired_alpha=0.8.png and /dev/null differ diff --git a/man/figures/multigroup_unpaired_proportion.png b/man/figures/multigroup_unpaired_proportion.png deleted file mode 100644 index 022b315..0000000 Binary files a/man/figures/multigroup_unpaired_proportion.png and /dev/null differ diff --git a/man/load.Rd b/man/load.Rd index dc59a95..9d6e14b 100644 --- a/man/load.Rd +++ b/man/load.Rd @@ -107,7 +107,12 @@ functions within dabestr to create estimation plots. data(non_proportional_data) # Creating a dabest object -dabest_obj <- load(data = non_proportional_data, x = Group, y = Measurement, -idx = c("Control1 ", "Test 1")) +dabest_obj <- load( + data = non_proportional_data, x = Group, y = Measurement, + idx = c("Control 1", "Test 1") +) + +# Printing dabest object +print(dabest_obj) } diff --git a/man/plot_kwargs.Rd b/man/plot_kwargs.Rd index 23978a5..eea62c1 100644 --- a/man/plot_kwargs.Rd +++ b/man/plot_kwargs.Rd @@ -36,7 +36,7 @@ If False, the resulting graph would be identical to a multiple two-groups plot. the zero line of the effect size for the control-control group. \item \code{show_baseline_ec} Default FALSE. Boolean value determining whether the baseline curve is shown. \item \code{sankey} Default TRUE. Boolean value determining if the flows between the bar charts will be plotted. -\item \code{sankey_alpha} Default 0.5. Numeric value determining the transparency of the sankey flows in a +\item \code{raw_flow_alpha} Default 0.5. Numeric value determining the transparency of the sankey flows in a paired proportion plot. \item \code{flow} Default TRUE. Boolean value determining whether the bars will be plotted in pairs. \item \code{custom_palette} Default "d3". String. The following palettes are available for use: diff --git a/revdep/.gitignore b/revdep/.gitignore new file mode 100644 index 0000000..111ab32 --- /dev/null +++ b/revdep/.gitignore @@ -0,0 +1,7 @@ +checks +library +checks.noindex +library.noindex +cloud.noindex +data.sqlite +*.html diff --git a/revdep/README.md b/revdep/README.md new file mode 100644 index 0000000..25a1a94 --- /dev/null +++ b/revdep/README.md @@ -0,0 +1,37 @@ +# Platform + +|field |value | +|:--------|:---------------------------------------| +|version |R version 4.1.0 (2021-05-18) | +|os |macOS Big Sur 11.5.1 | +|system |x86_64, darwin17.0 | +|ui |RStudio | +|language |(EN) | +|collate |en_US.UTF-8 | +|ctype |en_US.UTF-8 | +|tz |Asia/Singapore | +|date |2023-09-09 | +|rstudio |2023.03.1+446 Cherry Blossom (desktop) | +|pandoc |3.1.6 @ /usr/local/bin/ (via rmarkdown) | + +# Dependencies + +|package |old |new |Δ | +|:--------|:-----|:---------|:--| +|dabestr |0.3.0 |2023.9.12 |* | +|cpp11 |NA |0.4.6 |* | +|dplyr |NA |1.1.3 |* | +|generics |NA |0.1.3 |* | +|ggplot2 |NA |3.4.3 |* | +|gtable |NA |0.3.4 |* | +|labeling |NA |0.4.3 |* | +|purrr |NA |1.0.2 |* | + +# Revdeps + +## Failed to check (1) + +|package |version |error |warning |note | +|:----------|:-------|:------|:-------|:----| +|[permubiome](failures.md#permubiome)|1.3.1 |__+1__ | | | + diff --git a/revdep/cran.md b/revdep/cran.md new file mode 100644 index 0000000..add90e5 --- /dev/null +++ b/revdep/cran.md @@ -0,0 +1,12 @@ +## revdepcheck results + +We checked 1 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package. + + * We saw 0 new problems + * We failed to check 1 packages + +Issues with CRAN packages are summarised below. + +### Failed to check + +* permubiome (NA) diff --git a/revdep/email.yml b/revdep/email.yml new file mode 100644 index 0000000..0c5cef8 --- /dev/null +++ b/revdep/email.yml @@ -0,0 +1,5 @@ +release_date: ??? +rel_release_date: ??? +my_news_url: ??? +release_version: ??? +release_details: ??? diff --git a/revdep/failures.md b/revdep/failures.md new file mode 100644 index 0000000..05e51b0 --- /dev/null +++ b/revdep/failures.md @@ -0,0 +1,59 @@ +# permubiome + +
+ +* Version: 1.3.1 +* GitHub: NA +* Source code: https://github.com/cran/permubiome +* Date/Publication: 2020-07-31 06:40:03 UTC +* Number of recursive dependencies: 54 + +Run `revdepcheck::revdep_details(, "permubiome")` for more info + +
+ +## Newly broken + +* checking whether package ‘permubiome’ can be installed ... ERROR + ``` + Installation failed. + See ‘/Users/liankahseng/Desktop/Work/dabestr/revdep/checks.noindex/permubiome/new/permubiome.Rcheck/00install.out’ for details. + ``` + +## Installation + +### Devel + +``` +* installing *source* package ‘permubiome’ ... +** package ‘permubiome’ successfully unpacked and MD5 sums checked +** using staged installation +** R +** inst +** byte-compile and prepare package for lazy loading +Error: object ‘dabest’ is not exported by 'namespace:dabestr' +Execution halted +ERROR: lazy loading failed for package ‘permubiome’ +* removing ‘/Users/liankahseng/Desktop/Work/dabestr/revdep/checks.noindex/permubiome/new/permubiome.Rcheck/permubiome’ + + +``` +### CRAN + +``` +* installing *source* package ‘permubiome’ ... +** package ‘permubiome’ successfully unpacked and MD5 sums checked +** using staged installation +** R +** inst +** byte-compile and prepare package for lazy loading +** help +*** installing help indices +** building package indices +** testing if installed package can be loaded from temporary location +** testing if installed package can be loaded from final location +** testing if installed package keeps a record of temporary installation path +* DONE (permubiome) + + +``` diff --git a/revdep/problems.md b/revdep/problems.md new file mode 100644 index 0000000..05e51b0 --- /dev/null +++ b/revdep/problems.md @@ -0,0 +1,59 @@ +# permubiome + +
+ +* Version: 1.3.1 +* GitHub: NA +* Source code: https://github.com/cran/permubiome +* Date/Publication: 2020-07-31 06:40:03 UTC +* Number of recursive dependencies: 54 + +Run `revdepcheck::revdep_details(, "permubiome")` for more info + +
+ +## Newly broken + +* checking whether package ‘permubiome’ can be installed ... ERROR + ``` + Installation failed. + See ‘/Users/liankahseng/Desktop/Work/dabestr/revdep/checks.noindex/permubiome/new/permubiome.Rcheck/00install.out’ for details. + ``` + +## Installation + +### Devel + +``` +* installing *source* package ‘permubiome’ ... +** package ‘permubiome’ successfully unpacked and MD5 sums checked +** using staged installation +** R +** inst +** byte-compile and prepare package for lazy loading +Error: object ‘dabest’ is not exported by 'namespace:dabestr' +Execution halted +ERROR: lazy loading failed for package ‘permubiome’ +* removing ‘/Users/liankahseng/Desktop/Work/dabestr/revdep/checks.noindex/permubiome/new/permubiome.Rcheck/permubiome’ + + +``` +### CRAN + +``` +* installing *source* package ‘permubiome’ ... +** package ‘permubiome’ successfully unpacked and MD5 sums checked +** using staged installation +** R +** inst +** byte-compile and prepare package for lazy loading +** help +*** installing help indices +** building package indices +** testing if installed package can be loaded from temporary location +** testing if installed package can be loaded from final location +** testing if installed package keeps a record of temporary installation path +* DONE (permubiome) + + +``` diff --git a/tests/testthat/_snaps/001_plotter/deltadelta-mean-diff.svg b/tests/testthat/_snaps/001_plotter/deltadelta-mean-diff.svg index df4a24e..ce8f790 100644 --- a/tests/testthat/_snaps/001_plotter/deltadelta-mean-diff.svg +++ b/tests/testthat/_snaps/001_plotter/deltadelta-mean-diff.svg @@ -78,14 +78,14 @@ - - - - + + + + diff --git a/tests/testthat/_snaps/001_plotter/minimeta-mean-diff.svg b/tests/testthat/_snaps/001_plotter/minimeta-mean-diff.svg index f20b273..02f6c9f 100644 --- a/tests/testthat/_snaps/001_plotter/minimeta-mean-diff.svg +++ b/tests/testthat/_snaps/001_plotter/minimeta-mean-diff.svg @@ -149,18 +149,18 @@ - - - - - - + + + + + + diff --git a/tests/testthat/_snaps/001_plotter/multigroup-unpaired-mean-diff-colour.svg b/tests/testthat/_snaps/001_plotter/multigroup-unpaired-mean-diff-colour.svg index e8c8dac..cc3758c 100644 --- a/tests/testthat/_snaps/001_plotter/multigroup-unpaired-mean-diff-colour.svg +++ b/tests/testthat/_snaps/001_plotter/multigroup-unpaired-mean-diff-colour.svg @@ -138,16 +138,16 @@ - - - - - + + + + + diff --git a/tests/testthat/_snaps/001_plotter/multigroup-unpaired-mean-diff.svg b/tests/testthat/_snaps/001_plotter/multigroup-unpaired-mean-diff.svg index 124f4bc..d7af508 100644 --- a/tests/testthat/_snaps/001_plotter/multigroup-unpaired-mean-diff.svg +++ b/tests/testthat/_snaps/001_plotter/multigroup-unpaired-mean-diff.svg @@ -129,16 +129,16 @@ - - - - - + + + + + diff --git a/tests/testthat/_snaps/001_plotter/proportion-baseline-flow-false-mean-diff.svg b/tests/testthat/_snaps/001_plotter/proportion-baseline-flow-false-mean-diff.svg index d9c43d0..2c68bff 100644 --- a/tests/testthat/_snaps/001_plotter/proportion-baseline-flow-false-mean-diff.svg +++ b/tests/testthat/_snaps/001_plotter/proportion-baseline-flow-false-mean-diff.svg @@ -61,14 +61,6 @@ - - - - - - - - @@ -77,6 +69,14 @@ + + + + + + + + diff --git a/tests/testthat/_snaps/001_plotter/proportion-baseline-mean-diff.svg b/tests/testthat/_snaps/001_plotter/proportion-baseline-mean-diff.svg index efbf020..e6cee39 100644 --- a/tests/testthat/_snaps/001_plotter/proportion-baseline-mean-diff.svg +++ b/tests/testthat/_snaps/001_plotter/proportion-baseline-mean-diff.svg @@ -57,18 +57,18 @@ - - - - - - + + + + + + diff --git a/tests/testthat/_snaps/001_plotter/proportion-paired-mean-diff-float-false.svg b/tests/testthat/_snaps/001_plotter/proportion-paired-mean-diff-float-false.svg index f85a56e..400e3c9 100644 --- a/tests/testthat/_snaps/001_plotter/proportion-paired-mean-diff-float-false.svg +++ b/tests/testthat/_snaps/001_plotter/proportion-paired-mean-diff-float-false.svg @@ -37,10 +37,10 @@ - - + + diff --git a/tests/testthat/_snaps/001_plotter/proportion-paired-mean-diff-float-true.svg b/tests/testthat/_snaps/001_plotter/proportion-paired-mean-diff-float-true.svg index 9705ce4..82cf533 100644 --- a/tests/testthat/_snaps/001_plotter/proportion-paired-mean-diff-float-true.svg +++ b/tests/testthat/_snaps/001_plotter/proportion-paired-mean-diff-float-true.svg @@ -37,10 +37,10 @@ - - + + 0.00 diff --git a/tests/testthat/_snaps/001_plotter/proportion-sequential-mean-diff.svg b/tests/testthat/_snaps/001_plotter/proportion-sequential-mean-diff.svg index 7e889ac..0afd7af 100644 --- a/tests/testthat/_snaps/001_plotter/proportion-sequential-mean-diff.svg +++ b/tests/testthat/_snaps/001_plotter/proportion-sequential-mean-diff.svg @@ -57,18 +57,18 @@ - - - - - - + + + + + + diff --git a/tests/testthat/_snaps/001_plotter/proportion-unpaired-mean-diff-float-false.svg b/tests/testthat/_snaps/001_plotter/proportion-unpaired-mean-diff-float-false.svg index 8cf4a6f..789ac62 100644 --- a/tests/testthat/_snaps/001_plotter/proportion-unpaired-mean-diff-float-false.svg +++ b/tests/testthat/_snaps/001_plotter/proportion-unpaired-mean-diff-float-false.svg @@ -33,10 +33,10 @@ - - + + diff --git a/tests/testthat/_snaps/001_plotter/proportion-unpaired-mean-diff-float-true.svg b/tests/testthat/_snaps/001_plotter/proportion-unpaired-mean-diff-float-true.svg index a257e29..850e1e5 100644 --- a/tests/testthat/_snaps/001_plotter/proportion-unpaired-mean-diff-float-true.svg +++ b/tests/testthat/_snaps/001_plotter/proportion-unpaired-mean-diff-float-true.svg @@ -35,10 +35,10 @@ - - + + 0.00 diff --git a/tests/testthat/_snaps/001_plotter/proportion-unpaired-multigroup-mean-diff.svg b/tests/testthat/_snaps/001_plotter/proportion-unpaired-multigroup-mean-diff.svg index f7c672f..31b5a0e 100644 --- a/tests/testthat/_snaps/001_plotter/proportion-unpaired-multigroup-mean-diff.svg +++ b/tests/testthat/_snaps/001_plotter/proportion-unpaired-multigroup-mean-diff.svg @@ -35,12 +35,12 @@ - - - + + + diff --git a/tests/testthat/_snaps/001_plotter/two-groups-unpaired-mean-diff-colour-float-false.svg b/tests/testthat/_snaps/001_plotter/two-groups-unpaired-mean-diff-colour-float-false.svg index ee9d593..f172047 100644 --- a/tests/testthat/_snaps/001_plotter/two-groups-unpaired-mean-diff-colour-float-false.svg +++ b/tests/testthat/_snaps/001_plotter/two-groups-unpaired-mean-diff-colour-float-false.svg @@ -78,10 +78,10 @@ - - + + diff --git a/tests/testthat/_snaps/001_plotter/two-groups-unpaired-mean-diff-colour-float-true.svg b/tests/testthat/_snaps/001_plotter/two-groups-unpaired-mean-diff-colour-float-true.svg index 7a47a23..ec48671 100644 --- a/tests/testthat/_snaps/001_plotter/two-groups-unpaired-mean-diff-colour-float-true.svg +++ b/tests/testthat/_snaps/001_plotter/two-groups-unpaired-mean-diff-colour-float-true.svg @@ -80,10 +80,10 @@ - - + + 3 diff --git a/tests/testthat/_snaps/001_plotter/two-groups-unpaired-mean-diff-float-false.svg b/tests/testthat/_snaps/001_plotter/two-groups-unpaired-mean-diff-float-false.svg index 522f1fa..ab02957 100644 --- a/tests/testthat/_snaps/001_plotter/two-groups-unpaired-mean-diff-float-false.svg +++ b/tests/testthat/_snaps/001_plotter/two-groups-unpaired-mean-diff-float-false.svg @@ -69,10 +69,10 @@ - - + + diff --git a/tests/testthat/_snaps/001_plotter/two-groups-unpaired-mean-diff-float-true.svg b/tests/testthat/_snaps/001_plotter/two-groups-unpaired-mean-diff-float-true.svg index 6d6d632..aaedd31 100644 --- a/tests/testthat/_snaps/001_plotter/two-groups-unpaired-mean-diff-float-true.svg +++ b/tests/testthat/_snaps/001_plotter/two-groups-unpaired-mean-diff-float-true.svg @@ -71,10 +71,10 @@ - - + + 3 diff --git a/tests/testthat/helper_generate_datasets.R b/tests/testthat/helper_generate_datasets.R index b7bc7b9..27faf87 100644 --- a/tests/testthat/helper_generate_datasets.R +++ b/tests/testthat/helper_generate_datasets.R @@ -1,48 +1,49 @@ generate_non_proportional_dataset <- function(N = 40, seed = 12345) { set.seed(seed) # Fix the seed so the results are replicable. # pop_size = 10000 # Size of each population. - N = 20 # The number of samples taken from each population - + N <- 20 # The number of samples taken from each population + # Create samples c1 <- rnorm(N, mean = 3, sd = 0.4) c2 <- rnorm(N, mean = 3.5, sd = 0.75) c3 <- rnorm(N, mean = 3.25, sd = 0.4) - + t1 <- rnorm(N, mean = 3.5, sd = 0.5) t2 <- rnorm(N, mean = 2.5, sd = 0.6) t3 <- rnorm(N, mean = 3, sd = 0.75) t4 <- rnorm(N, mean = 3.5, sd = 0.75) t5 <- rnorm(N, mean = 3.25, sd = 0.4) t6 <- rnorm(N, mean = 3.25, sd = 0.4) - + # Add a `gender` column for coloring the data. - gender <- c(rep('Male', N/2), rep('Female', N/2)) - + gender <- c(rep("Male", N / 2), rep("Female", N / 2)) + # Add an `id` column for paired data plotting. id <- 1:N - + # Combine samples and gender into a DataFrame. df <- tibble::tibble( `Control 1` = c1, `Control 2` = c2, `Control 3` = c3, `Test 1` = t1, `Test 2` = t2, `Test 3` = t3, `Test 4` = t4, `Test 5` = t5, `Test 6` = t6, - Gender = gender, ID = id) - + Gender = gender, ID = id + ) + df <- df %>% tidyr::gather(key = Group, value = Measurement, -ID, -Gender) - + return(df) } generate_proportional_dataset <- function(N = 40, seed = 12345) { set.seed(seed) # Fix the seed so the results are replicable. - N = 40 # The number of samples taken from each population - + N <- 40 # The number of samples taken from each population + # Create samples - size = 1 + size <- 1 c1 <- rbinom(N, size, prob = 0.2) c2 <- rbinom(N, size, prob = 0.2) c3 <- rbinom(N, size, prob = 0.8) - + t1 <- rbinom(N, size, prob = 0.35) t2 <- rbinom(N, size, prob = 0.2) t3 <- rbinom(N, size, prob = 0.3) @@ -50,54 +51,57 @@ generate_proportional_dataset <- function(N = 40, seed = 12345) { t5 <- rbinom(N, size, prob = 0.5) t6 <- rbinom(N, size, prob = 0.6) t7 <- c(rep(1, N)) - + t8 <- c(rep(0, N)) + # Add a `gender` column for coloring the data. - gender <- c(rep('Male', N/2), rep('Female', N/2)) - + gender <- c(rep("Male", N / 2), rep("Female", N / 2)) + # Add an `id` column for paired data plotting. id <- 1:N - + # Combine samples and gender into a DataFrame. df <- tibble::tibble( `Control 1` = c1, `Control 2` = c2, `Control 3` = c3, - `Test 1` = t1, `Test 2` = t2, `Test 3` = t3, `Test 4` = t4, `Test 5` = t5, - `Test 6` = t6, `Test 7` = t7, - Gender = gender, ID = id) - + `Test 1` = t1, `Test 2` = t2, `Test 3` = t3, `Test 4` = t4, `Test 5` = t5, + `Test 6` = t6, `Test 7` = t7, `Test 8` = t8, + Gender = gender, ID = id + ) + df <- df %>% tidyr::gather(key = Group, value = Success, -ID, -Gender) - + return(df) } generate_deltadelta_dataset <- function(N = 40, seed = 12345) { set.seed(seed) # Fix the seed so the results are replicable. # pop_size = 10000 # Size of each population. - N = 20 # The number of samples taken from each population - + N <- 20 # The number of samples taken from each population + # Create samples placebo <- rnorm(N, mean = 3, sd = 0.4) drug <- rnorm(N, mean = 3.5, sd = 0.75) - + # Add a `Genotype` column as the second variable - genotype <- c(rep('M', N/2), rep('W', N/2)) - + genotype <- c(rep("M", N / 2), rep("W", N / 2)) + # Add an `id` column for paired data plotting. id <- 1:N - + # Add a `Rep` column as the first variable for the 2 replicates of experiments done - Rep <- rep(c("Rep1", "Rep2"), N/2) - + Rep <- rep(c("Rep1", "Rep2"), N / 2) + # Combine all columns into a DataFrame. df <- tibble::tibble( Placebo = placebo, Drug = drug, - Genotype = genotype, + Genotype = genotype, ID = id, - Rep = Rep) - + Rep = Rep + ) + df <- df %>% tidyr::gather(key = Treatment, value = Measurement, -ID, -Genotype, -Rep) - + return(df) } diff --git a/tests/testthat/test-001_effsize_func.R b/tests/testthat/test-001_effsize_func.R index c93aa1d..de42f54 100644 --- a/tests/testthat/test-001_effsize_func.R +++ b/tests/testthat/test-001_effsize_func.R @@ -1,3 +1,4 @@ +#### Apply valid effect sizes to dabest_obj testthat::test_that("Able to apply all correct effect sizes to dabest_obj", { np_dataset <- generate_non_proportional_dataset() dabest_obj <- dabestr::load(data = np_dataset, x = Group, y = Measurement, idx = c("Control 1", "Test 1")) @@ -6,37 +7,128 @@ testthat::test_that("Able to apply all correct effect sizes to dabest_obj", { expect_no_error(dabestr::cohens_d(dabest_obj)) expect_no_error(dabestr::hedges_g(dabest_obj)) expect_no_error(dabestr::cliffs_delta(dabest_obj)) - + p_dataset <- generate_proportional_dataset() - dabest_prop_obj <- dabestr::load(data = p_dataset, x = Group, y = Success, idx = c("Control 1", "Test 1"), - proportional = TRUE) + dabest_prop_obj <- dabestr::load( + data = p_dataset, x = Group, y = Success, idx = c("Control 1", "Test 1"), + proportional = TRUE + ) expect_no_error(dabestr::cohens_h(dabest_prop_obj)) expect_no_error(dabestr::mean_diff(dabest_prop_obj)) - - dabest_mm_obj <- dabestr::load(data = np_dataset, x = Group, y = Measurement, idx = c("Control 1", "Test 1"), - minimeta = TRUE) + + dabest_mm_obj <- dabestr::load( + data = np_dataset, x = Group, y = Measurement, idx = c("Control 1", "Test 1"), + minimeta = TRUE + ) expect_no_error(dabestr::mean_diff(dabest_mm_obj)) }) +#### Detecting non-valid effect sizes for specific dabest_objs testthat::test_that("Able to detect non-dabest_obj", { expect_error(dabestr::mean_diff("a"), "dabest_obj must be a obj") }) testthat::test_that("Able to detect non-valid effect sizes", { np_dataset <- generate_non_proportional_dataset() - dabest_obj <- dabestr::load(data = np_dataset, x = Group, y = Measurement, idx = c("Control 1", "Test 1"), - paired = "sequential", id_col = ID) + dabest_obj <- dabestr::load( + data = np_dataset, x = Group, y = Measurement, idx = c("Control 1", "Test 1"), + paired = "sequential", id_col = ID + ) expect_error(dabestr::cliffs_delta(dabest_obj), "`Cliffs' delta` cannot be used when paired is not NULL.") - + p_dataset <- generate_proportional_dataset() - dabest_prop_obj <- dabestr::load(data = p_dataset, x = Group, y = Success, idx = c("Control 1", "Test 1"), - proportional = TRUE) - expect_error(dabestr::median_diff(dabest_prop_obj), - "Other effect sizes besides `Cohens h` and `Mean difference` cannot be used when proportional is TRUE.") - expect_error(dabestr::cohens_d(dabest_prop_obj), - "Other effect sizes besides `Cohens h` and `Mean difference` cannot be used when proportional is TRUE.") - expect_error(dabestr::hedges_g(dabest_prop_obj), - "Other effect sizes besides `Cohens h` and `Mean difference` cannot be used when proportional is TRUE.") - expect_error(dabestr::cliffs_delta(dabest_prop_obj), - "Other effect sizes besides `Cohens h` and `Mean difference` cannot be used when proportional is TRUE.") -}) \ No newline at end of file + dabest_prop_obj <- dabestr::load( + data = p_dataset, x = Group, y = Success, idx = c("Control 1", "Test 1"), + proportional = TRUE + ) + expect_error( + dabestr::median_diff(dabest_prop_obj), + "Other effect sizes besides `Cohens h` and `Mean difference` cannot be used when proportional is TRUE." + ) + expect_error( + dabestr::cohens_d(dabest_prop_obj), + "Other effect sizes besides `Cohens h` and `Mean difference` cannot be used when proportional is TRUE." + ) + expect_error( + dabestr::hedges_g(dabest_prop_obj), + "Other effect sizes besides `Cohens h` and `Mean difference` cannot be used when proportional is TRUE." + ) + expect_error( + dabestr::cliffs_delta(dabest_prop_obj), + "Other effect sizes besides `Cohens h` and `Mean difference` cannot be used when proportional is TRUE." + ) +}) + +#### Printing functions +testthat::test_that("Prints correct output for dabest_effectsize_obj object", { + #### 2GROUP #### + np_dataset <- generate_non_proportional_dataset() + dabest_effectsize_obj <- dabestr::load( + data = np_dataset, x = Group, y = Measurement, idx = c("Control 1", "Test 1") + ) %>% mean_diff() + expect_output(print(dabest_effectsize_obj), regexp = "Test 1 and Control 1") + expect_output(print(dabest_effectsize_obj), regexp = "5000 bootstrap samples") + + #### MULTIGROUP BASELINE #### + dabest_effectsize_obj <- dabestr::load(np_dataset, + x = Group, y = Measurement, + idx = list(c("Control 1", "Test 1"), c("Control 2", "Test 2", "Test 3")), + paired = "baseline", id_col = ID + ) %>% mean_diff() + expect_output(print(dabest_effectsize_obj), regexp = "Test 1 and Control 1") + expect_output(print(dabest_effectsize_obj), regexp = "Test 2 and Control 2") + expect_output(print(dabest_effectsize_obj), regexp = "Test 3 and Control 2") + + #### MULTIGROUP SEQUENTIAL #### + dabest_effectsize_obj <- dabestr::load(np_dataset, + x = Group, y = Measurement, + idx = list(c("Control 1", "Test 1"), c("Control 2", "Test 2", "Test 3")), + paired = "sequential", id_col = ID + ) %>% mean_diff() + expect_output(print(dabest_effectsize_obj), regexp = "Test 1 and Control 1") + expect_output(print(dabest_effectsize_obj), regexp = "Test 2 and Control 2") + expect_output(print(dabest_effectsize_obj), regexp = "Test 3 and Test 2") + + #### 2GROUP PROPORTION #### + p_dataset <- generate_proportional_dataset() + dabest_effectsize_obj <- dabestr::load( + data = p_dataset, x = Group, y = Success, idx = c("Control 2", "Test 2"), + proportional = TRUE + ) %>% mean_diff() + expect_output(print(dabest_effectsize_obj), regexp = "Test 2 and Control 2") + + #### MINIMETA #### + np_dataset <- generate_non_proportional_dataset() + dabest_effectsize_obj <- dabestr::load( + data = np_dataset, x = Group, y = Measurement, idx = list( + c("Control 1", "Test 1"), + c("Control 2", "Test 2") + ), + minimeta = TRUE + ) %>% mean_diff() + expect_output(print(dabest_effectsize_obj), regexp = "Test 1 and Control 1") + expect_output(print(dabest_effectsize_obj), regexp = "Test 2 and Control 2") + + #### DELTADELTA #### + dd_dataset <- generate_deltadelta_dataset() + dabest_effectsize_obj <- dabestr::load(dd_dataset, + x = Genotype, y = Measurement, delta2 = TRUE, experiment = Treatment, + idx = list(c("W Placebo", "M Placebo"), c("W Drug", "M Drug")), colour = Genotype + ) %>% mean_diff() + expect_output(print(dabest_effectsize_obj), regexp = "M Placebo and W Placebo") + expect_output(print(dabest_effectsize_obj), regexp = "M Drug and W Drug") + + #### ADJUSTING CI #### + dabest_effectsize_obj <- dabestr::load( + data = np_dataset, x = Group, y = Measurement, idx = c("Control 1", "Test 1"), ci = 85 + ) %>% mean_diff() + expect_output(print(dabest_effectsize_obj), regexp = "85%CI") + + #### ADJUSTING RESAMPLES #### + dabest_effectsize_obj <- dabestr::load( + data = np_dataset, x = Group, y = Measurement, idx = c("Control 1", "Test 1"), resamples = 3000 + ) %>% mean_diff() + expect_output(print(dabest_effectsize_obj), regexp = "3000 bootstrap samples") + + #### CALCULATION OF PVALUES ##### +}) diff --git a/tests/testthat/test-001_load.R b/tests/testthat/test-001_load.R index ac81c32..110a43b 100644 --- a/tests/testthat/test-001_load.R +++ b/tests/testthat/test-001_load.R @@ -1,81 +1,224 @@ +#### Loading of datasets testthat::test_that("Able to load dataset", { np_dataset <- generate_non_proportional_dataset() - expect_no_error(dabestr::load(np_dataset, x = Group, y = Measurement, - idx = c("Control 1", "Test 1"))) - + expect_no_error(dabestr::load(np_dataset, + x = Group, y = Measurement, + idx = c("Control 1", "Test 1") + )) + p_dataset <- generate_proportional_dataset() - expect_no_error(dabestr::load(p_dataset, x = Group, y = Success, - idx = c("Control 1", "Test 1"), proportional = TRUE)) - + expect_no_error(dabestr::load(p_dataset, + x = Group, y = Success, + idx = c("Control 1", "Test 1"), proportional = TRUE + )) + dd_dataset <- generate_deltadelta_dataset() - expect_no_error(dabestr::load(dd_dataset, x = Genotype, y = Measurement, - delta2 = TRUE, experiment = Treatment, - idx = list(c("W Placebo","M Placebo"),c("W Drug","M Drug")), - colour = Genotype)) + expect_no_error(dabestr::load(dd_dataset, + x = Genotype, y = Measurement, + delta2 = TRUE, experiment = Treatment, + idx = list(c("W Placebo", "M Placebo"), c("W Drug", "M Drug")), + colour = Genotype + )) }) +#### Detecting non-valid params testthat::test_that("Able to detect non-valid params", { np_dataset <- generate_non_proportional_dataset() - expect_error(dabestr::load(np_dataset, x = Grou, y = Measurement, - idx = c("Control 1", "Test 1")), - regexp = "Column x is not in data") - expect_error(dabestr::load(np_dataset, x = Group, y = Measuremen, - idx = c("Control 1", "Test 1")), - regexp = "Column y is not in data") - expect_error(dabestr::load(np_dataset, x = Group, y = Measurement, - idx = c("Control 1", "Test 1"), id_col = I), - regexp = "Column id_col is not in data") - expect_error(dabestr::load(np_dataset, x = Group, y = Measurement, - idx = c("Control 1", "Test 1"), colour = Grou), - regexp = "Column colour is not in data") - expect_error(dabestr::load(np_dataset, x = Group, y = Measurement, - idx = c("Control 1")), - regexp = "does not consist of at least 2 groups") - expect_error(dabestr::load(np_dataset, x = Group, y = Measurement, - idx = list(c("Control 1", "Test 1"), c("Control 2"))), - regexp = "does not consist of at least 2 groups") + expect_error( + dabestr::load(np_dataset, + x = Grou, y = Measurement, + idx = c("Control 1", "Test 1") + ), + regexp = "Column x is not in data" + ) + expect_error( + dabestr::load(np_dataset, + x = Group, y = Measuremen, + idx = c("Control 1", "Test 1") + ), + regexp = "Column y is not in data" + ) + expect_error( + dabestr::load(np_dataset, + x = Group, y = Measurement, + idx = c("Control 1", "Test 1"), id_col = I + ), + regexp = "Column id_col is not in data" + ) + expect_error( + dabestr::load(np_dataset, + x = Group, y = Measurement, + idx = c("Control 1", "Test 1"), colour = Grou + ), + regexp = "Column colour is not in data" + ) + expect_error( + dabestr::load(np_dataset, + x = Group, y = Measurement, + idx = c("Control 1") + ), + regexp = "does not consist of at least 2 groups" + ) + expect_error( + dabestr::load(np_dataset, + x = Group, y = Measurement, + idx = list(c("Control 1", "Test 1"), c("Control 2")) + ), + regexp = "does not consist of at least 2 groups" + ) }) testthat::test_that("Able to detect non-valid params for proportional = TRUE", { np_dataset <- generate_non_proportional_dataset() - expect_error(dabestr::load(np_dataset, x = Group, y = Measurement, - idx = c("Control 1", "Test 1"), proportional = TRUE), - regexp = "data is not proportional") + expect_error( + dabestr::load(np_dataset, + x = Group, y = Measurement, + idx = c("Control 1", "Test 1"), proportional = TRUE + ), + regexp = "data is not proportional" + ) }) testthat::test_that("Able to detect non-valid params for is_paired = TRUE", { np_dataset <- generate_non_proportional_dataset() - expect_error(dabestr::load(np_dataset, x = Group, y = Measurement, - idx = c("Control 1", "Test 1", "Test 2"), paired = "baseline"), - regexp = "is TRUE but no id_col was supplied") - expect_error(dabestr::load(np_dataset, x = Group, y = Measurement, - idx = c("Control 1", "Test 1", "Test 2"), paired = "some", - id_col = ID), - regexp = "is not 'baseline' or 'sequential'.") + expect_error( + dabestr::load(np_dataset, + x = Group, y = Measurement, + idx = c("Control 1", "Test 1", "Test 2"), paired = "baseline" + ), + regexp = "is TRUE but no id_col was supplied" + ) + expect_error( + dabestr::load(np_dataset, + x = Group, y = Measurement, + idx = c("Control 1", "Test 1", "Test 2"), paired = "some", + id_col = ID + ), + regexp = "is not 'baseline' or 'sequential'." + ) }) testthat::test_that("Able to detect non-valid params for minimeta = TRUE", { p_dataset <- generate_proportional_dataset() - expect_error(dabestr::load(p_dataset, x = Group, y = Success, - idx = c("Control 1", "Test 1"), id_col = ID, - proportional = TRUE, minimeta = TRUE), - regexp = "proportional is TRUE but minimeta is also TRUE.") - + expect_error( + dabestr::load(p_dataset, + x = Group, y = Success, + idx = c("Control 1", "Test 1"), id_col = ID, + proportional = TRUE, minimeta = TRUE + ), + regexp = "proportional is TRUE but minimeta is also TRUE." + ) + np_dataset <- generate_non_proportional_dataset() - expect_error(dabestr::load(np_dataset, x = Group, y = Measurement, - idx = c("Control 1", "Test 1"), - delta2 = TRUE, minimeta = TRUE), - regexp = "delta2 is TRUE but minimeta is also TRUE.") - expect_error(dabestr::load(np_dataset, x = Group, y = Measurement, - idx = c("Control 1", "Test 1", "Test 2"), - minimeta = TRUE), - regexp = "minimeta is TRUE, but some idx does not consist of exactly 2 groups") + expect_error( + dabestr::load(np_dataset, + x = Group, y = Measurement, + idx = c("Control 1", "Test 1"), + delta2 = TRUE, minimeta = TRUE + ), + regexp = "delta2 is TRUE but minimeta is also TRUE." + ) + expect_error( + dabestr::load(np_dataset, + x = Group, y = Measurement, + idx = c("Control 1", "Test 1", "Test 2"), + minimeta = TRUE + ), + regexp = "minimeta is TRUE, but some idx does not consist of exactly 2 groups" + ) }) testthat::test_that("Able to detect non-valid params for delta2 = TRUE", { p_dataset <- generate_proportional_dataset() - expect_error(dabestr::load(p_dataset, x = Group, y = Success, - idx = c("Control 1", "Test 1"), id_col = ID, - proportional = TRUE, delta2 = TRUE), - regexp = "delta2 is TRUE but proportional is also TRUE.") -}) \ No newline at end of file + expect_error( + dabestr::load(p_dataset, + x = Group, y = Success, + idx = c("Control 1", "Test 1"), id_col = ID, + proportional = TRUE, delta2 = TRUE + ), + regexp = "delta2 is TRUE but proportional is also TRUE." + ) +}) + +#### Printing functions +testthat::test_that("Prints correct output for dabestr object", { + #### 2GROUP #### + np_dataset <- generate_non_proportional_dataset() + dabest_obj <- dabestr::load( + data = np_dataset, x = Group, y = Measurement, idx = c("Control 1", "Test 1") + ) + expect_output(print(dabest_obj), regexp = "95% confidence intervals") + expect_output(print(dabest_obj), regexp = "Test 1 minus Control 1") + expect_output(print(dabest_obj), regexp = "5000 resamples") + + #### MULTIGROUP BASELINE #### + dabest_obj <- dabestr::load(np_dataset, + x = Group, y = Measurement, + idx = list(c("Control 1", "Test 1"), c("Control 2", "Test 2", "Test 3")), + paired = "baseline", id_col = ID + ) + expect_output(print(dabest_obj), regexp = "Test 1 minus Control 1") + expect_output(print(dabest_obj), regexp = "Test 2 minus Control 2") + expect_output(print(dabest_obj), regexp = "Test 3 minus Control 2") + + #### MULTIGROUP SEQUENTIAL #### + dabest_obj <- dabestr::load(np_dataset, + x = Group, y = Measurement, + idx = list(c("Control 1", "Test 1"), c("Control 2", "Test 2", "Test 3")), + paired = "sequential", id_col = ID + ) + expect_output(print(dabest_obj), regexp = "Test 1 minus Control 1") + expect_output(print(dabest_obj), regexp = "Test 2 minus Control 2") + expect_output(print(dabest_obj), regexp = "Test 3 minus Test 2") + + #### 2GROUP PROPORTION #### + p_dataset <- generate_proportional_dataset() + dabest_obj <- dabestr::load( + data = p_dataset, x = Group, y = Success, idx = c("Control 2", "Test 2"), + proportional = TRUE + ) + expect_output(print(dabest_obj), regexp = "95% confidence intervals") + expect_output(print(dabest_obj), regexp = "Test 2 minus Control 2") + expect_output(print(dabest_obj), regexp = "5000 resamples") + + #### MINIMETA #### + np_dataset <- generate_non_proportional_dataset() + dabest_obj <- dabestr::load( + data = np_dataset, x = Group, y = Measurement, idx = list( + c("Control 1", "Test 1"), + c("Control 2", "Test 2") + ), + minimeta = TRUE + ) + expect_output(print(dabest_obj), regexp = "95% confidence intervals") + expect_output(print(dabest_obj), regexp = "Test 1 minus Control 1") + expect_output(print(dabest_obj), regexp = "Test 2 minus Control 2") + expect_output(print(dabest_obj), regexp = "weighted delta") + expect_output(print(dabest_obj), regexp = "5000 resamples") + + #### DELTADELTA #### + dd_dataset <- generate_deltadelta_dataset() + dabest_obj <- dabestr::load(dd_dataset, + x = Genotype, y = Measurement, delta2 = TRUE, experiment = Treatment, + idx = list(c("W Placebo", "M Placebo"), c("W Drug", "M Drug")), colour = Genotype + ) + expect_output(print(dabest_obj), regexp = "M Placebo minus W Placebo") + expect_output(print(dabest_obj), regexp = "M Drug minus W Drug") + expect_output(print(dabest_obj), regexp = "Drug minus Placebo") + + #### ADJUSTING CI #### + dabest_obj <- dabestr::load( + data = np_dataset, x = Group, y = Measurement, idx = c("Control 1", "Test 1"), ci = 85 + ) + expect_output(print(dabest_obj), regexp = "85% confidence intervals") + expect_output(print(dabest_obj), regexp = "Test 1 minus Control 1") + expect_output(print(dabest_obj), regexp = "5000 resamples") + + #### ADJUSTING RESAMPLES #### + dabest_obj <- dabestr::load( + data = np_dataset, x = Group, y = Measurement, idx = c("Control 1", "Test 1"), resamples = 3000 + ) + expect_output(print(dabest_obj), regexp = "95% confidence intervals") + expect_output(print(dabest_obj), regexp = "Test 1 minus Control 1") + expect_output(print(dabest_obj), regexp = "3000 resamples") +}) diff --git a/tests/testthat/test-001_plotter.R b/tests/testthat/test-001_plotter.R index cd8ad02..30ff1a8 100644 --- a/tests/testthat/test-001_plotter.R +++ b/tests/testthat/test-001_plotter.R @@ -1,14 +1,14 @@ testthat::test_that("Plot two groups correctly", { np_dataset <- generate_non_proportional_dataset() - + #### 2GROUP #### unpaired_mean_diff <- dabestr::load(np_dataset, x = Group, y = Measurement, idx = c("Control 1", "Test 1")) %>% dabestr::mean_diff() - + #### FLOAT TRUE #### unpaired_mean_diff_float_true <- dabestr::dabest_plot(unpaired_mean_diff, float_contrast = TRUE) vdiffr::expect_doppelganger("two-groups unpaired mean diff float true", unpaired_mean_diff_float_true) - + #### FLOAT FALSE #### unpaired_mean_diff_float_false <- dabestr::dabest_plot(unpaired_mean_diff, float_contrast = FALSE) vdiffr::expect_doppelganger("two-groups unpaired mean diff float false", unpaired_mean_diff_float_false) @@ -16,17 +16,19 @@ testthat::test_that("Plot two groups correctly", { testthat::test_that("Plot two groups colour correctly", { np_dataset <- generate_non_proportional_dataset() - + #### 2GROUP COLOUR #### - unpaired_mean_diff_colour <- dabestr::load(np_dataset, x = Group, y = Measurement, - idx = c("Control 1", "Test 1"), - colour = Gender) %>% + unpaired_mean_diff_colour <- dabestr::load(np_dataset, + x = Group, y = Measurement, + idx = c("Control 1", "Test 1"), + colour = Gender + ) %>% dabestr::mean_diff() - + #### FLOAT TRUE #### unpaired_mean_diff_colour_float_true <- dabestr::dabest_plot(unpaired_mean_diff_colour, float_contrast = TRUE) vdiffr::expect_doppelganger("two-groups unpaired mean diff colour float true", unpaired_mean_diff_colour_float_true) - + #### FLOAT FALSE #### unpaired_mean_diff_colour_float_false <- dabestr::dabest_plot(unpaired_mean_diff_colour, float_contrast = FALSE) vdiffr::expect_doppelganger("two-groups unpaired mean diff colour float false", unpaired_mean_diff_colour_float_false) @@ -36,115 +38,135 @@ testthat::test_that("Plot multi groups correctly", { np_dataset <- generate_non_proportional_dataset() #### MULTIGROUP UNPAIRED #### - multi_unpaired <- dabestr::load(np_dataset, x = Group, y = Measurement, - idx = list(c("Control 1", "Test 1"), c("Control 2", "Test 2", "Test 3"))) %>% + multi_unpaired <- dabestr::load(np_dataset, + x = Group, y = Measurement, + idx = list(c("Control 1", "Test 1"), c("Control 2", "Test 2", "Test 3")) + ) %>% dabestr::mean_diff() - + multi_unpaired <- dabestr::dabest_plot(multi_unpaired, float_contrast = TRUE) vdiffr::expect_doppelganger("multigroup unpaired mean diff", multi_unpaired) - + #### MULTIGROUP COLOUR UNPAIRED #### - multi_unpaired_colour <- dabestr::load(np_dataset, x = Group, y = Measurement, - idx = list(c("Control 1", "Test 1"), c("Control 2", "Test 2", "Test 3")), - colour = Gender) %>% + multi_unpaired_colour <- dabestr::load(np_dataset, + x = Group, y = Measurement, + idx = list(c("Control 1", "Test 1"), c("Control 2", "Test 2", "Test 3")), + colour = Gender + ) %>% dabestr::mean_diff() - + multi_unpaired_colour <- dabestr::dabest_plot(multi_unpaired_colour, float_contrast = TRUE) vdiffr::expect_doppelganger("multigroup unpaired mean diff colour", multi_unpaired_colour) - + #### MULTIGROUP SEQUENTIAL #### - multi_sequential <- dabestr::load(np_dataset, x = Group, y = Measurement, - idx = list(c("Control 1", "Test 1"), c("Control 2", "Test 2", "Test 3")), - paired = "sequential", id_col = ID) %>% + multi_sequential <- dabestr::load(np_dataset, + x = Group, y = Measurement, + idx = list(c("Control 1", "Test 1"), c("Control 2", "Test 2", "Test 3")), + paired = "sequential", id_col = ID + ) %>% dabestr::mean_diff() - + multi_sequential <- dabestr::dabest_plot(multi_sequential, float_contrast = TRUE) vdiffr::expect_doppelganger("multigroup sequential mean diff", multi_sequential) - + #### MULTIGROUP BASELINE #### - multi_baseline <- dabestr::load(np_dataset, x = Group, y = Measurement, - idx = list(c("Control 1", "Test 1"), c("Control 2", "Test 2", "Test 3")), - paired = "baseline", id_col = ID) %>% + multi_baseline <- dabestr::load(np_dataset, + x = Group, y = Measurement, + idx = list(c("Control 1", "Test 1"), c("Control 2", "Test 2", "Test 3")), + paired = "baseline", id_col = ID + ) %>% dabestr::mean_diff() - + multi_baseline <- dabestr::dabest_plot(multi_baseline, float_contrast = TRUE) vdiffr::expect_doppelganger("multigroup baseline mean diff", multi_baseline) - + #### MULTIGROUP BASELINE COLOUR #### - multi_baseline_colour <- dabestr::load(np_dataset, x = Group, y = Measurement, - idx = list(c("Control 1", "Test 1"), c("Control 2", "Test 2", "Test 3")), - paired = "baseline", id_col = ID, colour = Gender) %>% + multi_baseline_colour <- dabestr::load(np_dataset, + x = Group, y = Measurement, + idx = list(c("Control 1", "Test 1"), c("Control 2", "Test 2", "Test 3")), + paired = "baseline", id_col = ID, colour = Gender + ) %>% dabestr::mean_diff() - + multi_baseline_colour <- dabestr::dabest_plot(multi_baseline_colour, float_contrast = TRUE) vdiffr::expect_doppelganger("multigroup baseline colour mean diff", multi_baseline_colour) }) testthat::test_that("Plot unpaired proportions correctly", { p_dataset <- generate_proportional_dataset() - + #### 2GROUP PROPORTION UNPAIRED #### - unpaired_mean_diff <- dabestr::load(p_dataset, x = Group, y = Success, - idx = c("Control 1", "Test 1"), - proportional = TRUE) %>% + unpaired_mean_diff <- dabestr::load(p_dataset, + x = Group, y = Success, + idx = c("Control 1", "Test 1"), + proportional = TRUE + ) %>% dabestr::mean_diff() - + #### FLOAT TRUE #### unpaired_mean_diff_float_true <- dabestr::dabest_plot(unpaired_mean_diff, float_contrast = TRUE) vdiffr::expect_doppelganger("proportion unpaired mean diff float true", unpaired_mean_diff_float_true) - + #### FLOAT FALSE #### unpaired_mean_diff_float_false <- dabestr::dabest_plot(unpaired_mean_diff, float_contrast = FALSE) vdiffr::expect_doppelganger("proportion unpaired mean diff float false", unpaired_mean_diff_float_false) - + #### MULTIGROUP PROPORTION UNPAIRED #### - multi_unpaired <- dabestr::load(p_dataset, x = Group, y = Success, - idx = c("Control 1", "Test 1", "Test 2"), - proportional = TRUE) %>% + multi_unpaired <- dabestr::load(p_dataset, + x = Group, y = Success, + idx = c("Control 1", "Test 1", "Test 2"), + proportional = TRUE + ) %>% dabestr::mean_diff() - + multi_unpaired <- dabestr::dabest_plot(multi_unpaired, float_contrast = FALSE) vdiffr::expect_doppelganger("proportion unpaired multigroup mean diff", multi_unpaired) }) testthat::test_that("Plot paired proportions correctly", { p_dataset <- generate_proportional_dataset() - + #### 2GROUP PROPORTION BASELINE #### - paired_mean_diff <- dabestr::load(p_dataset, x = Group, y = Success, - idx = c("Control 1", "Test 1"), - proportional = TRUE, paired = "baseline", - id_col = ID) %>% + paired_mean_diff <- dabestr::load(p_dataset, + x = Group, y = Success, + idx = c("Control 1", "Test 1"), + proportional = TRUE, paired = "baseline", + id_col = ID + ) %>% dabestr::mean_diff() - + #### FLOAT TRUE #### paired_mean_diff_float_true <- dabestr::dabest_plot(paired_mean_diff, float_contrast = TRUE) vdiffr::expect_doppelganger("proportion paired mean diff float true", paired_mean_diff_float_true) - + #### FLOAT FALSE #### paired_mean_diff_float_false <- dabestr::dabest_plot(paired_mean_diff, float_contrast = FALSE) vdiffr::expect_doppelganger("proportion paired mean diff float false", paired_mean_diff_float_false) - + #### MULTIGROUP PROPORTION SEQUENTIAL #### - multi_sequential <- dabestr::load(p_dataset, x = Group, y = Success, - idx = list(c("Control 1", "Test 1", "Test 2", "Test 3"), c("Control 2", "Test 4")), - proportional = TRUE, paired = "sequential", - id_col = ID) %>% + multi_sequential <- dabestr::load(p_dataset, + x = Group, y = Success, + idx = list(c("Control 1", "Test 1", "Test 2", "Test 3"), c("Control 2", "Test 4")), + proportional = TRUE, paired = "sequential", + id_col = ID + ) %>% dabestr::mean_diff() - + multi_sequential <- dabestr::dabest_plot(multi_sequential, float_contrast = FALSE) vdiffr::expect_doppelganger("proportion sequential mean diff", multi_sequential) - + #### MULTIGROUP PROPORTION BASELINE #### - multi_baseline <- dabestr::load(p_dataset, x = Group, y = Success, - idx = list(c("Control 1", "Test 1", "Test 2", "Test 3"), c("Control 2", "Test 4")), - proportional = TRUE, paired = "baseline", - id_col = ID) %>% + multi_baseline <- dabestr::load(p_dataset, + x = Group, y = Success, + idx = list(c("Control 1", "Test 1", "Test 2", "Test 3"), c("Control 2", "Test 4")), + proportional = TRUE, paired = "baseline", + id_col = ID + ) %>% dabestr::mean_diff() - + multi_baseline_flow_true <- dabestr::dabest_plot(multi_baseline, float_contrast = FALSE) vdiffr::expect_doppelganger("proportion baseline mean diff", multi_baseline_flow_true) - + #### FLOW FALSE #### multi_baseline_flow_false <- dabestr::dabest_plot(multi_baseline, float_contrast = FALSE, flow = FALSE) vdiffr::expect_doppelganger("proportion baseline flow false mean diff", multi_baseline_flow_false) @@ -154,25 +176,27 @@ testthat::test_that("Plot minimeta correctly", { np_dataset <- generate_non_proportional_dataset() #### MINIMETA #### - minimeta <- dabestr::load(np_dataset, x = Group, y = Measurement, - idx = list(c("Control 1", "Test 1"), c("Control 2", "Test 2"), c("Control 3", "Test 3")), - minimeta = TRUE) %>% + minimeta <- dabestr::load(np_dataset, + x = Group, y = Measurement, + idx = list(c("Control 1", "Test 1"), c("Control 2", "Test 2"), c("Control 3", "Test 3")), + minimeta = TRUE + ) %>% dabestr::mean_diff() - + minimeta <- dabestr::dabest_plot(minimeta, float_contrast = TRUE) vdiffr::expect_doppelganger("minimeta mean diff", minimeta) }) testthat::test_that("Plot deltadelta correctly", { d_dataset <- generate_deltadelta_dataset() - + #### DELTADELTA #### - deltadelta <- dabestr::load(d_dataset, x = Genotype, y = Measurement, delta2 = TRUE, experiment = Treatment, - idx = list(c("W Placebo","M Placebo"),c("W Drug","M Drug")), colour = Genotype) %>% + deltadelta <- dabestr::load(d_dataset, + x = Genotype, y = Measurement, delta2 = TRUE, experiment = Treatment, + idx = list(c("W Placebo", "M Placebo"), c("W Drug", "M Drug")), colour = Genotype + ) %>% dabestr::mean_diff() - + deltadelta <- dabestr::dabest_plot(deltadelta, float_contrast = TRUE) vdiffr::expect_doppelganger("deltadelta mean diff", deltadelta) }) - - diff --git a/tests/testthat/test-helper_generate_datasets.R b/tests/testthat/test-helper_generate_datasets.R index 72cb020..648a522 100644 --- a/tests/testthat/test-helper_generate_datasets.R +++ b/tests/testthat/test-helper_generate_datasets.R @@ -2,4 +2,4 @@ testthat::test_that("Able to generate dataset for testing", { expect_no_error(generate_non_proportional_dataset()) expect_no_error(generate_proportional_dataset()) expect_no_error(generate_deltadelta_dataset()) -}) \ No newline at end of file +}) diff --git a/vignettes/datasets.Rmd b/vignettes/datasets.Rmd index 7d2d0c8..9072221 100644 --- a/vignettes/datasets.Rmd +++ b/vignettes/datasets.Rmd @@ -33,4 +33,4 @@ data("proportional_data") data("minimeta_data") data("deltadelta_data") -``` \ No newline at end of file +``` diff --git a/vignettes/plot_aesthetics.Rmd b/vignettes/plot_aesthetics.Rmd index 9695cb4..b8650d9 100644 --- a/vignettes/plot_aesthetics.Rmd +++ b/vignettes/plot_aesthetics.Rmd @@ -52,32 +52,38 @@ library(dabestr) data(non_proportional_data) data(proportional_data) data(deltadelta_data) -dabest_twogroup_obj.mean_diff <- load(non_proportional_data, x = Group, y = Measurement, idx = c("Control 1", "Test 1")) %>% +dabest_twogroup_obj.mean_diff <- load(non_proportional_data, x = Group, y = Measurement, idx = c("Control 1", "Test 1")) %>% mean_diff() -dabest_multigroup_obj.mean_diff <- load(non_proportional_data, x = Group, y = Measurement, - idx = list(c("Control 1", "Test 1", "Test 2"), c("Control 2", "Test 3"))) %>% +dabest_multigroup_obj.mean_diff <- load(non_proportional_data, + x = Group, y = Measurement, + idx = list(c("Control 1", "Test 1", "Test 2"), c("Control 2", "Test 3")) +) %>% mean_diff() -dabest_unpaired_props.mean_diff <- load(proportional_data, x = Group, y = Success, - idx = list(c("Control 1", "Test 1")), - proportional = TRUE) %>% +dabest_unpaired_props.mean_diff <- load(proportional_data, + x = Group, y = Success, + idx = list(c("Control 1", "Test 1")), + proportional = TRUE +) %>% mean_diff() -dabest_paired_props.mean_diff <- load(proportional_data, x = Group, y = Success, - idx = list(c("Control 1", "Test 1", "Test 2", "Test 3"), c("Control 2", "Test 4")), - proportional = TRUE, paired = "sequential", - id_col = ID) %>% +dabest_paired_props.mean_diff <- load(proportional_data, + x = Group, y = Success, + idx = list(c("Control 1", "Test 1", "Test 2", "Test 3"), c("Control 2", "Test 4")), + proportional = TRUE, paired = "sequential", + id_col = ID +) %>% mean_diff() ``` ```{r} dabest_plot( - dabest_twogroup_obj.mean_diff, - float_contrast = TRUE, - swarm_x_text = 30, - swarm_y_text = 1, - contrast_x_text = 30, + dabest_twogroup_obj.mean_diff, + float_contrast = TRUE, + swarm_x_text = 30, + swarm_y_text = 1, + contrast_x_text = 30, contrast_y_text = 5 ) ``` @@ -91,8 +97,8 @@ The following parameters are responsible for adjusting the content of the text e ```{r} dabest_plot( - dabest_twogroup_obj.mean_diff, - float_contrast = TRUE, + dabest_twogroup_obj.mean_diff, + float_contrast = TRUE, swarm_label = "I love estimation statistics.", contrast_label = "I love it more than you do!" ) @@ -117,12 +123,16 @@ direction of the `asymmetric_side`.. - `es_line_size` Default 0.8. Numeric value determining the size of the ci line in the delta plot. ```{r} -A <- dabest_plot(dabest_twogroup_obj.mean_diff, float_contrast = TRUE, - swarm_label = "", contrast_label = "", - raw_marker_size = 1, raw_marker_alpha = 1) -B <- dabest_plot(dabest_twogroup_obj.mean_diff, float_contrast = TRUE, - swarm_label = "", contrast_label = "", - raw_marker_size = 2, raw_marker_alpha = 0.5) +A <- dabest_plot(dabest_twogroup_obj.mean_diff, + float_contrast = TRUE, + swarm_label = "", contrast_label = "", + raw_marker_size = 1, raw_marker_alpha = 1 +) +B <- dabest_plot(dabest_twogroup_obj.mean_diff, + float_contrast = TRUE, + swarm_label = "", contrast_label = "", + raw_marker_size = 2, raw_marker_alpha = 0.5 +) cowplot::plot_grid( plotlist = list(A, B), @@ -146,9 +156,11 @@ outcome), you can invert the vector passed to `contrast_ylim.` ```{r} -dabest_plot(dabest_multigroup_obj.mean_diff, float_contrast = FALSE, - contrast_label = "More negative is better!", - swarm_ylim = c(1, 5), contrast_ylim = c(0.7, -1.2)) +dabest_plot(dabest_multigroup_obj.mean_diff, + float_contrast = FALSE, + contrast_label = "More negative is better!", + swarm_ylim = c(1, 5), contrast_ylim = c(0.7, -1.2) +) ``` ### Palettes @@ -160,18 +172,22 @@ npg, aaas, nejm, lancet, jama, jco, ucscgb, d3, locuszoom, igv, cosmic, uchicago ordinal, viridis_d. ```{r} -npg <- dabest_plot(dabest_unpaired_props.mean_diff, - swarm_label = "", contrast_label = "", - custom_palette = "npg") -nejm <- dabest_plot(dabest_unpaired_props.mean_diff, - swarm_label = "", contrast_label = "", - custom_palette = "nejm") -jama <- dabest_plot(dabest_unpaired_props.mean_diff, - swarm_label = "", contrast_label = "", - custom_palette = "jama") -locuszoom <- dabest_plot(dabest_unpaired_props.mean_diff, - swarm_label = "", contrast_label = "", - custom_palette = "locuszoom") +npg <- dabest_plot(dabest_unpaired_props.mean_diff, + swarm_label = "", contrast_label = "", + custom_palette = "npg" +) +nejm <- dabest_plot(dabest_unpaired_props.mean_diff, + swarm_label = "", contrast_label = "", + custom_palette = "nejm" +) +jama <- dabest_plot(dabest_unpaired_props.mean_diff, + swarm_label = "", contrast_label = "", + custom_palette = "jama" +) +locuszoom <- dabest_plot(dabest_unpaired_props.mean_diff, + swarm_label = "", contrast_label = "", + custom_palette = "locuszoom" +) cowplot::plot_grid( plotlist = list(npg, nejm, jama, locuszoom), @@ -193,12 +209,16 @@ dabest_plot(dabest_paired_props.mean_diff, flow = FALSE, raw_bar_width = 0.15) - `asymmetric_side` Default "right". Can be either "right" or "left". Controls which side the swarm points are shown. ```{r} -right <- dabest_plot(dabest_twogroup_obj.mean_diff, float_contrast = FALSE, - swarm_label = "", contrast_label = "", - asymmetric_side = "right") -left <- dabest_plot(dabest_twogroup_obj.mean_diff, float_contrast = FALSE, - swarm_label = "", contrast_label = "", - asymmetric_side = "left") +right <- dabest_plot(dabest_twogroup_obj.mean_diff, + float_contrast = FALSE, + swarm_label = "", contrast_label = "", + asymmetric_side = "right" +) +left <- dabest_plot(dabest_twogroup_obj.mean_diff, + float_contrast = FALSE, + swarm_label = "", contrast_label = "", + asymmetric_side = "left" +) cowplot::plot_grid( plotlist = list(right, left), @@ -214,6 +234,8 @@ If False, the resulting graph would be identical to a multiple two-groups plot. the zero line of the effect size for the control-control group. - `show_baseline_ec` Default FALSE. Boolean value determining whether the baseline curve is shown. ```{r} -dabest_plot(dabest_multigroup_obj.mean_diff, float_contrast = FALSE, - show_baseline_ec = TRUE) -``` \ No newline at end of file +dabest_plot(dabest_multigroup_obj.mean_diff, + float_contrast = FALSE, + show_baseline_ec = TRUE +) +``` diff --git a/vignettes/tutorial_basics.Rmd b/vignettes/tutorial_basics.Rmd index fe53f05..bc9ec7d 100644 --- a/vignettes/tutorial_basics.Rmd +++ b/vignettes/tutorial_basics.Rmd @@ -31,7 +31,7 @@ each column corresponds to a group of observations. ```{r} set.seed(12345) # Fix the seed so the results are replicable. # pop_size = 10000 # Size of each population. -N = 20 +N <- 20 # Create samples c1 <- rnorm(N, mean = 3, sd = 0.4) @@ -46,7 +46,7 @@ t5 <- rnorm(N, mean = 3.25, sd = 0.4) t6 <- rnorm(N, mean = 3.25, sd = 0.4) # Add a `gender` column for coloring the data. -gender <- c(rep('Male', N/2), rep('Female', N/2)) +gender <- c(rep("Male", N / 2), rep("Female", N / 2)) # Add an `id` column for paired data plotting. id <- 1:N @@ -55,7 +55,8 @@ id <- 1:N df <- tibble::tibble( `Control 1` = c1, `Control 2` = c2, `Control 3` = c3, `Test 1` = t1, `Test 2` = t2, `Test 3` = t3, `Test 4` = t4, `Test 5` = t5, `Test 6` = t6, - Gender = gender, ID = id) + Gender = gender, ID = id +) df <- df %>% tidyr::gather(key = Group, value = Measurement, -ID, -Gender) @@ -65,7 +66,7 @@ Note that we have 9 groups (3 Control samples and 6 Test samples). Our dataset a This is known as a ‘long’ dataset. See this [writeup](https://simonejdemyr.com/r-tutorials/basics/wide-and-long/) for more details. ```{r} -head(df) +knitr::kable(head(df)) ``` ## Loading Data (Step 1) @@ -73,8 +74,10 @@ Before we create estimation plots and obtain confidence intervals for our effect We simply supply the DataFrame to `load()` along with x and y - the columns in the DataFrame that contains the treatment groups and measurement values respectively. We also must supply the two groups you want to compare in the `idx` argument as a vector or list. ```{r} -two_groups_unpaired <- load(df, x = Group, y = Measurement, - idx = c("Control 1", "Test 1")) +two_groups_unpaired <- load(df, + x = Group, y = Measurement, + idx = c("Control 1", "Test 1") +) ``` Printing this `dabestr` object gives you a gentle greeting, as well as the comparisons that can be computed. @@ -85,8 +88,10 @@ print(two_groups_unpaired) ### Changing statistical parameters You can change the width of the confidence interval that will be produced by manipulating the `ci` argument. ```{r} -two_groups_unpaired_ci90 = load(df, x = Group, y = Measurement, - idx = c("Control 1", "Test 1"), ci=90) +two_groups_unpaired_ci90 <- load(df, + x = Group, y = Measurement, + idx = c("Control 1", "Test 1"), ci = 90 +) ``` ```{r} @@ -115,21 +120,8 @@ For each comparison, the type of effect size is reported (here, it’s the “un This confidence interval is generated through bootstrap resampling. See Bootstrap Confidence Intervals for more details. -### Statistical tests -Since v0.3.0, dabestr will report the p-value of the non-parametric two-sided approximate permutation t-test. This is also known as the Monte Carlo permutation test. - -For unpaired comparisons, the p-values and test statistics of Welch’s t test, Student’s t test, and Mann-Whitney U test can be found in addition. For paired comparisons, the p-values and test statistics of the paired Student’s t and Wilcoxon tests are presented. - -```{r} -print(two_groups_unpaired.mean_diff$permtest_pvals$pvalues) -``` - -Let’s compute the Hedges’ g for our comparison. - -```{r, eval = FALSE} -two_groups_unpaired.hedges_g <- hedges_g(two_groups_unpaired) -print(two_groups_unpaired.hedges_g$permtest_pvals$pvalues) -``` +### P-values and statistical tests +Permutation P values are only provided to allow analysts to satisfy a customary requirement of scientific journals. DABEST's provision of P values does not constitute an endorsement of P values or null-hypothesis significance testing (NHST). If users need to include these in a study, we recommend that they (1) avoid performing NHST, i.e. do not compare P to an alpha, (2) never refer to the P values in the Results text, and (3) state in their Methods section that "No null-hypothesis significance testing was performed; P values are provided for legacy purposes only." ## Producing estimation plots (Step 3) To produce a **Gardner-Altman estimation plot**, simply use the `dabest_plot()`. You can read more about its genesis and design inspiration at Robust and Beautiful Statistical Visualization. @@ -143,13 +135,17 @@ dabest_plot(two_groups_unpaired.mean_diff) Instead of a Gardner-Altman plot, you can produce a **Cumming estimation plot** by setting `float_contrast = FALSE` in the `dabest_plot()` function This will plot the bootstrap effect sizes below the raw data, and also displays the the mean (gap) and ± standard deviation of each group (vertical ends) as gapped lines. This design was inspired by Edward Tufte’s dictum to maximise the data-ink ratio. ```{r, eval = FALSE} -dabest_plot(two_groups_unpaired.mean_diff, float_contrast = FALSE, - contrast_ylim = c(-0.3, 1.3)) +dabest_plot(two_groups_unpaired.mean_diff, + float_contrast = FALSE, + contrast_ylim = c(-0.3, 1.3) +) ``` ```{r, echo = FALSE} -pp_plot <- dabest_plot(two_groups_unpaired.mean_diff, float_contrast = FALSE, - contrast_ylim = c(-0.3, 1.3)) +pp_plot <- dabest_plot(two_groups_unpaired.mean_diff, + float_contrast = FALSE, + contrast_ylim = c(-0.3, 1.3) +) cowplot::plot_grid( plotlist = list(NULL, pp_plot, NULL), @@ -157,7 +153,6 @@ cowplot::plot_grid( ncol = 3, rel_widths = c(2.5, 5, 2.5) ) - ``` The `dabestr` package also implements a range of estimation plot designs aimed at depicting common experimental designs. @@ -167,20 +162,29 @@ The **multi-two-group estimation plot** tiles two or more Cumming plots horizont Thus, the lower axes in the Cumming plot is effectively a [forest plot](https://en.wikipedia.org/wiki/Forest_plot), used in meta-analyses to aggregate and compare data from different experiments. ```{r} -multi_2group <- load(df, x = Group, y = Measurement, - idx = list(c("Control 1", "Test 1"), - c("Control 2", "Test 2")) - ) -multi_2group %>% mean_diff() %>% dabest_plot() +multi_2group <- load(df, + x = Group, y = Measurement, + idx = list( + c("Control 1", "Test 1"), + c("Control 2", "Test 2") + ) +) +multi_2group %>% + mean_diff() %>% + dabest_plot() ``` The **shared control plot** displays another common experimental paradigm, where several test samples are compared against a common reference sample. This type of Cumming plot is automatically generated if the vector passed to `idx` has more than two data columns. ```{r} -shared_control <- load(df, x = Group, y = Measurement, - idx = c("Control 1", "Test 1", "Test 2", "Test 3", - "Test 4", "Test 5", "Test 6")) +shared_control <- load(df, + x = Group, y = Measurement, + idx = c( + "Control 1", "Test 1", "Test 2", "Test 3", + "Test 4", "Test 5", "Test 6" + ) +) print(shared_control) ``` @@ -197,11 +201,14 @@ dabest_plot(shared_control.mean_diff) `dabestr` thus empowers you to robustly perform and elegantly present complex visualizations and statistics. ```{r} -multi_groups <- load(df, x = Group, y = Measurement, - idx = list(c("Control 1", "Test 1"), - c("Control 2", "Test 2", "Test 3"), - c("Control 3", "Test 4", "Test 5", "Test 6")) - ) +multi_groups <- load(df, + x = Group, y = Measurement, + idx = list( + c("Control 1", "Test 1"), + c("Control 2", "Test 2", "Test 3"), + c("Control 3", "Test 4", "Test 5", "Test 6") + ) +) print(multi_groups) ``` @@ -215,6 +222,6 @@ print(multi_groups.mean_diff) ```{r} dabest_plot(multi_groups.mean_diff) ``` - + ## Using wide datasets -`dabestr` does not currently support the use of 'wide' data. To convert datasets from 'wide' to 'long'/'tidy', consider taking a look at [gather()](https://tidyr.tidyverse.org/reference/gather.html) as part of the [tidyr](https://tidyr.tidyverse.org) package. \ No newline at end of file +`dabestr` does not currently support the use of 'wide' data. To convert datasets from 'wide' to 'long'/'tidy', consider taking a look at [gather()](https://tidyr.tidyverse.org/reference/gather.html) as part of the [tidyr](https://tidyr.tidyverse.org) package. diff --git a/vignettes/tutorial_deltadelta.Rmd b/vignettes/tutorial_deltadelta.Rmd index 1e3ba86..dec8e94 100644 --- a/vignettes/tutorial_deltadelta.Rmd +++ b/vignettes/tutorial_deltadelta.Rmd @@ -14,6 +14,10 @@ knitr::opts_chunk$set( ) ``` +```{r, include = FALSE, warning = FALSE, message = FALSE} +library(dabestr) +``` + This vignette documents how `dabestr` is able to compute the calculation of delta-delta, an experimental function that allows the comparison between two bootstrapped effect sizes computed from two independent categorical variables. Many experimental designs investigate the effects of two interacting independent variables on a dependent variable. The delta-delta effect size lets us distill the net effect of the two variables. To illustrate this, let’s delve into the following problem. @@ -22,21 +26,21 @@ Consider an experiment where we test the efficacy of a drug named `Drug` on a di Effectively, we have 4 groups of subjects for comparison. -```{=latex} -\begin{tabular}{|l|c|c|} - \hline - & Wild Type & Mutant \\ - \hline - Drug & X_D, W & X_D, M \\ - \hline - Placebo & X_P, W & X_P, M \\ - \hline -\end{tabular} +```{r, echo = FALSE, warning = FALSE, message = FALSE} +df <- data.frame( + `s` = c("Drug", "Placebo"), + `Wild type` = c("$X_D, W$", "$X_P, W$"), + `Mutant` = c("$X_D, M$", "$X_P, M$") +) +colnames(df) <- c(" ", "Wild type", "Mutant") +knitr::kable(df, escape = FALSE) %>% + kableExtra::column_spec(1, bold = TRUE) %>% + kableExtra::column_spec(1:2, border_right = TRUE) ``` There are 2 `Treatment` conditions, `Placebo` (control group) and `Drug` (test group). There are 2 `Genotypes`: `W` (wild type population) and `M` (mutant population). In addition, each experiment was done twice (`Rep1` and `Rep2`). We shall do a few analyses to visualise these differences in a simulated dataset. -```{r setup, warning = FALSE, message = FALSE} +```{r setup, eval = FALSE} library(dabestr) ``` @@ -44,36 +48,38 @@ library(dabestr) ```{r} set.seed(12345) # Fix the seed so the results are replicable. # pop_size = 10000 # Size of each population. -N = 20 # The number of samples taken from each population +N <- 20 # The number of samples taken from each population # Create samples -placebo <- rnorm(N, mean = 3, sd = 0.4) -drug <- rnorm(N, mean = 3.5, sd = 0.75) +placebo <- rnorm(N / 2, mean = 4, sd = 0.4) +placebo <- c(placebo, rnorm(N / 2, mean = 2.8, sd = 0.4)) +drug <- rnorm(N / 2, mean = 3, sd = 0.4) +drug <- c(drug, rnorm(N / 2, mean = 2.5, sd = 0.4)) # Add a `Genotype` column as the second variable -genotype <- c(rep('M', N/2), rep('W', N/2)) +genotype <- c(rep("M", N / 2), rep("W", N / 2)) # Add an `id` column for paired data plotting. id <- 1:N # Add a `Rep` column as the first variable for the 2 replicates of experiments done -Rep <- rep(c("Rep1", "Rep2"), N/2) +Rep <- rep(c("Rep1", "Rep2"), N / 2) # Combine all columns into a DataFrame. df <- tibble::tibble( Placebo = placebo, Drug = drug, - Genotype = genotype, + Genotype = genotype, ID = id, - Rep = Rep) + Rep = Rep +) df <- df %>% tidyr::gather(key = Treatment, value = Measurement, -ID, -Genotype, -Rep) ``` ```{r} -df_head <- head(df) -knitr::kable(df_head) +knitr::kable(head(df)) ``` ## Loading Data @@ -84,17 +90,21 @@ For delta-delta plots, the `idx` is a non-compulsory input. ## Unpaired Data ```{r, eval = FALSE} -unpaired_delta2 <- load(df, x = Genotype, y = Measurement, - experiment = Treatment, colour = Genotype, - delta2 = TRUE) +unpaired_delta2 <- load(df, + x = Genotype, y = Measurement, + experiment = Treatment, colour = Genotype, + delta2 = TRUE +) ``` -```{r, echo = TRUE} -unpaired_delta2 <- load(df, x = Genotype, y = Measurement, - experiment = Treatment, colour = Genotype, - delta2 = TRUE, - experiment_label = c("Placebo", "Drug"), - x1_level = c("W", "M")) +```{r, echo = FALSE} +unpaired_delta2 <- load(df, + x = Genotype, y = Measurement, + experiment = Treatment, colour = Genotype, + delta2 = TRUE, + experiment_label = c("Placebo", "Drug"), + x1_level = c("W", "M") +) ``` The above function creates the following `dabest` object: @@ -133,11 +143,13 @@ where $\bar{X}$ is the sample mean, $\Delta$ is the mean difference. In the example above, we used the convention of "test - control' but you can manipulate the orders of experiment groups as well as the horizontal axis variable by setting `experiment_label` and `x1_level`. ```{r} -unpaired_delta2_specified.mean_diff <- load(df, x = Genotype, y = Measurement, - experiment = Treatment, colour = Genotype, - delta2 = TRUE, - experiment_label = c("Drug", "Placebo"), - x1_level = c("M", "W")) %>% +unpaired_delta2_specified.mean_diff <- load(df, + x = Genotype, y = Measurement, + experiment = Treatment, colour = Genotype, + delta2 = TRUE, + experiment_label = c("Drug", "Placebo"), + x1_level = c("M", "W") +) %>% mean_diff() dabest_plot(unpaired_delta2_specified.mean_diff) @@ -149,19 +161,24 @@ The delta - delta function also supports paired data, which is useful for us to Although the `idx` is a non-compulsory parameter, it is still possible to have it as an input to adjust the order as opposed to using `experiment_label` and `x1_level`. ```{r} -paired_delta2.mean_diff <- load(df, x = Treatment, y = Measurement, - experiment = Genotype, colour = Rep, - delta2 = TRUE, - idx = list(c("Placebo W","Drug W"), - c("Placebo M","Drug M")), - paired = "baseline", id_col = ID) %>% +paired_delta2.mean_diff <- load(df, + x = Treatment, y = Measurement, + experiment = Genotype, colour = Rep, + delta2 = TRUE, + idx = list( + c("Placebo W", "Drug W"), + c("Placebo M", "Drug M") + ), + paired = "baseline", id_col = ID +) %>% mean_diff() dabest_plot(paired_delta2.mean_diff, - raw_marker_size = 0.5, raw_marker_alpha = 0.3) + raw_marker_size = 0.5, raw_marker_alpha = 0.3 +) ``` -We see that the drug had a non-specific effect of -0.321 [95%CI -0.498, -0.131] on wild type subjects even when they were not sick, and it had a bigger effect of -1.22 [95%CI -1.52, -0.906] in mutant subjects. In this visualisation, we can see the delta-delta value of -0.903 [95%CI -1.21, -0.587] as the net effect of the drug accounting for non-specific actions in healthy individuals +We see that the drug had a non-specific effect of `r format(paired_delta2.mean_diff[["boot_result"]][["difference"]][[1]], digits=3)` [95%CI `r format(paired_delta2.mean_diff[["boot_result"]][["bca_ci_low"]][[1]], digits=3)` , `r format(paired_delta2.mean_diff[["boot_result"]][["bca_ci_high"]][[1]], digits=3)`] on wild type subjects even when they were not sick, and it had a bigger effect of `r format(paired_delta2.mean_diff[["boot_result"]][["difference"]][[2]], digits=3)` [95%CI `r format(paired_delta2.mean_diff[["boot_result"]][["bca_ci_low"]][[2]], digits=3)` , `r format(paired_delta2.mean_diff[["boot_result"]][["bca_ci_high"]][[2]], digits=3)`] in mutant subjects. In this visualisation, we can see the delta-delta value of `r format(paired_delta2.mean_diff[["boot_result"]][["difference"]][[3]], digits=3)` [95%CI `r format(paired_delta2.mean_diff[["boot_result"]][["bca_ci_low"]][[3]], digits=3)` , `r format(paired_delta2.mean_diff[["boot_result"]][["bca_ci_high"]][[3]], digits=3)`] as the net effect of the drug accounting for non-specific actions in healthy individuals Mean difference between drug and placebo treatments in wild type subjects is: @@ -197,4 +214,23 @@ You can find all outputs of the delta - delta calculation by assessing the colum ```{r} print(unpaired_delta2.mean_diff$boot_result) -``` \ No newline at end of file +``` + +If you want to extract the permutations, permutation test’s p values, the statistical tests and the p value results, you can access it with the columns `permutation_test_results`, `pval_permtest`, `pval_for_tests` and `pvalues` respectively. + +P values for permutation tests `pval_permtest` (and the permutation calculations and results accessed by running the commented out session). +```{r} +# print(unpaired_delta2.mean_diff$permtest_pvals$permutation_test_results) +print(unpaired_delta2.mean_diff$permtest_pvals$pval_permtest) +``` + +An representative p value for statistical tests (`pval_for_tests`) + +```{r} +print(unpaired_delta2.mean_diff$permtest_pvals$pval_for_tests) +``` + +Statistical test results and `pvalues`. +```{r} +print(unpaired_delta2.mean_diff$permtest_pvals$pvalues) +``` diff --git a/vignettes/tutorial_minimeta.Rmd b/vignettes/tutorial_minimeta.Rmd index b77bae5..d74fdf6 100644 --- a/vignettes/tutorial_minimeta.Rmd +++ b/vignettes/tutorial_minimeta.Rmd @@ -39,7 +39,7 @@ library(dabestr) ```{r} set.seed(12345) # Fix the seed so the results are replicable. # pop_size = 10000 # Size of each population. -N = 20 # The number of samples taken from each population +N <- 20 # The number of samples taken from each population # Create samples c1 <- rnorm(N, mean = 3, sd = 0.4) @@ -51,7 +51,7 @@ t2 <- rnorm(N, mean = 2.5, sd = 0.6) t3 <- rnorm(N, mean = 3, sd = 0.75) # Add a `gender` column for coloring the data. -gender <- c(rep('Male', N/2), rep('Female', N/2)) +gender <- c(rep("Male", N / 2), rep("Female", N / 2)) # Add an `id` column for paired data plotting. id <- 1:N @@ -60,7 +60,8 @@ id <- 1:N df <- tibble::tibble( `Control 1` = c1, `Control 2` = c2, `Control 3` = c3, `Test 1` = t1, `Test 2` = t2, `Test 3` = t3, - Gender = gender, ID = id) + Gender = gender, ID = id +) df <- df %>% tidyr::gather(key = Group, value = Measurement, -ID, -Gender) @@ -68,22 +69,25 @@ df <- df %>% We now have 3 Control and 3 Test groups, simulating 3 replicates of the same experiment. Our dataset also has a non-numerical column indicating gender, and another column indicating the identity of each observation. -This is known as a ‘long’ dataset. See this [writeup](https://sejdemyr.github.io/r-tutorials/basics/wide-and-long/) for more details. +This is known as a ‘long’ dataset. See this [writeup](https://simonejdemyr.com/r-tutorials/basics/wide-and-long/) for more details. ```{r} -df_head <- head(df) -knitr::kable(df_head) +knitr::kable(head(df)) ``` ## Loading Data Next, we load data as we would normally using `load()`. This time, however, we also specify the argument `minimeta = TRUE` As we are loading three experiments’ worth of data, `idx` is passed as a list of vectors, as follows: ```{r} -unpaired <- load(df, x = Group, y = Measurement, - idx = list(c("Control 1", "Test 1"), - c("Control 2", "Test 2"), - c("Control 3", "Test 3")), - minimeta = TRUE) +unpaired <- load(df, + x = Group, y = Measurement, + idx = list( + c("Control 1", "Test 1"), + c("Control 2", "Test 2"), + c("Control 3", "Test 3") + ), + minimeta = TRUE +) ``` When this `dabest` object is printed, it should show that effect sizes will be calculated for each group, as well as the weighted delta. Note once again that weighted delta will only be calculated for mean difference. @@ -122,12 +126,16 @@ dabest_plot(unpaired.mean_diff, show_mini_meta = FALSE) The tutorial up to this point has dealt with unpaired data. If your data is paired data, the process for loading, plotting and accessing the data is the same as for unpaired data, except the argument `paired = "sequential"` or `paired = "baseline"` and an appropriate `id_col` are passed during the `load()` step, as follows: ```{r} -paired.mean_diff <- load(df, x = Group, y = Measurement, - idx = list(c("Control 1", "Test 1"), - c("Control 2", "Test 2"), - c("Control 3", "Test 3")), - paired = "baseline", id_col = ID, - minimeta = TRUE) %>% +paired.mean_diff <- load(df, + x = Group, y = Measurement, + idx = list( + c("Control 1", "Test 1"), + c("Control 2", "Test 2"), + c("Control 3", "Test 3") + ), + paired = "baseline", id_col = ID, + minimeta = TRUE +) %>% mean_diff() dabest_plot(paired.mean_diff, raw_marker_size = 0.5, raw_marker_alpha = 0.3) diff --git a/vignettes/tutorial_proportion_plots.Rmd b/vignettes/tutorial_proportion_plots.Rmd index b709dd8..3693fba 100644 --- a/vignettes/tutorial_proportion_plots.Rmd +++ b/vignettes/tutorial_proportion_plots.Rmd @@ -25,10 +25,10 @@ library(dabestr) ## Create dataset for demo ```{r} set.seed(12345) # Fix the seed so the results are replicable. -N = 40 # The number of samples taken from each population +N <- 40 # The number of samples taken from each population # Create samples -size = 1 +size <- 1 c1 <- rbinom(N, size, prob = 0.2) c2 <- rbinom(N, size, prob = 0.2) c3 <- rbinom(N, size, prob = 0.8) @@ -42,7 +42,7 @@ t6 <- rbinom(N, size, prob = 0.6) t7 <- c(rep(1, N)) # Add a `gender` column for coloring the data. -gender <- c(rep('Male', N/2), rep('Female', N/2)) +gender <- c(rep("Male", N / 2), rep("Female", N / 2)) # Add an `id` column for paired data plotting. id <- 1:N @@ -50,22 +50,27 @@ id <- 1:N # Combine samples and gender into a DataFrame. df <- tibble::tibble( `Control 1` = c1, `Control 2` = c2, `Control 3` = c3, - `Test 1` = t1, `Test 2` = t2, `Test 3` = t3, `Test 4` = t4, `Test 5` = t5, + `Test 1` = t1, `Test 2` = t2, `Test 3` = t3, `Test 4` = t4, `Test 5` = t5, `Test 6` = t6, `Test 7` = t7, - Gender = gender, ID = id) + Gender = gender, ID = id +) df <- df %>% tidyr::gather(key = Group, value = Success, -ID, -Gender) +``` -head(df) +```{r} +knitr::kable(head(df)) ``` ## Loading Data When loading data, specify `proportional = TRUE`. ```{r} -two_groups_unpaired <- load(df, x = Group, y = Success, - idx = c("Control 1", "Test 1"), - proportional = TRUE) +two_groups_unpaired <- load(df, + x = Group, y = Success, + idx = c("Control 1", "Test 1"), + proportional = TRUE +) print(two_groups_unpaired) ``` @@ -111,8 +116,10 @@ dabest_plot(two_groups_unpaired.mean_diff, float_contrast = FALSE) ``` ```{r, echo = FALSE} -pp_plot <- dabest_plot(two_groups_unpaired.mean_diff, float_contrast = FALSE, - swarm_y_text = 11, contrast_y_text = 11) +pp_plot <- dabest_plot(two_groups_unpaired.mean_diff, + float_contrast = FALSE, + swarm_y_text = 11, contrast_y_text = 11 +) cowplot::plot_grid( plotlist = list(NULL, pp_plot, NULL), @@ -130,8 +137,9 @@ dabest_plot(two_groups_unpaired.mean_diff, raw_bar_width = 0.15) `swarm_label` and `contrast_label` can be used to set labels for the y-axis of the bar plot and the contrast plot. ```{r} -dabest_plot(two_groups_unpaired.mean_diff, - swarm_label = "success", contrast_label = "difference") +dabest_plot(two_groups_unpaired.mean_diff, + swarm_label = "success", contrast_label = "difference" +) ``` ## Producing Paired Proportion Plots @@ -140,10 +148,12 @@ For paired version of proportional plot, we adapt the style of Sankey Diagram. T Similar to the unpaired version, the `dabest_plot()` function is used to produce a **Gardner-Altman estimation plot**, the only difference is that the `paired` parameter is set to either "baseline" or "sequential" when loading data. ```{r} -two_groups_baseline.mean_diff <- load(df, x = Group, y = Success, - idx = c("Control 1", "Test 1"), - proportional = TRUE, - paired = "baseline", id_col = ID) %>% +two_groups_baseline.mean_diff <- load(df, + x = Group, y = Success, + idx = c("Control 1", "Test 1"), + proportional = TRUE, + paired = "baseline", id_col = ID +) %>% mean_diff() dabest_plot(two_groups_baseline.mean_diff) @@ -156,9 +166,11 @@ dabest_plot(two_groups_baseline.mean_diff, float_contrast = FALSE) ``` ```{r, echo = FALSE} -pp_plot <- dabest_plot(two_groups_baseline.mean_diff, float_contrast = FALSE, - swarm_y_text = 11, contrast_y_text = 11, - raw_bar_width = 0.2) +pp_plot <- dabest_plot(two_groups_baseline.mean_diff, + float_contrast = FALSE, + swarm_y_text = 11, contrast_y_text = 11, + raw_bar_width = 0.2 +) cowplot::plot_grid( plotlist = list(NULL, pp_plot, NULL), @@ -175,31 +187,49 @@ Repeated measures is also supported in paired proportional plot, by changing the By default, the raw data plot (upper part) in both "baseline" and "sequential" repeated measures are the same, the only difference is the lower part. For detailed information about repeated measures, please refer to `vignette("tutorial_repeated_measures")`. ```{r} -multi_group_baseline.mean_diff <- load(df, x = Group, y = Success, - idx = list(c("Control 1", "Test 1", - "Test 2", "Test 3"), - c("Test 4", "Test 5", - "Test 6")), - proportional = TRUE, - paired = "baseline", id_col = ID) %>% +multi_group_baseline.mean_diff <- load(df, + x = Group, y = Success, + idx = list( + c( + "Control 1", "Test 1", + "Test 2", "Test 3" + ), + c( + "Test 4", "Test 5", + "Test 6" + ) + ), + proportional = TRUE, + paired = "baseline", id_col = ID +) %>% mean_diff() -dabest_plot(multi_group_baseline.mean_diff, - swarm_y_text = 11, contrast_y_text = 11) +dabest_plot(multi_group_baseline.mean_diff, + swarm_y_text = 11, contrast_y_text = 11 +) ``` ```{r} -multi_group_sequential.mean_diff <- load(df, x = Group, y = Success, - idx = list(c("Control 1", "Test 1", - "Test 2", "Test 3"), - c("Test 4", "Test 5", - "Test 6")), - proportional = TRUE, - paired = "sequential", id_col = ID) %>% +multi_group_sequential.mean_diff <- load(df, + x = Group, y = Success, + idx = list( + c( + "Control 1", "Test 1", + "Test 2", "Test 3" + ), + c( + "Test 4", "Test 5", + "Test 6" + ) + ), + proportional = TRUE, + paired = "sequential", id_col = ID +) %>% mean_diff() dabest_plot(multi_group_sequential.mean_diff, - swarm_y_text = 11, contrast_y_text = 11) + swarm_y_text = 11, contrast_y_text = 11 +) ``` If you want to specify the order of the groups, you can use the `idx` parameter in the `load()` function. @@ -207,17 +237,22 @@ If you want to specify the order of the groups, you can use the `idx` parameter For all the groups to be compared together, you can put all the groups in the `idx` parameter in the `load()` function in a singular vector/non-nested list. ```{r} -multi_group_baseline_specify.mean_diff <- load(df, x = Group, y = Success, - idx = c("Control 1", "Test 1", - "Test 2", "Test 3", - "Test 4", "Test 5", - "Test 6"), - proportional = TRUE, - paired = "baseline", id_col = ID) %>% +multi_group_baseline_specify.mean_diff <- load(df, + x = Group, y = Success, + idx = c( + "Control 1", "Test 1", + "Test 2", "Test 3", + "Test 4", "Test 5", + "Test 6" + ), + proportional = TRUE, + paired = "baseline", id_col = ID +) %>% mean_diff() dabest_plot(multi_group_baseline_specify.mean_diff, - swarm_y_text = 11, contrast_y_text = 11) + swarm_y_text = 11, contrast_y_text = 11 +) ``` ### Adjustment parameters @@ -226,17 +261,25 @@ By changing the `sankey` and `flow` parameter, you can produce different types o By default, the `sankey` and `flow` are set to `TRUE` to cater the need for the repeated measures. When `sankey` is set to `FALSE`, DABEST will generate a bar plot with similar aesthetic to the paired proportional plot. When `flow` is set to `FALSE`, each group of comparison form a sankey diagram which does not connect to other groups of comparison. ```{r} -separate_control.mean_diff <- load(df, x = Group, y = Success, - idx = list(c("Control 1", "Test 1"), - c("Test 2", "Test 3"), - c("Test 4", "Test 5", "Test 6")), - proportional = TRUE, - paired = "sequential", id_col = ID) %>% +separate_control.mean_diff <- load(df, + x = Group, y = Success, + idx = list( + c("Control 1", "Test 1"), + c("Test 2", "Test 3"), + c("Test 4", "Test 5", "Test 6") + ), + proportional = TRUE, + paired = "sequential", id_col = ID +) %>% mean_diff() dabest_plot(separate_control.mean_diff, swarm_y_text = 11, contrast_y_text = 11) -dabest_plot(separate_control.mean_diff, swarm_y_text = 11, contrast_y_text = 11, - sankey = FALSE) -dabest_plot(separate_control.mean_diff, swarm_y_text = 11, contrast_y_text = 11, - flow = FALSE) -``` \ No newline at end of file +dabest_plot(separate_control.mean_diff, + swarm_y_text = 11, contrast_y_text = 11, + sankey = FALSE +) +dabest_plot(separate_control.mean_diff, + swarm_y_text = 11, contrast_y_text = 11, + flow = FALSE +) +``` diff --git a/vignettes/tutorial_repeated_measures.Rmd b/vignettes/tutorial_repeated_measures.Rmd index 04c7eec..851e26a 100644 --- a/vignettes/tutorial_repeated_measures.Rmd +++ b/vignettes/tutorial_repeated_measures.Rmd @@ -31,7 +31,7 @@ library(dabestr) ```{r} set.seed(12345) # Fix the seed so the results are replicable. # pop_size = 10000 # Size of each population. -N = 20 # The number of samples taken from each population +N <- 20 # The number of samples taken from each population # Create samples c1 <- rnorm(N, mean = 3, sd = 0.4) @@ -46,7 +46,7 @@ t5 <- rnorm(N, mean = 3.25, sd = 0.4) t6 <- rnorm(N, mean = 3.25, sd = 0.4) # Add a `gender` column for coloring the data. -gender <- c(rep('Male', N/2), rep('Female', N/2)) +gender <- c(rep("Male", N / 2), rep("Female", N / 2)) # Add an `id` column for paired data plotting. id <- 1:N @@ -55,7 +55,8 @@ id <- 1:N df <- tibble::tibble( `Control 1` = c1, `Control 2` = c2, `Control 3` = c3, `Test 1` = t1, `Test 2` = t2, `Test 3` = t3, `Test 4` = t4, `Test 5` = t5, `Test 6` = t6, - Gender = gender, ID = id) + Gender = gender, ID = id +) df <- df %>% tidyr::gather(key = Group, value = Measurement, -ID, -Gender) @@ -63,17 +64,21 @@ df <- df %>% ## Loading Data ```{r} -two_groups_paired_sequential <- load(df, x = Group, y = Measurement, - idx = c("Control 1", "Test 1"), - paired = "sequential", id_col = ID) +two_groups_paired_sequential <- load(df, + x = Group, y = Measurement, + idx = c("Control 1", "Test 1"), + paired = "sequential", id_col = ID +) print(two_groups_paired_sequential) ``` ```{r} -two_groups_paired_baseline <- load(df, x = Group, y = Measurement, - idx = c("Control 1", "Test 1"), - paired = "baseline", id_col = ID) +two_groups_paired_baseline <- load(df, + x = Group, y = Measurement, + idx = c("Control 1", "Test 1"), + paired = "baseline", id_col = ID +) print(two_groups_paired_baseline) ``` @@ -96,19 +101,24 @@ For paired data, we use [slopegraphs](https://www.edwardtufte.com/bboard/q-and-a ```{r} dabest_plot(two_groups_paired_sequential.mean_diff, - raw_marker_size = 0.5, raw_marker_alpha = 0.3) + raw_marker_size = 0.5, raw_marker_alpha = 0.3 +) ``` ```{r, eval = FALSE} -dabest_plot(two_groups_paired_sequential.mean_diff, float_contrast = FALSE, - raw_marker_size = 0.5, raw_marker_alpha = 0.3, - contrast_ylim = c(-0.3, 1.3)) +dabest_plot(two_groups_paired_sequential.mean_diff, + float_contrast = FALSE, + raw_marker_size = 0.5, raw_marker_alpha = 0.3, + contrast_ylim = c(-0.3, 1.3) +) ``` ```{r, echo = FALSE} -pp_plot <- dabest_plot(two_groups_paired_sequential.mean_diff, float_contrast = FALSE, - raw_marker_size = 0.5, raw_marker_alpha = 0.3, - contrast_ylim = c(-0.3, 1.3)) +pp_plot <- dabest_plot(two_groups_paired_sequential.mean_diff, + float_contrast = FALSE, + raw_marker_size = 0.5, raw_marker_alpha = 0.3, + contrast_ylim = c(-0.3, 1.3) +) cowplot::plot_grid( plotlist = list(NULL, pp_plot, NULL), @@ -120,19 +130,24 @@ cowplot::plot_grid( ```{r} dabest_plot(two_groups_paired_baseline.mean_diff, - raw_marker_size = 0.5, raw_marker_alpha = 0.3) + raw_marker_size = 0.5, raw_marker_alpha = 0.3 +) ``` ```{r, eval = FALSE} -dabest_plot(two_groups_paired_baseline.mean_diff, float_contrast = FALSE, - raw_marker_size = 0.5, raw_marker_alpha = 0.3, - contrast_ylim = c(-0.3, 1.3)) +dabest_plot(two_groups_paired_baseline.mean_diff, + float_contrast = FALSE, + raw_marker_size = 0.5, raw_marker_alpha = 0.3, + contrast_ylim = c(-0.3, 1.3) +) ``` ```{r, echo = FALSE} -pp_plot <- dabest_plot(two_groups_paired_baseline.mean_diff, float_contrast = FALSE, - raw_marker_size = 0.5, raw_marker_alpha = 0.3, - contrast_ylim = c(-0.3, 1.3)) +pp_plot <- dabest_plot(two_groups_paired_baseline.mean_diff, + float_contrast = FALSE, + raw_marker_size = 0.5, raw_marker_alpha = 0.3, + contrast_ylim = c(-0.3, 1.3) +) cowplot::plot_grid( plotlist = list(NULL, pp_plot, NULL), @@ -145,10 +160,14 @@ cowplot::plot_grid( You can also create repeated-measures plots with multiple test groups. In this case, declaring `paired` to be "sequential" or "baseline" will generate the same slopegraph, reflecting the repeated-measures experimental design, but different contrast plots, to show the "sequential" or "baseline" comparison: ```{r} -sequential_repeated_measures.mean_diff <- load(df, x = Group, y = Measurement, - idx = c("Control 1", "Test 1", - "Test 2", "Test 3"), - paired = "sequential", id_col = ID) %>% +sequential_repeated_measures.mean_diff <- load(df, + x = Group, y = Measurement, + idx = c( + "Control 1", "Test 1", + "Test 2", "Test 3" + ), + paired = "sequential", id_col = ID +) %>% mean_diff() print(sequential_repeated_measures.mean_diff) @@ -156,14 +175,19 @@ print(sequential_repeated_measures.mean_diff) ```{r} dabest_plot(sequential_repeated_measures.mean_diff, - raw_marker_size = 0.5, raw_marker_alpha = 0.3) + raw_marker_size = 0.5, raw_marker_alpha = 0.3 +) ``` ```{r} -baseline_repeated_measures.mean_diff <- load(df, x = Group, y = Measurement, - idx = c("Control 1", "Test 1", - "Test 2", "Test 3"), - paired = "baseline", id_col = ID) %>% +baseline_repeated_measures.mean_diff <- load(df, + x = Group, y = Measurement, + idx = c( + "Control 1", "Test 1", + "Test 2", "Test 3" + ), + paired = "baseline", id_col = ID +) %>% mean_diff() print(baseline_repeated_measures.mean_diff) @@ -171,20 +195,30 @@ print(baseline_repeated_measures.mean_diff) ```{r} dabest_plot(baseline_repeated_measures.mean_diff, - raw_marker_size = 0.5, raw_marker_alpha = 0.3) + raw_marker_size = 0.5, raw_marker_alpha = 0.3 +) ``` As with unpaired data, `dabestr` empowers you to perform complex visualizations and statistics for paired data as well. ```{r} -multi_baseline_repeated_measures.mean_diff <- load(df, x = Group, y = Measurement, - idx = list(c("Control 1", "Test 1", - "Test 2", "Test 3"), - c("Control 2", "Test 4", - "Test 5", "Test 6")), - paired = "baseline", id_col = ID) %>% +multi_baseline_repeated_measures.mean_diff <- load(df, + x = Group, y = Measurement, + idx = list( + c( + "Control 1", "Test 1", + "Test 2", "Test 3" + ), + c( + "Control 2", "Test 4", + "Test 5", "Test 6" + ) + ), + paired = "baseline", id_col = ID +) %>% mean_diff() dabest_plot(multi_baseline_repeated_measures.mean_diff, - raw_marker_size = 0.5, raw_marker_alpha = 0.3) -``` \ No newline at end of file + raw_marker_size = 0.5, raw_marker_alpha = 0.3 +) +```