Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed mismatch Ns detection for paired data #155

Merged
merged 5 commits into from
Oct 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions R/001_api.R
Original file line number Diff line number Diff line change
Expand Up @@ -282,8 +282,8 @@ load <- function(
unique_x <- unique(data[[name_x]])
for (i in 1:length(unlist_idx)) {
if (isFALSE(unlist_idx[i] %in% unique_x)) {
cli::cli_abort(c("{.field idx} contains treatment groups not present in {.field x}.",
"x" = "Ensure that idx does not have any treatment groups not present in dataset."
cli::cli_abort(c("{unlist_idx[i]} not present in {.field x}.",
"x" = "Ensure that idx does not have any control/treatment groups not present in dataset."
))
}
}
Expand All @@ -310,6 +310,15 @@ load <- function(
dplyr::count()
Ns$swarmticklabs <- do.call(paste, c(Ns[c(name_x, "n")], sep = "\nN = "))

## Check to ensure control & treatment groups have the same sample size if is_paired is TRUE
if (is_paired) {
if (length(unique(Ns$n)) > 1) {
cli::cli_abort(c("{.field data} is paired, as indicated by {.field paired} but size of control and treatment groups are not equal.",
"x" = "Ensure that the size of control and treatment groups are the same for paired comparisons."
))
}
}

# Extending ylim for plotting
ylim[1] <- ylim[1] - (ylim[2] - ylim[1]) / 25
ylim[2] <- ylim[2] + (ylim[2] - ylim[1]) / 25
Expand Down
44 changes: 22 additions & 22 deletions R/002_df_for_plots.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#' Generates the dataframe requires for the tufte lines plot component in raw plots.
#'
#' Generates the dataframe requires for the tufte lines plot component in raw plots.
#'
#' This function returns a dataframe with summary statistics, such as mean, standard
#' deviation and quantiles (depending on the effect size) based on the input dataset.
#'
#' deviation and quantiles (depending on the effect size) based on the input dataset.
#'
#' @param raw_data The tidy dataset passed to [load()] that was cleaned and altered for plotting.
#' @param enquo_x Quosure of x as initially passed to [load()].
#' @param enquo_y Quosure of y as initially passed to [load()].
Expand Down Expand Up @@ -53,16 +53,16 @@ create_df_for_tufte <- function(raw_data, enquo_x, enquo_y, proportional, gap, e
return(tufte_lines_df)
}
#' Generates df for tufte lines plot component for raw plot WITH flow = FALSE.
#'
#'
#' This function rearranges and duplicates rows in a given dataframe (containing
#' summary statistics) in the order of the unlisted given list of vectors.
#' summary statistics) in the order of the unlisted given list of vectors.
#'
#' @param idx List of vectors of control-test groupings that determines the arrangement
#' of the final dataframe output.
#' @param tufte_lines_df dataframe for plotting of the tufte lines plot component as
#' @param tufte_lines_df dataframe for plotting of the tufte lines plot component as
#' generated by [create_df_for_tufte()].
#' @param enquo_x Quosure of x as initially passed to [load()].
#'
#'
#' @returns dataframe
#' @noRd
create_dfs_for_nonflow_tufte_lines <- function(idx,
Expand All @@ -81,11 +81,11 @@ create_dfs_for_nonflow_tufte_lines <- function(idx,
}

#' Generates df for sankey plot component for raw plot WITH flow = TRUE
#'
#' This function generates data frames necessary for generating
#' sankey diagrams for visualizing flows and proportions of categorical data.
#'
#'
#'
#' This function generates data frames necessary for generating
#' sankey diagrams for visualizing flows and proportions of categorical data.
#'
#'
#' @param float_contrast Boolean value determining if a Gardner-Altman plot or Cumming estimation plot will be produced.
#' @param raw_data The tidy dataset passed to [load()] that was cleaned and altered for plotting.
#' @param proportional_data List of calculations related to the plotting of proportion plots.
Expand All @@ -94,7 +94,7 @@ create_dfs_for_nonflow_tufte_lines <- function(idx,
#' @param enquo_id_col Quosure of id_col as initially passed to [load()].
#' @param idx List of vectors of control-test groupings that determines the arrangement
#' of the final dataframe output.
#' @param scale_factor_sig Numeric value determining the significance of the scale factor.
#' @param scale_factor_sig Numeric value determining the significance of the scale factor.
#' @param bar_width Numeric value determining the width of the bar in the sankey diagram.
#' @param gap Integer value specifying the amount of gap for each tufte line.
#' @param sankey Boolean value determining if the flows between the bar charts will be plotted.
Expand Down Expand Up @@ -349,11 +349,11 @@ create_dfs_for_sankey <- function(
}

#' Generates df for xaxis redraw for float_contrast = FALSE plot
#'
#' This function generates data frames that
#'
#' This function generates data frames that
#' define the positions of lines and ticks on an x-axis for when float_contrast = FALSE
#' to produce a Cumming estimation plot.
#'
#'
#' @param idx List of vectors of control-test groupings that determines the arrangement
#' of the final dataframe output.
#'
Expand Down Expand Up @@ -391,11 +391,11 @@ create_dfs_for_xaxis_redraw <- function(idx) {
return(dfs_for_xaxis_redraw)
}

#' Generates df for proportion bar component
#'
#' This function generates data frames to
#' Generates df for proportion bar component
#'
#' This function generates data frames to
#' represent bars with proportional data in a graphical display.
#'
#'
#' @param bar_width Numeric value determining the width of the bar in the sankey diagram.
#' @param gap Integer value specifying the amount of gap for each tufte line.
#'
Expand Down Expand Up @@ -445,7 +445,7 @@ create_dfs_for_proportion_bar <- function(proportion_success, bar_width = 0.3, g

#' Generates df for baseline violin plot WITH flow = TRUE
#'
#' This function generates data frames to represent
#' This function generates data frames to represent
#' data points for plotting violin plots
#'
#' @param boots Boot result obtained from boot.ci
Expand Down
2 changes: 1 addition & 1 deletion R/002_plot_api.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#' Generates a ggplot object containing plot components for the rawplot component
#' Generates a ggplot object containing plot components for the rawplot component
#' of an estimation plot.
#'
#' This function takes in a dabest_effectsize_obj object and applies the [create_rawplot_components()]
Expand Down
16 changes: 8 additions & 8 deletions R/002_plot_components.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@

#' Generates list of TRUE/FALSE for raw plot components that will be built
#'
#' This function generates a list of booleans determining whether certain
#' This function generates a list of booleans determining whether certain
#' plot components will be constructed for the rawplot.
#'
#' @param proportional Boolean value as initially passed to [load()].
#' @param is_paired Boolean value determining if it is a paired plot.
#' @param float_contrast Boolean value determining which plot will be produced. If TRUE, a
#' @param float_contrast Boolean value determining which plot will be produced. If TRUE, a
#' Gardner-Altman plot will be produced.If FALSE, a Cumming estimation plot will be produced.
#'
#' @return List of booleans for raw plot components
Expand Down Expand Up @@ -63,12 +63,12 @@ create_rawplot_components <- function(proportional,

#' Generates list of TRUE/FALSE for delta plot components that will be built
#'
#' This function generates a list of booleans determining whether certain
#' This function generates a list of booleans determining whether certain
#' plot components will be constructed for the deltaplot.
#'
#'
#' @param proportional Boolean value as initially passed to [load()].
#' @param is_paired Boolean value determining if it is a paired plot.
#' @param float_contrast Boolean value determining which plot will be produced. If TRUE, a
#' @param float_contrast Boolean value determining which plot will be produced. If TRUE, a
#' Gardner-Altman plot will be produced.If FALSE, a Cumming estimation plot will be produced.
#' @param is_colour Boolean value determining if there is a colour column for the plot.
#' @param delta2 Boolean value determining if delta-delta analysis for
Expand All @@ -78,7 +78,7 @@ create_rawplot_components <- function(proportional,
#' @param flow Boolean value determining whether the bars will be plotted in pairs.
#' @param show_baseline_ec Boolean value determining whether the baseline curve is shown.
#'
#' @return List of booleans for delta plot components
#' @return List of booleans for delta plot components
#' @noRd
create_deltaplot_components <- function(proportional,
is_paired,
Expand Down Expand Up @@ -124,13 +124,13 @@ create_deltaplot_components <- function(proportional,

#' Generates list of values for the violin plot components that will be built
#'
#' This function generates the data and metadata necessary to create a
#' This function generates the data and metadata necessary to create a
#' violin plot with specific characteristics
#'
#' @param boots Boot result obtained from boot.ci
#' @param idx List of vectors of control-test groupings that determines the arrangement
#' of the final dataframe output.
#' @param float_contrast Boolean value determining if a Gardner-Altman plot or
#' @param float_contrast Boolean value determining if a Gardner-Altman plot or
#' Cumming estimation plot will be produced.
#' @param delta_y_max Max y limits for the delta-delta plot
#' @param delta_y_min Min y limits for the delta-delta plot
Expand Down
66 changes: 55 additions & 11 deletions R/003_pvalues_and_permutation_test_tools.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
# Obtain permutation tests, permutations and p values

#' Generates permutation test results.
#'
#' This function returns a list that include permutations results:
#' its corresponding permutations, variance of permutations, p value and effect size
#' (depending on the effect size).
#'
#' @param control Vector, the control group data.
#' @param test Vector, the test group data.
#' @param effect_size String. Any one of the following are accepted inputs:
#' 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta'.
#' @param is_paired Boolean value as initially passed to [load()].
#' @param permutation_count Integer value specifying the number of permutations being carried out.
#' @param random_seed Integer value specifying the random seed for permutations to be carried out.
#' @param ef_size_fn A function that calculates the specific type of effect size.
#'
#' @returns a list for permutation test results for a pair of control and test data points.
#' @noRd
PermutationTest <- function(control,
test,
effect_size,
Expand All @@ -8,7 +23,7 @@ PermutationTest <- function(control,
random_seed = 12345,
ef_size_fn) {
# Check if the arrays have the same length for paired test
if (!is.null(is_paired) && length(control) != length(test)) {
if (isTRUE(is_paired) && length(control) != length(test)) {
stop("The two arrays do not have the same length.")
}

Expand Down Expand Up @@ -77,16 +92,28 @@ PermutationTest <- function(control,

return(perm_results)
}

# p values

#' Generates statistical test results for possible hypothesis testings.
#'
#' This function returns a list that include statistical test results:
#' its corresponding statistics and p values
#'
#' @param control Vector, the control group data.
#' @param test Vector, the test group data.
#' @param is_paired Boolean value as initially passed to [load()].
#' @param proportional Boolean value as initially passed to [load()].
#' @param effect_size String. Any one of the following are accepted inputs:
#' 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta'.
#'
#' @returns a list for statistical test results and p values for the
#' corresponding tests of a pair of control and test data points.
#' @noRd
pvals_statistics <- function(control,
test,
is_paired,
proportional,
effect_size) {
pvals_stats <- list()
if (!is.null(is_paired) && !proportional) {
if (isTRUE(is_paired) && !proportional) {
# Wilcoxon test (non-parametric version of the paired T-test)
wilcoxon <- stats::wilcox.test(control, test)
pvalue_wilcoxon <- wilcoxon$p.value
Expand All @@ -110,7 +137,7 @@ pvals_statistics <- function(control,
pvalue_paired_students_t = pvalue_paired_students_t,
statistic_paired_students_t = statistic_paired_students_t
)
} else if (!is.null(is_paired) && proportional) {
} else if (isTRUE(is_paired) && proportional) {
# McNemar's test for binary paired data
table <- matrix(
c(
Expand Down Expand Up @@ -210,9 +237,26 @@ pvals_statistics <- function(control,

return(pvals_stats)
}

# collate permtest and p values with function "Pvalues_statistics"

#' Generates collated permutaion test results and statistical test results.
#'
#' This function returns a tibble (list) that includes statistical test results:
#' its corresponding statistics and p values.
#'
#' @param dabest_object A "dabest_obj" list created by loading in dataset along with other
#' specified parameters with the [load()] function.
#' @param seed Integer specifying random seed that will be passed to the
#' [PermutationTest()] function.
#' @param permutation_count Integer value specifying the number of permutations
#' being carried out in the [PermutationTest()] function.
#' @param ef_size_fn The effect size function passed to [PermutationTest()] that
#' help calculate the specific type of effect size.
#' @param effect_size_type String. Any one of the following are accepted inputs:
#' 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta'.
#'
#' @returns Tibble for statistical test and permutation test results for
#' all pairs of control and test datasets based on the experimental design
#' initially specified when passed to the [load()] function.
#' @noRd
Pvalues_statistics <- function(dabest_object,
seed = 12345,
perm_count = 5000,
Expand Down
15 changes: 15 additions & 0 deletions tests/testthat/test-001_load.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,13 @@ testthat::test_that("Able to detect non-valid params", {
),
regexp = "does not consist of at least 2 groups"
)
expect_error(
dabestr::load(np_dataset,
x = Group, y = Measurement,
idx = c("Control 1", "Test1")
),
regexp = "Test1 not present in x"
)
})

testthat::test_that("Able to detect non-valid params for proportional = TRUE", {
Expand Down Expand Up @@ -96,6 +103,14 @@ testthat::test_that("Able to detect non-valid params for is_paired = TRUE", {
),
regexp = "is not 'baseline' or 'sequential'."
)
expect_error(
dabestr::load(np_dataset[-2, ],
x = Group, y = Measurement,
idx = c("Control 1", "Test 1", "Test 2"), paired = "sequential",
id_col = ID
),
regexp = "data is paired, as indicated by paired but size of control and treatment groups are not equal."
)
})

testthat::test_that("Able to detect non-valid params for minimeta = TRUE", {
Expand Down