ACCLAB · sunroofgod · Oct 31, 2023 · Oct 22, 2023 · Oct 22, 2023 · Oct 31, 2023
diff --git a/R/001_api.R b/R/001_api.R
@@ -282,8 +282,8 @@ load <- function(
   unique_x <- unique(data[[name_x]])
   for (i in 1:length(unlist_idx)) {
     if (isFALSE(unlist_idx[i] %in% unique_x)) {
-      cli::cli_abort(c("{.field idx} contains treatment groups not present in {.field x}.",
-        "x" = "Ensure that idx does not have any treatment groups not present in dataset."
+      cli::cli_abort(c("{unlist_idx[i]} not present in {.field x}.",
+        "x" = "Ensure that idx does not have any control/treatment groups not present in dataset."
       ))
     }
   }
@@ -310,6 +310,15 @@ load <- function(
     dplyr::count()
   Ns$swarmticklabs <- do.call(paste, c(Ns[c(name_x, "n")], sep = "\nN = "))
 
+  ## Check to ensure control & treatment groups have the same sample size if is_paired is TRUE
+  if (is_paired) {
+    if (length(unique(Ns$n)) > 1) {
+      cli::cli_abort(c("{.field data} is paired, as indicated by {.field paired} but size of control and treatment groups are not equal.",
+        "x" = "Ensure that the size of control and treatment groups are the same for paired comparisons."
+      ))
+    }
+  }
+
   # Extending ylim for plotting
   ylim[1] <- ylim[1] - (ylim[2] - ylim[1]) / 25
   ylim[2] <- ylim[2] + (ylim[2] - ylim[1]) / 25

diff --git a/R/002_df_for_plots.R b/R/002_df_for_plots.R
@@ -1,8 +1,8 @@
-#' Generates the dataframe requires for the tufte lines plot component in raw plots. 
-#' 
+#' Generates the dataframe requires for the tufte lines plot component in raw plots.
+#'
 #' This function returns a dataframe with summary statistics, such as mean, standard
-#' deviation and quantiles (depending on the effect size) based on the input dataset. 
-#' 
+#' deviation and quantiles (depending on the effect size) based on the input dataset.
+#'
 #' @param raw_data The tidy dataset passed to [load()] that was cleaned and altered for plotting.
 #' @param enquo_x Quosure of x as initially passed to [load()].
 #' @param enquo_y Quosure of y as initially passed to [load()].
@@ -53,16 +53,16 @@ create_df_for_tufte <- function(raw_data, enquo_x, enquo_y, proportional, gap, e
   return(tufte_lines_df)
 }
 #' Generates df for tufte lines plot component for raw plot WITH flow = FALSE.
-#' 
+#'
 #' This function rearranges and duplicates rows in a given dataframe (containing
-#' summary statistics) in the order of the unlisted given list of vectors.   
+#' summary statistics) in the order of the unlisted given list of vectors.
 #'
 #' @param idx List of vectors of control-test groupings that determines the arrangement
 #' of the final dataframe output.
-#' @param tufte_lines_df dataframe for plotting of the tufte lines plot component as 
+#' @param tufte_lines_df dataframe for plotting of the tufte lines plot component as
 #' generated by [create_df_for_tufte()].
 #' @param enquo_x Quosure of x as initially passed to [load()].
-#' 
+#'
 #' @returns dataframe
 #' @noRd
 create_dfs_for_nonflow_tufte_lines <- function(idx,
@@ -81,11 +81,11 @@ create_dfs_for_nonflow_tufte_lines <- function(idx,
 }
 
 #' Generates df for sankey plot component for raw plot WITH flow = TRUE
-#' 
-#' This function generates data frames necessary for generating 
-#' sankey diagrams for visualizing flows and proportions of categorical data. 
-#' 
-#' 
+#'
+#' This function generates data frames necessary for generating
+#' sankey diagrams for visualizing flows and proportions of categorical data.
+#'
+#'
 #' @param float_contrast Boolean value determining if a Gardner-Altman plot or Cumming estimation plot will be produced.
 #' @param raw_data The tidy dataset passed to [load()] that was cleaned and altered for plotting.
 #' @param proportional_data List of calculations related to the plotting of proportion plots.
@@ -94,7 +94,7 @@ create_dfs_for_nonflow_tufte_lines <- function(idx,
 #' @param enquo_id_col Quosure of id_col as initially passed to [load()].
 #' @param idx List of vectors of control-test groupings that determines the arrangement
 #' of the final dataframe output.
-#' @param scale_factor_sig Numeric value determining the significance of the scale factor. 
+#' @param scale_factor_sig Numeric value determining the significance of the scale factor.
 #' @param bar_width Numeric value determining the width of the bar in the sankey diagram.
 #' @param gap Integer value specifying the amount of gap for each tufte line.
 #' @param sankey Boolean value determining if the flows between the bar charts will be plotted.
@@ -349,11 +349,11 @@ create_dfs_for_sankey <- function(
 }
 
 #' Generates df for xaxis redraw for float_contrast = FALSE plot
-#' 
-#' This function generates data frames that 
+#'
+#' This function generates data frames that
 #' define the positions of lines and ticks on an x-axis for when float_contrast = FALSE
 #' to produce a Cumming estimation plot.
-#' 
+#'
 #' @param idx List of vectors of control-test groupings that determines the arrangement
 #' of the final dataframe output.
 #'
@@ -391,11 +391,11 @@ create_dfs_for_xaxis_redraw <- function(idx) {
   return(dfs_for_xaxis_redraw)
 }
 
-#' Generates df for proportion bar component 
-#' 
-#' This function generates data frames to 
+#' Generates df for proportion bar component
+#'
+#' This function generates data frames to
 #' represent bars with proportional data in a graphical display.
-#' 
+#'
 #' @param bar_width Numeric value determining the width of the bar in the sankey diagram.
 #' @param gap Integer value specifying the amount of gap for each tufte line.
 #'
@@ -445,7 +445,7 @@ create_dfs_for_proportion_bar <- function(proportion_success, bar_width = 0.3, g
 
 #' Generates df for baseline violin plot WITH flow = TRUE
 #'
-#' This function generates data frames to represent 
+#' This function generates data frames to represent
 #' data points for plotting violin plots
 #'
 #' @param boots Boot result obtained from boot.ci

diff --git a/R/002_plot_api.R b/R/002_plot_api.R
@@ -1,4 +1,4 @@
-#' Generates a ggplot object containing plot components for the rawplot component 
+#' Generates a ggplot object containing plot components for the rawplot component
 #' of an estimation plot.
 #'
 #' This function takes in a dabest_effectsize_obj object and applies the [create_rawplot_components()]

diff --git a/R/002_plot_components.R b/R/002_plot_components.R
@@ -4,12 +4,12 @@
 
 #' Generates list of TRUE/FALSE for raw plot components that will be built
 #'
-#' This function generates a list of booleans determining whether certain 
+#' This function generates a list of booleans determining whether certain
 #' plot components will be constructed for the rawplot.
 #'
 #' @param proportional Boolean value as initially passed to [load()].
 #' @param is_paired Boolean value determining if it is a paired plot.
-#' @param float_contrast Boolean value determining which plot will be produced. If TRUE, a 
+#' @param float_contrast Boolean value determining which plot will be produced. If TRUE, a
 #' Gardner-Altman plot will be produced.If FALSE, a Cumming estimation plot will be produced.
 #'
 #' @return List of booleans for raw plot components
@@ -63,12 +63,12 @@ create_rawplot_components <- function(proportional,
 
 #' Generates list of TRUE/FALSE for delta plot components that will be built
 #'
-#' This function generates a list of booleans determining whether certain 
+#' This function generates a list of booleans determining whether certain
 #' plot components will be constructed for the deltaplot.
-#' 
+#'
 #' @param proportional Boolean value as initially passed to [load()].
 #' @param is_paired Boolean value determining if it is a paired plot.
-#' @param float_contrast Boolean value determining which plot will be produced. If TRUE, a 
+#' @param float_contrast Boolean value determining which plot will be produced. If TRUE, a
 #' Gardner-Altman plot will be produced.If FALSE, a Cumming estimation plot will be produced.
 #' @param is_colour Boolean value determining if there is a colour column for the plot.
 #' @param delta2 Boolean value determining if delta-delta analysis for
@@ -78,7 +78,7 @@ create_rawplot_components <- function(proportional,
 #' @param flow Boolean value determining whether the bars will be plotted in pairs.
 #' @param show_baseline_ec Boolean value determining whether the baseline curve is shown.
 #'
-#' @return List of booleans for delta plot components 
+#' @return List of booleans for delta plot components
 #' @noRd
 create_deltaplot_components <- function(proportional,
                                         is_paired,
@@ -124,13 +124,13 @@ create_deltaplot_components <- function(proportional,
 
 #' Generates list of values for the violin plot components that will be built
 #'
-#' This function generates the data and metadata necessary to create a 
+#' This function generates the data and metadata necessary to create a
 #' violin plot with specific characteristics
 #'
 #' @param boots Boot result obtained from boot.ci
 #' @param idx List of vectors of control-test groupings that determines the arrangement
 #' of the final dataframe output.
-#' @param float_contrast Boolean value determining if a Gardner-Altman plot or 
+#' @param float_contrast Boolean value determining if a Gardner-Altman plot or
 #' Cumming estimation plot will be produced.
 #' @param delta_y_max Max y limits for the delta-delta plot
 #' @param delta_y_min Min y limits for the delta-delta plot

diff --git a/R/003_pvalues_and_permutation_test_tools.R b/R/003_pvalues_and_permutation_test_tools.R
@@ -1,5 +1,20 @@
-# Obtain permutation tests, permutations and p values
-
+#' Generates permutation test results.
+#'
+#' This function returns a list that include permutations results:
+#' its corresponding permutations, variance of permutations, p value and effect size
+#' (depending on the effect size).
+#'
+#' @param control Vector, the control group data.
+#' @param test Vector, the test group data.
+#' @param effect_size String. Any one of the following are accepted inputs:
+#' 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta'.
+#' @param is_paired Boolean value as initially passed to [load()].
+#' @param permutation_count Integer value specifying the number of permutations being carried out.
+#' @param random_seed Integer value specifying the random seed for permutations to be carried out.
+#' @param ef_size_fn A function that calculates the specific type of effect size.
+#'
+#' @returns a list for permutation test results for a pair of control and test data points.
+#' @noRd
 PermutationTest <- function(control,
                             test,
                             effect_size,
@@ -8,7 +23,7 @@ PermutationTest <- function(control,
                             random_seed = 12345,
                             ef_size_fn) {
   # Check if the arrays have the same length for paired test
-  if (!is.null(is_paired) && length(control) != length(test)) {
+  if (isTRUE(is_paired) && length(control) != length(test)) {
     stop("The two arrays do not have the same length.")
   }
 
@@ -77,16 +92,28 @@ PermutationTest <- function(control,
 
   return(perm_results)
 }
-
-# p values
-
+#' Generates statistical test results for possible hypothesis testings.
+#'
+#' This function returns a list that include statistical test results:
+#' its corresponding statistics and p values
+#'
+#' @param control Vector, the control group data.
+#' @param test Vector, the test group data.
+#' @param is_paired Boolean value as initially passed to [load()].
+#' @param proportional Boolean value as initially passed to [load()].
+#' @param effect_size String. Any one of the following are accepted inputs:
+#' 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta'.
+#'
+#' @returns a list for statistical test results and p values for the
+#' corresponding tests of a pair of control and test data points.
+#' @noRd
 pvals_statistics <- function(control,
                              test,
                              is_paired,
                              proportional,
                              effect_size) {
   pvals_stats <- list()
-  if (!is.null(is_paired) && !proportional) {
+  if (isTRUE(is_paired) && !proportional) {
     # Wilcoxon test (non-parametric version of the paired T-test)
     wilcoxon <- stats::wilcox.test(control, test)
     pvalue_wilcoxon <- wilcoxon$p.value
@@ -110,7 +137,7 @@ pvals_statistics <- function(control,
       pvalue_paired_students_t = pvalue_paired_students_t,
       statistic_paired_students_t = statistic_paired_students_t
     )
-  } else if (!is.null(is_paired) && proportional) {
+  } else if (isTRUE(is_paired) && proportional) {
     # McNemar's test for binary paired data
     table <- matrix(
       c(
@@ -210,9 +237,26 @@ pvals_statistics <- function(control,
 
   return(pvals_stats)
 }
-
-# collate permtest and p values with function "Pvalues_statistics"
-
+#' Generates collated permutaion test results and statistical test results.
+#'
+#' This function returns a tibble (list) that includes statistical test results:
+#' its corresponding statistics and p values.
+#'
+#' @param dabest_object A "dabest_obj" list created by loading in dataset along with other
+#' specified parameters with the [load()] function.
+#' @param seed Integer specifying random seed that will be passed to the
+#' [PermutationTest()] function.
+#' @param permutation_count Integer value specifying the number of permutations
+#'  being carried out in the [PermutationTest()] function.
+#' @param ef_size_fn The effect size function passed to [PermutationTest()] that
+#'  help calculate the specific type of effect size.
+#' @param effect_size_type String. Any one of the following are accepted inputs:
+#' 'mean_diff', 'median_diff', 'cohens_d', 'hedges_g', or 'cliffs_delta'.
+#'
+#' @returns Tibble for statistical test and permutation test results for
+#' all pairs of control and test datasets based on the experimental design
+#' initially specified when passed to the [load()] function.
+#' @noRd
 Pvalues_statistics <- function(dabest_object,
                                seed = 12345,
                                perm_count = 5000,

diff --git a/tests/testthat/test-001_load.R b/tests/testthat/test-001_load.R
@@ -66,6 +66,13 @@ testthat::test_that("Able to detect non-valid params", {
     ),
     regexp = "does not consist of at least 2 groups"
   )
+  expect_error(
+    dabestr::load(np_dataset,
+      x = Group, y = Measurement,
+      idx = c("Control 1", "Test1")
+    ),
+    regexp = "Test1 not present in x"
+  )
 })
 
 testthat::test_that("Able to detect non-valid params for proportional = TRUE", {
@@ -96,6 +103,14 @@ testthat::test_that("Able to detect non-valid params for is_paired = TRUE", {
     ),
     regexp = "is not 'baseline' or 'sequential'."
   )
+  expect_error(
+    dabestr::load(np_dataset[-2, ],
+      x = Group, y = Measurement,
+      idx = c("Control 1", "Test 1", "Test 2"), paired = "sequential",
+      id_col = ID
+    ),
+    regexp = "data is paired, as indicated by paired but size of control and treatment groups are not equal."
+  )
 })
 
 testthat::test_that("Able to detect non-valid params for minimeta = TRUE", {