From 52424c95b89475974eff61488f1d6a73f182de9f Mon Sep 17 00:00:00 2001
From: almeidasilvaf
Date: Fri, 23 Feb 2024 11:45:54 +0100
Subject: [PATCH] Added helper functions for input data validation

---
 R/01_data_processing.R                | 13 +++++--
 R/02_de_analyses.R                    |  4 ++-
 R/03_expression_partitioning.R        |  7 ----
 R/error_handling.R                    | 51 +++++++++++++++++++++++++++
 R/visualization.R                     |  8 ++---
 tests/testthat/test-data_processing.R |  3 +-
 6 files changed, 70 insertions(+), 16 deletions(-)
 create mode 100644 R/error_handling.R

diff --git a/R/01_data_processing.R b/R/01_data_processing.R
index 1e11f8c..1c66a6e 100644
--- a/R/01_data_processing.R
+++ b/R/01_data_processing.R
@@ -31,6 +31,9 @@ add_midparent_expression <- function(
         se, coldata_column = "Generation", parent1 = "P1", parent2 = "P2",
         method = "mean", weights = c(1, 1)
 ) {
+
+    check_coldata_column(se, coldata_column)
+    check_coldata_levels(se, coldata_column, c(parent1, parent2))
 
     # Create a vector with samples from each parent - randomly sampled
     cdata <- as.data.frame(colData(se))
@@ -98,6 +101,7 @@ add_midparent_expression <- function(
 #' analyses.
 #'
 #' @importFrom DESeq2 DESeqDataSet estimateSizeFactors sizeFactors
+#' @importFrom SummarizedExperiment colData rowData SummarizedExperiment assay
 #' @importFrom methods as
 #' @export
 #' @rdname add_size_factors
@@ -132,9 +136,12 @@ add_size_factors <- function(
     DESeq2::sizeFactors(deseq) <- sf
 
     # Create SummarizedExperiment object from DESeqDataSet
-    final_se <- as(deseq, "SummarizedExperiment")
-    rownames(final_se) <- rownames(deseq)
-
+    final_se <- SummarizedExperiment(
+        assays = list(counts = assay(deseq)),
+        colData = colData(deseq),
+        rowData = rowData(deseq)
+    )
+
     return(final_se)
 }
 
diff --git a/R/02_de_analyses.R b/R/02_de_analyses.R
index efc3080..f2f5964 100644
--- a/R/02_de_analyses.R
+++ b/R/02_de_analyses.R
@@ -66,7 +66,9 @@ get_deg_list <- function(
         alpha = 0.01,
         ...
 ) {
-
+
+    check_coldata_column(se, coldata_column)
+    check_coldata_levels(se, coldata_column, c(parent1, parent2, offspring))
     ngenes <- nrow(se)
 
     # Create DESeq object
diff --git a/R/03_expression_partitioning.R b/R/03_expression_partitioning.R
index a3e30a5..efaad19 100644
--- a/R/03_expression_partitioning.R
+++ b/R/03_expression_partitioning.R
@@ -77,10 +77,3 @@ expression_partitioning <- function(deg_list) {
 
     return(class_df)
 }
-
-
-
-
-
-
-
diff --git a/R/error_handling.R b/R/error_handling.R
new file mode 100644
index 0000000..e6c07be
--- /dev/null
+++ b/R/error_handling.R
@@ -0,0 +1,51 @@
+
+#' Check if a column exists in the colData of a `SummarizedExperiment` object
+#'
+#' @param se A `SummarizedExperiment` object.
+#' @param column Character with name of the column to check.
+#'
+#' @return TRUE if the column exists, and ERROR otherwise.
+#'
+#' @importFrom SummarizedExperiment colData
+#' @noRd
+#' @examples
+#' data(se_chlamy)
+#' check_coldata_column(se_chlamy, "Generation")
+check_coldata_column <- function(se, column) {
+
+    cdata <- colData(se)
+    if(!column %in% names(cdata)) {
+        stop("Column '", column, "' is not present in the `colData` slot.")
+    }
+
+    return(TRUE)
+}
+
+
+#' Check if levels exist in a colData column of a `SummarizedExperiment` object
+#'
+#' @param se A `SummarizedExperiment` object.
+#' @param column Character with name of the column where levels are.
+#' @param levels Character with levels to check for presence in \strong{column}.
+#'
+#' @return TRUE if all levels are present in the column, and ERROR otherwise.
+#'
+#' @importFrom SummarizedExperiment colData
+#' @noRd
+#' @examples
+#' data(se_chlamy)
+#' check_coldata_levels(se_chlamy, "Generation", levels = c("P1", "P2"))
+check_coldata_levels <- function(se, column, levels) {
+
+    col <- unique(colData(se)[[column]])
+
+    if(!all(levels %in% col)) {
+        stop(
+            "All levels (", paste0(levels, collapse = ","),
+            ") must be in column '", column, "'."
+        )
+    }
+
+    return(TRUE)
+}
+
diff --git a/R/visualization.R b/R/visualization.R
index e42fa2f..cf1b757 100644
--- a/R/visualization.R
+++ b/R/visualization.R
@@ -286,6 +286,9 @@ pca_plot <- function(
         add_mean = FALSE, palette = NULL
 ) {
 
+    # c() drops NULL, so an unset `shape_by` is skipped instead of erroring
+    for (nm in c(color_by, shape_by)) check_coldata_column(se, nm)
+
     pc <- paste0("PC", PCs)
     pal <- ppal(palette, "pca")
 
@@ -325,10 +328,7 @@ pca_plot <- function(
     gpoint <- geom_point(aes(color = .data[[color_by]]), size = 3, alpha = 0.7)
     if(!is.null(shape_by)) {
         gpoint <- geom_point(
-            aes(
-                color = .data[[color_by]],
-                shape = .data[[shape_by]]
-            ),
+            aes(color = .data[[color_by]], shape = .data[[shape_by]]),
             size = 3, alpha = 0.7
         )
     }
diff --git a/tests/testthat/test-data_processing.R b/tests/testthat/test-data_processing.R
index 7f88dfa..c1b4c86 100644
--- a/tests/testthat/test-data_processing.R
+++ b/tests/testthat/test-data_processing.R
@@ -2,7 +2,6 @@
 # Load data ----
 data(se_chlamy)
 
-
 # Start tests ----
 test_that("add_midparent_expression() adds columns with midparent expression", {
 
@@ -16,6 +15,8 @@ test_that("add_midparent_expression() adds columns with midparent expression", {
     expect_true(any(grepl("midparent", colnames(new_se))))
 
     expect_error(add_midparent_expression(se_chlamy, method = "error"))
+    expect_error(add_midparent_expression(se_chlamy, coldata_column = "error"))
+    expect_error(add_midparent_expression(se_chlamy, parent1 = "error"))
 })
 
 test_that("add_size_factors() adds a column named 'sizeFactor' for DESeq2", {