diff --git a/R/maaslin3.R b/R/maaslin3.R index 87e9d70..b1550d3 100755 --- a/R/maaslin3.R +++ b/R/maaslin3.R @@ -117,6 +117,7 @@ args$max_pngs <- 30 args$cores <- 1 args$save_models <- FALSE args$reference <- NULL +args$summary_plot_balanced <- FALSE #### end #### @@ -582,6 +583,19 @@ options <- ) ) +options <- + optparse::add_option( + options, + c("--summary_plot_balanced"), + type = "logical", + dest = "make_summary_plot_balanced", + default = args$summary_plot_balanced, + help = paste( + "If coef_plot_vars is selected this will", + "select balanced top features [ Default: %default ]" + ) + ) + option_not_valid_error <- function(message, valid_options) { logging::logerror(paste(message, ": %s"), toString(valid_options)) stop("Option not valid", call. = FALSE) @@ -765,7 +779,8 @@ maaslin_log_arguments <- function(input_data, max_pngs = 30, cores = 1, save_models = FALSE, - verbosity = 'FINEST') { + verbosity = 'FINEST', + summary_plot_balanced=FALSE) { # Allow for lower case variables normalization <- toupper(normalization) transform <- toupper(transform) @@ -873,6 +888,8 @@ maaslin_log_arguments <- function(input_data, logging::logdebug("Augment: %s", augment) logging::logdebug("Evaluate only: %s", evaluate_only) logging::logdebug("Cores: %d", cores) + logging::logdebug("Balanced Summary plot: %s", summary_plot_balanced) + maaslin_check_arguments( feature_specific_covariate, @@ -2307,7 +2324,8 @@ maaslin_plot_results <- function(output, coef_plot_vars = NULL, heatmap_vars = NULL, plot_associations = TRUE, - max_pngs = 30) { + max_pngs = 30, + balanced=FALSE) { # create an output folder and figures folder if it does not exist if (!file.exists(output)) { logging::loginfo("Creating output folder") @@ -2354,7 +2372,8 @@ maaslin_plot_results <- function(output, coef_plot_vars = coef_plot_vars, heatmap_vars = heatmap_vars, median_comparison_abundance = median_comparison_abundance, - median_comparison_prevalence = median_comparison_prevalence + median_comparison_prevalence = median_comparison_prevalence, + balanced=balanced ) } @@ -2416,7 +2435,8 @@ maaslin_plot_results_from_output <- function(output, coef_plot_vars = NULL, heatmap_vars = NULL, plot_associations = TRUE, - max_pngs = 30) { + max_pngs = 30, + balanced=FALSE) { # create an output folder and figures folder if it does not exist if (!file.exists(output)) { @@ -2469,7 +2489,8 @@ maaslin_plot_results_from_output <- function(output, coef_plot_vars = coef_plot_vars, heatmap_vars = heatmap_vars, median_comparison_abundance = median_comparison_abundance, - median_comparison_prevalence = median_comparison_prevalence + median_comparison_prevalence = median_comparison_prevalence, + balanced=balanced ) } @@ -2602,7 +2623,8 @@ maaslin3 <- function(input_data, max_pngs = 30, cores = 1, save_models = FALSE, - verbosity = 'FINEST') { + verbosity = 'FINEST', + summary_plot_balanced=FALSE) { logging::logReset() # Allow for lower case variables @@ -2798,7 +2820,8 @@ maaslin3 <- function(input_data, coef_plot_vars, heatmap_vars, plot_associations, - max_pngs + max_pngs, + balanced=summary_plot_balanced ) }, warning = function(w) { @@ -2898,6 +2921,7 @@ if (identical(environment(), globalenv()) && augment = current_args$augment, evaluate_only = current_args$evaluate_only, reference = current_args$reference, - unscaled_abundance = current_args$unscaled_abundance + unscaled_abundance = current_args$unscaled_abundance, + summary_plot_balanced=current_args$summary_plot_balanced ) } diff --git a/R/viz.R b/R/viz.R index fb1d059..a3a6a05 100644 --- a/R/viz.R +++ b/R/viz.R @@ -102,7 +102,8 @@ make_coef_plot <- function(merged_results_sig, max_significance, median_comparison_prevalence, median_comparison_abundance, - median_df) { + median_df, + plot_threshold=10) { coef_plot_data <- merged_results_sig[merged_results_sig$full_metadata_name %in% coef_plot_vars,] @@ -111,9 +112,9 @@ make_coef_plot <- function(merged_results_sig, quantile_df <- coef_plot_data %>% dplyr::group_by(.data$full_metadata_name) %>% dplyr::summarise( - lower_q = median(.data$coef) - 10 * + lower_q = median(.data$coef) - plot_threshold * (median(.data$coef) - quantile(.data$coef, 0.25)), - upper_q = median(.data$coef) + 10 * + upper_q = median(.data$coef) + plot_threshold * (quantile(.data$coef, 0.75) - median(.data$coef)) ) %>% data.frame() @@ -470,7 +471,8 @@ maaslin3_summary_plot <- coef_plot_vars = NULL, heatmap_vars = NULL, median_comparison_abundance = FALSE, - median_comparison_prevalence = FALSE) { + median_comparison_prevalence = FALSE, + balanced=FALSE) { if (first_n > 200) { logging::logerror( paste( @@ -533,15 +535,42 @@ maaslin3_summary_plot <- # Subset associations for plotting merged_results_joint_only <- - unique(merged_results[, c('feature', 'qval_joint')]) + unique(merged_results[, c('feature', 'qval_joint', 'full_metadata_name')]) merged_results_joint_only <- merged_results_joint_only[ order(merged_results_joint_only$qval_joint),] if (length(unique(merged_results_joint_only$feature)) < first_n) { first_n <- length(unique(merged_results_joint_only$feature)) + signif_taxa <- + unique(merged_results_joint_only$feature)[seq(first_n)] + }else{ + #If balanced is turned on but there is not coef's choosen error out. + if(balanced){ + if(is.null(coef_plot_vars)){ + logging::logerror( + paste( + "Balanced plotting requires you set the variables you want to plot using + the parameter coef_plot_vars" + ) + ) + return() + }else{ + #grab the first N feature where N=N/(length of coef_plot_var) to + #plot the coef plot + first_n_per = first_n/length(coef_plot_vars) + signif_taxa <- merged_results_joint_only %>% + dplyr::group_by(.data$full_metadata_name) %>% + dplyr::arrange(desc(-.data$qval_joint), .by_group = T) %>% + dplyr::slice_head(n=ceiling(first_n_per)) %>% + dplyr::pull(feature) %>% + unique() + } + }else{ + signif_taxa <- + unique(merged_results_joint_only$feature)[seq(first_n)] + } } - signif_taxa <- - unique(merged_results_joint_only$feature)[seq(first_n)] + merged_results_sig <- merged_results %>% dplyr::filter(.data$feature %in% signif_taxa) @@ -588,12 +617,18 @@ maaslin3_summary_plot <- if (length(coef_plot_vars) > 0 & sum(merged_results_sig$full_metadata_name %in% coef_plot_vars) >= 1) { + if(balanced){ + plot_thres=5 + }else{ + plot_thres=10 + } p1 <- make_coef_plot(merged_results_sig, coef_plot_vars, max_significance, median_comparison_prevalence, median_comparison_abundance, - median_df) + median_df, + plot_threshold = plot_thres) } else { p1 <- NULL diff --git a/tests/testthat/test_maaslin_log_arguments.R b/tests/testthat/test_maaslin_log_arguments.R index 6d198a7..db53dc7 100644 --- a/tests/testthat/test_maaslin_log_arguments.R +++ b/tests/testthat/test_maaslin_log_arguments.R @@ -80,6 +80,7 @@ lines_to_compare <- c("Writing function arguments to log file", "Augment: TRUE", "Evaluate only:", "Cores: 9", + "Balanced Summary plot: FALSE", "Verifying options selected are valid") expect_starts_with <- function(strings, prefixes) {