diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..743bf17 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,14 @@ +We welcome feedback and issue reporting for all bioBakery tools through our [Discourse site](https://forum.biobakery.org/c/pull-request/featurepull-request/). For users that would like to directly contribute to the [tools](https://github.com/biobakery/) we are happy to field PRs to address **bug fixes**. Please note the turn around time on our end might be a bit long to field these but that does not mean we don't value the contribution! We currently **don't** accept PRs to add **new functionality** to tools but we would be happy to receive your feedback on [Discourse](https://forum.biobakery.org/c/pull-request/featurepull-request/). + +Also, we will make sure to attribute your contribution in our User’s manual(README.md) and in any associated paper Acknowledgements. + + +## Description + + +## Related Issue + + + + +## Screenshots (if appropriate): diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c73dff8..8f4b5e8 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,42 +1,180 @@ +## Read more about GitHub actions the features of this GitHub Actions workflow +## at https://lcolladotor.github.io/biocthis/articles/biocthis.html#use_bioc_github_action +## +## For more details, check the biocthis developer notes vignette at +## https://lcolladotor.github.io/biocthis/articles/biocthis_dev_notes.html +## +## You can add this workflow to other packages using: +## > biocthis::use_bioc_github_action() +## +## Using GitHub Actions exposes you to many details about how R packages are +## compiled and installed in several operating system.s +### If you need help, please follow the steps listed at +## https://github.com/r-lib/actions#where-to-find-help +## +## If you found an issue specific to biocthis's GHA workflow, please report it +## with the information that will make it easier for others to help you. +## Thank you! -name: build and test +## Acronyms: +## * GHA: GitHub Action +## * OS: operating system -# Controls when the action will run. Triggers the workflow on push or pull request -# events but only for the master branch on: push: - branches: [ master ] pull_request: - branches: [ master ] + +name: build and test + +env: + has_testthat: 'true' jobs: - build: - runs-on: ubuntu-latest - container: - image: biobakery/maaslin2:latest + build-check: + runs-on: ${{ matrix.config.os }} + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + container: ${{ matrix.config.cont }} + ## Environment variables unique to this job. + + strategy: + fail-fast: false + matrix: + config: + - { os: ubuntu-latest, cont: "bioconductor/bioconductor_docker" } + - { os: macOS-latest } + - { os: windows-latest } + ## Check https://github.com/r-lib/actions/tree/master/examples + ## for examples using the http-user-agent + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + NOT_CRAN: true + TZ: UTC steps: - # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - - uses: actions/checkout@v2 + ## Most of these steps are the same as the ones in + ## https://github.com/r-lib/actions/blob/master/examples/check-standard.yaml + ## If they update their steps, we will also need to update ours. + - name: Checkout Repository + uses: actions/checkout@v3 + + ## R is already included in the Bioconductor docker images + - name: Setup R from r-lib + if: runner.os != 'Linux' + uses: r-lib/actions/setup-r@v2 + + ## pandoc is already included in the Bioconductor docker images + - name: Setup pandoc from r-lib + if: runner.os != 'Linux' + uses: r-lib/actions/setup-pandoc@v2 + + - name: Query dependencies + run: | + install.packages('remotes') + saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) + shell: Rscript {0} + + - name: Install Linux system dependencies + if: runner.os == 'Linux' + run: | + apt-get update -y && apt-get install -y make cmake libicu-dev zlib1g-dev pandoc + + - name: Install macOS system dependencies + if: matrix.config.os == 'macOS-latest' + run: | + ## Enable installing XML from source if needed + brew install libxml2 + echo "XML_CONFIG=/usr/local/opt/libxml2/bin/xml2-config" >> $GITHUB_ENV + + ## Required to install magick as noted at + ## https://github.com/r-lib/usethis/commit/f1f1e0d10c1ebc75fd4c18fa7e2de4551fd9978f#diff-9bfee71065492f63457918efcd912cf2 + brew install imagemagick@6 + + ## For textshaping, required by ragg, and required by pkgdown + brew install harfbuzz fribidi + + ## For installing usethis's dependency gert + brew install libgit2 + + ## Required for tcltk + brew install xquartz --cask + + - name: Install Windows system dependencies + if: runner.os == 'Windows' + run: | + ## Edit below if you have any Windows system dependencies + shell: Rscript {0} + + - name: Install BiocManager + run: | + message(paste('****', Sys.time(), 'installing BiocManager ****')) + remotes::install_cran("BiocManager") + shell: Rscript {0} + + - name: Set BiocVersion + run: | + BiocManager::install(force = TRUE, ask = FALSE) + shell: Rscript {0} + + - name: Install dependencies pass 1 + run: | + ## Try installing the package dependencies in steps. First the local + ## dependencies, then any remaining dependencies to avoid the + ## issues described at + ## https://stat.ethz.ch/pipermail/bioc-devel/2020-April/016675.html + ## https://github.com/r-lib/remotes/issues/296 + ## Ideally, all dependencies should get installed in the first pass. + + ## For running the checks + message(paste('****', Sys.time(), 'installing rcmdcheck and BiocCheck ****')) + install.packages(c("rcmdcheck", "BiocCheck"), repos = BiocManager::repositories()) - - name: Install build and test dependencies from dist - run: apt-get update -y && apt-get install -y libssl-dev libxml2-dev libcurl4-openssl-dev texlive + ## Pass #1 at installing dependencies + message(paste('****', Sys.time(), 'pass number 1 at installing dependencies: local dependencies ****')) + remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = FALSE, upgrade = TRUE) + continue-on-error: true + shell: Rscript {0} - - name: install build and test dependencies from bioconductor - run: R -q -e "BiocManager::install('BiocCheck')" + - name: Install dependencies pass 2 + run: | + ## Pass #2 at installing dependencies + message(paste('****', Sys.time(), 'pass number 2 at installing dependencies: any remaining dependencies ****')) + remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = TRUE, upgrade = TRUE, force = TRUE) + shell: Rscript {0} - - name: Install build and test dependencies from CRAN - run: R -q -e "install.packages(c('knitr','glmmTMB'), repos='http://cran.r-project.org')" + - name: Session info + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + sessioninfo::session_info(pkgs, include_base = TRUE) + shell: Rscript {0} - - name: Build - run: cd $GITHUB_WORKSPACE && R CMD build . + - name: Run CMD check + env: + _R_CHECK_CRAN_INCOMING_: false + DISPLAY: 99.0 + run: | + options(crayon.enabled = TRUE) + rcmdcheck::rcmdcheck( + args = c("--no-manual", "--no-vignettes", "--timings"), + build_args = c("--no-manual", "--keep-empty-dirs", "--no-resave-data"), + error_on = "warning", + check_dir = "check" + ) + shell: Rscript {0} - - name: Install maaslin2 - run: VERSION=$(grep Version DESCRIPTION | cut -d" " -f2) && R CMD INSTALL "Maaslin2_${VERSION}.tar.gz" + ## Might need an to add this to the if: && runner.os == 'Linux' + - name: Reveal testthat details + if: env.has_testthat == 'true' + run: find . -name testthat.Rout -exec cat '{}' ';' - - name: R test - run: VERSION=$(grep Version DESCRIPTION | cut -d" " -f2) && R CMD check "Maaslin2_${VERSION}.tar.gz" - - - name: Bioc test - run: VERSION=$(grep Version DESCRIPTION | cut -d" " -f2) && R CMD BiocCheck "Maaslin2_${VERSION}.tar.gz" - + - name: Run BiocCheck + env: + DISPLAY: 99.0 + run: | + BiocCheck::BiocCheck( + dir('check', 'tar.gz$', full.names = TRUE), + `quit-with-status` = TRUE, + `no-check-R-ver` = TRUE, + `no-check-bioc-help` = TRUE + ) + shell: Rscript {0} diff --git a/DESCRIPTION b/DESCRIPTION index b1a3640..f9c008a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: Maaslin2 Title: "Multivariable Association Discovery in Population-scale Meta-omics Studies" Year: 2021 -Version: 1.15.0 +Version: 1.7.4 Authors@R: c( person("Himel", "Mallick", email = "himel.stat.iitk@gmail.com", role = "aut"), person("Ali", "Rahnavard", email = "gholamali.rahnavard@gmail.com", role = "aut"), @@ -11,11 +11,12 @@ Depends: R (>= 3.6) Description: MaAsLin2 is comprehensive R package for efficiently determining multivariable association between clinical metadata and microbial meta'omic features. MaAsLin2 relies on general linear models to accommodate most modern epidemiological study designs, including cross-sectional and longitudinal, and offers a variety of data exploration, normalization, and transformation methods. MaAsLin2 is the next generation of MaAsLin. License: MIT + file LICENSE LazyData: false -Imports: robustbase, biglm, pcaPP, edgeR, metagenomeSeq, lpsymphony, pbapply, car, dplyr, vegan, chemometrics, ggplot2, pheatmap, logging, data.table, lmerTest, hash, optparse, grDevices, stats, utils, glmmTMB, MASS, cplm, pscl, lme4 +Imports: robustbase, biglm, pcaPP, edgeR, metagenomeSeq, pbapply, car, dplyr, vegan, chemometrics, ggplot2, pheatmap, logging, data.table, lmerTest, hash, optparse, grDevices, stats, utils, glmmTMB, MASS, cplm, pscl, lme4, tibble Suggests: knitr, testthat (>= 2.1.0), - rmarkdown + rmarkdown, + markdown VignetteBuilder: knitr Collate: fit.R utility_scripts.R viz.R Maaslin2.R URL: http://huttenhower.sph.harvard.edu/maaslin2 diff --git a/NAMESPACE b/NAMESPACE index 6947bac..60ec1ae 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,11 +3,11 @@ export(Maaslin2) import("robustbase") import("biglm") import("pcaPP") -import("lpsymphony") import("optparse") importFrom("grDevices", "colorRampPalette", "dev.off", "pdf", "jpeg","png") importFrom("stats", "as.formula", "coef", "glm", "lm", "na.exclude", - "p.adjust", "update") + "p.adjust", "update", "relevel") importFrom("utils", "capture.output", "read.table", "write.table") importFrom("dplyr", "%>%") importFrom("stats", "sd", "na.omit") +importFrom("utils","type.convert") diff --git a/NEWS b/NEWS index be1f167..6e76f2e 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,10 @@ +Changes in version 1.7.3 (11-02-2021): +* Update sourcing R scripts +* Update reference level handling +* Update example files +* Fix saved plots to allow for runs with less then N plots per metadata +* Convert strings to numeric when needed for metadata +* Fix actions to build with latest version of dependencies Changes in version 1.7.1 (07-27-2021): * Update tutorial data files diff --git a/R/Maaslin2.R b/R/Maaslin2.R index f81553a..8e7298e 100755 --- a/R/Maaslin2.R +++ b/R/Maaslin2.R @@ -45,7 +45,7 @@ if (identical(environment(), globalenv()) && script_dir <- dirname(script_path) script_name <- basename(script_path) - for (R_file in dir(script_dir, pattern = "*.R")) + for (R_file in dir(script_dir, pattern = "*.R$")) { if (!(R_file == script_name)) source(file.path(script_dir, R_file)) @@ -93,7 +93,10 @@ args$standardize <- TRUE args$plot_heatmap <- TRUE args$heatmap_first_n <- 50 args$plot_scatter <- TRUE +args$max_pngs <- 10 +args$save_scatter <- FALSE args$cores <- 1 +args$save_models <- FALSE args$reference <- NULL ############################## @@ -116,7 +119,7 @@ options <- dest = "min_abundance", default = args$min_abundance, help = paste0("The minimum abundance for each feature", - " [ Default: %default ]" + "[ Default: %default ]" ) ) options <- @@ -128,7 +131,7 @@ options <- default = args$min_prevalence, help = paste0("The minimum percent of samples for which", "a feature is detected at minimum abundance", - " [ Default: %default ]" + "[ Default: %default ]" ) ) options <- @@ -140,7 +143,7 @@ options <- default = args$min_variance, help = paste0("Keep features with variances", "greater than value", - " [ Default: %default ]" + "[ Default: %default ]" ) ) options <- @@ -151,7 +154,7 @@ options <- dest = "max_significance", default = args$max_significance, help = paste0("The q-value threshold for significance", - " [ Default: %default ]" + "[ Default: %default ]" ) ) options <- @@ -163,7 +166,7 @@ options <- default = args$normalization, help = paste( "The normalization method to apply", - " [ Default: %default ] [ Choices:", + "[ Default: %default ] [ Choices:", toString(normalization_choices), "]" ) @@ -203,7 +206,7 @@ options <- default = args$random_effects, help = paste("The random effects for the model, ", "comma-delimited for multiple effects", - " [ Default: none ]" + "[ Default: none ]" ) ) options <- @@ -214,8 +217,8 @@ options <- dest = "fixed_effects", default = args$fixed_effects, help = paste("The fixed effects for the model,", - " comma-delimited for multiple effects", - " [ Default: all ]" + "comma-delimited for multiple effects", + "[ Default: all ]" ) ) options <- @@ -226,7 +229,7 @@ options <- dest = "correction", default = args$correction, help = paste("The correction method for computing", - " the q-value [ Default: %default ]" + "the q-value [ Default: %default ]" ) ) options <- @@ -237,7 +240,7 @@ options <- dest = "standardize", default = args$standardize, help = paste("Apply z-score so continuous metadata are on", - " the same scale [ Default: %default ]" + "the same scale [ Default: %default ]" ) ) options <- @@ -247,7 +250,7 @@ options <- type = "logical", dest = "plot_heatmap", default = args$plot_heatmap, - help = paste("Generate a heatmap for the significant ", + help = paste("Generate a heatmap for the significant", "associations [ Default: %default ]" ) ) @@ -258,7 +261,7 @@ options <- type = "double", dest = "heatmap_first_n", default = args$heatmap_first_n, - help = paste("In heatmap, plot top N features with significant ", + help = paste("In heatmap, plot top N features with significant", "associations [ Default: %default ]" ) ) @@ -270,7 +273,29 @@ options <- dest = "plot_scatter", default = args$plot_scatter, help = paste("Generate scatter plots for the significant", - " associations [ Default: %default ]" + "associations [ Default: %default ]" + ) + ) +options <- + optparse::add_option( + options, + c("-g", "--max_pngs"), + type = "double", + dest = "max_pngs", + default = args$max_pngs, + help = paste("The maximum number of scatterplots for signficant", + "associations to save as png files [ Default: %default ]" + ) + ) +options <- + optparse::add_option( + options, + c("-O", "--save_scatter"), + type = "logical", + dest = "save_scatter", + default = args$save_scatter, + help = paste("Save all scatter plot ggplot objects", + "to an RData file [ Default: %default ]" ) ) options <- @@ -284,7 +309,17 @@ options <- "run in parallel [ Default: %default ]" ) ) - +options <- + optparse::add_option( + options, + c("-j", "--save_models"), + type = "logical", + dest = "save_models", + default = args$save_models, + help = paste("Return the full model outputs ", + "and save to an RData file [ Default: %default ]" + ) + ) options <- optparse::add_option( options, @@ -326,22 +361,27 @@ Maaslin2 <- standardize = TRUE, cores = 1, plot_heatmap = TRUE, - plot_scatter = TRUE, heatmap_first_n = 50, + plot_scatter = TRUE, + max_pngs = 10, + save_scatter = FALSE, + save_models = FALSE, reference = NULL) { # Allow for lower case variables normalization <- toupper(normalization) transform <- toupper(transform) analysis_method <- toupper(analysis_method) - correction <- toupper(correction) + + # Match variable ignoring case then set correctly as required for p.adjust + correction <- correction_choices[match(toupper(correction), toupper(correction_choices))] ################################################################# # Read in the data and metadata, create output folder, init log # ################################################################# # if a character string then this is a file name, else it # is a data frame - if (is.character(input_data)) { + if (is.character(input_data) && file.exists(input_data)) { data <- data.frame(data.table::fread( input_data, header = TRUE, sep = "\t"), @@ -350,10 +390,19 @@ Maaslin2 <- # read again to get row name data <- read.table(input_data, header = TRUE, row.names = 1) } + } else if (is.data.frame(input_data)) { + if (!tibble::has_rownames(input_data)) { + stop("If supplying input_data as a data frame, it must have appropriate rownames!") + } + data <- as.data.frame(input_data) # in case it's a tibble or something + } else if (is.matrix(input_data)) { + logging::logwarn("Input is a matrix, passing through as.data.frame() .") + data <- as.data.frame(input_data) } else { - data <- input_data + stop("input_data is neither a file nor a data frame!") } - if (is.character(input_metadata)) { + + if (is.character(input_metadata) && file.exists(input_metadata)) { metadata <- data.frame(data.table::fread( input_metadata, header = TRUE, sep = "\t"), @@ -363,23 +412,40 @@ Maaslin2 <- header = TRUE, row.names = 1) } + } else if (is.data.frame(input_metadata)) { + if (!tibble::has_rownames(input_metadata)) { + stop("If supplying input_metadata as a data frame, it must have appropriate rownames!") + } + metadata <- as.data.frame(input_metadata) # in case it's a tibble or something } else { - metadata <- input_metadata - } - + stop("input_metadata is neither a file nor a data frame!") + } # create an output folder and figures folder if it does not exist if (!file.exists(output)) { print("Creating output folder") dir.create(output) } - - if (plot_heatmap || plot_scatter){ - figures_folder <- file.path(output,"figures") - if (!file.exists(figures_folder)) { - print("Creating output figures folder") - dir.create(figures_folder) + + features_folder <- file.path(output, "features") + if (!file.exists(features_folder)) { + print("Creating output feature tables folder") + dir.create(features_folder) + } + + fits_folder <- file.path(output, "fits") + if (!file.exists(fits_folder)) { + print("Creating output fits folder") + dir.create(fits_folder) } + + if (plot_heatmap || plot_scatter) { + figures_folder <- file.path(output, "figures") + if (!file.exists(figures_folder)) { + print("Creating output figures folder") + dir.create(figures_folder) + } } + # create log file (write info to stdout and debug level to log file) # set level to finest so all log levels are reviewed @@ -506,6 +572,11 @@ Maaslin2 <- ) } } + # check that plots are generated if to be saved + if (!plot_scatter && save_scatter) { + logging::logerror("Scatter plots cannot be saved if they are not plotted") + stop("Option not valid", call. = FALSE) + } ############################################################### # Determine orientation of data in input and reorder to match # @@ -659,7 +730,14 @@ Maaslin2 <- random_effects <- unlist(strsplit(random_effects, ",", fixed = TRUE)) # subtract random effects from fixed effects - fixed_effects <- setdiff(fixed_effects, random_effects) + common_variables <- intersect(fixed_effects, random_effects) + if (length(common_variables) > 0) { + logging::logwarn( + paste("Feature name included as fixed and random effect,", + "check that this is intended: %s"), + paste(common_variables, collapse = " , ") + ) + } # remove any random effects not found in metadata to_remove <- setdiff(random_effects, colnames(metadata)) if (length(to_remove) > 0) @@ -723,32 +801,48 @@ Maaslin2 <- # Filter data based on min abundance and min prevalence # ######################################################### - # use ordered factor for variables with more than two levels - # find variables with more than two levels if (is.null(reference)) { reference <- "," } - - for ( i in colnames(metadata) ) { - mlevels <- unique(na.omit(metadata[,i])) - numeric_levels <- grep('^-?[0-9.]+[eE+-]?', mlevels, value = T) - if ( ( length(mlevels[! (mlevels %in% c("UNK"))]) > 2 ) && - ( i %in% fixed_effects ) && - ( length(numeric_levels) == 0)) { - split_reference <- unlist(strsplit(reference, "[,;]")) - if (! i %in% split_reference ) { - stop( - paste("Please provide the reference for the variable '", - i, "' which includes more than 2 levels: ", - paste(as.character(mlevels), collapse=", "), ".", sep="") - ) - } else { - ref <- split_reference[match(i,split_reference)+1] - other_levels <- as.character(mlevels)[! as.character(mlevels) == ref] - metadata[,i] = factor(metadata[,i], levels=c(ref, other_levels)) + split_reference <- unlist(strsplit(reference, "[,;]")) + + # for each fixed effect, check that a reference level has been set if necessary: number of levels > 2 and metadata isn't already an ordered factor + for (i in fixed_effects) { + # don't check for or require reference levels for numeric metadata + if (is.numeric(metadata[,i])) { + next + } + # respect ordering if a factor is explicitly passed in with no reference set + if (is.factor(metadata[,i]) && !(i %in% split_reference)) { + logging::loginfo(paste("Factor detected for categorial metadata '", + i, "'. Provide a reference argument or manually set factor ordering to change reference level.", sep="")) + next + } + + # set metadata as a factor (ordered alphabetically) + metadata[,i] <- as.factor(metadata[,i]) + mlevels <- levels(metadata[,i]) + + # get reference level for variable being considered, returns NA if not found + ref <- split_reference[match(i, split_reference)+1] + + # if metadata has 2 levels, allow but don't require setting reference level, otherwise require it + if ((length(mlevels) == 2)) { + if(!is.na(ref)) { + metadata[,i] = relevel(metadata[,i], ref = ref) } + } else if (length(mlevels) > 2) { + if (!is.na(ref)) { + metadata[,i] = relevel(metadata[,i], ref = ref) + } else { + stop(paste("Please provide the reference for the variable '", + i, "' which includes more than 2 levels: ", + paste(as.character(mlevels), collapse=", "), ".", sep="")) + } + } else { + stop("Provided categorical metadata has fewer than 2 unique, non-NA values.") } - } + } unfiltered_data <- data unfiltered_metadata <- metadata @@ -835,6 +929,7 @@ Maaslin2 <- formula = formula, random_effects_formula = random_effects_formula, correction = correction, + save_models = save_models, cores = cores ) @@ -858,15 +953,61 @@ Maaslin2 <- length(which(filtered_data_norm[, x[1]] > 0)) ) - ######################### - # Write out the results # - ######################### + ################################ + # Write out the raw model fits # + ################################ + + if (save_models) { + model_file = file.path(fits_folder, "models.rds") + # remove models file if already exists (since models append) + if (file.exists(model_file)) { + logging::logwarn( + "Deleting existing model objects file: %s", model_file) + unlink(model_file) + } + logging::loginfo("Writing model objects to file %s", model_file) + saveRDS(fit_data$fits, file = model_file) + } + + ########################################## + # Write processed feature tables to file # + ########################################## + + filtered_file = file.path(features_folder, "filtered_data.tsv") + logging::loginfo("Writing filtered data to file %s", filtered_file) + write.table( + data.frame("feature" = rownames(filtered_data), filtered_data), + file = filtered_file, + sep = "\t", + quote = FALSE, + row.names = FALSE + ) + + filtered_data_norm_file = file.path(features_folder, "filtered_data_norm.tsv") + logging::loginfo("Writing filtered, normalized data to file %s", filtered_data_norm_file) + write.table( + data.frame("feature" = rownames(filtered_data_norm), filtered_data_norm), + file = filtered_data_norm_file, + sep = "\t", + quote = FALSE, + row.names = FALSE + ) + + filtered_data_norm_transformed_file = file.path(features_folder, "filtered_data_norm_transformed.tsv") + logging::loginfo("Writing filtered, normalized, transformed data to file %s", filtered_data_norm_transformed_file) + write.table( + data.frame("feature" = rownames(filtered_data_norm_transformed), filtered_data_norm_transformed), + file = filtered_data_norm_transformed_file, + sep = "\t", + quote = FALSE, + row.names = FALSE + ) ########################### - # write residuals to file # + # Write residuals to file # ########################### - residuals_file = file.path(output, "residuals.rds") + residuals_file = file.path(fits_folder, "residuals.rds") # remove residuals file if already exists (since residuals append) if (file.exists(residuals_file)) { logging::logwarn( @@ -877,10 +1018,10 @@ Maaslin2 <- saveRDS(fit_data$residuals, file = residuals_file) ############################### - # write fitted values to file # + # Write fitted values to file # ############################### - fitted_file = file.path(output, "fitted.rds") + fitted_file = file.path(fits_folder, "fitted.rds") # remove fitted file if already exists (since fitted append) if (file.exists(fitted_file)) { logging::logwarn( @@ -890,12 +1031,12 @@ Maaslin2 <- logging::loginfo("Writing fitted values to file %s", fitted_file) saveRDS(fit_data$fitted, file = fitted_file) - ######################################################## - # write extracted random effects to file (if specified) # - ######################################################## + ######################################################### + # Write extracted random effects to file (if specified) # + ######################################################### if (!is.null(random_effects)) { - ranef_file = file.path(output, "ranef.rds") + ranef_file = file.path(fits_folder, "ranef.rds") # remove ranef file if already exists (since ranef append) if (file.exists(ranef_file)) { logging::logwarn( @@ -907,7 +1048,7 @@ Maaslin2 <- } ############################# - # write all results to file # + # Write all results to file # ############################# results_file <- file.path(output, "all_results.tsv") @@ -947,7 +1088,7 @@ Maaslin2 <- ) ########################################### - # write results passing threshold to file # + # Write results passing threshold to file # ########################################### significant_results <- @@ -1009,12 +1150,25 @@ Maaslin2 <- "to output folder: %s"), output ) - maaslin2_association_plots( + saved_plots <- maaslin2_association_plots( unfiltered_metadata, filtered_data, significant_results_file, output, - figures_folder) + figures_folder, + max_pngs, + save_scatter) + if (save_scatter) { + scatter_file <- file.path(figures_folder, "scatter_plots.rds") + # remove plots file if already exists + if (file.exists(scatter_file)) { + logging::logwarn( + "Deleting existing scatter plot objects file: %s", scatter_file) + unlink(scatter_file) + } + logging::loginfo("Writing scatter plot objects to file %s", scatter_file) + saveRDS(saved_plots, file = scatter_file) + } } return(fit_data) @@ -1061,8 +1215,11 @@ if (identical(environment(), globalenv()) && current_args$standardize, current_args$cores, current_args$plot_heatmap, - current_args$plot_scatter, current_args$heatmap_first_n, + current_args$plot_scatter, + current_args$max_pngs, + current_args$save_scatter, + current_args$save_models, current_args$reference ) } diff --git a/R/fit.R b/R/fit.R index c1081ae..a17c235 100644 --- a/R/fit.R +++ b/R/fit.R @@ -5,7 +5,6 @@ for (lib in c( 'lmerTest', 'car', 'parallel', - 'MuMIn', 'glmmTMB', 'MASS', 'cplm', @@ -23,6 +22,7 @@ fit.data <- formula = NULL, random_effects_formula = NULL, correction = "BH", + save_models = FALSE, cores = 1) { # set the formula default to all fixed effects if not provided @@ -231,7 +231,14 @@ fit.data <- formula, data = dat_sub, na.action = na.exclude) - }, error = function(err) { + }, warning = function(w) { + message(paste("Feature", colnames(features)[x], ":", w)) + logging::logwarn(paste( + "Fitting problem for feature", + x, + "a warning was issued")) + return(fit1) + }, error = function(err) { fit1 <- try({ model_function( @@ -253,8 +260,13 @@ fit.data <- d<-as.vector(unlist(l)) names(d)<-unlist(lapply(l, row.names)) output$ranef<-d - } } + if (save_models) { + output$fit <- fit + } else { + output$fit <- NA + } + } else { logging::logwarn(paste( @@ -268,6 +280,7 @@ fit.data <- output$residuals <- NA output$fitted <- NA if (!(is.null(random_effects_formula))) output$ranef <- NA + output$fit <- NA } colnames(output$para) <- c('coef', 'stderr' , 'pval', 'name') output$para$feature <- colnames(features)[x] @@ -295,6 +308,17 @@ fit.data <- })) row.names(fitted) <- colnames(features) + fits <- + lapply(outputs, function(x) { + return(x$fit) + }) + names(fits) <- colnames(features) + + # Return NULL rather than empty object if fits aren't saved + if (all(is.na(fits))) { + fits <- NULL + } + if (!(is.null(random_effects_formula))) { ranef <- do.call(rbind, lapply(outputs, function(x) { @@ -349,9 +373,9 @@ fit.data <- rownames(paras)<-NULL if (!(is.null(random_effects_formula))) { - return(list("results" = paras, "residuals" = residuals, "fitted" = fitted, "ranef" = ranef)) + return(list("results" = paras, "residuals" = residuals, "fitted" = fitted, "ranef" = ranef, "fits" = fits)) } else { - return(list("results" = paras, "residuals" = residuals, "fitted" = fitted)) + return(list("results" = paras, "residuals" = residuals, "fitted" = fitted, "ranef" = NULL, "fits" = fits)) } } diff --git a/R/utility_scripts.R b/R/utility_scripts.R index 4fe6239..f8a0bb9 100644 --- a/R/utility_scripts.R +++ b/R/utility_scripts.R @@ -186,7 +186,15 @@ TMMnorm = function(features) { # Arc Sine Square Root Transformation AST <- function(x) { - return(sign(x) * asin(sqrt(abs(x)))) + y <- sign(x) * asin(sqrt(abs(x))) + if(any(is.na(y))) { + logging::logerror( + paste0("AST transform is only valid for values between -1 and 1. ", + "Please select an appropriate normalization option or ", + "normalize your data prior to running.")) + stop() + } + return(y) } ######################## diff --git a/R/viz.R b/R/viz.R index d528bc8..624b794 100644 --- a/R/viz.R +++ b/R/viz.R @@ -95,9 +95,9 @@ maaslin2_heatmap <- title_additional <- "" if (!is.na(first_n) & first_n > 0 & first_n < dim(df)[1]) { if (cell_value == 'coef') { - df <- df[order(-abs(df[cell_value])) , ] + df <- df[order(-abs(df[[cell_value]])) , ] } else{ - df <- df[order(df[cell_value]), ] + df <- df[order(df[[cell_value]]), ] } # get the top n features with significant associations df_sub <- df[1:first_n,] @@ -235,7 +235,8 @@ maaslin2_heatmap <- treeheight_row = 0, treeheight_col = 0, display_numbers = matrix(ifelse( - a > 0.0, "+", ifelse(a < 0.0, "-", "")), nrow(a)) + a > 0.0, "+", ifelse(a < 0.0, "-", "")), nrow(a)), + silent = TRUE ) }, error = function(err) { logging::logerror("Unable to plot heatmap") @@ -289,7 +290,8 @@ maaslin2_association_plots <- output_results, write_to = './', figures_folder = './figures/', - max_pngs = 10) + max_pngs = 10, + save_scatter = FALSE) { #MaAslin2 scatter plot function and theme @@ -351,9 +353,10 @@ maaslin2_association_plots <- metadata_labels <- unlist(metadata_types[!duplicated(metadata_types)]) metadata_number <- 1 + saved_plots <- list() for (label in metadata_labels) { - saved_plots <- vector('list', max_pngs) + saved_plots[[label]] <- list() # for file name replace any non alphanumeric with underscore plot_file <- paste( @@ -441,7 +444,7 @@ maaslin2_association_plots <- size = 2, fontface = "italic" ) - } else{ + } else { # if Metadata is categorical generate a boxplot ### check if the variable is categorical @@ -519,21 +522,38 @@ maaslin2_association_plots <- stdout <- capture.output(print(temp_plot), type = "message") if (length(stdout) > 0) logging::logdebug(stdout) - if (count < max_pngs + 1) - saved_plots[[count]] <- temp_plot + + # keep all plots if desired + # or only keep plots to be printed to png + if (save_scatter) { + saved_plots[[label]][[count]] <- temp_plot + } + else if (count <= max_pngs) { + saved_plots[[label]][[count]] <- temp_plot + } count <- count + 1 } dev.off() + # print the saved figures - for (plot_number in seq(1,max_pngs)) { - png_file <- file.path(figures_folder, + # this is done separately from pdf generation + # because nested graphics devices cause problems in rmarkdown output + for (plot_number in seq(1, min((count-1), max_pngs))) { + png_file <- file.path(figures_folder, paste0( substr(basename(plot_file),1,nchar(basename(plot_file))-4), "_",plot_number,".png")) - png(png_file, res = 300, width = 960, height = 960) - stdout <- capture.output(print(saved_plots[[plot_number]])) - dev.off() + png(png_file, res = 300, width = 960, height = 960) + stdout <- capture.output(print(saved_plots[[label]][[plot_number]])) + dev.off() + } + # give plots informative names + if (save_scatter) { + names(saved_plots[[label]]) <- make.names(output_df_all[data_index, 'feature'], unique = TRUE) + } else { + saved_plots[[label]] <- NULL # instead remove plots if only saved for png generation } metadata_number <- metadata_number + 1 } + return(saved_plots) } diff --git a/README.md b/README.md index b64f982..d151821 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ + # MaAsLin2 User Manual # MaAsLin2 is the next generation of MaAsLin (Microbiome Multivariable Association with Linear Models). @@ -7,7 +8,7 @@ MaAsLin2 is the next generation of MaAsLin (Microbiome Multivariable Association If you use the MaAsLin2 software, please cite our manuscript: -Mallick H, Rahnavard A, McIver LJ, Ma S, Zhang Y, Nguyen LH, Tickle TL, Weingart G, Ren B, Schwager EH, Chatterjee S, Thompson KN, Wilkinson JE, Subramanian A, Lu Y, Waldron L, Paulson JN, Franzosa EA, Bravo HC, Huttenhower C (2021). [Multivariable Association Discovery in Population-scale Meta-omics Studies](https://www.biorxiv.org/content/10.1101/2021.01.20.427420v1). bioRxiv, https://doi.org/10.1101/2021.01.20.427420. +Mallick H, Rahnavard A, McIver LJ, Ma S, Zhang Y, Nguyen LH, Tickle TL, Weingart G, Ren B, Schwager EH, Chatterjee S, Thompson KN, Wilkinson JE, Subramanian A, Lu Y, Waldron L, Paulson JN, Franzosa EA, Bravo HC, Huttenhower C (2021). [Multivariable Association Discovery in Population-scale Meta-omics Studies](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1009442). PLoS Computational Biology, 17(11):e1009442. Check out the [MaAsLin 2 tutorial](https://github.com/biobakery/biobakery/wiki/maaslin2) for an overview of analysis options. @@ -51,10 +52,10 @@ If only running from the command line, you do not need to install the MaAsLin2 p 1. Download the source: [MaAsLin2.master.zip](https://github.com/biobakery/Maaslin2/archive/master.zip) 2. Decompress the download: - * ``$ tar xzvf Maaslin2-master.zip`` + * ``$ unzip master.zip`` 3. Install the Bioconductor dependencies edgeR and metagenomeSeq. 4. Install the CRAN dependencies: - * ``$ R -q -e "install.packages(c('lmerTest','pbapply','car','dplyr','vegan','chemometrics','ggplot2','pheatmap','hash','logging','data.table','MuMIn','glmmTMB','MASS','cplm','pscl'), repos='http://cran.r-project.org')"`` + * ``$ R -q -e "install.packages(c('lmerTest','pbapply','car','dplyr','vegan','chemometrics','ggplot2','pheatmap','hash','logging','data.table','glmmTMB','MASS','cplm','pscl'), repos='http://cran.r-project.org')"`` 5. Install the MaAsLin2 package (only r,equired if running as an R function): * ``$ R CMD INSTALL maaslin2`` @@ -123,6 +124,13 @@ MaAsLin2 generates two types of output files: data and visualization. * ``significant_results.tsv`` * This file is a subset of the results in the first file. * It only includes associations with q-values <= to the threshold. + * ``features``` + * This folder includes the filtered, normalized, and transformed versions of the input feature table. + * These steps are performed sequentially in the above order. + * If an option is set such that a step does not change the data, the resulting table will still be output. + * ``models.rds`` + * This file contains a list with every model fit object. + * It will only be generated if save_models is set to TRUE. * ``residuals.rds`` * This file contains a data frame with residuals for each feature. * ``fitted.rds`` @@ -139,7 +147,7 @@ MaAsLin2 generates two types of output files: data and visualization. * A plot is generated for each significant association. * Scatter plots are used for continuous metadata. * Box plots are for categorical data. - * Data points plotted are after normalization, filtering, and transform. + * Data points plotted are after filtering but prior to normalization and transform. ### Run a Demo ### @@ -154,7 +162,7 @@ the HMP2 data which can be downloaded from https://ibdmdb.org/ . #### Command line #### -``$ Maaslin2.R --transform=AST --fixed_effects="diagnosis,dysbiosisnonIBD,dysbiosisUC,dysbiosisCD,antibiotics,age" --random_effects="site,subject" --normalization=NONE --standardize=FALSE inst/extdata/HMP2_taxonomy.tsv inst/extdata/HMP2_metadata.tsv demo_output`` +``$ Maaslin2.R --fixed_effects="diagnosis,dysbiosisnonIBD,dysbiosisUC,dysbiosisCD,antibiotics,age" --random_effects="site,subject" --standardize=FALSE inst/extdata/HMP2_taxonomy.tsv inst/extdata/HMP2_metadata.tsv demo_output`` * Make sure to provide the full path to the MaAsLin2 executable (ie ./R/Maaslin2.R). * In the demo command: @@ -172,10 +180,9 @@ input_data <- system.file( input_metadata <-system.file( 'extdata','HMP2_metadata.tsv', package="Maaslin2") fit_data <- Maaslin2( - input_data, input_metadata, 'demo_output', transform = "AST", + input_data, input_metadata, 'demo_output' fixed_effects = c('diagnosis', 'dysbiosisnonIBD','dysbiosisUC','dysbiosisCD', 'antibiotics', 'age'), random_effects = c('site', 'subject'), - normalization = 'NONE', standardize = FALSE) ``` @@ -208,8 +215,7 @@ Options: is detected at minimum abundance [ Default: 0.1 ] -b MIN_VARIANCE, --min_variance=MIN_VARIANCE - Keep features with variance greater than - [ Default: 0.0 ] + Keep features with variance greater than [ Default: 0.0 ] -s MAX_SIGNIFICANCE, --max_significance=MAX_SIGNIFICANCE The q-value threshold for significance [ Default: 0.25 ] @@ -253,18 +259,35 @@ Options: -o PLOT_SCATTER, --plot_scatter=PLOT_SCATTER Generate scatter plots for the significant associations [ Default: TRUE ] + + -g MAX_PNGS, --max_pngs=MAX_PNGS + The maximum number of scatter plots for signficant associations + to save as png files [ Default: 10 ] + + -O SAVE_SCATTER, --save_scatter=SAVE_SCATTER + Save all scatter plot ggplot objects + to an RData file [ Default: FALSE ] -e CORES, --cores=CORES The number of R processes to run in parallel [ Default: 1 ] + + -j SAVE_MODELS --save_models=SAVE_MODELS + Return the full model outputs and save to an RData file + [ Default: FALSE ] -d REFERENCE, --reference=REFERENCE - The factor to use as a reference for a variable - with more than two levels provided as a string - of 'variable,reference' semi-colon delimited - for multiple variables [ Default: NA ] - NOTE: A space between the variable and reference - will not error but will cause an inaccurate result. + The factor to use as a reference level for a categorical variable + provided as a string of 'variable,reference', semi-colon delimited for + multiple variables. Not required if metadata is passed as a factor or + for variables with less than two levels but can be set regardless. + [ Default: NA ] + +## Contributions ## +Thanks go to these wonderful people: + +- Nick Waters + * Design of the PR and attribution process ## Troubleshooting ## diff --git a/inst/CITATION b/inst/CITATION index e7b60e1..ab74dbf 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -1,14 +1,15 @@ bibentry(bibtype = "Article", title = "Multivariable Association in Population-scale Meta-omics Studies.", author = "Himel Mallick, Ali Rahnavard, Lauren J. McIver, Siyuan Ma, Yangcong Zhang, Long H. Nguyen, Timothy L. Tickle, Geroge Weingart, Boyu Ren, Emma H. Schwager, Suvo Chatterjee, Kelsey N. Thompson, Jeremy E. Wilkinson, Ayshwarya Subramanian, Yiren Lu, Levi Waldron, Joseph N. Paulson, Eric A. Franzosa, Hector Corrada Bravo, Curtis Huttenhower", - year = 2020, - journal = "Nature Methods", - volume = "In Submission", - pages = "", - doi = "", + year = 2021, + journal = "PLOS Computational Biology", + volume = "17", + issue = "11", + pages = "e1009442", + doi = "https://doi.org/10.1371/journal.pcbi.1009442", url = "http://huttenhower.sph.harvard.edu/maaslin2", header = "To cite MaAsLin 2 in publications, please use:", - textVersion = "Mallick H et al. (2020). Multivariable Association in Population-scale Meta-omics Studies, http://huttenhower.sph.harvard.edu/maaslin2." + textVersion = "Mallick H et al. (2021). Multivariable Association in Population-scale Meta-omics Studies, http://huttenhower.sph.harvard.edu/maaslin2." ) bibentry(bibtype = "Manual", diff --git a/man/Maaslin2.Rd b/man/Maaslin2.Rd index ee50bd9..c0bbadc 100644 --- a/man/Maaslin2.Rd +++ b/man/Maaslin2.Rd @@ -30,8 +30,11 @@ Maaslin2( standardize = TRUE, cores = 1, plot_heatmap = TRUE, - plot_scatter = TRUE, heatmap_first_n = 50, + plot_scatter = TRUE, + max_pngs = 10, + save_scatter = FALSE, + save_models = FALSE, reference = NULL ) } @@ -87,9 +90,20 @@ Maaslin2( } \item{plot_scatter}{ Generate scatter plots for the significant associations. +} + \item{max_pngs}{ + Set the maximum number of scatter plots for signficant associations + to save as png files. +} + \item{save_scatter}{ + Save all scatter plot ggplot objects + to an RData file. } \item{cores}{ The number of R processes to run in parallel. +} + \item{save_models}{ + Return the full model outputs and save to an RData file. } \item{reference}{ The factor to use as a reference for a variable with more than two levels @@ -98,7 +112,7 @@ Maaslin2( } } \value{ - Data.frame containing the results from applying the model. + List containing the results from applying the model. } \author{ Himel Mallick,\cr diff --git a/vignettes/maaslin2.Rmd b/vignettes/maaslin2.Rmd index 80f0b7c..768bb5b 100644 --- a/vignettes/maaslin2.Rmd +++ b/vignettes/maaslin2.Rmd @@ -1,5 +1,5 @@ --- -title: "maaslin2" +title: "Maaslin2" date: "Friday, May 10, 2019" author: - name: Himel Mallick @@ -10,22 +10,26 @@ author: email: lauren.j.mciver@gmail.com output: html_document vignette: > - %\VignetteIndexEntry{MaAsLin2} + %\VignetteIndexEntry{Maaslin2} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- # MaAsLin2 User Manual # -MaAsLin2 is the next generation of MaAsLin. +MaAsLin2 is the next generation of MaAsLin (Microbiome Multivariable Association with Linear Models). -[MaAsLin2](http://huttenhower.sph.harvard.edu/maaslin2) is comprehensive R package for finding multivariable association between clinical metadata and microbial meta-omics features. MaAsLin2 relies on general linear models to accommodate most modern epidemiological study designs, including multiple analysis methods (with support for multiple covariates and repeated measures), filtering, normalization, and transform options to customize analysis for your specific study. +[MaAsLin2](http://huttenhower.sph.harvard.edu/maaslin2) is comprehensive R package for efficiently determining multivariable association between clinical metadata and microbial meta-omics features. MaAsLin2 relies on general linear models to accommodate most modern epidemiological study designs, including cross-sectional and longitudinal, along with a variety of filtering, normalization, and transform methods. If you use the MaAsLin2 software, please cite our manuscript: -Mallick et al. (2020+). "Multivariable Association in Population-scale Meta-omics Studies" (In Preparation). -If you have questions, please email the google group -[MaAsLin Users](https://groups.google.com/forum/#!forum/maaslin-users). +Mallick H, Rahnavard A, McIver LJ, Ma S, Zhang Y, Nguyen LH, Tickle TL, Weingart G, Ren B, Schwager EH, Chatterjee S, Thompson KN, Wilkinson JE, Subramanian A, Lu Y, Waldron L, Paulson JN, Franzosa EA, Bravo HC, Huttenhower C (2021). [Multivariable Association Discovery in Population-scale Meta-omics Studies](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1009442). PLoS Computational Biology, 17(11):e1009442. + +Check out the [MaAsLin 2 tutorial](https://github.com/biobakery/biobakery/wiki/maaslin2) for an overview of analysis options. + +If you have questions, please direct it to : +[MaAsLin2 Forum](https://forum.biobakery.org/c/Downstream-analysis-and-statistics/MaAsLin2) +[Google Groups](https://groups.google.com/forum/#!forum/maaslin-users) (Read only) -------------------------------------------- @@ -66,7 +70,7 @@ the MaAsLin2 dependencies. * ``$ tar xzvf maaslin2.tar.gz`` 3. Install the Bioconductor dependencies edgeR and metagenomeSeq. 4. Install the CRAN dependencies: - * ``$ R -q -e "install.packages(c('lmerTest','pbapply','car','dplyr','vegan','chemometrics','ggplot2','pheatmap','hash','logging','data.table','MuMIn','glmmTMB','MASS','cplm','pscl'), repos='http://cran.r-project.org')"`` + * ``$ R -q -e "install.packages(c('lmerTest','pbapply','car','dplyr','vegan','chemometrics','ggplot2','pheatmap','hash','logging','data.table','glmmTMB','MASS','cplm','pscl'), repos='http://cran.r-project.org')"`` 5. Install the MaAsLin2 package (only r,equired if running as an R function): * ``$ R CMD INSTALL maaslin2`` @@ -127,6 +131,13 @@ MaAsLin2 generates two types of output files: data and visualization. * ``significant_results.tsv`` * This file is a subset of the results in the first file. * It only includes associations with q-values <= to the threshold. + * ``features``` + * This folder includes the filtered, normalized, and transformed versions of the input feature table. + * These steps are performed sequentially in the above order. + * If an option is set such that a step does not change the data, the resulting table will still be output. + * ``models.rds`` + * This file contains a list with every model fit object. + * It will only be generated if save_models is set to TRUE. * ``residuals.rds`` * This file contains a data frame with residuals for each feature. * ``fitted.rds`` @@ -143,7 +154,7 @@ MaAsLin2 generates two types of output files: data and visualization. * A plot is generated for each significant association. * Scatter plots are used for continuous metadata. * Box plots are for categorical data. - * Data points plotted are after normalization, filtering, and transform. + * Data points plotted are after filtering but prior to normalization and transform. ### Run a Demo ### @@ -158,7 +169,7 @@ the HMP2 data which can be downloaded from https://ibdmdb.org/ . #### Command line #### -``$ Maaslin2.R --transform=AST --fixed_effects="diagnosis,dysbiosisnonIBD,dysbiosisUC,dysbiosisCD,antibiotics,age" --random_effects="site,subject" --normalization=NONE --standardize=FALSE inst/extdata/HMP2_taxonomy.tsv inst/extdata/HMP2_metadata.tsv demo_output`` +``$ Maaslin2.R --fixed_effects="diagnosis,dysbiosisnonIBD,dysbiosisUC,dysbiosisCD,antibiotics,age" --random_effects="site,subject" --standardize=FALSE inst/extdata/HMP2_taxonomy.tsv inst/extdata/HMP2_metadata.tsv demo_output`` * Make sure to provide the full path to the MaAsLin2 executable (ie ./R/Maaslin2.R). * In the demo command: @@ -176,11 +187,10 @@ input_data <- system.file( input_metadata <-system.file( 'extdata','HMP2_metadata.tsv', package="Maaslin2") fit_data <- Maaslin2( - input_data, input_metadata, 'demo_output', transform = "AST", + input_data, input_metadata, 'demo_output', fixed_effects = c('diagnosis', 'dysbiosisnonIBD','dysbiosisUC','dysbiosisCD', 'antibiotics', 'age'), random_effects = c('site', 'subject'), reference = "diagnosis,nonIBD", - normalization = 'NONE', standardize = FALSE) ``` @@ -213,8 +223,7 @@ Options: is detected at minimum abundance [ Default: 0.1 ] -b MIN_VARIANCE, --min_variance=MIN_VARIANCE - Keep features with variance greater than - [ Default: 0.0 ] + Keep features with variance greater than [ Default: 0.0 ] -s MAX_SIGNIFICANCE, --max_significance=MAX_SIGNIFICANCE The q-value threshold for significance [ Default: 0.25 ] @@ -258,16 +267,29 @@ Options: -o PLOT_SCATTER, --plot_scatter=PLOT_SCATTER Generate scatter plots for the significant associations [ Default: TRUE ] + + -g MAX_PNGS, --max_pngs=MAX_PNGS + The maximum number of scatter plots for signficant associations + to save as png files [ Default: 10 ] + + -O SAVE_SCATTER, --save_scatter=SAVE_SCATTER + Save all scatter plot ggplot objects + to an RData file [ Default: FALSE ] -e CORES, --cores=CORES The number of R processes to run in parallel [ Default: 1 ] + + -j SAVE_MODELS --save_models=SAVE_MODELS + Return the full model outputs and save to an RData file + [ Default: FALSE ] -d REFERENCE, --reference=REFERENCE - The factor to use as a reference for a variable - with more than two levels provided as a string - of 'variable,reference' semi-colon delimited - for multiple variables [ Default: NA ] + The factor to use as a reference level for a categorical variable + provided as a string of 'variable,reference', semi-colon delimited for + multiple variables. Not required if metadata is passed as a factor or + for variables with less than two levels but can be set regardless. + [ Default: NA ] ## Troubleshooting ##