From 82d5231b5a640104a136abffce353686d4e68b03 Mon Sep 17 00:00:00 2001 From: skchronicles Date: Mon, 8 Apr 2024 16:10:57 -0400 Subject: [PATCH] Adding option to add branch supports via bootstrapping --- mpox-seek | 21 ++++++++++++++++++++- workflow/Snakefile | 4 ++++ workflow/rules/tree.smk | 3 ++- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/mpox-seek b/mpox-seek index c35691d..c1425c8 100755 --- a/mpox-seek +++ b/mpox-seek @@ -399,6 +399,7 @@ def parsed_arguments(name, description): [--conda-env-name CONDA_ENV_NAME] \\ [--additional-strains ADDITIONAL_STRAINS] \\ [--batch-id BATCH_ID] \\ + [--bootstrap-trees] \\ --input INPUT [INPUT ...] \\ --output OUTPUT @@ -444,7 +445,6 @@ def parsed_arguments(name, description): the option listed below. Example: --additional-strains resources/mpox_additional_strains.fa.gz - --batch-id BATCH_ID Unique identifer to associate with a batch of samples. This option can be provided to ensure that project-level @@ -462,6 +462,13 @@ def parsed_arguments(name, description): Here is a list of valid or acceptable characters: "aA-Zz", "0-9", "-", "_". Example: --batch-id "2024-04-01" + --bootstrap-trees + Computes branch support by bootstrapping data. If this flag + is provided, raxml-ng is run in an all-in-one (ML search + + bootstrapping) mode via its '--all' option. Branch supports, + calculated by bootstrapping, will be added to the best + scoring tree. 
+ Example: --bootstrap-trees {3}{4}Orchestration options:{5} --mode {{local,slurm}} @@ -593,6 +600,7 @@ def parsed_arguments(name, description): --output mpox-seek_output \\ --additional-strains resources/mpox_additional_strains.fa.gz \\ --batch-id "$(date '+%Y-%m-%d-%H-%M')" \\ + --bootstrap-trees \\ --mode local \\ --use-conda \\ --conda-env-name mpox-seek \\ @@ -609,6 +617,7 @@ def parsed_arguments(name, description): --output mpox-seek_output \\ --additional-strains resources/mpox_additional_strains.fa.gz \\ --batch-id "$(date '+%Y-%m-%d-%H-%M')" \\ + --bootstrap-trees \\ --use-conda \\ --conda-env-name mpox-seek \\ --mode local @@ -666,6 +675,16 @@ def parsed_arguments(name, description): help = argparse.SUPPRESS ) + # Add branch support/confidence values + # to best tree via bootstrapping + subparser_run.add_argument( + '--bootstrap-trees', + action = 'store_true', + required = False, + default = False, + help = argparse.SUPPRESS + ) + # Optional Arguments # Add custom help message subparser_run.add_argument( diff --git a/workflow/Snakefile b/workflow/Snakefile index 2e61a78..7f2c1e9 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -37,6 +37,10 @@ decompress_strains_fasta = False if strains_fasta.endswith('.gz') or strains_fasta.endswith('.gzip'): decompress_strains_fasta = True +# Maps branch support/confidence values +# to the best tree via bootstrapping data +bootstrap_trees = str_bool(config['options']['bootstrap_trees']) + # Find list of sample which # have mulitple barcodes, this # means they need to be merged diff --git a/workflow/rules/tree.smk b/workflow/rules/tree.smk index 79c39d0..a619dee 100644 --- a/workflow/rules/tree.smk +++ b/workflow/rules/tree.smk @@ -18,6 +18,7 @@ rule tree: params: rname = 'tree', prefix = join(workpath, "project", batch_id, "mpox_phylogeny"), + bootrapping_options = lambda _: "--all --bs-metric fbp,tbe" if bootstrap_trees else "" conda: depending(conda_yaml_or_named_env, use_conda) container: 
depending(config['images']['mpox-seek'], use_singularity) shell: @@ -31,5 +32,5 @@ rule tree: --model GTR+G \\ --msa-format FASTA \\ --prefix {params.prefix} \\ - --seed 42 + --seed 42 {params.bootrapping_options} """ \ No newline at end of file