Skip to content

Commit

Permalink
Use new augur ancestral interface
Browse files Browse the repository at this point in the history
Updates the `augur ancestral` rule in the core workflow to generate ancestral
translations in addition to ancestral nucleotide sequences. Removes the
separate rule for translation and the corresponding custom script.
Updates all downstream rules to reference a single new `muts.json` file.
Rules that depended on the translations with internal node sequences do
not need to change, since the output files from the updated ancestral
rule keep the same names.
  • Loading branch information
huddlej committed Aug 29, 2023
1 parent fe6b6f0 commit f9a06aa
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 135 deletions.
94 changes: 0 additions & 94 deletions scripts/translations_aamuts.py

This file was deleted.

61 changes: 21 additions & 40 deletions workflow/snakemake_rules/core.smk
Original file line number Diff line number Diff line change
Expand Up @@ -266,12 +266,19 @@ rule refine:
rule ancestral:
message: "Reconstructing ancestral sequences and mutations"
input:
tree = rules.refine.output.tree,
alignment = rules.align.output.alignment,
tree = build_dir + "/{build_name}/{segment}/tree.nwk",
alignment = build_dir + "/{build_name}/{segment}/aligned.fasta",
translations = aggregate_translations,
reference = lambda w: f"{config['builds'][w.build_name]['reference']}",
annotation = lambda w: f"{config['builds'][w.build_name]['annotation']}",
output:
node_data = build_dir + "/{build_name}/{segment}/nt-muts.json"
node_data = build_dir + "/{build_name}/{segment}/muts.json",
translations_done = build_dir + "/{build_name}/{segment}/translations.done",
params:
inference = "joint"
inference = "joint",
genes = lambda w: GENES[w.segment],
input_translations = lambda w: build_dir + f"/{w.build_name}/{w.segment}/nextalign/masked.gene.%GENE.fasta",
output_translations = lambda w: build_dir + f"/{w.build_name}/{w.segment}/nextalign/masked.gene.%GENE_withInternalNodes.fasta",
conda: "../envs/nextstrain.yaml"
benchmark:
"benchmarks/ancestral_{build_name}_{segment}.txt"
Expand All @@ -284,36 +291,13 @@ rule ancestral:
augur ancestral \
--tree {input.tree} \
--alignment {input.alignment} \
--output-node-data {output.node_data} \
--inference {params.inference} 2>&1 | tee {log}
"""

rule translate:
message: "Translating amino acid sequences"
input:
translations = aggregate_translations,
tree = rules.refine.output.tree,
reference = lambda w: f"{config['builds'][w.build_name]['reference']}",
annotation = lambda w: f"{config['builds'][w.build_name]['annotation']}",
output:
node_data = build_dir + "/{build_name}/{segment}/aa_muts.json",
translations_done = build_dir + "/{build_name}/{segment}/translations.done"
params:
genes = lambda w: GENES[w.segment]
conda: "../envs/nextstrain.yaml"
benchmark:
"benchmarks/translate_{build_name}_{segment}.txt"
log:
"logs/translate_{build_name}_{segment}.txt"
shell:
"""
python3 scripts/translations_aamuts.py \
--tree {input.tree} \
--root-sequence {input.reference} \
--annotation {input.annotation} \
--reference {input.reference} \
--translations {input.translations:q} \
--genes {params.genes} \
--output {output.node_data} 2>&1 | tee {log} && touch {output.translations_done}
--translations "{params.input_translations}" \
--output-node-data {output.node_data} \
--output-translations "{params.output_translations}" \
--inference {params.inference} 2>&1 | tee {log} && touch {output.translations_done}
"""

rule traits:
Expand Down Expand Up @@ -347,8 +331,7 @@ rule traits:
rule clades:
input:
tree = build_dir + "/{build_name}/ha/tree.nwk",
nt_muts = build_dir + "/{build_name}/ha/nt-muts.json",
aa_muts = build_dir + "/{build_name}/ha/aa_muts.json",
muts = build_dir + "/{build_name}/ha/muts.json",
clades = lambda wildcards: config["builds"][wildcards.build_name]["clades"],
output:
node_data = build_dir + "/{build_name}/ha/clades.json",
Expand All @@ -361,7 +344,7 @@ rule clades:
"""
augur clades \
--tree {input.tree} \
--mutations {input.nt_muts} {input.aa_muts} \
--mutations {input.muts} \
--clades {input.clades} \
--output {output.node_data} 2>&1 | tee {log}
"""
Expand All @@ -370,8 +353,7 @@ rule clades:
rule subclades:
input:
tree = build_dir + "/{build_name}/{segment}/tree.nwk",
nt_muts = build_dir + "/{build_name}/{segment}/nt-muts.json",
aa_muts = build_dir + "/{build_name}/{segment}/aa_muts.json",
muts = build_dir + "/{build_name}/{segment}/muts.json",
clades = lambda wildcards: config["builds"][wildcards.build_name]["subclades"],
output:
node_data = build_dir + "/{build_name}/{segment}/subclades.json",
Expand All @@ -387,7 +369,7 @@ rule subclades:
"""
augur clades \
--tree {input.tree} \
--mutations {input.nt_muts} {input.aa_muts} \
--mutations {input.muts} \
--clades {input.clades} \
--membership-name {params.membership_name} \
--label-name {params.label_name} \
Expand All @@ -398,8 +380,7 @@ rule subclades:
rule import_clades:
input:
tree = build_dir + "/{build_name}/ha/tree.nwk",
nt_muts = build_dir + "/{build_name}/{segment}/nt-muts.json",
aa_muts = build_dir + "/{build_name}/{segment}/aa_muts.json",
muts = build_dir + "/{build_name}/{segment}/muts.json",
clades = build_dir + "/{build_name}/ha/clades.json",
output:
node_data = build_dir + "/{build_name}/{segment}/clades.json",
Expand Down
1 change: 0 additions & 1 deletion workflow/snakemake_rules/export.smk
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ def _get_node_data_by_wildcards(wildcards):
inputs = [
rules.refine.output.node_data,
rules.ancestral.output.node_data,
rules.translate.output.node_data,
rules.clades.output.node_data,
rules.traits.output.node_data,
rules.annotate_epiweeks.output.node_data,
Expand Down

0 comments on commit f9a06aa

Please sign in to comment.