From 717a4873ce9fb897bf75b4d65f5dcdc2b561c8ba Mon Sep 17 00:00:00 2001
From: Kim Andrews <17375001+kimandrews@users.noreply.github.com>
Date: Thu, 2 Jan 2025 13:47:58 -0800
Subject: [PATCH 1/3] Assign clades using augur clades

Clades are defined based on the "major clades" of Troupin et al 2016:
https://journals.plos.org/plospathogens/article?id=10.1371/journal.ppat.1006041
---
 phylogenetic/defaults/clades.tsv          | 51 +++++++++++++++++++++++
 phylogenetic/defaults/config.yaml         |  1 +
 phylogenetic/rules/annotate_phylogeny.smk | 21 +++++++++
 phylogenetic/rules/export.smk             |  3 +-
 4 files changed, 75 insertions(+), 1 deletion(-)
 create mode 100644 phylogenetic/defaults/clades.tsv

diff --git a/phylogenetic/defaults/clades.tsv b/phylogenetic/defaults/clades.tsv
new file mode 100644
index 0000000..92a050a
--- /dev/null
+++ b/phylogenetic/defaults/clades.tsv
@@ -0,0 +1,51 @@
+clade	gene	site	alt
+Africa-2	nuc	499	G
+Africa-2	nuc	708	A
+Africa-2	nuc	3746	G
+Africa-2	nuc	3914	A
+Africa-2	nuc	4440	C
+Africa-2	nuc	4628	G
+Africa-2	nuc	4844	A
+Africa-3	nuc	332	A
+Africa-3	nuc	392	A
+Africa-3	nuc	821	T
+Africa-3	nuc	1209	T
+Africa-3	nuc	1258	G
+Africa-3	nuc	4473	T
+Africa-3	nuc	4940	A
+Arctic-related	nuc	248	T
+Arctic-related	nuc	1030	G
+Arctic-related	nuc	1036	A
+Arctic-related	nuc	3363	A
+Arctic-related	nuc	3494	T
+Arctic-related	nuc	3657	C
+Arctic-related	nuc	4775	C
+Asian	nuc	1471	A
+Asian	nuc	8234	C
+Asian	nuc	8837	G
+Asian	nuc	9038	G
+Asian	nuc	9113	G
+Asian	nuc	9773	T
+Asian	nuc	9965	T
+Asian	nuc	10773	A
+Bat	nuc	331	T
+Bat	nuc	413	A
+Bat	nuc	1879	C
+Bat	nuc	2449	C
+Bat	nuc	2594	A
+Bat	nuc	2955	G
+Bat	nuc	5083	T
+Bat	nuc	11189	T
+Cosmopolitan	nuc	781	A
+Cosmopolitan	nuc	1057	T
+Cosmopolitan	nuc	1470	A
+Cosmopolitan	nuc	3998	C
+Cosmopolitan	nuc	4697	A
+Cosmopolitan	nuc	4949	T
+Indian Subcontinent	nuc	1207	T
+Indian Subcontinent	nuc	3548	C
+Indian Subcontinent	nuc	5054	G
+RAC-SK	nuc	574	T
+RAC-SK	nuc	844	T
+RAC-SK	nuc	1207	C
+RAC-SK	nuc	3924	G
diff --git a/phylogenetic/defaults/config.yaml b/phylogenetic/defaults/config.yaml
index e9205fb..7df0ec3 100644
--- a/phylogenetic/defaults/config.yaml
+++ b/phylogenetic/defaults/config.yaml
@@ -5,6 +5,7 @@ files:
   colors: "defaults/colors.tsv"
   auspice_config: "defaults/auspice_config.json"
   description: "defaults/description.md"
+  clades: "defaults/clades.tsv"
 filter:
   group_by: "country year"
   sequences_per_group: 20
diff --git a/phylogenetic/rules/annotate_phylogeny.smk b/phylogenetic/rules/annotate_phylogeny.smk
index 0cb802b..c2d1f45 100644
--- a/phylogenetic/rules/annotate_phylogeny.smk
+++ b/phylogenetic/rules/annotate_phylogeny.smk
@@ -52,6 +52,27 @@ rule translate:
             2>&1 | tee {log}
         """
 
+rule clades:
+    """Assign clade membership to tree nodes using the mutation-based definitions in the clades TSV"""
+    input:
+        tree = "results/tree.nwk",
+        nt_muts = "results/nt_muts.json",
+        aa_muts = "results/aa_muts.json",
+        clade_defs = config["files"]["clades"]
+    output:
+        clades = "results/clades.json"
+    log:
+        "logs/clades.txt"
+    shell:
+        """
+        augur clades \
+            --tree {input.tree} \
+            --mutations {input.nt_muts} {input.aa_muts} \
+            --clades {input.clade_defs} \
+            --output {output.clades} \
+            2>&1 | tee {log}
+        """
+
 rule add_year_metadata:
     input:
         metadata = "data/metadata.tsv",
diff --git a/phylogenetic/rules/export.smk b/phylogenetic/rules/export.smk
index b85de5f..2489abb 100644
--- a/phylogenetic/rules/export.smk
+++ b/phylogenetic/rules/export.smk
@@ -13,6 +13,7 @@ rule export:
         branch_lengths = "results/branch_lengths.json",
         nt_muts = "results/nt_muts.json",
         aa_muts = "results/aa_muts.json",
+        clades = "results/clades.json",
         year = "results/year.json",
         colors = config["files"]["colors"],
         auspice_config = config["files"]["auspice_config"],
@@ -31,7 +32,7 @@ rule export:
             --tree {input.tree} \
             --metadata {input.metadata} \
             --metadata-id-columns {params.strain_id} \
-            --node-data {input.branch_lengths} {input.nt_muts} {input.aa_muts} {input.year} \
+            --node-data {input.branch_lengths} {input.nt_muts} {input.aa_muts} {input.year} {input.clades} \
             --colors {input.colors} \
             --auspice-config {input.auspice_config} \
             --include-root-sequence-inline \

From 37d54f266c07fb488cb737eb71f872d978be80a5 Mon Sep 17 00:00:00 2001
From: Kim Andrews <17375001+kimandrews@users.noreply.github.com>
Date: Thu, 2 Jan 2025 14:02:36 -0800
Subject: [PATCH 2/3] Include clades as coloring

Adds clades as coloring in auspice
---
 phylogenetic/defaults/auspice_config.json | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/phylogenetic/defaults/auspice_config.json b/phylogenetic/defaults/auspice_config.json
index e449eaf..46c1647 100644
--- a/phylogenetic/defaults/auspice_config.json
+++ b/phylogenetic/defaults/auspice_config.json
@@ -46,6 +46,11 @@
       "key": "host_common_name",
       "title": "Host common name",
       "type": "categorical"
+    },
+    {
+      "key": "clade_membership",
+      "title": "Clade",
+      "type": "categorical"
     }
   ],
   "geo_resolutions": [

From 9133653b34ba75599b03a9f9af6633bce72b3199 Mon Sep 17 00:00:00 2001
From: Kim Andrews <17375001+kimandrews@users.noreply.github.com>
Date: Thu, 2 Jan 2025 14:04:31 -0800
Subject: [PATCH 3/3] Add clade definition info to `description.md`

---
 phylogenetic/defaults/description.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/phylogenetic/defaults/description.md b/phylogenetic/defaults/description.md
index 837745c..19c9b4d 100644
--- a/phylogenetic/defaults/description.md
+++ b/phylogenetic/defaults/description.md
@@ -12,6 +12,10 @@ We curate sequence data and metadata from NCBI as starting point for our analyse
 - [data.nextstrain.org/files/workflows/rabies/sequences.fasta.zst](https://data.nextstrain.org/files/workflows/rabies/sequences.fasta.zst)
 - [data.nextstrain.org/files/workflows/rabies/metadata.tsv.zst](https://data.nextstrain.org/files/workflows/rabies/metadata.tsv.zst)
 
+#### Clade annotation
+
+Clades are defined based on the "major clades" in [Troupin et al. 2016](https://journals.plos.org/plospathogens/article?id=10.1371/journal.ppat.1006041).
+
 ---
 
 Screenshots may be used under a [CC-BY-4.0 license](https://creativecommons.org/licenses/by/4.0/) and attribution to nextstrain.org must be provided.