generated from nextstrain/pathogen-repo-guide
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #8 from nextstrain/add-host-categories
Add host taxonomic categories
- Loading branch information
Showing
7 changed files
with
173 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
# CHANGELOG | ||
* 20 August 2024: Assign host taxa to taxonomic groupings that are relevant to rabies for coloring in Auspice [PR#8](https://github.com/nextstrain/rabies/pull/8) | ||
* 12 August 2024: Create a full genome phylogeny for rabies [PR#3](https://github.com/nextstrain/rabies/pull/3) | ||
* 25 July 2024: Add CI GH Action workflow to test the ingest workflow [PR#6](https://github.com/nextstrain/rabies/pull/6) | ||
* 15 July 2024: Make rabies-specific modifications to the ingest directory (which originated from the pathogen-repo-guide) [PR#2](https://github.com/nextstrain/rabies/pull/2) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
#! /usr/bin/env python3 | ||
""" | ||
Reads NDJSON records from stdin, derives a host name for each record from its NCBI taxonomy fields, and stores it in the record's 'host' field. | ||
Writes each modified record to stdout. | ||
""" | ||
|
||
import argparse | ||
import json | ||
from sys import stdin, stdout | ||
|
||
def parse_args():
    """Parse the command-line options naming the record fields to read.

    Each option selects which key of the input record holds one piece of
    host taxonomy (latin name, family, genus, group). All options are
    optional and fall back to the defaults listed below.

    Returns:
        The ``argparse.Namespace`` with ``latin_field``, ``family_field``,
        ``genus_field`` and ``group_field`` attributes.
    """
    # (flag, default record key, help text) for every supported option.
    field_options = (
        ("--latin-field", 'host_latin_name',
         "Field from the records to use as the host latin name."),
        ("--family-field", 'host_family',
         "Field from the records to use as the host Family name."),
        ("--genus-field", 'host_genus',
         "Field from the records to use as the host genus name."),
        ("--group-field", 'host_group',
         "Field from the records to use as the host group."),
    )

    cli = argparse.ArgumentParser(
        description="Generate host names and output to 'host'.")
    for flag, default_key, help_text in field_options:
        cli.add_argument(flag, default=default_key, help=help_text)
    return cli.parse_args()
|
||
# Lookup tables live at module level so they are built once at import time
# rather than once per record.

# Exact host latin names that map to a specific display category.
_LATIN_NAME_TO_HOST = {
    "Canis lupus familiaris": "Domestic Dog",
    "Homo sapiens": "Human",
    "Bos taurus": "Cattle",
    "Didelphis albiventris": "Other Mammal",
    "Elephas maximus": "Other Mammal",
    "Dasypus novemcinctus": "Other Mammal",
}

# Host families that map to a display category as a whole.
_FAMILY_TO_HOST = {"Mephitidae": "Skunk"}

# Broad host groups and the display category each collapses into.
_GROUP_TO_HOST = {
    "odd-toed ungulates": "Other Ungulate",
    "even-toed ungulates & whales": "Other Ungulate",
    "carnivores": "Other Carnivore",
    "bats": "Bat",
    "birds": "Bird",
    "primates": "Other Mammal",
    "rodents": "Other Mammal",
    "mammals": "Other Mammal",
}


def _set_host_name_transformed(record, args):
    """Return the host display category for a single record.

    Despite the name, the record is not modified; the caller stores the
    returned value. Rules are tried from most to least specific and the
    first match wins:

    1. family ``Canidae`` with genus ``Vulpes`` -> ``"Fox (Vulpes sp.)"``
    2. family ``Procyonidae`` with genus ``Procyon`` -> ``"Raccoon"``
    3. exact latin name listed in the latin-name table
    4. family listed in the family table
    5. group listed in the group table
    6. otherwise the record's group value, unchanged

    Args:
        record: Mapping holding the host taxonomy values.
        args: Object with ``latin_field``, ``family_field``, ``genus_field``
            and ``group_field`` attributes naming the keys to read from
            ``record``.

    Returns:
        The host category, or the raw group value when no rule matches.

    Raises:
        KeyError: If the latin, family or group key is missing from
            ``record``, or if the genus key is missing when the family is
            ``Canidae`` or ``Procyonidae``.
    """
    latin_name = record[args.latin_field]
    family = record[args.family_field]
    group = record[args.group_field]

    # The genus is only read for the two families whose rule depends on it,
    # so records of other families are not required to carry a genus key.
    if family == "Canidae" and record[args.genus_field] == "Vulpes":
        return "Fox (Vulpes sp.)"
    if family == "Procyonidae" and record[args.genus_field] == "Procyon":
        return "Raccoon"
    if latin_name in _LATIN_NAME_TO_HOST:
        return _LATIN_NAME_TO_HOST[latin_name]
    if family in _FAMILY_TO_HOST:
        return _FAMILY_TO_HOST[family]
    # Unrecognised groups pass through unchanged.
    return _GROUP_TO_HOST.get(group, group)
|
||
def main():
    """Annotate every NDJSON record from stdin with a 'host' value.

    Each input line is parsed as one JSON document, its 'host' key is set
    to the value returned by ``_set_host_name_transformed``, and the record
    is written to stdout as a single JSON line.
    """
    args = parse_args()

    # Stream line by line so the whole input is never held in memory.
    for line in stdin:
        record = json.loads(line)
        record['host'] = _set_host_name_transformed(record, args)
        stdout.write(json.dumps(record) + "\n")


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters