Skip to content

Commit

Permalink
Merge pull request #1261: export v2: Remove hardcoding of "strain" as the metadata ID column
Browse files Browse the repository at this point in the history
  • Loading branch information
victorlin authored Jul 26, 2023
2 parents 9a914f3 + c26eb3d commit 030e95b
Show file tree
Hide file tree
Showing 9 changed files with 534 additions and 8 deletions.
6 changes: 6 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

## __NEXT__

### Bug fixes

* export v2: Previously, when `strain` was not used as the metadata ID column, node attributes might have gone missing from the final Auspice JSON. This has been fixed. [#1260][], [#1262][] (@victorlin, @joverlee521)

[#1260]: https://github.com/nextstrain/augur/issues/1260
[#1262]: https://github.com/nextstrain/augur/issues/1262

## 22.1.0 (10 July 2023)

Expand Down
18 changes: 10 additions & 8 deletions augur/export_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -1016,11 +1016,11 @@ def parse_node_data_and_metadata(T, node_data, metadata):
node_attrs = {clade.name: {} for clade in T.root.find_clades()}

# first pass: metadata
for node in metadata.values():
if node["strain"] in node_attrs: # i.e. this node name is in the tree
for metadata_id, node in metadata.items():
if metadata_id in node_attrs: # i.e. this node name is in the tree
for key, value in node.items():
corrected_key = update_deprecated_names(key)
node_attrs[node["strain"]][corrected_key] = value
node_attrs[metadata_id][corrected_key] = value
metadata_names.add(corrected_key)

# second pass: node data JSONs (overwrites keys of same name found in metadata)
Expand Down Expand Up @@ -1074,13 +1074,15 @@ def run(args):

if args.metadata is not None:
try:
metadata_file = read_metadata(
metadata_df = read_metadata(
args.metadata,
delimiters=args.metadata_delimiters,
id_columns=args.metadata_id_columns).to_dict(orient="index")
for strain in metadata_file.keys():
if "strain" not in metadata_file[strain]:
metadata_file[strain]["strain"] = strain
id_columns=args.metadata_id_columns)

# Add the index as a column.
metadata_df[metadata_df.index.name] = metadata_df.index

metadata_file = metadata_df.to_dict(orient="index")
except FileNotFoundError:
print(f"ERROR: meta data file ({args.metadata}) does not exist", file=sys.stderr)
sys.exit(2)
Expand Down
32 changes: 32 additions & 0 deletions tests/functional/export_v2/cram/metadata-with-accession.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
Setup

$ source "$TESTDIR"/_setup.sh

Run export with metadata that contains "accession".

$ ${AUGUR} export v2 \
> --tree "$TESTDIR/../data/tree.nwk" \
> --metadata "$TESTDIR/../data/dataset1_metadata_with_strain_and_accession.tsv" \
> --node-data "$TESTDIR/../data/div_node-data.json" "$TESTDIR/../data/location_node-data.json" \
> --auspice-config "$TESTDIR/../data/auspice_config1.json" \
> --maintainers "Nextstrain Team" \
> --output dataset.json > /dev/null

$ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" "$TESTDIR/../data/dataset-with-accession.json" dataset.json \
> --exclude-paths "root['meta']['updated']" "root['meta']['maintainers']"
{}

Run export with metadata that contains "accession", and use "accession" as the ID column.

$ ${AUGUR} export v2 \
> --tree "$TESTDIR/../data/tree-by-accession.nwk" \
> --metadata "$TESTDIR/../data/dataset1_metadata_with_strain_and_accession.tsv" \
> --metadata-id-columns accession \
> --node-data "$TESTDIR/../data/div_node-data-by-accession.json" "$TESTDIR/../data/location_node-data-by-accession.json" \
> --auspice-config "$TESTDIR/../data/auspice_config1.json" \
> --maintainers "Nextstrain Team" \
> --output dataset.json > /dev/null

$ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" "$TESTDIR/../data/dataset-with-accession-by-accession.json" dataset.json \
> --exclude-paths "root['meta']['updated']" "root['meta']['maintainers']"
{}
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
{
"version": "v2",
"meta": {
"updated": "2023-07-24",
"maintainers": [
{
"name": "Nextstrain Team"
}
],
"colorings": [
{
"key": "location",
"title": "Location",
"type": "categorical",
"scale": [
[
"beta",
"#bd0026"
],
[
"gamma",
"#6a51a3"
]
],
"legend": [
{
"value": "alpha",
"display": "\u03b1"
},
{
"value": "beta"
}
]
},
{
"key": "mutation_length",
"title": "Mutations per branch",
"type": "continuous",
"scale": [
[
1,
"#081d58"
],
[
3,
"#1d91c0"
],
[
5,
"#c7e9b4"
]
],
"legend": [
{
"value": 1,
"display": "0-2",
"bounds": [
-1,
2
]
},
{
"value": 3,
"display": "3-5",
"bounds": [
2,
5
]
},
{
"value": 5,
"display": ">5",
"bounds": [
5,
10
]
}
]
}
],
"filters": [
"location"
],
"panels": [
"tree"
]
},
"tree": {
"name": "ROOT",
"node_attrs": {
"div": 0,
"mutation_length": {
"value": 0
}
},
"branch_attrs": {},
"children": [
{
"name": "accessionA",
"node_attrs": {
"div": 1,
"accession": "accessionA",
"location": {
"value": "delta"
},
"mutation_length": {
"value": 1
}
},
"branch_attrs": {}
},
{
"name": "internalBC",
"node_attrs": {
"div": 2,
"mutation_length": {
"value": 2
}
},
"branch_attrs": {},
"children": [
{
"name": "accessionB",
"node_attrs": {
"div": 3,
"accession": "accessionB",
"location": {
"value": "gamma"
},
"mutation_length": {
"value": 1
}
},
"branch_attrs": {}
},
{
"name": "accessionC",
"node_attrs": {
"div": 3,
"accession": "accessionC",
"location": {
"value": "gamma"
},
"mutation_length": {
"value": 1
}
},
"branch_attrs": {}
}
]
},
{
"name": "internalDEF",
"node_attrs": {
"div": 5,
"location": {
"value": "alpha"
},
"mutation_length": {
"value": 5
}
},
"branch_attrs": {},
"children": [
{
"name": "accessionD",
"node_attrs": {
"div": 8,
"accession": "accessionD",
"location": {
"value": "alpha"
},
"mutation_length": {
"value": 3
}
},
"branch_attrs": {}
},
{
"name": "accessionE",
"node_attrs": {
"div": 9,
"accession": "accessionE",
"location": {
"value": "alpha"
},
"mutation_length": {
"value": 4
}
},
"branch_attrs": {}
},
{
"name": "accessionF",
"node_attrs": {
"div": 6,
"accession": "accessionF",
"location": {
"value": "beta"
},
"mutation_length": {
"value": 1
}
},
"branch_attrs": {}
}
]
}
]
}
}
Loading

0 comments on commit 030e95b

Please sign in to comment.