diff --git a/CHANGES.md b/CHANGES.md index 5e568a711..20ccf6e4d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,11 +2,17 @@ ## __NEXT__ +### Features + +* ancestral, translate: Add `--skip-validation` as an alias to `--validation-mode=skip`. [#1656][] (@victorlin) +* clades: Allow customizing the validation of input node data JSON files with `--validation-mode` and `--skip-validation`. [#1656][] (@victorlin) + ### Bug Fixes * index: Previously specifying a directory that does not exist in the path to `--output` would result in an incorrect error stating that the input file does not exist. It now shows the correct path responsible for the error. [#1644][] (@victorlin) [#1644]: https://github.com/nextstrain/augur/issues/1644 +[#1656]: https://github.com/nextstrain/augur/pull/1656 ## 26.0.0 (17 September 2024) diff --git a/augur/ancestral.py b/augur/ancestral.py index 932d86072..7a39764da 100644 --- a/augur/ancestral.py +++ b/augur/ancestral.py @@ -34,9 +34,8 @@ from .io.vcf import is_vcf as is_filename_vcf from treetime.vcf_utils import read_vcf, write_vcf from collections import defaultdict -from .types import ValidationMode +from .argparse_ import add_validation_arguments from .util_support.node_data_file import NodeDataObject -from .export_v2 import validation_mode_help_message def ancestral_sequence_inference(tree=None, aln=None, ref=None, infer_gtr=True, marginal=False, fill_overhangs=True, infer_tips=False, @@ -335,8 +334,7 @@ def register_parser(parent_subparsers): general_group = parser.add_argument_group( "general", ) - general_group.add_argument('--validation-mode', type=ValidationMode, choices=[mode for mode in ValidationMode], default=ValidationMode.ERROR, - help=validation_mode_help_message) + add_validation_arguments(general_group) return parser diff --git a/augur/argparse_.py b/augur/argparse_.py index 6f0ee5e72..6084fdd5e 100644 --- a/augur/argparse_.py +++ b/augur/argparse_.py @@ -1,7 +1,9 @@ """ Custom helpers for the argparse standard library. """ -from argparse import Action, ArgumentDefaultsHelpFormatter +from argparse import Action, ArgumentDefaultsHelpFormatter, ArgumentParser, _ArgumentGroup +from typing import Union +from .types import ValidationMode # Include this in an argument help string to suppress the automatic appending @@ -93,3 +95,34 @@ def __call__(self, parser, namespace, value, option_string = None): current = [] setattr(namespace, self.dest, [*current, *value]) + + +def add_validation_arguments(parser: Union[ArgumentParser, _ArgumentGroup]): + """ + Add arguments to configure validation mode of node data JSON files. + """ + parser.add_argument( + '--validation-mode', + dest="validation_mode", + type=ValidationMode, + choices=[mode for mode in ValidationMode], + default=ValidationMode.ERROR, + help=""" + Control if optional validation checks are performed and what + happens if they fail. + + 'error' and 'warn' modes perform validation and emit messages about + failed validation checks. 'error' mode causes a non-zero exit + status if any validation checks failed, while 'warn' does not. + + 'skip' mode performs no validation. + + Note that some validation checks are non-optional and as such are + not affected by this setting. + """) + parser.add_argument( + '--skip-validation', + dest="validation_mode", + action="store_const", + const=ValidationMode.SKIP, + help="Skip validation of input/output files, equivalent to --validation-mode=skip. Use at your own risk!") diff --git a/augur/clades.py b/augur/clades.py index 71b868713..391de58b5 100644 --- a/augur/clades.py +++ b/augur/clades.py @@ -22,6 +22,7 @@ from .io.file import PANDAS_READ_CSV_OPTIONS from argparse import SUPPRESS from .utils import get_parent_name_by_child_name_for_tree, read_node_data, write_json, get_json_name +from .argparse_ import add_validation_arguments UNASSIGNED = 'unassigned' @@ -324,10 +325,10 @@ def get_reference_sequence_from_root_node(all_muts, root_name): return ref -def parse_nodes(tree_file, node_data_files): +def parse_nodes(tree_file, node_data_files, validation_mode): tree = Phylo.read(tree_file, 'newick') # don't supply tree to read_node_data as we don't want to require that every node is present in the node_data JSONs - node_data = read_node_data(node_data_files) + node_data = read_node_data(node_data_files, validation_mode=validation_mode) # node_data files can be parsed without 'nodes' (if they have 'branches') if "nodes" not in node_data or len(node_data['nodes'].keys())==0: raise AugurError(f"No nodes found in the supplied node data files. Please check {', '.join(node_data_files)}") @@ -347,11 +348,12 @@ def register_parser(parent_subparsers): parser.add_argument('--output-node-data', type=str, metavar="NODE_DATA_JSON", help='name of JSON file to save clade assignments to') parser.add_argument('--membership-name', type=str, default="clade_membership", help='Key to store clade membership under; use "None" to not export this') parser.add_argument('--label-name', type=str, default="clade", help='Key to store clade labels under; use "None" to not export this') + add_validation_arguments(parser) return parser def run(args): - (tree, all_muts) = parse_nodes(args.tree, args.mutations) + (tree, all_muts) = parse_nodes(args.tree, args.mutations, args.validation_mode) if args.reference: # PLACE HOLDER FOR vcf WORKFLOW. diff --git a/augur/export_v2.py b/augur/export_v2.py index b781bc835..6cd04eb37 100644 --- a/augur/export_v2.py +++ b/augur/export_v2.py @@ -13,7 +13,7 @@ from Bio import Phylo from typing import Dict, Union, TypedDict, Any, Tuple -from .argparse_ import ExtendOverwriteDefault +from .argparse_ import ExtendOverwriteDefault, add_validation_arguments from .errors import AugurError from .io.file import open_file from .io.metadata import DEFAULT_DELIMITERS, DEFAULT_ID_COLUMNS, InvalidDelimiter, read_metadata @@ -926,20 +926,6 @@ def node_data_prop_is_normal_trait(name): return True -validation_mode_help_message = """ - Control if optional validation checks are performed and what - happens if they fail. - - 'error' and 'warn' modes perform validation and emit messages about - failed validation checks. 'error' mode causes a non-zero exit - status if any validation checks failed, while 'warn' does not. - - 'skip' mode performs no validation. - - Note that some validation checks are non-optional and as such are - not affected by this setting. -""" - def register_parser(parent_subparsers): parser = parent_subparsers.add_parser("v2", help=__doc__) @@ -1007,19 +993,7 @@ def register_parser(parent_subparsers): optional_settings = parser.add_argument_group( title="OTHER OPTIONAL SETTINGS" ) - optional_settings.add_argument( - '--validation-mode', - dest="validation_mode", - type=ValidationMode, - choices=[mode for mode in ValidationMode], - default=ValidationMode.ERROR, - help=validation_mode_help_message) - optional_settings.add_argument( - '--skip-validation', - dest="validation_mode", - action="store_const", - const=ValidationMode.SKIP, - help="Skip validation of input/output files, equivalent to --validation-mode=skip. Use at your own risk!") + add_validation_arguments(optional_settings) return parser diff --git a/augur/translate.py b/augur/translate.py index 6ec4d4b3d..637759e99 100644 --- a/augur/translate.py +++ b/augur/translate.py @@ -22,9 +22,8 @@ from treetime.vcf_utils import read_vcf from augur.errors import AugurError from textwrap import dedent -from .types import ValidationMode +from .argparse_ import add_validation_arguments from .util_support.node_data_file import NodeDataObject -from .export_v2 import validation_mode_help_message class MissingNodeError(Exception): pass @@ -373,7 +372,7 @@ def register_parser(parent_subparsers): parser.add_argument('--alignment-output', type=str, help="write out translated gene alignments. " "If a VCF-input, a .vcf or .vcf.gz will be output here (depending on file ending). If fasta-input, specify the file name " "like so: 'my_alignment_%%GENE.fasta', where '%%GENE' will be replaced by the name of the gene") - parser.add_argument('--validation-mode', type=ValidationMode, choices=[mode for mode in ValidationMode], default=ValidationMode.ERROR, help=validation_mode_help_message) + add_validation_arguments(parser) vcf_only = parser.add_argument_group( title="VCF specific", diff --git a/tests/functional/clades/cram/augur-version-mismatch.t b/tests/functional/clades/cram/augur-version-mismatch.t new file mode 100644 index 000000000..cb0c55818 --- /dev/null +++ b/tests/functional/clades/cram/augur-version-mismatch.t @@ -0,0 +1,23 @@ +Integration tests for augur clades. + + $ source "$TESTDIR"/_setup.sh + +Node-data JSONs produced from a different major version of augur +are not allowed. + + $ ${AUGUR} clades \ + > --tree "$TESTDIR/../data/tree.nwk" \ + > --mutations "$TESTDIR/../data/aa_muts_generated_by.json" \ + > --clades "$TESTDIR/../data/clades.tsv" \ + > --output-node-data clades.json + ERROR: Augur version incompatibility detected: the JSON .*aa_muts_generated_by\.json.* was generated by \{'program': 'augur', 'version': '21.1.0'\}, which is incompatible with the current augur version \([.0-9]+\). We suggest you rerun the pipeline using the current version of augur. (re) + [2] + +Skipping validation allows mismatched augur versions to be used without error. + + $ ${AUGUR} clades \ + > --tree "$TESTDIR/../data/tree.nwk" \ + > --mutations "$TESTDIR/../data/aa_muts_generated_by.json" \ + > --clades "$TESTDIR/../data/clades.tsv" \ + > --output-node-data clades.json \ + > --skip-validation &>/dev/null diff --git a/tests/functional/clades/data/aa_muts_generated_by.json b/tests/functional/clades/data/aa_muts_generated_by.json new file mode 100644 index 000000000..5a2a47f63 --- /dev/null +++ b/tests/functional/clades/data/aa_muts_generated_by.json @@ -0,0 +1,332 @@ +{ + "generated_by": { + "program": "augur", + "version": "21.1.0" + }, + "nodes": { + "BRA/2016/FC_6706": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [], + "PRO": [] + } + }, + "COL/FLR_00008/2015": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [ + "L169I", + "G292E" + ], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [], + "PRO": [] + } + }, + "Colombia/2016/ZC204Se": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [], + "PRO": [ + "N17K" + ] + } + }, + "DOM/2016/BB_0183": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [ + "D208G" + ], + "NS2A": [ + "L152M" + ], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [ + "I322V", + "Q650R", + "D878E" + ], + "PRO": [] + } + }, + "EcEs062_16": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [], + "PRO": [] + } + }, + "HND/2016/HU_ME59": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [ + "G100A" + ], + "NS2A": [], + "NS2B": [], + "NS3": [ + "M572L" + ], + "NS4A": [], + "NS4B": [], + "NS5": [ + "R525C" + ], + "PRO": [] + } + }, + "NODE_0000001": { + "aa_muts": { + "2K": [], + "CA": [ + "D107E" + ], + "ENV": [], + "MP": [], + "NS1": [ + "R324W" + ], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [ + "T833A" + ], + "PRO": [] + } + }, + "NODE_0000002": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [ + "M349V" + ], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [], + "PRO": [] + } + }, + "NODE_0000003": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [], + "PRO": [] + } + }, + "NODE_0000004": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [], + "PRO": [] + } + }, + "NODE_0000005": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [], + "PRO": [] + } + }, + "NODE_0000006": { + "aa_muts": {} + }, + "NODE_0000007": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [], + "PRO": [] + } + }, + "NODE_0000008": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [], + "PRO": [] + } + }, + "PAN/CDC_259359_V1_V3/2015": { + "aa_muts": { + "2K": [], + "CA": [], + "ENV": [], + "MP": [], + "NS1": [], + "NS2A": [], + "NS2B": [ + "M32I" + ], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [], + "PRO": [] + } + }, + "PRVABC59": { + "aa_muts": { + "2K": [], + "CA": [ + "I80T" + ], + "ENV": [], + "MP": [], + "NS1": [], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [ + "A91V" + ], + "PRO": [] + } + }, + "VEN/UF_1/2016": { + "aa_muts": { + "2K": [], + "CA": [ + "E76D" + ], + "ENV": [], + "MP": [], + "NS1": [ + "T301P" + ], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [ + "A833T" + ], + "PRO": [] + } + }, + "ZKC2/2016": { + "aa_muts": { + "2K": [], + "CA": [ + "S109N" + ], + "ENV": [ + "K419R" + ], + "MP": [], + "NS1": [ + "R324Q" + ], + "NS2A": [], + "NS2B": [], + "NS3": [], + "NS4A": [], + "NS4B": [], + "NS5": [ + "V114M", + "N624S", + "K670R" + ], + "PRO": [] + } + } + } +}