Skip to content

Commit

Permalink
Merge pull request #1656: Centralize --validation-mode and --skip-val…
Browse files Browse the repository at this point in the history
…idation
  • Loading branch information
victorlin authored Oct 25, 2024
2 parents 37958a6 + a6e045d commit c0bccdf
Show file tree
Hide file tree
Showing 8 changed files with 406 additions and 39 deletions.
6 changes: 6 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,17 @@

## __NEXT__

### Features

* ancestral, translate: Add `--skip-validation` as an alias for `--validation-mode=skip`. [#1656][] (@victorlin)
* clades: Allow customizing the validation of input node data JSON files with `--validation-mode` and `--skip-validation`. [#1656][] (@victorlin)

### Bug Fixes

* index: Previously, specifying a directory that does not exist in the path to `--output` would result in an incorrect error stating that the input file does not exist. It now shows the correct path responsible for the error. [#1644][] (@victorlin)

[#1644]: https://github.com/nextstrain/augur/issues/1644
[#1656]: https://github.com/nextstrain/augur/pull/1656

## 26.0.0 (17 September 2024)

Expand Down
6 changes: 2 additions & 4 deletions augur/ancestral.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,8 @@
from .io.vcf import is_vcf as is_filename_vcf
from treetime.vcf_utils import read_vcf, write_vcf
from collections import defaultdict
from .types import ValidationMode
from .argparse_ import add_validation_arguments
from .util_support.node_data_file import NodeDataObject
from .export_v2 import validation_mode_help_message

def ancestral_sequence_inference(tree=None, aln=None, ref=None, infer_gtr=True,
marginal=False, fill_overhangs=True, infer_tips=False,
Expand Down Expand Up @@ -335,8 +334,7 @@ def register_parser(parent_subparsers):
general_group = parser.add_argument_group(
"general",
)
general_group.add_argument('--validation-mode', type=ValidationMode, choices=[mode for mode in ValidationMode], default=ValidationMode.ERROR,
help=validation_mode_help_message)
add_validation_arguments(general_group)

return parser

Expand Down
35 changes: 34 additions & 1 deletion augur/argparse_.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""
Custom helpers for the argparse standard library.
"""
from argparse import Action, ArgumentDefaultsHelpFormatter
from argparse import Action, ArgumentDefaultsHelpFormatter, ArgumentParser, _ArgumentGroup
from typing import Union
from .types import ValidationMode


# Include this in an argument help string to suppress the automatic appending
Expand Down Expand Up @@ -93,3 +95,34 @@ def __call__(self, parser, namespace, value, option_string = None):
current = []

setattr(namespace, self.dest, [*current, *value])


def add_validation_arguments(parser: Union[ArgumentParser, _ArgumentGroup]):
    """
    Register the options that configure validation of node data JSON files.

    Two options are added to *parser* (an argument parser or one of its
    argument groups), and both store their value under the same
    ``validation_mode`` destination:

    * ``--validation-mode`` takes one of the ``ValidationMode`` members and
      defaults to ``ValidationMode.ERROR``.
    * ``--skip-validation`` is a flag that stores ``ValidationMode.SKIP``.
    """
    # Help texts are runtime strings shown to the user; they are kept
    # verbatim, including the line breaks inside the multi-line message.
    mode_help = """
Control if optional validation checks are performed and what
happens if they fail.
'error' and 'warn' modes perform validation and emit messages about
failed validation checks. 'error' mode causes a non-zero exit
status if any validation checks failed, while 'warn' does not.
'skip' mode performs no validation.
Note that some validation checks are non-optional and as such are
not affected by this setting.
"""
    skip_help = "Skip validation of input/output files, equivalent to --validation-mode=skip. Use at your own risk!"

    # Shared destination so the flag and the explicit mode option feed the
    # same attribute on the parsed namespace.
    destination = "validation_mode"

    parser.add_argument(
        '--validation-mode',
        dest=destination,
        type=ValidationMode,
        choices=list(ValidationMode),
        default=ValidationMode.ERROR,
        help=mode_help,
    )
    parser.add_argument(
        '--skip-validation',
        dest=destination,
        action="store_const",
        const=ValidationMode.SKIP,
        help=skip_help,
    )
8 changes: 5 additions & 3 deletions augur/clades.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from .io.file import PANDAS_READ_CSV_OPTIONS
from argparse import SUPPRESS
from .utils import get_parent_name_by_child_name_for_tree, read_node_data, write_json, get_json_name
from .argparse_ import add_validation_arguments

UNASSIGNED = 'unassigned'

Expand Down Expand Up @@ -324,10 +325,10 @@ def get_reference_sequence_from_root_node(all_muts, root_name):

return ref

def parse_nodes(tree_file, node_data_files):
def parse_nodes(tree_file, node_data_files, validation_mode):
tree = Phylo.read(tree_file, 'newick')
# don't supply tree to read_node_data as we don't want to require that every node is present in the node_data JSONs
node_data = read_node_data(node_data_files)
node_data = read_node_data(node_data_files, validation_mode=validation_mode)
# node_data files can be parsed without 'nodes' (if they have 'branches')
if "nodes" not in node_data or len(node_data['nodes'].keys())==0:
raise AugurError(f"No nodes found in the supplied node data files. Please check {', '.join(node_data_files)}")
Expand All @@ -347,11 +348,12 @@ def register_parser(parent_subparsers):
parser.add_argument('--output-node-data', type=str, metavar="NODE_DATA_JSON", help='name of JSON file to save clade assignments to')
parser.add_argument('--membership-name', type=str, default="clade_membership", help='Key to store clade membership under; use "None" to not export this')
parser.add_argument('--label-name', type=str, default="clade", help='Key to store clade labels under; use "None" to not export this')
add_validation_arguments(parser)
return parser


def run(args):
(tree, all_muts) = parse_nodes(args.tree, args.mutations)
(tree, all_muts) = parse_nodes(args.tree, args.mutations, args.validation_mode)

if args.reference:
# PLACE HOLDER FOR vcf WORKFLOW.
Expand Down
30 changes: 2 additions & 28 deletions augur/export_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from Bio import Phylo
from typing import Dict, Union, TypedDict, Any, Tuple

from .argparse_ import ExtendOverwriteDefault
from .argparse_ import ExtendOverwriteDefault, add_validation_arguments
from .errors import AugurError
from .io.file import open_file
from .io.metadata import DEFAULT_DELIMITERS, DEFAULT_ID_COLUMNS, InvalidDelimiter, read_metadata
Expand Down Expand Up @@ -926,20 +926,6 @@ def node_data_prop_is_normal_trait(name):

return True

validation_mode_help_message = """
Control if optional validation checks are performed and what
happens if they fail.
'error' and 'warn' modes perform validation and emit messages about
failed validation checks. 'error' mode causes a non-zero exit
status if any validation checks failed, while 'warn' does not.
'skip' mode performs no validation.
Note that some validation checks are non-optional and as such are
not affected by this setting.
"""


def register_parser(parent_subparsers):
parser = parent_subparsers.add_parser("v2", help=__doc__)
Expand Down Expand Up @@ -1007,19 +993,7 @@ def register_parser(parent_subparsers):
optional_settings = parser.add_argument_group(
title="OTHER OPTIONAL SETTINGS"
)
optional_settings.add_argument(
'--validation-mode',
dest="validation_mode",
type=ValidationMode,
choices=[mode for mode in ValidationMode],
default=ValidationMode.ERROR,
help=validation_mode_help_message)
optional_settings.add_argument(
'--skip-validation',
dest="validation_mode",
action="store_const",
const=ValidationMode.SKIP,
help="Skip validation of input/output files, equivalent to --validation-mode=skip. Use at your own risk!")
add_validation_arguments(optional_settings)

return parser

Expand Down
5 changes: 2 additions & 3 deletions augur/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,8 @@
from treetime.vcf_utils import read_vcf
from augur.errors import AugurError
from textwrap import dedent
from .types import ValidationMode
from .argparse_ import add_validation_arguments
from .util_support.node_data_file import NodeDataObject
from .export_v2 import validation_mode_help_message

class MissingNodeError(Exception):
pass
Expand Down Expand Up @@ -373,7 +372,7 @@ def register_parser(parent_subparsers):
parser.add_argument('--alignment-output', type=str, help="write out translated gene alignments. "
"If a VCF-input, a .vcf or .vcf.gz will be output here (depending on file ending). If fasta-input, specify the file name "
"like so: 'my_alignment_%%GENE.fasta', where '%%GENE' will be replaced by the name of the gene")
parser.add_argument('--validation-mode', type=ValidationMode, choices=[mode for mode in ValidationMode], default=ValidationMode.ERROR, help=validation_mode_help_message)
add_validation_arguments(parser)

vcf_only = parser.add_argument_group(
title="VCF specific",
Expand Down
23 changes: 23 additions & 0 deletions tests/functional/clades/cram/augur-version-mismatch.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
Integration tests for augur clades.

$ source "$TESTDIR"/_setup.sh

Node-data JSONs produced from a different major version of augur
are not allowed.

$ ${AUGUR} clades \
> --tree "$TESTDIR/../data/tree.nwk" \
> --mutations "$TESTDIR/../data/aa_muts_generated_by.json" \
> --clades "$TESTDIR/../data/clades.tsv" \
> --output-node-data clades.json
ERROR: Augur version incompatibility detected: the JSON .*aa_muts_generated_by\.json.* was generated by \{'program': 'augur', 'version': '21.1.0'\}, which is incompatible with the current augur version \([.0-9]+\). We suggest you rerun the pipeline using the current version of augur. (re)
[2]

Skipping validation allows mismatched augur versions to be used without error.

$ ${AUGUR} clades \
> --tree "$TESTDIR/../data/tree.nwk" \
> --mutations "$TESTDIR/../data/aa_muts_generated_by.json" \
> --clades "$TESTDIR/../data/clades.tsv" \
> --output-node-data clades.json \
> --skip-validation &>/dev/null
Loading

0 comments on commit c0bccdf

Please sign in to comment.