diff --git a/augur/align.py b/augur/align.py index 252846f82..62fd285f1 100644 --- a/augur/align.py +++ b/augur/align.py @@ -8,7 +8,7 @@ from Bio import AlignIO, SeqIO, Seq, Align from .argparse_ import ExtendOverwriteDefault from .io.file import open_file -from .io.sequences import read_sequence, read_sequences as io_read_sequences +from .io.sequences import read_sequence, read_sequences as io_read_sequences, BIOPYTHON_FASTA_FORMAT from .io.shell_command_runner import run_shell_command from .io.vcf import shquote from .utils import nthreads_value @@ -241,7 +241,7 @@ def read_reference(ref_fname): raise AlignmentError("ERROR: Cannot read reference sequence." "\n\tmake sure the file \"%s\" exists"%ref_fname) try: - ref_seq = read_sequence(ref_fname, format='genbank' if ref_fname.split('.')[-1] in ['gb', 'genbank'] else 'fasta') + ref_seq = read_sequence(ref_fname, format='genbank' if ref_fname.split('.')[-1] in ['gb', 'genbank'] else BIOPYTHON_FASTA_FORMAT) except: raise AlignmentError("ERROR: Cannot read reference sequence." "\n\tmake sure the file %s contains one sequence in genbank or fasta format"%ref_fname) diff --git a/augur/ancestral.py b/augur/ancestral.py index 404c1ba94..51771551b 100644 --- a/augur/ancestral.py +++ b/augur/ancestral.py @@ -32,7 +32,7 @@ from .utils import parse_genes_argument, read_tree, InvalidTreeError, write_json, get_json_name, \ genome_features_to_auspice_annotation from .io.file import open_file -from .io.sequences import read_sequence +from .io.sequences import read_sequence, BIOPYTHON_FASTA_FORMAT from .io.vcf import is_vcf as is_filename_vcf from treetime.vcf_utils import read_vcf, write_vcf from collections import defaultdict @@ -399,7 +399,7 @@ def run(args): aln = args.alignment ref = None if args.root_sequence: - for fmt in ['fasta', 'genbank']: + for fmt in [BIOPYTHON_FASTA_FORMAT, 'genbank']: try: ref = str(read_sequence(args.root_sequence, format=fmt).seq).upper() break diff --git a/augur/io/sequences.py b/augur/io/sequences.py index 332444389..bfbe0e15b 100644 --- a/augur/io/sequences.py +++ b/augur/io/sequences.py @@ -2,13 +2,18 @@ import os from augur.errors import AugurError +from importlib.metadata import version +from packaging.version import Version from typing import Iterator, Iterable, Union from .file import open_file +BIOPYTHON_FASTA_FORMAT = "fasta" if Version(version("biopython")) < Version("1.85") else "fasta-pearson" + + def read_sequence( path: str, - format: str = "fasta", + format: str = BIOPYTHON_FASTA_FORMAT, ) -> Bio.SeqIO.SeqRecord: """Read a single sequence from a path. @@ -20,7 +25,7 @@ def read_sequence( def read_sequences( *paths: Iterable[Union[str, os.PathLike]], - format: str = "fasta", + format: str = BIOPYTHON_FASTA_FORMAT, ) -> Iterator[Bio.SeqIO.SeqRecord]: """Read sequences from one or more paths.