Skip to content

Commit

Permalink
Hide BiopythonDeprecationWarnings when reading certain sequence files
Browse files: browse the repository at this point in the history
Biopython 1.85 will show a deprecation warning when using format='fasta'
with files that start with anything but '>'.

The warning as-is should not be exposed to Augur users. It is not
triggered when reading files with format='fasta-pearson', so using that
format on Biopython >=1.85 is the easiest way to maintain the behavior
of Biopython <1.85.
  • Loading branch information
victorlin committed Jan 21, 2025
1 parent cc365cb commit 945011a
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 6 deletions.
4 changes: 2 additions & 2 deletions augur/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from Bio import AlignIO, SeqIO, Seq, Align
from .argparse_ import ExtendOverwriteDefault
from .io.file import open_file
from .io.sequences import read_sequence, read_sequences as io_read_sequences
from .io.sequences import read_sequence, read_sequences as io_read_sequences, BIOPYTHON_FASTA_FORMAT
from .io.shell_command_runner import run_shell_command
from .io.vcf import shquote
from .utils import nthreads_value
Expand Down Expand Up @@ -241,7 +241,7 @@ def read_reference(ref_fname):
raise AlignmentError("ERROR: Cannot read reference sequence."
"\n\tmake sure the file \"%s\" exists"%ref_fname)
try:
ref_seq = read_sequence(ref_fname, format='genbank' if ref_fname.split('.')[-1] in ['gb', 'genbank'] else 'fasta')
ref_seq = read_sequence(ref_fname, format='genbank' if ref_fname.split('.')[-1] in ['gb', 'genbank'] else BIOPYTHON_FASTA_FORMAT)
except:
raise AlignmentError("ERROR: Cannot read reference sequence."
"\n\tmake sure the file %s contains one sequence in genbank or fasta format"%ref_fname)
Expand Down
4 changes: 2 additions & 2 deletions augur/ancestral.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from .utils import parse_genes_argument, read_tree, InvalidTreeError, write_json, get_json_name, \
genome_features_to_auspice_annotation
from .io.file import open_file
from .io.sequences import read_sequence
from .io.sequences import read_sequence, BIOPYTHON_FASTA_FORMAT
from .io.vcf import is_vcf as is_filename_vcf
from treetime.vcf_utils import read_vcf, write_vcf
from collections import defaultdict
Expand Down Expand Up @@ -399,7 +399,7 @@ def run(args):
aln = args.alignment
ref = None
if args.root_sequence:
for fmt in ['fasta', 'genbank']:
for fmt in [BIOPYTHON_FASTA_FORMAT, 'genbank']:
try:
ref = str(read_sequence(args.root_sequence, format=fmt).seq).upper()
break
Expand Down
9 changes: 7 additions & 2 deletions augur/io/sequences.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,18 @@
import os

from augur.errors import AugurError
from importlib.metadata import version
from packaging.version import Version
from typing import Iterator, Iterable, Union
from .file import open_file


# Default FASTA format name used by the sequence readers in this module.
# With Biopython 1.85 and later, format='fasta' shows a
# BiopythonDeprecationWarning for files that start with anything but '>';
# 'fasta-pearson' does not trigger that warning, so it is selected for those
# versions, while older Biopython versions keep plain 'fasta'.
BIOPYTHON_FASTA_FORMAT = "fasta" if Version(version("biopython")) < Version("1.85") else "fasta-pearson"


def read_sequence(
path: str,
format: str = "fasta",
format: str = BIOPYTHON_FASTA_FORMAT,
) -> Bio.SeqIO.SeqRecord:
"""Read a single sequence from a path.
Expand All @@ -20,7 +25,7 @@ def read_sequence(

def read_sequences(
*paths: Iterable[Union[str, os.PathLike]],
format: str = "fasta",
format: str = BIOPYTHON_FASTA_FORMAT,
) -> Iterator[Bio.SeqIO.SeqRecord]:
"""Read sequences from one or more paths.
Expand Down

0 comments on commit 945011a

Please sign in to comment.