diff --git a/CHANGES.md b/CHANGES.md index 591febe98..2ba0a960f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -9,6 +9,7 @@ * `augur parse`: A new optional `--output-id-field` argument allows the user to select any ID field for the produced FASTA file (e.g. 'accession' instead of 'name' or 'strain'). [#1403][] (@j23414) * When no `--output-id-field` is given and the data has both `name` and `strain` fields, continue to preferentially use `name` over `strain` as the sequence ID field; but, throw a deprecation warning that the order will be switched to prefer `strain` over `name` in the future to be consistent with the rest of Augur. * Added entry to [DEPRECATED.md](./DEPRECATED.md). +* Compression should now be supported for all input and output files. Please [open an issue](https://github.com/nextstrain/augur/issues) if you find one that doesn't! [#1381][] (@victorlin) ### Bug Fixes @@ -19,8 +20,10 @@ * filter: Updated the help text of `--include` and `--include-where` to explicitly state that this can add strains that are missing an entry from `--sequences`. [#1389][] (@victorlin) * filter: Fixed the summary messages to properly reflect force-inclusion of strains that are missing an entry from `--sequences`. [#1389][] (@victorlin) * filter: Updated wording of summary messages. [#1389][] (@victorlin) +* Enforce UTF-8 encoding when reading and writing files. Improve error messages when a non-UTF-8 file is used. [#1381][] (@victorlin) [#1294]: https://github.com/nextstrain/augur/pull/1294 +[#1381]: https://github.com/nextstrain/augur/pull/1381 [#1389]: https://github.com/nextstrain/augur/pull/1389 [#1410]: https://github.com/nextstrain/augur/pull/1410 [#1403]: https://github.com/nextstrain/augur/pull/1403 diff --git a/augur/align.py b/augur/align.py index c9b021d34..7c2a7af2d 100644 --- a/augur/align.py +++ b/augur/align.py @@ -6,6 +6,7 @@ from shutil import copyfile import numpy as np from Bio import AlignIO, SeqIO, Seq, Align +from .io.file import open_file from .io.shell_command_runner import run_shell_command from .io.vcf import shquote from .utils import nthreads_value @@ -369,7 +370,7 @@ def analyse_insertions(aln, ungapped, insertion_csv): for insertion_seq, strains in i_data.items(): for strain in strains: strain_data[strain][idx] = insertion_seq - with open(insertion_csv, 'w', encoding='utf-8') as fh: + with open_file(insertion_csv, 'w') as fh: print(",".join(header), file=fh) for strain in strain_data: print("{},{}".format(strain, ",".join(strain_data[strain])), file=fh) diff --git a/augur/ancestral.py b/augur/ancestral.py index 67edd9a00..2771948eb 100644 --- a/augur/ancestral.py +++ b/augur/ancestral.py @@ -29,6 +29,7 @@ from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord from .utils import parse_genes_argument, read_tree, InvalidTreeError, write_json, get_json_name +from .io.file import open_file from .io.vcf import is_vcf as is_filename_vcf from treetime.vcf_utils import read_vcf, write_vcf from collections import defaultdict @@ -465,7 +466,7 @@ def run(args): # Save ancestral amino acid sequences to FASTA. 
if args.output_translations: - with open(args.output_translations.replace("%GENE", gene), "w", encoding="utf-8") as oh: + with open_file(args.output_translations.replace("%GENE", gene), "w") as oh: for node in aa_result["tt"].tree.find_clades(): oh.write(f">{node.name}\n{aa_result['tt'].sequence(node, as_string=True, reconstructed=True)}\n") diff --git a/augur/clades.py b/augur/clades.py index 39053aa31..bb39ec511 100644 --- a/augur/clades.py +++ b/augur/clades.py @@ -19,6 +19,7 @@ import networkx as nx from itertools import islice from .errors import AugurError +from .io.file import PANDAS_READ_CSV_OPTIONS from argparse import SUPPRESS from .utils import get_parent_name_by_child_name_for_tree, read_node_data, write_json, get_json_name @@ -64,6 +65,7 @@ def read_in_clade_definitions(clade_file): sep='\t' if clade_file.endswith('.tsv') else ',', comment='#', na_filter=False, + **PANDAS_READ_CSV_OPTIONS, ) clade_inheritance_rows = df[df['gene'] == 'clade'] diff --git a/augur/distance.py b/augur/distance.py index e44584e1e..bfcaa1bf2 100644 --- a/augur/distance.py +++ b/augur/distance.py @@ -186,6 +186,7 @@ import sys from .frequency_estimators import timestamp_to_float +from .io.file import open_file from .reconstruct_sequences import load_alignments from .utils import annotate_parents_for_tree, first_line, read_node_data, write_json @@ -213,7 +214,7 @@ def read_distance_map(map_file): [('default', 0.0), ('map', {'SigPep': {0: {('W', 'P'): -8.3}}})] """ # Load the JSON. - with open(map_file, "r", encoding='utf-8') as fh: + with open_file(map_file, "r") as fh: json_distance_map = json.load(fh) # Confirm that all required fields are present. diff --git a/augur/export_v2.py b/augur/export_v2.py index 6b8ae0e70..02aab1a37 100644 --- a/augur/export_v2.py +++ b/augur/export_v2.py @@ -12,6 +12,7 @@ from Bio import Phylo from .errors import AugurError +from .io.file import open_file from .io.metadata import DEFAULT_DELIMITERS, DEFAULT_ID_COLUMNS, InvalidDelimiter, read_metadata from .types import ValidationMode from .utils import read_node_data, write_json, json_size, read_config, read_lat_longs, read_colors @@ -1011,7 +1012,7 @@ def set_description(data_json, cmd_line_description_file): `meta.description` in *data_json* to the text provided. """ try: - with open(cmd_line_description_file, encoding='utf-8') as description_file: + with open_file(cmd_line_description_file) as description_file: markdown_text = description_file.read() data_json['meta']['description'] = markdown_text except FileNotFoundError: diff --git a/augur/filter/_run.py b/augur/filter/_run.py index b81758dd3..0e9f43bf4 100644 --- a/augur/filter/_run.py +++ b/augur/filter/_run.py @@ -14,7 +14,7 @@ ID_COLUMN as SEQUENCE_INDEX_ID_COLUMN, DELIMITER as SEQUENCE_INDEX_DELIMITER, ) -from augur.io.file import open_file +from augur.io.file import PANDAS_READ_CSV_OPTIONS, open_file from augur.io.metadata import InvalidDelimiter, Metadata, read_metadata from augur.io.sequences import read_sequences, write_sequences from augur.io.print import print_err @@ -70,6 +70,7 @@ def run(args): sep=SEQUENCE_INDEX_DELIMITER, index_col=SEQUENCE_INDEX_ID_COLUMN, dtype={SEQUENCE_INDEX_ID_COLUMN: "string"}, + **PANDAS_READ_CSV_OPTIONS, ) # Remove temporary index file, if it exists. @@ -134,13 +135,14 @@ def run(args): priorities = defaultdict(random_generator.random) # Setup logging. 
+ output_log_context_manager = open_file(args.output_log, "w", newline='') output_log_writer = None if args.output_log: # Log the names of strains that were filtered or force-included, so we # can properly account for each strain (e.g., including those that were # initially filtered for one reason and then included again for another # reason). - output_log = open(args.output_log, "w", newline='') + output_log = output_log_context_manager.__enter__() output_log_header = ("strain", "filter", "kwargs") output_log_writer = csv.DictWriter( output_log, diff --git a/augur/filter/io.py b/augur/filter/io.py index 6ebf253ef..670b5b245 100644 --- a/augur/filter/io.py +++ b/augur/filter/io.py @@ -9,6 +9,7 @@ from xopen import xopen from augur.errors import AugurError +from augur.io.file import open_file from augur.io.metadata import Metadata, METADATA_DATE_COLUMN from augur.io.print import print_err from .constants import GROUP_BY_GENERATED_COLUMNS @@ -76,7 +77,7 @@ def constant_factory(value): return lambda: value try: - with open(fname, encoding='utf-8') as pfile: + with open_file(fname) as pfile: return defaultdict(constant_factory(-np.inf), { elems[0]: float(elems[1]) for elems in (line.strip().split('\t') if '\t' in line else line.strip().split() for line in pfile.readlines()) diff --git a/augur/frequencies.py b/augur/frequencies.py index 3afb47860..1a4461b90 100644 --- a/augur/frequencies.py +++ b/augur/frequencies.py @@ -10,6 +10,7 @@ from .frequency_estimators import get_pivots, alignment_frequencies, tree_frequencies from .frequency_estimators import AlignmentKdeFrequencies, TreeKdeFrequencies, TreeKdeFrequenciesError from .dates import numeric_date_type, SUPPORTED_DATE_HELP_TEXT, get_numerical_dates +from .io.file import open_file from .io.metadata import DEFAULT_DELIMITERS, DEFAULT_ID_COLUMNS, METADATA_DATE_COLUMN, InvalidDelimiter, Metadata, read_metadata from .utils import write_json @@ -110,7 +111,7 @@ def run(args): if args.method == "kde": # Load weights if they have been provided. if args.weights: - with open(args.weights, "r", encoding='utf-8') as fh: + with open_file(args.weights, "r") as fh: weights = json.load(fh) weights_attribute = args.weights_attribute diff --git a/augur/import_/beast.py b/augur/import_/beast.py index acef81269..a30734bc8 100644 --- a/augur/import_/beast.py +++ b/augur/import_/beast.py @@ -11,6 +11,7 @@ import numpy as np from Bio import Phylo from treetime import TreeAnc +from augur.io.file import open_file from augur.utils import write_json def register_parser(parent_subparsers): @@ -234,7 +235,7 @@ def parse_nexus(tree_path, treestring_regex=r'tree [A-Za-z\_]+([0-9]+)', verbose if isinstance(tree_path,str): ## determine if path or handle was provided to function try: - handle=open(tree_path,'r', encoding='utf-8') + handle=open_file(tree_path,'r') except FileNotFoundError: print("FATAL: No such file {}".format(tree_path)) sys.exit(2) diff --git a/augur/io/file.py b/augur/io/file.py index 1104f89cf..5b8a7bc13 100644 --- a/augur/io/file.py +++ b/augur/io/file.py @@ -1,7 +1,16 @@ import os from contextlib import contextmanager from io import IOBase +from textwrap import dedent from xopen import PipedCompressionReader, PipedCompressionWriter, xopen +from augur.errors import AugurError + + +ENCODING = "utf-8" + +PANDAS_READ_CSV_OPTIONS = { + 'encoding': ENCODING, +} @contextmanager @@ -24,9 +33,23 @@ def open_file(path_or_buffer, mode="r", **kwargs): File handle object """ + + # Read all files using a specific encoding. 
+    kwargs['encoding'] = ENCODING
+
     if isinstance(path_or_buffer, (str, os.PathLike)):
-        with xopen(path_or_buffer, mode, **kwargs) as handle:
-            yield handle
+        try:
+            with xopen(path_or_buffer, mode, **kwargs) as handle:
+                yield handle
+        except UnicodeDecodeError as e:
+            # TODO: Consider moving this to the top-level error handler to
+            # handle errors from other I/O functions such as pandas.read_csv.
+            # This is not trivial since the filepath is useful to include in the
+            # message, but is not available through UnicodeDecodeError alone.
+            raise AugurError(dedent(f"""\
+                File {path_or_buffer!r} contains {e.object[e.start:e.end]!r} which is not valid in the expected {e.encoding!r} encoding.
+                Try re-saving the file using the {e.encoding!r} encoding."""))
+
     elif isinstance(path_or_buffer, (IOBase, PipedCompressionReader, PipedCompressionWriter)):
         yield path_or_buffer
diff --git a/augur/io/metadata.py b/augur/io/metadata.py
index f8be2f5ad..32747eceb 100644
--- a/augur/io/metadata.py
+++ b/augur/io/metadata.py
@@ -10,7 +10,7 @@ from augur.errors import AugurError
 from augur.io.print import print_err
 from augur.types import DataErrorMethod
-from .file import open_file
+from .file import PANDAS_READ_CSV_OPTIONS, open_file

 DEFAULT_DELIMITERS = (',', '\t')

@@ -95,6 +95,7 @@ def read_metadata(metadata_file, delimiters=DEFAULT_DELIMITERS, columns=None, id
             metadata_file,
             iterator=True,
             **kwargs,
+            **PANDAS_READ_CSV_OPTIONS,
         )
         chunk = metadata.read(nrows=1)
         metadata.close()
@@ -153,7 +154,8 @@ def read_metadata(metadata_file, delimiters=DEFAULT_DELIMITERS, columns=None, id

     return pd.read_csv(
         metadata_file,
-        **kwargs
+        **kwargs,
+        **PANDAS_READ_CSV_OPTIONS,
     )

diff --git a/augur/io/vcf.py b/augur/io/vcf.py
index cb472b065..c808c3e40 100644
--- a/augur/io/vcf.py
+++ b/augur/io/vcf.py
@@ -1,6 +1,7 @@
 import os
 import shlex

+from .file import open_file
 from .shell_command_runner import run_shell_command

@@ -67,7 +68,7 @@ def write_VCF_translation(prot_dict, vcf_file_name, ref_file_name):
     #prepare the header of the VCF & write out
     header=["#CHROM","POS","ID","REF","ALT","QUAL","FILTER","INFO","FORMAT"]+seqNames
-    with open(vcf_file_name, 'w', encoding='utf-8') as the_file:
+    with open_file(vcf_file_name, 'w') as the_file:
         the_file.write( "##fileformat=VCFv4.2\n"+
                         "##source=NextStrain_Protein_Translation\n"+
                         "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n")
@@ -122,10 +123,10 @@ def write_VCF_translation(prot_dict, vcf_file_name, ref_file_name):
         vcfWrite.append("\t".join(output))

     #write it all out
-    with open(ref_file_name, 'w', encoding='utf-8') as the_file:
+    with open_file(ref_file_name, 'w') as the_file:
         the_file.write("\n".join(refWrite))

-    with open(vcf_file_name, 'a', encoding='utf-8') as the_file:
+    with open_file(vcf_file_name, 'a') as the_file:
         the_file.write("\n".join(vcfWrite))

     if vcf_file_name.lower().endswith('.gz'):
diff --git a/augur/lbi.py b/augur/lbi.py
index bf7387797..9624f7f6e 100644
--- a/augur/lbi.py
+++ b/augur/lbi.py
@@ -5,6 +5,7 @@ from collections import defaultdict
 import json
 import numpy as np

+from .io.file import open_file
 from .utils import write_json

@@ -96,7 +97,7 @@ def run(args):
     tree = Bio.Phylo.read(args.tree, "newick")

     # Load branch lengths.
-    with open(args.branch_lengths, "r", encoding='utf-8') as json_fh:
+    with open_file(args.branch_lengths, "r") as json_fh:
         branch_lengths = json.load(json_fh)

     # Annotate branch lengths and dates onto tree nodes.
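A minimal sketch of how the new error handling in augur/io/file.py surfaces to callers (not part of the patch; the file name and byte value are taken from the cram test below, and any non-UTF-8 read through open_file behaves the same way):

    from augur.errors import AugurError
    from augur.io.file import open_file

    try:
        # Reading a non-UTF-8 file through open_file now raises a friendly
        # AugurError instead of a bare UnicodeDecodeError traceback. The
        # decode error raised inside the with-body propagates back into the
        # context manager at its yield point, where it is caught and wrapped.
        with open_file("metadata-windows-1252.tsv") as fh:
            fh.read()
    except AugurError as e:
        print(e)
        # File 'metadata-windows-1252.tsv' contains b'\xe3' which is not valid
        # in the expected 'utf-8' encoding.
        # Try re-saving the file using the 'utf-8' encoding.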
diff --git a/augur/measurements/export.py b/augur/measurements/export.py index 077bd63ba..ec495590a 100644 --- a/augur/measurements/export.py +++ b/augur/measurements/export.py @@ -6,6 +6,7 @@ import sys from augur.argparse_ import HideAsFalseAction +from augur.io.file import PANDAS_READ_CSV_OPTIONS from augur.utils import first_line, write_json from augur.validate import ( measurements as read_measurements_json, @@ -106,7 +107,7 @@ def run(args): # Load input collection TSV file try: - collection_df = pd.read_csv(args.collection, sep="\t", usecols=columns_to_include) + collection_df = pd.read_csv(args.collection, sep="\t", usecols=columns_to_include, **PANDAS_READ_CSV_OPTIONS) except FileNotFoundError: print( f"ERROR: collection TSV file {args.collection!r} does not exist", diff --git a/augur/reconstruct_sequences.py b/augur/reconstruct_sequences.py index 8a6ba258a..478801855 100644 --- a/augur/reconstruct_sequences.py +++ b/augur/reconstruct_sequences.py @@ -3,6 +3,7 @@ """ from Bio import SeqIO, Seq, SeqRecord, Phylo +from .io.file import open_file from .utils import read_node_data @@ -71,7 +72,7 @@ def run(args): #if VCF, read in the reference seq for each gene, put on root if(is_vcf): node_data["nodes"][root_node]['aa_sequences'] = {} - with open(args.vcf_aa_reference, encoding='utf-8') as handle: + with open_file(args.vcf_aa_reference) as handle: for record in SeqIO.parse(handle, "fasta"): if record.id==args.gene: #'root' may not be same as 'reference', so apply any mutations at root here! diff --git a/augur/sequence_traits.py b/augur/sequence_traits.py index 601d1dd61..1d09ac6b2 100644 --- a/augur/sequence_traits.py +++ b/augur/sequence_traits.py @@ -3,10 +3,10 @@ """ import sys -import gzip import numpy as np from treetime.vcf_utils import read_vcf from collections import defaultdict +from .io.file import PANDAS_READ_CSV_OPTIONS, open_file from .utils import write_json, get_json_name def read_in_translate_vcf(vcf_file, ref_file): @@ -47,10 +47,7 @@ def mutation_struct(): altLoc = 0 sampLoc = 9 - #Use different openers depending on whether compressed - opn = gzip.open if vcf_file.endswith(('.gz', '.GZ')) else open - - with opn(vcf_file, mode='rt') as f: + with open_file(vcf_file, mode='rt') as f: samps = [] for line in f: @@ -169,7 +166,7 @@ def read_in_features(drm_file): mutPositions = defaultdict(list) - df = pd.read_csv(drm_file, sep='\t' if drm_file.endswith('.tsv') else ',') + df = pd.read_csv(drm_file, sep='\t' if drm_file.endswith('.tsv') else ',', **PANDAS_READ_CSV_OPTIONS) for mi, m in df.iterrows(): pos = m.SITE-1 #put in python numbering gene = m.GENE if hasattr(m, 'GENE') else 'nuc' diff --git a/augur/traits.py b/augur/traits.py index 893bf052d..0b520c600 100644 --- a/augur/traits.py +++ b/augur/traits.py @@ -6,6 +6,7 @@ from collections import defaultdict import sys from .errors import AugurError +from .io.file import open_file from .io.metadata import DEFAULT_DELIMITERS, DEFAULT_ID_COLUMNS, InvalidDelimiter, read_metadata from .utils import write_json, get_json_name TINY = 1e-12 @@ -157,7 +158,7 @@ def run(args): if args.weights: weight_dict = {c:{} for c in args.columns} sep = ',' if args.weights.endswith('csv') else '\t' - with open(args.weights, 'r', encoding='utf-8') as fh: + with open_file(args.weights, 'r') as fh: for line in fh: if line[0]=='#': continue @@ -205,7 +206,7 @@ def run(args): models[column]['transition_matrix'] = [list(x) for x in gtr.W] if gtr: - with open(out_prefix+'%s.mugration_model.txt'%column, 'w', encoding='utf-8') as ofile: + with 
open_file(out_prefix+'%s.mugration_model.txt'%column, 'w') as ofile: ofile.write('Map from character to field name\n') for k,v in alphabet.items(): ofile.write(k+':\t'+str(v)+'\n') diff --git a/augur/tree.py b/augur/tree.py index 84e2c2143..98c31f510 100644 --- a/augur/tree.py +++ b/augur/tree.py @@ -16,6 +16,7 @@ from pathlib import Path from .errors import AugurError +from .io.file import open_file from .io.sequences import read_sequences from .io.shell_command_runner import run_shell_command from .io.vcf import shquote @@ -249,7 +250,7 @@ def random_string(n): tmp_aln_file = str(Path(aln_file).with_name(Path(aln_file).stem + "-delim.fasta")) log_file = str(Path(tmp_aln_file).with_suffix(".iqtree.log")) num_seqs = 0 - with open(tmp_aln_file, 'w', encoding='utf-8') as ofile, open(aln_file, encoding='utf-8') as ifile: + with open_file(tmp_aln_file, 'w') as ofile, open_file(aln_file) as ifile: for line in ifile: tmp_line = line if line.startswith(">"): @@ -358,7 +359,7 @@ def write_out_informative_fasta(compress_seq, alignment, stripFile=None): #If want a position map, print: if printPositionMap: - with open(fasta_file+".positions.txt", 'w', encoding='utf-8') as the_file: + with open_file(fasta_file+".positions.txt", 'w') as the_file: the_file.write("\n".join(pos)) return fasta_file @@ -396,7 +397,7 @@ def mask_sites_in_multiple_sequence_alignment(alignment_file, excluded_sites_fil # Write the masked alignment to disk one record at a time. alignment_file_path = Path(alignment_file) masked_alignment_file = str(alignment_file_path.parent / ("masked_%s" % alignment_file_path.name)) - with open(masked_alignment_file, "w", encoding='utf-8') as oh: + with open_file(masked_alignment_file, "w") as oh: for record in alignment: # Convert to a mutable sequence to enable masking with Ns. 
sequence = MutableSeq(str(record.seq)) diff --git a/augur/util_support/color_parser.py b/augur/util_support/color_parser.py index 28ac3a1d8..1f8c9360f 100644 --- a/augur/util_support/color_parser.py +++ b/augur/util_support/color_parser.py @@ -2,6 +2,7 @@ import functools from augur.data import as_file +from augur.io.file import open_file from augur.util_support.color_parser_line import ColorParserLine @@ -17,11 +18,11 @@ def mapping(self): if self.use_defaults: with as_file("colors.tsv") as file: - with open(file, encoding="utf-8") as defaults: + with open_file(file) as defaults: colors = {**colors, **self.parse_file(defaults)} if self.mapping_filename: - with open(self.mapping_filename, encoding="utf-8") as mapping: + with open_file(self.mapping_filename) as mapping: colors = {**colors, **self.parse_file(mapping)} return colors diff --git a/augur/util_support/node_data_file.py b/augur/util_support/node_data_file.py index 4add65ef4..2d27c421b 100644 --- a/augur/util_support/node_data_file.py +++ b/augur/util_support/node_data_file.py @@ -3,6 +3,7 @@ from augur.__version__ import __version__ from augur.__version__ import is_augur_version_compatible from augur.errors import AugurError +from augur.io.file import open_file from augur.io.print import print_err from augur.types import ValidationMode from augur.validate import validate_json, ValidateError, load_json_schema @@ -16,7 +17,7 @@ def __init__(self, fname, validation_mode=ValidationMode.ERROR): self.fname = fname self.validation_mode = validation_mode - with open(fname, encoding="utf-8") as jfile: + with open_file(fname) as jfile: self.attrs = json.load(jfile) self.validate() diff --git a/augur/utils.py b/augur/utils.py index e54b565a8..bd1a9ccd7 100644 --- a/augur/utils.py +++ b/augur/utils.py @@ -10,7 +10,7 @@ from .__version__ import __version__ from augur.data import as_file -from augur.io.file import open_file +from augur.io.file import PANDAS_READ_CSV_OPTIONS, open_file from augur.io.print import print_err from augur.types import ValidationMode @@ -292,7 +292,7 @@ def _read_gff(reference, feature_names): valid_types = ['gene', 'source', 'region'] features = {} - with open(reference, encoding='utf-8') as in_handle: + with open_file(reference) as in_handle: # Note that `GFF.parse` doesn't always yield GFF records in the order # one may expect, but since we raise AugurError if there are multiple # this doesn't matter. @@ -443,7 +443,7 @@ def read_config(fname): return defaultdict(dict) try: - with open(fname, 'rb') as ifile: + with open_file(fname, 'rb') as ifile: config = json.load(ifile) except json.decoder.JSONDecodeError as err: print("FATAL ERROR:") @@ -474,12 +474,12 @@ def add_line_to_coordinates(line): print("WARNING: geo-coordinate file contains invalid line. Please make sure not to mix tabs and spaces as delimiters (use only tabs):",line) if use_defaults: with as_file("lat_longs.tsv") as file: - with open(file, encoding="utf-8") as defaults: + with open_file(file) as defaults: for line in defaults: add_line_to_coordinates(line) if overrides: if os.path.isfile(overrides): - with open(overrides, encoding='utf-8') as ifile: + with open_file(overrides) as ifile: for line in ifile: add_line_to_coordinates(line) else: @@ -699,11 +699,11 @@ def read_bed_file(bed_file): mask_sites = [] try: bed = pd.read_csv(bed_file, sep='\t', header=None, usecols=[1,2], - dtype={1:int,2:int}) + dtype={1:int,2:int}, **PANDAS_READ_CSV_OPTIONS) except ValueError: # Check if we have a header row. Otherwise, just fail. 
bed = pd.read_csv(bed_file, sep='\t', header=None, usecols=[1,2], - dtype={1:int,2:int}, skiprows=1) + dtype={1:int,2:int}, skiprows=1, **PANDAS_READ_CSV_OPTIONS) print("Skipped row 1 of %s, assuming it is a header." % bed_file) for _, row in bed.iterrows(): mask_sites.extend(range(row[1], row[2])) @@ -728,7 +728,7 @@ def read_mask_file(mask_file): Sorted list of unique zero-indexed sites """ mask_sites = [] - with open(mask_file, encoding='utf-8') as mf: + with open_file(mask_file) as mf: for idx, line in enumerate(l.strip() for l in mf.readlines()): if "\t" in line: line = line.split("\t")[1] diff --git a/augur/validate.py b/augur/validate.py index aefdf5589..364a5d785 100644 --- a/augur/validate.py +++ b/augur/validate.py @@ -12,6 +12,7 @@ from textwrap import indent from typing import Iterable, Union from augur.data import as_file +from augur.io.file import open_file from augur.io.print import print_err from augur.io.json import shorten_as_json from .validate_export import verifyMainJSONIsInternallyConsistent, verifyMetaAndOrTreeJSONsAreInternallyConsistent @@ -30,7 +31,7 @@ def load_json_schema(path, refs=None): (located in augur/data) ''' try: - with as_file(path) as file, open(file, "r", encoding = "utf-8") as fh: + with as_file(path) as file, open_file(file, "r") as fh: schema = json.load(fh) except json.JSONDecodeError as err: raise ValidateError("Schema {} is not a valid JSON file. Error: {}".format(path, err)) @@ -45,7 +46,7 @@ def load_json_schema(path, refs=None): # Make the validator aware of additional schemas schema_store = dict() for k, v in refs.items(): - with as_file(v) as file, open(file, "r", encoding = "utf-8") as fh: + with as_file(v) as file, open_file(file, "r") as fh: schema_store[k] = json.load(fh) resolver = jsonschema.RefResolver.from_schema(schema,store=schema_store) schema_validator = Validator(schema, resolver=resolver) @@ -67,7 +68,7 @@ def resolve_remote(url): def load_json(path): - with open(path, 'rb') as fh: + with open_file(path, 'rb') as fh: try: jsonToValidate = json.load(fh) except json.JSONDecodeError: diff --git a/tests/functional/filter/cram/filter-file-encoding-error.t b/tests/functional/filter/cram/filter-file-encoding-error.t new file mode 100644 index 000000000..926cfb083 --- /dev/null +++ b/tests/functional/filter/cram/filter-file-encoding-error.t @@ -0,0 +1,33 @@ +Setup + + $ source "$TESTDIR"/_setup.sh + +Create a metadata file that contains a non-ASCII character. + + $ cat >metadata.tsv <<~~ + > strain col1 + > SEQ_1 ã + > SEQ_2 b + > SEQ_3 c + > ~~ + +Encode it as WINDOWS-1252. + + $ iconv -f UTF-8 -t WINDOWS-1252 metadata.tsv > metadata-windows-1252.tsv + +The UTF-8 encoded file can be used without issues. + + $ ${AUGUR} filter \ + > --metadata metadata.tsv \ + > --output-strains filtered_strains.txt + 0 strains were dropped during filtering + 3 strains passed all filters + +An error is shown when using the WINDOWS-1252 encoded file. + + $ ${AUGUR} filter \ + > --metadata metadata-windows-1252.tsv \ + > --output-strains filtered_strains.txt + ERROR: File 'metadata-windows-1252.tsv' contains b'\xe3' which is not valid in the expected 'utf-8' encoding. + Try re-saving the file using the 'utf-8' encoding. + [2]
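For pandas-based readers, the patch threads the same encoding through PANDAS_READ_CSV_OPTIONS rather than open_file. A minimal sketch of the intended usage, mirroring the pd.read_csv calls changed above (assumes pandas is installed; the file name is the one from the test):

    import pandas as pd
    from augur.io.file import PANDAS_READ_CSV_OPTIONS

    # Pinning encoding='utf-8' keeps tabular readers consistent with
    # open_file: a mis-encoded file fails loudly with a decode error rather
    # than being read under a platform- or locale-dependent default.
    df = pd.read_csv("metadata.tsv", sep="\t", **PANDAS_READ_CSV_OPTIONS)
    print(df.head())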