Skip to content

Commit

Permalink
Merge pull request #2 from boasvdp/check_zipped_ref
Browse files Browse the repository at this point in the history
fix: check if ref is flat text fasta
  • Loading branch information
boasvdp authored Apr 26, 2024
2 parents 1edc815 + e65e587 commit d61d2b6
Show file tree
Hide file tree
Showing 3 changed files with 23,015 additions and 0 deletions.
31 changes: 31 additions & 0 deletions assembly_snptyper/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,32 @@ def check_external_dependencies():
raise FileNotFoundError("samtools not found in PATH")


def check_if_ref_is_ascii(reference):
    """
    Check that the reference genome is a flat-text, ASCII-encoded file.

    The file is decoded explicitly as ASCII rather than with the locale's
    default encoding, so the outcome is the same on every platform. A
    compressed (e.g. gzipped) reference contains bytes outside the ASCII
    range and is therefore rejected.

    Parameters
    ----------
    reference : Path
        Path to reference genome

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the reference genome cannot be decoded as ASCII text
    """
    # Decode in fixed-size chunks: only the decoding side effect matters, so
    # there is no need to hold a whole genome in memory at once.
    chunk_size = 1024 * 1024
    try:
        with open(reference, "r", encoding="ascii") as handle:
            while handle.read(chunk_size):
                pass
    except UnicodeDecodeError as err:
        # Chain the decoding error so the offending byte offset stays
        # visible in the traceback.
        raise ValueError(
            "Reference genome cannot be read as flat text: only unzipped FASTA reference genomes are supported."
        ) from err

def convert_vcf_to_bed(vcf, bed_path):
"""
Convert reference VCF to BED file
Expand Down Expand Up @@ -357,6 +383,11 @@ def main():
check_external_dependencies()
logging.info("External dependencies found")

# check if reference genome is ASCII encoded
logging.info("Checking if reference genome is ASCII encoded")
check_if_ref_is_ascii(args.reference)
logging.info("Reference genome is ASCII encoded")

# read list of input assemblies
logging.info(f"Reading list of input assemblies from {args.list_input}")
with open(args.list_input) as f:
Expand Down
Loading

0 comments on commit d61d2b6

Please sign in to comment.