Skip to content

Commit

Permalink
add utils functions
Browse files Browse the repository at this point in the history
- adds utils.check_termination_file_exists and utils.check_is_sequencing_run_dir
- adds unit tests for both functions
  • Loading branch information
jethror1 committed Oct 1, 2024
1 parent 9b53ddc commit a994817
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 14 deletions.
44 changes: 30 additions & 14 deletions s3_upload/utils/utils.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,54 @@
"""General utility functions"""

from os import path

def check_termination_file_exists(run_dir: str) -> bool:
    """
    Check if the run has completed sequencing from the presence of
    CopyComplete.txt (for NovaSeqs), or RTAComplete(.txt/.xml) for other
    types of Illumina sequencers.

    Adapted from: https://github.com/eastgenomics/dx-streaming-upload/blob/476b28af980ad62c5f2750cc0b6920b82a287b11/files/incremental_upload.py#L393

    Parameters
    ----------
    run_dir : str
        path to run directory to check

    Returns
    -------
    bool
        True if run is complete else False
    """
    termination_files = (
        # NovaSeq run that is complete
        "CopyComplete.txt",
        # other type of Illumina sequencer (e.g. MiSeq, NextSeq, HiSeq)
        "RTAComplete.txt",
        "RTAComplete.xml",
    )

    # presence of any one of the files is enough to call the run complete
    return any(
        path.exists(path.join(run_dir, termination_file))
        for termination_file in termination_files
    )


def check_is_sequencing_run_dir(run_dir: str) -> bool:
    """
    Check if a given directory is a sequencing run from presence of
    RunInfo.xml file

    Parameters
    ----------
    run_dir : str
        path to directory to check

    Returns
    -------
    bool
        True if directory is a sequencing run else False
    """
    return path.exists(path.join(run_dir, "RunInfo.xml"))


def get_sequencing_file_list(dir, exclude_patterns) -> list:
Expand Down
90 changes: 90 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import os
from shutil import rmtree
import unittest

from tests import TEST_DATA_DIR

from s3_upload.utils import utils


class TestCheckTerminationFileExists(unittest.TestCase):
    """
    Tests for utils.check_termination_file_exists, run against a
    test_run directory created under TEST_DATA_DIR
    """

    @classmethod
    def setUpClass(cls):
        cls.test_run_dir = os.path.join(TEST_DATA_DIR, "test_run")
        os.makedirs(
            cls.test_run_dir,
            exist_ok=True,
        )

    @classmethod
    def tearDownClass(cls):
        rmtree(cls.test_run_dir)

    def test_complete_novaseq_run_returns_true(self):
        """
        Check complete NovaSeq runs correctly identified from
        CopyComplete.txt file in the run directory
        """
        termination_file = os.path.join(self.test_run_dir, "CopyComplete.txt")
        open(termination_file, "w").close()
        # registered cleanup runs even when the assertion fails, so the
        # file can not leak into the other tests sharing this directory
        self.addCleanup(os.remove, termination_file)

        self.assertTrue(
            utils.check_termination_file_exists(self.test_run_dir)
        )

    def test_complete_non_novaseq_run_returns_true(self):
        """
        Check other completed non-NovaSeq runs correctly identified from
        RTAComplete.txt or RTAComplete.xml files
        """
        for suffix in ["txt", "xml"]:
            termination_file = os.path.join(
                self.test_run_dir, f"RTAComplete.{suffix}"
            )

            with self.subTest(f"Checking RTAComplete.{suffix}"):
                open(termination_file, "w").close()

                try:
                    self.assertTrue(
                        utils.check_termination_file_exists(self.test_run_dir)
                    )
                finally:
                    # remove before the next iteration so that each file
                    # type is checked on its own
                    os.remove(termination_file)

    def test_incomplete_sequencing_run_returns_false(self):
        """
        Check incomplete runs correctly identified
        """
        # name must start with test_ for unittest to collect and run it
        self.assertFalse(
            utils.check_termination_file_exists(self.test_run_dir)
        )


class TestCheckIsSequencingRunDir(unittest.TestCase):
    """
    Tests for utils.check_is_sequencing_run_dir, run against a
    test_run directory created under TEST_DATA_DIR
    """

    @classmethod
    def setUpClass(cls):
        cls.test_run_dir = os.path.join(TEST_DATA_DIR, "test_run")
        os.makedirs(
            cls.test_run_dir,
            exist_ok=True,
        )

    @classmethod
    def tearDownClass(cls):
        rmtree(cls.test_run_dir)

    def test_non_sequencing_run_dir_returns_false(self):
        """
        Check a directory without a RunInfo.xml file is not identified
        as a sequencing run
        """
        # no RunInfo.xml file present in test_data dir => not a run
        self.assertFalse(
            utils.check_is_sequencing_run_dir(self.test_run_dir)
        )

    def test_check_sequencing_run_dir_returns_true(self):
        """
        Check a directory containing a RunInfo.xml file is identified
        as a sequencing run
        """
        run_info_xml = os.path.join(self.test_run_dir, "RunInfo.xml")
        open(run_info_xml, "w").close()
        # registered cleanup runs even when the assertion fails, so the
        # file can not leak into the other test sharing this directory
        self.addCleanup(os.remove, run_info_xml)

        self.assertTrue(
            utils.check_is_sequencing_run_dir(self.test_run_dir)
        )

0 comments on commit a994817

Please sign in to comment.