Skip to content

Commit

Permalink
Merge pull request #22 from johandahlberg/remove_unaligned
Browse files Browse the repository at this point in the history
Remove unaligned
  • Loading branch information
Johan Hermansson authored Jan 18, 2017
2 parents 723ac28 + c5c3a17 commit 853d339
Show file tree
Hide file tree
Showing 7 changed files with 132 additions and 33 deletions.
2 changes: 1 addition & 1 deletion bcl2fastq/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.1.1"
__version__ = "1.1.2"
2 changes: 2 additions & 0 deletions bcl2fastq/handlers/bcl2fastq_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ def post(self, runfolder):
create_bcl2fastq_runner(runfolder_config)
bcl2fastq_version = job_runner.version()
cmd = job_runner.construct_command()
# If the output directory exists, we always want to clear it.
job_runner.delete_output()
job_runner.symlink_output_to_unaligned()

log_file = self.bcl2fastq_log_file_provider.log_file_path(runfolder)
Expand Down
26 changes: 26 additions & 0 deletions bcl2fastq/lib/bcl2fastq_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ def __init__(self,
:param nbr_of_cores: number of cores to run bcl2fastq with
"""

self.general_config = general_config

self.runfolder_input = runfolder_input
self.base_calls_input = runfolder_input + "/Data/Intensities/BaseCalls"

Expand Down Expand Up @@ -297,6 +299,30 @@ def construct_command(self):
"""
raise NotImplementedError("Subclasses should implement this!")

def validate_output(self):

def _parent_dir(d):
return os.path.abspath(os.path.join(d, os.path.pardir))

abs_path_of_allowed_dirs = map(os.path.abspath, self.config.general_config['allowed_output_folders'])
is_located_in_parent_dir = _parent_dir(self.config.output) in abs_path_of_allowed_dirs

if not is_located_in_parent_dir:
error_string = "Invalid output directory {} was specified." \
" Allowed dirs were: {}".format(self.config.output,
self.config.general_config['allowed_output_folders'])
log.error(error_string)
raise ArteriaUsageException(error_string)

def delete_output(self):
"""
Delete the output directory if it exists and the output path is valid
:return: None
"""
self.validate_output()
log.info("Found a directory at output path {}, will remove it.".format(self.config.output))
shutil.rmtree(self.config.output)

def symlink_output_to_unaligned(self):
"""
Create a symlink from `runfolder/Unaligned` to what has been defined as the output directory.
Expand Down
6 changes: 6 additions & 0 deletions config/app.config
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,9 @@ runfolder_path: /vagrant/tiny-test-data/
default_output_path: /vagrant/runfolder_output/

bcl2fastq_logs_path: bcl2fastq_logs

# Only folders and child folder of the directories listed here will be valid as output
# directories.
allowed_output_folders:
- /vagrant/runfolder_output/

67 changes: 60 additions & 7 deletions tests/test_bcl2fastq_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

from tornado.escape import json_encode
import mock
from test_utils import TestUtils, DummyConfig

from test_utils import TestUtils, DummyConfig, DummyRunnerConfig
import shutil

from bcl2fastq.handlers.bcl2fastq_handlers import *
from bcl2fastq.lib.bcl2fastq_utils import BCL2Fastq2xRunner, BCL2FastqRunner
Expand Down Expand Up @@ -32,9 +32,11 @@ def start_api_call_nbr(self):
8: "y*,7i,n*,y*",
}

dummy_config = DummyConfig()
DUMMY_RUNNER_CONF = DummyRunnerConfig(output='/foo/bar/runfolder', general_config = dummy_config)

def get_app(self):
dummy_config = DummyConfig()
return Application(routes(config=dummy_config))
return Application(routes(config=self.dummy_config))

def test_versions(self):
response = self.fetch(self.API_BASE + "/versions")
Expand All @@ -49,8 +51,10 @@ def test_start(self):
# Use mock to ensure that this will run without
# creating the runfolder.
with mock.patch.object(os.path, 'isdir', return_value=True), \
mock.patch.object(shutil, 'rmtree', return_value=None), \
mock.patch.object(Bcl2FastqConfig, 'get_bcl2fastq_version_from_run_parameters', return_value="2.15.2"), \
mock.patch.object(BCL2FastqRunnerFactory, "create_bcl2fastq_runner", return_value=FakeRunner("2.15.2")), \
mock.patch.object(BCL2FastqRunnerFactory, "create_bcl2fastq_runner",
return_value=FakeRunner("2.15.2", self.DUMMY_RUNNER_CONF)), \
mock.patch.object(BCL2FastqRunner, 'symlink_output_to_unaligned', return_value=None):

body = {"runfolder_input": "/path/to/runfolder"}
Expand All @@ -70,8 +74,10 @@ def test_start_with_empty_body(self):
# Use mock to ensure that this will run without
# creating the runfolder.
with mock.patch.object(os.path, 'isdir', return_value=True), \
mock.patch.object(shutil, 'rmtree', return_value=None), \
mock.patch.object(Bcl2FastqConfig, 'get_bcl2fastq_version_from_run_parameters', return_value="2.15.2"), \
mock.patch.object(BCL2FastqRunnerFactory, "create_bcl2fastq_runner", return_value=FakeRunner("2.15.2")), \
mock.patch.object(BCL2FastqRunnerFactory, "create_bcl2fastq_runner",
return_value=FakeRunner("2.15.2", self.DUMMY_RUNNER_CONF)), \
mock.patch.object(BCL2FastqRunner, 'symlink_output_to_unaligned', return_value=None):

response = self.fetch(self.API_BASE + "/start/150415_D00457_0091_AC6281ANXX", method="POST", body="")
Expand All @@ -85,14 +91,61 @@ def test_start_with_empty_body(self):
self.assertEqual(json.loads(response.body)["bcl2fastq_version"], "2.15.2")
self.assertEqual(json.loads(response.body)["state"], "started")

def test_start_with_disallowed_output_specified(self):

# TODO Please note that this test is not very good, since the
# what we ideally want to test is the output specifed by the request,
# and that that gets rejected, however since the create_bcl2fastq_runner command
# is mocked away here and that is what returns the invalid output the tests is
# a bit misleading. In the future we probably want to refactor this.
# / JD 20170117
runner_conf_with_invalid_output = DummyRunnerConfig(output='/not/foo/bar/runfolder',
general_config=self.dummy_config)
with mock.patch.object(os.path, 'isdir', return_value=True), \
mock.patch.object(shutil, 'rmtree', return_value=None) as rmmock, \
mock.patch.object(Bcl2FastqConfig, 'get_bcl2fastq_version_from_run_parameters', return_value="2.15.2"), \
mock.patch.object(BCL2FastqRunnerFactory, "create_bcl2fastq_runner",
return_value=FakeRunner("2.15.2", runner_conf_with_invalid_output)), \
mock.patch.object(BCL2FastqRunner, 'symlink_output_to_unaligned', return_value=None):

# This output dir is not allowed by the config
body = {'output': '/not/foo/bar'}
response = self.fetch(self.API_BASE + "/start/150415_D00457_0091_AC6281ANXX",
method="POST",
body=json_encode(body))

self.assertEqual(response.code, 500)
self.assertEqual(response.reason, "Invalid output directory /not/foo/bar/runfolder was specified."
" Allowed dirs were: ['/foo/bar']")
rmmock.assert_not_called()

def test_start_with_allowed_output_specified(self):
with mock.patch.object(os.path, 'isdir', return_value=True), \
mock.patch.object(shutil, 'rmtree', return_value=None) as rmmock, \
mock.patch.object(Bcl2FastqConfig, 'get_bcl2fastq_version_from_run_parameters', return_value="2.15.2"), \
mock.patch.object(BCL2FastqRunnerFactory, "create_bcl2fastq_runner",
return_value=FakeRunner("2.15.2", self.DUMMY_RUNNER_CONF)), \
mock.patch.object(BCL2FastqRunner, 'symlink_output_to_unaligned', return_value=None):
body = {'output': '/foo/bar'}
response = self.fetch(self.API_BASE + "/start/150415_D00457_0091_AC6281ANXX",
method="POST",
body=json_encode(body))

# Just increment it here so it doesn't break the other tests
self.start_api_call_nbr()
self.assertEqual(response.code, 202)
rmmock.assert_called_with('/foo/bar/runfolder')

def test_start_providing_samplesheet(self):
# Use mock to ensure that this will run without
# creating the runfolder.
with mock.patch.object(os.path, 'isdir', return_value=True), \
mock.patch.object(shutil, 'rmtree', return_value=None), \
mock.patch.object(Bcl2FastqConfig, 'get_bcl2fastq_version_from_run_parameters', return_value="2.15.2"), \
mock.patch.object(BCL2FastqRunner, 'symlink_output_to_unaligned', return_value=None), \
mock.patch.object(Bcl2FastqConfig, "write_samplesheet") as ws , \
mock.patch.object(BCL2FastqRunnerFactory, "create_bcl2fastq_runner", return_value=FakeRunner("2.15.2")):
mock.patch.object(BCL2FastqRunnerFactory, "create_bcl2fastq_runner",
return_value=FakeRunner("2.15.2", self.DUMMY_RUNNER_CONF)):

body = {"runfolder_input": "/path/to/runfolder", "samplesheet": TestUtils.DUMMY_SAMPLESHEET_STRING}

Expand Down
51 changes: 30 additions & 21 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,8 @@

from bcl2fastq.lib.bcl2fastq_utils import BCL2FastqRunner
from bcl2fastq.lib.bcl2fastq_utils import BCL2FastqRunner, Bcl2FastqConfig

class TestUtils:

DUMMY_CONFIG = { "runfolder_path": "/data/biotank3/runfolders",
"default_output_path": "test",
"bcl2fastq":
{"versions":
{"2.15.2":
{"class_creation_function": "_get_bcl2fastq2x_runner",
"binary": "/path/to/bcl2fastq"},
"1.8.4":
{"class_creation_function": "_get_bcl2fastq1x_runner",
"binary": "/path/to/bcl2fastq"}}},
"machine_type":
{"MiSeq": {"bcl2fastq_version": "1.8.4"},
"HiSeq X": {"bcl2fastq_version": "2.15.2"},
"HiSeq 2500": {"bcl2fastq_version": "1.8.4"},
"HiSeq 4000": {"bcl2fastq_version": "1.8.4"},
"HiSeq 2000": {"bcl2fastq_version": "1.8.4"},
"NextSeq 500": {"bcl2fastq_version": "1.8.4"}},
"bcl2fastq_logs_path": "/tmp/"}

DUMMY_SAMPLESHEET_STRING = """[Header],,,,,,,,,,,
IEMFileVersion,4,,,,,,,,,,
Expand Down Expand Up @@ -54,12 +36,39 @@ class TestUtils:


class DummyConfig:
DUMMY_CONFIG = { "runfolder_path": "/data/biotank3/runfolders",
"default_output_path": "test",
"bcl2fastq":
{"versions":
{"2.15.2":
{"class_creation_function": "_get_bcl2fastq2x_runner",
"binary": "/path/to/bcl2fastq"},
"1.8.4":
{"class_creation_function": "_get_bcl2fastq1x_runner",
"binary": "/path/to/bcl2fastq"}}},
"machine_type":
{"MiSeq": {"bcl2fastq_version": "1.8.4"},
"HiSeq X": {"bcl2fastq_version": "2.15.2"},
"HiSeq 2500": {"bcl2fastq_version": "1.8.4"},
"HiSeq 4000": {"bcl2fastq_version": "1.8.4"},
"HiSeq 2000": {"bcl2fastq_version": "1.8.4"},
"NextSeq 500": {"bcl2fastq_version": "1.8.4"}},
"bcl2fastq_logs_path": "/tmp/",
"allowed_output_folders": ['/foo/bar']}

def __getitem__(self, key):
return TestUtils.DUMMY_CONFIG[key]
return self.DUMMY_CONFIG[key]

class DummyRunnerConfig(Bcl2FastqConfig):
def __init__(self, output, general_config):
self.output = output
self.general_config = general_config


class FakeRunner(BCL2FastqRunner):
def __init__(self, dummy_version):
def __init__(self, dummy_version, config):
self.dummy_version = dummy_version
self.config = config

def version(self):
return str(self.dummy_version)
Expand Down
11 changes: 7 additions & 4 deletions tests/tests_bcl2fastq_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@ class TestBcl2FastqConfig(unittest.TestCase):
test_dir = os.path.dirname(os.path.realpath(__file__))
samplesheet_file = test_dir + "/sampledata/new_samplesheet_example.csv"
samplesheet_with_no_tag = test_dir + "/sampledata/no_tag_samplesheet_example.csv"
dummy_config = DummyConfig()

def test_get_bcl2fastq_version_from_run_parameters(self):
runfolder = TestBcl2FastqConfig.test_dir + "/sampledata/HiSeq-samples/2014-02_13_average_run"
version = Bcl2FastqConfig.get_bcl2fastq_version_from_run_parameters(runfolder, TestUtils.DUMMY_CONFIG)
version = Bcl2FastqConfig.get_bcl2fastq_version_from_run_parameters(runfolder, self.dummy_config)
self.assertEqual(version, "1.8.4")

def test_is_single_read_true(self):
Expand Down Expand Up @@ -117,14 +118,16 @@ def test_get_bases_mask_per_lane_from_samplesheet_no_tag(self):

class TestBCL2FastqRunnerFactory(unittest.TestCase):

dummy_config = DummyConfig()

def test_create_bcl2fastq1x_runner(self):
config = Bcl2FastqConfig(
general_config = DUMMY_CONFIG,
bcl2fastq_version = "1.8.4",
runfolder_input = "test/runfolder",
output = "test/output")

factory = BCL2FastqRunnerFactory(TestUtils.DUMMY_CONFIG)
factory = BCL2FastqRunnerFactory(self.dummy_config)
runner = factory.create_bcl2fastq_runner(config)
self.assertIsInstance(runner, BCL2Fastq1xRunner)

Expand All @@ -135,7 +138,7 @@ def test_create_bcl2fastq2x_runner(self):
runfolder_input = "test/runfolder",
output = "test/output")

factory = BCL2FastqRunnerFactory(TestUtils.DUMMY_CONFIG)
factory = BCL2FastqRunnerFactory(self.dummy_config)
runner = factory.create_bcl2fastq_runner(config)
self.assertIsInstance(runner, BCL2Fastq2xRunner, msg="runner is: " + str(runner))

Expand All @@ -146,7 +149,7 @@ def test_create_invalid_version_runner(self):
runfolder_input = "test/runfolder",
output = "test/output")

factory = BCL2FastqRunnerFactory(TestUtils.DUMMY_CONFIG)
factory = BCL2FastqRunnerFactory(self.dummy_config)
with self.assertRaises(LookupError):
factory.create_bcl2fastq_runner(config)

Expand Down

0 comments on commit 853d339

Please sign in to comment.