Skip to content

Commit

Permalink
Merge pull request #25 from johandahlberg/create_index_reads_and_nova…
Browse files Browse the repository at this point in the history
…seq_support

Create index reads and novaseq support
  • Loading branch information
b97pla authored Feb 13, 2018
2 parents 4429503 + 44912b8 commit 9364c74
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 14 deletions.
6 changes: 6 additions & 0 deletions bcl2fastq/handlers/bcl2fastq_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ def create_config_from_request(self, runfolder, request_body):
barcode_mismatches = ""
tiles = ""
use_base_mask = ""
create_indexes = False
additional_args = ""

runfolder_base_path = self.config["runfolder_path"]
Expand All @@ -133,6 +134,10 @@ def create_config_from_request(self, runfolder, request_body):
if "use_base_mask" in request_data:
use_base_mask = request_data["use_base_mask"]

if "create_indexes" in request_data:
if request_data["create_indexes"] == "True":
create_indexes = True

if "additional_args" in request_data:
additional_args = request_data["additional_args"]

Expand All @@ -145,6 +150,7 @@ def create_config_from_request(self, runfolder, request_body):
barcode_mismatches,
tiles,
use_base_mask,
create_indexes,
additional_args)

return config
Expand Down
53 changes: 40 additions & 13 deletions bcl2fastq/lib/bcl2fastq_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
import shutil
import time

from illuminate.metadata import InteropMetadata

import xmltodict


from arteria.exceptions import ArteriaUsageException
Expand All @@ -30,6 +31,7 @@ def __init__(self,
barcode_mismatches=None,
tiles=None,
use_base_mask=None,
create_indexes=False,
additional_args=None,
nbr_of_cores=None):
"""
Expand All @@ -45,6 +47,7 @@ def __init__(self,
:param barcode_mismatches: how many mismatches to allow in tag.
:param tiles: tiles to include when running bcl2fastq
:param use_base_mask: base mask to use
:param create_indexes: Create fastq files for indexes
:param additional_args: this can be used to pass any other arguments to bcl2fastq
:param nbr_of_cores: number of cores to run bcl2fastq with
"""
Expand Down Expand Up @@ -86,6 +89,7 @@ def __init__(self,
# commandline passed. E.g. "--use-bases-mask 1:y*,6i,6i, y* --use-bases-mask y*,6i,6i, y* "
self.use_base_mask = use_base_mask
self.additional_args = additional_args
self.create_indexes = create_indexes

# Nbr of cores to use will default to the number of cpus on the system.
if nbr_of_cores:
Expand All @@ -106,6 +110,12 @@ def write_samplesheet(samplesheet_string, new_samplesheet_file):
with open(new_samplesheet_file, "w") as f:
f.write(samplesheet_string)

@staticmethod
def runinfo_as_dict(runfolder):
    """
    Read the RunInfo.xml file found directly in the runfolder and parse it.
    :param runfolder: path to the runfolder which contains RunInfo.xml
    :return: the content of RunInfo.xml as parsed by xmltodict
    """
    with open(os.path.join(runfolder, "RunInfo.xml")) as runinfo_file:
        xml_content = runinfo_file.read()
    return xmltodict.parse(xml_content)

@staticmethod
def get_bcl2fastq_version_from_run_parameters(runfolder, config):
"""
Expand All @@ -117,13 +127,20 @@ def get_bcl2fastq_version_from_run_parameters(runfolder, config):
:return the version of bcl2fastq to use.
"""

meta_data = InteropMetadata(runfolder)
model = meta_data.model
run_info = Bcl2FastqConfig.runinfo_as_dict(runfolder)
instrument_name = run_info["RunInfo"]["Run"]["Instrument"]

current_config = config
version = current_config["machine_type"][model]["bcl2fastq_version"]
machine_type_mappings = {"M": "MiSeq",
"D": "HiSeq 2500",
"SN": "HiSeq 2000",
"ST": "HiSeq X",
"A": "NovaSeq",
"NS": "NextSeq 500",
"K": "HiSeq 4000"}

return version
for key, value in machine_type_mappings.items():
if instrument_name.startswith(key):
return config["machine_type"][value]["bcl2fastq_version"]

@staticmethod
def get_length_of_indexes(runfolder):
Expand All @@ -133,16 +150,23 @@ def get_length_of_indexes(runfolder):
:return: a dict with the read number as key and the length of each index as value e.g.:
{2: 7, 3: 8}
"""
meta_data = InteropMetadata(runfolder)
index_read_info = filter(lambda x: x["is_index"], meta_data.read_config)
indexes_and_lengths = map(lambda x: (x["read_num"], x["cycles"]), index_read_info)
return dict(indexes_and_lengths)

run_info = Bcl2FastqConfig.runinfo_as_dict(runfolder)
reads = run_info["RunInfo"]["Run"]["Reads"]["Read"]

index_lengths = {}
for read in reads:
if read['@IsIndexedRead'] == 'Y':
index_lengths[int(read['@Number'])] = int(read['@NumCycles'])
return index_lengths

@staticmethod
def is_single_read(runfolder):
    """
    Determine from RunInfo.xml if the run in the runfolder is a single read run.
    :param runfolder: path to the runfolder which contains RunInfo.xml
    :return: True if fewer than two non-index reads are defined, otherwise False
    """
    run_info = Bcl2FastqConfig.runinfo_as_dict(runfolder)
    reads = run_info["RunInfo"]["Run"]["Reads"]["Read"]

    # xmltodict collapses a single <Read> element into a dict instead of a
    # one-element list, so normalise before iterating over the reads.
    if not isinstance(reads, list):
        reads = [reads]

    nbr_of_reads = sum(1 for read in reads if read["@IsIndexedRead"] != "Y")
    return nbr_of_reads < 2

@staticmethod
def get_bases_mask_per_lane_from_samplesheet(samplesheet, index_lengths, is_single_read):
Expand Down Expand Up @@ -398,6 +422,9 @@ def construct_command(self):
if self.config.tiles:
commandline_collection.append("--tiles " + self.config.tiles)

if self.config.create_indexes:
commandline_collection.append("--create-fastq-for-index-reads")

if self.config.use_base_mask:
# Note that for the base mask the "--use-bases-mask" must be included in the
# commandline passed.
Expand Down
2 changes: 2 additions & 0 deletions config/app.config
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ machine_type:
bcl2fastq_version: 2.17.1
MiSeq:
bcl2fastq_version: 2.17.1
NovaSeq:
bcl2fastq_version: 2.17.1


runfolder_path: /vagrant/tiny-test-data/
Expand Down
2 changes: 1 addition & 1 deletion requirements/prod
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ jsonpickle==0.9.2
tornado==4.2.1
git+https://github.com/johandahlberg/localq.git@with_shell_true # Get from pip in future - localq
git+https://github.com/arteria-project/[email protected]#egg=arteria-core
illuminate==0.6.2
xmltodict
pandas==0.14.1

0 comments on commit 9364c74

Please sign in to comment.