From 43c163b391c282290fbac498a57864985b5b8934 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Thu, 7 Jul 2016 22:16:10 -0400 Subject: [PATCH 1/5] added --commonBarcodes to illumina_demux call in Snakemake rule --- pipes/rules/demux.rules | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipes/rules/demux.rules b/pipes/rules/demux.rules index 1184d4866..f0112e402 100644 --- a/pipes/rules/demux.rules +++ b/pipes/rules/demux.rules @@ -92,6 +92,7 @@ rule illumina_demux: input: illumina_demux_inputs output: config['tmp_dir']+'/'+config['subdirs']['demux']+'/bams_per_lane/{flowcell}.{lane}/Unmatched.bam', config['reports_dir']+'/barcodes/barcodes-metrics-{flowcell}.{lane}.txt' + config['reports_dir']+'/barcodes/common-barcodes-{flowcell}.{lane}.txt' resources: mem=60 params: LSF=config.get('LSF_queues', {}).get('bigmem', '-q flower'), UGER=config.get('UGER_queues', {}).get('long', '-q long'), @@ -106,7 +107,7 @@ rule illumina_demux: for opt in ('minimum_base_quality', 'max_mismatches', 'min_mismatch_delta', 'max_no_calls', 'read_structure', 'minimum_quality', 'run_start_date'): if lane.get(opt): opts += ' --%s=%s' % (opt, lane[opt]) - shell("{config[bin_dir]}/illumina.py illumina_demux {input[0]} {wildcards.lane} {outdir} --sampleSheet={input[1]} --sequencing_center={params.center} --outMetrics={output[1]} --flowcell={wildcards.flowcell} {opts}") + shell("{config[bin_dir]}/illumina.py illumina_demux {input[0]} {wildcards.lane} {outdir} --sampleSheet={input[1]} --sequencing_center={params.center} --outMetrics={output[1]} --commonBarcodes {output[2]} --flowcell={wildcards.flowcell} {opts}") def demux_move_bams_inputs(wildcards): From 506e4f0e0cd7e8ff394aa214e0d2f481d0785bbe Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Thu, 7 Jul 2016 22:25:26 -0400 Subject: [PATCH 2/5] change common_barcodes to use ExtractIlluminaBarcodesTool jvmMemDefault --- illumina.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/illumina.py b/illumina.py index d7a41bdcc..7d5cab6a9 100755 --- a/illumina.py +++ b/illumina.py @@ -193,7 +193,7 @@ def parser_common_barcodes(parser=argparse.ArgumentParser()): parser.add_argument('--JVMmemory', help='JVM virtual memory size (default: %(default)s)', - default=tools.picard.IlluminaBasecallsToSamTool.jvmMemDefault) + default=tools.picard.ExtractIlluminaBarcodesTool.jvmMemDefault) util.cmd.common_args(parser, (('loglevel', None), ('version', None), ('tmp_dir', None))) util.cmd.attach_main(parser, main_common_barcodes) return parser From 4b1e9541d572ff054200567345fec75ff7fcab80 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Thu, 7 Jul 2016 22:46:33 -0400 Subject: [PATCH 3/5] PATH prepend fix for easy install env load --- easy-deploy-script/easy-deploy-viral-ngs.sh | 29 ++++++++++++++------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/easy-deploy-script/easy-deploy-viral-ngs.sh b/easy-deploy-script/easy-deploy-viral-ngs.sh index 99c84b840..c8853a87e 100644 --- a/easy-deploy-script/easy-deploy-viral-ngs.sh +++ b/easy-deploy-script/easy-deploy-viral-ngs.sh @@ -106,6 +106,23 @@ else set_locale "en_US.utf8" fi +function prepend_miniconda(){ + if [ -d "$MINICONDA_PATH/bin" ]; then + echo "Miniconda installed." + + echo "Prepending miniconda to PATH..." + export PATH="$MINICONDA_PATH/bin:$PATH" + hash -r + + # update to the latest conda this way, since the shell script + # is often months out of date + conda update -y conda + else + echo "Miniconda directory not found." + exit 1 + fi +} + function install_miniconda(){ if [ -d "$MINICONDA_PATH/bin" ]; then echo "Miniconda directory exists." @@ -135,15 +152,7 @@ function install_miniconda(){ fi if [ -d "$MINICONDA_PATH/bin" ]; then - echo "Miniconda installed." - - echo "Prepending miniconda to PATH..." - export PATH="$MINICONDA_PATH/bin:$PATH" - hash -r - - # update to the latest conda this way, since the shell script - # is often months out of date - conda update -y conda + prepend_miniconda else echo "It looks like the Miniconda installation failed" exit 1 @@ -177,6 +186,8 @@ function create_project(){ } function activate_env(){ + prepend_miniconda + if [ -d "$SCRIPTPATH/$CONTAINING_DIR" ]; then cd $SCRIPTPATH else From fffbbadaa7c1efb054922a3c220bf9d74e0e749e Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Fri, 8 Jul 2016 00:03:53 -0400 Subject: [PATCH 4/5] better handle samplesheet/barcode file rows with spaces at ends of lines --- illumina.py | 2 +- pipes/rules/common.rules | 10 ++++++++-- util/file.py | 9 +++++++-- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/illumina.py b/illumina.py index 7d5cab6a9..83aae7b41 100755 --- a/illumina.py +++ b/illumina.py @@ -462,7 +462,7 @@ def _detect_and_load_sheet(self, infile): row_num = 0 for line in inf: csv.register_dialect('samplesheet', quoting=csv.QUOTE_MINIMAL, escapechar='\\') - row = next(csv.reader([line.rstrip('\n')], dialect="samplesheet")) + row = next(csv.reader([line.strip().rstrip('\n')], dialect="samplesheet")) row = [item.strip() for item in row] # remove leading/trailing whitespace from each item if miseq_skip: if line.startswith('[Data]'): diff --git a/pipes/rules/common.rules b/pipes/rules/common.rules index 39f963252..9aa3f9dc3 100644 --- a/pipes/rules/common.rules +++ b/pipes/rules/common.rules @@ -15,9 +15,15 @@ def set_env_vars(): def read_tab_file(fname): with open(fname, 'rt') as inf: - header = [item.strip() for item in inf.readline().rstrip('\n').split('\t')] + header = [item.strip() for item in inf.readline().strip().rstrip('\n').split('\t')] for line in inf: - yield dict(zip(header, [item.strip() for item in line.rstrip('\n').split('\t')] )) + row = [item.strip() for item in line.rstrip('\n').split('\t')] + if len(row) > len(header): + # truncate the row to the header length, and only include extra items if they are not spaces + # (takes care of the case where the user may enter an extra space at the end of a row) + row = row[:len(header)] + [item for item in row[len(header):] if len(item)] + assert len(header) == len(row) + yield dict(zip(header, row)) def read_samples_file(fname, number_of_chromosomes=1, append_chrom_num=False): if fname==None: diff --git a/util/file.py b/util/file.py index b4d5600eb..ef01ee976 100644 --- a/util/file.py +++ b/util/file.py @@ -139,10 +139,15 @@ def read_tabfile_dict(inFile): row = [item.strip() for item in line.rstrip('\n').split('\t')] if line.startswith('#'): row[0] = row[0][1:] - header = row + header = [item for item in row if len(item)] elif header is None: - header = row + header = [item for item in row if len(item)] else: + # if a row is longer than the header + if len(row) > len(header): + # truncate the row to the header length, and only include extra items if they are not spaces + # (takes care of the case where the user may enter an extra space at the end of a row) + row = row[:len(header)] + [item for item in row[len(header):] if len(item)] assert len(header) == len(row) yield dict((k, v) for k, v in zip(header, row) if v) From 9c23a12fd18d686d9daa771f6a3d8596e1e5494c Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Fri, 8 Jul 2016 01:03:14 -0400 Subject: [PATCH 5/5] missing comma --- pipes/rules/demux.rules | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipes/rules/demux.rules b/pipes/rules/demux.rules index f0112e402..3e87941f3 100644 --- a/pipes/rules/demux.rules +++ b/pipes/rules/demux.rules @@ -91,7 +91,7 @@ def illumina_demux_inputs(wildcards): rule illumina_demux: input: illumina_demux_inputs output: config['tmp_dir']+'/'+config['subdirs']['demux']+'/bams_per_lane/{flowcell}.{lane}/Unmatched.bam', - config['reports_dir']+'/barcodes/barcodes-metrics-{flowcell}.{lane}.txt' + config['reports_dir']+'/barcodes/barcodes-metrics-{flowcell}.{lane}.txt', config['reports_dir']+'/barcodes/common-barcodes-{flowcell}.{lane}.txt' resources: mem=60 params: LSF=config.get('LSF_queues', {}).get('bigmem', '-q flower'), @@ -107,7 +107,7 @@ rule illumina_demux: for opt in ('minimum_base_quality', 'max_mismatches', 'min_mismatch_delta', 'max_no_calls', 'read_structure', 'minimum_quality', 'run_start_date'): if lane.get(opt): opts += ' --%s=%s' % (opt, lane[opt]) - shell("{config[bin_dir]}/illumina.py illumina_demux {input[0]} {wildcards.lane} {outdir} --sampleSheet={input[1]} --sequencing_center={params.center} --outMetrics={output[1]} --commonBarcodes {output[2]} --flowcell={wildcards.flowcell} {opts}") + shell("{config[bin_dir]}/illumina.py illumina_demux {input[0]} {wildcards.lane} {outdir} --sampleSheet={input[1]} --sequencing_center={params.center} --outMetrics={output[1]} --commonBarcodes={output[2]} --flowcell={wildcards.flowcell} {opts}") def demux_move_bams_inputs(wildcards):