Skip to content

Commit

Permalink
Merge pull request #129 from broadinstitute/dp-one-barcode
Browse files Browse the repository at this point in the history
fixes for single-barcode demultiplexing
  • Loading branch information
dpark01 committed Apr 29, 2015
2 parents 1ba2dc4 + 00e34df commit aeeecbf
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 14 deletions.
16 changes: 11 additions & 5 deletions broad_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,20 +119,23 @@ def main_get_all_names(args) :

def make_barcodes_file(inFile, outFile):
    '''Create input file for extract_barcodes.

    Reads the tab-delimited sample sheet ``inFile`` (via
    util.file.read_tabfile_dict) and writes a tab-delimited barcode file
    to ``outFile``: one header line, then one line per input row.

    Columns read from each input row:
      sample                 -- required; used as barcode_name and as the
                                base of library_name
      barcode_1              -- required; written as barcode_sequence_1
      barcode_2              -- optional; written as barcode_sequence_2
      library_id_per_sample  -- optional; when non-empty, '.l' plus this
                                value is appended to library_name

    The barcode_sequence_2 column is emitted only when at least one row
    carries a non-empty barcode_2, so single-barcode sheets produce a
    three-column file.
    '''
    # Read the sheet once and reuse it for both the header decision and the
    # output pass (assumes read_tabfile_dict yields one dict per data row).
    rows = list(util.file.read_tabfile_dict(inFile))

    if any(row.get('barcode_2') for row in rows):
        header = ['barcode_name', 'library_name', 'barcode_sequence_1', 'barcode_sequence_2']
    else:
        header = ['barcode_name', 'library_name', 'barcode_sequence_1']

    with open(outFile, 'wt') as outf:
        outf.write('\t'.join(header)+'\n')
        for row in rows:
            out = {'barcode_sequence_1':row['barcode_1'],
                   # .get(): the barcode_2 column may be absent entirely
                   'barcode_sequence_2':row.get('barcode_2',''),
                   'barcode_name':row['sample'],
                   'library_name':row['sample']}
            if row.get('library_id_per_sample'):
                out['library_name'] += '.l' + row['library_id_per_sample']
            # Only the columns named in header are written, in header order.
            outf.write('\t'.join(out[h] for h in header)+'\n')
def parser_make_barcodes_file(parser=argparse.ArgumentParser()):
    # Register the two positional arguments of make_barcodes_file on
    # `parser` and attach make_barcodes_file as its main function.
    # Only `sample` and `barcode_1` are mandatory input columns: the
    # barcode_2 column is optional so single-barcode runs are accepted.
    parser.add_argument('inFile',
        help='''Input tab file w/header and 2-5 named columns (last three are optional):
                sample, barcode_1, barcode_2, library_id_per_sample, run_id_per_library''')
    parser.add_argument('outFile', help='Output BARCODE_FILE file for Picard.')
    # split_args=True is passed through to util.cmd.attach_main; presumably it
    # makes the parsed arguments arrive as make_barcodes_file(inFile, outFile)
    # -- confirm against util.cmd.
    util.cmd.attach_main(parser, make_barcodes_file, split_args=True)
Expand Down Expand Up @@ -182,14 +185,17 @@ def main_extract_barcodes(args):

def make_params_file(inFile, bamDir, outFile):
'Create input file for illumina_basecalls'
header = ['BARCODE_1', 'BARCODE_2', 'OUTPUT', 'SAMPLE_ALIAS', 'LIBRARY_NAME']
if any(row.get('barcode_2') for row in util.file.read_tabfile_dict(inFile)):
header = ['OUTPUT', 'SAMPLE_ALIAS', 'LIBRARY_NAME', 'BARCODE_1', 'BARCODE_2']
else:
header = ['OUTPUT', 'SAMPLE_ALIAS', 'LIBRARY_NAME', 'BARCODE_1']
with open(outFile, 'wt') as outf:
outf.write('\t'.join(header)+'\n')
rows = list(util.file.read_tabfile_dict(inFile))
rows.append({'barcode_1':'N','barcode_2':'N','sample':'Unmatched'})
for row in rows:
out = {'BARCODE_1':row['barcode_1'],
'BARCODE_2':row['barcode_2'],
'BARCODE_2':row.get('barcode_2',''),
'SAMPLE_ALIAS':row['sample'],
'LIBRARY_NAME':row['sample']}
if row.get('library_id_per_sample'):
Expand Down
16 changes: 10 additions & 6 deletions pipes/rules/demux.rules
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,11 @@ rule illumina_basecalls:
lane = get_one_lane_from_run(wildcards.flowcell, wildcards.lane, config['seqruns_demux'])
dir = lane['bustard_dir']
run_date = lane.get('seq_run_date')
shell("{config[binDir]}/broad_utils.py illumina_basecalls {dir} {input[1]} {wildcards.flowcell} {wildcards.lane} {input[0]} --include_non_pf_reads=false --run_start_date={run_date} --sequencing_center={params.center}")
opts = ''
for opt in ('read_structure',):
if lane.get(opt):
opts += ' --%s=%s' % (opt, lane[opt])
shell("{config[binDir]}/broad_utils.py illumina_basecalls {dir} {input[1]} {wildcards.flowcell} {wildcards.lane} {input[0]} --include_non_pf_reads=false --run_start_date={run_date} --sequencing_center={params.center} {opts}")

def demux_move_bams_inputs(wildcards):
lane = get_one_lane_from_run(wildcards.flowcell, wildcards.lane, config.get('seqruns_demux',''))
Expand All @@ -143,15 +147,15 @@ rule move_bams_demux:
shutil.move(input[0], output[0])

# Convert a pair of gzipped FASTQ files (R1/R2, lane L001, chunk 001) found in
# the source data directory into a single unaligned BAM named after the sample.
rule bams_from_fastq:
    input:  os.path.join(config['dataDir'],config['subdirs']['source'],'{sample}_L001_R1_001.fastq.gz'),
            os.path.join(config['dataDir'],config['subdirs']['source'],'{sample}_L001_R2_001.fastq.gz')
    output: os.path.join(config['dataDir'],config['subdirs']['source'],'{sample}.bam')
    params: LSF=config.get('LSF_queues', {}).get('short', '-W 4:00'),
            logid="{sample}",
            center=config["seq_center"]
    run:
            # Make sure the source directory exists before the tool writes into it.
            makedirs(os.path.join(config['dataDir'], config['subdirs']['source']))
            # {sample} is the only wildcard in the output pattern, so it is used
            # for both the sample name and the library name.
            shell("{config[binDir]}/read_utils.py fastq_to_bam {input} {output} --sampleName {wildcards.sample} --picardOptions PLATFORM=illumina SEQUENCING_CENTER={params.center} LIBRARY_NAME={wildcards.sample} SORT_ORDER=queryname")

ruleorder: move_bams_demux > bams_from_fastq

6 changes: 3 additions & 3 deletions tools/picard.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,8 @@ class ExtractIlluminaBarcodesTool(PicardTools) :
subtoolName = 'ExtractIlluminaBarcodes'
jvmMemDefault = '8g'
defaults = {'read_structure':'101T8B8B101T',
'max_mismatches':1, 'minimum_base_quality':15,
'num_processors':4}
'max_mismatches':0, 'minimum_base_quality':25,
'num_processors':8}
option_list = ('read_structure', 'max_mismatches', 'minimum_base_quality',
'min_mismatch_delta', 'max_no_calls', 'minimum_quality',
'compress_outputs', 'num_processors')
Expand Down Expand Up @@ -190,7 +190,7 @@ class IlluminaBasecallsToSamTool(PicardTools) :
defaults = {'read_structure':'101T8B8B101T', 'sequencing_center':'BI',
'adapters_to_check': ('PAIRED_END', 'NEXTERA_V1', 'NEXTERA_V2'),
'max_reads_in_ram_per_tile':100000, 'max_records_in_ram':100000,
'num_processors':4, 'force_gc':False}
'num_processors':8, 'force_gc':False}
option_list = ('read_structure', 'sequencing_center', 'adapters_to_check',
'platform', 'max_reads_in_ram_per_tile', 'max_records_in_ram', 'num_processors',
'apply_eamss_filter', 'force_gc', 'first_tile', 'tile_limit',
Expand Down

0 comments on commit aeeecbf

Please sign in to comment.