diff --git a/assembly.py b/assembly.py
index 62ae45729..4ac7d638b 100755
--- a/assembly.py
+++ b/assembly.py
@@ -97,7 +97,7 @@ def parser_trim_rmdup_subsamp(parser=argparse.ArgumentParser()):
 __commands__.append(('trim_rmdup_subsamp', parser_trim_rmdup_subsamp))
 
 
-def assemble_trinity(inBam, outFasta, clipDb, n_reads=100000, outReads=None):
+def assemble_trinity(inBam, outFasta, clipDb, n_reads=100000, outReads=None, JVMmemory=None):
     ''' This step runs the Trinity assembler.
         First trim reads with trimmomatic, rmdup with prinseq,
         and random subsample to no more than 100k reads.
@@ -110,7 +110,7 @@ def assemble_trinity(inBam, outFasta, clipDb, n_reads=100000, outReads=None):
     trim_rmdup_subsamp_reads(inBam, clipDb, subsamp_bam, n_reads=n_reads)
     subsampfq = list(map(util.file.mkstempfname, ['.subsamp.1.fastq', '.subsamp.2.fastq']))
     tools.picard.SamToFastqTool().execute(subsamp_bam, subsampfq[0], subsampfq[1])
-    tools.trinity.TrinityTool().execute(subsampfq[0], subsampfq[1], outFasta)
+    tools.trinity.TrinityTool().execute(subsampfq[0], subsampfq[1], outFasta, JVMmemory=JVMmemory)
     os.unlink(subsampfq[0])
     os.unlink(subsampfq[1])
 
@@ -128,6 +128,9 @@ def parser_assemble_trinity(parser=argparse.ArgumentParser()):
         help='Subsample reads to no more than this many pairs. (default %(default)s)')
     parser.add_argument('--outReads', default=None,
         help='Save the trimmomatic/prinseq/subsamp reads to a BAM file')
+    parser.add_argument('--JVMmemory',
+        default=tools.trinity.TrinityTool.jvmMemDefault,
+        help='JVM virtual memory size (default: %(default)s)')
     util.cmd.common_args(parser, (('loglevel',None), ('version',None), ('tmpDir',None)))
     util.cmd.attach_main(parser, assemble_trinity, split_args=True)
     return parser
diff --git a/docs/install.rst b/docs/install.rst
index 3639a51cd..50115454c 100644
--- a/docs/install.rst
+++ b/docs/install.rst
@@ -38,7 +38,7 @@ Additionally, in order to use the **pipeline infrastructure**, Python 3.4
 is required (Python 2 is not supported) and you must install snakemake
 as well::
 
-  pip install snakemake==3.2 yappi==0.94
+  pip install snakemake==3.2.1 yappi==0.94
 
 However, most of the real functionality is encapsulated in the command line
 tools, which can be used without any of the pipeline infrastructure.
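
The JVMmemory change above threads one optional setting through three layers: a class-level default on the tool wrapper, a keyword argument on assemble_trinity, and an argparse flag whose default reads that class attribute. A minimal standalone sketch of the pattern, using illustrative names (DEFAULT_JVM_MEM, run_trinity) rather than the real viral-ngs classes::

    import argparse

    DEFAULT_JVM_MEM = '4g'   # stands in for tools.trinity.TrinityTool.jvmMemDefault

    def run_trinity(left_fq, right_fq, out_fasta, jvm_memory=None):
        # None means "use the shared default", so the CLI, the Snakemake rule,
        # and direct library callers all resolve to the same value.
        if jvm_memory is None:
            jvm_memory = DEFAULT_JVM_MEM
        # Trinity expects sizes like '4G'; normalize case at the boundary.
        return ['Trinity', '--bflyHeapSpace', jvm_memory.upper(),
                '--left', left_fq, '--right', right_fq, '--output', out_fasta]

    if __name__ == '__main__':
        parser = argparse.ArgumentParser()
        parser.add_argument('--JVMmemory', default=DEFAULT_JVM_MEM,
                            help='JVM virtual memory size (default: %(default)s)')
        args = parser.parse_args()
        print(' '.join(run_trinity('r1.fastq', 'r2.fastq', 'out.fasta',
                                   jvm_memory=args.JVMmemory)))
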
diff --git a/pipes/Snakefile b/pipes/Snakefile
index 6c198aab5..167e9fab2 100644
--- a/pipes/Snakefile
+++ b/pipes/Snakefile
@@ -37,10 +37,7 @@ rule all:
     input:
         os.path.join(config["dataDir"], config["subdirs"]["interhost"], 'ref_guided.fasta'),
         os.path.join(config["dataDir"], config["subdirs"]["interhost"], 'ref_guided.vcf.gz'),
         # create summary reports
-        config["reportsDir"]+'/summary.coverage_ref.txt.gz',
-        config["reportsDir"]+'/summary.coverage_self.txt.gz',
         config["reportsDir"]+'/summary.fastqc.txt',
-        config["reportsDir"]+'/summary.bamstats.txt',
         config["reportsDir"]+'/summary.spike_count.txt'
     params: LSF="-N"
     run:
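
For orientation, rule all in the Snakefile is a pure aggregation target: its input list is the set of files the pipeline must produce. A toy Python sketch of the summary-report portion of that list after this change, assuming only a reportsDir key in config::

    import os

    config = {'reportsDir': 'reports'}   # assumed minimal config

    # Only the fastqc and spike-count summaries remain as default targets;
    # the coverage and bamstats summaries are no longer requested.
    summary_reports = [os.path.join(config['reportsDir'], name)
                       for name in ('summary.fastqc.txt', 'summary.spike_count.txt')]
    print(summary_reports)
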
diff --git a/pipes/rules/assembly.rules b/pipes/rules/assembly.rules
index b56cf512d..7782d4dcc 100644
--- a/pipes/rules/assembly.rules
+++ b/pipes/rules/assembly.rules
@@ -19,10 +19,6 @@ rule all_assemble:
         # create BAMs of aligned reads to own consensus
         expand("{dataDir}/{subdir}/{sample}.bam",
             dataDir=config["dataDir"], subdir=config["subdirs"]["align_self"],
-            sample=read_samples_file(config["samples_assembly"])),
-        # assembly reports too
-        expand("{reportsDir}/assembly/{sample}.txt",
-            reportsDir=config["reportsDir"],
             sample=read_samples_file(config["samples_assembly"]))
     params: LSF="-N"
 
@@ -30,10 +26,7 @@ rule all_assemble_failures:
     input:
         expand("{dataDir}/{subdir}/{sample}.fasta",
             dataDir=config["dataDir"], subdir=config["subdirs"]["assembly"],
-            sample=read_samples_file(config.get("samples_assembly_failures"))),
-        expand("{reportsDir}/assembly/{sample}.txt",
-            reportsDir=config["reportsDir"],
-            sample=read_samples_file(config["samples_assembly"]))
+            sample=read_samples_file(config.get("samples_assembly_failures")))
     params: LSF="-N"
 
@@ -45,7 +38,7 @@ rule assemble_trinity:
     ''' This step runs the Trinity assembler.
         First trim reads with trimmomatic, rmdup with prinseq,
         and random subsample to no more than 100k reads.
     '''
    input:  config["dataDir"]+'/'+config["subdirs"]["per_sample"]+'/{sample}.taxfilt.bam'
    output: config["tmpDir"] +'/'+config["subdirs"]["assembly"]+'/{sample}.assembly1-trinity.fasta'
-    resources: mem=3
+    resources: mem=4
     params: LSF=config.get('LSF_queues', {}).get('short', '-W 4:00'),
             n_reads="100000",
             logid="{sample}",
@@ -80,8 +73,7 @@ rule orient_and_impute:
             multi-chromosome genomes.
     '''
     input:  config["tmpDir"]+'/'+config["subdirs"]["assembly"]+'/{sample}.assembly1-trinity.fasta'
-    output: config["tmpDir"]+'/'+config["subdirs"]["assembly"]+'/{sample}.assembly2-vfat.fasta',
-            config["tmpDir"]+'/'+config["subdirs"]["assembly"]+'/{sample}.assembly3-modify.fasta'
+    output: config["tmpDir"]+'/'+config["subdirs"]["assembly"]+'/{sample}.assembly3-modify.fasta'
     resources: mem=3
     params: LSF=config.get('LSF_queues', {}).get('short', '-W 4:00'),
             refGenome=config["ref_genome"],
@@ -90,10 +82,11 @@ rule orient_and_impute:
             renamed_prefix="",
             replace_length="55",
             logid="{sample}",
+            vfat_fasta=config["tmpDir"]+'/'+config["subdirs"]["assembly"]+'/{sample}.assembly2-vfat.fasta',
             subsamp_bam=config["tmpDir"]+'/'+config["subdirs"]["assembly"]+'/{sample}.subsamp.bam'
     run:
-        shell("{config[binDir]}/assembly.py order_and_orient {input} {params.refGenome} {output[0]} --inReads {params.subsamp_bam}")
-        shell("{config[binDir]}/assembly.py impute_from_reference {output[0]} {params.refGenome} {output[1]} --newName {params.renamed_prefix}{wildcards.sample} --replaceLength {params.replace_length} --minLength {params.length} --minUnambig {params.min_unambig}")
+        shell("{config[binDir]}/assembly.py order_and_orient {input} {params.refGenome} {params.vfat_fasta} --inReads {params.subsamp_bam}")
+        shell("{config[binDir]}/assembly.py impute_from_reference {params.vfat_fasta} {params.refGenome} {output} --newName {params.renamed_prefix}{wildcards.sample} --replaceLength {params.replace_length} --minLength {params.length} --minUnambig {params.min_unambig}")
 
 rule refine_assembly_1:
     ''' This a first pass refinement step where we take the VFAT assembly,
@@ -148,8 +141,7 @@ rule map_reads_to_self:
     input:  config["dataDir"]+'/'+config["subdirs"]["per_sample"]+'/{sample}.cleaned.bam',
             config["dataDir"]+'/'+config["subdirs"]["assembly"]+'/{sample}.fasta'
     output: config["dataDir"]+'/'+config["subdirs"]["align_self"]+'/{sample}.bam',
-            config["dataDir"]+'/'+config["subdirs"]["align_self"]+'/{sample}.mapped.bam',
-            config["reportsDir"]+'/assembly/{sample}.txt'
+            config["dataDir"]+'/'+config["subdirs"]["align_self"]+'/{sample}.mapped.bam'
     resources: mem=4
     params: LSF=config.get('LSF_queues', {}).get('short', '-W 4:00'),
             logid="{sample}",
@@ -158,4 +150,3 @@
     run:
         makedirs([os.path.join(config["dataDir"], config["subdirs"]["align_self"]), os.path.join(config["reportsDir"], "assembly")])
         shell("{config[binDir]}/read_utils.py align_and_fix {input} --outBamAll {output[0]} --outBamFiltered {output[1]} --novoalign_options '{params.novoalign_options}'")
-        shell("{config[binDir]}/reports.py assembly_stats {wildcards.sample} {output[2]} --assembly_dir {config[dataDir]}/{config[subdirs][assembly]} --assembly_tmp {config[tmpDir]}/{config[subdirs][assembly]} --align_dir {config[dataDir]}/{config[subdirs][align_self]}")
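
The reworked orient_and_impute run block above chains two assembly.py commands through a scratch path that is now a params entry rather than a declared output, so Snakemake tracks only the final imputed fasta. Roughly the same flow as plain Python, under the assumption that the flags behave as shown in the diff::

    import subprocess

    def orient_and_impute(bin_dir, trinity_fasta, ref_genome, modify_fasta,
                          vfat_fasta, subsamp_bam, sample,
                          min_length, min_unambig, replace_length='55'):
        # Step 1: order and orient Trinity contigs against the reference,
        # writing the untracked VFAT intermediate.
        subprocess.check_call([
            bin_dir + '/assembly.py', 'order_and_orient',
            trinity_fasta, ref_genome, vfat_fasta,
            '--inReads', subsamp_bam])
        # Step 2: impute from the reference and emit the rule's only output.
        subprocess.check_call([
            bin_dir + '/assembly.py', 'impute_from_reference',
            vfat_fasta, ref_genome, modify_fasta,
            '--newName', sample,
            '--replaceLength', replace_length,
            '--minLength', str(min_length),
            '--minUnambig', str(min_unambig)])
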
diff --git a/pipes/rules/reports.rules b/pipes/rules/reports.rules
index a95455790..d4dd4dca3 100644
--- a/pipes/rules/reports.rules
+++ b/pipes/rules/reports.rules
@@ -10,78 +10,10 @@ import os, os.path, gzip, shutil
 
 rule all_reports:
     input:
-        config["reportsDir"]+'/summary.coverage_ref.txt.gz',
-        config["reportsDir"]+'/summary.coverage_self.txt.gz',
         config["reportsDir"]+'/summary.fastqc.txt',
-        config["reportsDir"]+'/summary.bamstats.txt',
         config["reportsDir"]+'/summary.spike_count.txt'
     params: LSF="-N"
 
-#-----------BAMSTATS-------------------
-
-def bamstats_input_file(wildcards):
-    adj = wildcards.adjective
-    if adj in ('raw', 'cleaned'):
-        subdir = "depletion"
-    else:
-        if adj.startswith('self_'):
-            subdir = "align_self"
-        elif adj.startswith('ref_'):
-            subdir = "align_ref"
-        else:
-            raise Exception()
-        if adj.endswith('_mapped') or adj.endswith('_rmdup') or adj.endswith('_realigned'):
-            adj = adj.split('_')[-1]
-        elif adj.endswith('_align'):
-            adj = None
-        else:
-            raise Exception()
-    if adj==None:
-        bamfile = wildcards.sample + '.bam'
-    else:
-        bamfile = '.'.join([wildcards.sample, adj, 'bam'])
-    return os.path.join(config["dataDir"], config["subdirs"][subdir], bamfile)
-
-rule bamstats_report:
-    input: bamstats_input_file
-    output: config["reportsDir"]+'/bamstats/{sample}.{adjective}.txt'
-    params: LSF='-W 0:30 -sp 40',
-            logid="{sample}-{adjective}",
-            tmpf_report=config["reportsDir"]+'/bamstats/{sample}.{adjective}.tmp.txt'
-    run:
-        makedirs(config["reportsDir"]+'/bamstats')
-        shell("use BamTools; bamtools stats -insert -in {input} > {params.tmpf_report}")
-        out = []
-        out.append(['Sample', wildcards.sample])
-        out.append(['BAM', wildcards.adjective])
-        with open(params.tmpf_report, 'rt') as inf:
-            for line in inf:
-                row = line.strip().split(':')
-                if len(row)==2 and row[1]:
-                    k,v = [x.strip() for x in row]
-                    k = k.strip("'")
-                    mo = re.match(r'^(\d+)\s+\(\S+\)$', v)
-                    if mo:
-                        v = mo.group(1)
-                    out.append([k,v])
-        with open(output[0], 'wt') as outf:
-            for row in out:
-                outf.write('\t'.join(row)+'\n')
-        os.unlink(params.tmpf_report)
-
-rule consolidate_bamstats:
-    input:
-        expand("{{dir}}/bamstats/{sample}.{adjective}.txt",
-            adjective=['raw','cleaned',
-                'ref_align', 'ref_mapped', 'ref_rmdup', 'ref_realigned'],
-            sample=read_samples_file(config["samples_per_run"])) + \
-        expand("{{dir}}/bamstats/{sample}.{adjective}.txt",
-            adjective=['self_align', 'self_mapped', 'self_rmdup', 'self_realigned'],
-            sample=read_samples_file(config["samples_assembly"]))
-    output: '{dir}/summary.bamstats.txt'
-    params: logid="all"
-    shell: "{config[binDir]}/reports.py consolidate_bamstats {input} {output}"
-
 #-----------FASTQC---------------------
@@ -106,46 +38,6 @@ rule consolidate_fastqc:
     params: logid="all"
     shell: "{config[binDir]}/reports.py consolidate_fastqc {input} {output}"
 
-#-----------COVERAGE-------------------
-
-def coverage_input_files(wildcards):
-    if wildcards.adjective == 'ref':
-        ref = config["ref_genome"]
-        reads = os.path.join(config["dataDir"], config["subdirs"]["align_ref"], wildcards.sample + '.realigned.bam')
-    elif wildcards.adjective == 'self':
-        ref = os.path.join(config["dataDir"], config["subdirs"]["assembly"], wildcards.sample + '.fasta')
-        reads = os.path.join(config["dataDir"], config["subdirs"]["align_self"], wildcards.sample + '.realigned.bam')
-    else:
-        raise Exception()
-    return (ref, reads)
-
-rule coverage_report:
-    input: coverage_input_files
-    output: config["reportsDir"]+'/coverage/{sample}.coverage_{adjective}.txt'
-    resources: mem=3
-    params: LSF=config.get('LSF_queues', {}).get('short', '-W 4:00') + " -sp 40",
-            logid="{sample}-{adjective}"
-    run:
-        makedirs(os.path.join(config["reportsDir"], 'coverage'))
-        shell("/idi/sabeti-scratch/kandersen/bin/bedtools/bedtools genomecov -d -ibam {input[1]} -g {input[0]} > {output}")
-
-def consolidate_coverage_inputs(wildcards):
-    if wildcards.adjective == 'ref':
-        samples = config['samples_per_run']
-    elif wildcards.adjective == 'self':
-        samples = config['samples_assembly']
-    else:
-        raise Exception()
-    return [wildcards.dir+'/coverage/'+sample+'.coverage_'+wildcards.adjective+'.txt'
-        for sample in read_samples_file(samples)]
-
-rule consolidate_coverage:
-    input: consolidate_coverage_inputs
-    output: '{dir}/summary.coverage_{adjective}.txt.gz'
-    params: LSF=config.get('LSF_queues', {}).get('short', '-W 4:00'),
-            logid="all"
-    shell: "{config[binDir]}/reports.py consolidate_coverage {input} {wildcards.adjective} {output}"
-
 #-----------SPIKE-INS------------------
@@ -171,5 +63,5 @@ rule consolidate_spike_count:
             sample=read_samples_file(config["samples_per_run"]))
     output: '{dir}/summary.spike_count.txt'
     params: logid="all"
-    shell: "{config[binDir]}/reports.py consolidate_spike_count {input} {output}"
+    shell: "{config[binDir]}/reports.py consolidate_spike_count {wildcards.dir}/spike_count {output}"
diff --git a/pipes/rules/reports_only b/pipes/rules/reports_only
index 6e4997703..0d7b30e7e 100644
--- a/pipes/rules/reports_only
+++ b/pipes/rules/reports_only
@@ -52,5 +52,10 @@ rule assembly_report:
     resources: mem=2
     params: LSF=config.get('LSF_queues', {}).get('short', '-W 4:00'),
             logid="{sample}"
-    shell: "{config[binDir]}/reports.py assembly_stats {wildcards.sample} {output} --assembly_dir {config[dataDir]}/{config[subdirs][assembly]} --assembly_tmp {config[tmpDir]}/{config[subdirs][assembly]} --align_dir {config[dataDir]}/{config[subdirs][align_self]}"
+    shell: "{config[binDir]}/reports.py assembly_stats {wildcards.sample} {output}" \
+           " --assembly_dir {config[dataDir]}/{config[subdirs][assembly]}" \
+           " --assembly_tmp {config[tmpDir]}/{config[subdirs][assembly]}" \
+           " --align_dir {config[dataDir]}/{config[subdirs][align_self]}" \
+           " --reads_dir {config[dataDir]}/{config[subdirs][per_sample]}" \
+           " --raw_reads_dir {config[dataDir]}/{config[subdirs][source]}"
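
The rewritten shell directive above leans on Python's compile-time concatenation of adjacent string literals across backslash-continued lines; the leading space inside each continuation fragment is what keeps the flags separated. A two-line demonstration, with illustrative paths::

    cmd = ("reports.py assembly_stats sample1 out.txt"
           " --assembly_dir data/02_assembly")    # note the leading space
    assert cmd.endswith("out.txt --assembly_dir data/02_assembly")
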
diff --git a/reports.py b/reports.py
index bb3b77e32..77129e525 100755
--- a/reports.py
+++ b/reports.py
@@ -23,16 +23,26 @@ def get_assembly_stats(sample,
                        cov_thresholds=(1,5,20,100),
                        assembly_dir='data/02_assembly', assembly_tmp='tmp/02_assembly',
-                       align_dir='data/02_align_to_self'):
+                       align_dir='data/02_align_to_self', reads_dir='data/01_per_sample',
+                       raw_reads_dir='data/00_raw'):
     ''' Fetch assembly-level statistics for a given sample '''
     out = {'sample':sample}
     samtools = tools.samtools.SamtoolsTool()
-    header = ['sample', 'assembled_trinity', 'trinity_in_reads',
+    header = ['sample', 'reads_raw', 'reads_cleaned', 'reads_taxfilt',
+              'assembled_trinity', 'trinity_in_reads',
               'n_contigs', 'contig_len', 'unambig_bases', 'pct_unambig',
               'aln2self_reads_tot', 'aln2self_reads_aln',
               'aln2self_reads_rmdup', 'aln2self_pct_nondup',
               'aln2self_cov_median', 'aln2self_cov_mean', 'aln2self_cov_mean_non0',
               ] + ['aln2self_cov_%dX'%t for t in cov_thresholds]
+    # per-sample unaligned read stats
+    for adj in ('cleaned', 'taxfilt'):
+        reads_bam = os.path.join(reads_dir, '.'.join((sample, adj, 'bam')))
+        if os.path.isfile(reads_bam):
+            out['reads_'+adj] = samtools.count(reads_bam)
+    out['reads_raw'] = sum(samtools.count(bam)
+        for bam in glob.glob(os.path.join(raw_reads_dir, sample+"*.bam")))
+
     # pre-assembly stats
     out['assembled_trinity'] = os.path.isfile(os.path.join(assembly_tmp,
         sample + '.assembly1-trinity.fasta')) and 1 or 0
@@ -43,11 +53,14 @@ def get_assembly_stats(sample,
     # assembly stats
     assembly_fname = os.path.join(assembly_dir, sample + '.fasta')
     if not os.path.isfile(assembly_fname):
-        out['n_contigs'] = 0
-        return (header, out)
+        assembly_fname = os.path.join(assembly_tmp, sample + '.assembly2-vfat.fasta')
+        if not os.path.isfile(assembly_fname):
+            out['n_contigs'] = 0
+            return (header, out)
     with open(assembly_fname, 'rt') as inf:
         counts = [(len(s), assembly.unambig_count(s.seq))
-            for s in Bio.SeqIO.parse(inf, 'fasta')]
+            for s in Bio.SeqIO.parse(inf, 'fasta')
+            if len(s)>0]
     out['n_contigs'] = len(counts)
     out['contig_len'] = ','.join(str(x) for x,y in counts)
     out['unambig_bases'] = ','.join(str(y) for x,y in counts)
@@ -76,13 +89,17 @@ def get_assembly_stats(sample,
     return (header, out)
 
 def assembly_stats(samples, outFile,
-        cov_thresholds, assembly_dir, assembly_tmp, align_dir):
+        cov_thresholds, assembly_dir, assembly_tmp, align_dir,
+        reads_dir, raw_reads_dir):
     ''' Fetch assembly-level statistics for a given sample '''
     header_written = False
     with open(outFile, 'wt') as outf:
         for sample in samples:
             log.info("fetching stats on "+sample)
-            header, out = get_assembly_stats(sample, cov_thresholds, assembly_dir, assembly_tmp, align_dir)
+            header, out = get_assembly_stats(sample,
+                cov_thresholds=cov_thresholds, assembly_dir=assembly_dir,
+                assembly_tmp=assembly_tmp, align_dir=align_dir,
+                reads_dir=reads_dir, raw_reads_dir=raw_reads_dir)
             if not header_written:
                 outf.write('\t'.join(map(str, header))+'\n')
                 header_written = True
@@ -100,6 +117,10 @@ def parser_assembly_stats(parser=argparse.ArgumentParser()):
         help='Directory with assembly temp files. (default: %(default)s)')
     parser.add_argument('--align_dir', default='data/02_align_to_self',
         help='Directory with reads aligned to own assembly. (default: %(default)s)')
+    parser.add_argument('--reads_dir', default='data/01_per_sample',
+        help='Directory with unaligned filtered read BAMs. (default: %(default)s)')
+    parser.add_argument('--raw_reads_dir', default='data/00_raw',
+        help='Directory with unaligned raw read BAMs. (default: %(default)s)')
     util.cmd.attach_main(parser, assembly_stats, split_args=True)
     return parser
 __commands__.append(('assembly_stats', parser_assembly_stats))
@@ -110,31 +131,6 @@ def get_refalign_stats(sample):
     pass
 
 
-def consolidate_bamstats(inFiles, outFile):
-    '''Consolidate multiple bamstats reports into one.'''
-    with util.file.open_or_gzopen(outFile, 'wt') as outf:
-        header = []
-        out_n = 0
-        for fn in inFiles:
-            out = {}
-            with util.file.open_or_gzopen(fn, 'rt') as inf:
-                for line in inf:
-                    k,v = line.rstrip('\n').split('\t')
-                    out[k] = v
-                    if out_n==0:
-                        header.append(k)
-            if out_n==0:
-                outf.write('\t'.join(header)+'\n')
-            outf.write('\t'.join([out.get(h,'') for h in header])+'\n')
-            out_n += 1
-def parser_consolidate_bamstats(parser=argparse.ArgumentParser()):
-    parser.add_argument('inFiles', help='Input report files.', nargs='+')
-    parser.add_argument('outFile', help='Output report file.')
-    util.cmd.attach_main(parser, consolidate_bamstats, split_args=True)
-    return parser
-__commands__.append(('consolidate_bamstats', parser_consolidate_bamstats))
-
-
 def consolidate_fastqc(inDirs, outFile):
     '''Consolidate multiple FASTQC reports into one.'''
@@ -200,80 +196,14 @@ def get_earliest_date(inDir):
     earliest = min(os.path.getmtime(fn) for fn in fnames)
     return time.strftime("%Y-%m-%d", time.localtime(earliest))
 
-def coverage_summary(inFiles, ending, outFile, runFile=None, statsDir=None, thresholds=(1,5,20,100)):
-    seqinfo = runFile and get_lib_info(runFile) or {}
-    baminfo = statsDir and get_bam_info(statsDir) or {}
-    with util.file.open_or_gzopen(outFile, 'wt') as outf:
-        header = ['library'] + ['sites_cov_%dX'%t for t in thresholds] + ['median_cov', 'mean_cov', 'mean_non0_cov']
-        header_bam = ['reads_total', 'reads_non_Hs_rmdup', 'reads_EBOV', 'reads_EBOV_rmdup']
-        header_seq = ['sample','seq_flowcell_lane', 'seq_mux_barcode', 'seq_plate_well', 'seq_date', 'KGH_Tube_ID']
-        if baminfo:
-            header += header_bam
-        if seqinfo:
-            header += header_seq
-        outf.write('\t'.join(header)+'\n')
-        for fn in inFiles:
-            if not fn.endswith(ending):
-                raise Exception()
-            s = os.path.basename(fn)[:-len(ending)]
-            with util.file.open_or_gzopen(fn, 'rt') as inf:
-                coverages = list(int(line.rstrip('\n').split('\t')[2]) for line in inf)
-            out = [sum(1 for n in coverages if n>=thresh) for thresh in thresholds]
-            out = [s] + out
-            out +=[median(coverages), "%0.3f"%mean(coverages),
-                "%0.3f"%mean([n for n in coverages if n>0])]
-            if baminfo:
-                if s in baminfo:
-                    out += [baminfo[s].get(adj,'') for adj in ('raw', 'cleaned', 'ref_mapped', 'ref_rmdup')]
-                else:
-                    out += ['' for h in header_bam]
-            if seqinfo:
-                if s in seqinfo:
-                    out += [','.join(util.misc.unique(run[i] for run in seqinfo[s]))
-                        for i in range(len(header_seq))]
-                else:
-                    out += ['' for h in header_seq]
-            outf.write('\t'.join(map(str,out))+'\n')
-def parser_coverage_summary(parser=argparse.ArgumentParser()):
-    parser.add_argument('coverageDir', help='Input coverage report directory.')
-    parser.add_argument('coverageSuffix', help='Suffix of all coverage files.')
-    parser.add_argument('outFile', help='Output report file.')
-    parser.add_argument('--runFile', help='Link in plate info from seq runs.', default=None)
-    parser.add_argument('--bamstatsDir', help='Link in read info from BAM alignments.', default=None)
-    util.cmd.attach_main(parser, main_coverage_summary)
-    return parser
-def main_coverage_summary(args):
-    '''Produce summary stats of genome coverage.'''
-    inFiles = list(glob.glob(os.path.join(args.coverageDir, "*"+args.coverageSuffix)))
-    coverage_summary(inFiles, args.coverageSuffix, args.outFile, args.runFile, args.bamstatsDir)
-    return 0
-__commands__.append(('coverage_summary', parser_coverage_summary))
-
-def consolidate_coverage(inFiles, adj, outFile):
-    '''Consolidate multiple coverage reports into one.'''
-    ending = '.coverage_%s.txt' % adj
-    with util.file.open_or_gzopen(outFile, 'wt') as outf:
-        for fn in inFiles:
-            if not fn.endswith(ending):
-                raise Exception()
-            s = os.path.basename(fn)[:-len(ending)]
-            with open(fn, 'rt') as inf:
-                for line in inf:
-                    outf.write(line.rstrip('\n') + '\t' + s + '\n')
-def parser_consolidate_coverage(parser=argparse.ArgumentParser()):
-    parser.add_argument('inFiles', help='Input coverage files.', nargs='+')
-    parser.add_argument('adj', help='Report adjective.')
-    parser.add_argument('outFile', help='Output report file.')
-    util.cmd.attach_main(parser, consolidate_coverage, split_args=True)
-    return parser
-__commands__.append(('consolidate_coverage', parser_consolidate_coverage))
 
-def consolidate_spike_count(inFiles, outFile):
+def consolidate_spike_count(inDir, outFile):
     '''Consolidate multiple spike count reports into one.'''
     with open(outFile, 'wt') as outf:
-        for fn in inFiles:
+        for fn in os.listdir(inDir):
+            fn = os.path.join(inDir, fn)
             s = os.path.basename(fn)
             if not s.endswith('.spike_count.txt'):
                 raise Exception()
@@ -284,7 +214,7 @@
                 spike, count = line.strip().split('\t')
                 outf.write('\t'.join([s, spike, count])+'\n')
 def parser_consolidate_spike_count(parser=argparse.ArgumentParser()):
-    parser.add_argument('inFiles', help='Input coverage files.', nargs='+')
+    parser.add_argument('inDir', help='Input spike count directory.')
     parser.add_argument('outFile', help='Output report file.')
     util.cmd.attach_main(parser, consolidate_spike_count, split_args=True)
     return parser
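
consolidate_spike_count now takes the report directory itself instead of an expanded file list, which keeps the consolidation command line a fixed size no matter how many samples ran. A condensed sketch of the directory-walking version; deriving the sample name by stripping the suffix is an assumption, since that part of the function falls between the hunks above::

    import os

    def consolidate_spike_count(inDir, outFile):
        suffix = '.spike_count.txt'
        with open(outFile, 'wt') as outf:
            for fn in sorted(os.listdir(inDir)):      # sorted() added here for stable output
                if not fn.endswith(suffix):
                    raise Exception('unexpected file: %s' % fn)
                s = fn[:-len(suffix)]                 # assumed sample-name derivation
                with open(os.path.join(inDir, fn), 'rt') as inf:
                    for line in inf:
                        spike, count = line.strip().split('\t')
                        outf.write('\t'.join([s, spike, count]) + '\n')
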
diff --git a/tools/trinity.py b/tools/trinity.py
index 249779157..8df9bee01 100644
--- a/tools/trinity.py
+++ b/tools/trinity.py
@@ -18,6 +18,7 @@
 
 
 class TrinityTool(tools.Tool) :
+    jvmMemDefault = '4g'
     def __init__(self, install_methods = None) :
         if install_methods == None :
             install_methods = [
@@ -28,10 +29,14 @@ def __init__(self, install_methods = None) :
     def version(self) :
         return tool_version
 
-    def execute(self, inFastq1, inFastq2, outFasta, min_contig_length=300):
+    def execute(self, inFastq1, inFastq2, outFasta, min_contig_length=300,
+                JVMmemory=None):
+        if JVMmemory==None:
+            JVMmemory = self.jvmMemDefault
         outdir = tempfile.mkdtemp(prefix='trinity-')
         cmd = [self.install_and_get_path(),
-               '--CPU', '1',
+               '--CPU', '2',
+               '--bflyHeapSpace', JVMmemory.upper(),
                '--min_contig_length', str(min_contig_length),
                '--seqType', 'fq',
                '--left', inFastq1,
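
This last hunk is cut off mid-command, but the shape of the patched execute() is clear: bump --CPU to 2 and cap the heap of Trinity's JVM-based Butterfly stage with --bflyHeapSpace. A rough standalone sketch under that reading; the subprocess call and the trailing --right/--output arguments are assumptions about the truncated remainder::

    import subprocess, tempfile

    def execute_trinity(trinity_path, in_fastq1, in_fastq2,
                        jvm_memory='4g', min_contig_length=300):
        outdir = tempfile.mkdtemp(prefix='trinity-')   # scratch dir, as in the patch
        cmd = [trinity_path,
               '--CPU', '2',                           # raised from 1 in this commit
               '--bflyHeapSpace', jvm_memory.upper(),  # Butterfly JVM heap, e.g. '4G'
               '--min_contig_length', str(min_contig_length),
               '--seqType', 'fq',
               '--left', in_fastq1,
               '--right', in_fastq2,                   # assumed: beyond the truncation
               '--output', outdir]                     # assumed: beyond the truncation
        subprocess.check_call(cmd)
        return outdir
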