Skip to content

Commit

Permalink
Implement829 (#830)
Browse files Browse the repository at this point in the history
* Add plotCoverage --BED

* Add coverage threshold stuff

* tweak

* OK, this seems to do the trick

* bump version

* take a stab at a galaxy wrapper

* update galaxy wrapper, sort -ct
  • Loading branch information
dpryan79 authored May 17, 2019
1 parent 57a6dbb commit f8e7bd8
Show file tree
Hide file tree
Showing 8 changed files with 122 additions and 9 deletions.
6 changes: 6 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
3.3.0

* `plotCoverage` now has a `--BED` option, to restrict plots and output to apply to a specific set of regions given by a BED or GTF file or files (issue #829).
* `plotCoverage` now has a `--DepthSummary` option, which produces a summary similar to GATK's DepthOfCoverage (issue #828).
* `plotCoverage` is now able to compute coverage metrics for arbitrary coverage thresholds using multiples of the `-ct` option (e.g., `-ct 0 -ct 10 -ct 20 -ct 30`).

3.2.1

* Changed a bug in `estimateReadFiltering` where the estimated number of filtered reads was typically too low.
Expand Down
2 changes: 1 addition & 1 deletion deeptools/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
# This file is originally generated from Git information by running 'setup.py
# version'. Distribution tarballs contain a pre-generated copy of this file.

__version__ = '3.2.1'
__version__ = '3.3.0'
12 changes: 10 additions & 2 deletions deeptools/countReadsPerBin.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@ class CountReadsPerBin(object):
mappedList : list
For each BAM file in bamFilesList, the number of mapped reads in the file.
bed_and_bin : boolean
If true AND a bedFile is given, compute coverage of each bin of the given size in each region of bedFile
Returns
-------
numpy array
Expand Down Expand Up @@ -171,6 +174,7 @@ def __init__(self, bamFilesList, binLength=50, numberOfSamples=None, numberOfPro
minFragmentLength=0,
maxFragmentLength=0,
out_file_for_raw_data=None,
bed_and_bin=False,
statsList=[],
mappedList=[]):

Expand All @@ -181,6 +185,7 @@ def __init__(self, bamFilesList, binLength=50, numberOfSamples=None, numberOfPro
self.statsList = statsList
self.mappedList = mappedList
self.skipZeroOverZero = skipZeroOverZero
self.bed_and_bin = bed_and_bin

if extendReads and len(bamFilesList):
from deeptools.getFragmentAndReadSize import get_read_and_fragment_length
Expand Down Expand Up @@ -463,7 +468,10 @@ def count_reads_in_region(self, chrom, start, end, bed_regions_list=None):
# A list of lists of tuples
transcriptsToConsider = []
if bed_regions_list is not None:
transcriptsToConsider = [x[1] for x in bed_regions_list]
if self.bed_and_bin:
transcriptsToConsider.append([(x[1][0][0], x[1][0][1], self.binLength) for x in bed_regions_list])
else:
transcriptsToConsider = [x[1] for x in bed_regions_list]
else:
if self.stepSize == self.binLength:
transcriptsToConsider.append([(start, end, self.binLength)])
Expand All @@ -484,7 +492,7 @@ def count_reads_in_region(self, chrom, start, end, bed_regions_list=None):
for bam in bam_handles:
for trans in transcriptsToConsider:
tcov = self.get_coverage_of_region(bam, chrom, trans)
if bed_regions_list is not None:
if bed_regions_list is not None and not self.bed_and_bin:
subnum_reads_per_bin.append(np.sum(tcov))
else:
subnum_reads_per_bin.extend(tcov)
Expand Down
45 changes: 43 additions & 2 deletions deeptools/plotCoverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,34 @@ def required_args():
type=int,
default=1000000)

optional.add_argument('--BED',
help='Limits the coverage analysis to '
'the regions specified in these files. This overrides --numberOfSamples. '
'Due to memory requirements, it is inadvised to combine this with '
'--outRawCounts or many tens of thousands of regions, as per-base '
'coverage is used!',
metavar='FILE1.bed FILE2.bed',
nargs='+')

optional.add_argument('--outRawCounts',
help='Save raw counts (coverages) to file.',
type=parserCommon.writableFile,
metavar='FILE')

optional.add_argument('--outCoverageMetrics',
help='Save percentage of bins/regions above the specified thresholds to '
'the specified file. The coverage thresholds are specified by '
'--coverageThresholds. If no coverage thresholds are specified, the file '
'will be empty.',
type=parserCommon.writableFile,
metavar='FILE')

optional.add_argument('--coverageThresholds', '-ct',
type=int,
action="append",
help='The percentage of reported bins/regions with signal at least as '
'high as the given threshold. This can be specified multiple times.')

optional.add_argument('--plotHeight',
help='Plot height in cm.',
type=float,
Expand All @@ -148,11 +171,17 @@ def required_args():
def main(args=None):
args = process_args(args)

if args.outRawCounts is None and args.plotFile is None:
sys.exit("At least one of --plotFile and --outRawCounts are required.\n")
if not args.outRawCounts and not args.plotFile and not args.outCoverageMetrics:
sys.exit("At least one of --plotFile, --outRawCounts and --outCoverageMetrics are required.\n")

if 'BED' in args:
bed_regions = args.BED
else:
bed_regions = None

cr = countR.CountReadsPerBin(args.bamfiles,
binLength=1,
bedFile=bed_regions,
numberOfSamples=args.numberOfSamples,
numberOfProcessors=args.numberOfProcessors,
verbose=args.verbose,
Expand All @@ -166,10 +195,22 @@ def main(args=None):
samFlag_exclude=args.samFlagExclude,
minFragmentLength=args.minFragmentLength,
maxFragmentLength=args.maxFragmentLength,
bed_and_bin=True,
out_file_for_raw_data=args.outRawCounts)

num_reads_per_bin = cr.run()

if args.outCoverageMetrics and args.coverageThresholds:
args.coverageThresholds.sort() # Galaxy in particular tends to give things in a weird order
of = open(args.outCoverageMetrics, "w")
of.write("Sample\tThreshold\tPercent\n")
nbins = float(num_reads_per_bin.shape[0])
for thresh in args.coverageThresholds:
vals = np.sum(num_reads_per_bin >= thresh, axis=0)
for lab, val in zip(args.labels, vals):
of.write("{}\t{}\t{:6.3f}\n".format(lab, thresh, 100. * val / nbins))
of.close()

if args.outRawCounts:
# append to the generated file the
# labels
Expand Down
4 changes: 2 additions & 2 deletions galaxy/wrapper/deepTools_macros.xml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
<macros>

<token name="@THREADS@">--numberOfProcessors "\${GALAXY_SLOTS:-4}"</token>
<token name="@WRAPPER_VERSION@">3.2.1.0</token>
<token name="@WRAPPER_VERSION@">3.3.0.0</token>
<xml name="requirements">
<requirements>
<requirement type="package" version="3.2.1">deeptools</requirement>
<requirement type="package" version="3.3.0">deeptools</requirement>
<requirement type="package" version="1.9">samtools</requirement>
</requirements>
<expand macro="stdio" />
Expand Down
53 changes: 51 additions & 2 deletions galaxy/wrapper/plotCoverage.xml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,23 @@
--outRawCounts '$outFileRawCounts'
#end if
#if ' '.join(map(str, $BED)) != 'None':
#set bedFileLList=[]
#for $f in $BED:
#silent $bedFileList.append("'%s'" % $f)
#end for
#if $bedFileList != ["'None'"]:
--BED #echo ' '.join($bedFileList)#
#end if
#end if
#if $coverageOpt.showCoverageOpt == "yes":
--outCoverageMetrics '$outFileCoverageMetrics'
#for $t in $coverageOpt.thresholds:
-ct $t.coverageThreshold
#end for
#end if
#if $advancedOpt.showAdvancedOpt == "yes":
--numberOfSamples '$advancedOpt.numberOfSamples'
$advancedOpt.skipZeros
Expand All @@ -48,11 +65,28 @@

<expand macro="multiple_input_bams" MIN="1"/>
<expand macro="custom_sample_labels" />
<param argument="--BED" type="data" format="bed,gtf" multiple="true" optional="true" min="0"
label="Regions of interest"
help="Limits the coverage analysis to the regions specified in these files. This overrides --numberOfSamples. It is inadvisable to combine this with saving the raw counts." />

<conditional name="coverageOpt">
<param name="showCoverageOpt" type="select" label="Show coverage metrics options">
<option value="no" selected="true">No</option>
<option value="yes">Yes</option>
</param>
<when value="no" />
<when value="yes">
<param argument="--outCoverageMetrics" type="boolean" label="Save per-threshold coverage metrics?"/>
<repeat name="thresholds" title="Coverage Thresholds">
<param argument="--coverageThreshold" type="integer" min="0" label="Coverage Threshold" value="0"/>
</repeat>
</when>
</conditional>

<conditional name="advancedOpt">
<param name="showAdvancedOpt" type="select" label="Show advanced options" >
<option value="no" selected="true">no</option>
<option value="yes">yes</option>
<option value="no" selected="true">No</option>
<option value="yes">Yes</option>
</param>
<when value="no" />
<when value="yes">
Expand All @@ -78,6 +112,9 @@
<data format="tabular" name="outFileRawCounts" label="${tool.name} on ${on_string}: bin counts">
<filter>outRawCounts is True</filter>
</data>
<data format="tabular" name="outFileCoverageMetrics" label="${tool.name} on ${on_string}: Threshold Metrics">
<filter>coverageOpt.outCoverageMetrics is True</filter>
</data>
</outputs>
<tests>
<test>
Expand All @@ -89,6 +126,18 @@
<output name="outFileRawCounts" file="plotCoverage_result1.tabular" ftype="tabular" />
<output name="outFileName" file="plotCoverage_result1.png" ftype="png" compare="sim_size" delta="2400" />
</test>
<test>
<param name="bamfiles" value="bowtie2 test1.bam,bowtie2 test1.bam" ftype="bam" />
<param name="showAdvancedOpt" value="yes" />
<param name="plotTitle" value="Test Title from Galaxy" />
<param name="showCoverageOpt" value="yes" />
<param name="coverageThreshold" value="0" />
<param name="coverageThreshold" value="5" />
<param name="coverageThreshold" value="10" />
<param name="coverageThreshold" value="20" />
<output name="outFileName" file="plotCoverage_result1.png" ftype="png" compare="sim_size" delta="2400" />
<output name="outFileCoverageMetrics" file="plotCoverage.metrics" ftype="tabular" />
</test>
</tests>
<help>
<![CDATA[
Expand Down
9 changes: 9 additions & 0 deletions galaxy/wrapper/test-data/plotCoverage.metrics
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Sample Threshold Percent
bowtie2 test1.bam 0 100.000
bowtie2 test1.bam 0 100.000
bowtie2 test1.bam 5 1.509
bowtie2 test1.bam 5 1.509
bowtie2 test1.bam 10 1.461
bowtie2 test1.bam 10 1.461
bowtie2 test1.bam 20 1.406
bowtie2 test1.bam 20 1.406
Binary file modified galaxy/wrapper/test-data/plotCoverage_result1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit f8e7bd8

Please sign in to comment.