Skip to content

Commit

Permalink
Merge pull request #547 from broadinstitute/ct-stub-out-doubly-mapped…
Browse files Browse the repository at this point in the history
…-failure-mode

harden v-phaser2 against empty input
  • Loading branch information
tomkinsc authored Dec 12, 2016
2 parents 4318fc9 + 7abdec9 commit 21c0864
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 1 deletion.
8 changes: 8 additions & 0 deletions intrahost.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,14 @@ def vphaser_one_sample(inBam, inConsFasta, outTab, vphaserNumThreads=None,
samtoolsTool.index(bam_to_process)
os.unlink(leading_or_trailing_indels_removed)

# For low-quality data, the process of removing doubly-mapped reads
# can remove all reads. In such cases, stub out an empty vphaser output
# file to allow the pipeline to continue
if samtoolsTool.count(bam_to_process) == 0:
log.warning("The bam file %s has 0 reads after removing doubly-mapped reads. Writing blank V-Phaser output.", bam_to_process)
util.file.touch(outTab)
return None

variantIter = Vphaser2Tool().iterate(bam_to_process, vphaserNumThreads)
filteredIter = filter_strand_bias(variantIter, minReadsEach, maxBias)

Expand Down
Binary file added test/input/TestPerSample/in.oneunmapped.bam
Binary file not shown.
17 changes: 16 additions & 1 deletion test/integration/test_intrahost.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import test
import tools

@unittest.skipIf(tools.is_osx(), "vphaser2 osx binary from bioconda has issues")
#@unittest.skipIf(tools.is_osx(), "vphaser2 osx binary from bioconda has issues")
class TestPerSample(test.TestCaseWithTmp):
''' This tests step 1 of the iSNV calling process
(intrahost.vphaser_one_sample), which runs V-Phaser2 on
Expand All @@ -43,6 +43,21 @@ def test_vphaser_one_sample_indels(self):
expected = os.path.join(myInputDir, 'vphaser_one_sample_indels_expected.txt')
self.assertEqualContents(outTab, expected)

def test_vphaser_one_sample_one_mate_unpaired(self):
# Files here were created as follows:
# ref.indels.fasta is Seed-stock-137_S2_L001_001.fasta
# in.oneunmapped.bam was created from in.indels.bam, with flag 0->89, 16->73:
# When removing doubly mapped reads, doing so can result in all reads
# being removed in the case of low-quality runs with few reads
# This tests that when v-phaser2 input is empty, a blank
# file is created as output.
myInputDir = util.file.get_test_input_path(self)
inBam = os.path.join(myInputDir, 'in.oneunmapped.bam')
refFasta = os.path.join(myInputDir, 'ref.indels.fasta')
outTab = util.file.mkstempfname('.txt')
intrahost.vphaser_one_sample(inBam, refFasta, outTab, vphaserNumThreads=4, minReadsEach=0, removeDoublyMappedReads=True)
assert os.path.getsize(outTab) == 0

def test_vphaser_one_sample_2libs(self):
# in.2libs.bam was created by "manually" editing in.bam and moving about
# 1/3 of the reads to ReadGroup2.
Expand Down

0 comments on commit 21c0864

Please sign in to comment.