diff --git a/intrahost.py b/intrahost.py index 917cd019c..1f073c329 100755 --- a/intrahost.py +++ b/intrahost.py @@ -133,6 +133,14 @@ def vphaser_one_sample(inBam, inConsFasta, outTab, vphaserNumThreads=None, samtoolsTool.index(bam_to_process) os.unlink(leading_or_trailing_indels_removed) + # For low-quality data, the process of removing doubly-mapped reads + # can remove all reads. In such cases, stub out an empty vphaser output + # file to allow the pipeline to continue + if samtoolsTool.count(bam_to_process) == 0: + log.warning("The bam file %s has 0 reads after removing doubly-mapped reads. Writing blank V-Phaser output.", bam_to_process) + util.file.touch(outTab) + return None + variantIter = Vphaser2Tool().iterate(bam_to_process, vphaserNumThreads) filteredIter = filter_strand_bias(variantIter, minReadsEach, maxBias) diff --git a/test/input/TestPerSample/in.oneunmapped.bam b/test/input/TestPerSample/in.oneunmapped.bam new file mode 100644 index 000000000..5fca12fba Binary files /dev/null and b/test/input/TestPerSample/in.oneunmapped.bam differ diff --git a/test/integration/test_intrahost.py b/test/integration/test_intrahost.py index a97691781..bb7bbc981 100644 --- a/test/integration/test_intrahost.py +++ b/test/integration/test_intrahost.py @@ -16,7 +16,7 @@ import test import tools -@unittest.skipIf(tools.is_osx(), "vphaser2 osx binary from bioconda has issues") +#@unittest.skipIf(tools.is_osx(), "vphaser2 osx binary from bioconda has issues") class TestPerSample(test.TestCaseWithTmp): ''' This tests step 1 of the iSNV calling process (intrahost.vphaser_one_sample), which runs V-Phaser2 on @@ -43,6 +43,21 @@ def test_vphaser_one_sample_indels(self): expected = os.path.join(myInputDir, 'vphaser_one_sample_indels_expected.txt') self.assertEqualContents(outTab, expected) + def test_vphaser_one_sample_one_mate_unpaired(self): + # Files here were created as follows: + # ref.indels.fasta is Seed-stock-137_S2_L001_001.fasta + # in.oneunmapped.bam was created from in.indels.bam, with flag 0->89, 16->73: + # When removing doubly mapped reads, doing so can result in all reads + # being removed in the case of low-quality runs with few reads + # This tests that when v-phaser2 input is empty, a blank + # file is created as output. + myInputDir = util.file.get_test_input_path(self) + inBam = os.path.join(myInputDir, 'in.oneunmapped.bam') + refFasta = os.path.join(myInputDir, 'ref.indels.fasta') + outTab = util.file.mkstempfname('.txt') + intrahost.vphaser_one_sample(inBam, refFasta, outTab, vphaserNumThreads=4, minReadsEach=0, removeDoublyMappedReads=True) + assert os.path.getsize(outTab) == 0 + def test_vphaser_one_sample_2libs(self): # in.2libs.bam was created by "manually" editing in.bam and moving about # 1/3 of the reads to ReadGroup2.