Skip to content

Commit

Permalink
Merge pull request #37 from martinghunt/gramtools_error_checking
Browse files: browse the repository at this point in the history
Gramtools error checking
  • Loading branch information
martinghunt authored Apr 26, 2018
2 parents af12f14 + cb9851d commit fcd40fb
Show file tree
Hide file tree
Showing 9 changed files with 139 additions and 23 deletions.
23 changes: 23 additions & 0 deletions minos/gramtools.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,22 @@
class Error (Exception): pass


def _build_json_file_is_good(json_build_report):
    '''Returns True iff the gramtools build_report.json file says that
    gramtools build ran successfully.

    json_build_report: path to the JSON build report file to check.

    Returns a real bool. The result is False, instead of an exception,
    when the file is missing or cannot be read, when it is not valid
    JSON, when it has no ['gramtools_cpp_build']['return_value_is_0']
    entry, or when that entry is anything other than JSON true.'''
    # Open directly instead of testing os.path.exists() first: it avoids
    # a check-then-use race and also covers unreadable paths/directories.
    try:
        with open(json_build_report) as f:
            build_report = json.load(f)
    except (OSError, ValueError):
        # OSError: file is missing or unreadable.
        # ValueError: covers json.JSONDecodeError (eg a truncated report)
        # and UnicodeDecodeError, both of which are subclasses of it.
        return False

    try:
        returned_zero = build_report['gramtools_cpp_build']['return_value_is_0']
    except (KeyError, TypeError):
        # KeyError: an expected key is absent.
        # TypeError: the report is not the expected dict of dicts.
        return False

    # Only a genuine JSON true counts as success, so that a truthy
    # non-boolean (eg the string "false") is not mistaken for a pass.
    return returned_zero is True


def run_gramtools_build(outdir, vcf_file, ref_file, max_read_length, kmer_size=15):
'''Runs gramtools build. Makes new directory called 'outdir' for
the output'''
Expand All @@ -28,6 +44,13 @@ def run_gramtools_build(outdir, vcf_file, ref_file, max_read_length, kmer_size=1
logging.info('Running gramtools build: ' + build_command)
utils.syscall(build_command)
logging.info('Finished running gramtools build')
build_report = os.path.join(outdir, 'build_report.json')
ran_ok = _build_json_file_is_good(build_report)
if not ran_ok:
logging.info('Error running gramtools build. See build report file ' + build_report)
raise Error('Error running gramtools build: ' + build_command)

logging.info('Build report file looks good from gramtools build: ' + build_report)


def run_gramtools(build_dir, quasimap_dir, vcf_file, ref_file, reads, max_read_length, kmer_size=15):
Expand Down
28 changes: 28 additions & 0 deletions minos/tests/data/gramtools/build_json_file_is_good.bad.1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"start_time": "1524153313",
"end_time": "1524153329",
"total_runtime": 16,
"kmer_size": 15,
"max_read_length": 200,
"prg_build_report": {
"command": "perl foo bar baz",
"return_value_is_0": true,
"stdout": [
"Finished printing linear PRG. Final number in alphabet is 78"
]
},
"gramtools_cpp_build": {
"command": "/usr/local/lib/python3.6/dist-packages/gramtools/bin/gram build foo",
"return_value_is_0": false,
"stdout": [
"maximum thread count: 1",
"Executing build command",
"Generating integer encoded PRG",
"Number of charecters in integer encoded linear PRG: 4411709",
"Generating FM-Index",
"Generating PRG masks",
"Building kmer index"
]
},
"current_working_directory": "/home/kryten/"
}
15 changes: 15 additions & 0 deletions minos/tests/data/gramtools/build_json_file_is_good.bad.2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"start_time": "1524153313",
"end_time": "1524153329",
"total_runtime": 16,
"kmer_size": 15,
"max_read_length": 200,
"prg_build_report": {
"command": "perl foo bar baz",
"return_value_is_0": true,
"stdout": [
"Finished printing linear PRG. Final number in alphabet is 78"
]
},
"current_working_directory": "/home/kryten/"
}
39 changes: 39 additions & 0 deletions minos/tests/data/gramtools/build_json_file_is_good.good.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{
"start_time": "1524153313",
"end_time": "1524153329",
"total_runtime": 16,
"kmer_size": 15,
"max_read_length": 200,
"prg_build_report": {
"command": "perl foo bar baz",
"return_value_is_0": true,
"stdout": [
"Finished printing linear PRG. Final number in alphabet is 78"
]
},
"gramtools_cpp_build": {
"command": "/usr/local/lib/python3.6/dist-packages/gramtools/bin/gram build foo",
"return_value_is_0": true,
"stdout": [
"maximum thread count: 1",
"Executing build command",
"Generating integer encoded PRG",
"Number of charecters in integer encoded linear PRG: 4411709",
"Generating FM-Index",
"Generating PRG masks",
"Building kmer index",
"Total number of unique kmers: 7320",
"",
"",
"Timer report:",
" seconds",
" Encoded PRG 0.22",
" Generate FM-Index 7.08",
"Generating PRG masks 5.77",
" Building kmer index 0.05",
"",
"Total elapsed time: 13.12"
]
},
"current_working_directory": "/home/kryten/"
}
14 changes: 7 additions & 7 deletions minos/tests/data/vcf_chunker/make_split_files.in.vcf
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
##header1
##header2
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample_name
ref1 1 . G T . . . . .
ref1 2 . C T . . . . .
ref1 3 . T A . . . . .
ref1 5 . AGAGTCACGTA G . . . . .
ref1 18 . A G . . . . .
ref1 21 . G T . . . . .
ref2 42 . C G . . . . .
ref1 1 . G T . PASS . . .
ref1 2 . C T . PASS . . .
ref1 3 . T A . PASS . . .
ref1 5 . AGAGTCACGTA G . PASS . . .
ref1 18 . A G . PASS . . .
ref1 21 . G T . PASS . . .
ref2 42 . C G . PASS . . .
14 changes: 7 additions & 7 deletions minos/tests/data/vcf_chunker/merge_files.in.vcf
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
##header1
##header2
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample_name
ref1 1 . G T . . . . .
ref1 2 . C T . . . . .
ref1 3 . T A . . . . .
ref1 5 . AGAGTCACGTA G . . . . .
ref1 18 . A G . . . . .
ref1 21 . G T . . . . .
ref2 42 . C G . . . . .
ref1 1 . G T . PASS . . .
ref1 2 . C T . PASS . . .
ref1 3 . T A . PASS . . .
ref1 5 . AGAGTCACGTA G . PASS . . .
ref1 18 . A G . PASS . . .
ref1 21 . G T . PASS . . .
ref2 42 . C G . PASS . . .
13 changes: 12 additions & 1 deletion minos/tests/gramtools_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,18 @@


class TestGramtools(unittest.TestCase):
def test_build_json_file_is_good(self):
    '''_build_json_file_is_good is true only for the good build report'''
    # Each fixture file name is paired with the assertion its result
    # must satisfy. The last name deliberately refers to no file.
    expectations = [
        ('build_json_file_is_good.good.json', self.assertTrue),
        ('build_json_file_is_good.bad.1.json', self.assertFalse),
        ('build_json_file_is_good.bad.2.json', self.assertFalse),
        ('build_json_file_is_good.bad.2.json_thisfiledoesnotexist', self.assertFalse),
    ]
    for filename, check in expectations:
        report_path = os.path.join(data_dir, filename)
        check(gramtools._build_json_file_is_good(report_path))


def test_run_gramtools_build(self):
'''test run_gramtools_build'''
tmp_out_build = 'tmp.run_gramtools.out.build'
Expand Down Expand Up @@ -68,7 +80,6 @@ def test_run_gramtools_fails(self):
with self.assertRaises(gramtools.Error):
gramtools.run_gramtools(tmp_out_build, tmp_out_quasimap, vcf_file, ref_file, reads_file, 150)
shutil.rmtree(tmp_out_build)
shutil.rmtree(tmp_out_quasimap)


def test_run_gramtools_two_reads_files(self):
Expand Down
14 changes: 7 additions & 7 deletions minos/tests/vcf_chunker_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,13 @@ def test_make_split_files(self):
if os.path.exists(tmp_out):
shutil.rmtree(tmp_out)

vcf1 = cluster_vcf_records.vcf_record.VcfRecord('ref1\t1\t.\tG\tT\t.\t.\t.\t.\t.')
vcf2 = cluster_vcf_records.vcf_record.VcfRecord('ref1\t2\t.\tC\tT\t.\t.\t.\t.\t.')
vcf3 = cluster_vcf_records.vcf_record.VcfRecord('ref1\t3\t.\tT\tA\t.\t.\t.\t.\t.')
vcf4 = cluster_vcf_records.vcf_record.VcfRecord('ref1\t5\t.\tAGAGTCACGTA\tG\t.\t.\t.\t.\t.')
vcf5 = cluster_vcf_records.vcf_record.VcfRecord('ref1\t18\t.\tA\tG\t.\t.\t.\t.\t.')
vcf6 = cluster_vcf_records.vcf_record.VcfRecord('ref1\t21\t.\tG\tT\t.\t.\t.\t.\t.')
vcf7 = cluster_vcf_records.vcf_record.VcfRecord('ref2\t42\t.\tC\tG\t.\t.\t.\t.\t.')
vcf1 = cluster_vcf_records.vcf_record.VcfRecord('ref1\t1\t.\tG\tT\t.\tPASS\t.\t.\t.')
vcf2 = cluster_vcf_records.vcf_record.VcfRecord('ref1\t2\t.\tC\tT\t.\tPASS\t.\t.\t.')
vcf3 = cluster_vcf_records.vcf_record.VcfRecord('ref1\t3\t.\tT\tA\t.\tPASS\t.\t.\t.')
vcf4 = cluster_vcf_records.vcf_record.VcfRecord('ref1\t5\t.\tAGAGTCACGTA\tG\t.\tPASS\t.\t.\t.')
vcf5 = cluster_vcf_records.vcf_record.VcfRecord('ref1\t18\t.\tA\tG\t.\tPASS\t.\t.\t.')
vcf6 = cluster_vcf_records.vcf_record.VcfRecord('ref1\t21\t.\tG\tT\t.\tPASS\t.\t.\t.')
vcf7 = cluster_vcf_records.vcf_record.VcfRecord('ref2\t42\t.\tC\tG\t.\tPASS\t.\t.\t.')
header_lines = ['##header1', '##header2', '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tsample_name']

chunker = vcf_chunker.VcfChunker(tmp_out, vcf_infile=infile, ref_fasta=ref_fa, variants_per_split=2, flank_length=1)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

setup(
name='bio-minos',
version='0.4.0',
version='0.4.1',
description='Variant call adjudication',
packages = find_packages(),
author='Martin Hunt',
Expand Down

0 comments on commit fcd40fb

Please sign in to comment.