Skip to content

Commit

Permalink
Merge pull request #14 from martinghunt/varifier_global_align
Browse files Browse the repository at this point in the history
Use varifier global align
  • Loading branch information
martinghunt authored Oct 14, 2021
2 parents d9e3105 + a0d3468 commit 7607dbb
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 8 deletions.
6 changes: 4 additions & 2 deletions tests/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@ def test_load_single_seq_fasta():
utils.load_single_seq_fasta(infile)


def test_amplicons_json_to_bed():
def test_amplicons_json_to_bed_and_range():
json_in = os.path.join(data_dir, "amplicons_json_to_bed.json")
expect_bed = os.path.join(data_dir, "amplicons_json_to_bed.bed")
tmp_out = "tmp.amplicons_json_to_bed.bed"
subprocess.check_output(f"rm -f {tmp_out}", shell=True)
utils.amplicons_json_to_bed(json_in, tmp_out)
got_start, got_end = utils.amplicons_json_to_bed_and_range(json_in, tmp_out)
assert filecmp.cmp(tmp_out, expect_bed, shallow=False)
assert got_start == 100
assert got_end == 250
os.unlink(tmp_out)


Expand Down
19 changes: 16 additions & 3 deletions viridian_workflow/one_sample_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@
import os

from viridian_workflow import minimap, qcovid, sample_reads, varifier
from viridian_workflow.utils import amplicons_json_to_bed, check_file, run_process, rm
from viridian_workflow.utils import (
amplicons_json_to_bed_and_range,
check_file,
run_process,
rm,
)


def check_tech(tech):
Expand Down Expand Up @@ -57,7 +62,9 @@ def run_one_sample(

os.mkdir(outdir)
amplicon_bed = os.path.join(outdir, "amplicons.bed")
amplicons_json_to_bed(amplicon_json, amplicon_bed)
amplicons_start, amplicons_end = amplicons_json_to_bed_and_range(
amplicon_json, amplicon_bed
)
if paired:
all_reads_bam = minimap.run(outdir, ref_genome, fq1, fq2)
else:
Expand Down Expand Up @@ -91,7 +98,13 @@ def run_one_sample(

masked_fasta = qcovid.self_qc(outdir, assembly, self_map)

vcf = varifier.run(varifier_out, ref_genome, masked_fasta)
vcf = varifier.run(
varifier_out,
ref_genome,
masked_fasta,
min_coord=amplicons_start,
max_coord=amplicons_end,
)
check_file(vcf)

# clean up intermediate files
Expand Down
12 changes: 11 additions & 1 deletion viridian_workflow/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,27 @@ def run_process(cmd, ignore_error=False, stdout=None):
logging.error(result.stderr)


def amplicons_json_to_bed(infile, outfile):
def amplicons_json_to_bed_and_range(infile, outfile):
"""Converts the amplicons JSON file to a BED file, which is used in
various stages of the pipeline. Returns the 0-based inclusive coordinates
of the start of the first amplicon and end of the last amplicon, as
a tuple (start, end)"""
with open(infile) as f:
data = json.load(f)

start = float("inf")
end = -1

data_out = []
for amplicon, d in data["amplicons"].items():
data_out.append((amplicon, d["start"], d["end"] + 1))
start = min(start, d["start"])
end = max(end, d["end"])
data_out.sort(key=itemgetter(1))
with open(outfile, "w") as f:
for t in data_out:
print(*t, sep="\t", file=f)
return start, end


def load_amplicons_bed_file(infile):
Expand Down
10 changes: 8 additions & 2 deletions viridian_workflow/varifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,14 @@
from viridian_workflow.utils import run_process, check_file


def run(outdir, ref_genome, assembly):
def run(outdir, ref_genome, assembly, min_coord=None, max_coord=None):
vcf = os.path.join(outdir, "04.truth.vcf")
run_process(f"varifier make_truth_vcf {assembly} {ref_genome} {outdir}")
options = ["--global_align"]
if min_coord is not None:
options.append(f"--global_align_min_coord {min_coord + 1}")
if max_coord is not None:
options.append(f"--global_align_max_coord {max_coord + 1}")
options = " ".join(options)
run_process(f"varifier make_truth_vcf {options} {assembly} {ref_genome} {outdir}")
check_file(vcf)
return vcf

0 comments on commit 7607dbb

Please sign in to comment.