
Merge pull request #77 from jts/primer_snps
implement methods to identify variants that disrupt amplicons
jts authored Apr 28, 2021
2 parents 9fa0dbe + af7d70b commit 132ac72
Showing 3 changed files with 125 additions and 2 deletions.
47 changes: 45 additions & 2 deletions workflow/rules/annotation.smk
@@ -41,20 +41,25 @@ def get_recurrent_heatmap_plot(wildcards):
    out = "plots/%s_aa_mutation_heatmap.pdf" % (prefix)
    return out

def get_primer_snp_depth_table(wildcards):
    prefix = get_run_name()
    out = "qc_annotation/%s.primer_snp_depth_summary.tsv" % (prefix)
    return out

def get_snpeff_dirs():
    snpeff_dirs = list()
    for snpeff_dir in os.scandir('/'.join([os.environ['CONDA_PREFIX'], 'share'])):
        if snpeff_dir.name.startswith('snpeff'):
            snpeff_dirs.append(snpeff_dir.name)
    return snpeff_dirs


#
# Rules for annotating variants with functional consequence
#
rule all_qc_annotation:
    input:
        get_recurrent_heatmap_plot
        get_recurrent_heatmap_plot,
        get_primer_snp_depth_table

rule build_snpeff_db:
    input:
@@ -124,3 +129,41 @@ rule create_recurrent_mutation_heatmap:
    shell:
        "Rscript {params.script} --path qc_annotation --output {output} --threshold {params.threshold}"

rule create_primer_snp_bed:
    input:
        vcf=get_vcf_file,
        primer_bed=get_primer_bed
    output:
        "qc_annotation/{sample}.primer_snps.bed"
    shell:
        "bedtools intersect -a {input.vcf} -b {input.primer_bed} -wb > {output}"

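The -wb flag makes bedtools append the overlapping primer BED entry to each VCF record it reports, so every line of {sample}.primer_snps.bed carries both the variant and the primer it disrupts. As a reading aid, a minimal sketch of the column layout the downstream script relies on; the 16-field count and the indices for ref, alt and primer name come from primer_snp_depth.py, while the split into 10 VCF columns plus 6 primer BED columns is an inference from those indices:

def split_intersect_line(line):
    # fields 0-9: the VCF record of a single-sample VCF (CHROM, POS, ID, REF,
    # ALT, QUAL, FILTER, INFO, FORMAT, sample); fields 10-15: the primer BED
    # entry appended by -wb, with the primer name in field 13
    fields = line.rstrip().split()
    assert len(fields) == 16
    return fields[:10], fields[10:]
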
rule create_primer_snp_depth:
    input:
        primer_snps="qc_annotation/{sample}.primer_snps.bed",
        amplicon_depth="qc_sequencing/{sample}.amplicon_depth.bed"
    output:
        "qc_annotation/{sample}.primer_snp_depth.tsv"
    params:
        script=srcdir("../scripts/primer_snp_depth.py")
    shell:
        "python {params.script} --sample-name {wildcards.sample} --primer-snp-bed {input.primer_snps} --amplicon-depth-tsv {input.amplicon_depth} > {output}"

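primer_snp_depth.py reads the amplicon depth file with csv.DictReader and looks up the 'amplicon_id' and 'mean_depth' columns, so qc_sequencing/{sample}.amplicon_depth.bed is assumed to be a tab-separated table whose header contains at least those two fields. A small illustration with a made-up row:

import csv, io

example = "amplicon_id\tmean_depth\nnCoV-2019_1\t512.3\n"  # hypothetical row
reader = csv.DictReader(io.StringIO(example), delimiter='\t')
depth_map = {row['amplicon_id']: row['mean_depth'] for row in reader}
# depth_map -> {'nCoV-2019_1': '512.3'}; depths stay strings until printed
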
rule merge_primer_snp_depth:
    input:
        expand("qc_annotation/{s}.primer_snp_depth.tsv", s=get_sample_names())
    output:
        "qc_annotation/{prefix}.primer_snp_depth_merged.tsv"
    shell:
        "cat {input} | awk 'NR == 1 || $0 !~ /primer_name/' > {output}"

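The awk one-liner keeps the very first line of the concatenated stream (the header of the first per-sample table) and drops every later line that contains "primer_name", i.e. the headers repeated by cat; data rows never match and are kept. A rough Python equivalent, for readers who prefer it spelled out:

def merge_tables(paths, out_handle):
    # equivalent of: cat {input} | awk 'NR == 1 || $0 !~ /primer_name/'
    line_number = 0
    for path in paths:
        with open(path) as fh:
            for line in fh:
                line_number += 1
                if line_number == 1 or "primer_name" not in line:
                    out_handle.write(line)
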
rule summarize_primer_snp_depth:
    input:
        "qc_annotation/{prefix}.primer_snp_depth_merged.tsv"
    output:
        "qc_annotation/{prefix}.primer_snp_depth_summary.tsv"
    params:
        script=srcdir("../scripts/primer_snp_summary.py")
    shell:
        "python {params.script} --input-tsv {input} > {output}"

42 changes: 42 additions & 0 deletions workflow/scripts/primer_snp_depth.py
@@ -0,0 +1,42 @@
import argparse
import sys
import csv
import os

parser = argparse.ArgumentParser()
parser.add_argument('--primer-snp-bed', type=str, required=True)
parser.add_argument('--amplicon-depth-tsv', type=str, required=True)
parser.add_argument('--sample-name', type=str, default="none")
args, files = parser.parse_known_args()

def main():
    print("\t".join(["sample", "contig", "position", "ref", "alt", "primer_name", "amplicon", "depth"]))

    # read in the depth of each amplicon
    depth_map = dict()
    with open(args.amplicon_depth_tsv, 'r') as ifh:
        reader = csv.DictReader(ifh, delimiter='\t')
        for record in reader:
            depth_map[record['amplicon_id']] = record['mean_depth']

    # read in the bed file containing mutations that overlap primers
    seen = dict()
    with open(args.primer_snp_bed, 'r') as ifh:
        for line in ifh:
            fields = line.rstrip().split()
            assert(len(fields) == 16)
            primer_name = fields[13]
            primer_direction_idx = max(primer_name.find("_LEFT"), primer_name.find("_RIGHT"))
            if primer_direction_idx == -1:
                sys.stderr.write("Could not parse primer name %s\n" % (primer_name))
                sys.exit(1)
            amplicon_id = primer_name[0:primer_direction_idx]

            # skip duplicate entries when a mutation lands in ALT versions of primers
            key = ":".join([fields[0], fields[1], fields[3], fields[4], amplicon_id])
            if key not in seen:
                print("\t".join([args.sample_name, fields[0], fields[1], fields[3], fields[4], primer_name, amplicon_id, depth_map[amplicon_id]]))
                seen[key] = 1

if __name__ == '__main__':
    main()
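
With ARTIC-style primer names (an assumption about the primer scheme, e.g. "nCoV-2019_72_LEFT" or "nCoV-2019_72_RIGHT_alt"), the _LEFT/_RIGHT trimming above collapses both primers of an amplicon, including ALT versions, onto the same amplicon id. A standalone sketch of that mapping:

def primer_to_amplicon(primer_name):
    # mirrors the parsing in main(): trim everything from _LEFT/_RIGHT onward
    idx = max(primer_name.find("_LEFT"), primer_name.find("_RIGHT"))
    if idx == -1:
        raise ValueError("could not parse primer name %s" % primer_name)
    return primer_name[:idx]

assert primer_to_amplicon("nCoV-2019_72_LEFT") == "nCoV-2019_72"
assert primer_to_amplicon("nCoV-2019_72_RIGHT_alt") == "nCoV-2019_72"
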
38 changes: 38 additions & 0 deletions workflow/scripts/primer_snp_summary.py
@@ -0,0 +1,38 @@
import argparse
import numpy
import sys
import csv
import os

from collections import defaultdict

parser = argparse.ArgumentParser()
parser.add_argument('--input-tsv', type=str, required=True)
args, files = parser.parse_known_args()

def main():
    print("\t".join(["contig", "position", "ref", "alt", "primer_name", "amplicon", "num_samples", "mean_depth", "min_depth", "max_depth"]))

    # read in the depth per sample
    depth_map = defaultdict(list)

    with open(args.input_tsv, 'r') as ifh:
        reader = csv.DictReader(ifh, delimiter='\t')
        for record in reader:
            key = ":".join([record['contig'], record['position'], record['ref'], record['alt'], record['primer_name'], record['amplicon']])
            depth_map[key].append(float(record['depth']))

    for key in depth_map.keys():
        mean_depth = numpy.mean(depth_map[key])
        min_depth = numpy.min(depth_map[key])
        max_depth = numpy.max(depth_map[key])
        fields = key.split(":")
        fields.append(str(len(depth_map[key])))
        fields.append("%.2f" % mean_depth)
        fields.append("%.2f" % min_depth)
        fields.append("%.2f" % max_depth)
        print("\t".join(fields))

if __name__ == '__main__':
    main()
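
A worked micro-example of the aggregation, with illustrative depth values: if the same primer SNP is reported by two samples at depths 120.0 and 80.0, its summary row gets num_samples=2, mean_depth=100.00, min_depth=80.00 and max_depth=120.00.

import numpy

depths = [120.0, 80.0]           # depth of one primer SNP across two samples
num_samples = len(depths)        # -> 2
mean_depth = numpy.mean(depths)  # -> 100.0
min_depth = numpy.min(depths)    # -> 80.0
max_depth = numpy.max(depths)    # -> 120.0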
