Skip to content

Commit

Permalink
Merge pull request #995 from Clinical-Genomics/fix/cnvkit
Browse files Browse the repository at this point in the history
fix: cnvkit
  • Loading branch information
khurrammaqbool authored Sep 21, 2022
2 parents 7588cad + e283a59 commit 4ad54d9
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 13 deletions.
10 changes: 6 additions & 4 deletions BALSAMIC/containers/varcall_cnvkit/varcall_cnvkit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,17 @@ channels:
- defaults

dependencies:
- conda-forge::python=3.7
- conda-forge::python=3.8
- conda-forge::pip
- conda-forge::r-base=4.1.0
- conda-forge::r-base=4.1.3
- conda-forge::r-optparse=1.7.1
- conda-forge::r-data.table=1.14.2
- conda-forge::r-rlang=1.0.5
- bioconda::bioconductor-iranges=2.28.0
- bioconda::bioconductor-genomeinfodb=1.30.0
- bioconda::bioconductor-genomicranges=1.46.0
- bioconda::bioconductor-dnacopy=1.68.0
- bioconda::bioconductor-variantannotation=1.40.0
- bioconda::bioconductor-purecn=2.0.2
- bioconda::bcftools=1.13
- bioconda::tabix=0.2.6
- bioconda::bcftools=1.15
- bioconda::tabix=1.11
27 changes: 19 additions & 8 deletions BALSAMIC/snakemake_rules/variant_calling/cnvkit_paired.rule
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ rule cnvkit_paired:
bamT = bam_dir + "tumor.merged" + ".bam",
baits_bed = config["panel"]["capture_kit"],
refflat = config["reference"]["refflat"],
snv_vcf = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".tnhaplotyper.vcf.gz"
snv_vcf_tumor = vcf_dir + "SNV.germline.tumor.dnascope.vcf.gz",
snv_vcf_normal = vcf_dir + "SNV.germline.normal.dnascope.vcf.gz",
output:
cnr = cnv_dir + "tumor.merged" + ".cnr",
cns = cnv_dir + "tumor.merged" + ".cns",
Expand All @@ -33,7 +34,8 @@ rule cnvkit_paired:
min_mapq = params.common.min_mapq,
case_name = config["analysis"]["case_id"],
gender = config["analysis"]["gender"],
sample_id = "TUMOR",
tumor_sample_id = "TUMOR",
normal_sample_id = "NORMAL",
genome = config["reference"]["genome_version"]
message:
"Calling CNVs using CNVkit and calculating tumor purity/ploidy using PureCN for {params.case_name}"
Expand All @@ -43,6 +45,13 @@ mkdir -p {params.tmpdir};
export TMPDIR={params.tmpdir};
export PURECN='/opt/conda/lib/R/library/PureCN/extdata/PureCN.R'
# merge the tumor and normal VCF
bcftools merge \
-O z -o {params.tmpdir}/SNV.merged.vcf.gz \
-O z {input.snv_vcf_tumor} {input.snv_vcf_normal};
tabix -p vcf -f {params.tmpdir}/SNV.merged.vcf.gz;
# create target and anti-target bed files
cnvkit.py target {input.baits_bed} \
--annotate {input.refflat} \
Expand Down Expand Up @@ -117,29 +126,31 @@ purecn_status="true"
{{
Rscript $PURECN \
--out {params.purecn_dir} \
--sampleid {params.sample_id} \
--sampleid {params.tumor_sample_id} \
--tumor {output.cnr} \
--segfile {params.cnv_dir}/tumor.seg \
--vcf {input.snv_vcf} \
--vcf {params.tmpdir}/SNV.merged.vcf.gz \
--genome {params.genome} \
--funsegmentation Hclust \
--force --postoptimize \
--seed 124;
}} || purecn_status="false"
if $purecn_status; then
purity=$(awk -F\\, 'NR>1 {{print $2}}' {params.purecn_dir}/{params.sample_id}.csv)
ploidy=$(awk -F\\, 'NR>1 {{printf int($3)}}' {params.purecn_dir}/{params.sample_id}.csv);
purity=$(awk -F\\, 'NR>1 {{print $2}}' {params.purecn_dir}/{params.tumor_sample_id}.csv)
ploidy=$(awk -F\\, 'NR>1 {{printf int($3)}}' {params.purecn_dir}/{params.tumor_sample_id}.csv);
fi
# Call copy number variants from segmented log2 ratios
cnvkit.py call {params.cnv_dir}/tumor.initial.cns \
--vcf {input.snv_vcf} \
--vcf {params.tmpdir}/SNV.merged.vcf.gz \
--sample-sex {params.gender} \
--method clonal \
--purity $purity \
--ploidy $ploidy \
--sample-id {params.tumor_sample_id} \
--normal-id {params.normal_sample_id} \
--output {output.cns};
# Plot bin-level log2 coverages and segmentation calls
Expand Down Expand Up @@ -168,7 +179,7 @@ cnvkit.py breaks {output.cnr} {output.cns} \
cnvkit.py export vcf {output.cns} \
--cnr {output.cnr} \
-o {params.cnv_dir}/{params.tumor_name}.vcf \
--sample-id {params.sample_id} \
--sample-id {params.tumor_sample_id} \
--sample-sex {params.gender};
bgzip -f {params.cnv_dir}/{params.tumor_name}.vcf;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ rule cnvkit_single:
baits_bed = config["panel"]["capture_kit"],
fasta = config["reference"]["reference_genome"],
refflat = config["reference"]["refflat"],
snv_vcf = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".tnhaplotyper.vcf.gz"
snv_vcf = vcf_dir + "SNV.germline.tumor.dnascope.vcf.gz",
output:
cns = cnv_dir + "tumor.merged" + ".cns",
cnr = cnv_dir + "tumor.merged" + ".cnr",
Expand Down
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
[10.0.3]

Fixed:

* Input VCF files for cnvkit rules, cnvkit command and container https://github.com/Clinical-Genomics/BALSAMIC/pull/995

[10.0.2]
---------

Expand Down

0 comments on commit 4ad54d9

Please sign in to comment.