Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: restrict calls to reference contigs #101

Merged
merged 5 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions workflow/envs/tools.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ channels:
- bioconda
- nodefaults
dependencies:
- bcftools =1.14
- samtools =1.14
- curl =7
- bedtools =2.30
- ucsc-liftover =377
- bcftools =1.20
- samtools =1.20
- curl =8
- bedtools =2.31
- ucsc-liftover =469
261 changes: 0 additions & 261 deletions workflow/resources/rename-contigs/grch38_ucsc2ensembl.txt
Original file line number Diff line number Diff line change
@@ -1,44 +1,9 @@
chr1 1
chr10 10
chr10_GL383545v1_alt
chr10_GL383546v1_alt
chr10_KI270824v1_alt
chr10_KI270825v1_alt
chr11 11
chr11_GL383547v1_alt
chr11_JH159136v1_alt
chr11_JH159137v1_alt
chr11_KI270721v1_random KI270721.1
chr11_KI270826v1_alt
chr11_KI270827v1_alt
chr11_KI270829v1_alt
chr11_KI270830v1_alt
chr11_KI270831v1_alt
chr11_KI270832v1_alt
chr11_KI270902v1_alt
chr11_KI270903v1_alt
chr11_KI270927v1_alt
chr12 12
chr12_GL383549v1_alt
chr12_GL383550v2_alt
chr12_GL383551v1_alt
chr12_GL383552v1_alt
chr12_GL383553v2_alt
chr12_GL877875v1_alt
chr12_GL877876v1_alt
chr12_KI270833v1_alt
chr12_KI270834v1_alt
chr12_KI270835v1_alt
chr12_KI270836v1_alt
chr12_KI270837v1_alt
chr12_KI270904v1_alt
chr13 13
chr13_KI270838v1_alt
chr13_KI270839v1_alt
chr13_KI270840v1_alt
chr13_KI270841v1_alt
chr13_KI270842v1_alt
chr13_KI270843v1_alt
chr14 14
chr14_GL000009v2_random GL000009.2
chr14_GL000194v1_random GL000194.1
Expand All @@ -48,109 +13,16 @@ chr14_KI270723v1_random KI270723.1
chr14_KI270724v1_random KI270724.1
chr14_KI270725v1_random KI270725.1
chr14_KI270726v1_random KI270726.1
chr14_KI270844v1_alt
chr14_KI270845v1_alt
chr14_KI270846v1_alt
chr14_KI270847v1_alt
chr15 15
chr15_GL383554v1_alt
chr15_GL383555v2_alt
chr15_KI270727v1_random KI270727.1
chr15_KI270848v1_alt
chr15_KI270849v1_alt
chr15_KI270850v1_alt
chr15_KI270851v1_alt
chr15_KI270852v1_alt
chr15_KI270905v1_alt
chr15_KI270906v1_alt
chr16 16
chr16_GL383556v1_alt
chr16_GL383557v1_alt
chr16_KI270728v1_random KI270728.1
chr16_KI270853v1_alt
chr16_KI270854v1_alt
chr16_KI270855v1_alt
chr16_KI270856v1_alt
chr17 17
chr17_GL000205v2_random GL000205.2
chr17_GL000258v2_alt
chr17_GL383563v3_alt
chr17_GL383564v2_alt
chr17_GL383565v1_alt
chr17_GL383566v1_alt
chr17_JH159146v1_alt
chr17_JH159147v1_alt
chr17_JH159148v1_alt
chr17_KI270729v1_random KI270729.1
chr17_KI270730v1_random KI270730.1
chr17_KI270857v1_alt
chr17_KI270858v1_alt
chr17_KI270859v1_alt
chr17_KI270860v1_alt
chr17_KI270861v1_alt
chr17_KI270862v1_alt
chr17_KI270907v1_alt
chr17_KI270908v1_alt
chr17_KI270909v1_alt
chr17_KI270910v1_alt
chr18 18
chr18_GL383567v1_alt
chr18_GL383568v1_alt
chr18_GL383569v1_alt
chr18_GL383570v1_alt
chr18_GL383571v1_alt
chr18_GL383572v1_alt
chr18_KI270863v1_alt
chr18_KI270864v1_alt
chr18_KI270911v1_alt
chr18_KI270912v1_alt
chr19 19
chr19_GL000209v2_alt
chr19_GL383573v1_alt
chr19_GL383574v1_alt
chr19_GL383575v2_alt
chr19_GL383576v1_alt
chr19_GL949746v1_alt
chr19_GL949747v2_alt
chr19_GL949748v2_alt
chr19_GL949749v2_alt
chr19_GL949750v2_alt
chr19_GL949751v2_alt
chr19_GL949752v1_alt
chr19_GL949753v2_alt
chr19_KI270865v1_alt
chr19_KI270866v1_alt
chr19_KI270867v1_alt
chr19_KI270868v1_alt
chr19_KI270882v1_alt
chr19_KI270883v1_alt
chr19_KI270884v1_alt
chr19_KI270885v1_alt
chr19_KI270886v1_alt
chr19_KI270887v1_alt
chr19_KI270888v1_alt
chr19_KI270889v1_alt
chr19_KI270890v1_alt
chr19_KI270891v1_alt
chr19_KI270914v1_alt
chr19_KI270915v1_alt
chr19_KI270916v1_alt
chr19_KI270917v1_alt
chr19_KI270918v1_alt
chr19_KI270919v1_alt
chr19_KI270920v1_alt
chr19_KI270921v1_alt
chr19_KI270922v1_alt
chr19_KI270923v1_alt
chr19_KI270929v1_alt
chr19_KI270930v1_alt
chr19_KI270931v1_alt
chr19_KI270932v1_alt
chr19_KI270933v1_alt
chr19_KI270938v1_alt
chr1_GL383518v1_alt
chr1_GL383519v1_alt
chr1_GL383520v2_alt
chr1_KI270706v1_random KI270706.1
chr1_KI270707v1_random KI270707.1
chr1_KI270708v1_random KI270708.1
Expand All @@ -160,33 +32,10 @@ chr1_KI270711v1_random KI270711.1
chr1_KI270712v1_random KI270712.1
chr1_KI270713v1_random KI270713.1
chr1_KI270714v1_random KI270714.1
chr1_KI270759v1_alt
chr1_KI270760v1_alt
chr1_KI270761v1_alt
chr1_KI270762v1_alt
chr1_KI270763v1_alt
chr1_KI270764v1_alt
chr1_KI270765v1_alt
chr1_KI270766v1_alt
chr1_KI270892v1_alt
chr2 2
chr20 20
chr20_GL383577v2_alt
chr20_KI270869v1_alt
chr20_KI270870v1_alt
chr20_KI270871v1_alt
chr21 21
chr21_GL383578v2_alt
chr21_GL383579v2_alt
chr21_GL383580v2_alt
chr21_GL383581v2_alt
chr21_KI270872v1_alt
chr21_KI270873v1_alt
chr21_KI270874v1_alt
chr22 22
chr22_GL383582v2_alt
chr22_GL383583v2_alt
chr22_KB663609v1_alt
chr22_KI270731v1_random KI270731.1
chr22_KI270732v1_random KI270732.1
chr22_KI270733v1_random KI270733.1
Expand All @@ -196,129 +45,22 @@ chr22_KI270736v1_random KI270736.1
chr22_KI270737v1_random KI270737.1
chr22_KI270738v1_random KI270738.1
chr22_KI270739v1_random KI270739.1
chr22_KI270875v1_alt
chr22_KI270876v1_alt
chr22_KI270877v1_alt
chr22_KI270878v1_alt
chr22_KI270879v1_alt
chr22_KI270928v1_alt
chr2_GL383521v1_alt
chr2_GL383522v1_alt
chr2_GL582966v2_alt
chr2_KI270715v1_random KI270715.1
chr2_KI270716v1_random KI270716.1
chr2_KI270767v1_alt
chr2_KI270768v1_alt
chr2_KI270769v1_alt
chr2_KI270770v1_alt
chr2_KI270771v1_alt
chr2_KI270772v1_alt
chr2_KI270773v1_alt
chr2_KI270774v1_alt
chr2_KI270775v1_alt
chr2_KI270776v1_alt
chr2_KI270893v1_alt
chr2_KI270894v1_alt
chr3 3
chr3_GL000221v1_random GL000221.1
chr3_GL383526v1_alt
chr3_JH636055v2_alt
chr3_KI270777v1_alt
chr3_KI270778v1_alt
chr3_KI270779v1_alt
chr3_KI270780v1_alt
chr3_KI270781v1_alt
chr3_KI270782v1_alt
chr3_KI270783v1_alt
chr3_KI270784v1_alt
chr3_KI270895v1_alt
chr3_KI270924v1_alt
chr3_KI270934v1_alt
chr3_KI270935v1_alt
chr3_KI270936v1_alt
chr3_KI270937v1_alt
chr4 4
chr4_GL000008v2_random GL000008.2
chr4_GL000257v2_alt
chr4_GL383527v1_alt
chr4_GL383528v1_alt
chr4_KI270785v1_alt
chr4_KI270786v1_alt
chr4_KI270787v1_alt
chr4_KI270788v1_alt
chr4_KI270789v1_alt
chr4_KI270790v1_alt
chr4_KI270896v1_alt
chr4_KI270925v1_alt
chr5 5
chr5_GL000208v1_random GL000208.1
chr5_GL339449v2_alt
chr5_GL383530v1_alt
chr5_GL383531v1_alt
chr5_GL383532v1_alt
chr5_GL949742v1_alt
chr5_KI270791v1_alt
chr5_KI270792v1_alt
chr5_KI270793v1_alt
chr5_KI270794v1_alt
chr5_KI270795v1_alt
chr5_KI270796v1_alt
chr5_KI270897v1_alt
chr5_KI270898v1_alt
chr6 6
chr6_GL000250v2_alt
chr6_GL000251v2_alt
chr6_GL000252v2_alt
chr6_GL000253v2_alt
chr6_GL000254v2_alt
chr6_GL000255v2_alt
chr6_GL000256v2_alt
chr6_GL383533v1_alt
chr6_KB021644v2_alt
chr6_KI270758v1_alt
chr6_KI270797v1_alt
chr6_KI270798v1_alt
chr6_KI270799v1_alt
chr6_KI270800v1_alt
chr6_KI270801v1_alt
chr6_KI270802v1_alt
chr7 7
chr7_GL383534v2_alt
chr7_KI270803v1_alt
chr7_KI270804v1_alt
chr7_KI270805v1_alt
chr7_KI270806v1_alt
chr7_KI270807v1_alt
chr7_KI270808v1_alt
chr7_KI270809v1_alt
chr7_KI270899v1_alt
chr8 8
chr8_KI270810v1_alt
chr8_KI270811v1_alt
chr8_KI270812v1_alt
chr8_KI270813v1_alt
chr8_KI270814v1_alt
chr8_KI270815v1_alt
chr8_KI270816v1_alt
chr8_KI270817v1_alt
chr8_KI270818v1_alt
chr8_KI270819v1_alt
chr8_KI270820v1_alt
chr8_KI270821v1_alt
chr8_KI270822v1_alt
chr8_KI270900v1_alt
chr8_KI270901v1_alt
chr8_KI270926v1_alt
chr9 9
chr9_GL383539v1_alt
chr9_GL383540v1_alt
chr9_GL383541v1_alt
chr9_GL383542v1_alt
chr9_KI270717v1_random KI270717.1
chr9_KI270718v1_random KI270718.1
chr9_KI270719v1_random KI270719.1
chr9_KI270720v1_random KI270720.1
chr9_KI270823v1_alt
chrM MT
chrUn_GL000195v1 GL000195.1
chrUn_GL000213v1 GL000213.1
Expand Down Expand Up @@ -448,8 +190,5 @@ chrUn_KI270755v1 KI270755.1
chrUn_KI270756v1 KI270756.1
chrUn_KI270757v1 KI270757.1
chrX X
chrX_KI270880v1_alt
chrX_KI270881v1_alt
chrX_KI270913v1_alt
chrY Y
chrY_KI270740v1_random KI270740.1
18 changes: 17 additions & 1 deletion workflow/rules/eval.smk
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,28 @@ rule intersect_calls_with_target_regions:
"<(bcftools view {input.bcf}) -wa -f 1.0 -header > {output}) 2> {log}"


# Restrict a filtered callset to the contigs listed in the reference FASTA
# index (.fai) and rewrite the contig header lines so that they match that
# index. The result is written as a real (compressed) BCF file.
rule restrict_to_reference_contigs:
    input:
        calls="results/filtered-variants/{callset}.bcf",
        # Not referenced in the command, but `bcftools view --regions` needs
        # an index next to the calls file for random access.
        calls_index="results/filtered-variants/{callset}.bcf.csi",
        ref_index="resources/reference/genome.fasta.fai",
    output:
        "results/filtered-variants/{callset}_restricted.bcf",
    log:
        "logs/restrict-to-reference-contigs/{callset}.log",
    conda:
        "../envs/tools.yaml"
    shell:
        # Stage 1: take the contig names (column 1 of the .fai), join them with
        #          commas (`paste -s` leaves no trailing comma, so no empty
        #          region is passed) and keep only records on those contigs.
        # Stage 2: replace/add the ##contig header lines from the .fai. This is
        #          done on the VCF text stream emitted by stage 1.
        # Stage 3: convert the VCF text stream to compressed BCF so that the
        #          content of the output file matches its .bcf extension.
        "(bcftools view --regions $(cut -f1 {input.ref_index} | paste -sd, -) {input.calls} |"
        " bcftools reheader -f {input.ref_index} |"
        " bcftools view -Ob > {output}) 2> {log}"


rule normalize_calls:
input:
calls=branch(
intersect_calls,
then="results/normalized-variants/{callset}_intersected.vcf",
otherwise="results/filtered-variants/{callset}.bcf",
otherwise="results/filtered-variants/{callset}_restricted.bcf",
),
ref="resources/reference/genome.fasta",
ref_index="resources/reference/genome.fasta.fai",
Expand Down
Loading