Skip to content

Commit

Permalink
Merge pull request #36 from ghga-de/35-raw-vcf-is-not-sorted
Browse files Browse the repository at this point in the history
35 raw vcf is not sorted
  • Loading branch information
kubranarci authored Jun 24, 2024
2 parents a4e5bad + d456009 commit 1d9554b
Show file tree
Hide file tree
Showing 20 changed files with 59 additions and 549 deletions.
5 changes: 1 addition & 4 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,4 @@ jobs:
if [[ $NXF_VER = '' ]]; then sleep 1200; fi
- name: BASIC Run the basic pipeline with docker
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test,docker
- name: BASIC Run the basic pipeline when --runcontig set to "ALT_HLA" (contigs does not exist in bam)
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --runcontigs "ALT_HLA"
nextflow run ${GITHUB_WORKSPACE} -profile test,docker
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ __pycache__
output/
results/
result/
results2/
results37/
test.xml
test_output/
Expand All @@ -19,3 +20,4 @@ work/
testdata_hg37/
.github/CODEOWNERS-tmp
bin/vcfparser.pyc
singularity/
3 changes: 1 addition & 2 deletions assets/samplesheet_hg38_WGS.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
sample,tumor,tumor_index,control,control_index
SEQC2_LL1,/omics/odcf/analysis/OE0526_projects/public_data_analyses/seqc2/sequencing/whole_genome_sequencing/results_per_pid/SEQC2_LL1/alignment/tumor01_SEQC2_LL1_merged.mdup.bam,/omics/odcf/analysis/OE0526_projects/public_data_analyses/seqc2/sequencing/whole_genome_sequencing/results_per_pid/SEQC2_LL1/alignment/tumor01_SEQC2_LL1_merged.mdup.bam.bai,/omics/odcf/analysis/OE0526_projects/public_data_analyses/seqc2/sequencing/whole_genome_sequencing/results_per_pid/SEQC2_LL1/alignment/control01_SEQC2_LL1_merged.mdup.bam,/omics/odcf/analysis/OE0526_projects/public_data_analyses/seqc2/sequencing/whole_genome_sequencing/results_per_pid/SEQC2_LL1/alignment/control01_SEQC2_LL1_merged.mdup.bam.bai
SEQC2_LL2,/omics/odcf/project/public_data/seqc2/sequencing/whole_genome_sequencing/view-by-pid/SEQC2_IL2/tumor01/paired/merged-alignment/tumor01_SEQC2_IL2_merged.mdup.bam,/omics/odcf/project/public_data/seqc2/sequencing/whole_genome_sequencing/view-by-pid/SEQC2_IL2/tumor01/paired/merged-alignment/tumor01_SEQC2_IL2_merged.mdup.bam.bai,/omics/odcf/project/public_data/seqc2/sequencing/whole_genome_sequencing/view-by-pid/SEQC2_IL2/control01/paired/merged-alignment/control01_SEQC2_IL2_merged.mdup.bam,/omics/odcf/project/public_data/seqc2/sequencing/whole_genome_sequencing/view-by-pid/SEQC2_IL2/control01/paired/merged-alignment/control01_SEQC2_IL2_merged.mdup.bam.bai
SEQC2_LL1,/omics/odcf/analysis/OE0526_projects/public_data_analyses/seqc2/sequencing/whole_genome_sequencing/results_per_pid/SEQC2_LL1/alignment/tumor01_SEQC2_LL1_merged.mdup.bam,/omics/odcf/analysis/OE0526_projects/public_data_analyses/seqc2/sequencing/whole_genome_sequencing/results_per_pid/SEQC2_LL1/alignment/tumor01_SEQC2_LL1_merged.mdup.bam.bai,/omics/odcf/analysis/OE0526_projects/public_data_analyses/seqc2/sequencing/whole_genome_sequencing/results_per_pid/SEQC2_LL1/alignment/control01_SEQC2_LL1_merged.mdup.bam,/omics/odcf/analysis/OE0526_projects/public_data_analyses/seqc2/sequencing/whole_genome_sequencing/results_per_pid/SEQC2_LL1/alignment/control01_SEQC2_LL1_merged.mdup.bam.bai
10 changes: 2 additions & 8 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ process {
mode: params.publish_dir_mode
]
}
withName: FILE_CONCATENATOR {
withName: SORT_NONSTANDARD_VCF {
publishDir = [
path: {"${params.outdir}/${meta.id}"},
pattern: "*{.vcf.gz,vcf.gz.tbi}",
Expand Down Expand Up @@ -175,12 +175,6 @@ process {
]
]
}
withName: 'CONVERT_TO_VCF' {
publishDir = [
path: { "${params.outdir}/test" },
enabled: false
]
}
withName: 'BCFTOOLS_SORT' {
publishDir = [
path: { "${params.outdir}/${meta.id}/standard_vcf/" },
Expand All @@ -199,7 +193,7 @@ process {
// Don't publish results for these processes
//
process {
withName: 'GREP_SAMPLENAME|ANNOVAR|MUTATION_DISTANCE|CONTEXT_FREQUENCIES|ANNOTATE_VCF|FILTER_BY_CRIT|SNV_RELIABILITY_PIPE|DBSNP_COUNTER|FLAG_BIAS|CONFIDENCE_ANNOTATION' {
withName: 'GREP_SAMPLENAME|ANNOVAR|MUTATION_DISTANCE|CONTEXT_FREQUENCIES|ANNOTATE_VCF|FILTER_BY_CRIT|SNV_RELIABILITY_PIPE|DBSNP_COUNTER|FLAG_BIAS|CONFIDENCE_ANNOTATION|CONVERT_TO_VCF|FILE_CONCATENATOR' {
publishDir = [
path: { "${params.outdir}/test" },
enabled: false
Expand Down
4 changes: 2 additions & 2 deletions modules/local/file_concatenator.nf
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ process FILE_CONCATENATOR {
def prefix = task.ext.prefix ?: "${meta.id}"

"""
headeredFileConcatenator.pl $vcfs > snvs_${prefix}_raw.vcf
headeredFileConcatenator.pl $vcfs > snvs_${prefix}.vcf
bgzip snvs_${prefix}_raw.vcf && tabix -p vcf snvs_${prefix}_raw.vcf.gz
bgzip snvs_${prefix}.vcf && tabix -p vcf snvs_${prefix}.vcf.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
7 changes: 2 additions & 5 deletions modules/local/seq_context_annotator.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,8 @@ process SEQ_CONTEXT_ANNOTATOR {
"""
seqContext_annotator.pl fastaFromBed $vcf $fasta 10 | \\
rawSnvFilter.py --outf=${prefix}.${intervals}.bias.vcf.temp $args
(head -n 5000 ${prefix}.${intervals}.bias.vcf.temp | \\
grep "#" ; cat ${prefix}.${intervals}.bias.vcf.temp | \\
grep -v "#" | \\
sort -T . -k1,1V -n -k2,2n ) > ${prefix}.${intervals}.bias.vcf
awk '/^#/ {print; next} {print | "sort -T . -k1,1V -k2,2n"}' ${prefix}.${intervals}.bias.vcf.temp > ${prefix}.${intervals}.bias.vcf
bcftools stats ${prefix}.${intervals}.bias.vcf > ${prefix}.${intervals}.bias.bcftools_stats.txt
Expand Down
33 changes: 33 additions & 0 deletions modules/local/sort_nonstandard_vcf.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
 * SORT_NONSTANDARD_VCF
 *
 * Re-sorts the records of a bgzipped VCF and writes the result as
 * snvs_<prefix>_raw.vcf.gz together with a fresh tabix index.
 *
 * Input:
 *   meta    - sample meta map; meta.id is the default output prefix
 *   vcf_gz  - bgzipped VCF whose records are to be sorted
 *   index   - index accompanying vcf_gz; it is staged into the work dir but
 *             not referenced by the script (a new index is built for the output)
 *
 * Output:
 *   output   - tuple of meta, the sorted *_raw.vcf.gz and its *_raw.vcf.gz.tbi
 *   versions - versions.yml recording the tabix version
 */
process SORT_NONSTANDARD_VCF {
    tag "$meta.id"
    label 'process_single'

    conda (params.enable_conda ? "" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'docker://kubran/samtools:v1.9':'kubran/samtools:v1.9' }"

    input:
    tuple val(meta), path(vcf_gz), path(index)

    output:
    tuple val(meta), path("*_raw.vcf.gz"),path("*_raw.vcf.gz.tbi") , emit: output
    path "versions.yml" , emit: versions

    script:
    def prefix = task.ext.prefix ?: "${meta.id}"

    // awk passes '#' header lines straight through and pipes every other line
    // into a single `sort` (column 1 as version sort, column 2 numeric), so the
    // header is left untouched while the records are ordered by the first two
    // columns. `-T .` keeps sort's temporary files in the task work directory.
    """
    zcat $vcf_gz | \\
        awk '/^#/ {print; next} {print | "sort -T . -k1,1V -k2,2n"}' | \\
        bgzip > snvs_${prefix}_raw.vcf.gz
    tabix -p vcf snvs_${prefix}_raw.vcf.gz

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
    END_VERSIONS
    """
}
3 changes: 1 addition & 2 deletions modules/nf-core/modules/bcftools/mpileup/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ process BCFTOOLS_MPILEUP {

conda (params.enable_conda ? "bioconda::bcftools=1.9" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bcftools:1.9--h47928c2_2':
'quay.io/biocontainers/bcftools:1.9--h47928c2_2' }"
'docker://kubran/bcftools:1.9':'kubran/bcftools:1.9' }"

input:
tuple val(meta), path(tumor), path(tumor_bai), path(control), path(control_bai), val(tumorname), val(controlname), val(intervals), path(interval_file)
Expand Down
7 changes: 0 additions & 7 deletions modules/nf-core/modules/bcftools/reheader/environment.yml

This file was deleted.

74 changes: 0 additions & 74 deletions modules/nf-core/modules/bcftools/reheader/main.nf

This file was deleted.

63 changes: 0 additions & 63 deletions modules/nf-core/modules/bcftools/reheader/meta.yml

This file was deleted.

4 changes: 0 additions & 4 deletions modules/nf-core/modules/bcftools/reheader/tests/bcf.config

This file was deleted.

Loading

0 comments on commit 1d9554b

Please sign in to comment.