Skip to content

Commit

Permalink
Merge pull request #4 from wassermanlab/SGP-361
Browse files Browse the repository at this point in the history
re-organized the subworkflow, no change to the actual pipeline
  • Loading branch information
scorreard authored Jun 2, 2022
2 parents 7bf7108 + 27635ac commit 5c21b74
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 79 deletions.
2 changes: 1 addition & 1 deletion modules/MEI_data_organization.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// Run an R script that organizes the SNV variant information into the tables expected to be displayed in the IBVL interface

process MEI_data_organization {
tag "${SV_vcf}"
tag "${MEI_vcf.simpleName}"

publishDir "$params.outdir_pop/${assembly}/${run}/Oracle_table/genomic_ibvl_frequencies/", mode: 'copy', pattern: "genomic_ibvl_frequencies_*"
publishDir "$params.outdir_pop/${assembly}/${run}/Oracle_table/svs/", mode: 'copy', pattern: "svs_*"
Expand Down
2 changes: 1 addition & 1 deletion modules/list_vcfs_txt.nf
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ process list_vcfs_txt {
"""
} else if (var_type == "SV") {
"""
find $params.outdir_ind/${assembly}/${batch}/${run}/${var_type}/paragraph/ -name "*.vcf.gz" > ${var_type}_vcfs.txt
find $params.outdir_ind/${assembly}/${batch}/${run}/${var_type}/Sample/paragraph/ -name "*.vcf.gz" > ${var_type}_vcfs.txt
"""
} else {
"""
Expand Down
15 changes: 8 additions & 7 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,14 @@ singularity {
}

process {
// 4GB
withName: 'Vcftools_TsTv_by_qual|align_to_MT|MT_MergeVcfs|MarkDuplicates|MT_merge_samples|count_variants_vcftools' {
// 4GB, 2cpus
withName: 'align_to_MT|MT_MergeVcfs|MarkDuplicates|sample_QC|list_vcfs_txt|MEI_data_organization|split_tsv_by_chr' {
memory = 4.GB
}
cpus = 2
}

// 4GB, 8cpus
withName: 'Bcftools_stats|split_vcf_by_chr|SV_annotation|samtools_fixmate' {
withName: 'split_vcf_by_chr|SV_annotation|STR_data_organization' {
memory = 4.GB
cpus = 8
}
Expand All @@ -128,13 +129,13 @@ process {
}

// 8GB, 4cpus
withName: 'MT_data_organization|Hail_variant_MT_QC|MT_Step3_metadata_sample|MT_Step3_metadata|MT_haplocheck|MT_Step2_participant_data|MT_Step1_input_tsv|shift_back|Hail_variant_QC|SNV_data_organization' {
withName: 'MT_data_organization|Hail_variant_MT_QC|MT_Step3_metadata_sample|MT_Step3_metadata|MT_haplocheck|MT_Step2_participant_data|MT_Step1_input_tsv|shift_back|Hail_variant_QC|SNV_data_organization|merge_samples|merge_STR|SV_data_organization' {
memory = 8.GB
cpus = 4
}

// 12GB, 8cpus
withName: 'align_sort_output_bam|expansion_hunter|melt' {
withName: 'align_sort_output_bam|expansion_hunter|Hail_sample_QC|plink_sex_inference' {
memory = 12.GB
cpus = 8
}
Expand Down Expand Up @@ -192,7 +193,7 @@ process {

//GATK 4GB
// Solenne : Removed 'MarkDuplicates', 'MT_MergeVcfs' and 'MT_merge_samples' from the list as they also use conda, and having both in the same process was not working
withName: 'MT_call_variants|Extract_MT_Read|MT_Filter_Mutect_Calls|MT_FilterOut_sites|MT_LeftAlignAndTrimVariants|MT_Liftover|MT_Merge_stat_file|MT_SamtoFastq|gnomad_frequency_table|count_variants_gatk' {
withName: 'MT_call_variants|Extract_MT_Read|MT_Filter_Mutect_Calls|MT_FilterOut_sites|MT_LeftAlignAndTrimVariants|MT_Liftover|MT_Merge_stat_file|MT_SamtoFastq|gnomad_frequency_table' {
memory=4.GB
container = "file:///mnt/common/SILENT/Act3/singularity/gatk4-4.2.0.sif"
}
Expand Down
38 changes: 6 additions & 32 deletions subworkflow/MT.nf
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,11 @@ workflow MT {
bai
mosdepth
main:
// Needs to be included in "data preparation" (not in the MT sub-workflow)
bwa_index(ref_MT_fasta)
bwa_index_shifted(ref_MT_shifted_fasta)

// Sample specific (Do not need to be run for a previously processed sample)
Extract_MT_Read(bam, bai, Mitochondrial_chromosome)
MT_SamtoFastq(Extract_MT_Read.out)
align_to_MT(ref_MT_fasta, bwa_index.out, MT_SamtoFastq.out.fastq_MT)
Expand All @@ -94,9 +97,7 @@ workflow MT {
Picard_CollectWgsMetrics_MT(ref_MT_fasta, ref_MT_fasta_index, non_control_region_interval_list, align_to_MT.out.align_to_MT_bam, align_to_MT.out.align_to_MT_bai, assembly, batch, run)
Picard_CollectWgsMetrics_MT_shifted(ref_MT_shifted_fasta, ref_MT_shifted_fasta_index, control_region_shifted_reference_interval_list, align_to_MT_shifted.out.align_to_MT_bam, align_to_MT_shifted.out.align_to_MT_bai, assembly, batch, run)
shift_back(Picard_CollectWgsMetrics_MT_shifted.out, Picard_CollectWgsMetrics_MT.out.collect(), assembly, batch, run)

MT_Step1_input_tsv(shift_back.out.Sample_MT_Step1_input_tsv.collect(), assembly, batch, run)

MT_call_variants(ref_MT_fasta, ref_MT_fasta_index, ref_MT_fasta_dict, MarkDuplicates.out.bam, MarkDuplicates.out.bai, Mitochondrial_chromosome)
MT_call_variants_shifted(ref_MT_shifted_fasta, ref_MT_shifted_fasta_index, ref_MT_shifted_fasta_dict, MarkDuplicates_shifted.out.bam, MarkDuplicates_shifted.out.bai, Mitochondrial_chromosome)
MT_Liftover(MT_call_variants_shifted.out.Mutect2_vcf, MT_call_variants_shifted.out.Mutect2_vcf_index, ref_MT_fasta, ref_MT_fasta_dict, bwa_index.out, ShiftBack_chain, assembly, batch, run)
Expand All @@ -105,40 +106,13 @@ workflow MT {
MT_Filter_Mutect_Calls(ref_MT_fasta, ref_MT_fasta_index, ref_MT_fasta_dict, MT_MergeVcfs.out.vcf, MT_MergeVcfs.out.index, MT_Merge_stat_file.out.collect())
MT_LeftAlignAndTrimVariants(ref_MT_fasta, ref_MT_fasta_index, ref_MT_fasta_dict, MT_Filter_Mutect_Calls.out.vcf, MT_Filter_Mutect_Calls.out.index)
MT_FilterOut_sites(ref_MT_fasta, ref_MT_fasta_index, ref_MT_fasta_dict, MT_LeftAlignAndTrimVariants.out.vcf, MT_LeftAlignAndTrimVariants.out.index, blacklist_sites_hg38_MT_file, blacklist_sites_hg38_MT_index_file, assembly, batch, run)
MT_haplocheck(MT_FilterOut_sites.out.vcf, assembly, batch, run)
MT_Step3_metadata_sample(mosdepth, MT_haplocheck.out.file, assembly, batch, run)

// Aggregated steps (need to be run every time a new sample is added to the cohort)
MT_Step2_participant_data(MT_FilterOut_sites.out.sample_MT_Step2_participant_data.collect(), MT_FilterOut_sites.out.Sample_list.collect(), assembly, batch, run)
MT_haplocheck(MT_FilterOut_sites.out.vcf, assembly, batch, run)
MT_Step3_metadata_sample(mosdepth, MT_haplocheck.out.file, assembly, batch, run)
MT_Step3_metadata(MT_Step3_metadata_sample.out.collect(), assembly, batch, run)
Hail_variant_MT_QC(MT_Step1_input_tsv.out, MT_Step2_participant_data.out.MT_Step2_participant_data_tsv, MT_Step2_participant_data.out.participants_to_subset_txt, MT_Step3_metadata.out, assembly, batch, run)

annotation_table_merged(Hail_variant_MT_QC.out.vcf, Hail_variant_MT_QC.out.vcf_index, vep_cache_merged, vep_cache_merged_version, assembly, run, assembly_MT, CADD_1_6_whole_genome_SNVs, CADD_1_6_whole_genome_SNVs_index, CADD_1_6_InDels, CADD_1_6_InDels_index, spliceai_snv, spliceai_snv_index, spliceai_indel, spliceai_indel_index, chrM, MT)
MT_data_organization(gnomad_MT_frequ, Hail_variant_MT_QC.out.Hail_reduced_annotations, annotation_table_merged.out.annot_table_merged_R, assembly, run, severity_table)
}



// Modules that may be reintegrated if not included in hail

//include { Bcftools_stats } from "./../modules/Bcftools_stats"
//include { Vcftools_TsTv_by_qual } from "./../modules/Vcftools_TsTv_by_qual"
//include { multiqc_pop } from "./../modules/multiqc_pop"


// QC1 = Bcftools_stats(merge_samples.out.vcf, merge_samples.out.index, assembly, run)
// QC2 = Vcftools_TsTv_by_qual(merge_samples.out.vcf, merge_samples.out.index, assembly, run)
// annotation_table_merged(merge_samples.out.vcf, merge_samples.out.index, vep_cache_merged, vep_cache_merged_version, assembly, run, assembly_MT, CADD_1_6_whole_genome_SNVs, CADD_1_6_whole_genome_SNVs_index, CADD_1_6_InDels, CADD_1_6_InDels_index, chrM, MT)
// MT_data_organization(gnomad_MT_frequ, merge_samples.out.vcf, annotation_table_merged.out.annot_table_merged_R, assembly, run)
// quality_metrics = QC1.concat(QC2, annotation_table_merged.out.vep_merged_stat).collect()
// quality_metrics = QC1.concat(QC2).collect()
// multiqc_pop(quality_metrics, assembly, run, Mitochondrial_chromosome)


// list_vcfs_txt(MT_FilterOut_sites.out.vcf.collect(), assembly, batch, run, MT)
// merge_samples(list_vcfs_txt.out, assembly, batch, run, MT)

// emit :
// MT_vcf = merge_samples.out.vcf



36 changes: 3 additions & 33 deletions subworkflow/SNV.nf
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ workflow SNV {
mosdepth

main :
// Sample specific (Do not need to be run for a previously processed sample)
deepvariant_call(reference, reference_index, bam, bai, assembly, batch, run)

// Aggregated steps (need to be run every time a new sample is added to the cohort)
list_vcfs_txt(deepvariant_call.out.deepvariant_gvcf.collect(), assembly, batch, run, SNV)
GLnexus_cli(list_vcfs_txt.out, run)
bcf_to_vcf(GLnexus_cli.out, assembly, batch, run)
Expand All @@ -50,36 +53,3 @@ workflow SNV {
sample_sex_file=sample_QC.out.sample_QC_file
SNV_vcf = bcf_to_vcf.out.vcf
}


// Steps removed as they are now integrated in Hail
//include { gnomad_frequency_table } from "./../modules/gnomad_frequency_table"
//include { count_variants_vcftools } from "./../modules/count_variants_vcftools"
//include { count_variants_gatk } from "./../modules/count_variants_gatk"
//include { count_variants_gatk_2 } from "./../modules/count_variants_gatk_2"
//include { count_bcftools_stats } from "./../modules/count_bcftools_stats"
//include { split_vcf_by_chr } from "./../modules/split_vcf_by_chr"
//include { Bcftools_stats } from "./../modules/Bcftools_stats"
//include { Vcftools_TsTv_by_qual } from "./../modules/Vcftools_TsTv_by_qual"

//include { SNV_data_organization } from "./../modules/SNV_data_organization"
//include { multiqc_pop } from "./../modules/multiqc_pop"

// gnomad_SNV_vcf = file (params.gnomad_SNV_vcf)
// gnomad_SNV_index = file (params.gnomad_SNV_index)

// count_variants_gatk(deepvariant_call.out.deepvariant_vcf, deepvariant_call.out.deepvariant_vcf_index, assembly, batch, run)
// count_variants_gatk_2(count_variants_gatk.out.collect(), assembly, batch, run)
// count_variants_vcftools(bcf_to_vcf.out.vcf, bcf_to_vcf.out.index, assembly, batch, run)
// count_bcftools_stats(bcf_to_vcf.out.vcf, bcf_to_vcf.out.index, assembly, batch, run)
// sample_QC(plink_sex_inference.out, assembly, batch, run, count_variants_vcftools.out, count_variants_gatk_2.out, count_bcftools_stats.out, mosdepth)
// QC1 = Bcftools_stats(bcf_to_vcf.out.vcf, bcf_to_vcf.out.index, assembly, run)
// QC2 = Vcftools_TsTv_by_qual(bcf_to_vcf.out.vcf, bcf_to_vcf.out.index, assembly, run)
// quality_metrics = QC1.concat(QC2, annotation_table_merged.out.vep_merged_stat).collect()
// multiqc_pop(quality_metrics, assembly, run, SNV)

// Steps removed, hopefully to be added to Hail
// gnomad_frequency_table(gnomad_SNV_vcf, gnomad_SNV_index, chr)
// split_vcf_by_chr(bcf_to_vcf.out.vcf, assembly, batch, run, chr)
// SNV_data_organization(gnomad_frequency_table.out.collect(), split_vcf_by_chr.out.vcf_onechr, annotation_table_merged.out.annot_table_merged_R.collect(), assembly, run, sample_QC.out.sample_QC_file)

24 changes: 19 additions & 5 deletions subworkflow/SV.nf
Original file line number Diff line number Diff line change
Expand Up @@ -67,29 +67,43 @@ workflow SV {
sample_sex_file

main :
// Structural Variants (SV)
// Sample specific (Do not need to be run for a previously processed sample)
sm = SV_smoove(bam, bai, reference, reference_index, assembly, batch, run)
mr = SV_manta(bam, bai, reference, reference_index, cr_bed, cr_bed_index, assembly, batch, run)
sv_groups = mr.concat(sm) | groupTuple(by: 2)
svs = SV_concat_by_sample(sv_groups, assembly, batch, run) | collect
sv_merged = SV_jasmine(svs, reference, reference_index, assembly, batch, run)
genotyped = SV_paragraph_duphold(sv_merged, bam, bai, reference, reference_index, assembly, batch, run)

// Aggregated steps (need to be run every time a new sample is added to the cohort)
SV_vcfs_txt(SV_paragraph_duphold.out.vcf.collect(), assembly, batch, run, SV)
SV_merge_samples(SV_vcfs_txt.out, assembly, batch, run, SV)
SV_split_vcf_by_chr(SV_merge_samples.out.vcf, assembly, batch, run, chr, SV)
SV_annotation(SV_merge_samples.out.vcf, SV_merge_samples.out.index, vep_cache_merged, vep_cache_merged_version, assembly, run, assembly, CADD_1_6_whole_genome_SNVs, CADD_1_6_whole_genome_SNVs_index, CADD_1_6_InDels, CADD_1_6_InDels_index, spliceai_snv, spliceai_snv_index, spliceai_indel, spliceai_indel_index, chr, SV)
SV_data_organization(SV_split_vcf_by_chr.out.vcf_onechr, SV_annotation.out.annot_table_merged_R.collect(), assembly, run, SV, sample_sex_file)


//Short Tandem Repeats (STR)
// Sample specific (Do not need to be run for a previously processed sample)
expansion_hunter(bam, bai, reference, reference_index, variant_catalog, assembly, batch, run)

// Aggregated steps (need to be run every time a new sample is added to the cohort)
STR_vcfs_txt(expansion_hunter.out.vcf.collect(), assembly, batch, run, STR)
STR_merge_samples(STR_vcfs_txt.out, assembly, batch, run, STR)
STR_data_organization(STR_merge_samples.out.vcf, variant_catalog, assembly, run, STR)



// Mobile Element Insertions (MEIs)
// Sample specific (Do not need to be run for a previously processed sample)
samtools_fixmate(bam, bai, assembly, batch, run)
melt(samtools_fixmate.out.samples_fixmate_bam, samtools_fixmate.out.samples_fixmate_bam_index, reference, reference_index, transposon_file, genes_file, assembly, batch, run)

// Aggregated steps (need to be run every time a new sample is added to the cohort)
MEI_vcfs_txt(melt.out.vcf.collect(), assembly, batch, run, MEI)
MEI_merge_samples(MEI_vcfs_txt.out, assembly, batch, run, MEI)

MEI_split_vcf_by_chr(MEI_merge_samples.out.vcf, assembly, batch, run, chr, MEI)
MEI_annotation(MEI_merge_samples.out.vcf, MEI_merge_samples.out.index, vep_cache_merged, vep_cache_merged_version, assembly, run, assembly, CADD_1_6_whole_genome_SNVs, CADD_1_6_whole_genome_SNVs_index, CADD_1_6_InDels, CADD_1_6_InDels_index, spliceai_snv, spliceai_snv_index, spliceai_indel, spliceai_indel_index, chr, MEI)
MEI_data_organization(MEI_split_vcf_by_chr.out.vcf_onechr, MEI_annotation.out.annot_table_merged_R.collect(), assembly, run, MEI, sample_sex_file)
STR_data_organization(STR_merge_samples.out.vcf, variant_catalog, assembly, run, STR)
SV_split_vcf_by_chr(SV_merge_samples.out.vcf, assembly, batch, run, chr, SV)
SV_annotation(SV_merge_samples.out.vcf, SV_merge_samples.out.index, vep_cache_merged, vep_cache_merged_version, assembly, run, assembly, CADD_1_6_whole_genome_SNVs, CADD_1_6_whole_genome_SNVs_index, CADD_1_6_InDels, CADD_1_6_InDels_index, spliceai_snv, spliceai_snv_index, spliceai_indel, spliceai_indel_index, chr, SV)
SV_data_organization(SV_split_vcf_by_chr.out.vcf_onechr, SV_annotation.out.annot_table_merged_R.collect(), assembly, run, SV, sample_sex_file)
}

0 comments on commit 5c21b74

Please sign in to comment.