From d78857a3c19a1dfbae9889b8ad18a0b3c75504b6 Mon Sep 17 00:00:00 2001
From: scorreard <solenne.correard@gmail.com>
Date: Fri, 17 Jun 2022 13:03:03 -0700
Subject: [PATCH] SGP-373 - Push in order to run 80 samples (fastq -->
 annotated vcf)

---
 .gitignore                                    |   1 +
 Nextflow_SNV_MT_211220.nf                     |  16 +-
 modules/GLnexus.nf                            |   1 +
 modules/Hail_sample_QC.nf                     |   1 +
 modules/Hail_sample_QC.py                     | 223 +++++++-----
 modules/Hail_variant_QC.nf                    |  14 +-
 modules/Hail_variant_QC.py                    | 315 +++++------------
 modules/MEI_data_organization.R               |  16 +-
 modules/MEI_data_organization.nf              |   2 +-
 modules/MT_Extract_MT_Read.nf                 |  25 +-
 modules/MT_FilterOut_sites.nf                 |  33 +-
 modules/MT_Filter_Mutect_Calls.nf             |  23 +-
 modules/MT_LeftAlignAndTrimVariants.nf        |  23 +-
 modules/MT_Liftover.nf                        |  21 +-
 modules/MT_MarkDuplicates.nf                  |  33 +-
 modules/MT_MergeVcfs.nf                       |  31 +-
 modules/MT_Merge_stat_file.nf                 |  17 +-
 modules/MT_Picard_CollectWgsMetrics_MT.nf     |  26 +-
 modules/MT_SamtoFastq.nf                      |  19 +-
 modules/MT_Step1_input_tsv.nf                 |   7 +-
 modules/MT_Step3_metadata_sample.nf           |  25 +-
 modules/MT_align_to_MT.nf                     |  25 +-
 modules/MT_call_variants.nf                   |  28 +-
 modules/MT_haplocheck.nf                      |   7 +-
 .../Picard_CollectAlignmentSummaryMetrics.nf  |  15 +-
 modules/Picard_CollectWgsMetrics.nf           |  17 +-
 modules/Picard_QualityScoreDistribution.nf    |  17 +-
 modules/SNV_data_organization.R               | 333 ++++++++----------
 modules/SNV_data_organization.nf              |   8 +-
 modules/SV_concat_by_sample.nf                |   9 +-
 modules/SV_data_organization.R                | 328 ++++++++---------
 modules/SV_data_organization.nf               |   2 +-
 modules/SV_manta.nf                           |  35 +-
 modules/SV_smoove.nf                          |  35 +-
 modules/align_sort_output_bam.nf              |  21 +-
 modules/annotation_table_merged.nf            |  21 +-
 {subworkflow => modules/archive}/Hail.nf      |   0
 {subworkflow => modules/archive}/QC_indiv.nf  |   0
 modules/deepvariant.nf                        |  26 +-
 modules/expansion_hunter.nf                   |  45 ++-
 modules/fastqc.nf                             |  13 +-
 modules/gnomad_frequency_table.nf             |   2 +
 modules/melt.nf                               |  63 ++--
 modules/mosdepth.nf                           |  15 +-
 modules/samtools_fixmate.nf                   |  36 +-
 modules/shift_back.nf                         |  31 +-
 nextflow.config                               |  25 +-
 sample_QC.txt                                 |   9 +
 subworkflow/ALN.nf                            |  36 --
 subworkflow/Initialisation.nf                 |  28 ++
 subworkflow/MT.nf                             |  25 +-
 subworkflow/Mapping.nf                        |  60 ++++
 subworkflow/SNV.nf                            |  38 +-
 subworkflow/SV.nf                             |  11 +-
 54 files changed, 1208 insertions(+), 1028 deletions(-)
 rename {subworkflow => modules/archive}/Hail.nf (100%)
 rename {subworkflow => modules/archive}/QC_indiv.nf (100%)
 create mode 100644 sample_QC.txt
 delete mode 100644 subworkflow/ALN.nf
 create mode 100644 subworkflow/Initialisation.nf
 create mode 100644 subworkflow/Mapping.nf

diff --git a/.gitignore b/.gitignore
index af03b1b..e854516 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,7 @@ report.html*
 hail*
 /archive
 /vcf_to_try_hail
+filtered_samples_sex.tsv
 
 # Error and output files #
 ##########################
diff --git a/Nextflow_SNV_MT_211220.nf b/Nextflow_SNV_MT_211220.nf
index bb5d942..eba6954 100644
--- a/Nextflow_SNV_MT_211220.nf
+++ b/Nextflow_SNV_MT_211220.nf
@@ -22,12 +22,11 @@ if (params.help) {
 
 
 // Include the other workflow that themselves includes the modules
-include { ALN } from "./subworkflow/ALN"
-include { QC_indiv } from "./subworkflow/QC_indiv"
+include { Initialisation } from "./subworkflow/Initialisation"
+include { Mapping } from  "./subworkflow/Mapping"
 include { SNV } from "./subworkflow/SNV"
 include { MT } from "./subworkflow/MT"
 include { SV } from "./subworkflow/SV"
-include { Hail } from "./subworkflow/Hail"
 
 workflow{
 	samples 	= Channel
@@ -40,10 +39,9 @@ workflow{
 	outdir_ind 	= file (params.outdir_ind)
 
 	main :
-        ALN()
-	QC_indiv(ALN.out.bam_sorted, ALN.out.bam_sorted_index)
-        SNV(ALN.out.bam_sorted, ALN.out.bam_sorted_index, QC_indiv.out.mosdepth_output)
-        MT(ALN.out.bam_sorted, ALN.out.bam_sorted_index, QC_indiv.out.mosdepth_output)
-	SV(ALN.out.bam_sorted, ALN.out.bam_sorted_index, SNV.out.sample_sex_file)
-	Hail(SNV.out.SNV_vcf, SNV.out.sample_sex_file)
+//	Initialisation()
+        Mapping()
+	SNV(Mapping.out.bam_sorted, Mapping.out.bam_sorted_index, Mapping.out.mosdepth_output)
+        MT(Mapping.out.bam_sorted, Mapping.out.bam_sorted_index, Mapping.out.mosdepth_output)
+	SV(Mapping.out.bam_sorted, Mapping.out.bam_sorted_index, SNV.out.sample_sex_file)
 }
diff --git a/modules/GLnexus.nf b/modules/GLnexus.nf
index a4c6ba6..0063bcc 100644
--- a/modules/GLnexus.nf
+++ b/modules/GLnexus.nf
@@ -20,6 +20,7 @@ process GLnexus_cli {
 	"""
 	glnexus_cli \
 	--config DeepVariantWGS \
+	--mem-gbytes 128 \
 	--list ${list_gvcf} > DeepVariant_GLnexus_${run}.bcf
 	"""
 }
diff --git a/modules/Hail_sample_QC.nf b/modules/Hail_sample_QC.nf
index 13500ff..533557b 100644
--- a/modules/Hail_sample_QC.nf
+++ b/modules/Hail_sample_QC.nf
@@ -21,6 +21,7 @@ process Hail_sample_QC {
 	output :
 	path '*.html', emit : graph
 	path '*filtered_samples.vcf.bgz', emit : vcf_sample_filtered
+	path '*filtered_samples_sex.tsv', emit : filtered_sample_sex
 
 	conda '/home/BCRICWH.LAN/Solenne.Correard/miniconda3/envs/hail'
 
diff --git a/modules/Hail_sample_QC.py b/modules/Hail_sample_QC.py
index 2cff74b..5696f7e 100644
--- a/modules/Hail_sample_QC.py
+++ b/modules/Hail_sample_QC.py
@@ -54,7 +54,7 @@
 # In[ ]:
 
 
-SNV_mt = hl.read_matrix_table('SNV_vcf.mt')
+mt = hl.read_matrix_table('SNV_vcf.mt')
 
 
 # The vcf should be merged into one vcf to avoid redundancy (Possible calling overlap for indel witht he deepvaraint and SV pipeline)
@@ -105,7 +105,7 @@ def plot_sp (table_x_axis, mt_x_axis, table_y_axis, mt_y_axis, x_variable, y_var
                   xlabel=x_variable,
                   ylabel=y_variable,
                 title="Red lines are Mean +/- 3xStdDev",
-                  hover_fields={'ID':SNV_mt_sample_qc.s},
+                  hover_fields={'ID':mt.s},
                   size=5)
     annot = Span(dimension="height",location=stat(table_x_axis) [2],line_dash='dashed', line_width=3,line_color="red")
     annot2 = Span(dimension="height",location=stat(table_x_axis) [3],line_dash='dashed', line_width=3,line_color="red")
@@ -125,16 +125,11 @@ def plot_sp (table_x_axis, mt_x_axis, table_y_axis, mt_y_axis, x_variable, y_var
 # In[ ]:
 
 
-SNV_mt_sample_qc = hl.sample_qc(SNV_mt)
+mt = hl.sample_qc(mt)
 
 
 # List the sample quality control metric that were generated by hail
 
-# In[ ]:
-
-
-SNV_mt_sample_qc.describe()
-
 
 # Create plots for sample QC
 # 
@@ -163,41 +158,40 @@ def plot_sp (table_x_axis, mt_x_axis, table_y_axis, mt_y_axis, x_variable, y_var
 # In[ ]:
 
 
-SNV_mt_sample_qc.sample_qc.dp_stats.mean.export('vcf_to_try_hail/11samples/DP.tsv')
-SNV_mt_sample_qc.sample_qc.gq_stats.mean.export('vcf_to_try_hail/11samples/GQ.tsv')
-SNV_mt_sample_qc.sample_qc.call_rate.export('vcf_to_try_hail/11samples/call_rate.tsv')
-SNV_mt_sample_qc.sample_qc.r_het_hom_var.export('vcf_to_try_hail/11samples/r_het_hom_var.tsv')
-SNV_mt_sample_qc.sample_qc.n_het.export('vcf_to_try_hail/11samples/n_het.tsv')
-SNV_mt_sample_qc.sample_qc.n_hom_var.export('vcf_to_try_hail/11samples/n_hom_var.tsv')
-SNV_mt_sample_qc.sample_qc.n_snp.export('vcf_to_try_hail/11samples/n_snp.tsv')
-SNV_mt_sample_qc.sample_qc.n_singleton.export('vcf_to_try_hail/11samples/n_singleton.tsv')
-SNV_mt_sample_qc.sample_qc.r_insertion_deletion.export('vcf_to_try_hail/11samples/r_insertion_deletion.tsv')
-SNV_mt_sample_qc.sample_qc.n_insertion.export('vcf_to_try_hail/11samples/n_insertion.tsv')
-SNV_mt_sample_qc.sample_qc.n_deletion.export('vcf_to_try_hail/11samples/n_deletion.tsv')
-SNV_mt_sample_qc.sample_qc.r_ti_tv.export('vcf_to_try_hail/11samples/r_ti_tv.tsv')
-SNV_mt_sample_qc.sample_qc.n_transition.export('vcf_to_try_hail/11samples/n_transition.tsv')
-SNV_mt_sample_qc.sample_qc.n_transversion.export('vcf_to_try_hail/11samples/n_transversion.tsv')
-
+mt.sample_qc.dp_stats.mean.export('DP.tsv')
+mt.sample_qc.gq_stats.mean.export('GQ.tsv')
+mt.sample_qc.call_rate.export('call_rate.tsv')
+mt.sample_qc.r_het_hom_var.export('r_het_hom_var.tsv')
+mt.sample_qc.n_het.export('n_het.tsv')
+mt.sample_qc.n_hom_var.export('n_hom_var.tsv')
+mt.sample_qc.n_snp.export('n_snp.tsv')
+mt.sample_qc.n_singleton.export('n_singleton.tsv')
+mt.sample_qc.r_insertion_deletion.export('r_insertion_deletion.tsv')
+mt.sample_qc.n_insertion.export('n_insertion.tsv')
+mt.sample_qc.n_deletion.export('n_deletion.tsv')
+mt.sample_qc.r_ti_tv.export('r_ti_tv.tsv')
+mt.sample_qc.n_transition.export('n_transition.tsv')
+mt.sample_qc.n_transversion.export('n_transversion.tsv')
 
 # Open the tables as data frame
 
 # In[ ]:
 
 
-DP_table=pd.read_table('vcf_to_try_hail/11samples/DP.tsv')
-GQ_table=pd.read_table('vcf_to_try_hail/11samples/GQ.tsv')
-call_rate_table=pd.read_table('vcf_to_try_hail/11samples/call_rate.tsv')
-r_het_hom_var_table=pd.read_table('vcf_to_try_hail/11samples/r_het_hom_var.tsv')
-n_het_table=pd.read_table('vcf_to_try_hail/11samples/n_het.tsv')
-n_hom_var_table=pd.read_table('vcf_to_try_hail/11samples/n_hom_var.tsv')
-n_snp_table=pd.read_table('vcf_to_try_hail/11samples/n_snp.tsv')
-n_singleton_table=pd.read_table('vcf_to_try_hail/11samples/n_singleton.tsv')
-r_insertion_deletion_table=pd.read_table('vcf_to_try_hail/11samples/r_insertion_deletion.tsv')
-n_insertion_table=pd.read_table('vcf_to_try_hail/11samples/n_insertion.tsv')
-n_deletion_table=pd.read_table('vcf_to_try_hail/11samples/n_deletion.tsv')
-r_ti_tv_table=pd.read_table('vcf_to_try_hail/11samples/r_ti_tv.tsv')
-n_transition_table=pd.read_table('vcf_to_try_hail/11samples/n_transition.tsv')
-n_transversion_table=pd.read_table('vcf_to_try_hail/11samples/n_transversion.tsv')
+DP_table=pd.read_table('DP.tsv')
+GQ_table=pd.read_table('GQ.tsv')
+call_rate_table=pd.read_table('call_rate.tsv')
+r_het_hom_var_table=pd.read_table('r_het_hom_var.tsv')
+n_het_table=pd.read_table('n_het.tsv')
+n_hom_var_table=pd.read_table('n_hom_var.tsv')
+n_snp_table=pd.read_table('n_snp.tsv')
+n_singleton_table=pd.read_table('n_singleton.tsv')
+r_insertion_deletion_table=pd.read_table('r_insertion_deletion.tsv')
+n_insertion_table=pd.read_table('n_insertion.tsv')
+n_deletion_table=pd.read_table('n_deletion.tsv')
+r_ti_tv_table=pd.read_table('r_ti_tv.tsv')
+n_transition_table=pd.read_table('n_transition.tsv')
+n_transversion_table=pd.read_table('n_transversion.tsv')
 
 
 # Rename the column of the tables
@@ -226,14 +220,14 @@ def plot_sp (table_x_axis, mt_x_axis, table_y_axis, mt_y_axis, x_variable, y_var
 # In[108]:
 
 
-plot_histo(DP_table, SNV_mt_sample_qc.sample_qc.dp_stats.mean, 'Mean Depth per sample')
+plot_histo(DP_table, mt.sample_qc.dp_stats.mean, 'Mean Depth per sample')
 
 
 # In[109]:
 
 
 plot_histo(GQ_table,
-           SNV_mt_sample_qc.sample_qc.gq_stats.mean,
+           mt.sample_qc.gq_stats.mean,
            'Mean Genotype quality per sample')
 
 
@@ -241,7 +235,7 @@ def plot_sp (table_x_axis, mt_x_axis, table_y_axis, mt_y_axis, x_variable, y_var
 
 
 plot_histo(call_rate_table,
-           SNV_mt_sample_qc.sample_qc.call_rate,
+           mt.sample_qc.call_rate,
            'Call Rate per sample')
 
 
@@ -249,7 +243,7 @@ def plot_sp (table_x_axis, mt_x_axis, table_y_axis, mt_y_axis, x_variable, y_var
 
 
 plot_histo(r_het_hom_var_table,
-           SNV_mt_sample_qc.sample_qc.r_het_hom_var,
+           mt.sample_qc.r_het_hom_var,
            'Ratio heterozygous to homozygous variants per sample')
 
 
@@ -257,9 +251,9 @@ def plot_sp (table_x_axis, mt_x_axis, table_y_axis, mt_y_axis, x_variable, y_var
 
 
 plot_sp (n_het_table,
-         SNV_mt_sample_qc.sample_qc.n_het,
+         mt.sample_qc.n_het,
          n_hom_var_table,
-         SNV_mt_sample_qc.sample_qc.n_hom_var,
+         mt.sample_qc.n_hom_var,
          'Number of Heterozygous Variants',
          'Number of homozygous variants')
 
@@ -268,21 +262,15 @@ def plot_sp (table_x_axis, mt_x_axis, table_y_axis, mt_y_axis, x_variable, y_var
 
 
 plot_histo(n_snp_table,
-           SNV_mt_sample_qc.sample_qc.n_snp,
+           mt.sample_qc.n_snp,
            'Number of SNPs per sample')
 
 
-# In[114]:
-
-
-n_singleton_table
-
-
 # In[115]:
 
 
 plot_histo(n_singleton_table,
-           SNV_mt_sample_qc.sample_qc.n_singleton,
+           mt.sample_qc.n_singleton,
            'Number of singletons per sample')
 
 
@@ -290,9 +278,9 @@ def plot_sp (table_x_axis, mt_x_axis, table_y_axis, mt_y_axis, x_variable, y_var
 
 
 plot_sp (n_insertion_table,
-         SNV_mt_sample_qc.sample_qc.n_insertion,
+         mt.sample_qc.n_insertion,
          n_deletion_table,
-         SNV_mt_sample_qc.sample_qc.n_deletion,
+         mt.sample_qc.n_deletion,
          'Number of insertions',
          'Number of deletions')
 
@@ -301,7 +289,7 @@ def plot_sp (table_x_axis, mt_x_axis, table_y_axis, mt_y_axis, x_variable, y_var
 
 
 plot_histo(r_insertion_deletion_table,
-           SNV_mt_sample_qc.sample_qc.r_insertion_deletion,
+           mt.sample_qc.r_insertion_deletion,
            'Ratio insertions to deletions per sample')
 
 
@@ -309,9 +297,9 @@ def plot_sp (table_x_axis, mt_x_axis, table_y_axis, mt_y_axis, x_variable, y_var
 
 
 plot_sp (n_transition_table,
-         SNV_mt_sample_qc.sample_qc.n_transition,
+         mt.sample_qc.n_transition,
          n_transversion_table,
-         SNV_mt_sample_qc.sample_qc.n_transversion,
+         mt.sample_qc.n_transversion,
          'Number of transitions',
          'Number of transversions')
 
@@ -320,7 +308,7 @@ def plot_sp (table_x_axis, mt_x_axis, table_y_axis, mt_y_axis, x_variable, y_var
 
 
 plot_histo(r_ti_tv_table,
-           SNV_mt_sample_qc.sample_qc.r_ti_tv,
+           mt.sample_qc.r_ti_tv,
            'Ratio transitions to transversions per sample')
 
 
@@ -357,48 +345,93 @@ def plot_sp (table_x_axis, mt_x_axis, table_y_axis, mt_y_axis, x_variable, y_var
 # In[120]:
 
 
-filtered_SNV_mt_sample_qc_samples = SNV_mt_sample_qc.filter_cols((SNV_mt_sample_qc.sample_qc.dp_stats.mean > stat(DP_table) [2]) &
-                                                                 (SNV_mt_sample_qc.sample_qc.gq_stats.mean > stat(GQ_table) [2]) &
-                                                                 (SNV_mt_sample_qc.sample_qc.call_rate > stat(call_rate_table) [2]) &
-                                                                 (stat(r_het_hom_var_table) [3] > SNV_mt_sample_qc.sample_qc.r_het_hom_var) &
-                                                                 (SNV_mt_sample_qc.sample_qc.r_het_hom_var > stat(r_het_hom_var_table) [2]) &
-                                                                 (stat(n_snp_table) [3] > SNV_mt_sample_qc.sample_qc.n_snp) &
-                                                                 (SNV_mt_sample_qc.sample_qc.n_snp > stat(n_snp_table) [2]) &
-                                                                 (stat(n_singleton_table) [3] > SNV_mt_sample_qc.sample_qc.n_singleton) &
-                                                                 (SNV_mt_sample_qc.sample_qc.n_singleton > stat(n_singleton_table) [2]) &
-                                                                 (stat(r_insertion_deletion_table) [3] > SNV_mt_sample_qc.sample_qc.r_insertion_deletion) &
-                                                                 (SNV_mt_sample_qc.sample_qc.r_insertion_deletion > stat(r_insertion_deletion_table) [2]) &
-                                                                 (stat(r_ti_tv_table) [3] > SNV_mt_sample_qc.sample_qc.r_ti_tv) &
-                                                                 (SNV_mt_sample_qc.sample_qc.r_ti_tv> stat(r_ti_tv_table) [2])
-                                                                )
+filtered_mt = mt.filter_cols(
+    (stat(DP_table) [3] > mt.sample_qc.dp_stats.mean)  &
+    (mt.sample_qc.dp_stats.mean > stat(DP_table) [2]) &
+    (stat(GQ_table) [3] > mt.sample_qc.gq_stats.mean) &
+    (mt.sample_qc.gq_stats.mean > stat(GQ_table) [2]) &
+    (stat(call_rate_table) [3] > mt.sample_qc.call_rate) &
+    (mt.sample_qc.call_rate > stat(call_rate_table) [2]) &
+    (stat(r_het_hom_var_table) [3] > mt.sample_qc.r_het_hom_var) &
+    (mt.sample_qc.r_het_hom_var > stat(r_het_hom_var_table) [2]) &
+    (stat(n_snp_table) [3] > mt.sample_qc.n_snp) &
+    (mt.sample_qc.n_snp > stat(n_snp_table) [2]) &
+    (stat(n_singleton_table) [3] > mt.sample_qc.n_singleton) &
+    (mt.sample_qc.n_singleton > stat(n_singleton_table) [2]) &
+    (stat(r_insertion_deletion_table) [3] > mt.sample_qc.r_insertion_deletion) &
+    (mt.sample_qc.r_insertion_deletion > stat(r_insertion_deletion_table) [2]) &
+    (stat(r_ti_tv_table) [3] > mt.sample_qc.r_ti_tv) &
+    (mt.sample_qc.r_ti_tv> stat(r_ti_tv_table) [2])
+)
 
 
 # In[124]:
 
-
-hl.export_vcf(filtered_SNV_mt_sample_qc_samples, 'SNV_filtered_samples.vcf.bgz',  tabix=True)
-
-
-# In[ ]:
-
-
-SNV_mt_sample_qc.count()
-
-
-# In[ ]:
-
-
-filtered_SNV_mt_sample_qc_samples.count()
-
-
-# In[ ]:
-
-
-perc_removed_samples = (SNV_mt_sample_qc.count()[0]-filtered_SNV_mt_sample_qc_samples.count()[0])/SNV_mt_sample_qc.count()[0] * 100
-
-
-# In[ ]:
+hl.export_vcf(filtered_mt, 'filtered_samples.vcf.bgz', tabix = True)
+
+# Write the report of the number of filtered out samples and the reason they were filtered out
+
+def calc_removed_samples(mt, mt_var, stat_table) :
+    # Count the samples (columns) whose metric mt_var is NOT strictly between stat_table[2] and stat_table[3] (presumably mean -/+ 3 StdDev, as drawn on the plots - confirm against stat())
+    input_mt = mt.annotate_cols(
+        keep=(mt_var > stat_table [2]) &
+            (mt_var < stat_table [3])
+    )
+
+    n_removed = input_mt.aggregate_cols(hl.agg.count_where(~input_mt.keep))
+    
+    return n_removed
+
+
+def report_stats():
+    """Write the per-metric counts of removed samples to sample_QC.txt.
+
+    Reads the module-level *_removed counters and perc_removed_samples.
+    """
+    # The context manager closes the file even if the write fails
+    with hl.hadoop_open("sample_QC.txt", "w") as out_stats:
+        out_stats.write(
+            f"Number of samples removed because of depth metrics: {DP_removed}\n"
+            f"Number of samples removed because of genotype quality metrics: {GQ_removed}\n"
+            f"Number of samples removed because of call rate metrics: {CR_removed}\n"
+            f"Number of samples removed because of ratio heterozygous over homozygous: {r_het_hom_removed}\n"
+            f"Number of samples removed because of number of snps: {n_snps_removed}\n"
+            f"Number of samples removed because of number of singletons: {n_singletons_removed}\n"
+            f"Number of samples removed because of ratio insertions over deletions: {r_ins_del_removed}\n"
+            f"Number of samples removed because of ratio transitions over transversions: {r_ti_tv_removed}\n"
+            f"Percentage of the samples filtered out: {perc_removed_samples}\n"
+        )
+
+
+DP_removed = calc_removed_samples(mt, mt.sample_qc.dp_stats.mean, stat(DP_table))
+GQ_removed = calc_removed_samples(mt, mt.sample_qc.gq_stats.mean, stat(GQ_table))
+CR_removed = calc_removed_samples(mt, mt.sample_qc.call_rate, stat(call_rate_table))
+r_het_hom_removed = calc_removed_samples(mt, mt.sample_qc.r_het_hom_var, stat(r_het_hom_var_table))
+n_snps_removed = calc_removed_samples(mt, mt.sample_qc.n_snp, stat(n_snp_table))
+n_singletons_removed = calc_removed_samples(mt, mt.sample_qc.n_singleton, stat(n_singleton_table))
+r_ins_del_removed = calc_removed_samples(mt, mt.sample_qc.r_insertion_deletion, stat(r_insertion_deletion_table))
+r_ti_tv_removed = calc_removed_samples(mt, mt.sample_qc.r_ti_tv, stat(r_ti_tv_table))
+# Samples are columns: count()[0] is the number of rows (variants), which filter_cols never changes, so use count_cols()
+perc_removed_samples = (mt.count_cols() - filtered_mt.count_cols()) / mt.count_cols() * 100
+report_stats()
+
+
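+# Impute sex based on the F-stat (only for samples that passed QC)
+
+# Using gnomAD-style hard filters on the F-stat, with the thresholds applied in the code below:
+#- Ambiguous sex: F-stat falls outside of both ranges below
+#- XY: F-stat > 0.8
+#- XX: F-stat < 0.2 (NOTE(review): this comment previously said 0.4 while the code uses 0.2 - confirm the intended cut-off)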
+
+imputed_sex_filtered_samples = hl.impute_sex(filtered_mt.GT)
+imputed_sex_filtered_samples = imputed_sex_filtered_samples.annotate(
+        sex=hl.if_else(imputed_sex_filtered_samples.f_stat < 0.2,
+                       "XX",
+                       (hl.if_else(imputed_sex_filtered_samples.f_stat > 0.8,"XY", "ambiguous"))
+                )
+        )
+filtered_samples_sex=imputed_sex_filtered_samples.select("sex")
+filtered_samples_sex.export('filtered_samples_sex.tsv')
 
 
-print("%.2f %% of the samples were filtered out." % perc_removed_samples)
 
diff --git a/modules/Hail_variant_QC.nf b/modules/Hail_variant_QC.nf
index cd7c4c7..27351d0 100644
--- a/modules/Hail_variant_QC.nf
+++ b/modules/Hail_variant_QC.nf
@@ -11,7 +11,8 @@
 process Hail_variant_QC {
 
         publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/QC/Aggregated/Hail/Variants/", mode: 'copy', pattern : '*.html'
-        publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/vcf_post_hail/", mode: 'copy', pattern : '*filtered_samples_variants.vcf.bgz'
+        publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/vcf_post_hail/", mode: 'copy', pattern : '*filtered_samples_variants.vcf.bgz*'
+        publishDir "$params.outdir_pop/${assembly}/${run}/${var_type}/Vcf_pre_annotation/", mode: 'copy', pattern : 'SNV_filtered_frequ_*'
 
 	input :
 	file vcf_sample_filtered
@@ -23,9 +24,14 @@ process Hail_variant_QC {
 	output :
 	path '*.html', emit : graph
 
-	path 'SNV_filtered_samples_variants.vcf.bgz', emit : SNV_vcf_samples_variants_filtered
-        path 'SNV_filtered_samples_variants.vcf.bgz.tbi', emit : SNV_index	
-	path 'SNV_mt_var_filtered_tot_XX_XY_info.tsv', emit : SNV_frequ_tot_xx_xy_tsv
+	path 'SNV_filtered_samples_variants*', emit : SNV_filtered_variants_ind_geno
+
+	path 'SNV_filtered_frequ_total_xx.vcf.bgz', emit : SNV_filtered_variants_frequ_tot_xx
+        path 'SNV_filtered_frequ_total_xx.vcf.bgz.tbi', emit : SNV_filtered_variants_frequ_tot_xx_index
+
+	path 'SNV_filtered_tot_XX_XY.tsv.bgz', emit : SNV_frequ_tot_xx_xy_tsv
+
+
 
 	conda '/home/BCRICWH.LAN/Solenne.Correard/miniconda3/envs/hail'
 
diff --git a/modules/Hail_variant_QC.py b/modules/Hail_variant_QC.py
index 6bb2bc8..8dd9011 100644
--- a/modules/Hail_variant_QC.py
+++ b/modules/Hail_variant_QC.py
@@ -11,10 +11,6 @@
 hl.init()
 output_notebook()
 
-
-# In[2]:
-
-
 from hail.plot import show
 from pprint import pprint
 from bokeh.models import Span
@@ -27,65 +23,19 @@
 from typing import Tuple
 import sys
 
-
-# Import a vcf file and read it as a matrix table (mt, hail specific file type)
-# For specific on how to look at the mt file, refer to the bottom of this Jupyter notebook)
-# 
-# Currently : Import only the SNV vcf file, following the sample filtering step from previously
-
-# **Mohammed part**
-# 
-# Load several vcf and identify and remove the variants potentially overlapping
-# 
-# To be included
-
-# In[ ]:
+# Inputs are created through the nextflow pipeline: sys.argv[1] is the vcf (imported and written to
+# 'filtered_samples_vcf.mt' by the statement just below, so it is not converted a second time here), sys.argv[2] is the sex table
+sex_table = (hl.import_table(sys.argv[2], impute=True).key_by('s'))
 
 
 hl.import_vcf(sys.argv[1],
               array_elements_required=False, force_bgz=True).write('filtered_samples_vcf.mt', overwrite=True)
 
 
-# vcf_path = '/mnt/scratch/SILENT/Act3/Processed/Individual/GRCh37/Batch_DryRun/Run_20220426/SNV/'
-# 
-# hl.import_vcf(os.path.join(vcf_path,'DeepVariant_GLnexus_Run_20220426.vcf.gz'),
-#               array_elements_required=False, force_bgz=True).write('filtered_samples_vcf', overwrite=True)
-
-# vcf_path = '/mnt/scratch/SILENT/Act3/Processed/Individual/GRCh37/Batch_DryRun/Run_20220426/vcf_pre_hail/'
-# 
-# hl.import_vcf(os.path.join(vcf_path,'DeepVariant_GLnexus_Run_20220426.vcf.gz'),
-#               array_elements_required=False, force_bgz=True).write('hail/SNV_vcf.mt', overwrite=True)
-# 
-# hl.import_vcf(os.path.join(vcf_path,'MEI_Run_20220426.vcf.gz'),
-#               array_elements_required=False, force_bgz=True).write('hail/MEI_vcf.mt', overwrite=True)
-#               
-# hl.import_vcf(os.path.join(vcf_path,'STR_Run_20220426.vcf.gz'),
-#               array_elements_required=False, force_bgz=True).write('hail/STR_vcf.mt', overwrite=True)
-#               
-# hl.import_vcf(os.path.join(vcf_path,'SV_Run_20220426.vcf.gz'),
-#               array_elements_required=False, force_bgz=True).write('hail/SV_vcf.mt', overwrite=True)
-
-# For MT, need to find a different technique as it is not a diploiod genome
-# 
-# 
-# Error summary: VCFParseError: ploidy > 2 not supported
-
-# hl.import_vcf(os.path.join(vcf_path,'MT_Run_20220426.vcf.gz'), 
-#               array_elements_required=False, force_bgz=True,
-#               reference_genome='GRCh38').write('hail/MT_vcf.mt', overwrite=True)
-
-# In[4]:
-
 
 SNV_mt = hl.read_matrix_table('filtered_samples_vcf.mt')
 
 
-# MEI_mt = hl.read_matrix_table('hail/MEI_vcf.mt')
-# STR_mt = hl.read_matrix_table('hail/STR_vcf.mt')
-# SV_mt = hl.read_matrix_table('hail/SV_vcf.mt')
-
-# The vcf should be merged into one vcf to avoid redundancy (Possible calling overlap for indel witht he deepvaraint and SV pipeline)
-
 # In order to create the graph, 3 functions were needed
 # - stat : To calcualte the mean, standard deviation and other metrics for each parameter
 # - plot_histo : To create the histogram as expected
@@ -153,19 +103,6 @@ def plot_sp (table_x_axis, mt_x_axis, table_y_axis, mt_y_axis, x_variable, y_var
 
 SNV_mt = hl.variant_qc(SNV_mt)
 
-
-# In[9]:
-
-
-SNV_mt.s
-
-
-# In[10]:
-
-
-SNV_mt.describe()
-
-
 # List of variables for which we will create a table, calculate the standard deviation (StdDev) and the mean (Mean) for sample QC:
 # - DP (mt_sample_qc.variant_qc.dp_stats.mean)
 # - QG (mt_sample_qc.vaiant_qc.gq_stats.mean)
@@ -179,37 +116,27 @@ def plot_sp (table_x_axis, mt_x_axis, table_y_axis, mt_y_axis, x_variable, y_var
 # In[11]:
 
 
-SNV_mt.variant_qc.dp_stats.mean.export('vcf_to_try_hail/11samples/DP_variant.tsv')
-
-
-# In[12]:
-
-
-SNV_mt.variant_qc.gq_stats.mean.export('vcf_to_try_hail/11samples/GQ_variant.tsv')
-SNV_mt.variant_qc.call_rate.export('vcf_to_try_hail/11samples/call_rate_variant.tsv')
-SNV_mt.variant_qc.AN.export('vcf_to_try_hail/11samples/AN_variant.tsv')
-SNV_mt.variant_qc.n_not_called.export('vcf_to_try_hail/11samples/n_not_called_variant.tsv')
-SNV_mt.variant_qc.p_value_hwe.export('vcf_to_try_hail/11samples/p_value_hwe_variant.tsv')
-SNV_mt.variant_qc.het_freq_hwe.export('vcf_to_try_hail/11samples/het_freq_hwe_variant.tsv')
-SNV_mt.variant_qc.n_het.export('vcf_to_try_hail/11samples/n_het_variant.tsv')
+SNV_mt.variant_qc.dp_stats.mean.export('DP_variant.tsv')
+SNV_mt.variant_qc.gq_stats.mean.export('GQ_variant.tsv')
+SNV_mt.variant_qc.call_rate.export('call_rate_variant.tsv')
+SNV_mt.variant_qc.AN.export('AN_variant.tsv')
+SNV_mt.variant_qc.n_not_called.export('n_not_called_variant.tsv')
+SNV_mt.variant_qc.p_value_hwe.export('p_value_hwe_variant.tsv')
+SNV_mt.variant_qc.het_freq_hwe.export('het_freq_hwe_variant.tsv')
+SNV_mt.variant_qc.n_het.export('n_het_variant.tsv')
 
 
 # In[13]:
 
 
-DP_variant_table=pd.read_table('vcf_to_try_hail/11samples/DP_variant.tsv')
-
-
-# In[14]:
-
-
-GQ_variant_table=pd.read_table('vcf_to_try_hail/11samples/GQ_variant.tsv')
-call_rate_variant_table=pd.read_table('vcf_to_try_hail/11samples/call_rate_variant.tsv')
-AN_variant_table=pd.read_table('vcf_to_try_hail/11samples/AN_variant.tsv')
-n_not_called_variant_table=pd.read_table('vcf_to_try_hail/11samples/n_not_called_variant.tsv')
-p_value_hwe_variant_table=pd.read_table('vcf_to_try_hail/11samples/p_value_hwe_variant.tsv')
-het_freq_hwe_variant_table=pd.read_table('vcf_to_try_hail/11samples/het_freq_hwe_variant.tsv')
-n_het_variant_table=pd.read_table('vcf_to_try_hail/11samples/n_het_variant.tsv')
+DP_variant_table=pd.read_table('DP_variant.tsv')
+GQ_variant_table=pd.read_table('GQ_variant.tsv')
+call_rate_variant_table=pd.read_table('call_rate_variant.tsv')
+AN_variant_table=pd.read_table('AN_variant.tsv')
+n_not_called_variant_table=pd.read_table('n_not_called_variant.tsv')
+p_value_hwe_variant_table=pd.read_table('p_value_hwe_variant.tsv')
+het_freq_hwe_variant_table=pd.read_table('het_freq_hwe_variant.tsv')
+n_het_variant_table=pd.read_table('n_het_variant.tsv')
 
 
 # In[15]:
@@ -299,58 +226,77 @@ def plot_sp (table_x_axis, mt_x_axis, table_y_axis, mt_y_axis, x_variable, y_var
 # 
 # ?? Hardy–Weinberg values
 
-# In[25]:
-
+#Filter the small insertions / deletions (indel) of length > 50bp (Should be called by the SV pipeline)
 
-SNV_mt_var_filtered = SNV_mt.filter_rows((SNV_mt.variant_qc.dp_stats.mean > stat(DP_variant_table) [2]) &
-                                         (SNV_mt.variant_qc.gq_stats.mean > stat(GQ_variant_table) [2]) &
-                                         (SNV_mt.variant_qc.call_rate > stat(call_rate_variant_table) [2]) &
-                                         (SNV_mt.variant_qc.AN > stat(AN_variant_table) [2]) &
-                                         (SNV_mt.variant_qc.n_not_called > stat(n_not_called_variant_table) [2])
-                                        )
+# In[25]:
 
+SNV_mt_var_filtered = SNV_mt.filter_rows(
+    (SNV_mt.variant_qc.dp_stats.mean > stat(DP_variant_table) [2]) &
+    (SNV_mt.variant_qc.gq_stats.mean > stat(GQ_variant_table) [2]) &
+    (SNV_mt.variant_qc.call_rate > stat(call_rate_variant_table) [2]) &
+    (SNV_mt.variant_qc.AN > stat(AN_variant_table) [2]) &
+    (SNV_mt.variant_qc.n_not_called > stat(n_not_called_variant_table) [2]) &
+    (hl.len(SNV_mt.alleles[0]) < 50) &
+    (hl.len(SNV_mt.alleles[1]) < 50)
+)
 
 # In[55]:
 
 
 hl.export_vcf(SNV_mt_var_filtered, 'SNV_filtered_samples_variants.vcf.bgz', tabix=True)
 
-
-# In[29]:
-
-
-SNV_mt.count()
-
-
-# In[28]:
-
-
-SNV_mt_var_filtered.count()
-
-
-# **Percentage of variants removed by the filtering**
-
-# In[30]:
-
-
+#Write the report of the number of filtered out variants and the reason they were filtered out
+# The row filter keeps alleles of length < 50, so the removed variants are those of length >= 50 (50 included)
+n_large_del = SNV_mt.filter_rows(hl.len(SNV_mt.alleles[0]) >= 50).count_rows()
+n_large_ins = SNV_mt.filter_rows(hl.len(SNV_mt.alleles[1]) >= 50).count_rows()
+
+def calc_removed_variant(mt, mt_var, stat_table) :
+    # Count the variants (rows) whose metric mt_var is NOT strictly above stat_table[2] (the lower bound returned by stat())
+    input_mt = mt.annotate_rows(
+        keep=(mt_var > stat_table [2]))
+    n_removed = input_mt.aggregate_rows(hl.agg.count_where(~input_mt.keep))
+
+    return n_removed
+
+def report_stats():
+    """Write the per-filter counts of removed variants to variant_QC.txt.
+
+    Reads the module-level counters computed just after this definition.
+    """
+    # The context manager closes the file even if the write fails
+    with hl.hadoop_open("variant_QC.txt", "w") as out_stats:
+        out_stats.write(
+            f"Number of variants removed because of deletion superior to 50bp: {n_large_del}\n"
+            f"Number of variants removed because of insertion superior to 50bp: {n_large_ins}\n"
+            f"Number of variants removed because of depth metrics: {DP_var_removed}\n"
+            f"Number of variants removed because of genotype quality metrics: {GQ_var_removed}\n"
+            f"Number of variants removed because of call rate metrics: {CR_var_removed}\n"
+            f"Number of variants removed because of allele number (AN): {n_AN_removed}\n"
+            f"Number of variants removed because of number of not called: {n_not_called_removed}\n"
+            f"Total number of variants removed : {n_var_removed}\n"
+            f"Percentage of the variants filtered out: {perc_removed_variants}\n"
+        )
+
+DP_var_removed = calc_removed_variant(SNV_mt, SNV_mt.variant_qc.dp_stats.mean, stat(DP_variant_table))
+GQ_var_removed = calc_removed_variant(SNV_mt, SNV_mt.variant_qc.gq_stats.mean, stat(GQ_variant_table))
+CR_var_removed = calc_removed_variant(SNV_mt, SNV_mt.variant_qc.call_rate, stat(call_rate_variant_table))
+n_AN_removed = calc_removed_variant(SNV_mt, SNV_mt.variant_qc.AN, stat(AN_variant_table))
+n_not_called_removed = calc_removed_variant(SNV_mt, SNV_mt.variant_qc.n_not_called, stat(n_not_called_variant_table))
+n_var_removed = (SNV_mt.count()[0]-SNV_mt_var_filtered.count()[0])
 perc_removed_variants = (SNV_mt.count()[0]-SNV_mt_var_filtered.count()[0])/SNV_mt.count()[0] * 100
 
-
-# In[32]:
+report_stats()
 
 
-print("%.2f %% of the variants were filtered out." % perc_removed_variants)
 
 
 # **Calculate statistic**
 # 
-# To save time in R, calculate AF, AC, AN and numb of homozygotes (Total) 
-# 
-# Column order and name expected for Oracle Apex output: variant, af_total, af_xx, af_xy, ac_total, ac_xx, ac_xy, an_total, an_xx, an_xy, hom_alt_total, hom_alt_xx, hom_alt_xy, quality
-# 
-# Initially calculation is done for total, then XX, then XY specific frequencies are added to the vcf info tab
+# Calculate AF, AC, AN and numb of homozygotes (Total) 
+
+# Sex is defined using F-stat in Hail_sample_QC or file with sample sex can be loaded by user
 
-# In[106]:
+# Initially calculation is done for total, then XX, then XY specific frequencies are added to the vcf info tab
 
 
 SNV_mt_var_filtered = hl.variant_qc(SNV_mt_var_filtered)
@@ -359,7 +305,7 @@ def plot_sp (table_x_axis, mt_x_axis, table_y_axis, mt_y_axis, x_variable, y_var
         chrom=SNV_mt_var_filtered.rsid.split('_')[0],
         pos=SNV_mt_var_filtered.rsid.split('_')[1],
         ref=SNV_mt_var_filtered.rsid.split('_')[2],
-        alt=SNV_mt_var_filtered.rsid.split('_')[3], 
+        alt=SNV_mt_var_filtered.rsid.split('_')[3],        
         qual=SNV_mt_var_filtered.qual,
         af_tot=SNV_mt_var_filtered.variant_qc.AF[1],
         ac_tot=SNV_mt_var_filtered.variant_qc.AC[1],
@@ -368,49 +314,12 @@ def plot_sp (table_x_axis, mt_x_axis, table_y_axis, mt_y_axis, x_variable, y_var
     )
 ) 
 
-
 # Calculate the variants frequency per sex
 # 
-# It was considered to use hail sex inputation to define the sex but it relies only on the F-Coeff
-# 
-# Using a separate script (relyine on mosdepth output and plink) allow to rely on both the F_coeff and normalized coverage on the sexual chromosomes for better imputation.
-# 
-# Steps :
-# - Import the file with sex
-# -  Merge the sex table with the matrix table
-# - Calculate the info (AC, AF, AN, numb of hom) per sex
-
-# In[ ]:
-
-
-sex_table = (hl.import_table(sys.argv[2], impute=True)
-         .key_by('sample'))
-
-
-# sex_table = (hl.import_table('/mnt/scratch/SILENT/Act3/Processed/Individual/GRCh37/Batch_DryRun/Run_20220426/QC/Aggregated/R/QC_sample.tsv', impute=True)
-#          .key_by('sample'))
-
-# In[121]:
-
-
-SNV_mt_var_filtered_sex = SNV_mt_var_filtered.annotate_cols(**sex_table[SNV_mt_var_filtered.s])
-
-
-# In[122]:
-
-
-SNV_mt_var_filtered_XX = SNV_mt_var_filtered_sex.filter_cols(SNV_mt_var_filtered_sex.Sex == 'XX')
-
-
-# In[123]:
-
 
+SNV_mt_var_filtered_sex = SNV_mt_var_filtered_tot.annotate_cols(**sex_table[SNV_mt_var_filtered_tot.s])
+SNV_mt_var_filtered_XX = SNV_mt_var_filtered_sex.filter_cols(SNV_mt_var_filtered_sex.sex == 'XX')
 SNV_mt_var_filtered_XX = hl.variant_qc(SNV_mt_var_filtered_XX)
-
-
-# In[124]:
-
-
 SNV_mt_var_filtered_XX = SNV_mt_var_filtered_XX.annotate_rows(
     info = SNV_mt_var_filtered_XX.info.annotate(
         af_xx=SNV_mt_var_filtered_XX.variant_qc.AF[1],
@@ -418,24 +327,10 @@ def plot_sp (table_x_axis, mt_x_axis, table_y_axis, mt_y_axis, x_variable, y_var
         an_xx=SNV_mt_var_filtered_XX.variant_qc.AN,
         hom_alt_xx=SNV_mt_var_filtered_XX.variant_qc.homozygote_count[1],
     )
-) 
-
-
-# In[125]:
-
-
-SNV_mt_var_filtered_XY = SNV_mt_var_filtered_sex.filter_cols(SNV_mt_var_filtered_sex.Sex == 'XY')
-
-
-# In[126]:
-
+)
 
+SNV_mt_var_filtered_XY = SNV_mt_var_filtered_sex.filter_cols(SNV_mt_var_filtered_sex.sex == 'XY')
 SNV_mt_var_filtered_XY = hl.variant_qc(SNV_mt_var_filtered_XY)
-
-
-# In[127]:
-
-
 SNV_mt_var_filtered_XY = SNV_mt_var_filtered_XY.annotate_rows(
     info = SNV_mt_var_filtered_XY.info.annotate(
         af_xy=SNV_mt_var_filtered_XY.variant_qc.AF[1],
@@ -443,35 +338,36 @@ def plot_sp (table_x_axis, mt_x_axis, table_y_axis, mt_y_axis, x_variable, y_var
         an_xy=SNV_mt_var_filtered_XY.variant_qc.AN,
         hom_alt_xy=SNV_mt_var_filtered_XY.variant_qc.homozygote_count[1],
     )
-) 
-
+)
 
 # **Save version without individual genotype**
 # 
-# As the frequencies are calculated using hail, it is not necessary to export the individual genotype for the last part of the pipeline (Annotation and data organization)
-# 
-# Aggregate the info columns with frequenies for total, XX and XY
+# As the frequencies are calculated using hail, it is not necessary to export the individual genotype for the last part of the pipeline (Annotation)
+
+# Vcf: saved separately for total, XX and XY. Tsv: saved as one file with all the info
+
+# Exporting the vcf with the variant frequencies (No individual genotype) and XX samples frequencies (No XY for now)
 
-# In[186]:
+SNV_mt_var_filtered_XX_export = SNV_mt_var_filtered_XX.drop('variant_qc')
+SNV_mt_var_filtered_XX_export = SNV_mt_var_filtered_XX_export.annotate_rows(info=SNV_mt_var_filtered_XX_export.info.drop('AF', "AQ", "AC", "AN", "chrom", "pos", "ref", "alt"))
+SNV_mt_var_filtered_XX_export = SNV_mt_var_filtered_XX_export.rows()
+hl.export_vcf(SNV_mt_var_filtered_XX_export, 'SNV_filtered_frequ_total_xx.vcf.bgz', tabix=True)
 
 
+# Exporting the tsv file with total, XX and XY frequencies
 SNV_mt_var_filtered_tot_info = SNV_mt_var_filtered_tot.select_rows(
     SNV_mt_var_filtered_tot.info.chrom,
     SNV_mt_var_filtered_tot.info.pos,
     SNV_mt_var_filtered_tot.info.ref,
     SNV_mt_var_filtered_tot.info.alt,
-    SNV_mt_var_filtered_tot.rsid,
     SNV_mt_var_filtered_tot.qual,
+    SNV_mt_var_filtered_tot.rsid,
     SNV_mt_var_filtered_tot.info.af_tot,
     SNV_mt_var_filtered_tot.info.ac_tot,
     SNV_mt_var_filtered_tot.info.an_tot,
     SNV_mt_var_filtered_tot.info.hom_alt_tot,
 ).rows()
 
-
-# In[187]:
-
-
 SNV_mt_var_filtered_XX_info = SNV_mt_var_filtered_XX.select_rows(
     SNV_mt_var_filtered_XX.info.af_xx,
     SNV_mt_var_filtered_XX.info.ac_xx,
@@ -479,10 +375,6 @@ def plot_sp (table_x_axis, mt_x_axis, table_y_axis, mt_y_axis, x_variable, y_var
     SNV_mt_var_filtered_XX.info.hom_alt_xx,
 ).rows()
 
-
-# In[188]:
-
-
 SNV_mt_var_filtered_XY_info = SNV_mt_var_filtered_XY.select_rows(
     SNV_mt_var_filtered_XY.info.af_xy,
     SNV_mt_var_filtered_XY.info.ac_xy,
@@ -490,38 +382,13 @@ def plot_sp (table_x_axis, mt_x_axis, table_y_axis, mt_y_axis, x_variable, y_var
     SNV_mt_var_filtered_XY.info.hom_alt_xy,
 ).rows()
 
-
-# In[189]:
-
-
 SNV_mt_var_filtered_tot_XX_info = SNV_mt_var_filtered_tot_info.join(SNV_mt_var_filtered_XX_info, how='left')
-
-
-# In[190]:
-
-
 SNV_mt_var_filtered_tot_XX_XY_info = SNV_mt_var_filtered_tot_XX_info.join(SNV_mt_var_filtered_XY_info, how='left')
-
-
-# In[192]:
-
-
-SNV_mt_var_filtered_tot_XX_XY_info.export('SNV_mt_var_filtered_tot_XX_XY_info.tsv')
-
-
-# In[ ]:
-
-
-
-
-
-# In[ ]:
-
+SNV_mt_var_filtered_tot_XX_XY_info.export('SNV_filtered_tot_XX_XY.tsv.bgz')
 
 
 
 
-# In[ ]:
 
 
 
diff --git a/modules/MEI_data_organization.R b/modules/MEI_data_organization.R
index 5653029..2e7103b 100644
--- a/modules/MEI_data_organization.R
+++ b/modules/MEI_data_organization.R
@@ -129,13 +129,13 @@ for (j in 1:(length(slots_var)-1)){
 
   		# Variant ID, AF_tot, AF_XX, AF_XY, AC_tot, AC_XX, AC_XY, AN_tot, AN_XX, AN_XY, Hom_alt_tot, Hom_alt_XX, Hom_alt_XY
   		# AN_tot : number of 0/0, 0/1 and 1/1 genotypes (avoid counting the ./.)
-  		an_total = 2*(sum(GT_table_i == "0/0", na.rm=T) + sum(GT_table_i == "0/1", na.rm=T) + sum(GT_table_i == "1/1", na.rm=T)) 
+  		an_tot = 2*(sum(GT_table_i == "0/0", na.rm=T) + sum(GT_table_i == "0/1", na.rm=T) + sum(GT_table_i == "1/1", na.rm=T)) 
   		#AC tot
-  		ac_total = sum(GT_table_i == "0/1", na.rm=T) + 2*sum(GT_table_i == "1/1", na.rm=T)
+  		ac_tot = sum(GT_table_i == "0/1", na.rm=T) + 2*sum(GT_table_i == "1/1", na.rm=T)
   		#AF tot = AC/AN
-  		af_total = ac_total/an_total
+  		af_tot = ac_tot/an_tot
   		#Number of individus homozygotes for the alternative allele (1/1)
-  		hom_alt_total = sum(GT_table_i == "1/1", na.rm=T) 
+  		hom_tot = sum(GT_table_i == "1/1", na.rm=T) 
 
   		#For XX individuals
   		#For now, make fake false with individuals and sex : 
@@ -150,7 +150,7 @@ for (j in 1:(length(slots_var)-1)){
   		#AF X = AC/AN
   		af_xx = ac_xx/an_xx
   		#Number of individus homozygotes for the alternative allele (1/1)
-  		hom_alt_xx = sum(XX_GT_table_i == "1/1", na.rm=T) 
+  		hom_xx = sum(XX_GT_table_i == "1/1", na.rm=T) 
   
   		#For XY individuals
   		#Subset the GT_Table for XY individuals
@@ -163,7 +163,7 @@ for (j in 1:(length(slots_var)-1)){
   		#AF X = AC/AN
   		af_xy = ac_xy/an_xy
   		#Number of individus homozygotes for the alternative allele (1/1)
-  		hom_alt_xy = sum(XY_GT_table_i == "1/1", na.rm=T) 
+  		hom_xy = sum(XY_GT_table_i == "1/1", na.rm=T) 
 
 
 		#Consequence
@@ -218,7 +218,7 @@ for (j in 1:(length(slots_var)-1)){
   		### Create tables
   		# SV_IBVL_frequency
   		# Variant ID, AF_tot, AF_XX, AF_XY, AC_tot, AC_XX, AC_XY, AN_tot, AN_XX, AN_XY, Hom_alt_tot, Hom_alt_XX, Hom_alt_XY, qual
-  		temp_table_frequ_db_i = cbind(variant, af_total, af_xx, af_xy, ac_total, ac_xx, ac_xy, an_total, an_xx, an_xy, hom_alt_total, hom_alt_xx, hom_alt_xy, quality)
+  		temp_table_frequ_db_i = cbind(variant, af_tot, af_xx, af_xy, ac_tot, ac_xx, ac_xy, an_tot, an_xx, an_xy, hom_tot, hom_xx, hom_xy, quality)
 		table_frequ_SV=unique(rbind.data.frame(table_frequ_SV, temp_table_frequ_db_i))
 
   		# SV_annotation (svs)
@@ -255,7 +255,7 @@ file.remove(list_frequ_tables_slots)
 list_annot_tables_slots <- list.files(pattern = paste0("table_annot_SV_slot"))
 tables_annot_slots=lapply(list_annot_tables_slots, read.table, header=TRUE)
 combined_tables_annot_slots=do.call(rbind, tables_annot_slots)
-colnames(combined_tables_annot_slots)=c("variant", "chr1", "chr1_pos1", "chr1_pos2", "type", "length", "algorithm", "ucsc_url", "gnomad_id", "gnomad_url")
+colnames(combined_tables_annot_slots)=c("variant", "chr1", "chr1_pos1", "chr1_pos2", "sv_type", "sv_length", "algorithm", "ucsc_url", "gnomad_id", "gnomad_url")
 write.table(combined_tables_annot_slots, file=paste0("svs_", var_type, "_", chromosome,".tsv"), quote=FALSE, row.names = FALSE, sep="\t")
 #file.remove(list_annot_tables_slots)
 
diff --git a/modules/MEI_data_organization.nf b/modules/MEI_data_organization.nf
index fcd5ac4..4f8ef71 100644
--- a/modules/MEI_data_organization.nf
+++ b/modules/MEI_data_organization.nf
@@ -40,6 +40,6 @@ process MEI_data_organization {
 	vcf_name=\$(echo ${MEI_vcf.simpleName} | sed 's/_[^_]*\$//' )
 	chr=\$(echo ${MEI_vcf.simpleName} | sed 's/^.*_\\([^_]*\\)\$/\\1/' )
 
-	Rscript ../../../modules/MEI_data_organization.R $assembly ${MEI_vcf} \${vcf_name}_\${chr}_${var_type}_annotation_table_merged_nohash.tsv $sex_table $run ${var_type}
+	Rscript ../../../modules/MEI_data_organization.R $assembly ${MEI_vcf} \${vcf_name}_${var_type}_annotation_table_merged_nohash_\${chr}.tsv $sex_table $run ${var_type}
 	"""
 }
diff --git a/modules/MT_Extract_MT_Read.nf b/modules/MT_Extract_MT_Read.nf
index 01281e8..e46e548 100644
--- a/modules/MT_Extract_MT_Read.nf
+++ b/modules/MT_Extract_MT_Read.nf
@@ -14,18 +14,27 @@ process Extract_MT_Read {
         file bam
 	file bai
 	val Mitochondrial_chromosome
+	val assembly
+	val batch
+	val run
+
 
         output :
         file '*_chrM.bam'
 
         script :
         """
-	gatk PrintReads \
-        -L ${Mitochondrial_chromosome} \
-        --read-filter MateOnSameContigOrNoMappedMateReadFilter \
-        --read-filter MateUnmappedAndUnmappedReadFilter \
-        -I ${bam.simpleName}.bam \
-        --read-index ${bam.simpleName}.bam.bai \
-        -O ${bam.simpleName}_chrM.bam
-        """
+	sample_name=\$(echo ${bam.simpleName} | cut -d _ -f 1)
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/MT/Sample_vcf/\${sample_name}_MT_merged_filtered_trimmed_filtered_sites.vcf.gz ]; then
+		touch \${sample_name}_chrM.bam
+	else
+		gatk PrintReads \
+        	-L ${Mitochondrial_chromosome} \
+        	--read-filter MateOnSameContigOrNoMappedMateReadFilter \
+        	--read-filter MateUnmappedAndUnmappedReadFilter \
+        	-I ${bam.simpleName}.bam \
+        	--read-index ${bam.simpleName}.bam.bai \
+        	-O ${bam.simpleName}_chrM.bam
+        fi
+	"""
 }
diff --git a/modules/MT_FilterOut_sites.nf b/modules/MT_FilterOut_sites.nf
index d835465..a702271 100644
--- a/modules/MT_FilterOut_sites.nf
+++ b/modules/MT_FilterOut_sites.nf
@@ -10,7 +10,7 @@
 process MT_FilterOut_sites {
         tag "${MT_trimmed.simpleName}"
 
-        publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/MT/Sample_vcf/", mode: 'copy', pattern: '*_filtered_sites.vcf.gz*'
+        publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/MT/Sample_vcf/", mode: 'copyNoFollow', pattern: '*_filtered_sites.vcf.gz*'
 
         input :
         file ref_genome_MT
@@ -33,16 +33,25 @@ process MT_FilterOut_sites {
         script :
         """
         sample_name=\$(echo ${MT_trimmed.simpleName} | sed 's/_.*//' )
-	
-	gatk VariantFiltration \
-	-R Homo_sapiens_assembly38.chrM.fasta \
-	-V ${MT_trimmed.simpleName}.vcf.gz \
-	-O ${MT_trimmed.simpleName}_filtered_sites.vcf.gz \
-	--mask-name "GATK_artifact" \
-	--mask ${blacklist_sites_hg38_MT_file}
-
-	echo "\${sample_name}\t\${sample_name}\t$params.outdir_ind/${assembly}/${batch}/${run}/MT/Sample_vcf/${MT_trimmed.simpleName}_filtered_sites.vcf.gz" > \${sample_name}_MT_Step2_participant_data.tsv
-	echo "\${sample_name}" > \${sample_name}_list.txt
-        """
+
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/MT/Sample_vcf/\${sample_name}_MT_merged_filtered_trimmed_filtered_sites.vcf.gz ]; then
+		filtered_sites_vcf=\$(find $params.outdir_ind/${assembly}/*/${run}/MT/Sample_vcf/ -name ${MT_trimmed.simpleName}_filtered_sites.vcf.gz)
+		filtered_sites_index=\$(find $params.outdir_ind/${assembly}/*/${run}/MT/Sample_vcf/ -name ${MT_trimmed.simpleName}_filtered_sites.vcf.gz.tbi)
+		ln -s \$filtered_sites_vcf .
+		ln -s \$filtered_sites_index .
+		touch \${sample_name}_MT_Step2_participant_data.tsv
+		touch \${sample_name}_list.txt
+	else	
+		gatk VariantFiltration \
+		-R Homo_sapiens_assembly38.chrM.fasta \
+		-V ${MT_trimmed.simpleName}.vcf.gz \
+		-O ${MT_trimmed.simpleName}_filtered_sites.vcf.gz \
+		--mask-name "GATK_artifact" \
+		--mask ${blacklist_sites_hg38_MT_file}
+
+		echo "\${sample_name}\t\${sample_name}\t$params.outdir_ind/${assembly}/${batch}/${run}/MT/Sample_vcf/${MT_trimmed.simpleName}_filtered_sites.vcf.gz" > \${sample_name}_MT_Step2_participant_data.tsv
+		echo "\${sample_name}" > \${sample_name}_list.txt
+        fi
+	"""
 }
 
diff --git a/modules/MT_Filter_Mutect_Calls.nf b/modules/MT_Filter_Mutect_Calls.nf
index 52f4bfd..cec0298 100644
--- a/modules/MT_Filter_Mutect_Calls.nf
+++ b/modules/MT_Filter_Mutect_Calls.nf
@@ -17,6 +17,9 @@ process MT_Filter_Mutect_Calls {
 	file MT_MergeVcfs
 	file MT_MergeVcfs_index
 	file MT_MergeVcfs_stat
+	val assembly
+	val batch
+	val run
 
 	output :
 	path '*_filtered.vcf.gz', emit : vcf
@@ -24,13 +27,19 @@ process MT_Filter_Mutect_Calls {
 
 	script :
 	"""
-        gatk FilterMutectCalls \
-	-V ${MT_MergeVcfs.simpleName}.vcf.gz \
-	-R Homo_sapiens_assembly38.chrM.fasta \
-	--stats ${MT_MergeVcfs.simpleName}.stats \
-	--max-alt-allele-count 4 \
-	--mitochondria-mode \
-	-O ${MT_MergeVcfs.simpleName}_filtered.vcf.gz
+	sample_name=\$(echo ${MT_MergeVcfs} | cut -d _ -f 1)
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/MT/Sample_vcf/\${sample_name}_MT_merged_filtered_trimmed_filtered_sites.vcf.gz ]; then
+		touch ${MT_MergeVcfs.simpleName}_filtered.vcf.gz
+		touch ${MT_MergeVcfs.simpleName}_filtered.vcf.gz.tbi
+	else
+        	gatk FilterMutectCalls \
+		-V ${MT_MergeVcfs.simpleName}.vcf.gz \
+		-R Homo_sapiens_assembly38.chrM.fasta \
+		--stats ${MT_MergeVcfs.simpleName}.stats \
+		--max-alt-allele-count 4 \
+		--mitochondria-mode \
+		-O ${MT_MergeVcfs.simpleName}_filtered.vcf.gz
+	fi
 	"""
 }
 
diff --git a/modules/MT_LeftAlignAndTrimVariants.nf b/modules/MT_LeftAlignAndTrimVariants.nf
index 3618aa3..f5f5c38 100644
--- a/modules/MT_LeftAlignAndTrimVariants.nf
+++ b/modules/MT_LeftAlignAndTrimVariants.nf
@@ -16,6 +16,9 @@ process MT_LeftAlignAndTrimVariants {
 	file ref_genome_MT_dict
 	file (MT_Filter_Mutect_Calls)
 	file MT_Filter_Mutect_Calls_index
+	val assembly
+	val batch
+	val run
 
 	output :
 	path '*_trimmed.vcf.gz', emit : vcf
@@ -23,13 +26,19 @@ process MT_LeftAlignAndTrimVariants {
 
 	script :
 	"""
-	gatk LeftAlignAndTrimVariants \
-	-R Homo_sapiens_assembly38.chrM.fasta \
-	-V ${MT_Filter_Mutect_Calls.simpleName}.vcf.gz \
-	-O ${MT_Filter_Mutect_Calls.simpleName}_trimmed.vcf.gz \
-	--split-multi-allelics \
-	--dont-trim-alleles \
-	--keep-original-ac
+	sample_name=\$(echo ${MT_Filter_Mutect_Calls} | cut -d _ -f 1)
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/MT/Sample_vcf/\${sample_name}_MT_merged_filtered_trimmed_filtered_sites.vcf.gz ]; then
+		touch ${MT_Filter_Mutect_Calls.simpleName}_trimmed.vcf.gz
+		touch ${MT_Filter_Mutect_Calls.simpleName}_trimmed.vcf.gz.tbi
+	else
+		gatk LeftAlignAndTrimVariants \
+		-R Homo_sapiens_assembly38.chrM.fasta \
+		-V ${MT_Filter_Mutect_Calls.simpleName}.vcf.gz \
+		-O ${MT_Filter_Mutect_Calls.simpleName}_trimmed.vcf.gz \
+		--split-multi-allelics \
+		--dont-trim-alleles \
+		--keep-original-ac
+	fi
 	"""
 }
 
diff --git a/modules/MT_Liftover.nf b/modules/MT_Liftover.nf
index 6d2d982..378dfa8 100644
--- a/modules/MT_Liftover.nf
+++ b/modules/MT_Liftover.nf
@@ -9,7 +9,7 @@
 process MT_Liftover {
         tag "${MT_call_variants_shifted.simpleName}"
 
-        publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/MT/QC/${MT_call_variants_shifted.simpleName}/Liftover/",  pattern: "*_rejected_variants.vcf",  mode: 'copy'
+        publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/MT/QC/${MT_call_variants_shifted.simpleName}/Liftover/",  pattern: "*_rejected_variants.vcf",  mode: 'copyNoFollow'
 
 	input :
         file MT_call_variants_shifted
@@ -29,11 +29,18 @@ process MT_Liftover {
 
         script :
         """
-	gatk LiftoverVcf \
-	I=${MT_call_variants_shifted} \
-	O=${MT_call_variants_shifted.simpleName}_lifted_over.vcf \
-	CHAIN=${ShiftBack_chain_MT_file} \
-	REJECT=${MT_call_variants_shifted.simpleName}_rejected_variants.vcf \
-	R=${ref_genome_MT_file}
+	sample_name=\$(echo ${MT_call_variants_shifted} | cut -d _ -f 1)
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/MT/Sample_vcf/\${sample_name}_MT_merged_filtered_trimmed_filtered_sites.vcf.gz ]; then
+		touch ${MT_call_variants_shifted.simpleName}_lifted_over.vcf
+		rejected_vcf=\$(find $params.outdir_ind/${assembly}/*/${run}/MT/QC/\${sample_name}*/Liftover/ -name \${sample_name}*_rejected_variants.vcf)
+		ln -s \$rejected_vcf .
+	else
+		gatk LiftoverVcf \
+		I=${MT_call_variants_shifted} \
+		O=${MT_call_variants_shifted.simpleName}_lifted_over.vcf \
+		CHAIN=${ShiftBack_chain_MT_file} \
+		REJECT=${MT_call_variants_shifted.simpleName}_rejected_variants.vcf \
+		R=${ref_genome_MT_file}
+	fi
 	"""
 }
diff --git a/modules/MT_MarkDuplicates.nf b/modules/MT_MarkDuplicates.nf
index 9491f2a..734bd16 100644
--- a/modules/MT_MarkDuplicates.nf
+++ b/modules/MT_MarkDuplicates.nf
@@ -15,6 +15,9 @@ process MarkDuplicates {
         input :
         file bam_MT
 	file bai_MT
+	val assembly
+	val batch
+	val run
 
         output :
         path '*marked_duplicates.bam', emit : bam
@@ -22,16 +25,22 @@ process MarkDuplicates {
 
         script:
         """
-        singularity exec -B /mnt/scratch/SILENT/Act3/ -B /mnt/common/SILENT/Act3/ /mnt/common/SILENT/Act3/singularity/gatk4-4.2.0.sif \
-	gatk MarkDuplicates \
-	I=${bam_MT.baseName}.bam \
-	O=${bam_MT.baseName}_marked_duplicates.bam \
-	M=${bam_MT.baseName}_marked_duplicates_metrics.txt
-
-	ANNOTATEVARIANTS_INSTALL=/mnt/common/WASSERMAN_SOFTWARE/AnnotateVariants/
-        source \$ANNOTATEVARIANTS_INSTALL/opt/miniconda3/etc/profile.d/conda.sh
-        conda activate \$ANNOTATEVARIANTS_INSTALL/opt/AnnotateVariantsEnvironment
-
-	samtools index ${bam_MT.baseName}_marked_duplicates.bam
-        """
+	sample_name=\$(echo ${bam_MT.baseName} | cut -d _ -f 1)
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/MT/Sample_vcf/\${sample_name}_MT_merged_filtered_trimmed_filtered_sites.vcf.gz ]; then
+		touch ${bam_MT.baseName}_marked_duplicates.bam
+		touch ${bam_MT.baseName}_marked_duplicates.bam.bai
+	else
+        	singularity exec -B /mnt/scratch/SILENT/Act3/ -B /mnt/common/SILENT/Act3/ /mnt/common/SILENT/Act3/singularity/gatk4-4.2.0.sif \
+		gatk MarkDuplicates \
+		I=${bam_MT.baseName}.bam \
+		O=${bam_MT.baseName}_marked_duplicates.bam \
+		M=${bam_MT.baseName}_marked_duplicates_metrics.txt
+
+		ANNOTATEVARIANTS_INSTALL=/mnt/common/WASSERMAN_SOFTWARE/AnnotateVariants/
+        	source \$ANNOTATEVARIANTS_INSTALL/opt/miniconda3/etc/profile.d/conda.sh
+        	conda activate \$ANNOTATEVARIANTS_INSTALL/opt/AnnotateVariantsEnvironment
+
+		samtools index ${bam_MT.baseName}_marked_duplicates.bam
+        fi
+	"""
 }
diff --git a/modules/MT_MergeVcfs.nf b/modules/MT_MergeVcfs.nf
index dc2829a..bef7038 100644
--- a/modules/MT_MergeVcfs.nf
+++ b/modules/MT_MergeVcfs.nf
@@ -28,18 +28,23 @@ process MT_MergeVcfs {
         echo ${MT_call_variants.simpleName}
 	sample_name=\$(echo ${MT_call_variants.simpleName} | cut -d _ -f 1)
 	echo \$sample_name
-        
-	singularity exec -B /mnt/scratch/SILENT/Act3/ -B /mnt/common/SILENT/Act3/ /mnt/common/SILENT/Act3/singularity/gatk4-4.2.0.sif \
-        gatk MergeVcfs \
-	I=${MT_call_variants} \
-	I=\${sample_name}_sorted_chrM_Homo_sapiens_assembly38_lifted_over.vcf \
-	O=\${sample_name}_MT_merged_uncollapsed.vcf.gz
-
-	ANNOTATEVARIANTS_INSTALL=/mnt/common/WASSERMAN_SOFTWARE/AnnotateVariants/
-	source \$ANNOTATEVARIANTS_INSTALL/opt/miniconda3/etc/profile.d/conda.sh
-	conda activate \$ANNOTATEVARIANTS_INSTALL/opt/AnnotateVariantsEnvironment
-
-	bcftools norm --rm-dup both \${sample_name}_MT_merged_uncollapsed.vcf.gz -O z -o \${sample_name}_MT_merged.vcf.gz 
-	bcftools index -t \${sample_name}_MT_merged.vcf.gz 
+
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/MT/Sample_vcf/\${sample_name}_MT_merged_filtered_trimmed_filtered_sites.vcf.gz ]; then
+		touch \${sample_name}_MT_merged.vcf.gz
+		touch \${sample_name}_MT_merged.vcf.gz.tbi
+	else
+		singularity exec -B /mnt/scratch/SILENT/Act3/ -B /mnt/common/SILENT/Act3/ /mnt/common/SILENT/Act3/singularity/gatk4-4.2.0.sif \
+        	gatk MergeVcfs \
+		I=${MT_call_variants} \
+		I=\${sample_name}_sorted_chrM_Homo_sapiens_assembly38_lifted_over.vcf \
+		O=\${sample_name}_MT_merged_uncollapsed.vcf.gz
+
+		ANNOTATEVARIANTS_INSTALL=/mnt/common/WASSERMAN_SOFTWARE/AnnotateVariants/
+		source \$ANNOTATEVARIANTS_INSTALL/opt/miniconda3/etc/profile.d/conda.sh
+		conda activate \$ANNOTATEVARIANTS_INSTALL/opt/AnnotateVariantsEnvironment
+
+		bcftools norm --rm-dup both \${sample_name}_MT_merged_uncollapsed.vcf.gz -O z -o \${sample_name}_MT_merged.vcf.gz 
+		bcftools index -t \${sample_name}_MT_merged.vcf.gz 
+	fi
 	"""
 }
diff --git a/modules/MT_Merge_stat_file.nf b/modules/MT_Merge_stat_file.nf
index 1730f97..a47c920 100644
--- a/modules/MT_Merge_stat_file.nf
+++ b/modules/MT_Merge_stat_file.nf
@@ -13,6 +13,9 @@ process MT_Merge_stat_file {
         input :
         file MT_call_variants_stat
 	file MT_call_variants_shifted_stat
+	val assembly
+	val batch
+	val run
 
         output :
         file '*'
@@ -22,10 +25,14 @@ process MT_Merge_stat_file {
         echo ${MT_call_variants_stat.simpleName}
         sample_name=\$(echo ${MT_call_variants_stat.simpleName} | cut -d _ -f 1)
         echo \$sample_name
-        
-        gatk MergeMutectStats \
-        -stats ${MT_call_variants_stat} \
-        -stats \${sample_name}_sorted_chrM_Homo_sapiens_assembly38.chrM.shifted_by_8000_bases_marked_duplicates_Mutect2.vcf.gz.stats \
-        -O \${sample_name}_MT_merged.stats
+
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/MT/Sample_vcf/\${sample_name}_MT_merged_filtered_trimmed_filtered_sites.vcf.gz ]; then
+		touch \${sample_name}_MT_merged.stats
+        else
+        	gatk MergeMutectStats \
+        	-stats ${MT_call_variants_stat} \
+        	-stats \${sample_name}_sorted_chrM_Homo_sapiens_assembly38.chrM.shifted_by_8000_bases_marked_duplicates_Mutect2.vcf.gz.stats \
+        	-O \${sample_name}_MT_merged.stats
+	fi
 	"""
 }
diff --git a/modules/MT_Picard_CollectWgsMetrics_MT.nf b/modules/MT_Picard_CollectWgsMetrics_MT.nf
index d635d9e..c23f87d 100644
--- a/modules/MT_Picard_CollectWgsMetrics_MT.nf
+++ b/modules/MT_Picard_CollectWgsMetrics_MT.nf
@@ -11,7 +11,7 @@
 process Picard_CollectWgsMetrics_MT {
         tag "${bam_MT}"
 
-        publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/MT/QC/${bam_MT.simpleName}/", mode: 'copy'
+        publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/MT/QC/${bam_MT.simpleName}/", mode: 'copyNoFollow'
 
         input :
 	file ref_genome_MT_file
@@ -28,14 +28,20 @@ process Picard_CollectWgsMetrics_MT {
 
         script :
         """
-	gatk CollectHsMetrics \
-        --java-options "-Xmx8G" \
-	-I ${bam_MT} \
-        --PER_BASE_COVERAGE ${bam_MT.simpleName}_collect_wgs_metrics_${interval_list}.tsv \
-        -R ${ref_genome_MT_file} \
-	-O ${bam_MT.simpleName}.metrics \
-	-TI $interval_list \
-	-BI $interval_list \
-	--SAMPLE_SIZE 1 
+	sample_name=\$(echo ${bam_MT.simpleName} | cut -d _ -f 1)
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/MT/QC/*/\${sample_name}*_collect_wgs_metrics_${interval_list}.tsv ]; then
+		metric=\$(find $params.outdir_ind/${assembly}/*/${run}/MT/QC/*/ -name \${sample_name}*_collect_wgs_metrics_${interval_list}.tsv)
+		ln -s \$metric .
+	else
+		gatk CollectHsMetrics \
+        	--java-options "-Xmx8G" \
+		-I ${bam_MT} \
+        	--PER_BASE_COVERAGE ${bam_MT.simpleName}_collect_wgs_metrics_${interval_list}.tsv \
+        	-R ${ref_genome_MT_file} \
+		-O ${bam_MT.simpleName}.metrics \
+		-TI $interval_list \
+		-BI $interval_list \
+		--SAMPLE_SIZE 1 
+	fi
 	"""
 }
diff --git a/modules/MT_SamtoFastq.nf b/modules/MT_SamtoFastq.nf
index 29b0fa1..543e744 100644
--- a/modules/MT_SamtoFastq.nf
+++ b/modules/MT_SamtoFastq.nf
@@ -11,18 +11,25 @@ process MT_SamtoFastq {
 
         input :
         file Extract_MT_Read
+        val assembly
+        val batch
+        val run
 
         output :
         path '*.fastq', emit : fastq_MT
-//	path '*.fastq.fai', emit : fastq_MT_index
 
         script :
         """
-        gatk SamToFastq \
-        INPUT=${Extract_MT_Read.baseName}.bam \
-        FASTQ=${Extract_MT_Read.baseName}.fastq \
-        INTERLEAVE=true \
-        NON_PF=true
+	sample_name=\$(echo ${Extract_MT_Read.baseName} | cut -d _ -f 1)
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/MT/Sample_vcf/\${sample_name}_MT_merged_filtered_trimmed_filtered_sites.vcf.gz ]; then
+		touch \${sample_name}.fastq
+	else
+        	gatk SamToFastq \
+        	INPUT=${Extract_MT_Read.baseName}.bam \
+        	FASTQ=${Extract_MT_Read.baseName}.fastq \
+        	INTERLEAVE=true \
+        	NON_PF=true
+	fi
 	"""
 }
 
diff --git a/modules/MT_Step1_input_tsv.nf b/modules/MT_Step1_input_tsv.nf
index e136a3c..42b5723 100644
--- a/modules/MT_Step1_input_tsv.nf
+++ b/modules/MT_Step1_input_tsv.nf
@@ -22,7 +22,12 @@ process MT_Step1_input_tsv {
 
         script:
 	"""
-	cat $Sample_MT_Step1_input_tsv > MT_Step1_input_tsv.tsv
+	sample_name=\$(echo ${Sample_MT_Step1_input_tsv.simpleName} | cut -d _ -f 1)
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/MT/Sample_vcf/\${sample_name}_MT_merged_filtered_trimmed_filtered_sites.vcf.gz ]; then
+		touch MT_Step1_input_tsv.tsv
+	else
+		cat $Sample_MT_Step1_input_tsv > MT_Step1_input_tsv.tsv
+	fi
 	"""
 }
 
diff --git a/modules/MT_Step3_metadata_sample.nf b/modules/MT_Step3_metadata_sample.nf
index f69bdba..52e023f 100644
--- a/modules/MT_Step3_metadata_sample.nf
+++ b/modules/MT_Step3_metadata_sample.nf
@@ -15,18 +15,21 @@ process MT_Step3_metadata_sample {
 
 	script:
 	"""
-	source /cm/shared/BCCHR-apps/env_vars/unset_BCM.sh
-	source /cvmfs/soft.computecanada.ca/config/profile/bash.sh
-	module load StdEnv/2020
-	module load r/4.1.2
+        sample_name=\$(echo ${haplocheck.simpleName} | sed 's/_.*//' )
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/MT/Sample_vcf/\${sample_name}_MT_merged_filtered_trimmed_filtered_sites.vcf.gz ]; then
+		touch \${sample_name}_conta_cov.tsv
+	else
+		source /cm/shared/BCCHR-apps/env_vars/unset_BCM.sh
+		source /cvmfs/soft.computecanada.ca/config/profile/bash.sh
+		module load StdEnv/2020
+		module load r/4.1.2
 
-	Silent_Genomes_R=/mnt/common/SILENT/Act3/R/
-	mkdir -p \${Silent_Genomes_R}/.local/R/\$EBVERSIONR/
-	export R_LIBS=\${Silent_Genomes_R}/.local/R/\$EBVERSIONR/
+		Silent_Genomes_R=/mnt/common/SILENT/Act3/R/
+		mkdir -p \${Silent_Genomes_R}/.local/R/\$EBVERSIONR/
+		export R_LIBS=\${Silent_Genomes_R}/.local/R/\$EBVERSIONR/
 
-	sample_name=\$(echo ${haplocheck.simpleName} | sed 's/_.*//' )
-
-	Rscript ../../../modules/MT_Step3_metadata_sample.R \${sample_name}_sorted.mosdepth.summary.txt ${haplocheck}
-	mv conta_cov.tsv \${sample_name}_conta_cov.tsv
+		Rscript ../../../modules/MT_Step3_metadata_sample.R \${sample_name}_sorted.mosdepth.summary.txt ${haplocheck}
+		mv conta_cov.tsv \${sample_name}_conta_cov.tsv
+	fi
 	"""
 }
diff --git a/modules/MT_align_to_MT.nf b/modules/MT_align_to_MT.nf
index d6451cf..4692828 100644
--- a/modules/MT_align_to_MT.nf
+++ b/modules/MT_align_to_MT.nf
@@ -14,6 +14,9 @@ process align_to_MT {
         path ref_genome_MT_file
 	path ref_genome_MT_file_index
 	file fastqfromsam
+	val assembly
+	val batch
+	val run
 
         output :
         path '*.bam', emit : align_to_MT_bam
@@ -21,12 +24,18 @@ process align_to_MT {
 
         script:
 	"""
-	ANNOTATEVARIANTS_INSTALL=/mnt/common/WASSERMAN_SOFTWARE/AnnotateVariants/
-	source \$ANNOTATEVARIANTS_INSTALL/opt/miniconda3/etc/profile.d/conda.sh
-	conda activate \$ANNOTATEVARIANTS_INSTALL/opt/AnnotateVariantsEnvironment
-
-	bwa mem -R "@RG\\tID:${fastqfromsam.simpleName}\\tSM:${fastqfromsam.simpleName}\\tPL:illumina" ${ref_genome_MT_file} ${fastqfromsam.baseName}.fastq | samtools view -u -bS | samtools sort > ${fastqfromsam.simpleName}_${ref_genome_MT_file.baseName}.bam
-
-        samtools index ${fastqfromsam.baseName}_${ref_genome_MT_file.baseName}.bam
-        """
+	sample_name=\$(echo ${fastqfromsam.simpleName} | cut -d _ -f 1)
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/MT/Sample_vcf/\${sample_name}_MT_merged_filtered_trimmed_filtered_sites.vcf.gz ]; then
+		touch ${fastqfromsam.baseName}_${ref_genome_MT_file.baseName}.bam
+		touch ${fastqfromsam.baseName}_${ref_genome_MT_file.baseName}.bam.bai
+	else
+		ANNOTATEVARIANTS_INSTALL=/mnt/common/WASSERMAN_SOFTWARE/AnnotateVariants/
+		source \$ANNOTATEVARIANTS_INSTALL/opt/miniconda3/etc/profile.d/conda.sh
+		conda activate \$ANNOTATEVARIANTS_INSTALL/opt/AnnotateVariantsEnvironment
+
+		bwa mem -R "@RG\\tID:${fastqfromsam.simpleName}\\tSM:${fastqfromsam.simpleName}\\tPL:illumina" ${ref_genome_MT_file} ${fastqfromsam.baseName}.fastq | samtools view -u -bS | samtools sort > ${fastqfromsam.simpleName}_${ref_genome_MT_file.baseName}.bam
+
+        	samtools index ${fastqfromsam.baseName}_${ref_genome_MT_file.baseName}.bam
+        fi
+	"""
 }
diff --git a/modules/MT_call_variants.nf b/modules/MT_call_variants.nf
index b05e947..2cfbc87 100644
--- a/modules/MT_call_variants.nf
+++ b/modules/MT_call_variants.nf
@@ -17,6 +17,9 @@ process MT_call_variants {
 	file MarkDuplicates_bam_MT
         file MarkDuplicates_bam_MT_bai
 	val Mitochondrial_chromosome
+	val assembly
+	val batch
+	val run
 
 	output :
 	path '*_Mutect2.vcf.gz', emit: Mutect2_vcf
@@ -25,14 +28,21 @@ process MT_call_variants {
 
 	script:
 	"""
-	gatk Mutect2 \
-	-R ${ref_genome_MT_file} \
-	-I ${MarkDuplicates_bam_MT.baseName}.bam \
-	-L chrM \
-	--mitochondria-mode \
-	--annotation StrandBiasBySample \
-	--max-reads-per-alignment-start 75 \
-	--max-mnp-distance 0 \
-	-O ${MarkDuplicates_bam_MT.baseName}_Mutect2.vcf.gz
+	sample_name=\$(echo ${MarkDuplicates_bam_MT} | cut -d _ -f 1)
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/MT/Sample_vcf/\${sample_name}_MT_merged_filtered_trimmed_filtered_sites.vcf.gz ]; then
+		touch ${MarkDuplicates_bam_MT.baseName}_Mutect2.vcf.gz
+		touch ${MarkDuplicates_bam_MT.baseName}_Mutect2.vcf.gz.tbi
+		touch ${MarkDuplicates_bam_MT.baseName}_Mutect2.vcf.gz.stats
+	else
+		gatk Mutect2 \
+		-R ${ref_genome_MT_file} \
+		-I ${MarkDuplicates_bam_MT.baseName}.bam \
+		-L chrM \
+		--mitochondria-mode \
+		--annotation StrandBiasBySample \
+		--max-reads-per-alignment-start 75 \
+		--max-mnp-distance 0 \
+		-O ${MarkDuplicates_bam_MT.baseName}_Mutect2.vcf.gz
+	fi
 	"""
 }
diff --git a/modules/MT_haplocheck.nf b/modules/MT_haplocheck.nf
index a80b956..cf1c146 100644
--- a/modules/MT_haplocheck.nf
+++ b/modules/MT_haplocheck.nf
@@ -24,7 +24,12 @@ process MT_haplocheck {
 
 	script :
 	"""
-	/mnt/common/SILENT/Act3/haplocheck/./haplocheck --out ${MT_FilterOut_sites_vcf.simpleName}_haplocheck ${MT_FilterOut_sites_vcf}
+	sample_name=\$(echo ${MT_FilterOut_sites_vcf} | cut -d _ -f 1) # sample name = VCF file name up to the first underscore
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/MT/Sample_vcf/\${sample_name}_MT_merged_filtered_trimmed_filtered_sites.vcf.gz ]; then # final filtered MT VCF already exists for this sample and run
+		touch ${MT_FilterOut_sites_vcf.simpleName}_haplocheck # empty placeholder instead of re-running haplocheck
+	else # no previous result found: run haplocheck on the filtered VCF
+		/mnt/common/SILENT/Act3/haplocheck/./haplocheck --out ${MT_FilterOut_sites_vcf.simpleName}_haplocheck ${MT_FilterOut_sites_vcf}
+	fi
 	"""
 }
 
diff --git a/modules/Picard_CollectAlignmentSummaryMetrics.nf b/modules/Picard_CollectAlignmentSummaryMetrics.nf
index 111cc49..5813f89 100644
--- a/modules/Picard_CollectAlignmentSummaryMetrics.nf
+++ b/modules/Picard_CollectAlignmentSummaryMetrics.nf
@@ -11,7 +11,7 @@
 process Picard_CollectAlignmentSummaryMetrics {
         tag "${bam}"
 
-	publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/QC/Individuals/${bam.simpleName}/Picard_Metrics/", mode: 'copy'
+	publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/QC/Individuals/${bam.simpleName}/Picard_Metrics/", mode: 'copyNoFollow'
 
 	input :
 	file bam
@@ -25,10 +25,15 @@ process Picard_CollectAlignmentSummaryMetrics {
 
 	script :
 	"""
-        gatk CollectAlignmentSummaryMetrics \
-	--java-options "-Xmx2000M" \
-	-I ${bam} \
-	-O ${bam.simpleName}_Picard_Alignment
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/QC/Individuals/${bam.simpleName}/Picard_Metrics/${bam.simpleName}_Picard_Alignment ]; then # metrics already published for this sample and run
+		Picard_Alignment=\$(find $params.outdir_ind/${assembly}/*/${run}/QC/Individuals/${bam.simpleName}/Picard_Metrics/ -name ${bam.simpleName}_Picard_Alignment) # locate the published file
+		ln -s \$Picard_Alignment . # link it into the work directory (explicit destination, as in the other Picard processes)
+	else # nothing published yet: compute the alignment summary metrics
+		gatk CollectAlignmentSummaryMetrics \
+		--java-options "-Xmx2000M" \
+		-I ${bam} \
+		-O ${bam.simpleName}_Picard_Alignment
+	fi
 	"""
 }
 
diff --git a/modules/Picard_CollectWgsMetrics.nf b/modules/Picard_CollectWgsMetrics.nf
index d7a2e05..fb7e9ad 100644
--- a/modules/Picard_CollectWgsMetrics.nf
+++ b/modules/Picard_CollectWgsMetrics.nf
@@ -11,7 +11,7 @@
 process Picard_CollectWgsMetrics {
         tag "${bam.simpleName}"
  
-	publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/QC/Individuals/${bam.simpleName}/Picard_Metrics/", mode: 'copy'
+	publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/QC/Individuals/${bam.simpleName}/Picard_Metrics/", mode: 'copyNoFollow'
 
 	input :
 	file bam
@@ -27,10 +27,15 @@ process Picard_CollectWgsMetrics {
 
 	script :
 	"""
-	gatk CollectWgsMetrics \
-	--java-options "-Xmx8G" \
-	-I ${bam} \
-	-O ${bam.simpleName}_collect_wgs_metrics.txt \
-	-R ${ref_genome}
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/QC/Individuals/${bam.simpleName}/Picard_Metrics/${bam.simpleName}_collect_wgs_metrics.txt ]; then # metrics already published for this sample and run
+		picard_collect_wgs_metrics=\$(find $params.outdir_ind/${assembly}/*/${run}/QC/Individuals/${bam.simpleName}/Picard_Metrics/ -name ${bam.simpleName}_collect_wgs_metrics.txt) # locate the published file
+		ln -s \$picard_collect_wgs_metrics . # link it into the work directory instead of recomputing
+	else # nothing published yet: compute the WGS metrics
+		gatk CollectWgsMetrics \
+		--java-options "-Xmx8G" \
+		-I ${bam} \
+		-O ${bam.simpleName}_collect_wgs_metrics.txt \
+		-R ${ref_genome}
+	fi
 	"""
 }
diff --git a/modules/Picard_QualityScoreDistribution.nf b/modules/Picard_QualityScoreDistribution.nf
index 2edc166..70a19cf 100644
--- a/modules/Picard_QualityScoreDistribution.nf
+++ b/modules/Picard_QualityScoreDistribution.nf
@@ -6,7 +6,7 @@
 process Picard_QualityScoreDistribution {
         tag "${bam.simpleName}"
  
-	publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/QC/Individuals/${bam.simpleName}/Picard_Metrics/", mode: 'copy'
+	publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/QC/Individuals/${bam.simpleName}/Picard_Metrics/", mode: 'copyNoFollow'
 	
 	input :
 	file bam
@@ -20,10 +20,17 @@ process Picard_QualityScoreDistribution {
 
 	script :
 	"""
-	picard "-Xmx2G" QualityScoreDistribution \
-	I=${bam} \
-	O=${bam.simpleName}_qual_score_dist.txt \
-	CHART= ${bam.simpleName}_qual_score_dist.pdf
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/QC/Individuals/${bam.simpleName}/Picard_Metrics/${bam.simpleName}_qual_score_dist.txt ]; then # only the txt report is tested; the pdf is assumed to sit next to it
+		picard_qual_score_txt=\$(find $params.outdir_ind/${assembly}/*/${run}/QC/Individuals/${bam.simpleName}/Picard_Metrics/ -name ${bam.simpleName}_qual_score_dist.txt) # locate the published txt report
+		picard_qual_score_pdf=\$(find $params.outdir_ind/${assembly}/*/${run}/QC/Individuals/${bam.simpleName}/Picard_Metrics/ -name ${bam.simpleName}_qual_score_dist.pdf) # locate the published pdf chart
+		ln -s \$picard_qual_score_txt . # link both files into the work directory instead of recomputing
+                ln -s \$picard_qual_score_pdf .
+	else # nothing published yet: compute the quality score distribution
+		picard "-Xmx2G" QualityScoreDistribution \
+		I=${bam} \
+		O=${bam.simpleName}_qual_score_dist.txt \
+		CHART= ${bam.simpleName}_qual_score_dist.pdf
+	fi
 	"""
 }
 
diff --git a/modules/SNV_data_organization.R b/modules/SNV_data_organization.R
index 0a40e52..1adf501 100644
--- a/modules/SNV_data_organization.R
+++ b/modules/SNV_data_organization.R
@@ -84,7 +84,7 @@ SNV_raw_annotaton_file=read.table(args[4], fill=TRUE, header=TRUE)
 #sex_table = read.table(args[5], header=TRUE)
 
 #Severity table
-severity_table=read.table((args[7]), fill=TRUE, header=TRUE)
+severity_table=read.table((args[5]), fill=TRUE, header=TRUE)
 
 
 #for (i in 1:nrow(SNV_vcf@fix)) {
@@ -115,200 +115,159 @@ for (j in 1:(length(slots_var)-1)){
 		#GT_table_i = GT_table[c(i),]
 		SNV_annot_i = SNV_raw_annotaton_file[SNV_raw_annotaton_file$Uploaded_variation==variant,]
  
-		show(variant)
-		show(head(SNV_raw_annotaton_file$Uploaded_variation))
 
-		#Define variables specific to variant i
-		chr = frequ_file$chrom[i]
-		pos = frequ_file$pos[i]
+                # ????Intergenic ???? 
+                #Type - VARIANT_CLASS
+                type=SNV_annot_i$VARIANT_CLASS
+
+
 		ref = frequ_file$ref[i]
 		alt = frequ_file$alt[i]
-		#chr=SNV_vcf@fix[i,c("CHROM")]
-  		#pos=as.numeric(SNV_vcf@fix[i,c("POS")])
-  		#ref=SNV_vcf@fix[i,c("REF")]
-  		#alt=SNV_vcf@fix[i,c("ALT")]
 
-  		#Variant quality
-  		#quality = SNV_vcf@fix[i,c("QUAL")]
-  
-		#Frequency from Hail file
-		quality = frequ_variant$qual
-		af_total = frequ_variant$af_tot
-		ac_total = frequ_variant$ac_tot
-		an_total = frequ_variant$an_tot
-		hom_alt_total = frequ_variant$hom_alt_tot
-		af_xx = frequ_variant$af_xx
-		ac_xx = frequ_variant$ac_xx
-		an_xx = frequ_variant$an_xx
-		hom_alt_xx  = frequ_variant$hom_alt_xx
-		af_xy = frequ_variant$af_xy
-		ac_xy =frequ_variant$af_xy
-		an_xy = frequ_variant$an_xy
-		hom_alt_xy = frequ_variant$hom_alt_xy
+		#length = number of inserted/deleted bases; nchar() counts the characters of the allele, whereas length() of a single string is always 1
+                if (type=="SNV") {
+                        length=1 # numeric so that the length > 49 filter below is a numeric comparison
+                } else if (type=="insertion") {
+                        length = nchar(as.character(alt))-1 # as.character() guards against ref/alt being read as factors
+                } else if (type=="deletion") {
+                        length = nchar(as.character(ref))-1
+                }
+
+		#If the variant is longer than 49bp, it is classified as an SV and should not be reported by the SNV pipeline
+		if (length > 49){
+			next
+		} else {
+
+			#Define variables specific to variant i
+			chr = frequ_file$chrom[i]
+			pos = frequ_file$pos[i]
+ 
+			#Frequency from Hail file (frequ_variant: Hail frequency values for the current variant)
+			quality = frequ_variant$qual
+			af_tot = frequ_variant$af_tot
+			ac_tot = frequ_variant$ac_tot
+			an_tot = frequ_variant$an_tot
+			hom_tot = frequ_variant$hom_alt_tot
+			af_xx = frequ_variant$af_xx
+			ac_xx = frequ_variant$ac_xx
+			an_xx = frequ_variant$an_xx
+			hom_xx  = frequ_variant$hom_alt_xx
+			af_xy = frequ_variant$af_xy
+			ac_xy = frequ_variant$ac_xy # allele count for XY samples (was wrongly read from the af_xy column)
+			an_xy = frequ_variant$an_xy
+			hom_xy = frequ_variant$hom_alt_xy
 	
-  		# Variant ID, AF_tot, AF_XX, AF_XY, AC_tot, AC_XX, AC_XY, AN_tot, AN_XX, AN_XY, Hom_alt_tot, Hom_alt_XX, Hom_alt_XY
-  		# AN_tot : number of 0/0, 0/1 and 1/1 genotypes (avoid counting the ./.)
-  		#an_total = 2*(sum(GT_table_i == "0/0", na.rm=T) + sum(GT_table_i == "0/1", na.rm=T) + sum(GT_table_i == "1/1", na.rm=T)) 
-  		#AC tot
-  		#ac_total = sum(GT_table_i == "0/1", na.rm=T) + 2*sum(GT_table_i == "1/1", na.rm=T)
-  		#AF tot = AC/AN
-  		#af_total = ac_total/an_total
-  		#Number of individus homozygotes for the alternative allele (1/1)
-  		#hom_alt_total = sum(GT_table_i == "1/1", na.rm=T) 
-
-  		#For XX individuals
-  		#For now, make fake false with individuals and sex : 
-  		#sex_table =  read.table("sample_sex.tsv", header=TRUE)
-  		#Subset the GT_Table for XX individuals
-  		#XX_Samples = sex_table[sex_table$Sex=="XX",1]
-  		#XX_GT_table_i = GT_table_i[XX_Samples]
-  		# AN_XX
-  		#an_XX = 2*(sum(XX_GT_table_i == "0/0", na.rm=T) + sum(XX_GT_table_i == "0/1", na.rm=T) + sum(XX_GT_table_i == "1/1", na.rm=T)) 
-  		#AC XX
-  		#ac_XX = sum(XX_GT_table_i == "0/1", na.rm=T) + 2*sum(XX_GT_table_i == "1/1", na.rm=T)
-  		#AF X = AC/AN
-  		#af_XX = ac_XX/an_XX
-  		#Number of individus homozygotes for the alternative allele (1/1)
-  		#hom_alt_XX = sum(XX_GT_table_i == "1/1", na.rm=T) 
-  
-  		#For XY individuals
-  		#Subset the GT_Table for XY individuals
-  		#XY_Samples = sex_table[sex_table$Sex=="XY",1]
-  		#XY_GT_table_i = GT_table_i[XY_Samples]
-  		# AN_XY
-  		#an_XY = 2*(sum(XY_GT_table_i == "0/0", na.rm=T) + sum(XY_GT_table_i == "0/1", na.rm=T) + sum(XY_GT_table_i == "1/1", na.rm=T)) 
-  		#AC XY
-  		#ac_XY = sum(XY_GT_table_i == "0/1", na.rm=T) + 2*sum(XY_GT_table_i == "1/1", na.rm=T)
-  		#AF X = AC/AN
-  		#af_XY = ac_XY/an_XY
-  		#Number of individus homozygotes for the alternative allele (1/1)
-  		#hom_alt_XY = sum(XY_GT_table_i == "1/1", na.rm=T) 
-
-
-  		# variant_ID, type, length, chr, pos, ref, alt, cadd_score, cadd_interpr, dbsnp_id, dbsnp_url, UCSC_url, ensembl_url, clinvar_url, gnomad_url
-  		# type :SNV, ins or del
-  		#Length
-  		# CADD_score / interpr
-  		# dbsnp_id : From annotation file, "Existing_variation" column
-  		# dbsnp_URL : https://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=<rs_number> // rs1556423501
-  		# UCSC URL : https://genome.ucsc.edu/cgi-bin/hgTracks?db=<assembly>&highlight=<assembly>.chrM%3A<pos>-<pos>&position=chrM%3A<pos-25>-<pos+25> / hg38 or hg19 db=hg38&highlight=hg38.chrM%3A8602-8602&position=chrM%3A8577-8627
-  		# Ensembl_url : https://uswest.ensembl.org/Homo_sapiens/Location/View?r=<chr>%3A<pos-25>-<pos+25> // r=17%3A63992802-64038237
-  		# clinvar URL : https://www.ncbi.nlm.nih.gov/clinvar/variation/<VCV>/ // 692920
-  		# gnomad_URL : https://gnomad.broadinstitute.org/variant/M-<pos>-<ref>-<alt>?dataset=gnomad_r3 // M-8602-T-C
-  
-  		# ????Intergenic ???? 
-  		#Type - VARIANT_CLASS
-  		type=SNV_annot_i$VARIANT_CLASS
-
-  		#length
-  		if (type=="SNV") {
-    			length="1"
-  		} else if (type=="insertion") {
-  			length = length(alt)-1
-  		} else if (type=="deletion") {
-    			length = length(ref)-1
-  		}
-  
-  		# CADD_score / interpr
-  		# If you would like to apply a cutoff on deleteriousness, e.g. to identify potentially pathogenic variants, we would suggest to put a cutoff somewhere between 10 and 20. Maybe at 15, as this also happens to be the median value for all possible canonical splice site changes and non-synonymous variants in CADD v1.0. However, there is not a natural choice here -- it is always arbitrary. We therefore recommend integrating C-scores with other evidence and to rank your candidates for follow up rather than hard filtering.
-		cadd_score=SNV_annot_i$CADD_PHRED
-  		if (cadd_score <=15) {
-			cadd_intr = "Tolerable"
-		} else if (cadd_score > 15) {
-			cadd_intr = "Damaging"
-		}
-
-		#splice_ai=SNV_annot_i$splice_ai
-		splice_ai="0.999"
-
-  		# dbsnp_id : From annotation file (SNV_annot_i), "Existing_variation" column
-  		if (grepl("rs", SNV_annot_i$Existing_variation)) {
-    			dbsnp_id = gsub(",.*$", "", SNV_annot_i$Existing_variation)
-    			# dbsnp_URL : https://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=<rs_number> // rs1556423501
-    			dbsnp_url=paste0("https://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=", dbsnp_id)
-  		} else {
-    			dbsnp_id="NA"
-    			dbsnp_url="NA"
-  		}
+  			# variant_ID, type, length, chr, pos, ref, alt, cadd_score, cadd_interpr, dbsnp_id, dbsnp_url, UCSC_url, ensembl_url, clinvar_url, gnomad_url
+  			# type :SNV, ins or del
+  			#Length
+  			# CADD_score / interpr
+  			# dbsnp_id : From annotation file, "Existing_variation" column
+  			# dbsnp_URL : https://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=<rs_number> // rs1556423501
+  			# UCSC URL : https://genome.ucsc.edu/cgi-bin/hgTracks?db=<assembly>&highlight=<assembly>.chrM%3A<pos>-<pos>&position=chrM%3A<pos-25>-<pos+25> / hg38 or hg19 db=hg38&highlight=hg38.chrM%3A8602-8602&position=chrM%3A8577-8627
+  			# Ensembl_url : https://uswest.ensembl.org/Homo_sapiens/Location/View?r=<chr>%3A<pos-25>-<pos+25> // r=17%3A63992802-64038237
+  			# clinvar URL : https://www.ncbi.nlm.nih.gov/clinvar/variation/<VCV>/ // 692920
+  			# gnomad_URL : https://gnomad.broadinstitute.org/variant/M-<pos>-<ref>-<alt>?dataset=gnomad_r3 // M-8602-T-C
+    
+  			# CADD_score / interpr
+  			# If you would like to apply a cutoff on deleteriousness, e.g. to identify potentially pathogenic variants, we would suggest to put a cutoff somewhere between 10 and 20. Maybe at 15, as this also happens to be the median value for all possible canonical splice site changes and non-synonymous variants in CADD v1.0. However, there is not a natural choice here -- it is always arbitrary. We therefore recommend integrating C-scores with other evidence and to rank your candidates for follow up rather than hard filtering.
+			cadd_score=SNV_annot_i$CADD_PHRED
+  			if (cadd_score <=15) {
+				cadd_intr = "Tolerable"
+			} else if (cadd_score > 15) {
+				cadd_intr = "Damaging"
+			}
+
+			#splice_ai=SNV_annot_i$splice_ai
+			splice_ai="0.999"
+
+  			# dbsnp_id : From annotation file (SNV_annot_i), "Existing_variation" column
+  			if (grepl("rs", SNV_annot_i$Existing_variation)) {
+    				dbsnp_id = gsub(",.*$", "", SNV_annot_i$Existing_variation)
+    				# dbsnp_URL : https://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=<rs_number> // rs1556423501
+    				dbsnp_url=paste0("https://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=", dbsnp_id)
+  			} else {
+    				dbsnp_id="NA"
+    				dbsnp_url="NA"
+  			}
   
-  		# UCSC URL : https://genome.ucsc.edu/cgi-bin/hgTracks?db=<assembly>&highlight=<assembly>.chrM%3A<pos>-<pos>&position=chrM%3A<pos-25>-<pos+25> / hg38 or hg19 db=hg38&highlight=hg38.chrM%3A8602-8602&position=chrM%3A8577-8627
-  		ucsc_url=paste0("https://genome.ucsc.edu/cgi-bin/hgTracks?db=",assembly, "&highlight=", assembly, ".chr", chr, "%3A", pos, "-", pos, "&position=chr", chr, "%3A", pos-25, "-", pos+25)
+  			# UCSC URL : https://genome.ucsc.edu/cgi-bin/hgTracks?db=<assembly>&highlight=<assembly>.chrM%3A<pos>-<pos>&position=chrM%3A<pos-25>-<pos+25> / hg38 or hg19 db=hg38&highlight=hg38.chrM%3A8602-8602&position=chrM%3A8577-8627
+  			ucsc_url=paste0("https://genome.ucsc.edu/cgi-bin/hgTracks?db=",assembly, "&highlight=", assembly, ".chr", chr, "%3A", pos, "-", pos, "&position=chr", chr, "%3A", pos-25, "-", pos+25)
   
-  		# Ensembl_url : https://uswest.ensembl.org/Homo_sapiens/Location/View?r=<chr>%3A<pos-25>-<pos+25> // r=17%3A63992802-64038237
-  		if (assembly=="GRCh38") {
-    			ensembl_url=paste0("https://uswest.ensembl.org/Homo_sapiens/Location/View?r=", chr, "%3A", pos-25, "-", pos+25)
-  		} else if (assembly=="GRCh37") {
-    			ensembl_url=paste0("https://grch37.ensembl.org/Homo_sapiens/Location/View?r=", chr, "%3A", pos-25, "-", pos+25)
-  		}  
+  			# Ensembl_url : https://uswest.ensembl.org/Homo_sapiens/Location/View?r=<chr>%3A<pos-25>-<pos+25> // r=17%3A63992802-64038237
+  			if (assembly=="GRCh38") {
+    				ensembl_url=paste0("https://uswest.ensembl.org/Homo_sapiens/Location/View?r=", chr, "%3A", pos-25, "-", pos+25)
+  			} else if (assembly=="GRCh37") {
+    				ensembl_url=paste0("https://grch37.ensembl.org/Homo_sapiens/Location/View?r=", chr, "%3A", pos-25, "-", pos+25)
+  			}  
     
-  		#clinvar_VCV : From annotation file, "VCV" info
-  		#If clinvar number is specified (If column contains ClinVar)
-  		if (grepl("ClinVar::VCV", SNV_annot_i$VAR_SYNONYMS)) {
-    			clinvar_vcv = str_extract(SNV_annot_i$VAR_SYNONYMS, "(?<=VCV)[0-9]*") 
-    			#clinvar URL : https://www.ncbi.nlm.nih.gov/clinvar/variation/<VCV>/ // 692920
-    			clinvar_url=paste0("https://www.ncbi.nlm.nih.gov/clinvar/variation/", clinvar_vcv, "/")
-  		} else {
-    			clinvar_vcv = "NA"
-    			clinvar_url = "NA"
-  		}
+  			#clinvar_VCV : From annotation file, "VCV" info
+  			#If clinvar number is specified (If column contains ClinVar)
+  			if (grepl("ClinVar::VCV", SNV_annot_i$VAR_SYNONYMS)) {
+    				clinvar_vcv = str_extract(SNV_annot_i$VAR_SYNONYMS, "(?<=VCV)[0-9]*") 
+    				#clinvar URL : https://www.ncbi.nlm.nih.gov/clinvar/variation/<VCV>/ // 692920
+    				clinvar_url=paste0("https://www.ncbi.nlm.nih.gov/clinvar/variation/", clinvar_vcv, "/")
+  			} else {
+    				clinvar_vcv = "NA"
+    				clinvar_url = "NA"
+  			}
     
-  		# gnomad_URL : https://gnomad.broadinstitute.org/variant/M-<pos>-<ref>-<alt>?dataset=gnomad_r3 // M-8602-T-C
-  		#Only displayed if variant is in gnomad table
-  		if (variant %in% gnomad_file$ID_db_gnomad) {
-    			if (assembly=="GRCh38") {
-      				#https://gnomad.broadinstitute.org/variant/1-55051215-G-GA?dataset=gnomad_r3
-      				gnomad_url=paste0("https://gnomad.broadinstitute.org/variant/", chr, "-", pos, "-", ref, "-", alt, "?dataset=gnomad_r3")
-    			} else if (assembly=="GRCh37") {
-      				#https://gnomad.broadinstitute.org/variant/1-55516888-G-GA?dataset=gnomad_r2_1
-      				gnomad_url=paste0("https://gnomad.broadinstitute.org/variant/", chr, "-", pos, "-", ref, "-", alt, "?dataset=gnomad_r2_1")
-   			}
-  		} else {
-    			gnomad_url="NA"
-  		}
-  
-  		#transcript_id
-  		transcript=SNV_annot_i$Feature
-		#hgvsc
-  		hgvsc=SNV_annot_i$HGVSc
-  		#variant_transcript_id
-		variant_transcript=paste0(variant, "_", transcript)
-  		#Consequence
-		consequence_i=SNV_annot_i$Consequence
-		#hgvsp
-		hgvsp=SNV_annot_i$HGVSp
-		#polyphen_score
-		polyphen=SNV_annot_i$PolyPhen
-		#SIFT score
-		sift=SNV_annot_i$SIFT
-
-  		### Create tables
-  		# SNV_IBVL_frequency
-  		# Variant ID, AF_tot, AF_XX, AF_XY, AC_tot, AC_XX, AC_XY, AN_tot, AN_XX, AN_XY, Hom_alt_tot, Hom_alt_XX, Hom_alt_XY, qual
-  		temp_table_frequ_db_i = cbind(variant, af_total, af_xx, af_xy, ac_total, ac_xx, ac_xy, an_total, an_xx, an_xy, hom_alt_total, hom_alt_xx, hom_alt_xy, quality)
-		table_frequ_SNV=unique(rbind.data.frame(table_frequ_SNV, temp_table_frequ_db_i))
-
-  		# SNV_annotation
-  		# variant_ID, type, length, chr, pos, ref, alt, cadd_score, cadd_interpr, dbsnp_id, dbsnp_url, UCSC_url, ensembl_url, clinvar_url, gnomad_url
-  		temp_table_annot_SNV_i = cbind(variant, type, length, chr, pos, ref, alt, cadd_score, cadd_intr, dbsnp_id, dbsnp_url, ucsc_url, ensembl_url, clinvar_url, gnomad_url, clinvar_vcv, splice_ai)
-		table_annot_SNV=unique(rbind.data.frame(table_annot_SNV, temp_table_annot_SNV_i))
-
-  		#Variants_transcript table
-  		#transcript_id, variant, hgvsc
-  		temp_table_variant_transcript_i=cbind(transcript, variant, hgvsc)
-		table_variant_transcript=unique(rbind.data.frame(table_variant_transcript, temp_table_variant_transcript_i))
-
-  		# Variants_consequences table
-		# variant_transcript_id, severity (i.e consequence coded in number)
-                #If there is several consequences on the same line (separrated by a coma), create one line per consequence
-		table_variant_consequence_i=cbind(consequence_i, variant, transcript)
-		table_variant_consequence=unique(rbind.data.frame(table_variant_consequence, table_variant_consequence_i))
-		table_variant_consequence_split=separate_rows(table_variant_consequence, consequence_i, sep = ",")
-
-		#Variants_annotation
-		#variants_transcript_id, hgvsp, polyphen, sift (for polyphen and sift, score and interpretation together)
-		table_variant_annotation_i=cbind(hgvsp, sift, polyphen, transcript, variant)
-		table_variant_annotation=unique(rbind.data.frame(table_variant_annotation, table_variant_annotation_i))
-
+  			# gnomad_URL : https://gnomad.broadinstitute.org/variant/M-<pos>-<ref>-<alt>?dataset=gnomad_r3 // M-8602-T-C
+  			#Only displayed if variant is in gnomad table
+  			if (variant %in% gnomad_file$ID_db_gnomad) {
+    				if (assembly=="GRCh38") {
+      					#https://gnomad.broadinstitute.org/variant/1-55051215-G-GA?dataset=gnomad_r3
+      					gnomad_url=paste0("https://gnomad.broadinstitute.org/variant/", chr, "-", pos, "-", ref, "-", alt, "?dataset=gnomad_r3")
+    				} else if (assembly=="GRCh37") {
+      					#https://gnomad.broadinstitute.org/variant/1-55516888-G-GA?dataset=gnomad_r2_1
+      					gnomad_url=paste0("https://gnomad.broadinstitute.org/variant/", chr, "-", pos, "-", ref, "-", alt, "?dataset=gnomad_r2_1")
+   				}
+  			} else {
+    				gnomad_url="NA"
+  			}
+  	
+  			#transcript_id
+  			transcript=SNV_annot_i$Feature
+			#hgvsc
+  			hgvsc=SNV_annot_i$HGVSc
+  			#variant_transcript_id
+			variant_transcript=paste0(variant, "_", transcript)
+  			#Consequence
+			consequence_i=SNV_annot_i$Consequence
+			#hgvsp
+			hgvsp=SNV_annot_i$HGVSp
+			#polyphen_score
+			polyphen=SNV_annot_i$PolyPhen
+			#SIFT score
+			sift=SNV_annot_i$SIFT
+
+  			### Create tables
+  			# SNV_IBVL_frequency
+  			# Variant ID, AF_tot, AF_XX, AF_XY, AC_tot, AC_XX, AC_XY, AN_tot, AN_XX, AN_XY, Hom_alt_tot, Hom_alt_XX, Hom_alt_XY, qual
+  			temp_table_frequ_db_i = cbind(variant, af_tot, af_xx, af_xy, ac_tot, ac_xx, ac_xy, an_tot, an_xx, an_xy, hom_tot, hom_xx, hom_xy, quality)
+			table_frequ_SNV=unique(rbind.data.frame(table_frequ_SNV, temp_table_frequ_db_i))
+	
+	  		# SNV_annotation
+	  		# variant_ID, type, length, chr, pos, ref, alt, cadd_score, cadd_interpr, dbsnp_id, dbsnp_url, UCSC_url, ensembl_url, clinvar_url, gnomad_url
+	  		temp_table_annot_SNV_i = cbind(variant, type, length, chr, pos, ref, alt, cadd_score, cadd_intr, dbsnp_id, dbsnp_url, ucsc_url, ensembl_url, clinvar_url, gnomad_url, clinvar_vcv, splice_ai)
+			table_annot_SNV=unique(rbind.data.frame(table_annot_SNV, temp_table_annot_SNV_i))
+	
+  			#Variants_transcript table
+  			#transcript_id, variant, hgvsc
+  			temp_table_variant_transcript_i=cbind(transcript, variant, hgvsc)
+			table_variant_transcript=unique(rbind.data.frame(table_variant_transcript, temp_table_variant_transcript_i))
+
+  			# Variants_consequences table
+			# variant_transcript_id, severity (i.e consequence coded in number)
+			#If there are several consequences on the same line (separated by a comma), create one line per consequence
+			table_variant_consequence_i=cbind(consequence_i, variant, transcript)
+			table_variant_consequence=unique(rbind.data.frame(table_variant_consequence, table_variant_consequence_i))
+			table_variant_consequence_split=separate_rows(table_variant_consequence, consequence_i, sep = ",")
+	
+			#Variants_annotation
+			#variants_transcript_id, hgvsp, polyphen, sift (for polyphen and sift, score and interpretation together)
+			table_variant_annotation_i=cbind(hgvsp, sift, polyphen, transcript, variant)
+			table_variant_annotation=unique(rbind.data.frame(table_variant_annotation, table_variant_annotation_i))
+		}
 	}       
        
 	### Write table slots
@@ -377,7 +336,7 @@ gnomad_intersect = gnomad_file[gnomad_file$ID_db_gnomad  %in%  frequ_file[,c("rs
 #Keep only the wanted info
 gnomad_intersect_mini=gnomad_intersect[,c("ID_db_gnomad", "AF", "AC", "AN", "nhomalt")]
 #rename the columns as expected in the SQL
-colnames(gnomad_intersect_mini)=c("variant", "af_total", "ac_total", "an_total", "hom_alt_total")
+colnames(gnomad_intersect_mini)=c("variant", "af_tot", "ac_tot", "an_tot", "hom_tot")
 write.table(gnomad_intersect_mini, file=paste0("genomic_gnomad_frequencies_", chromosome, ".tsv"), quote=FALSE, row.names = FALSE, sep="\t")
 
 # Gene table
diff --git a/modules/SNV_data_organization.nf b/modules/SNV_data_organization.nf
index 1de89ad..e502ec1 100644
--- a/modules/SNV_data_organization.nf
+++ b/modules/SNV_data_organization.nf
@@ -7,7 +7,7 @@
 // Run a R script that organize the SNV variants information in the tables expected to be displayed in the IBVL interface
 
 process SNV_data_organization {
-        tag "${chr}"
+        tag "${SNV_annot_merged}"
 
 	publishDir "$params.outdir_pop/${assembly}/${run}/Oracle_table/genomic_ibvl_frequencies/", mode: 'copy', pattern: "genomic_ibvl_frequencies_*"
         publishDir "$params.outdir_pop/${assembly}/${run}/Oracle_table/genomic_gnomad_frequencies/", mode: 'copy', pattern: "genomic_gnomad_frequencies_*"
@@ -26,8 +26,6 @@ process SNV_data_organization {
 	path SNV_annot_merged
 	val assembly
 	val run
-	each chr
-	path sex_table
 	path severity_table
 
 	output :
@@ -44,7 +42,9 @@ process SNV_data_organization {
 	mkdir -p \${Silent_Genomes_R}/.local/R/\$EBVERSIONR/
 	export R_LIBS=\${Silent_Genomes_R}/.local/R/\$EBVERSIONR/
 
-	Rscript ../../../modules/SNV_data_organization.R $assembly gnomad_frequency_table_${chr}.tsv ${chr}_frequ.tsv SNV_filtered_samples_variants_${chr}_SNV_annotation_table_merged_nohash.tsv $sex_table $run $severity_table
+	chr=\$(echo ${SNV_annot_merged.simpleName} | sed 's/^.*_\\([^_]*\\)\$/\\1/' ) # chr = text after the last underscore of the annotation file simpleName; NOTE(review): confirm the file name really ends with the chromosome label
+
+	Rscript ../../../modules/SNV_data_organization.R $assembly gnomad_frequency_table_\${chr}.tsv \${chr}_frequ.tsv ${SNV_annot_merged} $severity_table # args: assembly, gnomAD table, frequency table, annotation table, severity table
 	"""
 }
 
diff --git a/modules/SV_concat_by_sample.nf b/modules/SV_concat_by_sample.nf
index dbee5fb..9233c6a 100644
--- a/modules/SV_concat_by_sample.nf
+++ b/modules/SV_concat_by_sample.nf
@@ -10,7 +10,7 @@
 process SV_concat_by_sample {
 	tag "${sample_name}"
 
-	publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/SV/Sample/Concat_by_sample", mode: 'copy'
+	publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/SV/Sample/Concat_by_sample", mode: 'copyNoFollow'
 
 	input:
         tuple(path(vcfs), path(indexes), val(sample_name))
@@ -24,6 +24,11 @@ process SV_concat_by_sample {
 	script:
 	output_file = "${sample_name}.concat-svs.vcf"
 	"""
-	bcftools concat -a -O v -o ${output_file} *.vcf.gz
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/SV/Sample/Concat_by_sample/${sample_name}.concat-svs.vcf ]; then # concatenated VCF already published for this sample and run (the closing bracket must be its own word)
+		concat_vcf=\$(find $params.outdir_ind/${assembly}/*/${run}/SV/Sample/Concat_by_sample/  -name ${sample_name}.concat-svs.vcf) # locate the published file
+		ln -s \$concat_vcf . # link it into the work directory instead of recomputing
+	else # nothing published yet: concatenate the per-caller VCFs of this sample
+		bcftools concat -a -O v -o ${output_file} *.vcf.gz
+	fi
 	"""
 }
diff --git a/modules/SV_data_organization.R b/modules/SV_data_organization.R
index d792c77..19ec03d 100644
--- a/modules/SV_data_organization.R
+++ b/modules/SV_data_organization.R
@@ -90,177 +90,181 @@ for (j in 1:(length(slots_var)-1)){
 
 	for (i in min_i: max_i){
 		#show(i)
-		variant=SV_vcf@fix[i,c("ID")]
-		#To remove the multiallelic info ffrom the ID	
-		#variant = gsub(";.*$", "", variant)
-		GT_table_i = GT_table[c(i),]
-		SV_annot_i = SV_raw_annotaton_file[SV_raw_annotaton_file$Uploaded_variation==variant,]
-  
-		#Define variables specific to variant i
-		chr=SV_vcf@fix[i,c("CHROM")]
-  		pos=as.numeric(SV_vcf@fix[i,c("POS")])
-  		ref=SV_vcf@fix[i,c("REF")]
-  		alt=SV_vcf@fix[i,c("ALT")]
-
-  		#Variant quality
-  		quality = SV_vcf@fix[i,c("QUAL")]
- 
-                #Type - VARIANT_CLASS (from annotation)
-                type=SV_annot_i$VARIANT_CLASS
-                #Type from vcf
-                type_vcf=extract.info(SV_vcf, element = "SVTYPE")[i]
 
-                #length
+		#If the SV length is less than 50bp, it should be called and included within the SNV / small indel pipeline
+
+		#length
                 #Info available in the INFO part of the vcf
-		length = extract.info(SV_vcf, element = "SVLEN")[i]
-
-		#Other info specific to SV
-		AVG_LEN  = as.numeric(extract.info(SV_vcf, element = "AVG_LEN")[i])
-		AVG_START = as.numeric(extract.info(SV_vcf, element = "AVG_START")[i])
-		AVG_END = as.numeric(extract.info(SV_vcf, element = "AVG_END")[i])
-		#This method indicate Jasmine, which is not the case
-		SVMETHOD = extract.info(SV_vcf, element = "SVMETHOD")[i]
-		#This is to extract the actual method
-		IDLIST = extract.info(SV_vcf, element = "IDLIST")[i]
-		if (grepl("Manta", IDLIST, fixed=TRUE) ){
-			algorithm = "Manta"
-		} else {
-			algorithm = "Smoove"
-		}
+                length = extract.info(SV_vcf, element = "SVLEN")[i]
+                #Length available 
+		#The average length can be a decimal number, so it is rounded to the nearest integer
+		AVG_LEN  = round(as.numeric(extract.info(SV_vcf, element = "AVG_LEN")[i]), digit = 0)
 
-		variant_ID_chr_start_end_type=paste0(chr, "_", AVG_START, "_", AVG_END, "_", type_vcf)
-
-  		# Variant ID, AF_tot, AF_XX, AF_XY, AC_tot, AC_XX, AC_XY, AN_tot, AN_XX, AN_XY, Hom_alt_tot, Hom_alt_XX, Hom_alt_XY
-  		# AN_tot : number of 0/0, 0/1 and 1/1 genotypes (avoid counting the ./.)
-  		an_total = 2*(sum(GT_table_i == "0/0", na.rm=T) + sum(GT_table_i == "0/1", na.rm=T) + sum(GT_table_i == "1/1", na.rm=T)) 
-  		#AC tot
-  		ac_total = sum(GT_table_i == "0/1", na.rm=T) + 2*sum(GT_table_i == "1/1", na.rm=T)
-  		#AF tot = AC/AN
-  		af_total = ac_total/an_total
-  		#Number of individus homozygotes for the alternative allele (1/1)
-  		hom_alt_total = sum(GT_table_i == "1/1", na.rm=T) 
-
-  		#For XX individuals
-  		#For now, make fake false with individuals and sex : 
-  		#sex_table =  read.table("sample_sex.tsv", header=TRUE)
-  		#Subset the GT_Table for XX individuals
-  		XX_Samples = sex_table[sex_table$Sex=="XX",1]
-  		XX_GT_table_i = GT_table_i[XX_Samples]
-  		# AN_XX
-  		an_xx = 2*(sum(XX_GT_table_i == "0/0", na.rm=T) + sum(XX_GT_table_i == "0/1", na.rm=T) + sum(XX_GT_table_i == "1/1", na.rm=T)) 
-  		#AC XX
-  		ac_xx = sum(XX_GT_table_i == "0/1", na.rm=T) + 2*sum(XX_GT_table_i == "1/1", na.rm=T)
-  		#AF X = AC/AN
-  		af_xx = ac_xx/an_xx
-  		#Number of individus homozygotes for the alternative allele (1/1)
-  		hom_alt_xx = sum(XX_GT_table_i == "1/1", na.rm=T) 
-  
-  		#For XY individuals
-  		#Subset the GT_Table for XY individuals
-  		XY_Samples = sex_table[sex_table$Sex=="XY",1]
-  		XY_GT_table_i = GT_table_i[XY_Samples]
-  		# AN_XY
-  		an_xy = 2*(sum(XY_GT_table_i == "0/0", na.rm=T) + sum(XY_GT_table_i == "0/1", na.rm=T) + sum(XY_GT_table_i == "1/1", na.rm=T)) 
-  		#AC XY
-  		ac_xy = sum(XY_GT_table_i == "0/1", na.rm=T) + 2*sum(XY_GT_table_i == "1/1", na.rm=T)
-  		#AF X = AC/AN
-  		af_xy = ac_xy/an_xy
-  		#Number of individus homozygotes for the alternative allele (1/1)
-  		hom_alt_xy = sum(XY_GT_table_i == "1/1", na.rm=T) 
-
-		#Some SV are too long and not annotated by the annot workflowm, need to define consequence and gene to avoid error
-		if (length(SV_annot_i$Consequence)>1) { 
-			#Consequence
-			consequence=SV_annot_i$Consequence
-
-                	#gene
-			gene=SV_annot_i$SYMBOL
+		if (abs(as.numeric(length)) < 50 & abs(AVG_LEN) < 50) { # SVLEN is not converted to numeric above; compare magnitudes (deletion lengths are negative per the VCF spec)
+			next
 		} else {
-			consequence = "NotAnnotated"
-			gene = "NotAnnotated"
-		}
-
-
-  		# variant_ID, type, length, chr, pos, ref, alt, cadd_score, cadd_interpr, dbsnp_id, dbsnp_url, UCSC_url, ensembl_url, clinvar_url, gnomad_url
-  		# UCSC URL : https://genome.ucsc.edu/cgi-bin/hgTracks?db=<assembly>&highlight=<assembly>.chrM%3A<pos>-<pos>&position=chrM%3A<pos-25>-<pos+25> / hg38 or hg19 db=hg38&highlight=hg38.chrM%3A8602-8602&position=chrM%3A8577-8627
-  		# Ensembl_url : https://uswest.ensembl.org/Homo_sapiens/Location/View?r=<chr>%3A<pos-25>-<pos+25> // r=17%3A63992802-64038237
-  		# clinvar URL : https://www.ncbi.nlm.nih.gov/clinvar/variation/<VCV>/ // 692920
-  		# gnomad_URL : https://gnomad.broadinstitute.org/variant/M-<pos>-<ref>-<alt>?dataset=gnomad_r3 // M-8602-T-C
+			variant=SV_vcf@fix[i,c("ID")]
+			#To remove the multiallelic info from the ID
+			#variant = gsub(";.*$", "", variant)
+			GT_table_i = GT_table[c(i),]
+			SV_annot_i = SV_raw_annotaton_file[SV_raw_annotaton_file$Uploaded_variation==variant,]
+  
+			#Define variables specific to variant i
+			chr=SV_vcf@fix[i,c("CHROM")]
+  			pos=as.numeric(SV_vcf@fix[i,c("POS")])
+  			ref=SV_vcf@fix[i,c("REF")]
+  			alt=SV_vcf@fix[i,c("ALT")]
+
+  			#Variant quality
+  			quality = SV_vcf@fix[i,c("QUAL")]
+ 
+                	#Type - VARIANT_CLASS (from annotation)
+                	type=SV_annot_i$VARIANT_CLASS
+                	#Type from vcf
+                	type_vcf=extract.info(SV_vcf, element = "SVTYPE")[i]
+
+                	#Other info available
+			AVG_START = round(as.numeric(extract.info(SV_vcf, element = "AVG_START")[i]), digit = 0)
+			AVG_END = round(as.numeric(extract.info(SV_vcf, element = "AVG_END")[i]), digit = 0)
+			#This method indicate Jasmine, which is not the case
+			SVMETHOD = extract.info(SV_vcf, element = "SVMETHOD")[i]
+			#This is to extract the actual method
+			IDLIST = extract.info(SV_vcf, element = "IDLIST")[i]
+			if (grepl("Manta", IDLIST, fixed=TRUE) ){
+				algorithm = "Manta"
+			} else {
+				algorithm = "Smoove"
+			}
+
+			variant_ID_chr_start_end_type=paste0(chr, "_", AVG_START, "_", AVG_END, "_", type_vcf)
+	
+  			# Variant ID, AF_tot, AF_XX, AF_XY, AC_tot, AC_XX, AC_XY, AN_tot, AN_XX, AN_XY, Hom_alt_tot, Hom_alt_XX, Hom_alt_XY
+  			# AN_tot : number of 0/0, 0/1 and 1/1 genotypes (avoid counting the ./.)
+  			an_tot = 2*(sum(GT_table_i == "0/0", na.rm=T) + sum(GT_table_i == "0/1", na.rm=T) + sum(GT_table_i == "1/1", na.rm=T)) 
+  			#AC tot
+  			ac_tot = sum(GT_table_i == "0/1", na.rm=T) + 2*sum(GT_table_i == "1/1", na.rm=T)
+  			#AF tot = AC/AN
+  			af_tot = ac_tot/an_tot
+  			#Number of individus homozygotes for the alternative allele (1/1)
+  			hom_tot = sum(GT_table_i == "1/1", na.rm=T) 
+
+  			#For XX individuals
+  			#For now, make fake false with individuals and sex : 
+  			#sex_table =  read.table("sample_sex.tsv", header=TRUE)
+  			#Subset the GT_Table for XX individuals
+  			XX_Samples = sex_table[sex_table$Sex=="XX",1]
+  			XX_GT_table_i = GT_table_i[XX_Samples]
+  			# AN_XX
+  			an_xx = 2*(sum(XX_GT_table_i == "0/0", na.rm=T) + sum(XX_GT_table_i == "0/1", na.rm=T) + sum(XX_GT_table_i == "1/1", na.rm=T)) 
+  			#AC XX
+  			ac_xx = sum(XX_GT_table_i == "0/1", na.rm=T) + 2*sum(XX_GT_table_i == "1/1", na.rm=T)
+  			#AF X = AC/AN
+  			af_xx = ac_xx/an_xx
+  			#Number of individus homozygotes for the alternative allele (1/1)
+  			hom_xx = sum(XX_GT_table_i == "1/1", na.rm=T) 
+  
+ 	 		#For XY individuals
+  			#Subset the GT_Table for XY individuals
+  			XY_Samples = sex_table[sex_table$Sex=="XY",1]
+  			XY_GT_table_i = GT_table_i[XY_Samples]
+  			# AN_XY
+  			an_xy = 2*(sum(XY_GT_table_i == "0/0", na.rm=T) + sum(XY_GT_table_i == "0/1", na.rm=T) + sum(XY_GT_table_i == "1/1", na.rm=T)) 
+  			#AC XY
+  			ac_xy = sum(XY_GT_table_i == "0/1", na.rm=T) + 2*sum(XY_GT_table_i == "1/1", na.rm=T)
+  			#AF X = AC/AN
+  			af_xy = ac_xy/an_xy
+  			#Number of individus homozygotes for the alternative allele (1/1)
+  			hom_xy = sum(XY_GT_table_i == "1/1", na.rm=T) 
+
+			#Some SVs are too long and are not annotated by the annotation workflow; consequence and gene need to be defined here to avoid an error
+			if (length(SV_annot_i$Consequence)>0) { # any annotation row counts; only variants with no row at all fall back to NotAnnotated
+				#Consequence
+				consequence=SV_annot_i$Consequence
+
+                		#gene
+				gene=SV_annot_i$SYMBOL
+			} else {
+				consequence = "NotAnnotated"
+				gene = "NotAnnotated"
+			}
+
+	
+  			# variant_ID, type, length, chr, pos, ref, alt, cadd_score, cadd_interpr, dbsnp_id, dbsnp_url, UCSC_url, ensembl_url, clinvar_url, gnomad_url
+  			# UCSC URL : https://genome.ucsc.edu/cgi-bin/hgTracks?db=<assembly>&highlight=<assembly>.chrM%3A<pos>-<pos>&position=chrM%3A<pos-25>-<pos+25> / hg38 or hg19 db=hg38&highlight=hg38.chrM%3A8602-8602&position=chrM%3A8577-8627
+  			# Ensembl_url : https://uswest.ensembl.org/Homo_sapiens/Location/View?r=<chr>%3A<pos-25>-<pos+25> // r=17%3A63992802-64038237
+  			# clinvar URL : https://www.ncbi.nlm.nih.gov/clinvar/variation/<VCV>/ // 692920
+  			# gnomad_URL : https://gnomad.broadinstitute.org/variant/M-<pos>-<ref>-<alt>?dataset=gnomad_r3 // M-8602-T-C
   
 
-  		# UCSC URL : https://genome.ucsc.edu/cgi-bin/hgTracks?db=<assembly>&highlight=<assembly>.chrM%3A<pos>-<pos>&position=chrM%3A<pos-25>-<pos+25> / hg38 or hg19 db=hg38&highlight=hg38.chrM%3A8602-8602&position=chrM%3A8577-8627
-  		ucsc_url=paste0("https://genome.ucsc.edu/cgi-bin/hgTracks?db=",assembly, "&highlight=", assembly, ".chr", chr, "%3A", AVG_START, "-", AVG_END, "&position=chr", chr, "%3A", AVG_START-(0.25*AVG_LEN), "-", AVG_END+(0.25*AVG_LEN))
+  			# UCSC URL : https://genome.ucsc.edu/cgi-bin/hgTracks?db=<assembly>&highlight=<assembly>.chrM%3A<pos>-<pos>&position=chrM%3A<pos-25>-<pos+25> / hg38 or hg19 db=hg38&highlight=hg38.chrM%3A8602-8602&position=chrM%3A8577-8627
+  			ucsc_url=paste0("https://genome.ucsc.edu/cgi-bin/hgTracks?db=",assembly, "&highlight=", assembly, ".chr", chr, "%3A", AVG_START, "-", AVG_END, "&position=chr", chr, "%3A", AVG_START-(0.25*AVG_LEN), "-", AVG_END+(0.25*AVG_LEN))
   
-		#Could be added in V2, not part of the current SQL
-  		# Ensembl_url : https://uswest.ensembl.org/Homo_sapiens/Location/View?r=<chr>%3A<pos-25>-<pos+25> // r=17%3A63992802-64038237
-  		#if (assembly=="GRCh38") {
-    		#	ensembl_url=paste0("https://uswest.ensembl.org/Homo_sapiens/Location/View?r=", chr, "%3A", AVG_START-(0.25*AVG_LEN), "-", AVG_END+(0.25*AVG_LEN))
-  		#} else if (assembly=="GRCh37") {
-    		#	ensembl_url=paste0("https://grch37.ensembl.org/Homo_sapiens/Location/View?r=", chr, "%3A", AVG_START-(0.25*AVG_LEN), "-", AVG_END+(0.25*AVG_LEN))
-  		#}  
+			#Could be added in V2, not part of the current SQL
+  			# Ensembl_url : https://uswest.ensembl.org/Homo_sapiens/Location/View?r=<chr>%3A<pos-25>-<pos+25> // r=17%3A63992802-64038237
+  			#if (assembly=="GRCh38") {
+    			#	ensembl_url=paste0("https://uswest.ensembl.org/Homo_sapiens/Location/View?r=", chr, "%3A", AVG_START-(0.25*AVG_LEN), "-", AVG_END+(0.25*AVG_LEN))
+  			#} else if (assembly=="GRCh37") {
+    			#	ensembl_url=paste0("https://grch37.ensembl.org/Homo_sapiens/Location/View?r=", chr, "%3A", AVG_START-(0.25*AVG_LEN), "-", AVG_END+(0.25*AVG_LEN))
+  			#}  
     
-                #Could be added in V2, not part of the current SQL
-		# dbsnp_id : From annotation file (SNV_annot_i), "Existing_variation" column
-  		#if (grepl("rs", SV_annot_i$Existing_variation)) {
-    		#	dbsnp_id = gsub(",.*$", "", SV_annot_i$Existing_variation)
-    			# dbsnp_URL : https://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=<rs_number> // rs1556423501
-    		#	dbsnp_url=paste0("https://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=", dbsnp_id)
-  		#} else {
-    		#	dbsnp_id="NA"
-    		#	dbsnp_url="NA"
-  		#}
-
-		#For SV V1, the gnomAD URL will point to the region view in gnomAD SV
-	      	#gnomAD SV is currently only avaialble in GRCh37
-		#https://gnomad.broadinstitute.org/region/22-18738600-18747000?dataset=gnomad_sv_r2_1
-		gnomad_id = "TBD"
-		if (assembly=="GRCh37") {
-			gnomad_url_region=paste0("https://gnomad.broadinstitute.org/region/", chr, "-", AVG_START, "-", AVG_END, "?dataset=gnomad_sv_r2_1")
-		} else {
-                        gnomad_url_region="NA"
-                }
-
-
-		##For complex variations (translocations including a chr2)
-		#To do, need to find one in a vcf
-		if (type_vcf == "BND") {
-			chr2="TBD"
-			chr2_pos1="TBD"
-			ucsc_url2="TBD"
-			gnomad_id2="TBD"
-			gnomad_url2="TBD"
+               		#Could be added in V2, not part of the current SQL
+			# dbsnp_id : From annotation file (SNV_annot_i), "Existing_variation" column
+  			#if (grepl("rs", SV_annot_i$Existing_variation)) {
+    			#	dbsnp_id = gsub(",.*$", "", SV_annot_i$Existing_variation)
+    				# dbsnp_URL : https://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=<rs_number> // rs1556423501
+    			#	dbsnp_url=paste0("https://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?rs=", dbsnp_id)
+  			#} else {
+    			#	dbsnp_id="NA"
+    			#	dbsnp_url="NA"
+  			#}
+
+			#For SV V1, the gnomAD URL will point to the region view in gnomAD SV
+	      		#gnomAD SV is currently only available in GRCh37
+			#https://gnomad.broadinstitute.org/region/22-18738600-18747000?dataset=gnomad_sv_r2_1
+			gnomad_id = "TBD"
+			if (assembly=="GRCh37") {
+				gnomad_url_region=paste0("https://gnomad.broadinstitute.org/region/", chr, "-", AVG_START, "-", AVG_END, "?dataset=gnomad_sv_r2_1")
+			} else {
+                	        gnomad_url_region="NA"
+                	}
+
+
+			##For complex variations (translocations including a chr2)
+			#To do, need to find one in a vcf
+			if (type_vcf == "BND") {
+				chr2="TBD"
+				chr2_pos1="TBD"
+				ucsc_url2="TBD"
+				gnomad_id2="TBD"
+				gnomad_url2="TBD"
 			
-			#CTX table
-                	#variant, chr2, chr2_pos1, ucsc_url2, gnomad_id2, gnomad_url2
-                	#TO DO
-                	temp_table_ctx_i=cbind(variant, chr2, chr2_pos1, ucsc_url2, gnomad_id2, gnomad_url2)
-                	table_ctx=unique(rbind.data.frame(table_ctx, temp_table_ctx_i))
+				#CTX table
+                		#variant, chr2, chr2_pos1, ucsc_url2, gnomad_id2, gnomad_url2
+                		#TO DO
+                		temp_table_ctx_i=cbind(variant, chr2, chr2_pos1, ucsc_url2, gnomad_id2, gnomad_url2)
+                		table_ctx=unique(rbind.data.frame(table_ctx, temp_table_ctx_i))
+			}
+
+  			### Create tables
+  			# SV_IBVL_frequency
+  			# Variant ID, AF_tot, AF_XX, AF_XY, AC_tot, AC_XX, AC_XY, AN_tot, AN_XX, AN_XY, Hom_alt_tot, Hom_alt_XX, Hom_alt_XY, qual
+  			temp_table_frequ_db_i = cbind(variant, af_tot, af_xx, af_xy, ac_tot, ac_xx, ac_xy, an_tot, an_xx, an_xy, hom_tot, hom_xx, hom_xy, quality)
+			table_frequ_SV=unique(rbind.data.frame(table_frequ_SV, temp_table_frequ_db_i))
+	
+  			# SV_annotation (svs)
+  			# variant_ID, chr1, chr1_pos1 (start), chr1_po2 (end), type, length, algorithm, ucsc_url, gnomad_id, gnomad_url
+  			temp_table_annot_SV_i = cbind(variant, chr, AVG_START, AVG_END, type_vcf, AVG_LEN, algorithm, ucsc_url, gnomad_id, gnomad_url_region)
+			table_annot_SV=unique(rbind.data.frame(table_annot_SV, temp_table_annot_SV_i))
+
+
+			#sv_consequences table
+			#gene, variant, consequence (intronic, intergenic, etc)
+			#No transcript associated to SV, so no ensembl or Refseq
+			# If there are several consequences on the same line (separated by a comma), create one line per consequence
+			temp_table_sv_consequence_i = cbind(gene, variant, consequence)
+			table_sv_consequence=unique(rbind.data.frame(table_sv_consequence, temp_table_sv_consequence_i))
+			table_sv_consequence_split=separate_rows(table_sv_consequence, consequence, sep = ",")
 		}
-
-  		### Create tables
-  		# SV_IBVL_frequency
-  		# Variant ID, AF_tot, AF_XX, AF_XY, AC_tot, AC_XX, AC_XY, AN_tot, AN_XX, AN_XY, Hom_alt_tot, Hom_alt_XX, Hom_alt_XY, qual
-  		temp_table_frequ_db_i = cbind(variant, af_total, af_xx, af_xy, ac_total, ac_xx, ac_xy, an_total, an_xx, an_xy, hom_alt_total, hom_alt_xx, hom_alt_xy, quality)
-		table_frequ_SV=unique(rbind.data.frame(table_frequ_SV, temp_table_frequ_db_i))
-
-  		# SV_annotation (svs)
-  		# variant_ID, chr1, chr1_pos1 (start), chr1_po2 (end), type, length, algorithm, ucsc_url, gnomad_id, gnomad_url
-  		temp_table_annot_SV_i = cbind(variant, chr, AVG_START, AVG_END, type_vcf, AVG_LEN, algorithm, ucsc_url, gnomad_id, gnomad_url_region)
-		table_annot_SV=unique(rbind.data.frame(table_annot_SV, temp_table_annot_SV_i))
-
-
-		#sv_consequences table
-		#gene, variant, consequence (intronic, intergenic, etc)
-		#No transcript associated to SV, so no ensembl or Refseq
-		# If there is several consequences on the same line (separrated by a coma), create one line per consequence
-		temp_table_sv_consequence_i = cbind(gene, variant, consequence)
-		show(i)
-		show(SV_annot_i)
-		show(length(SV_annot_i$Consequence))
-		show(temp_table_sv_consequence_i)
-		table_sv_consequence=unique(rbind.data.frame(table_sv_consequence, temp_table_sv_consequence_i))
-		table_sv_consequence_split=separate_rows(table_sv_consequence, consequence, sep = ",")
-
 	}       
        
 	### Write table slots
@@ -285,7 +289,7 @@ file.remove(list_frequ_tables_slots)
 list_annot_tables_slots <- list.files(pattern = paste0("table_annot_SV_slot"))
 tables_annot_slots=lapply(list_annot_tables_slots, read.table, header=TRUE)
 combined_tables_annot_slots=do.call(rbind, tables_annot_slots)
-colnames(combined_tables_annot_slots)=c("variant", "chr1", "chr1_pos1", "chr1_pos2", "type", "length", "algorithm", "ucsc_url", "gnomad_id", "gnomad_url")
+colnames(combined_tables_annot_slots)=c("variant", "chr1", "chr1_pos1", "chr1_pos2", "sv_type", "sv_length", "algorithm", "ucsc_url", "gnomad_id", "gnomad_url")
 write.table(combined_tables_annot_slots, file=paste0("svs_", var_type, "_", chromosome,".tsv"), quote=FALSE, row.names = FALSE, sep="\t")
 file.remove(list_annot_tables_slots)
 
diff --git a/modules/SV_data_organization.nf b/modules/SV_data_organization.nf
index 73049db..1ae9028 100644
--- a/modules/SV_data_organization.nf
+++ b/modules/SV_data_organization.nf
@@ -41,6 +41,6 @@ process SV_data_organization {
 	vcf_name=\$(echo ${SV_vcf.simpleName} | sed 's/_[^_]*\$//' )
 	chr=\$(echo ${SV_vcf.simpleName} | sed 's/^.*_\\([^_]*\\)\$/\\1/' )
 
-	Rscript ../../../modules/SV_data_organization.R $assembly ${SV_vcf} \${vcf_name}_\${chr}_${var_type}_annotation_table_merged_nohash.tsv $sex_table $run ${var_type}
+	Rscript ../../../modules/SV_data_organization.R $assembly ${SV_vcf} \${vcf_name}_${var_type}_annotation_table_merged_nohash_\${chr}.tsv $sex_table $run ${var_type}
 	"""
 }
diff --git a/modules/SV_manta.nf b/modules/SV_manta.nf
index a41bdbd..859653e 100644
--- a/modules/SV_manta.nf
+++ b/modules/SV_manta.nf
@@ -10,7 +10,7 @@
 process SV_manta {
 	tag "${bam.simpleName}"
     	
-	publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/SV/Sample/manta", mode: 'copy'
+	publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/SV/Sample/manta", mode: 'copyNoFollow'
 	
 	input:
 	file bam
@@ -28,21 +28,28 @@ process SV_manta {
    
 	script:
 	"""
-        echo ${bam.simpleName}
-        sample_name=\$(echo ${bam.simpleName} | cut -d _ -f 1)
-        echo \$sample_name > sample.txt
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/SV/Sample/manta/${bam.simpleName}_diploidSV.vcf.gz ]; then
+		manta_vcf=\$(find $params.outdir_ind/${assembly}/*/${run}/SV/Sample/manta/ -name ${bam.simpleName}_diploidSV.vcf.gz)
+		manta_index=\$(find $params.outdir_ind/${assembly}/*/${run}/SV/Sample/manta/ -name ${bam.simpleName}_diploidSV.vcf.gz.tbi)
+		ln -s \$manta_vcf .
+		ln -s \$manta_index .
+	else
+        	echo ${bam.simpleName}
+        	sample_name=\$(echo ${bam.simpleName} | cut -d _ -f 1)
+        	echo \$sample_name > sample.txt
 
-	configManta.py \
-	--bam ${bam} \
-	--referenceFasta ${reference} \
-	--runDir . \
-	--callRegions ${cr_bed}
+		configManta.py \
+		--bam ${bam} \
+		--referenceFasta ${reference} \
+		--runDir . \
+		--callRegions ${cr_bed}
 
-	python2 ./runWorkflow.py \
-	-j ${task.cpus} \
-	-m local
+		python2 ./runWorkflow.py \
+		-j ${task.cpus} \
+		-m local
 	
-	bcftools reheader -s sample.txt results/variants/diploidSV.vcf.gz > ${bam.simpleName}_diploidSV.vcf.gz
-	bcftools index -f --tbi ${bam.simpleName}_diploidSV.vcf.gz
+		bcftools reheader -s sample.txt results/variants/diploidSV.vcf.gz > ${bam.simpleName}_diploidSV.vcf.gz
+		bcftools index -f --tbi ${bam.simpleName}_diploidSV.vcf.gz
+	fi
 	"""
 }
diff --git a/modules/SV_smoove.nf b/modules/SV_smoove.nf
index d08c3d4..8d51c5a 100644
--- a/modules/SV_smoove.nf
+++ b/modules/SV_smoove.nf
@@ -10,7 +10,7 @@
 process SV_smoove {
 	tag "${bam.simpleName}"
 
-	publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/SV/Sample/smoove", mode: 'copy'
+	publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/SV/Sample/smoove", mode: 'copyNoFollow'
 	
 	input:
 	file bam
@@ -26,20 +26,27 @@ process SV_smoove {
 	
 	script:
 	"""
-	echo ${bam.simpleName}
-	sample_name=\$(echo ${bam.simpleName} | cut -d _ -f 1)
-	echo \$sample_name > sample.txt
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/SV/Sample/smoove/${bam.simpleName}_smoove.vcf.gz ]; then
+		smoove_vcf=\$(find $params.outdir_ind/${assembly}/*/${run}/SV/Sample/smoove/ -name ${bam.simpleName}_smoove.vcf.gz)
+		smoove_index=\$(find $params.outdir_ind/${assembly}/*/${run}/SV/Sample/smoove/ -name ${bam.simpleName}_smoove.vcf.gz.tbi)
+                ln -s \$smoove_vcf .
+                ln -s \$smoove_index .
+	else
+		echo ${bam.simpleName}
+		sample_name=\$(echo ${bam.simpleName} | cut -d _ -f 1)
+		echo \$sample_name > sample.txt
 
-	smoove call \
-	--outdir . \
-	--name ${bam.simpleName} \
-	--fasta ${reference}\
-	${bam}
-  	##-p ${task.cpus} 
+		smoove call \
+		--outdir . \
+		--name ${bam.simpleName} \
+		--fasta ${reference}\
+		${bam}
+  		##-p ${task.cpus} 
 	
-	bcftools view -O u -o ${bam.simpleName}.R.bcf ${bam.simpleName}-smoove.vcf.gz
-	bcftools sort --temp-dir $params.outdir_ind/${assembly}/${batch}/${run}/SV/TMP  -m 2G -O z -o ${bam.simpleName}-smoove.vcf.gz  ${bam.simpleName}.R.bcf
-	bcftools reheader -s sample.txt ${bam.simpleName}-smoove.vcf.gz > ${bam.simpleName}_smoove.vcf.gz
-	bcftools index --tbi ${bam.simpleName}_smoove.vcf.gz
+		bcftools view -O u -o ${bam.simpleName}.R.bcf ${bam.simpleName}-smoove.vcf.gz
+		bcftools sort --temp-dir $params.outdir_ind/${assembly}/${batch}/${run}/SV/TMP  -m 2G -O z -o ${bam.simpleName}-smoove.vcf.gz  ${bam.simpleName}.R.bcf
+		bcftools reheader -s sample.txt ${bam.simpleName}-smoove.vcf.gz > ${bam.simpleName}_smoove.vcf.gz
+		bcftools index --tbi ${bam.simpleName}_smoove.vcf.gz
+	fi
 	"""
 }
diff --git a/modules/align_sort_output_bam.nf b/modules/align_sort_output_bam.nf
index 0541bda..87aae4c 100644
--- a/modules/align_sort_output_bam.nf
+++ b/modules/align_sort_output_bam.nf
@@ -7,11 +7,12 @@
 // Alignment. fastq alignment with bwa mem 
 //	      Sort and index with samtools
 
+// Process should be skipped if bam file already generated
 
 process align_sort_output_bam {
 	tag "$sampleId"
 
-	publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/BAM/", mode: 'copy'
+	publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/BAM/", mode: 'copyNoFollow'
 
 	input :
 	path reference
@@ -27,12 +28,18 @@ process align_sort_output_bam {
 
 	script:
 	"""
-	ANNOTATEVARIANTS_INSTALL=/mnt/common/WASSERMAN_SOFTWARE/AnnotateVariants/
-	source \$ANNOTATEVARIANTS_INSTALL/opt/miniconda3/etc/profile.d/conda.sh
-	conda activate \$ANNOTATEVARIANTS_INSTALL/opt/AnnotateVariantsEnvironment
-
-	bwa mem -t 8 -R '@RG\\tID:${sampleId}\\tSM:${sampleId}' ${reference} ${read_pairs_ch} | samtools view -Sb | samtools sort -o ${sampleId}_sorted.bam	
-	samtools index ${sampleId}_sorted.bam
+	if [ -a ${params.outdir_ind}/${assembly}/*/${run}/BAM/${sampleId}_sorted.bam ]; then
+		bam_file=\$(find ${params.outdir_ind}/${assembly}/*/${run}/BAM/ -name ${sampleId}_sorted.bam)
+                bai_file=\$(find ${params.outdir_ind}/${assembly}/*/${run}/BAM/ -name ${sampleId}_sorted.bam.bai)
+		ln -s \$bam_file .
+		ln -s \$bai_file .
+	else
+		ANNOTATEVARIANTS_INSTALL=/mnt/common/WASSERMAN_SOFTWARE/AnnotateVariants/
+		source \$ANNOTATEVARIANTS_INSTALL/opt/miniconda3/etc/profile.d/conda.sh
+		conda activate \$ANNOTATEVARIANTS_INSTALL/opt/AnnotateVariantsEnvironment
+		bwa mem -t 8 -R '@RG\\tID:${sampleId}\\tSM:${sampleId}' ${reference} ${read_pairs_ch} | samtools view -Sb | samtools sort -o ${sampleId}_sorted.bam
+		samtools index ${sampleId}_sorted.bam
+	fi
 	"""
 }
 
diff --git a/modules/annotation_table_merged.nf b/modules/annotation_table_merged.nf
index 8976847..10b48f2 100644
--- a/modules/annotation_table_merged.nf
+++ b/modules/annotation_table_merged.nf
@@ -11,7 +11,7 @@
 process annotation_table_merged {
 	tag "${chr}"
 
-        publishDir "$params.outdir_pop/${assembly}/${run}/${var_type}/VEP_annotation/", mode: 'copy', pattern : '*_annotation_table_merged.tsv'
+        publishDir "$params.outdir_pop/${assembly}/${run}/${var_type}/VEP_annotation/", mode: 'copy', pattern : '*_annotation_table_merged*' // output name now ends in _annotation_table_merged_<chr>.vcf, so no '.' follows "merged"
 	publishDir "$params.outdir_pop/${assembly}/${run}/QC/${var_type}/${vcf.simpleName}/", mode: 'copy', pattern : '*_VEP_stats*'
 
         input :
@@ -34,9 +34,13 @@ process annotation_table_merged {
 	val var_type 
 
         output :
-        path '*_annotation_table_merged.tsv', emit :  annotation_table_merged
+
         path '*_VEP_merged_stats*', emit : vep_merged_stat
-	path '*_annotation_table_merged_nohash.tsv', emit : annot_table_merged_R
+//If output is a vcf
+	path '*.vcf', emit :  annotation_vcf
+//If output is a tsv
+//        path '*.tsv', emit :  annotation_table_merged
+//	path '*_nohash*', emit : annot_table_merged_R
 
         script :
         """
@@ -46,7 +50,7 @@ process annotation_table_merged {
 
 	vep \
         -i ${vcf} \
-        -o ${vcf.simpleName}_${chr}_${var_type}_annotation_table_merged.tsv \
+        -o ${vcf.simpleName}_${var_type}_annotation_table_merged_${chr}.vcf \
 	--chr ${chr}  \
 	--offline \
 	--merged \
@@ -67,12 +71,13 @@ process annotation_table_merged {
         --check_existing \
         --var_synonyms \
 	--tsl \
-	--tab \
+	--vcf \
         --dir_plugins /mnt/common/SILENT/Act3/VEP/Plugins/ \
-        --plugin CADD,$CADD_1_6_whole_genome_SNVs,$CADD_1_6_InDels \
-        --plugin spliceAI, snv=${spliceai_snv}, indel=${spliceai_indel}\
+	--plugin CADD,$CADD_1_6_whole_genome_SNVs,$CADD_1_6_InDels \
+        --plugin SpliceAI,snv=${spliceai_snv},indel=${spliceai_indel} \
 	--stats_file ${vcf.simpleName}_${chr}_VEP_merged_stats
 
-	sed 's/#Uploaded_variation/Uploaded_variation/g' ${vcf.simpleName}_${chr}_${var_type}_annotation_table_merged.tsv > ${vcf.simpleName}_${chr}_${var_type}_annotation_table_merged_nohash.tsv
+#	change --vcf to --tab and uncomment the line below if the data_organization step is added
+#	sed 's/#Uploaded_variation/Uploaded_variation/g' ${vcf.simpleName}_${var_type}_annotation_table_merged_${chr}.tsv > ${vcf.simpleName}_${var_type}_annotation_table_merged_nohash_${chr}.tsv
 	"""
 }
diff --git a/subworkflow/Hail.nf b/modules/archive/Hail.nf
similarity index 100%
rename from subworkflow/Hail.nf
rename to modules/archive/Hail.nf
diff --git a/subworkflow/QC_indiv.nf b/modules/archive/QC_indiv.nf
similarity index 100%
rename from subworkflow/QC_indiv.nf
rename to modules/archive/QC_indiv.nf
diff --git a/modules/deepvariant.nf b/modules/deepvariant.nf
index b8470fb..68ca91c 100644
--- a/modules/deepvariant.nf
+++ b/modules/deepvariant.nf
@@ -10,7 +10,7 @@
 process deepvariant_call {
         tag "${bam.simpleName}"
 
-        publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/SNV/Sample/", mode: 'copy'
+        publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/SNV/Sample/", mode: 'copyNoFollow'
 
 	input :
 	file reference
@@ -28,12 +28,22 @@ process deepvariant_call {
 
 	script:
 	"""
-	/opt/deepvariant/bin/run_deepvariant \
-	--num_shards=${task.cpus} \
-	--model_type=WGS \
-	--ref=${reference} \
-	--reads=${bam.simpleName}.bam \
-	--output_gvcf=${bam.simpleName}.g.vcf.gz \
-	--output_vcf=${bam.simpleName}.vcf.gz
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/SNV/Sample/${bam.simpleName}.g.vcf.gz ]; then
+		deepvariant_gvcf=\$(find $params.outdir_ind/${assembly}/*/${run}/SNV/Sample/ -name ${bam.simpleName}.g.vcf.gz) 
+		deepvariant_vcf=\$(find $params.outdir_ind/${assembly}/*/${run}/SNV/Sample/ -name ${bam.simpleName}.vcf.gz)
+		deepvariant_vcf_index=\$(find $params.outdir_ind/${assembly}/*/${run}/SNV/Sample/ -name ${bam.simpleName}.vcf.gz.tbi)
+		ln -s \$deepvariant_gvcf .
+		ln -s \$deepvariant_vcf .
+		ln -s \$deepvariant_vcf_index .
+	else
+		/opt/deepvariant/bin/run_deepvariant \
+		--num_shards=${task.cpus} \
+		--intermediate_results_dir . \
+		--model_type=WGS \
+		--ref=${reference} \
+		--reads=${bam.simpleName}.bam \
+		--output_gvcf=${bam.simpleName}.g.vcf.gz \
+		--output_vcf=${bam.simpleName}.vcf.gz
+	fi
 	"""
 }
diff --git a/modules/expansion_hunter.nf b/modules/expansion_hunter.nf
index b7d6378..c3272d9 100644
--- a/modules/expansion_hunter.nf
+++ b/modules/expansion_hunter.nf
@@ -12,7 +12,7 @@
 process expansion_hunter {
 	tag "${bam.simpleName}"
 
-        publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/STR/Sample/", mode: 'copy'
+        publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/STR/Sample/", mode: 'copyNoFollow'
 
 	input:
 	file bam
@@ -30,25 +30,32 @@ process expansion_hunter {
 
 	script:
 	"""
-	${params.ExpansionHunter_dir}/ExpansionHunter \
-	--output-prefix ${bam.simpleName} \
-	--reference $reference  \
-	--reads ${bam} \
-	--variant-catalog ${variant_catalog} \
-	-n ${task.cpus} 
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/STR/Sample/${bam.simpleName}_str.vcf.gz ]; then
+		exp_hunt_vcf=\$(find $params.outdir_ind/${assembly}/*/${run}/STR/Sample/ -name ${bam.simpleName}_str.vcf.gz)
+		exp_hunt_index=\$(find $params.outdir_ind/${assembly}/*/${run}/STR/Sample/ -name ${bam.simpleName}_str.vcf.gz.tbi)
+		ln -s \$exp_hunt_vcf .
+		ln -s \$exp_hunt_index .
+	else
+		${params.ExpansionHunter_dir}/ExpansionHunter \
+		--output-prefix ${bam.simpleName} \
+		--reference $reference  \
+		--reads ${bam} \
+		--variant-catalog ${variant_catalog} \
+		-n ${task.cpus} 
 	
-	# Unload bcchr, and load cvmfs
-        # unload_bcchr
-        source /cm/shared/BCCHR-apps/env_vars/unset_BCM.sh
-        # load cvmfs
-        source /cvmfs/soft.computecanada.ca/config/profile/bash.sh
-
-        module load StdEnv/2020
-	module load bcftools
+		# Unload bcchr, and load cvmfs
+        	# unload_bcchr
+        	source /cm/shared/BCCHR-apps/env_vars/unset_BCM.sh
+        	# load cvmfs
+        	source /cvmfs/soft.computecanada.ca/config/profile/bash.sh
+
+        	module load StdEnv/2020
+		module load bcftools
 		
-	bcftools view -O z -o ${bam.simpleName}_str_noID.vcf.gz ${bam.simpleName}.vcf
-	bcftools index ${bam.simpleName}_str_noID.vcf.gz
-	bcftools annotate --set-id '%CHROM\\_%POS\\_%END\\_%REF\\_%ALT' -O z -o ${bam.simpleName}_str.vcf.gz ${bam.simpleName}_str_noID.vcf.gz
-	bcftools index --tbi ${bam.simpleName}_str.vcf.gz
+		bcftools view -O z -o ${bam.simpleName}_str_noID.vcf.gz ${bam.simpleName}.vcf
+		bcftools index ${bam.simpleName}_str_noID.vcf.gz
+		bcftools annotate --set-id '%CHROM\\_%POS\\_%END\\_%REF\\_%ALT' -O z -o ${bam.simpleName}_str.vcf.gz ${bam.simpleName}_str_noID.vcf.gz
+		bcftools index --tbi ${bam.simpleName}_str.vcf.gz
+	fi
 	"""
 }
diff --git a/modules/fastqc.nf b/modules/fastqc.nf
index 61490cc..16ffe57 100644
--- a/modules/fastqc.nf
+++ b/modules/fastqc.nf
@@ -9,7 +9,7 @@
 process fastqc {
         tag "$sample"
 	
-	publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/QC/Individuals/${sample}_sorted/Fastqc/", mode: 'copy'
+	publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/QC/Individuals/${sample}_sorted/Fastqc/", mode: 'copyNoFollow'
 
 	input:
         tuple (val(sample), file(reads)) 
@@ -23,6 +23,13 @@ process fastqc {
 
 	script:
         """
-	fastqc -t ${task.cpus} ${reads.get(0)} ${reads.get(1)}
-        """
+	if [ -a ${params.outdir_ind}/${assembly}/*/${run}/QC/Individuals/${sample}_sorted/Fastqc/${sample}.R1_fastqc.zip ]; then
+		fastqc_R1=\$(find ${params.outdir_ind}/${assembly}/*/${run}/QC/Individuals/${sample}_sorted/Fastqc/ -name ${sample}.R1_fastqc.zip )
+		fastqc_R2=\$(find ${params.outdir_ind}/${assembly}/*/${run}/QC/Individuals/${sample}_sorted/Fastqc/ -name ${sample}.R2_fastqc.zip )
+		ln -s \$fastqc_R1 .
+		ln -s \$fastqc_R2 .
+	else
+		fastqc -t ${task.cpus} ${reads.get(0)} ${reads.get(1)}
+        fi
+	"""
 }
diff --git a/modules/gnomad_frequency_table.nf b/modules/gnomad_frequency_table.nf
index 9f05b25..58b58cb 100644
--- a/modules/gnomad_frequency_table.nf
+++ b/modules/gnomad_frequency_table.nf
@@ -11,6 +11,8 @@
 process gnomad_frequency_table {
 	tag "${gnomad_vcf}"
 
+	publishDir "$params.reference_dir", mode: 'copy'
+
         input :
         file gnomad_SNV_vcf
 	file gnomad_SNV_index
diff --git a/modules/melt.nf b/modules/melt.nf
index 3d9312b..0fb1289 100644
--- a/modules/melt.nf
+++ b/modules/melt.nf
@@ -9,7 +9,7 @@
 process melt {
 	tag "${bam.simpleName}"
 
-	publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/MEI/Sample/", mode: 'copy'
+	publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/MEI/Sample/", mode: 'copyNoFollow'
 	
 //	module = "BCCHR/Java/1.8.0_231"
 //	conda  "/home/BCRICWH.LAN/Mohammed.Mohammed/miniconda3/envs/sv"   
@@ -32,37 +32,44 @@ process melt {
 
 	script:
 	"""
-        # Unload bcchr, and load cvmfs
-        # unload_bcchr
-        source /cm/shared/BCCHR-apps/env_vars/unset_BCM.sh
-        # load cvmfs
-        source /cvmfs/soft.computecanada.ca/config/profile/bash.sh
+                sample_name=\$(echo ${bam.simpleName} | cut -d _ -f 1)
 
-        module load StdEnv/2020
-        module load vcftools
-        module load bcftools
-	module load bowtie2
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/MEI/Sample/\${sample_name}_mei.vcf.gz ]; then
+		melt_vcf=\$(find $params.outdir_ind/${assembly}/*/${run}/MEI/Sample/ -name \${sample_name}_mei.vcf.gz)
+		melt_index=\$(find $params.outdir_ind/${assembly}/*/${run}/MEI/Sample/ -name \${sample_name}_mei.vcf.gz.tbi)
+		ln -s \$melt_vcf .
+		ln -s \$melt_index .
+	else
+        	# Unload bcchr, and load cvmfs
+        	# unload_bcchr
+        	source /cm/shared/BCCHR-apps/env_vars/unset_BCM.sh
+        	# load cvmfs
+        	source /cvmfs/soft.computecanada.ca/config/profile/bash.sh
 
-	echo ${bam.simpleName}
-	sample_name=\$(echo ${bam.simpleName} | cut -d _ -f 1)
-	mkdir -p \${sample_name}
+        	module load StdEnv/2020
+        	module load vcftools
+        	module load bcftools
+		module load bowtie2
+
+		mkdir -p \${sample_name}
 	
-	java -Xmx8G -jar ${params.Melt_dir}/MELT.jar Single \
-	-b hs37d5/NC_007605 \
-	-t ${transposon_file}  \
-	-h ${reference} \
-	-bamfile $bam \
-	-w \${sample_name} \
-	-n ${genes_file}
+		java -Xmx8G -jar ${params.Melt_dir}/MELT.jar Single \
+		-b hs37d5/NC_007605 \
+		-t ${transposon_file}  \
+		-h ${reference} \
+		-bamfile $bam \
+		-w \${sample_name} \
+		-n ${genes_file}
 
-	# fix issues with  MELT vcfs
-	for name in {ALU,SVA,LINE1};do bcftools annotate -x FMT/GL \${sample_name}/\${name}.final_comp.vcf > \${name}.vcf;done
-	for name in {ALU,SVA,LINE1};do bcftools view -O u -o \${name}.bcf \${name}.vcf;done
-	for name in {ALU,SVA,LINE1};do bcftools sort  -m 2G -O z -o \${name}.vcf.gz \${name}.bcf;done
-	for name in {ALU,SVA,LINE1};do bcftools index --tbi \${name}.vcf.gz;done
-	bcftools concat -a -Oz  -o \${sample_name}_mei_noID.vcf.gz *vcf.gz
-	bcftools annotate --set-id '%CHROM\\_%POS\\_%SVTYPE\\_%SVLEN' -O z -o \${sample_name}_mei.vcf.gz \${sample_name}_mei_noID.vcf.gz
-	bcftools index --tbi \${sample_name}_mei.vcf.gz
+		# fix issues with  MELT vcfs
+		for name in {ALU,SVA,LINE1};do bcftools annotate -x FMT/GL \${sample_name}/\${name}.final_comp.vcf > \${name}.vcf;done
+		for name in {ALU,SVA,LINE1};do bcftools view -O u -o \${name}.bcf \${name}.vcf;done
+		for name in {ALU,SVA,LINE1};do bcftools sort  -m 2G -O z -o \${name}.vcf.gz \${name}.bcf;done
+		for name in {ALU,SVA,LINE1};do bcftools index --tbi \${name}.vcf.gz;done
+		bcftools concat -a -Oz  -o \${sample_name}_mei_noID.vcf.gz *vcf.gz
+		bcftools annotate --set-id '%CHROM\\_%POS\\_%SVTYPE\\_%SVLEN' -O z -o \${sample_name}_mei.vcf.gz \${sample_name}_mei_noID.vcf.gz
+		bcftools index --tbi \${sample_name}_mei.vcf.gz
+	fi
 	"""
 }
 
diff --git a/modules/mosdepth.nf b/modules/mosdepth.nf
index a190544..3dd452b 100644
--- a/modules/mosdepth.nf
+++ b/modules/mosdepth.nf
@@ -10,7 +10,7 @@
 process Mosdepth {
         tag "$bam"
 
-	publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/QC/Individuals/${bam.simpleName}/Mosdepth/", mode: 'copy'
+	publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/QC/Individuals/${bam.simpleName}/Mosdepth/", mode: 'copyNoFollow'
 
 	input :
 	file bam
@@ -25,6 +25,17 @@ process Mosdepth {
 
 	script :
 	"""
-	mosdepth ${bam.simpleName} ${bam}
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/QC/Individuals/${bam.simpleName}/Mosdepth/${bam.simpleName}.mosdepth.global.dist.txt  ]; then
+		glob_dist=\$(find $params.outdir_ind/${assembly}/*/${run}/QC/Individuals/${bam.simpleName}/Mosdepth/ -name ${bam.simpleName}.mosdepth.global.dist.txt)
+		summary=\$(find $params.outdir_ind/${assembly}/*/${run}/QC/Individuals/${bam.simpleName}/Mosdepth/ -name ${bam.simpleName}.mosdepth.summary.txt)
+		per_base_bed=\$(find $params.outdir_ind/${assembly}/*/${run}/QC/Individuals/${bam.simpleName}/Mosdepth/ -name ${bam.simpleName}.per-base.bed.gz)
+		per_base_index=\$(find $params.outdir_ind/${assembly}/*/${run}/QC/Individuals/${bam.simpleName}/Mosdepth/ -name ${bam.simpleName}.per-base.bed.gz.csi)
+		ln -s \$glob_dist .
+		ln -s \$summary .
+		ln -s \$per_base_bed  .
+		ln -s \$per_base_index .
+	else
+		mosdepth ${bam.simpleName} ${bam}
+	fi
 	"""
 }
diff --git a/modules/samtools_fixmate.nf b/modules/samtools_fixmate.nf
index f65e318..8419902 100644
--- a/modules/samtools_fixmate.nf
+++ b/modules/samtools_fixmate.nf
@@ -22,21 +22,27 @@ process samtools_fixmate {
 
 	script:
 	"""
-	ANNOTATEVARIANTS_INSTALL=/mnt/common/WASSERMAN_SOFTWARE/AnnotateVariants/
-	source \$ANNOTATEVARIANTS_INSTALL/opt/miniconda3/etc/profile.d/conda.sh
-	conda activate \$ANNOTATEVARIANTS_INSTALL/opt/AnnotateVariantsEnvironment
-
-	# Resort the bam file by query name for samtools fixmate (coordiante-sorted bam does not work)
-	samtools sort -n -O BAM -@ 20  ${bam} > ${bam.SimpleName}_nsorted.bam
-
-	# Samtools fixmate will add MQ tags
-	samtools fixmate -m -O BAM -@ 20  ${bam.SimpleName}_nsorted.bam  ${bam.SimpleName}_fixmate.bam
-
-	# Now sort bam file by coordinates to resume the pipeline 
-	samtools sort  -@ 20  ${bam.SimpleName}_fixmate.bam -o ${bam.SimpleName}_fixmate_ordered.bam
-
-	# index the sorted bam file
-	samtools index  -@ 20  ${bam.SimpleName}_fixmate_ordered.bam
+	sample_name=\$(echo ${bam.simpleName} | cut -d _ -f 1)
+	if [ -a $params.outdir_ind/${assembly}/*/${run}/MEI/Sample/\${sample_name}_mei.vcf.gz ]; then
+		touch \${sample_name}_fixmate_ordered.bam
+		touch \${sample_name}_fixmate_ordered.bam.bai
+	else
+		ANNOTATEVARIANTS_INSTALL=/mnt/common/WASSERMAN_SOFTWARE/AnnotateVariants/
+		source \$ANNOTATEVARIANTS_INSTALL/opt/miniconda3/etc/profile.d/conda.sh
+		conda activate \$ANNOTATEVARIANTS_INSTALL/opt/AnnotateVariantsEnvironment
+
+		# Re-sort the bam file by query name for samtools fixmate (coordinate-sorted bam does not work)
+		samtools sort -n -O BAM -@ 20  ${bam} > ${bam.SimpleName}_nsorted.bam
+
+		# Samtools fixmate will add MQ tags
+		samtools fixmate -m -O BAM -@ 20  ${bam.SimpleName}_nsorted.bam  ${bam.SimpleName}_fixmate.bam
+
+		# Now sort bam file by coordinates to resume the pipeline 
+		samtools sort  -@ 20  ${bam.SimpleName}_fixmate.bam -o ${bam.SimpleName}_fixmate_ordered.bam
+
+		# index the sorted bam file
+		samtools index  -@ 20  ${bam.SimpleName}_fixmate_ordered.bam
+	fi
 	"""
 }
 
diff --git a/modules/shift_back.nf b/modules/shift_back.nf
index b58654b..071104c 100644
--- a/modules/shift_back.nf
+++ b/modules/shift_back.nf
@@ -1,7 +1,7 @@
 process shift_back {
 	tag "${MT_shifted_CollectMetrics}"
 
-	publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/MT/QC/", mode: 'copy', pattern: "*per_base_coverage.tsv"
+	publishDir "$params.outdir_ind/${assembly}/${batch}/${run}/MT/QC/", mode: 'copyNoFollow', pattern: "*per_base_coverage.tsv"
 
 	input :
 	path MT_shifted_CollectMetrics
@@ -16,20 +16,25 @@ process shift_back {
 
 	script:
 	"""
-	source /cm/shared/BCCHR-apps/env_vars/unset_BCM.sh
-	source /cvmfs/soft.computecanada.ca/config/profile/bash.sh
-	module load StdEnv/2020
-	module load r/4.1.2
+        sample_name=\$(echo ${MT_shifted_CollectMetrics.simpleName} | sed 's/_.*//' )
+	if [ -a $params.outdir_ind/${assembly}/${batch}/${run}/MT/QC/\${sample_name}_per_base_coverage.tsv ]; then  # reuse the already published per-base coverage file
+		per_base_coverage=\$(find $params.outdir_ind/${assembly}/*/${run}/MT/QC/ -name \${sample_name}_per_base_coverage.tsv)
+		ln -s \$per_base_coverage .
+		touch \${sample_name}_MT_Step1_input_tsv.tsv
+	else
+		source /cm/shared/BCCHR-apps/env_vars/unset_BCM.sh
+		source /cvmfs/soft.computecanada.ca/config/profile/bash.sh
+		module load StdEnv/2020
+		module load r/4.1.2
 
-	Silent_Genomes_R=/mnt/common/SILENT/Act3/R/
-	mkdir -p \${Silent_Genomes_R}/.local/R/\$EBVERSIONR/
-	export R_LIBS=\${Silent_Genomes_R}/.local/R/\$EBVERSIONR/
+		Silent_Genomes_R=/mnt/common/SILENT/Act3/R/
+		mkdir -p \${Silent_Genomes_R}/.local/R/\$EBVERSIONR/
+		export R_LIBS=\${Silent_Genomes_R}/.local/R/\$EBVERSIONR/
 
-	sample_name=\$(echo ${MT_shifted_CollectMetrics.simpleName} | sed 's/_.*//' )
+		Rscript ../../../modules/shift_back.R $MT_shifted_CollectMetrics \${sample_name}_sorted_chrM_Homo_sapiens_assembly38_collect_wgs_metrics_non_control_region.chrM.interval_list.tsv
+		mv per_base_coverage.tsv \${sample_name}_per_base_coverage.tsv	
 
-	Rscript ../../../modules/shift_back.R $MT_shifted_CollectMetrics \${sample_name}_sorted_chrM_Homo_sapiens_assembly38_collect_wgs_metrics_non_control_region.chrM.interval_list.tsv
-	mv per_base_coverage.tsv \${sample_name}_per_base_coverage.tsv	
-
-	echo "\${sample_name}\t$params.outdir_ind/${assembly}/${batch}/${run}/MT/QC/\${sample_name}_per_base_coverage.tsv\t\${sample_name}" > \${sample_name}_MT_Step1_input_tsv.tsv
+		echo "\${sample_name}\t$params.outdir_ind/${assembly}/${batch}/${run}/MT/QC/\${sample_name}_per_base_coverage.tsv\t\${sample_name}" > \${sample_name}_MT_Step1_input_tsv.tsv
+	fi
 	"""
 }
diff --git a/nextflow.config b/nextflow.config
index fc1644f..04c9850 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -2,7 +2,7 @@ process.executor = 'slurm'
 process.cache = 'lenient'
 process.queue='silent_q'
 process.shell = ['/bin/bash','-e']
-executor { queueSize = 30 }
+executor { queueSize = 60 }
 
 env {
 	NXF_EXECUTOR = "slurm"
@@ -11,14 +11,14 @@ env {
 launchDir = "/mnt/scratch/SILENT/Act3/Processed/Workflow/Solenne/IBVL_pipeline/"
 
 params {
-	run             		= "Run_20220627"
-	batch           		= "Batch_DryRun"
+	run             		= "Run_20220605"
+	batch           		= "Batch_DryRun_2"
+        reads                           = "/mnt/scratch/SILENT/Act3/GSC_data/*/*.{R1,R2}.fastq.gz"
 	SNV				= "SNV"
 	STR				= "STR"
 	MEI				= "MEI"
 	SV				= "SV"
 	MT				= "MT"
-        reads           		= "/mnt/scratch/SILENT/Act3/GSC_data/Dry_run/*.{R1,R2}.fastq.gz"
 	outdir_ind      		= "/mnt/scratch/SILENT/Act3/Processed/Individual/"
         outdir_pop      		= "/mnt/scratch/SILENT/Act3/Processed/Population"
 	ExpansionHunter_dir		= "/mnt/common/SILENT/Act3/ExpansionHunter-v5.0.0-linux_x86_64/bin/"
@@ -54,6 +54,7 @@ profiles {
                 params.Mitochondrial_chromosome		= 'MT'
 	        params.chrom                           	= (1..22) +  ['X', 'Y']
 //      	params.chrom                           	= '22'
+		params.reference_dir			= '/mnt/common/SILENT/Act3/GRCh37/gnomad/'
 		params.CADD_1_6_whole_genome_SNVs	= '/mnt/common/DATABASES/REFERENCES/GRCh37/CADD/V1.6/whole_genome_SNVs.tsv.gz'
 		params.CADD_1_6_whole_genome_SNVs_index = '/mnt/common/DATABASES/REFERENCES/GRCh37/CADD/V1.6/whole_genome_SNVs.tsv.gz.tbi'
 		params.CADD_1_6_InDels			= '/mnt/common/DATABASES/REFERENCES/GRCh37/CADD/V1.6/InDels.tsv.gz'
@@ -64,6 +65,7 @@ profiles {
                 params.spliceai_indel_index             = '/mnt/common/DATABASES/REFERENCES/GRCh37/SPLICEAI/spliceai_scores.masked.indel.hg19.vcf.gz.tbi'
 		params.gnomad_SNV_vcf			= '/mnt/common/DATABASES/REFERENCES/GRCh37/GNOMAD/V2.1.1/gnomad.genomes.r2.1.1.sites.vcf.gz'
 		params.gnomad_SNV_index			= '/mnt/common/DATABASES/REFERENCES/GRCh37/GNOMAD/V2.1.1/gnomad.genomes.r2.1.1.sites.vcf.gz.tbi'
+		params.gnomad_SNV_frequ			= '/mnt/common/SILENT/Act3/GRCh37/gnomad/*.tsv'
 		params.gnomad_SNV_vcf_chr20		= '/mnt/common/SILENT/Act3/gnomad/gnomad.genomes.r2.1.1.sites.20.vcf.bgz'
 		params.gnomad_SNV_index_chr20		= '/mnt/common/SILENT/Act3/gnomad/gnomad.genomes.r2.1.1.sites.20.vcf.bgz.tbi'        
 		params.cr_bed				= '/mnt/common/SILENT/Act3/GRCh37/cr.bed.gz'
@@ -83,6 +85,7 @@ profiles {
 		params.Mitochondrial_chromosome		= 'MT'
 		params.chrom				= ['chr1', 'chr2', 'chr3', 'chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10', 'chr11', 'chr12', 'chr13', 'chr14', 'chr15', 'chr16', 'chr17', 'chr18', 'chr19', 'chr20', 'chr21', 'chr22', 'chrX', 'chrY']
 //		params.chrom				= 'chr22'
+                params.reference_dir                    = '/mnt/common/SILENT/Act3/GRCh38/gnomad/'
                 params.CADD_1_6_whole_genome_SNVs       = '/mnt/common/DATABASES/REFERENCES/GRCh38/CADD/V1.6/whole_genome_SNVs.tsv.gz'
                 params.CADD_1_6_whole_genome_SNVs_index = '/mnt/common/DATABASES/REFERENCES/GRCh38/CADD/V1.6/whole_genome_SNVs.tsv.gz.tbi'
                 params.CADD_1_6_InDels                  = '/mnt/common/DATABASES/REFERENCES/GRCh38/CADD/V1.6/gnomad.genomes.r3.0.indel.tsv.gz'
@@ -111,7 +114,7 @@ singularity {
 
 process {
 // 4GB, 2cpus
-          withName: 'align_to_MT|MT_MergeVcfs|MarkDuplicates|sample_QC|list_vcfs_txt|MEI_data_organization|split_tsv_by_chr' {
+          withName: 'align_to_MT|MT_MergeVcfs|sample_QC|list_vcfs_txt|MEI_data_organization|split_tsv_by_chr' {
                 memory = 4.GB
         	cpus = 2
 	}
@@ -123,7 +126,7 @@ process {
         }
 
 // 8GB, 2cpus
-          withName: 'melt' {
+          withName: 'melt|MarkDuplicates' {
                 memory = 8.GB
                 cpus = 2
         }
@@ -142,8 +145,8 @@ process {
 
 // 32GB, 4 cpus
           withName: 'annotation_table_merged' {
-                memory = 32.GB
-                cpus = 4
+                memory = 60.GB
+                cpus = 12
         }
 
 // 20GB, 8 cpus
@@ -180,14 +183,14 @@ process {
 //DeepVariant
         withName: 'deepvariant_call' {
 		memory = 60.GB
-                cpus = 20
+                cpus = 39
                 container = "file:///mnt/common/SILENT/Act3/singularity/deepvariant-1.2.0.sif"
         }
 
 //GLnexus
         withName: 'GLnexus_cli' {
-                memory = 120.GB
-                cpus = 20
+                memory = 200.GB
+                cpus = 39
                 container = "file:///mnt/common/SILENT/Act3/singularity/glnexus-1.4.1.sif"
         }
 
diff --git a/sample_QC.txt b/sample_QC.txt
new file mode 100644
index 0000000..3e0bc38
--- /dev/null
+++ b/sample_QC.txt
@@ -0,0 +1,9 @@
+Number of samples removed because of depth metrics: 0
+Number of samples removed because of genotype quality metrics: 0
+Number of samples removed because of call rate metrics: 0
+Number of samples removed because of ratio heterozygous over homozygous: 0
+Number of samples removed because of number of snps: 0
+Number of samples removed because of number of singletons: 0
+Number of samples removed because of ratio insertions over deletions: 0
+Number of samples removed because of ratio transversions / transitions: 0
+Percentage of the samples filtered out: 0.0
diff --git a/subworkflow/ALN.nf b/subworkflow/ALN.nf
deleted file mode 100644
index 701568a..0000000
--- a/subworkflow/ALN.nf
+++ /dev/null
@@ -1,36 +0,0 @@
-// Nextflow sub-workflow
-// Created by Solenne Correard in December 2021
-// Owned by the Silent Genomes Project Activity 3 team
-// Developped to build the IBVL, a background variant library
-
-// Overview of the sub-workflow goal and characteristics :
-// Index the reference genome (specified in the nextflow.config file)
-// Align, sort and index the fastq for each sample (fastq --> bam)
-
-
-// Load the modules for the ALN workflow
-include { align_sort_output_bam } from "./../modules/align_sort_output_bam"
-include { bwa_index; bwa_index as bwa_index_shifted } from "./../modules/bwa_index"
-
-// ALN workflow
-workflow ALN {
-
-	// Load the parameters and files
-	run             = params.run
-	batch           = params.batch
-	assembly        = params.assembly
-	reference       = file (params.ref)
-
-	Channel
-    		.fromFilePairs(params.reads )
-    		.set {read_pairs_ch}
-
-	main:
-		bwa_index(reference)
-		align_sort_output_bam(reference, bwa_index.out, read_pairs_ch, assembly, batch, run)
-	emit :
-		reference_index 	= bwa_index.out.collect()
-		bam_sorted 		= align_sort_output_bam.out.samples_bam
-		bam_sorted_index 	= align_sort_output_bam.out.samples_bam_index
-}
-
diff --git a/subworkflow/Initialisation.nf b/subworkflow/Initialisation.nf
new file mode 100644
index 0000000..e75a653
--- /dev/null
+++ b/subworkflow/Initialisation.nf
@@ -0,0 +1,28 @@
+// Nextflow sub-workflow
+// Created by Solenne Correard in December 2021
+// Owned by the Silent Genomes Project Activity 3 team
+// Developed to build the IBVL, a background variant library
+
+// Overview of the sub-workflow goal and characteristics :
+// Prepare the gnomad frequency files to extract only the necessary information
+
+// This step should be done only once and not re-run every time a sample is added to the BVL
+
+// Load the modules for the Initialisation workflow
+
+include { gnomad_frequency_table } from "./../modules/gnomad_frequency_table"
+
+// Initialisation workflow
+workflow Initialisation {
+
+	// Load the parameters and files
+	gnomad_SNV_vcf          = file (params.gnomad_SNV_vcf)
+	gnomad_SNV_index        = file (params.gnomad_SNV_index)
+        chr                     = params.chrom
+
+
+	main:
+		gnomad_frequency_table(gnomad_SNV_vcf, gnomad_SNV_index, chr)
+}
+
+
diff --git a/subworkflow/MT.nf b/subworkflow/MT.nf
index daf87eb..3a00c17 100644
--- a/subworkflow/MT.nf
+++ b/subworkflow/MT.nf
@@ -88,23 +88,23 @@ workflow MT {
 		bwa_index_shifted(ref_MT_shifted_fasta)
 
                 // Sample specific (Do not need to be run for a previously processed sample)
-		Extract_MT_Read(bam, bai, Mitochondrial_chromosome)
-		MT_SamtoFastq(Extract_MT_Read.out)
-		align_to_MT(ref_MT_fasta, bwa_index.out, MT_SamtoFastq.out.fastq_MT)
-		align_to_MT_shifted(ref_MT_shifted_fasta, bwa_index_shifted.out, MT_SamtoFastq.out.fastq_MT)
-		MarkDuplicates(align_to_MT.out.align_to_MT_bam, align_to_MT.out.align_to_MT_bai)
-		MarkDuplicates_shifted(align_to_MT_shifted.out.align_to_MT_bam, align_to_MT_shifted.out.align_to_MT_bai)
+		Extract_MT_Read(bam, bai, Mitochondrial_chromosome, assembly, batch, run)
+		MT_SamtoFastq(Extract_MT_Read.out, assembly, batch, run)
+		align_to_MT(ref_MT_fasta, bwa_index.out, MT_SamtoFastq.out.fastq_MT, assembly, batch, run)
+		align_to_MT_shifted(ref_MT_shifted_fasta, bwa_index_shifted.out, MT_SamtoFastq.out.fastq_MT, assembly, batch, run)
+		MarkDuplicates(align_to_MT.out.align_to_MT_bam, align_to_MT.out.align_to_MT_bai, assembly, batch, run)
+		MarkDuplicates_shifted(align_to_MT_shifted.out.align_to_MT_bam, align_to_MT_shifted.out.align_to_MT_bai, assembly, batch, run)
 		Picard_CollectWgsMetrics_MT(ref_MT_fasta, ref_MT_fasta_index, non_control_region_interval_list, align_to_MT.out.align_to_MT_bam, align_to_MT.out.align_to_MT_bai, assembly, batch, run)
 		Picard_CollectWgsMetrics_MT_shifted(ref_MT_shifted_fasta, ref_MT_shifted_fasta_index, control_region_shifted_reference_interval_list, align_to_MT_shifted.out.align_to_MT_bam, align_to_MT_shifted.out.align_to_MT_bai, assembly, batch, run)
 		shift_back(Picard_CollectWgsMetrics_MT_shifted.out, Picard_CollectWgsMetrics_MT.out.collect(), assembly, batch, run)
 		MT_Step1_input_tsv(shift_back.out.Sample_MT_Step1_input_tsv.collect(), assembly, batch, run)
-		MT_call_variants(ref_MT_fasta, ref_MT_fasta_index, ref_MT_fasta_dict, MarkDuplicates.out.bam, MarkDuplicates.out.bai, Mitochondrial_chromosome)
-		MT_call_variants_shifted(ref_MT_shifted_fasta, ref_MT_shifted_fasta_index, ref_MT_shifted_fasta_dict, MarkDuplicates_shifted.out.bam, MarkDuplicates_shifted.out.bai, Mitochondrial_chromosome)
+		MT_call_variants(ref_MT_fasta, ref_MT_fasta_index, ref_MT_fasta_dict, MarkDuplicates.out.bam, MarkDuplicates.out.bai, Mitochondrial_chromosome, assembly, batch, run)
+		MT_call_variants_shifted(ref_MT_shifted_fasta, ref_MT_shifted_fasta_index, ref_MT_shifted_fasta_dict, MarkDuplicates_shifted.out.bam, MarkDuplicates_shifted.out.bai, Mitochondrial_chromosome, assembly, batch, run)
 		MT_Liftover(MT_call_variants_shifted.out.Mutect2_vcf, MT_call_variants_shifted.out.Mutect2_vcf_index, ref_MT_fasta, ref_MT_fasta_dict, bwa_index.out, ShiftBack_chain, assembly, batch, run)
 		MT_MergeVcfs(MT_Liftover.out.lifted_vcf.collect(), MT_call_variants.out.Mutect2_vcf, assembly, batch, run)
-		MT_Merge_stat_file(MT_call_variants.out.Mutect2_stat, MT_call_variants_shifted.out.Mutect2_stat.collect())
-		MT_Filter_Mutect_Calls(ref_MT_fasta, ref_MT_fasta_index, ref_MT_fasta_dict, MT_MergeVcfs.out.vcf, MT_MergeVcfs.out.index, MT_Merge_stat_file.out.collect())
-		MT_LeftAlignAndTrimVariants(ref_MT_fasta, ref_MT_fasta_index, ref_MT_fasta_dict, MT_Filter_Mutect_Calls.out.vcf, MT_Filter_Mutect_Calls.out.index)
+		MT_Merge_stat_file(MT_call_variants.out.Mutect2_stat, MT_call_variants_shifted.out.Mutect2_stat.collect(), assembly, batch, run)
+		MT_Filter_Mutect_Calls(ref_MT_fasta, ref_MT_fasta_index, ref_MT_fasta_dict, MT_MergeVcfs.out.vcf, MT_MergeVcfs.out.index, MT_Merge_stat_file.out.collect(), assembly, batch, run)
+		MT_LeftAlignAndTrimVariants(ref_MT_fasta, ref_MT_fasta_index, ref_MT_fasta_dict, MT_Filter_Mutect_Calls.out.vcf, MT_Filter_Mutect_Calls.out.index, assembly, batch, run)
 		MT_FilterOut_sites(ref_MT_fasta, ref_MT_fasta_index, ref_MT_fasta_dict, MT_LeftAlignAndTrimVariants.out.vcf, MT_LeftAlignAndTrimVariants.out.index, blacklist_sites_hg38_MT_file, blacklist_sites_hg38_MT_index_file, assembly, batch, run)
                 MT_haplocheck(MT_FilterOut_sites.out.vcf, assembly, batch, run)
                 MT_Step3_metadata_sample(mosdepth, MT_haplocheck.out.file, assembly, batch, run)
@@ -114,5 +114,6 @@ workflow MT {
 		MT_Step3_metadata(MT_Step3_metadata_sample.out.collect(), assembly, batch, run)
 		Hail_variant_MT_QC(MT_Step1_input_tsv.out, MT_Step2_participant_data.out.MT_Step2_participant_data_tsv, MT_Step2_participant_data.out.participants_to_subset_txt, MT_Step3_metadata.out, assembly, batch, run)
                 annotation_table_merged(Hail_variant_MT_QC.out.vcf, Hail_variant_MT_QC.out.vcf_index, vep_cache_merged, vep_cache_merged_version, assembly, run, assembly_MT, CADD_1_6_whole_genome_SNVs, CADD_1_6_whole_genome_SNVs_index, CADD_1_6_InDels, CADD_1_6_InDels_index, spliceai_snv, spliceai_snv_index, spliceai_indel, spliceai_indel_index, chrM, MT)
-                MT_data_organization(gnomad_MT_frequ, Hail_variant_MT_QC.out.Hail_reduced_annotations, annotation_table_merged.out.annot_table_merged_R, assembly, run, severity_table)
+
+//                MT_data_organization(gnomad_MT_frequ, Hail_variant_MT_QC.out.Hail_reduced_annotations, annotation_table_merged.out.annot_table_merged_R, assembly, run, severity_table)
 }
diff --git a/subworkflow/Mapping.nf b/subworkflow/Mapping.nf
new file mode 100644
index 0000000..63ddd80
--- /dev/null
+++ b/subworkflow/Mapping.nf
@@ -0,0 +1,60 @@
+// Nextflow sub-workflow
+// Created by Solenne Correard in December 2021
+// Owned by the Silent Genomes Project Activity 3 team
+// Developed to build the IBVL, a background variant library
+
+// Overview of the sub-workflow goal and characteristics :
+// Index the reference genome (specified in the nextflow.config file)
+// Align, sort and index the fastq for each sample (fastq --> bam)
+// Generate quality control (QC) metrics for each sample using different software
+// The results are aggregated using multiQC
+
+
+// Load the modules for the Mapping workflow
+include { align_sort_output_bam } from "./../modules/align_sort_output_bam"
+include { bwa_index; bwa_index as bwa_index_shifted } from "./../modules/bwa_index"
+
+include {fastqc} from "./../modules/fastqc"
+include {Mosdepth} from "./../modules/mosdepth"
+include {Picard_CollectWgsMetrics} from "./../modules/Picard_CollectWgsMetrics"
+include {Picard_CollectAlignmentSummaryMetrics} from "./../modules/Picard_CollectAlignmentSummaryMetrics"
+include {Picard_QualityScoreDistribution} from "./../modules/Picard_QualityScoreDistribution"
+include {multiqc_indiv} from "./../modules/multiqc_indiv"
+
+
+// mapping workflow
+workflow Mapping {
+
+	// Load the parameters and files
+        batch           = params.batch
+        assembly        = params.assembly
+        run             = params.run
+        outdir_ind      = file (params.outdir_ind)
+        reference       = file (params.ref)
+        reference_index = file (params.ref_index)
+
+	Channel
+    		.fromFilePairs(params.reads )
+    		.set {read_pairs_ch}
+
+	main:
+		bwa_index(reference)
+		align_sort_output_bam(reference, bwa_index.out, read_pairs_ch, assembly, batch, run)
+
+                q1              = fastqc(read_pairs_ch, outdir_ind, assembly, batch, run)
+                q2              = Mosdepth(align_sort_output_bam.out.samples_bam, align_sort_output_bam.out.samples_bam_index, assembly, batch, run)
+                q3              = Picard_CollectWgsMetrics(align_sort_output_bam.out.samples_bam, align_sort_output_bam.out.samples_bam_index, reference, reference_index, assembly, batch, run)
+                q4              = Picard_CollectAlignmentSummaryMetrics(align_sort_output_bam.out.samples_bam, align_sort_output_bam.out.samples_bam_index, assembly, batch, run)
+                q5              = Picard_QualityScoreDistribution(align_sort_output_bam.out.samples_bam, align_sort_output_bam.out.samples_bam_index, assembly, batch, run)
+                quality_metrics = q1.concat(q2.all_files,q3,q4,q5).collect()
+                multiqc_indiv(quality_metrics, assembly, batch, run)
+
+	emit :
+		reference_index 	= bwa_index.out.collect()
+		bam_sorted 		= align_sort_output_bam.out.samples_bam
+		bam_sorted_index 	= align_sort_output_bam.out.samples_bam_index
+                mosdepth_output 	= Mosdepth.out.summary_stat.collect()
+
+}
+
+
diff --git a/subworkflow/SNV.nf b/subworkflow/SNV.nf
index 8399b4e..2cad9c4 100644
--- a/subworkflow/SNV.nf
+++ b/subworkflow/SNV.nf
@@ -7,6 +7,7 @@
 // Call the SNV variants
 // Include some quality controls (QC) steps
 //	- Plink which defines the sex of each sample based on seevral variables
+//	- Hail which produces several graphs and filters outlier samples and variants
 
 // Load the modules for the SNV workflow
 
@@ -18,6 +19,14 @@ include { bcf_to_vcf } from "./../modules/bcf_to_vcf"
 include { plink_sex_inference } from "./../modules/plink_sex_inference"
 include { sample_QC } from "./../modules/sample_QC"
 
+include { Hail_sample_QC } from "./../modules/Hail_sample_QC"
+include { Hail_variant_QC } from "./../modules/Hail_variant_QC"
+
+include { annotation_table_merged as SNV_annotation_table_merged; annotation_table_merged as MT_annotation_table_merged} from "./../modules/annotation_table_merged"
+
+include { split_tsv_by_chr } from "./../modules/split_tsv_by_chr"
+include { SNV_data_organization } from "./../modules/SNV_data_organization"
+
 // SNV workflow
 
 workflow SNV {
@@ -31,6 +40,20 @@ workflow SNV {
 	reference_index 			= file (params.ref_index)
         SNV                                     = params.SNV
 
+        chr                                     = params.chrom
+        vep_cache_merged                        = file (params.vep_cache_merged)
+        vep_cache_merged_version                = params.vep_cache_merged_version
+        CADD_1_6_whole_genome_SNVs              = file (params.CADD_1_6_whole_genome_SNVs)
+        CADD_1_6_whole_genome_SNVs_index        = file (params.CADD_1_6_whole_genome_SNVs_index)
+        CADD_1_6_InDels                         = file (params.CADD_1_6_InDels)
+        CADD_1_6_InDels_index                   = file (params.CADD_1_6_InDels_index)
+        spliceai_snv                            = file (params.spliceai_snv)
+        spliceai_snv_index                      = file (params.spliceai_snv_index)
+        spliceai_indel                          = file (params.spliceai_indel)
+        spliceai_indel_index                    = file (params.spliceai_indel_index)
+        severity_table                          = file (params.severity_table)
+	gnomad_SNV_frequ			= file (params.gnomad_SNV_frequ)
+
 	// Workflow start here
 	take : 
 		bam
@@ -46,10 +69,17 @@ workflow SNV {
 		GLnexus_cli(list_vcfs_txt.out, run)
 		bcf_to_vcf(GLnexus_cli.out, assembly, batch, run)
 
-                plink_sex_inference(bcf_to_vcf.out.vcf, assembly_hg, assembly, batch, run)
-		sample_QC(plink_sex_inference.out, assembly, batch, run, mosdepth)
+//                plink_sex_inference(bcf_to_vcf.out.vcf, assembly_hg, assembly, batch, run)
+//		sample_QC(plink_sex_inference.out, assembly, batch, run, mosdepth)
+
+                Hail_sample_QC(bcf_to_vcf.out.vcf, assembly, batch, run)
+                Hail_variant_QC(Hail_sample_QC.out.vcf_sample_filtered, Hail_sample_QC.out.filtered_sample_sex, assembly, batch, run)
+                SNV_annotation_table_merged(Hail_variant_QC.out.SNV_filtered_variants_frequ_tot_xx, Hail_variant_QC.out.SNV_filtered_variants_frequ_tot_xx_index, vep_cache_merged, vep_cache_merged_version, assembly, run, assembly, CADD_1_6_whole_genome_SNVs, CADD_1_6_whole_genome_SNVs_index, CADD_1_6_InDels, CADD_1_6_InDels_index, spliceai_snv, spliceai_snv_index, spliceai_indel, spliceai_indel_index, chr, SNV)
+
+//                split_tsv_by_chr(Hail_variant_QC.out.SNV_frequ_tot_xx_xy_tsv, assembly, batch, run)
+//		SNV_data_organization(gnomad_SNV_frequ, split_tsv_by_chr.out.collect(), SNV_annotation_table_merged.out.annot_table_merged_R, assembly, run, severity_table)
 
 	emit :
-		sample_sex_file=sample_QC.out.sample_QC_file
-		SNV_vcf = bcf_to_vcf.out.vcf
+		sample_sex_file=Hail_sample_QC.out.filtered_sample_sex
+
 }
diff --git a/subworkflow/SV.nf b/subworkflow/SV.nf
index 556a5a0..b18b905 100644
--- a/subworkflow/SV.nf
+++ b/subworkflow/SV.nf
@@ -79,9 +79,10 @@ workflow SV {
                 // Aggregated steps (Need to be run everytime a new sample is added to the cohort)
 		SV_vcfs_txt(SV_paragraph_duphold.out.vcf.collect(), assembly, batch, run, SV)
 		SV_merge_samples(SV_vcfs_txt.out, assembly, batch, run, SV)
-		SV_split_vcf_by_chr(SV_merge_samples.out.vcf, assembly, batch, run, chr, SV)
                 SV_annotation(SV_merge_samples.out.vcf, SV_merge_samples.out.index, vep_cache_merged, vep_cache_merged_version, assembly, run, assembly, CADD_1_6_whole_genome_SNVs, CADD_1_6_whole_genome_SNVs_index, CADD_1_6_InDels, CADD_1_6_InDels_index, spliceai_snv, spliceai_snv_index, spliceai_indel, spliceai_indel_index, chr, SV)
-                SV_data_organization(SV_split_vcf_by_chr.out.vcf_onechr, SV_annotation.out.annot_table_merged_R.collect(), assembly, run, SV, sample_sex_file)
+
+//                SV_split_vcf_by_chr(SV_merge_samples.out.vcf, assembly, batch, run, chr, SV)
+//                SV_data_organization(SV_split_vcf_by_chr.out.vcf_onechr, SV_annotation.out.annot_table_merged_R.collect(), assembly, run, SV, sample_sex_file)
 
 
 		//Short Tandem Repeats (STR)
@@ -91,7 +92,8 @@ workflow SV {
                 // Aggregated steps (Need to be run everytime a new sample is added to the cohort)
 		STR_vcfs_txt(expansion_hunter.out.vcf.collect(), assembly, batch, run, STR)
   		STR_merge_samples(STR_vcfs_txt.out, assembly, batch, run, STR)
-                STR_data_organization(STR_merge_samples.out.vcf, variant_catalog, assembly, run, STR)
+ 
+//              STR_data_organization(STR_merge_samples.out.vcf, variant_catalog, assembly, run, STR)
 
 
 
@@ -105,5 +107,6 @@ workflow SV {
 		MEI_merge_samples(MEI_vcfs_txt.out, assembly, batch, run, MEI)
                 MEI_split_vcf_by_chr(MEI_merge_samples.out.vcf, assembly, batch, run, chr, MEI)
                 MEI_annotation(MEI_merge_samples.out.vcf, MEI_merge_samples.out.index, vep_cache_merged, vep_cache_merged_version, assembly, run, assembly, CADD_1_6_whole_genome_SNVs, CADD_1_6_whole_genome_SNVs_index, CADD_1_6_InDels, CADD_1_6_InDels_index, spliceai_snv, spliceai_snv_index, spliceai_indel, spliceai_indel_index, chr, MEI)
-                MEI_data_organization(MEI_split_vcf_by_chr.out.vcf_onechr, MEI_annotation.out.annot_table_merged_R.collect(), assembly, run, MEI, sample_sex_file)
+
+//                MEI_data_organization(MEI_split_vcf_by_chr.out.vcf_onechr, MEI_annotation.out.annot_table_merged_R.collect(), assembly, run, MEI, sample_sex_file)
 }