WIP : Applying corrections to cover vep and exomiser in stub mode tes…

…t, be able to run Exomiser locally with a public test dataset, add params to support the REMM and CADD databases, only allow hg19 and hg38 for the genome param, add template Exomiser analysis files for WES and WGS
Ferlab-Ste-Justine · Sep 17, 2024 · 9e74994 · 9e74994
1 parent b2ce857
commit 9e74994
Show file tree

Hide file tree

Showing 11 changed files with 233 additions and 22 deletions.
diff --git a/assets/TestSampleSheet.csv b/assets/TestSampleSheet.csv
@@ -1,3 +1,4 @@
-familyId,sample,sequencingType,gvcf
-Family1,Test1,WES,https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz
-Family1,Test2,WGS,https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz
+familyId,sample,sequencingType,gvcf,familyPheno
+Family1,Test1,WES,https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz,assets/exomiser/pheno/family1.yml
+Family1,Test2,WES,https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz,assets/exomiser/pheno/family1.yml
+Family2,Test1,WGS,https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz,assets/exomiser/pheno/family2.yml
diff --git a/assets/exomiser/default_exomiser_WES_analysis.yml b/assets/exomiser/default_exomiser_WES_analysis.yml
@@ -0,0 +1,64 @@
+## Exomiser Analysis Template.
+# These are all the possible options for running exomiser. Use this as a template for
+# your own set-up.
+---
+analysisMode: PASS_ONLY
+inheritanceModes: {
+  AUTOSOMAL_DOMINANT: 0.1,
+  AUTOSOMAL_RECESSIVE_HOM_ALT: 0.1,
+  AUTOSOMAL_RECESSIVE_COMP_HET: 2.0,
+  X_DOMINANT: 0.1,
+  X_RECESSIVE_HOM_ALT: 0.1,
+  X_RECESSIVE_COMP_HET: 2.0,
+  MITOCHONDRIAL: 0.2
+}
+frequencySources: [
+    UK10K,
+
+    GNOMAD_E_AFR,
+    GNOMAD_E_AMR,
+  #  GNOMAD_E_ASJ,
+    GNOMAD_E_EAS,
+  #  GNOMAD_E_FIN,
+    GNOMAD_E_NFE,
+  #  GNOMAD_E_OTH,
+    GNOMAD_E_SAS,
+
+    GNOMAD_G_AFR,
+    GNOMAD_G_AMR,
+  #  GNOMAD_G_ASJ,
+    GNOMAD_G_EAS,
+  #  GNOMAD_G_FIN,
+    GNOMAD_G_NFE,
+  #  GNOMAD_G_OTH,
+    GNOMAD_G_SAS
+]
+# Possible pathogenicitySources: (POLYPHEN, MUTATION_TASTER, SIFT), (REVEL, MVP), CADD, REMM, SPLICE_AI, ALPHA_MISSENSE
+# REMM is trained on non-coding regulatory regions
+# *WARNING* if you enable CADD or REMM ensure that you have downloaded and installed the CADD/REMM tabix files
+# and updated their location in the application.properties. Exomiser will not run without this.
+pathogenicitySources: [ REVEL, MVP ]
+# This is the standard Exomiser order.
+steps: [
+    failedVariantFilter: { },
+    variantEffectFilter: {
+      remove: [
+          FIVE_PRIME_UTR_EXON_VARIANT,
+          FIVE_PRIME_UTR_INTRON_VARIANT,
+          THREE_PRIME_UTR_EXON_VARIANT,
+          THREE_PRIME_UTR_INTRON_VARIANT,
+          NON_CODING_TRANSCRIPT_EXON_VARIANT,
+          NON_CODING_TRANSCRIPT_INTRON_VARIANT,
+          CODING_TRANSCRIPT_INTRON_VARIANT,
+          UPSTREAM_GENE_VARIANT,
+          DOWNSTREAM_GENE_VARIANT,
+          INTERGENIC_VARIANT,
+          REGULATORY_REGION_VARIANT
+      ]
+    },
+    frequencyFilter: { maxFrequency: 2.0 },
+    pathogenicityFilter: { keepNonPathogenic: true },
+    inheritanceFilter: { },
+    omimPrioritiser: { },
+    hiPhivePrioritiser: { }
+]
diff --git a/assets/exomiser/default_exomiser_WGS_analysis.yml b/assets/exomiser/default_exomiser_WGS_analysis.yml
@@ -0,0 +1,55 @@
+## Exomiser genome analysis template.
+# These are all the possible options for running exomiser. Use this as a template for
+# your own set-up.
+---
+analysisMode: PASS_ONLY
+inheritanceModes: {
+  AUTOSOMAL_DOMINANT: 0.1,
+  AUTOSOMAL_RECESSIVE_HOM_ALT: 0.1,
+  AUTOSOMAL_RECESSIVE_COMP_HET: 2.0,
+  X_DOMINANT: 0.1,
+  X_RECESSIVE_HOM_ALT: 0.1,
+  X_RECESSIVE_COMP_HET: 2.0,
+  MITOCHONDRIAL: 0.2
+}
+frequencySources: [
+    UK10K,
+
+    GNOMAD_E_AFR,
+    GNOMAD_E_AMR,
+  #  GNOMAD_E_ASJ,
+    GNOMAD_E_EAS,
+  #  GNOMAD_E_FIN,
+    GNOMAD_E_NFE,
+  #  GNOMAD_E_OTH,
+    GNOMAD_E_SAS,
+
+    GNOMAD_G_AFR,
+    GNOMAD_G_AMR,
+  #  GNOMAD_G_ASJ,
+    GNOMAD_G_EAS,
+  #  GNOMAD_G_FIN,
+    GNOMAD_G_NFE,
+  #  GNOMAD_G_OTH,
+    GNOMAD_G_SAS
+]
+# Possible pathogenicitySources: (POLYPHEN, MUTATION_TASTER, SIFT), (REVEL, MVP), CADD, REMM, SPLICE_AI, ALPHA_MISSENSE
+# REMM is trained on non-coding regulatory regions
+# *WARNING* if you enable CADD or REMM ensure that you have downloaded and installed the CADD/REMM tabix files
+# and updated their location in the application.properties. Exomiser will not run without this.
+pathogenicitySources: [ REVEL, MVP ]
+# this is the recommended order for a genome-sized analysis.
+steps: [
+    hiPhivePrioritiser: { },
+  # running the prioritiser followed by a priorityScoreFilter will remove genes
+  # which are least likely to contribute to the phenotype defined in hpoIds, this will
+  # dramatically reduce the time and memory required to analyse a genome.
+  # 0.501 is a good compromise to select good phenotype matches and the best protein-protein interactions hits from hiPhive
+    priorityScoreFilter: { priorityType: HIPHIVE_PRIORITY, minPriorityScore: 0.501 },
+    failedVariantFilter: { },
+    regulatoryFeatureFilter: { },
+    frequencyFilter: { maxFrequency: 2.0 },
+    pathogenicityFilter: { keepNonPathogenic: true },
+    inheritanceFilter: { },
+    omimPrioritiser: { }
+]
diff --git a/assets/exomiser/pheno/family1.yml b/assets/exomiser/pheno/family1.yml
@@ -0,0 +1,30 @@
+---
+id: family1
+proband:
+  subject:
+    id: testN
+    sex: FEMALE
+  phenotypicFeatures:
+    - type:
+        id: HP:0001159
+        label: Syndactyly
+
+pedigree:
+  persons:
+    - individualId: testN
+      paternalId: testT
+      sex: FEMALE
+      affectedStatus: AFFECTED
+    - individualId: testT
+      sex: MALE
+      affectedStatus: UNAFFECTED
+
+metaData:
+  resources:
+    - id: hp
+      name: human phenotype ontology
+      url: http://purl.obolibrary.org/obo/hp.owl
+      version: hp/releases/2019-11-08
+      namespacePrefix: HP
+      iriPrefix: 'http://purl.obolibrary.org/obo/HP_'
+  phenopacketSchemaVersion: 2.0
diff --git a/assets/exomiser/pheno/family2.yml b/assets/exomiser/pheno/family2.yml
@@ -0,0 +1,26 @@
+---
+id: family2
+proband:
+  subject:
+    id: testN
+    sex: FEMALE
+  phenotypicFeatures:
+    - type:
+        id: HP:0001159
+        label: Syndactyly
+
+pedigree:
+  persons:
+    - individualId: testN
+      sex: FEMALE
+      affectedStatus: AFFECTED
+
+metaData:
+  resources:
+    - id: hp
+      name: human phenotype ontology
+      url: http://purl.obolibrary.org/obo/hp.owl
+      version: hp/releases/2019-11-08
+      namespacePrefix: HP
+      iriPrefix: 'http://purl.obolibrary.org/obo/HP_'
+  phenopacketSchemaVersion: 2.0
diff --git a/assets/exomiser/test_exomiser_analysis.yml b/assets/exomiser/test_exomiser_analysis.yml
@@ -0,0 +1,32 @@
+# This exomiser analysis file is used for quick testing with a minimal amount of reference data
+# It uses only one frequency source and only one pathogenicity source.
+---
+analysisMode: PASS_ONLY
+inheritanceModes: {
+  AUTOSOMAL_DOMINANT: 0.1,
+  AUTOSOMAL_RECESSIVE_HOM_ALT: 0.1,
+  AUTOSOMAL_RECESSIVE_COMP_HET: 2.0,
+  X_DOMINANT: 0.1,
+  X_RECESSIVE_HOM_ALT: 0.1,
+  X_RECESSIVE_COMP_HET: 2.0,
+  MITOCHONDRIAL: 0.2
+}
+frequencySources: [
+    UK10K
+]
+pathogenicitySources: [ REVEL]
+# this is the recommended order for a genome-sized analysis.
+steps: [
+    hiPhivePrioritiser: { },
+  # running the prioritiser followed by a priorityScoreFilter will remove genes
+  # which are least likely to contribute to the phenotype defined in hpoIds, this will
+  # dramatically reduce the time and memory required to analyse a genome.
+  # 0.501 is a good compromise to select good phenotype matches and the best protein-protein interactions hits from hiPhive
+    priorityScoreFilter: { priorityType: HIPHIVE_PRIORITY, minPriorityScore: 0.501 },
+    failedVariantFilter: { },
+    regulatoryFeatureFilter: { },
+    frequencyFilter: { maxFrequency: 2.0 },
+    pathogenicityFilter: { keepNonPathogenic: true },
+    inheritanceFilter: { },
+    omimPrioritiser: { }
+]
diff --git a/assets/schema_input.json b/assets/schema_input.json
@@ -36,17 +36,9 @@
             "familyPheno": {
                 "errorMessage": "Filename of the pedigree file, mandatory for exomiser",
                 "meta": ["familypheno"],
-                "anyOf": [
-                    {
-                        "type": "string",
-                        "pattern": "^\\S+$"
-                    },
-                    {
-                        "type": "string",
-                        "maxLength": 0
-                    }
-                ],
-                "default": ""
+                "format": "file-path",
+                "pattern": "^\\S+\\.ya?ml$",
+                "exists": true
 
             }
         },

diff --git a/conf/test.config b/conf/test.config
@@ -46,4 +46,12 @@ params {
                    [name: 'MQ40', expression: 'MQ < 40.0'],
                    [name: 'MQRankSum-12.5', expression: 'MQRankSum < -12.5'],
                    [name: 'ReadPosRankSum-8', expression: 'ReadPosRankSum < -8.0']]
+
+    tools = "vep,exomiser"
+
+    // Exomiser parameters
+    exomiser_analysis = "assets/exomiser/test_exomiser_analysis.yml"
+    exomiser_data_dir = "data-test/reference/exomiser"
+    exomiser_data_version = "2402"
+    genome = "hg38"
 }
diff --git a/modules/local/exomiser/main.nf b/modules/local/exomiser/main.nf
@@ -27,6 +27,8 @@ process EXOMISER {
     script:
     def args = task.ext.args ?: ''
     def exactVcfFile = vcfFile.find { it.name.endsWith("vcf.gz") }
+    def remm_args = params.exomiser_remm_version ? "--exomiser.remm.version=\"${params.exomiser_remm_version}\"": ""
+    def cadd_args = params.exomiser_cadd_version ? "--cadd.version=\"${params.exomiser_cadd_version}\"": ""
 
     """
     #!/bin/bash -eo pipefail
@@ -38,8 +40,9 @@ process EXOMISER {
         --sample ${phenofile} \\
         --output-format=HTML,JSON,TSV_GENE,TSV_VARIANT,VCF \\
         --exomiser.data-directory=/`pwd`/${datadir} \\
-        --exomiser.hg19.data-version="${params.exomiser_data_version}" \\
-        --exomiser.hg38.data-version="${params.exomiser_data_version}" \\
+        ${remm_args} \\
+        ${cadd_args} \\
+        --exomiser.${params.genome}.data-version="${params.exomiser_data_version}" \\
         --exomiser.phenotype.data-version="${params.exomiser_data_version}" \\
         ${args}
     

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -41,7 +41,8 @@
           "type": "string",
           "description": "Name of iGenomes reference.",
           "fa_icon": "fas fa-book",
-          "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
+          "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome hg38`. Only `hg19` and `hg38` are supported. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.",
+          "enum": ["hg19", "hg38"]
         },
         "igenomes_ignore": {
           "type": "boolean",
@@ -263,8 +264,8 @@
         },
         "tools": {
           "type": "string",
-          "enum": ["exomiser", "vep"],
-          "description": "List of tools to use separate with comma.  Available tools [vep, exomiser]"
+          "pattern": "^((vep|exomiser)(,(vep|exomiser))*)?$",
+          "description": "Comma-separated list of tools to use. Available tools: [vep, exomiser]"
         }
       }
     },
@@ -322,6 +323,5 @@
     {
       "$ref": "#/definitions/exomiser_option"
     }
-  ],
-  "properties": {}
+  ]
 }
diff --git a/workflows/postprocessing.nf b/workflows/postprocessing.nf
@@ -163,7 +163,7 @@ workflow POSTPROCESSING {
 
     if (params.tools && params.tools.split(',').contains('exomiser')) {
         s = s.map{meta, files -> 
-                tuple (meta,files,meta.familypheno)}
+                [meta,files,meta.familypheno]}
 
         exomiser_analysis_file = file(params.exomiser_analysis)
         exomiser_data_dir = file(params.exomiser_data_dir)