Skip to content

Commit

Permalink
WIP : Applying corrections to cover vep and exomiser in stub mode tes…
Browse files Browse the repository at this point in the history
…t, Be able to run exomiser locally with a public test dataset, add params to support remm and cadd databases, only allow hg19 and hg38 for genome param, add template exomiser analysis files for WES and WGS
  • Loading branch information
DamienGnst committed Sep 17, 2024
1 parent b2ce857 commit 9e74994
Show file tree
Hide file tree
Showing 11 changed files with 233 additions and 22 deletions.
7 changes: 4 additions & 3 deletions assets/TestSampleSheet.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
familyId,sample,sequencingType,gvcf
Family1,Test1,WES,https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz
Family1,Test2,WGS,https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz
familyId,sample,sequencingType,gvcf,familyPheno
Family1,Test1,WES,https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz,assets/exomiser/pheno/family1.yml
Family1,Test2,WES,https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz,assets/exomiser/pheno/family1.yml
Family2,Test1,WGS,https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz,assets/exomiser/pheno/family2.yml
64 changes: 64 additions & 0 deletions assets/exomiser/default_exomiser_WES_analysis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
## Exomiser exome (WES) analysis template.
# These are all the possible options for running exomiser. Use this as a template for
# your own set-up.
# Compared with the genome (WGS) template shipped next to it, this file uses a
# variantEffectFilter to drop non-coding variant effects and does not apply a
# priorityScoreFilter.
---
analysisMode: PASS_ONLY
# NOTE(review): the numeric values below are presumably the per-inheritance-mode maximum
# allele frequency cut-offs (in percent) used by the upstream Exomiser templates - confirm
# against the Exomiser documentation before changing them.
inheritanceModes: {
AUTOSOMAL_DOMINANT: 0.1,
AUTOSOMAL_RECESSIVE_HOM_ALT: 0.1,
AUTOSOMAL_RECESSIVE_COMP_HET: 2.0,
X_DOMINANT: 0.1,
X_RECESSIVE_HOM_ALT: 0.1,
X_RECESSIVE_COMP_HET: 2.0,
MITOCHONDRIAL: 0.2
}
# Frequency sources that are enabled. Entries commented out inside the list
# (the *_ASJ, *_FIN and *_OTH gnomAD populations) are disabled.
frequencySources: [
UK10K,

GNOMAD_E_AFR,
GNOMAD_E_AMR,
# GNOMAD_E_ASJ,
GNOMAD_E_EAS,
# GNOMAD_E_FIN,
GNOMAD_E_NFE,
# GNOMAD_E_OTH,
GNOMAD_E_SAS,

GNOMAD_G_AFR,
GNOMAD_G_AMR,
# GNOMAD_G_ASJ,
GNOMAD_G_EAS,
# GNOMAD_G_FIN,
GNOMAD_G_NFE,
# GNOMAD_G_OTH,
GNOMAD_G_SAS
]
# Possible pathogenicitySources: (POLYPHEN, MUTATION_TASTER, SIFT), (REVEL, MVP), CADD, REMM, SPLICE_AI, ALPHA_MISSENSE
# REMM is trained on non-coding regulatory regions
# *WARNING* if you enable CADD or REMM ensure that you have downloaded and installed the CADD/REMM tabix files
# and updated their location in the application.properties. Exomiser will not run without this.
pathogenicitySources: [ REVEL, MVP ]
# This is the standard exomiser order.
steps: [
failedVariantFilter: { },
# Variant effects listed under `remove` are filtered out (UTR, intronic, non-coding
# transcript, up/downstream, intergenic and regulatory-region effects).
variantEffectFilter: {
remove: [
FIVE_PRIME_UTR_EXON_VARIANT,
FIVE_PRIME_UTR_INTRON_VARIANT,
THREE_PRIME_UTR_EXON_VARIANT,
THREE_PRIME_UTR_INTRON_VARIANT,
NON_CODING_TRANSCRIPT_EXON_VARIANT,
NON_CODING_TRANSCRIPT_INTRON_VARIANT,
CODING_TRANSCRIPT_INTRON_VARIANT,
UPSTREAM_GENE_VARIANT,
DOWNSTREAM_GENE_VARIANT,
INTERGENIC_VARIANT,
REGULATORY_REGION_VARIANT
]
},
frequencyFilter: { maxFrequency: 2.0 },
pathogenicityFilter: { keepNonPathogenic: true },
inheritanceFilter: { },
omimPrioritiser: { },
hiPhivePrioritiser: { }
]
55 changes: 55 additions & 0 deletions assets/exomiser/default_exomiser_WGS_analysis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
## Exomiser genome (WGS) analysis template.
# These are all the possible options for running exomiser. Use this as a template for
# your own set-up.
# Compared with the exome (WES) template shipped next to it, this file runs the hiPhive
# prioritiser first, applies a priorityScoreFilter, and uses a regulatoryFeatureFilter
# instead of a variantEffectFilter.
---
analysisMode: PASS_ONLY
# NOTE(review): the numeric values below are presumably the per-inheritance-mode maximum
# allele frequency cut-offs (in percent) used by the upstream Exomiser templates - confirm
# against the Exomiser documentation before changing them.
inheritanceModes: {
AUTOSOMAL_DOMINANT: 0.1,
AUTOSOMAL_RECESSIVE_HOM_ALT: 0.1,
AUTOSOMAL_RECESSIVE_COMP_HET: 2.0,
X_DOMINANT: 0.1,
X_RECESSIVE_HOM_ALT: 0.1,
X_RECESSIVE_COMP_HET: 2.0,
MITOCHONDRIAL: 0.2
}
# Frequency sources that are enabled. Entries commented out inside the list
# (the *_ASJ, *_FIN and *_OTH gnomAD populations) are disabled.
frequencySources: [
UK10K,

GNOMAD_E_AFR,
GNOMAD_E_AMR,
# GNOMAD_E_ASJ,
GNOMAD_E_EAS,
# GNOMAD_E_FIN,
GNOMAD_E_NFE,
# GNOMAD_E_OTH,
GNOMAD_E_SAS,

GNOMAD_G_AFR,
GNOMAD_G_AMR,
# GNOMAD_G_ASJ,
GNOMAD_G_EAS,
# GNOMAD_G_FIN,
GNOMAD_G_NFE,
# GNOMAD_G_OTH,
GNOMAD_G_SAS
]
# Possible pathogenicitySources: (POLYPHEN, MUTATION_TASTER, SIFT), (REVEL, MVP), CADD, REMM, SPLICE_AI, ALPHA_MISSENSE
# REMM is trained on non-coding regulatory regions
# *WARNING* if you enable CADD or REMM ensure that you have downloaded and installed the CADD/REMM tabix files
# and updated their location in the application.properties. Exomiser will not run without this.
pathogenicitySources: [ REVEL, MVP ]
# This is the recommended order for a genome-sized analysis.
steps: [
hiPhivePrioritiser: { },
# Running the prioritiser followed by a priorityScoreFilter will remove genes
# which are least likely to contribute to the phenotype defined in hpoIds; this will
# dramatically reduce the time and memory required to analyse a genome.
# 0.501 is a good compromise to select good phenotype matches and the best protein-protein interaction hits from hiPhive.
priorityScoreFilter: { priorityType: HIPHIVE_PRIORITY, minPriorityScore: 0.501 },
failedVariantFilter: { },
regulatoryFeatureFilter: { },
frequencyFilter: { maxFrequency: 2.0 },
pathogenicityFilter: { keepNonPathogenic: true },
inheritanceFilter: { },
omimPrioritiser: { }
]
30 changes: 30 additions & 0 deletions assets/exomiser/pheno/family1.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Test phenotype/pedigree description for family1, referenced from the familyPheno
# column of assets/TestSampleSheet.csv.
# The proband (testN, female) carries one phenotypic feature, HP:0001159 (Syndactyly).
# The pedigree lists the affected proband and an unaffected male (testT) declared as
# her father through paternalId.
---
id: family1
proband:
subject:
id: testN
sex: FEMALE
phenotypicFeatures:
- type:
id: HP:0001159
label: Syndactyly

pedigree:
persons:
- individualId: testN
paternalId: testT
sex: FEMALE
affectedStatus: AFFECTED
- individualId: testT
sex: MALE
affectedStatus: UNAFFECTED

# Ontology resource backing the HP: identifiers used above.
metaData:
resources:
- id: hp
name: human phenotype ontology
url: http://purl.obolibrary.org/obo/hp.owl
version: hp/releases/2019-11-08
namespacePrefix: HP
iriPrefix: 'http://purl.obolibrary.org/obo/HP_'
phenopacketSchemaVersion: 2.0
26 changes: 26 additions & 0 deletions assets/exomiser/pheno/family2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Test phenotype/pedigree description for family2, referenced from the familyPheno
# column of assets/TestSampleSheet.csv.
# Singleton case: the pedigree contains only the affected female proband (testN),
# who carries one phenotypic feature, HP:0001159 (Syndactyly).
---
id: family2
proband:
subject:
id: testN
sex: FEMALE
phenotypicFeatures:
- type:
id: HP:0001159
label: Syndactyly

pedigree:
persons:
- individualId: testN
sex: FEMALE
affectedStatus: AFFECTED

# Ontology resource backing the HP: identifiers used above.
metaData:
resources:
- id: hp
name: human phenotype ontology
url: http://purl.obolibrary.org/obo/hp.owl
version: hp/releases/2019-11-08
namespacePrefix: HP
iriPrefix: 'http://purl.obolibrary.org/obo/HP_'
phenopacketSchemaVersion: 2.0
32 changes: 32 additions & 0 deletions assets/exomiser/test_exomiser_analysis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# This exomiser analysis file is used for quick testing with a minimal amount of reference data.
# It uses only one frequency source (UK10K) and only one pathogenicity source (REVEL).
# The filter/prioritiser steps are the same as in the genome (WGS) template.
---
analysisMode: PASS_ONLY
# NOTE(review): the numeric values below are presumably the per-inheritance-mode maximum
# allele frequency cut-offs (in percent) used by the upstream Exomiser templates - confirm
# against the Exomiser documentation before changing them.
inheritanceModes: {
AUTOSOMAL_DOMINANT: 0.1,
AUTOSOMAL_RECESSIVE_HOM_ALT: 0.1,
AUTOSOMAL_RECESSIVE_COMP_HET: 2.0,
X_DOMINANT: 0.1,
X_RECESSIVE_HOM_ALT: 0.1,
X_RECESSIVE_COMP_HET: 2.0,
MITOCHONDRIAL: 0.2
}
frequencySources: [
UK10K
]
pathogenicitySources: [ REVEL]
# This is the recommended order for a genome-sized analysis.
steps: [
hiPhivePrioritiser: { },
# Running the prioritiser followed by a priorityScoreFilter will remove genes
# which are least likely to contribute to the phenotype defined in hpoIds; this will
# dramatically reduce the time and memory required to analyse a genome.
# 0.501 is a good compromise to select good phenotype matches and the best protein-protein interaction hits from hiPhive.
priorityScoreFilter: { priorityType: HIPHIVE_PRIORITY, minPriorityScore: 0.501 },
failedVariantFilter: { },
regulatoryFeatureFilter: { },
frequencyFilter: { maxFrequency: 2.0 },
pathogenicityFilter: { keepNonPathogenic: true },
inheritanceFilter: { },
omimPrioritiser: { }
]
14 changes: 3 additions & 11 deletions assets/schema_input.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,9 @@
"familyPheno": {
"errorMessage": "Filename of the pedigree file, mandatory for exomiser",
"meta": ["familypheno"],
"anyOf": [
{
"type": "string",
"pattern": "^\\S+$"
},
{
"type": "string",
"maxLength": 0
}
],
"default": ""
"format": "file-path",
"pattern": "^\\S+\\.ya?ml$",
"exists": true

}
},
Expand Down
8 changes: 8 additions & 0 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,12 @@ params {
[name: 'MQ40', expression: 'MQ < 40.0'],
[name: 'MQRankSum-12.5', expression: 'MQRankSum < -12.5'],
[name: 'ReadPosRankSum-8', expression: 'ReadPosRankSum < -8.0']]

tools = "vep,exomiser"

// Exomiser parameters
exomiser_analysis = "assets/exomiser/test_exomiser_analysis.yml"
exomiser_data_dir = "data-test/reference/exomiser"
exomiser_data_version = "2402"
genome = "hg38"
}
7 changes: 5 additions & 2 deletions modules/local/exomiser/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ process EXOMISER {
script:
def args = task.ext.args ?: ''
def exactVcfFile = vcfFile.find { it.name.endsWith("vcf.gz") }
def remm_args = params.exomiser_remm_version ? "--exomiser.remm.version=\"${params.exomiser_remm_version}\"": ""
def cadd_args = params.exomiser_cadd_version ? "--cadd.version=\"${params.exomiser_cadd_version}\"": ""

"""
#!/bin/bash -eo pipefail
Expand All @@ -38,8 +40,9 @@ process EXOMISER {
--sample ${phenofile} \\
--output-format=HTML,JSON,TSV_GENE,TSV_VARIANT,VCF \\
--exomiser.data-directory=/`pwd`/${datadir} \\
--exomiser.hg19.data-version="${params.exomiser_data_version}" \\
--exomiser.hg38.data-version="${params.exomiser_data_version}" \\
${remm_args} \\
${cadd_args} \\
--exomiser.${params.genome}.data-version="${params.exomiser_data_version}" \\
--exomiser.phenotype.data-version="${params.exomiser_data_version}" \\
${args}
Expand Down
10 changes: 5 additions & 5 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@
"type": "string",
"description": "Name of iGenomes reference.",
"fa_icon": "fas fa-book",
"help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
"help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome hg38`. Only `hg19` and `hg38` are accepted. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.",
"enum": ["hg19", "hg38"]
},
"igenomes_ignore": {
"type": "boolean",
Expand Down Expand Up @@ -263,8 +264,8 @@
},
"tools": {
"type": "string",
"enum": ["exomiser", "vep"],
"description": "List of tools to use separate with comma. Available tools [vep, exomiser]"
"pattern": "^(vep|exomiser)?(,(vep|exomiser))*$",
"description": "Comma-separated list of tools to run. Available tools: [vep, exomiser]"
}
}
},
Expand Down Expand Up @@ -322,6 +323,5 @@
{
"$ref": "#/definitions/exomiser_option"
}
],
"properties": {}
]
}
2 changes: 1 addition & 1 deletion workflows/postprocessing.nf
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ workflow POSTPROCESSING {

if (params.tools && params.tools.split(',').contains('exomiser')) {
s = s.map{meta, files ->
tuple (meta,files,meta.familypheno)}
[meta,files,meta.familypheno]}

exomiser_analysis_file = file(params.exomiser_analysis)
exomiser_data_dir = file(params.exomiser_data_dir)
Expand Down

0 comments on commit 9e74994

Please sign in to comment.