From 860ac7f83191373712688fcaecae3cfed56f0154 Mon Sep 17 00:00:00 2001 From: LysianeBouchard Date: Thu, 19 Sep 2024 08:57:04 -0400 Subject: [PATCH] Lysiane changes It includes the following changes: -fix linter warnings -introducing conditional parameter validation logic for exomiser and vep -using a dedicated exomiser_genome parameter -utility functions to check if a tool is present -make exomiser stub output files identical to real output files -infer exomiser version from exomiser banner file in container -standardize exomizer process outputs -introducing per sequencing type analysis file -use process input instead params to pass configuration information -update README.md, OUTPUT.md and USAGE.md --- README.md | 105 +++------ conf/test.config | 5 +- docs/output.md | 9 +- docs/reference_data.md | 93 ++++++++ docs/usage.md | 206 ++++++++---------- main.nf | 2 - modules/local/exomiser/main.nf | 66 +++--- modules/local/exomiser/meta.yml | 83 +++++-- nextflow.config | 10 +- nextflow_schema.json | 86 ++++++-- .../main.nf | 3 +- .../utils.nf | 11 + workflows/postprocessing.nf | 35 ++- 13 files changed, 432 insertions(+), 282 deletions(-) create mode 100644 docs/reference_data.md create mode 100644 subworkflows/local/utils_nfcore_postprocessing_pipeline/utils.nf diff --git a/README.md b/README.md index 4aab30a..8db7cf4 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,18 @@ [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.10.1-23aa62.svg)](https://www.nextflow.io/) -[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with 
singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) -[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/ferlab/postprocessing) + + ## Introduction -**ferlab/postprocessing** is a bioinformatics pipeline that takes GVCFs from several samples to combine, perform joint genotyping, tag low quality variant and annotate a final vcf version. +**Ferlab-Ste-Justine/Post-processing-Pipeline** is a bioinformatics pipeline designed for family-based analysis of GVCFs from multiple samples. +It performs joint genotyping, tags low-quality variants, and optionally annotates the final vcf data using vep and/or exomiser. - ### Summary: 1. Remove MNPs using bcftools 2. Normalize .gvcf @@ -19,104 +21,56 @@ 5. Tag false positive variants with either: - For whole genome sequencing data: [Variant quality score recalibration (VQSR)](https://gatk.broadinstitute.org/hc/en-us/articles/360036510892-VariantRecalibrator) - For whole exome sequencing data: [Hard-Filtering](https://gatk.broadinstitute.org/hc/en-us/articles/360036733451-VariantFiltration) -6. Annotate variants with [Variant effect predictor (VEP)](https://useast.ensembl.org/info/docs/tools/vep/index.html) +6. Optionally annotate variants with [Variant effect predictor (VEP)](https://useast.ensembl.org/info/docs/tools/vep/index.html) +7. Optionally integrate phenotype data to annotate, filter and prioritise variants likely to be disease-causing with [exomiser](https://www.sanger.ac.uk/tool/exomiser/) + ![PostProcessingDiagram](assets/PostProcessingImage.png?raw=true) ## Usage -> [!NOTE] -> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. - -### Samples -The workflow will accept sample data separated by commas (CSV format). The path to the sample file must be specified with the "**input**" parameter. The column names are : familyId,sample,sequencingType,file. The sequencing type must be either WES (Whole Exome Sequencing) or WGS (Whole Genome Sequencing). - -**sample.csv** -```csv -**familyId**,**sample**,**sequencingType**,**file** -CONGE-XXX,01,WES,CONGE-XXX-01.hard-filtered.gvcf.gz -CONGE-XXX,02,WES,CONGE-XXX-02.hard-filtered.gvcf.gz -CONGE-XXX,03,WES,CONGE-XXX-03.hard-filtered.gvcf.gz -CONGE-YYY,01,WGS,CONGE-YYY-01.hard-filtered.gvcf.gz -CONGE-YYY,02,WGS,CONGE-YYY-02.hard-filtered.gvcf.gz -CONGE-YYY,03,WGS,CONGE-YYY-03.hard-filtered.gvcf.gz -``` - - -> [!NOTE] -> The sequencing type also determines the type of variant filtering the pipeline will use. -> -> In the case of Whole Genome Sequencing, VQSR (Variant Quality Score Recalibration) is used (preferred method). -> -> In the case of Whole Exome Sequencing, Hard-filtering needs to be used. - -Now, you can run the pipeline using: - - +Here is an example nextflow command to run the pipeline: ```bash -nextflow run ferlab/postprocessing \ - -profile \ +nextflow run -c cluster.config Ferlab-Ste-Justine/Post-processing-Pipeline -r "v2.0.0" \ + -params-file params.json \ --input samplesheet.csv \ - --outdir + --outdir results/dir \ + --tools vep,exomiser ``` +> [!NOTE] +> If you are new to nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up nextflow. + > [!WARNING] -> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +> Please provide pipeline parameters via the CLI or nextflow `-params-file` option. Custom config files including those provided by the `-c` nextflow option can be used to provide any configuration _**except for parameters**_; > see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). -### References -Reference files are necessary at multiple steps of the workflow, notably for joint-genotyping,the variant effect predictor (VEP) and VQSR. -Using igenome, we can retrieve the relevant files for the desired version of the human genome. -Specifically, we specifiy the igenome version with the **genome** parameter. Most likely this value will be *'GRCh38'* - -Next, we also need broader references, which are contained in a path defined by the **broad** parameter. +For more details, see [docs/usage.md](docs/usage.md) and [docs/reference_data.md](docs/reference_data.md). -The broad directory must contain the following files: -- The interval list which determines the genomic interval(s) over which we operate: filename of this list must be defined with the **intervalsFile** parameter -- Highly validated variance ressources currently required by VQSR. ***These are currently hard coded in the pipeline!*** - - HapMap file : hapmap_3.3.hg38.vcf.gz - - 1000G omni2.5 file : 1000G_omni2.5.hg38.vcf.gz - - 1000G reference file : 1000G_phase1.snps.high_confidence.hg38.vcf.gz - - SNP database : Homo_sapiens_assembly38.dbsnp138.vcf.gz +### Stub mode and quick tests - -Finally, the vep cache directory must be specified with **vepCache**, which is usually created by vep itself on first installation. 
-Generally, we only need the human files obtainable from https://ftp.ensembl.org/pub/release-112/variation/vep/homo_sapiens_vep_112_GRCh38.tar.gz +The `-stub` (or `-stub-run`) option can be added to run the "stub" block of processes instead of the "script" block. This can be helpful for testing. -### Stub run -The -stub-run option can be added to run the "stub" block of processes instead of the "script" block. This can be helpful for testing. -🚧 - -Parameters summary ------ +To test your setup in stub mode, simply run `nextflow run Ferlab-Ste-Justine/Post-processing-Pipeline -profile test,docker -stub`. -| Parameter name | Required? | Accepted input | -| --- | --- | --- | -| `input` | _Required_ | file | -| `outdir` | _Required_ | path | -| `genome` | _Required_ | igenome version, ie 'GRCh38'| -| `broad` | _Required_ | path | -| `intervalsFile` | _Required_ | list of genome intervals | -| `vepCache` | _Required_ | path | +For tests with real data, see documentation in the [test configuration profile](conf/test.config) Pipeline Output ----- -Path to output directory must be specified in **outdir** parameter. -🚧 +Path to output directory must be specified via the `outdir` parameter. +See [docs/output.md](docs/output.md) for more details about pipeline outputs. -## Credits -ferlab/postprocessing was originally written by Damien Geneste, David Morais, Felix-Antoine Le Sieur, Jeremy Costanza, Lysiane Bouchard. +## Credits -We thank the following people for their extensive assistance in the development of this pipeline: +Ferlab-Ste-Justine/Post-processing-Pipeline was originally written by Damien Geneste, David Morais, Felix-Antoine Le Sieur, Jeremy Costanza, Lysiane Bouchard. 
- ## Contributions and Support @@ -140,11 +94,10 @@ The documentation of the various tools used in this workflow are available here: [VEP](https://useast.ensembl.org/info/docs/tools/vep/script/vep_options.html) -## Citations +[EXOMISER](https://exomiser.readthedocs.io/en/latest/) - -An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. +## Citations This pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE). diff --git a/conf/test.config b/conf/test.config index 9289cd7..106bb5d 100644 --- a/conf/test.config +++ b/conf/test.config @@ -50,8 +50,9 @@ params { tools = "vep,exomiser" // Exomiser parameters - exomiser_analysis = "assets/exomiser/test_exomiser_analysis.yml" + exomiser_analysis_wes = "assets/exomiser/test_exomiser_analysis.yml" + exomiser_analysis_wgs = "assets/exomiser/test_exomiser_analysis.yml" exomiser_data_dir = "data-test/reference/exomiser" exomiser_data_version = "2402" - genome = "hg38" + exomiser_genome = "hg38" } diff --git a/docs/output.md b/docs/output.md index e050e71..492ae77 100644 --- a/docs/output.md +++ b/docs/output.md @@ -3,9 +3,8 @@ ## Introduction This document describes the output produced by the pipeline. -The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. +The directories listed below will be created in the output directory after the pipeline has finished. All paths are relative to the top-level output directory. - ## Pipeline overview @@ -20,7 +19,11 @@ The directories listed below will be created in the results directory after the - A copy of the nextflow log file: `nextflow.log`. Note that it will miss logs written after the workflow.onComplete handler is run. 
- Copies of the configuration files used: `config/*.config`. This includes the default `nextflow.config` file as well as any additional configuration files passed as parameters. - Other metadata relevant for reproducibility: `metadata.txt` . It contains information such as the original command line, the name of the branch and revision used, the username of the person who submitted the job, a list of configuration files passed, the nextflow work directory, etc. - +- `splitmultiallelics/`: pipeline output before running the tools specified via the `tools` parameter. +- `vep/`: vep output +- `exomiser/results`: exomiser output + +You might see other folders named after different pipeline processes. These are considered intermediate pipeline outputs. diff --git a/docs/reference_data.md b/docs/reference_data.md new file mode 100644 index 0000000..c0788c7 --- /dev/null +++ b/docs/reference_data.md @@ -0,0 +1,93 @@ +# Ferlab-Ste-Justine/Post-processing-Pipeline: Reference Data + +Reference files are essential at various steps of the pipeline, including joint-genotyping, VQSR, the Variant Effect Predictor (VEP), and exomiser. + +These files must be correctly downloaded and specified through pipeline parameters. This document provides a comprehensive list of the required reference files and explains how to set the pipeline parameters appropriately. + +## Broad reference data (VQSR) +The `broad` parameter specifies the directory containing the reference data files for VQSR. We chose the name `broad` because +this data is from the [Broad Institute](https://www.broadinstitute.org/), a collaborative research institution known for its contributions to genomics and biomedical research. + +The broad directory must contain the following files: +- *Intervals File*: The genomic interval(s) over which we operate. The filename of this list must be defined with the `intervalsFile` parameter (e.g., "interval_long_local.list"). 
+- Highly validated variant resources currently required by VQSR. ***These are currently hard coded in the pipeline***: + - HapMap file : hapmap_3.3.hg38.vcf.gz + - 1000G omni2.5 file : 1000G_omni2.5.hg38.vcf.gz + - 1000G reference file : 1000G_phase1.snps.high_confidence.hg38.vcf.gz + - SNP database : Homo_sapiens_assembly38.dbsnp138.vcf.gz + +## Reference Genome + +The `referenceGenome` parameter specifies the directory containing the reference genome files. + +This directory should contain the following files: +- The reference genome FASTA file (e.g., `Homo_sapiens_assembly38.fasta`). This filename must be specified with the `referenceGenomeFasta` parameter. +- The reference genome FASTA file index (e.g., `Homo_sapiens_assembly38.fasta.fai`). Its location will be automatically derived by appending `.fai` to the `referenceGenomeFasta` parameter. +- The reference genome dictionary file (e.g., `Homo_sapiens_assembly38.dict`). Its location will be automatically derived by replacing the `.fasta` file extension of the `referenceGenomeFasta` parameter with `.dict`. + + +## VEP Cache Directory +The `vepCache` parameter specifies the directory for the vep cache. It is only required if `vep` is specified via the +`tools` parameter. + +The vep cache is not automatically populated by the pipeline. It must be pre-downloaded. You can obtain a copy of the +data by following the [vep installation procedure](https://github.com/Ensembl/ensembl-vep). Generally, we only need the human files obtainable from [Ensembl](https://ftp.ensembl.org/pub/release-112/variation/vep/homo_sapiens_vep_112_GRCh38.tar.gz). + +## Exomiser reference data +The exomiser reference data is only required if `exomiser` is specified via the `tools` parameter. + +The `exomiser_data_dir` parameter specifies the path to the directory containing the exomiser reference files. +This directory will be passed to the exomiser tool via the exomiser option `--exomiser.data-directory`. 
+ +Its content should look like this: +``` +2402_hg19/ +2402_hg38/ +2402_phenotype/ +remm/ +cadd/ +``` + +- *2402_hg19/* and *2402_hg38/*: These folders contain data associated with the `hg19` and `hg38` genome assemblies, respectively. The number `2402` corresponds to the exomiser data version. +- *remm/* and *cadd/*: These folders are necessary if REMM and CADD are used as pathogenicity sources in the exomiser analysis file. The files and subdirectories within these folders must follow a specific structure, and exomiser will need to know the genome assembly (hg19 or hg38) and the versions of REMM and CADD being used to infer file locations. + +To prepare the exomiser data directory, follow the instructions in the [exomiser installation documentation](https://exomiser.readthedocs.io/en/latest/installation.html#linux-install) + +Together with the `exomiser_data_dir` parameter, these parameters must be provided to exomiser and should match the reference data available +- `exomiser_genome`: The genome assembly version to be used by exomiser. Accepted values are `hg38` or `hg19`. +- `exomiser_data_version`: The exomiser data version. Example: `2402`. +- `exomiser_cadd_version`: The version of the CADD data to be used by exomiser (optional). Example: `1.3`. +- `exomiser_remm_version`: The version of the REMM data to be used by exomiser (optional). Example: `0.3.1` + +## Exomiser analysis files +In addition to the reference data, exomiser requires an analysis file (.yml/.json) that contains, among other +things, the variant frequency sources for prioritization of rare variants, variant pathogenicity sources to consider, the list of filters and prioritizers to apply, etc. + +Typically, different analysis settings are used for whole exome sequencing (WES) and whole genome sequencing (WGS) data. 
+Default analysis files are provided for each sequencing type in the assets folder: +- assets/exomiser/default_exomiser_WES_analysis.yml +- assets/exomiser/default_exomiser_WGS_analysis.yml + +You can override these defaults and provide your own analysis file(s) via parameters `exomiser_analysis_wes` and `exomiser_analysis_wgs`. + +The exomiser analysis file format follows the `phenopacket` standard and is described in detail [here](https://exomiser.readthedocs.io/en/latest/advanced_analysis.html#analysis). +There are typically multiple sections in the analysis file. To be compatible with the way we run the exomiser command, your +analysis file should contain only the `analysis` section. + +## Reference data parameters summary + +| Parameter name | Required? | Description | +| --- | --- | --- | +| `referenceGenome` | _Required_ | Path to the directory containing the reference genome data | +| `referenceGenomeFasta` | _Required_ | Filename of the reference genome .fasta file, within the specified `referenceGenome` directory | +| `broad` | _Required_ | Path to the directory containing Broad reference data | +| `intervalsFile` | _Required_ | Filename of the genome intervals list, within the specified `broad` directory | +| `vepCache` | _Optional_ | Path to the vep cache data directory | +| `exomiser_data_dir` | _Optional_ | Path to the exomiser reference data directory | +| `exomiser_genome` | _Optional_ | Genome assembly version to be used by exomiser (`hg19` or `hg38`) | +| `exomiser_data_version` | _Optional_ | Exomiser data version (e.g., `2402`)| +| `exomiser_cadd_version` | _Optional_ | Version of the CADD data to be used by exomiser (e.g., `1.7`)| +| `exomiser_remm_version` | _Optional_ | Version of the REMM data to be used by exomiser (e.g., `0.3.1`)| +| `exomiser_analysis_wes` | _Optional_ | Path to the exomiser analysis file for WES data, if different from the default | +| `exomiser_analysis_wgs` | _Optional_ | Path to the exomiser analysis file for WGS data, 
if different from the default | + diff --git a/docs/usage.md b/docs/usage.md index 7f650d5..6ca5c42 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,28 +1,66 @@ -# ferlab/postprocessing: Usage +# Ferlab-Ste-Justine/Post-processing-Pipeline: Usage -> _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._ +> _Parameters documentation is available in the [pipeline schema](../nextflow_schema.json)._ +> _You can use the command `nf-core schema docs` to output parameters documentation._ +> _To avoid duplication of information, we minimize parameters details in markdown files._ +> _Currently, we only add context for the reference data parameters and provide parameter summaries for convenience._ ## Introduction - +The Ferlab-Ste-Justine/Post-processing-Pipeline is a bioinformatics pipeline designed for family-based analysis of GVCFs from multiple samples. It performs joint genotyping, tags low-quality variants, and optionally annotates the final VCF using VEP and/or Exomiser. This document provides instructions on how to prepare input files, run the pipeline, and understand the output. + ## Samplesheet input -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use the --input parameter to specify its location. The samplesheet has to be a tab-separated file (.tsv) with the first column being the family ID and the second being the sequencing type (either Whole Genome Sequencing (WGS) or Whole Exome Sequencing (WES)). Use the following columns to supply the paths to the gvcfs files for the same familyId. +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use the `--input` parameter to specify its location. The samplesheet has to be a comma separated file (.csv). 
-```bash ---input '[path to samplesheet file]' +The samplesheet must contain the following columns at the minimum: +- *familyId*: The identifier used for the sample family +- *sample*: The identifier used for the sample +- *sequencingType*: Must be either WES (Whole Exome Sequencing) or WGS (Whole Genome Sequencing) +- *gvcf*: Path to the sample .gvcf file + +Additionally, there is an optional *phenoFamily* column that can contain a .yml/.json file providing phenotype +information on the family in phenopacket format. This column is only necessary if using the exomiser tool. + + +**sample.csv** +```csv +**familyId**,**sample**,**sequencingType**,**gvcf**,**phenoFamily** +CONGE-XXX,01,WES,CONGE-XXX-01.hard-filtered.gvcf.gz,CONGE-XXX.pheno.yml +CONGE-XXX,02,WES,CONGE-XXX-02.hard-filtered.gvcf.gz,CONGE-XXX.pheno.yml +CONGE-XXX,03,WES,CONGE-XXX-03.hard-filtered.gvcf.gz,CONGE-XXX.pheno.yml +CONGE-YYY,01,WGS,CONGE-YYY-01.hard-filtered.gvcf.gz,CONGE-YYY.pheno.yml +CONGE-YYY,02,WGS,CONGE-YYY-02.hard-filtered.gvcf.gz,CONGE-YYY.pheno.yml +CONGE-YYY,03,WGS,CONGE-YYY-03.hard-filtered.gvcf.gz,CONGE-YYY.pheno.yml ``` +> [!NOTE] +> The sequencing type (WES or WGS) will determine the variant filtering approach used by the pipeline. +> In the case of Whole Genome Sequencing, VQSR (Variant Quality Score Recalibration) is used. +> In the case of Whole Exome Sequencing, VQSR is replaced by a hard filtering approach as VQSR cannot be applied in this case. +> Additionally, a different analysis file will be used when running the exomiser tool based on the sequencing type. + +## Reference Data + +Reference files are essential at various stages of the workflow, including joint-genotyping, VQSR, the Variant Effect Predictor (VEP), and exomiser. + +These files must be correctly downloaded and specified through pipeline parameters. For more details about how to do this, see +[reference_data.md](reference_data.md). 
+ + ## Running the pipeline The typical command for running the pipeline is as follows: ```bash -nextflow run ferlab/postprocessing --input ./samplesheet.tsv --outdir ./results --genome GRCh37 -profile docker +nextflow run -c cluster.config Ferlab-Ste-Justine/Post-processing-Pipeline -r "v2.0.0" \ + -params-file params.json \ + --input samplesheet.csv \ + --outdir results/dir \ + --tools vep,exomiser ``` -This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. Note that the pipeline will create the following files in your working directory: @@ -33,30 +71,35 @@ work # Directory containing the nextflow working files # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` -If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file. - -Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. +If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file (json or yaml). :::warning Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). ::: -The above pipeline run specified with a params file in yaml format: -```bash -nextflow run ferlab/postprocessing -profile docker -params-file params.yaml -``` +### Tools -with `params.yaml` containing: +You can include additional analysis in your pipeline via the `tools` parameter. Currently, the pipeline supports +two tools: `vep` (Variant Effect Predictor) and `exomiser`. 
-```yaml -input: './samplesheet.csv' -outdir: './results/' -genome: 'GRCh37' -<...> -``` +VEP is a widely used tool for annotating genetic variants with information such as gene names, +variant consequences, and population frequencies. It provides valuable insights into the functional impact +of genetic variants. + +Exomiser, on the other hand, is a tool specifically designed for the analysis of rare genetic diseases. It +integrates phenotype data with variant information to prioritize variants that are likely to be disease-causing. +This can greatly assist in the identification of potential disease-causing variants in exome sequencing data. + + +### Stub mode and quick tests + +The `-stub` (or `-stub-run`) option can be added to run the "stub" block of processes instead of the "script" block. This can be helpful for testing. + + +To test your setup in stub mode, simply run `nextflow run Ferlab-Ste-Justine/Post-processing-Pipeline -profile test,docker -stub`. -You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). +For tests with real data, see documentation in the [test configuration profile](conf/test.config) ### Updating the pipeline @@ -80,96 +123,16 @@ To further assist in reproducbility, you can use share and re-use [parameter fil If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. ::: -## Core Nextflow arguments -:::note -These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). -::: - -### `-profile` - -Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. 
- -Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. - -:::info -We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. -::: +### Core Nextflow arguments +- Use the `-profile` parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments (e.g., docker, singularity, conda). Multiple profiles can be loaded in sequence, e.g., `-profile test,docker`. +- Use the `-resume` parameter to restart a pipeline from where it left off. This can save time by using cached results from previous runs. +- You can specify a custom configuration file using the `-c` parameter. This is useful to set configuration specific to your execution environment and change requested resources for a process. -The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). +For more detailed information, please refer to the [official Nextflow documentation](https://www.nextflow.io/docs/latest/index.html). -Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! -They are loaded in sequence, so later profiles can overwrite earlier profiles. -If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. 
This is _not_ recommended, since it can lead to different results on different machines dependent on the computer enviroment. - -- `test` - - A profile with a complete configuration for automated testing - - Includes links to test data so needs no other parameters -- `docker` - - A generic configuration profile to be used with [Docker](https://docker.com/) -- `singularity` - - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) -- `podman` - - A generic configuration profile to be used with [Podman](https://podman.io/) -- `shifter` - - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) -- `charliecloud` - - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) -- `apptainer` - - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) -- `wave` - - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow ` 24.03.0-edge` or later). -- `conda` - - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. - -### `-resume` - -Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. For input to be considered the same, not only the names must be identical but the files' contents as well. For more info about this parameter, see [this blog post](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html). - -You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. 
- -### `-c` - -Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information. - -## Custom configuration - -### Resource requests - -Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. - -To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. - -### Custom Containers - -In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However in some cases the pipeline specified version maybe out of date. - -To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. - -### Custom Tool Arguments - -A pipeline might not always support every possible argument or option of a particular tool used in pipeline. 
Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default. - -To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website. - -### nf-core/configs - -In most cases, you will only need to create a custom config as a one-off but if you and others within your organisation are likely to be running nf-core pipelines regularly and need to use the same settings regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter. You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. - -See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more information about creating your own configuration files. - -If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). - -## Azure Resource Requests - -To be used with the `azurebatch` profile by specifying the `-profile azurebatch`. -We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default but these options can be changed if required. - -Note that the choice of VM size depends on your quota and the overall workload during the analysis. 
-
-For a thorough list, please refer the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes).
-
-## Running in the background
+### Running in the background
 
 Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished.
 
@@ -178,11 +141,32 @@ The Nextflow `-bg` flag launches Nextflow in the background, detached from your
 
 Alternatively, you can use `screen` / `tmux` or similar tool to create a detached session which you can log back into at a later time.
 Some HPC setups also allow you to run nextflow within a cluster job submitted your job scheduler (from where it submits more jobs).
 
-## Nextflow memory requirements
+### Nextflow memory requirements
 
 In some cases, the Nextflow Java virtual machines can start to request a large amount of memory.
-We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~./bash_profile`):
+To limit this, you can use the `NXF_OPTS` environment variable:
 
 ```bash
 NXF_OPTS='-Xms1g -Xmx4g'
 ```
+
+Parameters summary
+-----
+
+| Parameter name | Required? | Description |
+| --- | --- | --- |
+| `input` | _Required_ | Path to the input file |
+| `outdir` | _Required_ | Path to the output directory |
+| `referenceGenome` | _Required_ | Path to the directory containing the reference genome data |
+| `referenceGenomeFasta` | _Required_ | Filename of the reference genome .fasta file, within the specified `referenceGenome` directory |
+| `broad` | _Required_ | Path to the directory containing Broad reference data |
+| `intervalsFile` | _Required_ | Filename of the genome intervals list, within the specified `broad` directory |
+| `tools` | _Optional_ | Additional tools to run separated by commas. 
Supported tools are `vep` and `exomiser` | +| `vepCache` | _Optional_ | Path to the vep cache data directory | +| `exomiser_data_dir` | _Optional_ | Path to the exomiser reference data directory | +| `exomiser_genome` | _Optional_ | Genome assembly version to be used by exomiser(`hg19` or `hg38`) | +| `exomiser_data_version` | _Optional_ | Exomiser data version (e.g., `2402`)| +| `exomiser_cadd_version` | _Optional_ | Version of the CADD data to be used by exomiser (e.g., `1.7`)| +| `exomiser_remm_version` | _Optional_ | Version of the REMM data to be used by exomiser (e.g., `0.3.1`)| +| `exomiser_analysis_wes` | _Optional_ | Path to the exomiser analysis file for WES data, if different from the default | +| `exomiser_analysis_wgs` | _Optional_ | Path to the exomiser analysis file for WGS data, if different from the default | diff --git a/main.nf b/main.nf index 47fb186..5abad0c 100644 --- a/main.nf +++ b/main.nf @@ -47,8 +47,6 @@ workflow FERLAB_POSTPROCESSING { POSTPROCESSING ( samplesheet ) - - } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/modules/local/exomiser/main.nf b/modules/local/exomiser/main.nf index 39bbbef..389d435 100644 --- a/modules/local/exomiser/main.nf +++ b/modules/local/exomiser/main.nf @@ -1,25 +1,29 @@ - +/* + Note: The current logic for creating the version file is fragile. It would be more robust to + include a version file directly in the exomiser docker image. 
+*/ process EXOMISER { label 'process_low' input: - tuple val(meta), path(vcfFile), path(phenofile) - path(analysis_file) - path(datadir) + tuple val(meta), path(vcfFile), path(phenoFile), path(analysisFile) + path datadir + val exomiserGenome + val exomiserDataVersion + val remmVersion + val caddVersion output: - val(meta) - path("results/*vcf.gz") , optional:true, emit: vcf - path("results/*vcf.gz.tbi") , optional:true, emit: tbi - path("results/*html") , optional:true, emit: html - path("results/*json") , optional:true, emit: json - path("results/*genes.tsv") , optional:true, emit: genetsv - path("results/*variants.tsv") , optional:true, emit: variantstsv - path "versions.yml" , emit: versions - // TODO nf-core: List additional required output channels/values here + tuple val(meta), path("results/*vcf.gz") , optional:true, emit: vcf + tuple val(meta), path("results/*vcf.gz.tbi") , optional:true, emit: tbi + tuple val(meta), path("results/*html") , optional:true, emit: html + tuple val(meta), path("results/*json") , optional:true, emit: json + tuple val(meta), path("results/*genes.tsv") , optional:true, emit: genetsv + tuple val(meta), path("results/*variants.tsv") , optional:true, emit: variantstsv + path("versions.yml") , emit: versions when: task.ext.when == null || task.ext.when @@ -27,29 +31,28 @@ process EXOMISER { script: def args = task.ext.args ?: '' def exactVcfFile = vcfFile.find { it.name.endsWith("vcf.gz") } - def remm_args = params.exomiser_remm_version ? "--exomiser.remm.version=\"${params.exomiser_remm_version}\"": "" - def cadd_args = params.exomiser_cadd_version ? "--cadd.version=\"${params.exomiser_cadd_version}\"": "" + def remmArgs = remmVersion ? "--exomiser.remm.version=\"${remmVersion}\"": "" + def caddArgs = caddVersion ? 
"--cadd.version=\"${caddVersion}\"": ""
     """
     #!/bin/bash -eo pipefail
 
     java -cp \$( cat /app/jib-classpath-file ) \$( cat /app/jib-main-class-file ) \\
         --vcf ${exactVcfFile} \\
-        --assembly "${params.genome}" \\
-        --analysis "${analysis_file}" \\
-        --sample ${phenofile} \\
+        --assembly "${exomiserGenome}" \\
+        --analysis "${analysisFile}" \\
+        --sample ${phenoFile} \\
         --output-format=HTML,JSON,TSV_GENE,TSV_VARIANT,VCF \\
         --exomiser.data-directory=/`pwd`/${datadir} \\
-        ${remm_args} \\
-        ${cadd_args} \\
-        --exomiser.${params.genome}.data-version="${params.exomiser_data_version}" \\
-        --exomiser.phenotype.data-version="${params.exomiser_data_version}" \\
+        ${remmArgs} \\
+        ${caddArgs} \\
+        --exomiser.${exomiserGenome}.data-version="${exomiserDataVersion}" \\
+        --exomiser.phenotype.data-version="${exomiserDataVersion}" \\
         ${args}
 
-
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        exomiser: "${params.exomiser_version}"
+        "exomiser": "\$(cat /app/resources/banner.txt | tail -n1 | cut -d'v' -f2)"
     END_VERSIONS
     """
 
@@ -58,17 +61,16 @@
     """
     #!/bin/bash -eo pipefail
     mkdir results
-    touch results/${familyId}-PASS_ONLY.genes.tsv
-    touch results/${familyId}-PASS_ONLY.html
-    touch results/${familyId}-PASS_ONLY.json
-    touch results/${familyId}-PASS_ONLY.variants.tsv
-    touch results/${familyId}-PASS_ONLY.vcf.gz
-    touch results/${familyId}-PASS_ONLY.vcf.gz.tbi
+    touch results/${familyId}.splitted-exomiser.genes.tsv
+    touch results/${familyId}.splitted-exomiser.html
+    touch results/${familyId}.splitted-exomiser.json
+    touch results/${familyId}.splitted-exomiser.variants.tsv
+    touch results/${familyId}.splitted-exomiser.vcf.gz
+    touch results/${familyId}.splitted-exomiser.vcf.gz.tbi
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        exomiser: "${params.exomiser_version}"
+        "exomiser": "\$(cat /app/resources/banner.txt | tail -n1 | cut -d'v' -f2)"
     END_VERSIONS
     """
-
 }
diff --git a/modules/local/exomiser/meta.yml b/modules/local/exomiser/meta.yml 
index 61acdd3..9ab0e54 100644
--- a/modules/local/exomiser/meta.yml
+++ b/modules/local/exomiser/meta.yml
@@ -13,39 +13,92 @@ tools:
       doi: "https://doi.org/10.1038%2Fnprot.2015.124"
       licence: "['AGPL v3']"
 
-## TODO nf-core: Add a description of all of the variables used as input
 input:
   # Only when we have meta
   - meta:
       type: map
       description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
+        Groovy Map containing family information
+        e.g. [ id:'family1', sequencing_type:'WES' ]
 
-  ## TODO nf-core: Delete / customise this example input
-  - vcf:
+  - vcfFile:
       type: file
-      description: Vcf file
+      description: combined vcf file containing all samples in the family
       pattern: "*.{vcf.gz,vcf}"
 
-## TODO nf-core: Add a description of all of the variables used as output
+  - phenoFile:
+      type: file
+      description: Phenopacket file containing phenotypic information about the family
+      pattern: "*.{yaml,yml,json}"
+
+  - analysisFile:
+      type: file
+      description: File containing the exomiser analysis configuration
+      pattern: "*.{yaml,yml}"
+
+  - dataDir:
+      type: directory
+      description: Path to the directory containing the exomiser data
+
+  - exomiserGenome:
+      type: string
+      description: The genome assembly to use with exomiser
+
+  - exomiserDataVersion:
+      type: string
+      description: The version of the exomiser data to use
+
+  - remmVersion:
+      type: string
+      description: The version of the REMM pathogenicity data source to use
+
+  - caddVersion:
+      type: string
+      description: The version of the CADD pathogenicity data source to use
+
+
 output:
   #Only when we have meta
   - meta:
       type: map
       description: |
-        Groovy Map containing sample information
-        e.g. [sample:'test', familyID: 'testID' ]
+        Groovy Map containing family information
+        e.g. 
[ id:'family1', sequencing_type:'WES' ] + + - vcf: + type: file + description: exomiser output vcf file + pattern: "*.{vcf.gz}" + + - tbi: + type: file + description: index of exomiser output vcf file + pattern: "*.{vcf.gz.tbi}" + + - html: + type: file + description: exomiser output html file + pattern: "*.{html}" + + - json: + type: file + description: exomiser output json file + pattern: "*.{json}" + + - genetsv: + type: file + description: exomiser output genes.tsv file + pattern: "*.{genes.tsv}" + + - variantstsv: + type: file + description: exomiser output variants.tsv file + pattern: "*.{variants.tsv}" - versions: type: file description: File containing software versions pattern: "versions.yml" - ## TODO nf-core: Delete / customise this example output - - vcf: - type: file - description: A BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + authors: - "@ferlab" diff --git a/nextflow.config b/nextflow.config index 20ffd90..c09d231 100644 --- a/nextflow.config +++ b/nextflow.config @@ -19,18 +19,16 @@ params { referenceGenome = null referenceGenomeFasta = null broad = null - vepCache = null intervalsFile = null tools = "" - genome = null - exomiser_version = null + vepCache = null + exomiser_genome = null exomiser_data_dir = null exomiser_data_version = null exomiser_cadd_version = null exomiser_remm_version = null - exomiser_analysis = null - - + exomiser_analysis_wes = "assets/exomiser/default_exomiser_WES_analysis.yml" + exomiser_analysis_wgs = "assets/exomiser/default_exomiser_WGS_analysis.yml" //Process-specific parameters diff --git a/nextflow_schema.json b/nextflow_schema.json index a7d49c4..cf185db 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -57,6 +57,12 @@ "help_text": "Path to the directory containing 5 important files: \\n1. The intervalsFile whose name is defined in the intervalsFile parameter\\n2. The Hapmap file for vqsr training\\n3. The omni2.5 file for vqsr training\\n4. The 1000G SNP reference file for vqsr training\\n5. 
The dbsnp database for vqsr training", "format": "directory-path" }, + "intervalsFile": { + "type": "string", + "description": "Namefile of the genome interval we want to use", + "help_text": "Namefile of the genome interval. Used during the CombineGVCFs step to indicate the regions of interest", + "format": "file-path" + }, "referenceGenome": { "type": "string", "description": "Directory containing the referenceGenomeFasta", @@ -68,26 +74,13 @@ "description": "Name of the fasta file for the genome", "help_text": "Name of the fasta file for the genome we usually apply \"Homo_sapiens_assembly38.fasta\"", "format": "file-path" - }, - "vepCache": { - "type": "string", - "help_text": "Path to the vepCache directory, which is usually installed by vep by default. It should contain at least the homo_sapien/111_GRCh38/ directory. ", - "description": "Directory of the Vep Cache", - "format": "directory-path" - }, - "intervalsFile": { - "type": "string", - "description": "Namefile of the genome interval we want to use", - "help_text": "Namefile of the genome interval. Used during the CombineGVCFs step to indicate the regions of interest", - "format": "file-path" } }, "required": [ "broad", "intervalsFile", "referenceGenome", - "referenceGenomeFasta", - "vepCache" + "referenceGenomeFasta" ] }, "institutional_config_options": { @@ -159,10 +152,6 @@ "hidden": true, "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" }, - "vepCpu": { - "type": "integer", - "default": 4 - }, "max_disk": { "type": "string", "default": "80 GB" @@ -269,20 +258,52 @@ } } }, + "vep": { + "title": "vep", + "type": "object", + "description": "vep parameters", + "default": "", + "properties": { + "vepCpu": { + "type": "integer", + "default": 4 + }, + "vepCache": { + "type": "string", + "help_text": "Path to the vepCache directory, which is usually installed by vep by default. 
It should contain at least the homo_sapien/111_GRCh38/ directory. ", + "description": "Directory of the Vep Cache", + "format": "directory-path" + } + }, + "if": { + "properties": { + "tools": { + "pattern": "vep" + } + }, + "required": ["tools"] + }, + "then": { + "required": ["vepCache"] + } + }, "exomiser_option": { "title": "Exomiser option", "type": "object", "description": "", "default": "", "properties": { - "exomiser_version": { - "type": "string" + "exomiser_analysis_wes": { + "type": "string", + "format": "file-path", + "description": "Path to the exomiser analysis file (.yml) to use for whole exome sequencing input", + "default": "assets/exomiser/default_exomiser_WES_analysis.yml" }, - "exomiser_analysis": { + "exomiser_analysis_wgs": { "type": "string", - "description": "Path to an Exomiser analysis file in yml", "format": "file-path", - "mimetype": "yml" + "default": "assets/exomiser/default_exomiser_WGS_analysis.yml", + "description": "Path to the exomiser analysis file (.yml) to use for whole genome sequencing input" }, "exomiser_data_dir": { "type": "string", @@ -293,6 +314,11 @@ "type": "string", "description": "Exomiser data version" }, + "exomiser_genome": { + "type": "string", + "enum": ["hg38", "hg19"], + "description": "Genome assembly version to be used by exomiser (e.g., hg38 or hg19)" + }, "exomiser_cadd_version": { "type": "string", "description": "Version of cadd data" @@ -301,6 +327,17 @@ "type": "string", "description": "Version of remm data" } + }, + "if": { + "properties": { + "tools": { + "pattern": "exomiser" + } + }, + "required": ["tools"] + }, + "then": { + "required": ["exomiser_data_dir", "exomiser_data_version"] } } }, @@ -320,6 +357,9 @@ { "$ref": "#/definitions/generic_options" }, + { + "$ref": "#/definitions/vep" + }, { "$ref": "#/definitions/exomiser_option" } diff --git a/subworkflows/local/utils_nfcore_postprocessing_pipeline/main.nf b/subworkflows/local/utils_nfcore_postprocessing_pipeline/main.nf index 9744478..e82f56c 
100644 --- a/subworkflows/local/utils_nfcore_postprocessing_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_postprocessing_pipeline/main.nf @@ -17,6 +17,7 @@ include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' +include { isExomiserToolIncluded } from './utils' /* ======================================================================================== @@ -83,7 +84,7 @@ workflow PIPELINE_INITIALISATION { .fromSamplesheet("input") .map { meta, file -> - if (params.tools && params.tools.split(',').contains('exomiser')) { + if (isExomiserToolIncluded()) { if (!meta.familypheno) { error("Samplesheet must contains familyPheno file for each sample when using exomiser tool") } diff --git a/subworkflows/local/utils_nfcore_postprocessing_pipeline/utils.nf b/subworkflows/local/utils_nfcore_postprocessing_pipeline/utils.nf new file mode 100644 index 0000000..1bd667b --- /dev/null +++ b/subworkflows/local/utils_nfcore_postprocessing_pipeline/utils.nf @@ -0,0 +1,11 @@ +def isVepToolIncluded() { + return isToolIncluded("vep") +} + +def isExomiserToolIncluded() { + return isToolIncluded("exomiser") +} + +def isToolIncluded(tool) { + return params.tools && params.tools.split(",").contains(tool) +} \ No newline at end of file diff --git a/workflows/postprocessing.nf b/workflows/postprocessing.nf index 47bc084..dcabd98 100644 --- a/workflows/postprocessing.nf +++ b/workflows/postprocessing.nf @@ -9,6 +9,8 @@ include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pi include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { EXCLUDE_MNPS } from "../subworkflows/local/exclude_mnps" include { VQSR } from "../subworkflows/local/vqsr" +include { isExomiserToolIncluded } from 
'../subworkflows/local/utils_nfcore_postprocessing_pipeline/utils' +include { isVepToolIncluded } from '../subworkflows/local/utils_nfcore_postprocessing_pipeline/utils' include { hardFiltering } from '../modules/local/hardFilter' include { splitMultiAllelics } from '../modules/local/vep' include { vep } from '../modules/local/vep' @@ -103,14 +105,12 @@ process writemeta{ } workflow POSTPROCESSING { - -//Local Temp Params + //Local Temp Params def referenceGenome = file(params.referenceGenome) def pathReferenceGenomeFasta = file(params.referenceGenome + "/" + params.referenceGenomeFasta) def pathReferenceGenomeFai = file(pathReferenceGenomeFasta + ".fai") def broad = file(params.broad) def pathIntervalFile = file(params.broad + "/" + params.intervalsFile) - def vepCache = file(params.vepCache) def pathReferenceDict = file(params.referenceGenome + "/" + params.referenceGenomeFasta.substring(0,params.referenceGenomeFasta.indexOf(".")) + ".dict") file(params.outdir).mkdirs() @@ -153,7 +153,9 @@ workflow POSTPROCESSING { s = splitMultiAllelics(vcfWithTags, referenceGenome) //Annotating mutations - if (params.tools && params.tools.split(',').contains('vep')) { + if (isVepToolIncluded()) { + def vepCache = file(params.vepCache) + vep(s, referenceGenome, vepCache) tabix(vep.out) @@ -161,13 +163,24 @@ workflow POSTPROCESSING { tabix.out } - if (params.tools && params.tools.split(',').contains('exomiser')) { - s = s.map{meta, files -> - [meta,files,meta.familypheno]} - - exomiser_analysis_file = file(params.exomiser_analysis) - exomiser_data_dir = file(params.exomiser_data_dir) - EXOMISER(s,exomiser_analysis_file,exomiser_data_dir) + if (isExomiserToolIncluded()) { + def exomiser_data_dir = file(params.exomiser_data_dir) + def analysis_wes_file = file(params.exomiser_analysis_wes) + def analysis_wgs_file = file(params.exomiser_analysis_wgs) + + s = s.map{meta, files -> [ + meta, + files, + meta.familypheno, + meta.sequencingType == "WES"? 
analysis_wes_file : analysis_wgs_file + ]} + EXOMISER(s, + exomiser_data_dir, + params.exomiser_genome, + params.exomiser_data_version, + params.exomiser_remm_version ?: "", + params.exomiser_cadd_version ?: "" + ) } }