forked from nf-core/ampliseq
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathnextflow_schema.json
518 lines (518 loc) · 39.7 KB
/
nextflow_schema.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/nf-core/ampliseq/master/nextflow_schema.json",
"title": "nf-core/ampliseq pipeline parameters",
"description": "16S rRNA amplicon sequencing analysis workflow using QIIME2",
"type": "object",
"definitions": {
"main_arguments": {
"title": "Main arguments",
"type": "object",
"description": "",
"default": "",
"properties": {
"input": {
"type": "string",
"fa_icon": "fas fa-dna",
"description": "Folder containing paired-end demultiplexed FastQ files",
"help_text": "Use this to specify the location of your input paired-end FastQ files. \n\nFor example:\n\n```bash\n--input 'path/to/data'\n```\n\nExample for input data organization from one sequencing run with two samples:\n\n```bash\ndata\n |-sample1_1_L001_R1_001.fastq.gz\n |-sample1_1_L001_R2_001.fastq.gz\n |-sample2_1_L001_R1_001.fastq.gz\n |-sample2_1_L001_R2_001.fastq.gz\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The folder must contain gzip compressed paired-end demultiplexed fastq files. If the file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`), please check `--extension`.\n3. If your data is scattered, a directory with symlinks to your actual data might be a solution.\n4. All sequencing data should originate from one sequencing run, because processing relies on run-specific error models that are unreliable when data from several sequencing runs are mixed. Sequencing data originating from multiple sequencing runs requires additionally the parameter `--multipleSequencingRuns` and a specific folder structure."
},
"FW_primer": {
"type": "string",
"description": "Forward primer sequence",
"help_text": "In amplicon sequencing methods, PCR with specific primers produces the amplicon of interest. These primer sequences need to be trimmed from the reads before further processing and are also required for producing an appropriate classifier. \n\nFor example:\n\n```bash\n--FW_primer GTGYCAGCMGCCGCGGTAA --RV_primer GGACTACNVGGGTWTCTAAT\n```",
"fa_icon": "fas fa-arrow-circle-right"
},
"RV_primer": {
"type": "string",
"description": "Reverse primer sequence",
"help_text": "In amplicon sequencing methods, PCR with specific primers produces the amplicon of interest. These primer sequences need to be trimmed from the reads before further processing and are also required for producing an appropriate classifier. \n\nFor example:\n\n```bash\n--FW_primer GTGYCAGCMGCCGCGGTAA --RV_primer GGACTACNVGGGTWTCTAAT\n```",
"fa_icon": "fas fa-arrow-alt-circle-left"
},
"metadata": {
"type": "string",
"description": "Path to metadata sheet, when missing most downstream analysis are skipped (barplots, PCoA plots, ...).",
"help_text": "This is optional, but for performing downstream analysis such as barplots, diversity indices or differential abundance testing, a metadata file is essential. \n\nFor example:\n\n```bash\n--metadata \"path/to/metadata.tsv\"\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The metadata file has to follow the QIIME2 specifications (https://docs.qiime2.org/2019.10/tutorials/metadata/)\n3. In case of multiple sequencing runs, specific naming of samples are required, see `--multipleSequencingRuns`\n\nThe first column in the metadata file is the identifier (ID) column and defines the sample or feature IDs associated with your study. Metadata files are not required to have additional metadata columns, so a file containing only an ID column is a valid QIIME 2 metadata file. Additional columns defining metadata associated with each sample or feature ID are optional.\n**NB**: without additional columns there might be no groupings for the downstream analyses.\n\nIdentifiers should be 36 characters long or less, and also contain only ASCII alphanumeric characters (i.e. in the range of [a-z], [A-Z], or [0-9]), the period (.) character, or the dash (-) character. By default all numeric columns, blanks or NA are removed, and only columns with multiple different values but not all unique are selected.\n\nThe columns which are to be assessed can be specified by `--metadata_category`. If `--metadata_category` isn't specified than all columns that fit the specification are automatically chosen.",
"fa_icon": "fas fa-file-csv"
},
"multipleSequencingRuns": {
"type": "boolean",
"description": "If samples were sequenced in multiple sequencing runs",
"help_text": "Expects one subfolder per sequencing run in the folder specified by `--input` containing sequencing data of the specific run.\n\nTo prevent overlapping sample names from multiple sequencing runs, sample names obtained from the sequencing files will be renamed automatically by adding the folder name as prefix separated by a string specified by `--split`. Accordingly, the sample name column in the metadata file `--metadata` require values following `subfolder-samplename`.\n\nExample for input data organization:\n\n```bash\ndata\n |-run1\n | |-sample1_1_L001_R{1,2}_001.fastq.gz\n | |-sample2_1_L001_R{1,2}_001.fastq.gz\n |\n |-run2\n |-sample3_1_L001_R{1,2}_001.fastq.gz\n |-sample4_1_L001_R{1,2}_001.fastq.gz\n```\n\nIn this example the first column in the metadata file requires the values `run1-sample1` ... `run2-sample4` (instead of `sample1`, ..., `sample4`).\n\nExample command to analyze this data in one pipeline run:\n\n```bash\nnextflow run nf-core/ampliseq \\\n -profile singularity \\\n --input \"data\" \\\n --FW_primer GTGYCAGCMGCCGCGGTAA \\\n --RV_primer GGACTACNVGGGTWTCTAAT \\\n --metadata \"data/Metadata.tsv\" \\\n --multipleSequencingRuns\n```\n\n##### Visually choosing sequencing read truncation cutoffs\n\nWhile `--untilQ2import` with `--multipleSequencingRuns` is currently supported, `--Q2imported` is not. The pipeline can be first run with `--untilQ2import`, than `--trunclenf` and `--trunclenr` are visually chosen, and then the pipeline can be continued without `--untilQ2import` but with `--trunlenf`, `--trunlenr`, and `-resume`.\n\nFor example:\n\n(1) To produce quality plots and choose truncation values:\n\n```bash\nnextflow run nf-core/ampliseq \\\n -profile singularity \\\n --input \"data\" \\\n --FW_primer GTGYCAGCMGCCGCGGTAA \\\n --RV_primer GGACTACNVGGGTWTCTAAT \\\n --metadata \"data/Metadata.tsv\" \\\n --multipleSequencingRuns \\\n --untilQ2import\n```\n\n(2) To finish analysis:\n\n```bash\nnextflow run nf-core/ampliseq \\\n -profile singularity \\\n --input \"data\" \\\n --FW_primer GTGYCAGCMGCCGCGGTAA \\\n --RV_primer GGACTACNVGGGTWTCTAAT \\\n --metadata \"data/Metadata.tsv\" \\\n --multipleSequencingRuns \\\n --trunclenf 200 \\\n --trunclenr 180 \\\n -resume\n```",
"fa_icon": "fas fa-running"
},
"manifest": {
"type": "string",
"description": "Path to tab-separated table with sample IDs and paths to sequencing files",
"help_text": "You can submit a manifest file as an alternative way to provide input reads. No submission of read files with `--input` is required this way.\n\nA manifest must be a tab-separated file that must have the following labels in this exact order: `sampleID`, `forwardReads`, `reverseReads`. In case of single-end reads, such as PacBio data, the labels should be: `sampleID`, `Reads`. The sample identifiers must be listed under `sampleID`. Paths to forward and reverse reads must be reported under `forwardReads` and `reverseReads`, respectively. Path to single-end must be reported under `Reads`.\n\nMultiple sequencing runs not supported by `--manifest` at this stage.",
"fa_icon": "fas fa-file-csv"
}
},
"required": [
"FW_primer",
"RV_primer"
],
"fa_icon": "fas fa-terminal"
},
"cutoffs": {
"title": "Cutoffs",
"type": "object",
"description": "",
"default": "",
"properties": {
"maxEE": {
"type": "integer",
"default": 2,
"description": "DADA2 read filtering option",
"help_text": "After truncation, reads with higher than \u2018maxEE\u2019 \"expected errors\" will be discarded. In case of very long reads, you might want to increase this value. We recommend (to start with) a value corresponding to approximately 1 expected error per 100-200 bp (default: 2)",
"fa_icon": "fas fa-equals"
},
"maxLen": {
"type": "integer",
"default": 2999,
"description": "DADA2 read filtering option [PacBio only]",
"fa_icon": "fas fa-less-than-equal",
"help_text": "Remove reads with length greater than maxLen after trimming and truncation."
},
"minLen": {
"type": "integer",
"default": 50,
"description": "DADA2 read filtering option [PacBio only]",
"fa_icon": "fas fa-greater-than-equal",
"help_text": "Remove reads with length less than minLen after trimming and truncation."
},
"retain_untrimmed": {
"type": "boolean",
"description": "Cutadapt will retain untrimmed reads, choose only if input reads are not expected to contain primer sequences.",
"help_text": "When read sequences are trimmed, untrimmed read pairs are discarded routinely. Use this option to retain untrimmed read pairs. This is usually not recommended and is only of advantage for specific protocols that prevent sequencing PCR primers. \n\nFor example:\n\n```bash\n--retain_untrimmed\n```",
"fa_icon": "far fa-plus-square"
},
"double_primer": {
"type": "boolean",
"description": "Cutadapt will be run twice to ensure removal of potential double primers",
"help_text": "Cutdapt will be run twice, first to remove reads without primers (default), then a second time to remove reads that erroneously contain a second set of primers, not to be used with `--retain_untrimmed`",
"fa_icon": "fas fa-project-diagram"
},
"trunclenf": {
"type": "integer",
"description": "DADA2 read truncation value for forward strand, set this to 0 for no truncation",
"help_text": "Read denoising by DADA2 creates an error profile specific to a sequencing run and uses this to correct sequencing errors. This method requires all reads to have the same length and as high quality as possible while maintaining at least 20 bp overlap for merging. One cutoff for the forward read `--trunclenf` and one for the reverse read `--trunclenr` truncate all longer reads at that position and drop all shorter reads.\nThese cutoffs are usually chosen visually using `--untilQ2import`, inspecting the quality plots in \"results/demux\", and resuming analysis with `--Q2imported`. If not set, these cutoffs will be determined automatically for the position before the mean quality score drops below `--trunc_qmin`.\n\nFor example:\n\n```bash\n--trunclenf 180 --trunclenr 120\n```\n\nPlease note:\n\n1. Overly aggressive truncation might lead to insufficient overlap for read merging\n2. Too little truncation might reduce denoised reads\n3. The code choosing these values automatically cannot take the points above into account, therefore setting `--trunclenf` and `--trunclenr` is recommended",
"fa_icon": "fas fa-ban"
},
"trunclenr": {
"type": "integer",
"description": "DADA2 read truncation value for reverse strand, set this to 0 for no truncation",
"help_text": "Read denoising by DADA2 creates an error profile specific to a sequencing run and uses this to correct sequencing errors. This method requires all reads to have the same length and as high quality as possible while maintaining at least 20 bp overlap for merging. One cutoff for the forward read `--trunclenf` and one for the reverse read `--trunclenr` truncate all longer reads at that position and drop all shorter reads.\nThese cutoffs are usually chosen visually using `--untilQ2import`, inspecting the quality plots in \"results/demux\", and resuming analysis with `--Q2imported`. If not set, these cutoffs will be determined automatically for the position before the mean quality score drops below `--trunc_qmin`.\n\nFor example:\n\n```bash\n--trunclenf 180 --trunclenr 120\n```\n\nPlease note:\n\n1. Overly aggressive truncation might lead to insufficient overlap for read merging\n2. Too little truncation might reduce denoised reads\n3. The code choosing these values automatically cannot take the points above into account, therefore setting `--trunclenf` and `--trunclenr` is recommended",
"fa_icon": "fas fa-ban"
},
"trunc_qmin": {
"type": "integer",
"default": 25,
"description": "If --trunclenf and --trunclenr are not set, these values will be automatically determined using this median quality score",
"help_text": "Automatically determine `--trunclenf` and `--trunclenr` before the median quality score drops below `--trunc_qmin` (default: 25). The fraction of reads retained is defined by `--trunc_rmin`, which might override the quality cutoff.\n\nFor example:\n\n```bash\n--trunc_qmin 35\n```\n\nPlease note:\n\n1. The code choosing `--trunclenf` and `--trunclenr` using `--trunc_qmin` automatically cannot take amplicon length or overlap requirements for merging into account, therefore use with caution.\n2. The default value of 25 is recommended. However, high quality data with a large paired sequence overlap might justify a higher value (e.g. 35). Also, very low quality data might require a lower value.\n3. If the quality cutoff is too low to include a certain fraction of reads that is specified by `--trunc_rmin` (default: 0.75, meaning at least 75% percent of reads are retained), a lower cutoff according to `--trunc_rmin` superseeds the quality cutoff.",
"fa_icon": "fas fa-greater-than-equal"
},
"trunc_rmin": {
"type": "number",
"default": 0.75,
"description": "Assures that values chosen with --trunc_qmin will retain a fraction of reads.",
"help_text": "Value can range from 0 to 1. 0 means no reads need to be retained and 1 means all reads need to be retained. The minimum lengths of --trunc_qmin and --trunc_rmin are chosen as DADA2 cutoffs."
}
},
"fa_icon": "fas fa-filter"
},
"taxonomic_database": {
"title": "Taxonomic database",
"type": "object",
"description": "",
"default": "",
"properties": {
"reference_database": {
"type": "string",
"default": "https://www.arb-silva.de/fileadmin/silva_databases/qiime/Silva_132_release.zip",
"description": "Path to taxonomic reference database, currently accepts a qiime compatible file Silva_132_release.zip or a UNITE fasta file",
"help_text": "By default, the workflow downloads SILVA (https://www.arb-silva.de/) v132 (https://www.arb-silva.de/documentation/release-132/) and extracts reference sequences and taxonomy clustered at 99% similarity and trains a Naive Bayes classifier to assign taxonomy to features."
},
"taxon_reference": {
"type": "string",
"default": "silva",
"description": "Specify which database to use for taxonomic assignment. Either 'silva' or 'unite' (default: 'silva').",
"help_text": "By default, uses SILVA for taxonomic assignment, but can also use UNITE. If so, specify the UNITE fasta file with --reference_database."
},
"classifier": {
"type": "string",
"description": "Path to QIIME2 trained classifier file (typically *-classifier.qza)",
"help_text": "If you have trained a compatible classifier before, from sources such as SILVA (https://www.arb-silva.de/), Greengenes (http://greengenes.secondgenome.com/downloads) or RDP (https://rdp.cme.msu.edu/). \n\nFor example:\n\n```bash\n--classifier \"FW_primer-RV_primer-classifier.qza\"\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The classifier is a Naive Bayes classifier produced by \"qiime feature-classifier fit-classifier-naive-bayes\" (e.g. by this pipeline or from (https://docs.qiime2.org/2019.10/data-resources/))\n3. The primer pair for the amplicon PCR and the computing of the classifier are exactly the same (or fulllength, potentially lower performance)\n4. The classifier has to be trained by the same version of scikit-learn as this version of the pipeline uses (0.21.2)"
},
"classifier_removeHash": {
"type": "boolean",
"description": "Remove all hash signs from taxonomy strings, resolves a rare ValueError during classification (process classifier)"
},
"dereplication": {
"type": "integer",
"default": 99,
"description": "Dereplication of the database. Must bematching SILVA v132 and its subfolders. Database size is descreasing, but taxonomical assignments as well.",
"hidden": true
}
},
"fa_icon": "fas fa-database"
},
"filtering": {
"title": "Filtering",
"type": "object",
"description": "",
"default": "",
"properties": {
"exclude_taxa": {
"type": "string",
"default": "mitochondria,chloroplast",
"description": "Comma separated list of unwanted taxa, to skip taxa filtering use \"none\"",
"help_text": "Depending on the primers used, PCR might amplify unwanted or off-target DNA. By default sequences originating from mitochondria or chloroplasts are removed. The taxa specified are excluded from further analysis.\nFor example to exclude any taxa that contain mitochondria, chloroplast, or archaea:\n\n```bash\n--exclude_taxa \"mitochondria,chloroplast,archaea\"\n```\n\nIf you prefer not filtering the data, specify:\n\n```bash\n--exclude_taxa \"none\"\n```\n\nPlease note the following requirements:\n\n1. Comma separated list enclosed in quotes\n2. May not contain whitespace characters\n3. Features that contain one or several of these terms in their taxonomical classification are excluded from further analysis\n4. The taxonomy level is not taken into consideration"
},
"min_frequency": {
"type": "integer",
"default": "1",
"description": "Abundance filtering",
"help_text": "Remove entries from the feature table below an absolute abundance threshold (default: 1, meaning filter is disabled). Singletons are often regarded as artifacts, choosing a value of 2 removes sequences with less than 2 total counts from the feature table.\n\nFor example to remove singletons choose:\n\n```bash\n--min_frequency 2\n```"
},
"min_samples": {
"type": "integer",
"default": "1",
"description": "Prevalence filtering",
"help_text": "Filtering low prevalent features from the feature table, e.g. keeping only features that are present in at least two samples can be achived by choosing a value of 2 (default: 1, meaning filter is disabled). Typically only used when having replicates for all samples.\n\nFor example to retain features that are present in at least two sample:\n\n```bash\n--min_samples 2\n```\n\nPlease note this is independent of abundance."
}
},
"fa_icon": "fas fa-filter"
},
"other_input_output_options": {
"title": "Other input/output options",
"type": "object",
"fa_icon": "fas fa-terminal",
"description": "Define where the pipeline should find input data and save output data.",
"properties": {
"readPaths": {
"type": "string",
"hidden": true,
"description": "Path to test sequencing read files"
},
"metadata_category": {
"type": "string",
"description": "Comma separated list of metadata column headers for statistics.",
"help_text": "Here columns in the metadata sheet can be chosen with groupings that are used for diversity indices and differential abundance analysis. By default, all suitable columns in the metadata sheet will be used if this option is not specified. Suitable are columns which are categorical (not numerical) and have multiple different values which are not all unique. For example:\n\n```bash\n--metadata_category \"treatment1,treatment2\"\n```\n\nPlease note the following requirements:\n\n1. Comma separated list enclosed in quotes\n2. May not contain whitespace characters\n3. Each comma separated term has to match exactly one column name in the metadata sheet"
},
"pacbio": {
"type": "boolean",
"description": "If PacBio data. Use this option together with --manifest"
},
"phred64": {
"type": "boolean",
"description": "If the sequencing data has PHRED 64 encoded quality scores, otherwise PHRED 33 is assumed"
},
"split": {
"type": "string",
"default": "-",
"description": "A string that will be used between the prepended run/folder name and the sample name. Only used with \"--multipleSequencingRuns\".",
"help_text": "A string that will be used between the prepended run/folder name and the sample name. Only used with `--multipleSequencingRuns` (default: `\"-\"`).\n\nFor example using the string `link`:\n\n```bash\n--split \"link\"\n```\n\nPlease note:\n\n1. Run/folder names may not contain the string specified by `--split`\n2. No underscore(s) allowed\n3. Must be enclosed in quotes\n4. The metadata sheet has to be adjusted, instead of using `run-sample` in the first column, in this example `runlinksample` is required"
},
"extension": {
"type": "string",
"default": "/*_R{1,2}_001.fastq.gz",
"description": "Naming of sequencing files",
"help_text": "Indicates the naming of sequencing files (default: `\"/*_R{1,2}_001.fastq.gz\"`).\n\nPlease note:\n\n1. The prepended slash (`/`) is required\n2. The star (`*`) is the required wildcard for sample names\n3. The curly brackets (`{}`) enclose the orientation for paired end reads, seperated by a comma (`,`).\n4. The pattern must be enclosed in quotes\n\nFor example for one sample (name: `1`) with forward (file: `1_a.fastq.gz`) and reverse (file: `1_b.fastq.gz`) reads in folder `data`:\n\n```bash\n--input \"data\" --extension \"/*_{a,b}.fastq.gz\"\n```"
},
"outdir": {
"type": "string",
"description": "The output directory where the results will be saved.",
"default": "./results",
"fa_icon": "fas fa-folder-open"
},
"email": {
"type": "string",
"description": "Email address for completion summary.",
"fa_icon": "fas fa-envelope",
"help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.",
"pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
},
"qiime_timezone": {
"type": "string",
"default": "Europe/Berlin",
"description": "Needs to be specified to resolve a timezone error",
"help_text": "If a timezone error occurs, this parameter needs to be specified (default: 'Europe/Berlin'). Find your appropriate timezone with e.g. tzselect.\nNote, this affects the timezone of the entire software environment."
},
"keepIntermediates": {
"type": "boolean",
"description": "Keep additional intermediate files, such as trimmed reads or various QIIME2 archives"
}
}
},
"run_only_partial_workflow": {
"title": "Run only partial workflow",
"type": "object",
"description": "",
"default": "",
"properties": {
"untilQ2import": {
"type": "boolean",
"description": "Skip all steps after importing into QIIME2, used for visually choosing DADA2 parameter `--trunclenf` and `--trunclenr`"
},
"Q2imported": {
"type": "string",
"description": "Path to imported reads (e.g. \"demux.qza\")",
"help_text": "Analysis starting with a QIIME2 artefact with trimmed reads, typically produced before with `--untilQ2import`. This is only supported for data from a single sequencing run.\n\nFor data from multiple sequencing runs with `--multipleSequencingRuns` the pipeline can be first run with `--untilQ2import` and next run without `--untilQ2import` but with `-resume`."
},
"onlyDenoising": {
"type": "boolean",
"description": "Skip all steps after denoising, produce only sequences and abundance tables on ASV level"
}
}
},
"skipping_specific_steps": {
"title": "Skipping specific steps",
"type": "object",
"description": "",
"default": "",
"properties": {
"skip_fastqc": {
"type": "boolean",
"description": "Skip FastQC"
},
"skip_alpha_rarefaction": {
"type": "boolean",
"description": "Skip alpha rarefaction"
},
"skip_barplot": {
"type": "boolean",
"description": "Skip producing barplot"
},
"skip_taxonomy": {
"type": "boolean",
"description": "Skip taxonomic classification"
},
"skip_abundance_tables": {
"type": "boolean",
"description": "Skip producing any relative abundance tables"
},
"skip_diversity_indices": {
"type": "boolean",
"description": "Skip alpha and beta diversity analysis"
},
"skip_ancom": {
"type": "boolean",
"description": "Skip differential abundance testing"
},
"skip_multiqc": {
"type": "boolean",
"description": "Skip MultiQC reporting"
}
}
},
"generic_options": {
"title": "Generic options",
"type": "object",
"fa_icon": "fas fa-file-import",
"description": "Less common options for the pipeline, typically set in a config file.",
"help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.",
"properties": {
"help": {
"type": "boolean",
"description": "Display help text.",
"hidden": true,
"fa_icon": "fas fa-question-circle"
},
"publish_dir_mode": {
"type": "string",
"default": "copy",
"hidden": true,
"description": "Method used to save pipeline results to output directory.",
"help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
"fa_icon": "fas fa-copy",
"enum": [
"symlink",
"rellink",
"link",
"copy",
"copyNoFollow",
"move"
]
},
"name": {
"type": "string",
"description": "Workflow name.",
"fa_icon": "fas fa-fingerprint",
"hidden": true,
"help_text": "A custom name for the pipeline run. Unlike the core nextflow `-name` option with one hyphen this parameter can be reused multiple times, for example if using `-resume`. Passed through to steps such as MultiQC and used for things like report filenames and titles."
},
"email_on_fail": {
"type": "string",
"description": "Email address for completion summary, only when pipeline fails.",
"fa_icon": "fas fa-exclamation-triangle",
"pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$",
"hidden": true,
"help_text": "This works exactly as with `--email`, except emails are only sent if the workflow is not successful."
},
"plaintext_email": {
"type": "boolean",
"description": "Send plain-text email instead of HTML.",
"fa_icon": "fas fa-remove-format",
"hidden": true,
"help_text": "Set to receive plain-text e-mails instead of HTML formatted."
},
"max_multiqc_email_size": {
"type": "string",
"description": "File size limit when attaching MultiQC reports to summary emails.",
"default": "25.MB",
"fa_icon": "fas fa-file-upload",
"hidden": true,
"help_text": "If file generated by pipeline exceeds the threshold, it will not be attached."
},
"monochrome_logs": {
"type": "boolean",
"description": "Do not use coloured log outputs.",
"fa_icon": "fas fa-palette",
"hidden": true,
"help_text": "Set to disable colourful command line output and live life in monochrome."
},
"multiqc_config": {
"type": "string",
"description": "Custom config file to supply to MultiQC.",
"fa_icon": "fas fa-cog",
"hidden": true
},
"tracedir": {
"type": "string",
"description": "Directory to keep pipeline Nextflow logs and reports.",
"default": "${params.outdir}/pipeline_info",
"fa_icon": "fas fa-cogs",
"hidden": true
},
"clusterOptions": {
"type": "string",
"hidden": true
},
"awsqueue": {
"type": "string",
"hidden": true
},
"awsregion": {
"type": "string",
"default": "eu-west-1",
"hidden": true
}
}
},
"institutional_config_options": {
"title": "Institutional config options",
"type": "object",
"fa_icon": "fas fa-university",
"description": "Parameters used to describe centralised config profiles. These should not be edited.",
"help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.",
"properties": {
"custom_config_version": {
"type": "string",
"description": "Git commit id for Institutional configs.",
"default": "master",
"hidden": true,
"fa_icon": "fas fa-users-cog",
"help_text": "Provide git commit id for custom Institutional configs hosted at `nf-core/configs`. This was implemented for reproducibility purposes. Default: `master`.\n\n```bash\n## Download and use config file with following git commit id\n--custom_config_version d52db660777c4bf36546ddb188ec530c3ada1b96\n```"
},
"custom_config_base": {
"type": "string",
"description": "Base directory for Institutional configs.",
"default": "https://raw.githubusercontent.com/nf-core/configs/master",
"hidden": true,
"help_text": "If you're running offline, nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell nextflow where to find them with the `custom_config_base` option. For example:\n\n```bash\n## Download and unzip the config files\ncd /path/to/my/configs\nwget https://github.com/nf-core/configs/archive/master.zip\nunzip master.zip\n\n## Run the pipeline\ncd /path/to/my/data\nnextflow run /path/to/pipeline/ --custom_config_base /path/to/my/configs/configs-master/\n```\n\n> Note that the nf-core/tools helper package has a `download` command to download all required pipeline files + singularity containers + institutional configs in one go for you, to make this process easier.",
"fa_icon": "fas fa-users-cog"
},
"hostnames": {
"type": "string",
"description": "Institutional configs hostname.",
"hidden": true,
"fa_icon": "fas fa-users-cog"
},
"config_profile_description": {
"type": "string",
"description": "Institutional config description.",
"hidden": true,
"fa_icon": "fas fa-users-cog"
},
"config_profile_contact": {
"type": "string",
"description": "Institutional config contact information.",
"hidden": true,
"fa_icon": "fas fa-users-cog"
},
"config_profile_url": {
"type": "string",
"description": "Institutional config URL link.",
"hidden": true,
"fa_icon": "fas fa-users-cog"
}
}
},
"max_job_request_options": {
"title": "Max job request options",
"type": "object",
"fa_icon": "fab fa-acquisitions-incorporated",
"description": "Set the top limit for requested resources for any single job.",
"help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.",
"properties": {
"max_cpus": {
"type": "integer",
"description": "Maximum number of CPUs that can be requested for any single job.",
"default": 16,
"fa_icon": "fas fa-microchip",
"hidden": true,
"help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`"
},
"max_memory": {
"type": "string",
"description": "Maximum amount of memory that can be requested for any single job.",
"default": "128.GB",
"fa_icon": "fas fa-memory",
"hidden": true,
"help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`"
},
"max_time": {
"type": "string",
"description": "Maximum amount of time that can be requested for any single job.",
"default": "240.h",
"fa_icon": "far fa-clock",
"hidden": true,
"help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`"
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/main_arguments"
},
{
"$ref": "#/definitions/cutoffs"
},
{
"$ref": "#/definitions/taxonomic_database"
},
{
"$ref": "#/definitions/filtering"
},
{
"$ref": "#/definitions/other_input_output_options"
},
{
"$ref": "#/definitions/run_only_partial_workflow"
},
{
"$ref": "#/definitions/skipping_specific_steps"
},
{
"$ref": "#/definitions/generic_options"
},
{
"$ref": "#/definitions/institutional_config_options"
},
{
"$ref": "#/definitions/max_job_request_options"
}
]
}