-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.nf
122 lines (102 loc) · 2.81 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/usr/bin/env nextflow
/*
* Copyright (c) 2022, EANBIT Residential training.
* Copyright (c) 2022, International Centre of Insect Physiology
* and Ecology (icipe).
*/
/*
* 'ONTmetacriptom-NF' - A Nextflow pipeline for ONT long reads
* metatranscriptomic data analysis.
*
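* This pipeline reproduces steps from the GATK best practices for SNP
* calling with RNAseq data:
* https://software.broadinstitute.org/gatk/guide/article?id=3891
*
* NOTE(review): the paragraph above looks inherited from a template. The
* workflow in this file runs read QC, adapter trimming, rRNA filtering,
* clustering, error correction, alignment and transcript quantification;
* no SNP-calling step is invoked. Confirm and update the description.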
*
* Fedrick Kebaso
* Samuel Oduor
* Stephen Kuria
* Manase Aloo
* Mark Tefero Kivumbi
*/
/*
* Enable DSL 2 syntax
*/
nextflow.enable.dsl = 2
/*
* Define the default parameters
*/
// These are defaults only: Nextflow lets the user override any `params.*`
// value from the command line (e.g. --readsDir, --outdir) or a config file.
//
// readsDir: root directory holding the input reads. The workflow in this
// file searches it recursively for "*.gz" files and also hands the directory
// itself to PORECHOP_TRIM.
params.readsDir = "$projectDir/data"
// outdir: output location. In this file it is only echoed in the banner;
// presumably the imported modules publish into it (verify in
// ./modules/metatranscriptome.nf).
params.outdir = "results"
// Start-up banner: logs the pipeline name/version and the effective
// parameter values so each run records what it was launched with.
// The trailing backslash after the opening quotes suppresses the leading
// newline; stripIndent() removes common leading indentation from the text.
log.info """\
O N T m e t a c r i p t o m e - N F v 0.1
===========================================
readsDir : $params.readsDir
results : $params.outdir
"""
.stripIndent()
/*
* Import modules
*/
include {
NANOPLOT_QC;
MULTIQC_REPORT;
PORECHOP_TRIM;
POST_TRIM_NANOPLOT_QC;
POST_TRIM_MULTIQC_REPORT;
DOWNLOAD_rRNADATABASE;
SORTMERNA;
ISONCLUST;
ISONCORRECT;
REFSEQ_GCF_016920715_DOWNLOAD;
MINIMAP2;
NANOCOUNT
} from './modules/metatranscriptome.nf'
/*
* main pipeline logic
*/
workflow {
    /*
     * Entry workflow: wires the processes imported from
     * ./modules/metatranscriptome.nf into one linear analysis, from raw
     * read QC through to transcript abundance estimation.
     */

    // Input discovery: pick up every gzipped file in the sub-directories
    // of readsDir and group the files by the name of the directory that
    // directly contains them, giving one (directory name, [files]) tuple
    // per directory.
    raw_reads_ch = channel
        .fromPath("${params.readsDir}/**/*.gz")
        .map { reads_file -> tuple(reads_file.parent.name, reads_file) }
        .groupTuple()

    // Section 1a: quality check of the raw reads
    NANOPLOT_QC(raw_reads_ch)

    // Section 1b: gather all outputs of 1a into a single item and build
    // one combined report from them
    raw_qc_results = NANOPLOT_QC.out.collect()
    MULTIQC_REPORT(raw_qc_results)

    // Section 2: ONT adapter removal. The process receives the reads
    // directory itself, not the grouped channel built above.
    PORECHOP_TRIM(params.readsDir)

    // Emit each trimmed output as its own channel item; the same channel
    // feeds both the post-trim QC and the rRNA filtering step.
    trimmed_reads_ch = PORECHOP_TRIM.out.flatten()

    // Quality check of the trimmed reads
    POST_TRIM_NANOPLOT_QC(trimmed_reads_ch)

    // Combined post-adapter-removal report built from all
    // POST_TRIM_NANOPLOT_QC outputs
    trimmed_qc_results = POST_TRIM_NANOPLOT_QC.out.collect()
    POST_TRIM_MULTIQC_REPORT(trimmed_qc_results)

    // Section 3a: download the reference rRNA databases
    DOWNLOAD_rRNADATABASE()

    // Section 3b: filter rRNA fragments out of the trimmed reads; the
    // downloaded databases are collected into one item for the process
    rrna_databases = DOWNLOAD_rRNADATABASE.out.collect()
    SORTMERNA(rrna_databases, trimmed_reads_ch)

    // Section 4: cluster reads into gene families using isONclust
    ISONCLUST(SORTMERNA.out)

    // Section 5: ONT read error correction
    ISONCORRECT(ISONCLUST.out)

    // Section 6a: download the reference database
    REFSEQ_GCF_016920715_DOWNLOAD()

    // Section 6b: align the corrected reads to the downloaded reference
    corrected_reads = ISONCORRECT.out
    reference_db = REFSEQ_GCF_016920715_DOWNLOAD.out
    MINIMAP2(corrected_reads, reference_db)

    // Section 7: transcript abundance estimation
    NANOCOUNT(MINIMAP2.out)

    // Section 8: count the reads mapped to each gene
    // (not implemented yet - work in progress)
}