-
Notifications
You must be signed in to change notification settings - Fork 0
/
medaka_samtoolsSeparate.nf
220 lines (159 loc) · 6.53 KB
/
medaka_samtoolsSeparate.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
#! /usr/bin/env nextflow
/*************************************
nextflow nanoQCtrim
*************************************/
racon_container = 'quay.io/biocontainers/racon:1.4.20--h9a82719_1'
medaka_container = 'quay.io/biocontainers/medaka:1.2.5--py38h64b100c_0'
samtools19_container = 'quay.io/biocontainers/samtools:1.9--h10a08f8_12'
//nextflow run isugifNF/nanoPolish --genomes tail.fasta --reads test.fastq -profile singularity,condo -resume
def helpMessage() {
log.info isuGIFHeader()
log.info """
Usage:
The typical command for running the pipeline are as follows:
nextflow run isugifNF/nanoPolish --genome tail.fasta --reads test.fastq --chunkSize 25000 --model "medaka/medaka/data/r941_min_high_g303_model.hdf5" -profile singularity,condo
Mandatory arguments:
--genome genome assembly fasta file to run stats on. (./data/*.fasta)
-profile singularity as of now, this workflow only works using singularity and requires this profile [be sure singularity is in your path]
--reads Raw nanopore reads to use in polish
Optional arguments:
--outdir Output directory to place final output
--threads Number of CPUs to use during the NanoPlot job [16]
--queueSize Maximum number of jobs to be queued [18]
--model Medaka hdf5 model used during polishing.
--chunkSize Number of fasta records to use when splitting the input fastq raw reads
--help This usage statement.
"""
}
// Add a --modelList param to list all the models like I did in assemblyStats for BUSCO
// Show help message
if (params.help) {
helpMessage()
exit 0
}
// Channels for the genome and its label
Channel
.fromPath(params.genome)
.map { file -> file.simpleName}
.into { genomeLabel_runMinimap2; genomeLabel_runRacon; genomeLabel_BUSCO }
Channel
.fromPath(params.genome)
.into { genome_runMinimap2; genome_runRacon; genome_getScaffolds }
//Channels for reads and chunks of reads
Channel
.fromPath(params.reads)
.into { read_file; read_file2 }
Channel
.fromPath(params.reads)
.splitFastq(by: params.chunkSize, file:true)
.set { read_chunks }
//Channel for medaka consensus model
Channel
.fromPath(params.model)
.set { model_medaka }
//Run Medaka in 3 steps
// Step 1 Align reads to assembly
process medakaAlign {
container = "$medaka_container"
input:
path raconGenome from raconGenome_ch.val
path reads from read_file2.val
output:
path("calls_to_draft.sam") into medakaAlignSam_ch
script:
"""
mini_align -i ${reads} -r ${raconGenome} -m \
-p calls_to_draft \
-t ${params.threads}
"""
}
process samSortIndex {
container = "$samtools19_container"
input:
path samFile from medakaAlignSam_ch
path raconGenome2 from raconGenome2_ch2
output:
path("calls_to_draft.bam") into medakaAlign_ch
path("calls_to_draft.bam.bai") into medakaAlignBai_ch
script:
"""
export PREFIX="calls_to_draft"
export FILTER="-F 2308"
export SORT=''
export THREADS=${params.threads}
export REFERENCE=${raconGenome2}
samtools view -@ \${THREADS} -T \${REFERENCE} \${FILTER} -bS ${samFile} |
samtools sort -@ \${THREADS} \${SORT} -m 50G -l 9 -o \${PREFIX}.bam - \
|| (echo "Alignment pipeline failed." && exit 1)
samtools index -@ \${THREADS} \${PREFIX}.bam \${PREFIX}.bam.bai \
|| (echo "Failed to index alignment file." && exit 1)
"""
}
// Medaka Step 2 run consensus on each scaffold
////Get scaffold list
process getScaffolds {
input:
path genomeFile from genome_getScaffolds.val
output:
//publishDir "${params.outdir}"
//file("nfhead.txt")
stdout regions_ch
script:
"""
grep ">" ${genomeFile} | perl -pe 's/>//g'
"""
}
process medakaConsensus {
container = "$medaka_container"
input:
path inputAlign from medakaAlign_ch.val
path inputBai from medakaAlignBai_ch.val
val region from regions_ch.splitText()
path modelIn from model_medaka.val
output:
path("out*.hdf") into medakaConsensus_ch
script:
"""
medaka consensus ${inputAlign} out_${region.trim()}.hdf \
--model ${modelIn} --batch 200 --threads 8 \
--region ${region.trim()}
"""
}
//--region contig1 contig2 contig3 contig4
//r941_min_high_g303
// Medaka Step 3 collate results
process medakaStich {
container = "$medaka_container"
input:
path hdf from medakaConsensus_ch.collect()
output:
path("medaka_polished.assembly.fasta")
publishDir "${params.outdir}", mode: 'copy', pattern: "medaka_polished.assembly.fasta"
script:
"""
medaka stitch ${hdf} medaka_polished.assembly.fasta
"""
}
def isuGIFHeader() {
// Log colors ANSI codes
c_reset = params.monochrome_logs ? '' : "\033[0m";
c_dim = params.monochrome_logs ? '' : "\033[2m";
c_black = params.monochrome_logs ? '' : "\033[1;90m";
c_green = params.monochrome_logs ? '' : "\033[1;92m";
c_yellow = params.monochrome_logs ? '' : "\033[1;93m";
c_blue = params.monochrome_logs ? '' : "\033[1;94m";
c_purple = params.monochrome_logs ? '' : "\033[1;95m";
c_cyan = params.monochrome_logs ? '' : "\033[1;96m";
c_white = params.monochrome_logs ? '' : "\033[1;97m";
c_red = params.monochrome_logs ? '' : "\033[1;91m";
return """ -${c_dim}--------------------------------------------------${c_reset}-
${c_white} ${c_red }\\\\------${c_yellow}---// ${c_reset}
${c_white} ___ ___ _ ___ ___ ${c_red } \\\\---${c_yellow}--// ${c_reset}
${c_white} | (___ | | / _ | |_ ${c_red } \\-${c_yellow}// ${c_reset}
${c_white} _|_ ___) |__| \\_/ _|_ | ${c_red } ${c_yellow}//${c_red } \\ ${c_reset}
${c_white} ${c_red } ${c_yellow}//---${c_red }--\\\\ ${c_reset}
${c_white} ${c_red }${c_yellow}//------${c_red }---\\\\ ${c_reset}
${c_cyan} isugifNF/nanoQCtrim v${workflow.manifest.version} ${c_reset}
-${c_dim}--------------------------------------------------${c_reset}-
""".stripIndent()
}