-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfastp_fastqc_bwa.nf
100 lines (79 loc) · 2.84 KB
/
fastp_fastqc_bwa.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env nextflow
/*
* Defines pipeline parameters in order to specify the
* read pairs by using the command line options
*/
params.reads = "$baseDir/data/*{1,2}.fastq.gz"
params.genome = "$baseDir/data/hg38.fa"
/*
* Creates the `read_pairs` channel that emits for each read-pair a tuple containing
* three elements: the pair ID, the first read-pair file and the second read-pair file
*/
Channel
.fromFilePairs( params.reads, flat: true )
.ifEmpty { error "Cannot find any reads matching: ${params.reads}" }
.set { read_pairs }
Channel
.fromFilePairs( params.reads, flat: true )
.ifEmpty { error "Cannot find any reads matching: ${params.reads}" }
.set { fastqc_read_pairs }
genome_file = file(params.genome)
/*
* Step 1. Clean the reads
*/
process cleanReads {
publishDir "results"
input:
set dataset_id, file(forward), file(reverse) from read_pairs
output:
set dataset_id, file("${dataset_id}1.TRIMMED.PAIRED.fq.gz"), file("${dataset_id}2.TRIMMED.PAIRED.fq.gz") into fastqc_paired_fastq
set dataset_id, file("${dataset_id}1.TRIMMED.UNPAIRED.fq.gz"), file("${dataset_id}2.TRIMMED.UNPAIRED.fq.gz") into unpaired_fastq
set dataset_id, file("${dataset_id}1.TRIMMED.PAIRED.fq.gz"), file("${dataset_id}2.TRIMMED.PAIRED.fq.gz") into bwa_paired_fastq
clusterOptions = { "-l select=1:ncpus=1:mem=3gb,walltime=04:00:00" }
module 'singularity'
script:
"""
singularity run -B /scratch2 ~/singularity_containers/fastp.simg \
-i $forward -I $reverse \
-o ${dataset_id}1.TRIMMED.PAIRED.fq.gz -O ${dataset_id}2.TRIMMED.PAIRED.fq.gz \
--unpaired1 ${dataset_id}1.TRIMMED.UNPAIRED.fq.gz --unpaired2 ${dataset_id}2.TRIMMED.UNPAIRED.fq.gz \
--thread ${task.cpus} --length_required 40 --detect_adapter_for_pe \
-j ${dataset_id}.json -h ${dataset_id}.html
"""
}
/*
* Step 2. QC trimmed reads
*/
process postFastqc {
publishDir "results"
input:
set dataset_id, file(r1), file(r2) from fastqc_paired_fastq
output:
file "*_fastqc.{zip,html}" into trimmed_fastqc_results
clusterOptions = { "-l select=1:ncpus=1:mem=2gb,walltime=04:00:00" }
module 'singularity'
script:
"""
singularity run -B /scratch2 ~/singularity_containers/fastqc.simg -q $r1 $r2
"""
}
/*
* Step 3. BWA MEM alignment
*/
process bwa {
publishDir "results"
input:
set dataset_id, file(r1), file(r2) from bwa_paired_fastq
file genome from genome_file
output:
file "*.bam" into bam_files
clusterOptions = { "-l select=1:ncpus=2:mem=6gb,walltime=04:00:00" }
module 'singularity'
module 'anaconda'
script:
"""
# load samtools from aligners env
source activate aligners
singularity run -B /scratch2 ~/singularity_containers/bwa.simg mem -t ${task.cpus} ${params.genome} ${r1} ${r2} | samtools view -b - > ${dataset_id}.bam
"""
}