-
Notifications
You must be signed in to change notification settings - Fork 11
/
simple-annotate.nf
executable file
·70 lines (52 loc) · 1.28 KB
/
simple-annotate.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env nextflow
// Pipeline inputs and output location, all taken from command-line parameters.
genome      = file(params.genome)
cegmaFile   = file(params.cegma)
outFilename = params.out

// The strain name is derived from the directory that holds the genome file
// (its base name, i.e. the directory name as returned by getBaseName()).
strainName  = genome.parent.baseName
// Normalise the input FASTA and emit the result on stdout:
//   * header lines (">...") that carry no "[...]" tag get "[<strainName>]" appended,
//   * header lines that already contain a "[...]" tag are passed through unchanged,
//   * every non-header line is upper-cased,
//   * the first carriage return on each line is removed (DOS line endings).
process cleanGenome {
    input:
    // Declared with the `file` qualifier so the genome is staged into the task
    // work directory and `$genome` below resolves to the staged file name.
    file genome

    output:
    stdout into cleanGenomes

    script:
    // Triple DOUBLE quotes are required: Groovy must interpolate $strainName and
    // $genome, while \$0 is escaped so awk still sees its own $0.
    // The bracket test is written as \\[.*\\] (awk: /\[.*\]/) so that it matches a
    // literal "[...]" tag rather than "any '.' or '*' character".
    """
    awk '/^>/ && !/\\[.*\\]/ {print(\$0, "[$strainName]")} /^>/ && /\\[.*\\]/ {print \$0} /^[^>]/ {print(toupper(\$0))}' '$genome' | sed "s/\015//"
    """
}
// Duplicate each item emitted by cleanGenomes into two channels (the closure maps
// every item to the pair [it, it]): fastaForGFF feeds cegmaGFFToGenbank and
// fastaForAug feeds trainAndCallGenes.
(fastaForGFF, fastaForAug) = cleanGenomes.separate(2){ [it, it] }
// Run fullerCegmaGFF.rb on the CEGMA GFF file and emit the script's stdout
// on the fullGFF channel.
process cegmaGFFtoFullerGFF {
    input:
    // Bind the staged file to a variable so its name can be interpolated below.
    file cegma from cegmaFile

    output:
    stdout into fullGFF

    script:
    // Triple DOUBLE quotes so Groovy substitutes $cegma; with single quotes the
    // shell would expand an unset variable and the script would get no argument.
    """
    fullerCegmaGFF.rb $cegma
    """
}
// Convert the expanded GFF plus the cleaned genome FASTA into a GenBank file
// (out.gb) using gff2gbSmallDNA.pl inside the robsyme/augustus container.
process cegmaGFFToGenbank {
    container 'robsyme/augustus'

    input:
    file gff from fullGFF
    file fasta from fastaForGFF

    output:
    file 'out.gb' into trainingGenbank

    script:
    // Triple DOUBLE quotes so Groovy substitutes the staged file names for $gff
    // and $fasta; in a single-quoted string bash would expand both to nothing.
    // NOTE(review): 5000 is the third positional argument of gff2gbSmallDNA.pl,
    // presumably the maximum flanking-region size - confirm against AUGUSTUS docs.
    """
    gff2gbSmallDNA.pl $gff $fasta 5000 out.gb
    """
}
// Run optimize_augustus.pl and etraining on the training GenBank file for the
// fusarium_graminearum species, then run augustus on the cleaned genome and
// write its GFF3 output to out.txt. Runs inside the robsyme/augustus container.
process trainAndCallGenes {
    container 'robsyme/augustus'

    input:
    file trainingGenbank
    file genome from fastaForAug

    output:
    file 'out.txt' into trainedFile

    script:
    // Triple DOUBLE quotes so Groovy substitutes the staged file names for
    // $trainingGenbank and $genome; in a single-quoted string bash would expand
    // both to empty strings and every command would run without its input file.
    """
    optimize_augustus.pl --species=fusarium_graminearum $trainingGenbank
    etraining --species=fusarium_graminearum $trainingGenbank
    augustus --species=fusarium_graminearum --gff3=on $genome > out.txt
    """
}
// Each file emitted on trainedFile is copied to the destination given by params.out.
trainedFile.subscribe { resultFile -> resultFile.copyTo(outFilename) }