-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathassembly.sh
62 lines (43 loc) · 1.37 KB
/
assembly.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/bin/bash
# scripts to reproduce the analysis and figures from Garrido-Oter et al., 2018
#
# originally by Ruben Garrido-Oter
#
# Usage: assembly.sh <config_file>
#
# <config_file> is sourced as shell code, followed by assembly.functions.sh;
# both are located by bash's normal `source` lookup rules.

# abort as soon as any unguarded command exits with a non-zero status
set -e
# treat the expansion of an unset variable as an error
set -o nounset

# parse arguments
# ${1:-} lets a missing argument reach the check below instead of
# aborting with a bare "unbound variable" error from nounset
config_file=${1:-}

if [[ ! -f "$config_file" ]]
then
    echo "invalid config file" >&2
    # `return` is only valid inside a function or a sourced file; at the
    # top level of an executed script the correct way to stop is `exit`
    exit 1
fi

# load config file
source "$config_file"

# load functions
source assembly.functions.sh
# Run the per-genome pipeline for every ID listed in the second
# tab-separated column of $mapping_file (the first line is skipped as a header).
#
# Variables used here ($mapping_file, $data_dir, $working_dir, $output and the
# tool parameters) are not set in this script; presumably they come from the
# sourced config file. Likewise log, trim, assemblySOAP, assemblyA5 and
# assemblyStats are presumably provided by assembly.functions.sh.
#
# The ID list is read on file descriptor 3 so that commands inside the loop
# cannot consume it through stdin.
while read -r genome_id <&3 || [[ -n "$genome_id" ]]
do

    # ignore blank entries; an empty ID must never reach the cleanup below
    [[ -n "$genome_id" ]] || continue

    log "[$genome_id] processing raw data..."

    # decompress both read files into the working directory; '>' truncates so
    # that reads left over from an interrupted earlier run are not duplicated
    zcat "$data_dir/${genome_id}_1.fastq.gz" > "$working_dir/${genome_id}_1.fastq"
    zcat "$data_dir/${genome_id}_2.fastq.gz" > "$working_dir/${genome_id}_2.fastq"

    # stdout and stderr of each step are appended to $output
    trim -genome_id="$genome_id" -mode="$mode" -lead="$lead" \
         -trail="$trail" -sw_l="$sw_length" -sw_q="$sw_min_q" \
         -min_l="$min_length" -n_cores="$n_cores" \
         &>> "$output"

    assemblySOAP -genome_id="$genome_id" -mode="$mode" \
                 -kmer="$kmer" -avg_ins="$avg_ins" \
                 -max_rd_len="$max_rd_len" -n_cores="$n_cores" \
                 &>> "$output"

    assemblyA5 -genome_id="$genome_id" -n_cores="$n_cores" \
               &>> "$output"

    assemblyStats "$genome_id"

    # cleanup
    # ${var:?} aborts instead of deleting a parent directory if a value is empty
    rm -f -- "${working_dir:?}/${genome_id:?}"_*fastq
    rm -f -r -- "${working_dir:?}/SOAP/${genome_id:?}"
    rm -f -r -- "${working_dir:?}/A5/${genome_id:?}"

    log "[$genome_id] done"

done 3< <(cut -f 2 "$mapping_file" | tail -n +2)

log "DONE!"