-
Notifications
You must be signed in to change notification settings - Fork 1
/
20130620_count_reads.sh
90 lines (73 loc) · 2.94 KB
/
20130620_count_reads.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/bin/bash
#PBS -l nodes=1:ppn=10
#PBS -l vmem=90000mb
#PBS -l walltime=200:00:00
#PBS -q compute
#export BLASTDB=$HOME/data
nthreads=10
### Load module
module load gmap/2013-05-09
module load fastx-toolkit/0.0.13.2
module load samtools/0.1.19
module load bwa/0.6.2
WORKDIR="/lustre/home/vgupta2/01_blueberry/04_mapping"
SCRIPT_NAME="20130601_GeneMark_scaffolds.sh"
LOGFILE=`date "+20%y%m%d_%H%M"`."$SCRIPT_NAME"".logfile"
LOGFILE=$WORKDIR"/"$LOGFILE
# samtools documentation
# http://davetang.org/wiki/tiki-index.php?page=SAMTools
echo '----------------------------------------------' > $LOGFILE
echo "Job has been started: " `date "+20%y%m%d_%H%M"` >> $LOGFILE
GENOME="/lustre/groups/lorainelab/data/blueberry/01_genome/bberry/newbler/454Scaffolds.fna"
### count reads in illumina fastq files
cd /lustre/groups/lorainelab/data/blueberry/illumina_DHMR/se_trimmed/fastq
echo -e "file name"",""FastqReads"",""Mapped reads"
for file in *.fastq
do
lines=` wc -l $file | cut -d ' ' -f 1`
count=`expr $lines / 4`
bam_name=`echo $file | awk '{split ($0, arr, "."); print arr[1]}'`.bam
bam="/lustre/groups/lorainelab/data/blueberry/illumina_DHMR/se_trimmed/bb_se_TH2.0.6_6K_processed/""$bam_name"
mapped_reads=`samtools view -F 4 $bam | cut -f 1 | sort | uniq | wc -l`
echo -e "$file" "," "$count" "," "$mapped_reads"
done
cd /lustre/groups/lorainelab/data/blueberry/pe_blueberry/trimmed_pe/fastq
echo -e "file name"",""FastqReads"",""Mapped reads"
for file in *Read1.fastq
do
lines=` wc -l $file | cut -d ' ' -f 1`
count=`expr $lines / 4`
bam_name=`echo $file | awk '{split ($0, arr, "_"); print arr[1]"_"arr[2]}'`.bam
bam="/lustre/groups/lorainelab/data/blueberry/pe_blueberry/trimmed_pe/pe_TH2.0.6_processed/""$bam_name"
### read1
mapped_reads=`samtools view -f 0x0040 $bam | cut -f 1| sort | uniq | wc -l`
echo -e "$file" "," "$count" "," "$mapped_reads"
### read2
file=`echo $file | sed 's/Read1/Read2/g'`
mapped_reads=`samtools view -f 0x0080 $bam | cut -f 1| sort | uniq | wc -l`
echo -e "$file" "," "$count" "," "$mapped_reads"
done
cd /lustre/groups/lorainelab/data/blueberry/dhmri_ONeal_454_run_2009_06_18/fastq
echo -e "file name"",""FastqReads"",""Mapped reads"
for file in *fastq
do
lines=` wc -l $file | cut -d ' ' -f 1`
count=`expr $lines / 4`
bam_name="$file""_sorted.bam"
bam="$bam_name"
mapped_reads=`samtools view -F 4 $bam |cut -f 1 | sort | uniq | wc -l`
echo -e "$file" "," "$count" "," "$mapped_reads"
done
cd /lustre/groups/lorainelab/data/blueberry/ncsu_ONeal_454_Aug_2010/files_from_ncsu_gsl/fastq
echo -e "file name"",""FastqReads"",""Mapped reads"
for file in *fastq
do
lines=` wc -l $file | cut -d ' ' -f 1`
count=`expr $lines / 4`
bam_name="$file""_sorted.bam"
bam="$bam_name"
mapped_reads=`samtools view -F 4 $bam | cut -f 1| sort | uniq | wc -l`
echo -e "$file" "," "$count" "," "$mapped_reads"
done
echo "All Jobs has been finished: " `date "+20%y%m%d_%H%M"` >> $LOGFILE
echo '----------------------------------------------' >> $LOGFILE