-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_htseq_icgc_gene.sh
executable file
·64 lines (52 loc) · 2.6 KB
/
run_htseq_icgc_gene.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/bin/bash
set -e
anno=PATH_TO_ANNOTATION/gencode.v19.annotation.hs37d5_chr.gtf
outdir=OUTDIR
meta=METADATA_FILE
aligndir=ALIGNMENT_DIR
mkdir -p $outdir
mem=60gb
if [ ! -z "$1" ]
then
file=$1
filebase=$(basename $file)
aid=$(echo $filebase | cut -f 1 -d '.')
stranded=$(grep $aid $meta | cut -f 31)
donefile="${outdir}/${filebase%bam}count.done"
logfile="${outdir}/${filebase%bam}count.log"
outfile="${outdir}/${filebase%bam}count.txt"
if [ -f ${file%bam}done -a ! -f $donefile ]
then
if [ "$stranded" == "fr-firststrand" ]
then
cd $(pwd); umask 077; samtools view -F 4 $file | htseq-count -m intersection-nonempty --stranded=reverse --idattr gene_id -r pos - $anno > $outfile && touch $donefile
elif [ "$stranded" == "fr-secondstrand" ]
then
cd $(pwd); umask 077; samtools view -F 4 $file | htseq-count -m intersection-nonempty --stranded=yes --idattr gene_id -r pos - $anno > $outfile && touch $donefile
else
cd $(pwd); umask 077; samtools view -F 4 $file | htseq-count -m intersection-nonempty --stranded=no --idattr gene_id -r pos - $anno > $outfile && touch $donefile
fi
fi
else
for file in $(ls -1 ${aligndir}/*.bam)
do
filebase=$(basename $file)
aid=$(echo $filebase | cut -f 1 -d '.')
stranded=$(grep $aid $meta | cut -f 31)
donefile="${outdir}/${filebase%bam}count.done"
logfile="${outdir}/${filebase%bam}count.log"
outfile="${outdir}/${filebase%bam}count.txt"
if [ -f ${file%bam}done -a ! -f $donefile ]
then
if [ "$stranded" == "fr-firststrand" ]
then
echo "umask 077; samtools view -F 4 $file | htseq-count -m intersection-nonempty --stranded=reverse --idattr gene_id -r pos - $anno > $outfile && touch $donefile" | qsub -l nodes=1:ppn=1,mem=$mem,vmem=$mem,pmem=$mem,walltime=24:00:00 -N icgc_htseq -j oe -o $logfile
elif [ "$stranded" == "fr-secondstrand" ]
then
echo "umask 077; samtools view -F 4 $file | htseq-count -m intersection-nonempty --stranded=yes --idattr gene_id -r pos - $anno > $outfile && touch $donefile" | qsub -l nodes=1:ppn=1,mem=$mem,vmem=$mem,pmem=$mem,walltime=24:00:00 -N icgc_htseq -j oe -o $logfile
else
echo "umask 077; samtools view -F 4 $file | htseq-count -m intersection-nonempty --stranded=no --idattr gene_id -r pos - $anno > $outfile && touch $donefile" | qsub -l nodes=1:ppn=1,mem=$mem,vmem=$mem,pmem=$mem,walltime=24:00:00 -N icgc_htseq -j oe -o $logfile
fi
fi
done
fi