-
Notifications
You must be signed in to change notification settings - Fork 7
/
checkMinMaxIntronSize.sh
executable file
·101 lines (88 loc) · 3.11 KB
/
checkMinMaxIntronSize.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/bin/bash
#
# INGLÊS/ENGLISH
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# http://www.gnu.org/copyleft/gpl.html
#
#
# PORTUGUÊS/PORTUGUESE
# Este programa é distribuído na expectativa de ser útil aos seus
# usuários, porém NÃO TEM NENHUMA GARANTIA, EXPLÍCITAS OU IMPLÍCITAS,
# COMERCIAIS OU DE ATENDIMENTO A UMA DETERMINADA FINALIDADE. Consulte
# a Licença Pública Geral GNU para maiores detalhes.
# http://www.gnu.org/copyleft/gpl.html
#
# Copyright (C) 2012 Universidade de São Paulo
#
# Universidade de São Paulo
# Laboratório de Biologia do Desenvolvimento de Abelhas
# Núcleo de Bioinformática (LBDA-BioInfo)
#
# Daniel Guariz Pinheiro
# http://zulu.fmrp.usp.br/bioinfo
#
refgff=$1
if [ ! ${refgff} ]; then
echo "Missing reference GFF file"
exit
else
if [ ! -e ${refgff} ]; then
echo "Wrong reference GFF file (${refgff})"
exit
else
if [[ ! ${refgff} =~ .g[tf]f*$ ]]; then
echo "Wrong name of reference GFF or GTF file"
exit
fi
fi
fi
basedir_out=$2
if [ ! ${basedir_out} ]; then
echo "Missing output directory"
exit
else
if [ ! -d ${basedir_out} ]; then
echo "Wrong output directory"
exit
fi
fi
min_perc=$3
max_perc=$4
if [ ! ${min_perc} ]; then
min_perc=0
else
if [[ ${min_perc} < 0 ]] || [[ ${min_perc} > 1 ]]; then
echo "Minimum percentile (${min_perc}) must be between 0 and 1"
exit
fi
fi
if [ ! ${max_perc} ]; then
max_perc=1
else
if [[ ${max_perc} < 0 ]] || [[ ${max_perc} > 1 ]]; then
echo "Maximum percentile (${max_perc}) must be between 0 and 1"
exit
fi
fi
refgff_format=`echo ${refgff} | perl -ne 'chomp; ~/\.([^\.]+)$/; print $1;'`
echo "* Gene structure statistics from current genome reference annotation (introntab.pl)"
if [ ! -e "${basedir_out}/genome_annotation_stats.txt" ]; then
echo " Running introntab.pl based on ${refgff_format}"
if [ ${refgff_format} == "gtf" ]; then
gtfbn=`basename ${refgff} .gtf`
if [ ! -e "${basedir_out}/introntab/${gtfbn}.gff" ]; then
echo " Converting gtf to gff for introntab.pl"
gtf2gff3 ${refgff} 2> /dev/null > ${basedir_out}/${gtfbn}.gff
fi
grep -v '^#' ${basedir_out}/${gtfbn}.gff | introntab.pl --format gff > ${basedir_out}/genome_annotation_stats.txt
else
grep -v '^#' ${refgff} | introntab.pl --format ${refgff_format} > ${basedir_out}/genome_annotation_stats.txt
fi
fi
max_intron_size=`cut -f 10 ${basedir_out}/genome_annotation_stats.txt | perl -lane 'next if (($.<=3)||($_=~/^#/)); my @isize=split(/,/, $_); foreach my $s ( @isize ) { print $s; } ' | nsort -n | awk -v perc="${max_perc}" '{all[NR] = $0} END{print all[int(NR*perc-1)]}' `
min_intron_size=`cut -f 10 ${basedir_out}/genome_annotation_stats.txt | perl -lane 'next if (($.<=3)||($_=~/^#/)); my @isize=split(/,/, $_); foreach my $s ( @isize ) { print $s; } ' | nsort -n | awk -v perc="${min_perc}" '{all[NR] = $0} END{print all[int(NR*perc+1)]}' `
echo "${min_intron_size},${max_intron_size}"