forked from aseetharam/common_scripts
-
Notifications
You must be signed in to change notification settings - Fork 41
/
Copy pathCopyNumberGen.sh
executable file
·74 lines (55 loc) · 1.71 KB
/
CopyNumberGen.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/bin/bash
# Bash script that generates a table of the number of orthologs present in each species.
# It takes the output generated by "orthomclMclToGroups" and converts the gene IDs into per-species counts.
# By default it prints to stdout.
# Arun Seetharam <[email protected]>
# Name this script was invoked as, with any leading directory stripped;
# shown in the Synopsis line of the help text.
scriptName="${0##*/}"

#######################################
# Print the help page.
# Globals:   scriptName (read)
# Arguments: none
# Outputs:   help text on stdout
#######################################
printUsage() {
  # Unquoted delimiter on purpose: $scriptName must expand inside the text.
  cat <<EOF
Synopsis
$scriptName [-h | --help] input_file
Description
Generates the count table from the orthologous group ids file generated by "orthomclMclToGroups"
The count table gives gene copy number in all species for each orthologous group.
input_file
Input file should contain orthologous group and IDs
This file has to be generated by "orthomclMclToGroups" command
-h, --help
Brings up this help page
Author
Arun Seetharam, Bioinformatics Core, Purdue University.
EOF
}
# An input file is mandatory: with no arguments show the help and fail.
if [[ $# -lt 1 ]]; then
  printUsage
  exit 1
fi

# Help request. The first argument is matched explicitly because the builtin
# getopts only understands single-character options: it can never produce
# "help", and an optstring of "h:" would make a bare -h count as "missing
# argument" instead of reaching the help branch.
# Nothing is shifted here, so $1 is still the first command-line argument
# for the code that follows.
case "$1" in
  -h | --help)
    printUsage
    exit 0
    ;;
esac
#######################################
# Print the gene copy-number table for an "orthomclMclToGroups" output file.
# Each input line is expected to look like:
#   GROUP: taxon|gene_id taxon|gene_id ...
# Arguments: $1 - path to the groups file; it must be a readable regular
#                 file because it is scanned twice.
# Outputs:   tab-separated table on stdout. Header row is "OG_name" followed
#            by the sorted species names; then one row per group holding the
#            group name (trailing ":" removed) and the gene count for each
#            species. Every cell, including the last one on a line, is
#            followed by a tab; that trailing tab is kept deliberately so
#            existing consumers of the table see the same layout.
#            Blank input lines are skipped.
# Returns:   0 on success, 1 if the input file is missing or unreadable.
#######################################
generate_copy_number_table() {
  local groups_file=$1
  local species

  if [[ ! -f "$groups_file" || ! -r "$groups_file" ]]; then
    printf '%s: cannot read input file: %s\n' "${0##*/}" "$groups_file" >&2
    return 1
  fi

  # Pass 1: collect every species (the text before the first "|" of each
  # member) from the WHOLE file, so species that only occur in late groups
  # still get a column. The file is fed through stdin so that unusual file
  # names (leading "-", embedded "=") cannot be misread by awk as options
  # or variable assignments.
  species=$(
    awk '{
      for (i = 2; i <= NF; i++) {
        taxon = $i
        sub(/\|.*/, "", taxon)
        if (taxon != "") print taxon
      }
    }' <"$groups_file" | sort | uniq | tr '\n' ' '
  )

  # Pass 2: one awk process prints the header and all rows. Members are
  # counted by exact species name, so a species name that also appears inside
  # a gene id, a group name or a longer species name is not counted twice.
  awk -v species="$species" '
    BEGIN {
      n = split(species, names, " ")
      printf "OG_name\t"
      for (j = 1; j <= n; j++) printf "%s\t", names[j]
      print ""
    }
    NF == 0 { next }
    {
      group = $1
      sub(/:$/, "", group)
      split("", count)               # portable way to empty the array
      for (i = 2; i <= NF; i++) {
        taxon = $i
        sub(/\|.*/, "", taxon)
        count[taxon]++
      }
      printf "%s\t", group
      for (j = 1; j <= n; j++) printf "%d\t", count[names[j]]
      print ""
    }
  ' <"$groups_file"
}

file=${1:-}
# Keep this call as the final command of the script: its return status is
# what the script exits with.
generate_copy_number_table "$file"