-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathpandelos.sh
96 lines (77 loc) · 3.08 KB
/
pandelos.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/bin/bash
# Start-up banner: tool name, authors, license and project URL.
# A single printf emits one banner row per argument.
printf '%s\n' \
  '################################################################################' \
  '# PanDelos : #' \
  '# a dictionary-based method for pan-genome content discovery. #' \
  '#------------------------------------------------------------------------------#' \
  '# Bonnici et. al #' \
  '#==============================================================================#' \
  '# This software is under MIT license! #' \
  '# Please visit https://github.com/InfOmics/PanDelos #' \
  '################################################################################'
# Locate the PanDelos installation directory and store it in $sdir.
#   default : the absolute directory that contains this script
#   override: the PANDELOS_PATH environment variable, when non-empty
sdir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# The test brackets need surrounding spaces: `"$PANDELOS_PATH"]` glued the
# closing bracket onto the value, so `[` complained about a missing `]`
# whenever the variable was set.
if [[ -z "${PANDELOS_PATH:-}" ]]; then
  echo "environment variable PANDELOS_PATH not set!"
  echo "using location: $sdir"
else
  echo "using location: $PANDELOS_PATH"
  sdir="$PANDELOS_PATH"
fi
# Helper programs used by the pipeline, resolved relative to $sdir:
#   kk - script run to compute k          (calculate_k.py)
#   ig - jar providing the Pangenes class (ig/ig.jar)
#   nc - script run for the de-clustering (netclu_ng.py)
kk="${sdir}/calculate_k.py"
ig="${sdir}/ig/ig.jar"
nc="${sdir}/netclu_ng.py"

# Stop with a non-zero status as soon as one helper is missing. A bare
# `exit` would reuse the status of the preceding echo (0) and make the
# failure look like a success to the caller.
for required in "$kk" "$ig" "$nc"; do
  if [[ ! -f "$required" ]]; then
    echo "ERROR: file ${required##*/} not found in $sdir !"
    exit 1
  fi
done
# Positional arguments:
#   $1 - input dataset file (the usage text shows dataset.faa)
#   $2 - prefix for the output file(s)
idb="$1"
oprefix="$2"

# Both arguments are mandatory. Invalid usage exits with status 1; a bare
# `exit` would return the status of the last echo, i.e. 0.
if [[ ! -f "$idb" ]]; then
  echo "ERROR: input dataset file not found: $idb !"
  echo "usage is: pandelos.sh dataset.faa out_prefix"
  exit 1
fi
if [[ -z "$oprefix" ]]; then
  echo "ERROR: output prefix not given: $oprefix !"
  echo "usage is: pandelos.sh dataset.faa out_prefix"
  exit 1
fi
# Scratch file for intermediate tool output, created directly in the current
# directory. A template without a slash makes mktemp create the file right
# here, so no stray file is left behind in the system temp directory (the
# former `mktemp | xargs basename` created one there and never removed it).
tmp=$(mktemp tmp.XXXXXXXXXX) || {
  echo "ERROR: cannot create temporary file in $PWD !"
  exit 1
}
# Remove the scratch file on every exit path, including errors and Ctrl-C.
trap 'rm -f -- "$tmp"' EXIT
echo "working on $tmp"
dnet="${tmp}.net"        # network file written by the clustering step
clus="${oprefix}.clus"   # final output: one gene family per line

echo "calculating k ..."
python3 "$kk" "$idb" >"$tmp"
# The helper is expected to print a line of the form "k = <value>".
k=$(grep "k =" "$tmp" | sed 's/k = //g')
echo "k = $k"
if [[ -z "$k" ]]; then
  echo "ERROR: unable to determine k for $idb !"
  exit 1
fi

echo "clustering ..."
date
# Optional JVM tuning for large datasets:
#java -server -d64 -Xmn2560M -Xms6144M -Xmx60144M
# Both jars go into ONE class path: when -cp is given twice, only the last
# value is used, which dropped commons-io from the class path.
# $k is left unquoted on purpose so stray whitespace around it is discarded.
java -cp "${ig}:${sdir}/ext/commons-io-2.6.jar" \
  infoasys.cli.pangenes.Pangenes "$idb" $k "$dnet" >"$tmp"

echo "de-clustering ..."
date
python3 "$nc" "$idb" "$dnet" >>"$tmp"

echo "writing gene gene families in $clus ..."
date
# Family lines are tagged "F{ ... }": strip the tag, the closing brace and
# the " ;" separators, then keep each distinct line once.
grep "F{ " "$tmp" \
  | sed -e 's/F{ //g' -e 's/}//g' -e 's/ ;//g' \
  | sort \
  | uniq >"$clus"
date
rm -f -- "$tmp"
# Closing banner followed by the completion message.
# A single printf emits one output row per argument.
printf '%s\n' \
  '################################################################################' \
  '# PanDelos : #' \
  '# a dictionary-based method for pan-genome content discovery. #' \
  '#------------------------------------------------------------------------------#' \
  '# Bonnici et. al #' \
  '#==============================================================================#' \
  '# This software is under MIT license! #' \
  '# Please visit https://github.com/InfOmics/PanDelos #' \
  '################################################################################' \
  'Finish!'