-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsetup.sh
executable file
·132 lines (114 loc) · 4.71 KB
/
setup.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env bash
# NOTE: developer-local CLASSPATH example kept for reference; delete this later.
#export CLASSPATH=$CLASSPATH:/Users/natalie/Documents/projects/greenelab/stanford-corenlp-4.2.0/*:

# CoreNLP release used by the checks and download further down.
CORENLP_VERSION="4.2.0"

# Absolute path of the directory that contains this script, resolved by
# cd-ing into it in a subshell; left empty if that cd fails.
DIR=$(
  cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd
)
# Non-interactive mode: if -y or --non-interactive is passed on the command
# line, every prompt is answered 'yes' automatically (this is for running
# setup.sh in docker).
#
# NONINTERACTIVE is 0 for an interactive run and 1 for a non-interactive run;
# ne_read relies on that convention.

#######################################
# Report whether the non-interactive flag is among the given arguments.
# Arguments: the script's command-line arguments
# Returns:   0 if any argument is exactly -y or --non-interactive, else 1
#######################################
has_noninteractive_flag() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      -y | --non-interactive) return 0 ;;
    esac
  done
  return 1
}

# Test the flag with a real condition: the numeric value must not be executed
# as a command, otherwise the banner is never shown.
if has_noninteractive_flag "$@"; then
  NONINTERACTIVE=1
  echo "Running in non-interactive mode; all prompts are answered by 'yes'"
else
  NONINTERACTIVE=0
fi
#######################################
# Ask a yes/no question, or auto-answer it in non-interactive mode.
# Globals:   NONINTERACTIVE (read), response (written)
# Arguments: $1 - question text shown before the " [y/N]" suffix
# Outputs:   the answer is left in the global variable `response`
#######################################
ne_read() {
  # Anything other than NONINTERACTIVE == 0 means "answer yes automatically".
  [ $NONINTERACTIVE -eq 0 ] || {
    response='y'
    return
  }
  read -r -p "$1 [y/N]" response
}
#######################################
# Make sure Stanford CoreNLP is available and on the CLASSPATH.
#
# CoreNLP counts as present when its directory exists next to this script
# (under DIR) or when CLASSPATH already mentions it. Otherwise the user is
# asked whether to download it; the archive is fetched and unpacked into DIR,
# the same location that is then appended to CLASSPATH, so the result does not
# depend on the directory the script was started from.
#
# Globals:   DIR, CORENLP_VERSION (read); CLASSPATH (read, exported);
#            response (written through ne_read)
# Outputs:   progress messages on stdout, download warnings on stderr
#######################################
ensure_corenlp() {
  local name="stanford-corenlp-${CORENLP_VERSION}"
  local install_dir="${DIR}/${name}"
  local archive="${DIR}/${name}.zip"

  echo "Checking for ${name}..."
  if [[ ! -d "$install_dir" && "$CLASSPATH" != *"${name}"* ]]; then
    ne_read "CoreNLP doesn't exist, download?"
    case "$response" in
      [yY][eE][sS] | [yY])
        # -f makes curl fail on HTTP errors instead of saving an error page
        # that unzip would then choke on.
        if ! {
          curl --create-dirs -f -L -o "$archive" \
            "https://nlp.stanford.edu/software/${name}.zip" \
            && unzip "$archive" -d "$DIR" \
            && rm -- "$archive"
        }; then
          echo "Warning: could not download or unpack ${name}" >&2
        fi
        ;;
      *)
        # Declined: just carry on for now.
        ;;
    esac
  else
    echo "stanford coreNLP found"
  fi

  # Add the CoreNLP jars to the classpath unless they are already listed.
  if [[ "$CLASSPATH" != *"${name}"* ]]; then
    export CLASSPATH="$CLASSPATH:${install_dir}/*"
    echo "New classpath: $CLASSPATH"
  fi
}

ensure_corenlp
#######################################
# Get the organization reference data exported from Nature Index.
#
# The file is looked up and stored under DIR (the script's directory), so the
# existence check and the download target always refer to the same path no
# matter where the script is started from.
#
# Globals:   DIR (read); response (written through ne_read)
# Outputs:   progress messages on stdout, download warnings on stderr
#######################################
fetch_nature_index_reference() {
  local target="${DIR}/data/reference_data/nature_index_export.csv"

  echo "Checking for nature index organization reference file..."
  if [[ -f "$target" ]]; then
    echo "Nature Index reference data found"
    return 0
  fi

  ne_read "Nature Index reference data doesn't exist, download?"
  case "$response" in
    [yY][eE][sS] | [yY])
      # -f: do not store an HTTP error page as if it were the CSV.
      curl --create-dirs -f -L \
        "https://www.natureindex.com/institution-outputs-export/All/global/All/score/1" \
        -o "$target" \
        || echo "Warning: failed to download Nature Index reference data" >&2
      ;;
    *)
      # Declined: just carry on for now.
      ;;
  esac
}

fetch_nature_index_reference
#######################################
# Get the country-code and state-code reference files.
#
# Both files must exist under DIR/data/reference_data to count as present;
# if either is missing the user is asked and both are downloaded. Paths are
# anchored at DIR so the check and the download target agree.
#
# Globals:   DIR (read); response (written through ne_read)
# Outputs:   progress messages on stdout, download warnings on stderr
#######################################
fetch_country_state_codes() {
  local ref_dir="${DIR}/data/reference_data"
  local base_url="https://gist.github.com/nrosed/af41858718a1bc30f0323d95916b5c4e/raw/2930f0c786a32c873ddcd7d51defbf6ca0846600"
  local file

  echo "Checking for country and state info reference file..."
  if [[ -f "${ref_dir}/cdh_country_codes.txt" && -f "${ref_dir}/cdh_state_codes.txt" ]]; then
    echo "Country and State reference data found"
    return 0
  fi

  # The prompt names the data actually being fetched here.
  ne_read "Country and state reference data doesn't exist, download?"
  case "$response" in
    [yY][eE][sS] | [yY])
      for file in cdh_country_codes.txt cdh_state_codes.txt; do
        # -f: do not store an HTTP error page as if it were the data file.
        curl --create-dirs -f -L "${base_url}/${file}" -o "${ref_dir}/${file}" \
          || echo "Warning: failed to download ${file}" >&2
      done
      ;;
    *)
      # Declined: just carry on for now.
      ;;
  esac
}

fetch_country_state_codes
#######################################
# Get the genderize.io name reference table.
#
# The file is looked up and stored under DIR (the script's directory), so the
# existence check and the download target always refer to the same path no
# matter where the script is started from.
#
# Globals:   DIR (read); response (written through ne_read)
# Outputs:   progress messages on stdout, download warnings on stderr
#######################################
fetch_genderize_reference() {
  local target="${DIR}/data/reference_data/genderize.tsv"

  echo "Checking for genderize.io reference file..."
  if [[ -f "$target" ]]; then
    echo "genderize.io reference data found"
    return 0
  fi

  ne_read "genderize.io reference data doesn't exist, download?"
  case "$response" in
    [yY][eE][sS] | [yY])
      # -f: do not store an HTTP error page as if it were the TSV.
      curl --create-dirs -f -L \
        "https://github.com/greenelab/iscb-diversity/raw/2beece62588d52dc30229fd65f25ddd523fa955e/data/gender/genderize.tsv" \
        -o "$target" \
        || echo "Warning: failed to download genderize.io reference data" >&2
      ;;
    *)
      # Declined: just carry on for now.
      ;;
  esac
}

fetch_genderize_reference
#######################################
# Get the LSTM model files used for name origin prediction.
#
# The model counts as present when all three NamePrism files that this
# function downloads exist under DIR/name_lstm_models. A file named LSTM.h5
# in that directory is also accepted, so a setup that satisfied the previous
# check keeps working. Checking for the files that are actually downloaded
# stops the script from prompting again after a successful download.
#
# Globals:   DIR (read); response (written through ne_read)
# Outputs:   progress messages on stdout, download warnings on stderr
#######################################
ensure_name_lstm_model() {
  local model_dir="${DIR}/name_lstm_models"
  local base_url="https://github.com/greenelab/wiki-nationality-estimate/raw/7425af1021f8a5c00aad789ebcaef67c5fe427bb/models"
  local -a model_files=(NamePrism.h5 NamePrism_idx_dic.pkl NamePrism_categories.txt)
  local file
  local missing=0

  echo "Checking for LSTM name model..."
  for file in "${model_files[@]}"; do
    [[ -f "${model_dir}/${file}" ]] || missing=1
  done
  # Legacy marker: an existing LSTM.h5 is treated as "model present".
  if [[ -f "${model_dir}/LSTM.h5" ]]; then
    missing=0
  fi

  if ((missing == 0)); then
    echo "LSTM name model found"
    return 0
  fi

  ne_read "LSTM name model was not found, download?"
  case "$response" in
    [yY][eE][sS] | [yY])
      for file in "${model_files[@]}"; do
        # -f: do not store an HTTP error page as if it were a model file.
        curl --create-dirs -f -L "${base_url}/${file}" -o "${model_dir}/${file}" \
          || echo "Warning: failed to download ${file}" >&2
      done
      ;;
    *)
      # Declined: just carry on for now.
      ;;
  esac
}

ensure_name_lstm_model