forked from yumeng5/Spherical-Text-Embedding
-
Notifications
You must be signed in to change notification settings - Fork 0
/
eval_cluster.sh
executable file
·44 lines (30 loc) · 1.1 KB
/
eval_cluster.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# ---------------------------------------------------------------------------
# Configuration.
# Every setting below keeps its original default but may be overridden from
# the environment without editing this file, e.g.:
#   dataset=my_corpus threads=4 ./eval_cluster.sh
# ---------------------------------------------------------------------------
# dataset directory (looked up under ./datasets/)
dataset="${dataset:-20news}"
# text file name inside the dataset directory; one document per line
text_file="${text_file:-text.txt}"
# document embedding output file name (written inside the dataset directory)
doc_emb="${doc_emb:-jose_d.txt}"
# word embedding dimension
word_dim="${word_dim:-100}"
# local context window size
window_size="${window_size:-10}"
# minimum word count in corpus; words that appear fewer times than this
# threshold will be discarded
min_count="${min_count:-5}"
# number of iterations to run on the corpus
iter="${iter:-20}"
# number of threads to be run in parallel
threads="${threads:-10}"
# Build the `jose` target inside ./src.
# The build runs in a subshell so the caller's working directory is never
# changed, even when `cd` or `make` fails. Any failure aborts the script
# instead of continuing with a missing or stale ./src/jose binary.
if ! ( cd ./src && make jose ); then
  printf 'error: failed to build jose in ./src\n' >&2
  exit 1
fi
# Train on ./datasets/<dataset>/<text_file> and write the document embeddings
# to ./datasets/<dataset>/<doc_emb>, timing the run with bash's SECONDS counter.
# All expansions are quoted so values containing spaces or glob characters
# reach the trainer as single arguments.
# -alpha, -margin, -negative and -sample are fixed values passed through as-is.
start=$SECONDS
if ! ./src/jose \
  -train "./datasets/${dataset}/${text_file}" \
  -doc-output "./datasets/${dataset}/${doc_emb}" \
  -size "${word_dim}" -alpha 0.04 -margin 0.15 -window "${window_size}" \
  -negative 2 -sample 1e-3 \
  -min-count "${min_count}" -iter "${iter}" -threads "${threads}"; then
  # Stop here: reporting a running time or evaluating embeddings after a
  # failed training run would be misleading.
  printf 'error: ./src/jose exited with a non-zero status\n' >&2
  exit 1
fi
duration=$(( SECONDS - start ))
printf '\nRunning time is %s seconds.\n' "$duration"
# Evaluate document clustering on the embeddings produced above.
emb_file="${doc_emb}"
# Run cluster.py once per method, in the original order:
#   kmeans  - K-Means
#   skmeans - Spherical K-Means
# Both methods are always attempted; the script's exit status is that of the
# last run, as before. Expansions are quoted so unusual dataset or file names
# are passed as single arguments.
for method in kmeans skmeans; do
  python cluster.py --dataset "${dataset}" --k 20 --emb_file "${emb_file}" --method "${method}"
done