forked from so1ar/rime-cloverpinyin
-
Notifications
You must be signed in to change notification settings - Fork 0
/
build
executable file
·182 lines (145 loc) · 3.61 KB
/
build
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#!/bin/bash
# Build script.
# Usage: build [minfreq]
#   minfreq  value later passed to clover-dict-gen as --minfreq (default: 100)

# Resolve the directory that holds this script (symlinks followed) BEFORE
# cleaning up, so the cleanup below always targets the script's own cache/
# and data/ directories rather than whatever directory the caller is in.
SHELL_FOLDER=$(dirname "$(readlink -f "$0")")
[ -d "$SHELL_FOLDER" ] || exit 1

# Start from a clean slate. ${VAR:?} aborts instead of expanding to "/cache"
# should the variable ever be empty.
rm -rf -- "${SHELL_FOLDER:?}/cache"
rm -rf -- "${SHELL_FOLDER:?}/data"

# The first argument overrides the default when it is present and non-empty.
minfreq=${1:-100}
find_downloader() {
  # Locate a download tool.
  # Sets:    $downloader  - name of the command to run
  #          $down_out_op - option word(s) that go between the URL and the
  #                         output file name
  # Returns: 0 when a tool was found; 1 (after printing a hint to stderr)
  #          when none of aria2c, wget or curl is available.
  # Tools are tried in this order of preference: aria2c, wget, curl.
  if command -v aria2c >/dev/null 2>&1; then
    downloader=aria2c
    down_out_op=-o
    return 0
  fi
  if command -v wget >/dev/null 2>&1; then
    downloader=wget
    down_out_op=-O
    return 0
  fi
  if command -v curl >/dev/null 2>&1; then
    downloader=curl
    # Unlike the two tools above, curl does not follow redirects and does not
    # report HTTP errors through its exit status unless told to: -L and -f
    # enable both. The value holds two words and relies on the caller
    # expanding $down_out_op unquoted.
    down_out_op='-fL -o'
    return 0
  fi
  echo "未找到合适的下载器,请安装 aria2c/wget/curl 之一后重试。" >&2
  return 1
}
find_extractor() {
  # Locate an archive extraction tool.
  # Sets:    $extractor - name of the command to run
  #          $extra_op  - first option placed before the archive name
  #          $extra_op1 - second option (only used for 7z; cleared for the
  #                       other tools so a stale or inherited value is never
  #                       passed to them)
  # Returns: 0 when a tool was found; 1 (after printing a hint to stderr)
  #          when none of unzip, bsdtar or 7z is available.
  # Tools are tried in this order of preference: unzip, bsdtar, 7z.
  if command -v unzip >/dev/null 2>&1; then
    extractor=unzip
    extra_op=-o
    extra_op1=
    return 0
  fi
  if command -v bsdtar >/dev/null 2>&1; then
    extractor=bsdtar
    extra_op=xf
    extra_op1=
    return 0
  fi
  if command -v 7z >/dev/null 2>&1; then
    extractor=7z
    extra_op=x
    extra_op1=-y
    return 0
  fi
  # Report the failure the same way find_downloader does instead of
  # returning silently.
  echo "未找到合适的解压工具,请安装 unzip/bsdtar/7z 之一后重试。" >&2
  return 1
}
down() {
  # Download a single file.
  #   $1  URL to fetch
  #   $2  local file to save it as
  # A downloader is looked up on first use; if none is set after the lookup
  # the whole script exits with status 1. Otherwise the function returns the
  # downloader's own exit status.
  local url=$1
  local target=$2

  if [ -z "${downloader}" ]; then
    find_downloader
    if [ -z "${downloader}" ]; then
      exit 1
    fi
  fi

  # $down_out_op is expanded unquoted on purpose so that it may carry more
  # than one option word.
  $downloader "$url" $down_out_op "$target"
}
extract() {
  # Unpack a single archive into the current directory.
  #   $1  archive file to unpack
  # An extraction tool is looked up on first use; if none is set after the
  # lookup the whole script exits with status 2. Otherwise the function
  # returns the extraction tool's own exit status.
  local archive=$1
  local -a opts=()

  if [ -z "${extractor}" ]; then
    find_extractor
    if [ -z "${extractor}" ]; then
      exit 2
    fi
  fi

  # Pass two, one or no option words ahead of the archive name, depending on
  # which of $extra_op1 / $extra_op are set.
  if [ -n "$extra_op1" ]; then
    opts=("$extra_op" "$extra_op1")
  elif [ -n "$extra_op" ]; then
    opts=("$extra_op")
  fi

  $extractor "${opts[@]}" "$archive"
}
# Work inside <script dir>/cache; stop if it cannot be created or entered.
cd "$SHELL_FOLDER" || exit
mkdir -p cache || exit
cd cache || exit

# Download, verify and unpack every entry of src/file_list.txt.
# Each non-empty line is split on tabs into:
#   1 url   2 md5   3 name (local file)   4 dst   5 commit
# When dst is non-empty the archive is extracted and the resulting
# "<dst>-<commit>" directory is renamed to "<dst>".
#
# The list is read from fd 3 rather than through `cat ... | while`: a piped
# loop runs in a subshell, so the `exit` calls below would only leave the loop
# and the build would carry on after a failed download or checksum. Using fd 3
# (not stdin) also keeps the list out of reach of commands in the loop body.
# `|| [ -n "$line" ]` keeps a final line that lacks a trailing newline.
while read -r line <&3 || [ -n "$line" ]; do
  [ "$line" ] || continue
  url=$(printf '%s\n' "$line" | cut -f1)
  [ "$url" ] || continue
  md5=$(printf '%s\n' "$line" | cut -f2)
  name=$(printf '%s\n' "$line" | cut -f3)
  dst=$(printf '%s\n' "$line" | cut -f4)
  commit=$(printf '%s\n' "$line" | cut -f5)

  # Only download files that are not already present.
  if [ ! -f "$name" ]; then
    down "$url" "$name" || exit
  fi

  echo "url = $url"
  echo "md5 = $md5"
  echo "name = $name"
  echo "dst = $dst"
  echo "commit = $commit"

  # Feed md5sum a checksum line in its standard "<hash>  <file>" layout.
  printf '%s  %s\n' "$md5" "$name" | md5sum -c || exit

  if [ -n "$dst" ]; then
    # Remove leftovers from an earlier run before unpacking again.
    rm -rf "$dst-$commit" "$dst"
    echo "$name"
    extract "$name" || exit
    mv "$dst-$commit" "$dst" || exit
  fi
done 3< "$SHELL_FOLDER/src/file_list.txt" || exit
# Unpack the word-frequency archive (presumably fetched through
# src/file_list.txt; confirm against that list).
extract "360万中文词库+词性+词频.zip" || exit

# Shallow-clone the upstream repositories into cache/.
# A failed clone now stops the build: previously it was ignored, and because
# `ln -sf` happily creates dangling links the error only surfaced much later.
# A directory that already exists is reused instead of being cloned again.
for repo_url in \
  https://github.com/rime/rime-essay \
  https://github.com/rime/rime-pinyin-simp \
  https://github.com/thunlp/THUOCL \
  https://github.com/rime/rime-emoji \
  https://github.com/so1ar/rime-symbols \
  https://github.com/fkxxyz/libscel
do
  repo_dir=${repo_url##*/}
  [ -d "$repo_dir" ] || git clone "$repo_url" --depth=1 || exit
done

# Expose the two upstream files under the names used in cache/.
ln -sf rime-essay/essay.txt essay.txt || exit
ln -sf rime-pinyin-simp/pinyin_simp.dict.yaml pinyin_simp.dict.yaml || exit

# Copy the project's own opencc files to cache/extra/opencc. This is done
# without changing directory, so a failure can no longer leave the script
# outside cache/.
mkdir -p extra || exit
cp -r ../src/opencc extra/ || exit
# Generate the symbol lists: rime-symbols-gen is run from inside
# rime-symbols/opencc, then the script returns to cache/.
cd rime-symbols || exit
mkdir -p opencc || exit
cd opencc || exit
../rime-symbols-gen || exit
cd ../.. || exit

# Build symbols.txt from every */opencc/*.txt, passed through opencc with the
# t2s.json config and with adjacent duplicate lines removed.
# pipefail (scoped to a subshell so the rest of the script is unaffected)
# makes a failure of cat or opencc stop the build instead of silently leaving
# an empty or partial symbols.txt.
(
  set -o pipefail
  cat */opencc/*.txt | opencc -c t2s.json | uniq > symbols.txt
) || exit

# Generate the dictionaries.
../src/clover-dict-gen --minfreq="$minfreq" || exit
for thuocl_file in THUOCL/data/THUOCL_*; do
  echo "转换 $thuocl_file"
  ../src/thuocl2rime "$thuocl_file" || exit
done

# scel.py output is appended to a copy of the seed file from src/, so the copy
# has to succeed first; otherwise `>>` would create a file without the seed's
# contents.
cp ../src/sogou_new_words.dict.yaml . || exit
./libscel/scel.py >> sogou_new_words.dict.yaml || exit
# Assemble the data directory next to cache/: the static YAML files from src/
# plus the dictionaries generated in cache/.
mkdir -p ../data || exit
cp ../src/*.yaml ../data || exit
mv clover.*.yaml THUOCL_*.yaml sogou_new_words.dict.yaml ../data || exit
# Everything below writes relative to data/, so do not continue from cache/
# if the directory cannot be entered.
cd ../data || exit

# Collect the opencc resources from the three sources into data/opencc.
# Each copy is checked so the deployer never runs on an incomplete set.
mkdir -p opencc || exit
cp ../cache/rime-emoji/opencc/* opencc || exit
cp ../cache/rime-symbols/opencc/* opencc || exit
cp ../cache/extra/opencc/* opencc || exit

echo 开始构建部署二进制
rime_deployer --compile clover.schema.yaml . /usr/share/rime-data || exit
# Drop the .txt files left in build/ after compilation.
rm -rf build/*.txt