-
Notifications
You must be signed in to change notification settings - Fork 629
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
700 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding:utf-8 -*- | ||
from .babylon_dict import BabylonDict |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding:utf-8 -*- | ||
#Babylon bgl 离线词典支持 | ||
#Author: cdhigh <https://github.com/cdhigh> | ||
import os, re | ||
from application.ke_utils import loc_exc_pos | ||
from .bgl_reader import BglReader | ||
|
||
#获取本地的bgl文件列表,只有列表,没有校验是否有效 | ||
def getBglDictList():
    """Collect the ``.bgl`` files found under the ``DICTIONARY_DIR`` directory.

    The directory named by the ``DICTIONARY_DIR`` environment variable is
    walked recursively. Files are selected by extension only
    (case-insensitive); their content is not validated.

    Returns:
        dict: maps the full path of each ``.bgl`` file to its file name
        without the extension. Empty when the variable is unset or does not
        point to an existing directory.
    """
    root = os.environ.get('DICTIONARY_DIR')
    if not (root and os.path.isdir(root)):
        return {}

    # Key is the full path and value is the bare name, so the mapping has the
    # same shape the UI expects from the other dictionary engines.
    return {
        os.path.join(folder, entry): os.path.splitext(entry)[0]
        for folder, _subdirs, entries in os.walk(root)
        for entry in entries
        if entry.lower().endswith('.bgl')
    }
|
||
class BabylonDict:
    """Offline dictionary engine backed by a Babylon ``.bgl`` file.

    Construction never raises for an unknown or unreadable dictionary: the
    failure is stored in ``initError`` and later returned by ``definition``.
    """
    name = "babylon"
    # Available dictionaries: key is the full path of the .bgl file,
    # value is the dictionary name (file name without extension).
    databases = getBglDictList()

    @classmethod
    def refresh(cls):
        """Rescan the dictionary directory and replace ``databases``."""
        cls.databases = getBglDictList()

    def __init__(self, database='', host=None):
        """Open the dictionary identified by ``database``.

        Args:
            database: a key of ``databases`` (full path of the .bgl file).
            host: not used by this class.
        """
        self.database = database
        self.dictionary = None
        self.initError = None
        # NOTE(review): default_log is not defined or imported in this file;
        # presumably it is injected globally by the application - confirm.
        if database in self.databases:
            try:
                self.dictionary = BglReader(database)
            except Exception:
                self.initError = loc_exc_pos(f'Init BabylonDict failed: {self.databases[database]}')
                default_log.warning(self.initError)
        else:
            # The key is absent here, so indexing self.databases[database]
            # would raise KeyError; report the requested value itself.
            self.initError = f'Dict not found: {database}'
            default_log.warning(self.initError)

    def __repr__(self):
        """Return the engine name followed by the current dictionary name."""
        return '{} [{}]'.format(self.name, self.databases.get(self.database, ''))

    def definition(self, word, language=''):
        """Look up ``word`` in the opened dictionary.

        Args:
            word: the term to query.
            language: not used by this class.

        Returns:
            The stored initialization error message if opening the
            dictionary failed, otherwise whatever ``self.dictionary.query``
            returns for ``word``.
        """
        if self.initError:
            return self.initError
        return self.dictionary.query(word)
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,234 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding:utf-8 -*- | ||
"""bgl文件格式里的一些常数类型定义,gls=glossary""" | ||
|
||
# Type codes used by the bgl format, listed in numeric order.
PARAMETER = 0   # initial parameter
RESOURCE = 2    # resource
PROPERTY = 3    # glossary property
DELIMITER = 6   # delimiter

# Type codes that mark a term.
TERM_1 = 0x01
TERM_A = 0x0A
TERM_B = 0x0B
|
||
|
||
# Part-of-speech abbreviations keyed by consecutive codes 0x30 .. 0x3A.
LEXICAL_CLASS = dict(enumerate(
    (
        "n.",       # 0x30
        "adj.",     # 0x31
        "v.",       # 0x32
        "adv.",     # 0x33
        "interj.",  # 0x34
        "pron.",    # 0x35
        "prep.",    # 0x36
        "conj.",    # 0x37
        "suff.",    # 0x38
        "pref.",    # 0x39
        "art.",     # 0x3A
    ),
    start=0x30,
))
|
||
# Word-derivation (inflection) type labels; the position in the tuple is significant.
DERIVATION = (
    # --- V: verb ---
    "V-0",        # Verb
    "V-0.0",      # Verb
    "V-0.1",      # Infinitive
    "V-0.1.1",    # ?
    "V-1.0",
    "V-1.1",
    "V-1.1.1",    # Present Simple
    "V-1.1.2",    # Present Simple (3rd pers. sing.)
    "V-2.0",
    "V-2.1",
    "V-2.1.1",    # Past Simple
    "V-3.0",
    "V-3.1",
    "V-3.1.1",    # Present Participle
    "V-4.0",
    "V-4.1",
    "V-4.1.1",    # Past Participle
    "V-5.0",
    "V-5.1",
    "V-5.1.1",    # Future
    # --- V2 ---
    "V2-0",
    "V2-0.0",
    "V2-0.1",     # Infinitive
    "V2-0.1.1",
    "V2-1.0",
    "V2-1.1",
    "V2-1.1.1",   # Present Simple (1st pers. sing.)
    "V2-1.1.2",   # Present Simple (2nd pers. sing. & plural forms)
    "V2-1.1.3",   # Present Simple (3rd pers. sing.)
    "V2-2.0",
    "V2-2.1",
    "V2-2.1.1",   # Past Simple (1st & 3rd pers. sing.)
    "V2-2.1.2",   # Past Simple (2nd pers. sing. & plural forms)
    "V2-3.0",
    "V2-3.1",
    "V2-3.1.1",   # Present Participle
    "V2-4.0",
    "V2-4.1",
    "V2-4.1.1",   # Past Participle
    "V2-5.0",
    "V2-5.1",
    "V2-5.1.1",   # Future
    # --- N: noun ---
    "N-0",        # Noun
    "N-1.0",
    "N-1.1",
    "N-1.1.1",    # Singular
    "N-2.0",
    "N-2.1",
    "N-2.1.1",    # Plural
    # --- N4 ---
    "N4-1.0",
    "N4-1.1",
    "N4-1.1.1",   # Singular Masc.
    "N4-1.1.2",   # Singular Fem.
    "N4-2.0",
    "N4-2.1",
    "N4-2.1.1",   # Plural Masc.
    "N4-2.1.2",   # Plural Fem.
    # --- ADJ: adjective ---
    "ADJ-0",      # Adjective
    "ADJ-1.0",
    "ADJ-1.1",
    "ADJ-1.1.1",  # Adjective
    "ADJ-1.1.2",  # Comparative
    "ADJ-1.1.3",  # Superlative
)
|
||
# Language names; the position in the tuple is significant.
LANGUAGE = (
    # 0 - 16
    "English", "French", "Italian", "Spanish", "Dutch", "Portuguese",
    "German", "Russian", "Japanese", "Traditional Chinese",
    "Simplified Chinese", "Greek", "Korean", "Turkish", "Hebrew",
    "Arabic", "Thai",
    # 17 - 28: catch-all entries
    "Other",
    "Other Simplified Chinese dialects",
    "Other Traditional Chinese dialects",
    "Other Eastern-European languages",
    "Other Western-European languages",
    "Other Russian languages",
    "Other Japanese languages",
    "Other Baltic languages",
    "Other Greek languages",
    "Other Korean dialects",
    "Other Turkish dialects",
    "Other Thai dialects",
    # 29 - 59
    "Polish", "Hungarian", "Czech", "Lithuanian", "Latvian", "Catalan",
    "Croatian", "Serbian", "Slovak", "Albanian", "Urdu", "Slovenian",
    "Estonian", "Bulgarian", "Danish", "Finnish", "Icelandic",
    "Norwegian", "Romanian", "Swedish", "Ukrainian", "Belarusian",
    "Farsi", "Basque", "Macedonian", "Afrikaans", "Faeroese", "Latin",
    "Esperanto", "Tamazight", "Armenian",
)
|
||
# Maps a bgl charset code to the Python codec name used for that charset.
# Fixed: 0x45 (Japanese) used to map to ISO-8859-14, a Celtic Latin codec,
# and 0x4C (Hebrew) to ISO-8859-9, the Turkish codec; neither can represent
# the script it is labelled with.
# NOTE(review): other bgl readers use Windows code pages for the ISO-8859-x
# entries as well (cp1252 / cp1250 / cp1251 / cp1254) - confirm against real
# glossaries before changing them.
CHARSET = {
    0x41: "ISO-8859-1",  # Default
    0x42: "ISO-8859-1",  # Latin
    0x43: "ISO-8859-2",  # Eastern European
    0x44: "ISO-8859-5",  # Cyrillic
    0x45: "CP932",       # Japanese
    0x46: "big5",        # Traditional Chinese
    0x47: "gbk",         # Simplified Chinese
    0x48: "CP1257",      # Baltic
    0x49: "CP1253",      # Greek
    0x4A: "CP949",       # Korean
    0x4B: "ISO-8859-9",  # Turkish
    0x4C: "CP1255",      # Hebrew
    0x4D: "CP1256",      # Arabic
    0x4E: "CP874",       # Thai
}
|
||
# Ids of term properties that have a named constant.
TP_LEX_CLASS = 0x02
TP_TITLE = 0x08      # display name, but not index name
TP_PHON_TRAN = 0x1B

# Display names of term properties, keyed by property id.
# NOTE(review): "Lexcial" looks like a typo for "Lexical"; the string is left
# unchanged because other code may compare against it - confirm before fixing.
TERM_PROPERTY = {
    TP_LEX_CLASS: "Lexcial Class",
    0x06: "UNKNOWN",
    TP_TITLE: "Title",
    0x18: "Derivation",
    TP_PHON_TRAN: "Phonetic Transcription",
}
|
||
# Ids of glossary properties that have a named constant.
P_TITLE = 0x01
P_AUTHOR_NAME = 0x02
P_AUTHOR_EMAIL = 0x03
P_DESCRIPTION = 0x09
P_ICON = 0x0B
P_S_CHARSET = 0x1A
P_T_CHARSET = 0x1B
P_MANUAL = 0x41

# Names of glossary properties, keyed by property id.
PROPERTY_NAME = {
    P_TITLE: "Title",
    P_AUTHOR_NAME: "AuthorName",
    P_AUTHOR_EMAIL: "AuthorEmail",
    0x04: "Copyright",
    0x07: "SourceLanguage",
    0x08: "TargetLanguage",
    P_DESCRIPTION: "Description",
    P_ICON: "Icon",
    0x0C: "TermCount",
    P_S_CHARSET: "SourceCharset",
    P_T_CHARSET: "TargetCharset",
    0x27: "Lexical Class Name",  # localized lexical class name
    0x33: "CreationDate",
    0x1C: "LastUpdated",
    0x3B: "MorphologicalDerivationType",  # localized names of word variation type
    0x3C: "UNKNOWN",
    P_MANUAL: "GlossaryManual",
}

# Names of parameters, keyed by parameter id.
PARAMETER_NAME = {
    0x1A: "Source Charset",
    0x1B: "Target Charset",
}
|
||
|
Oops, something went wrong.