add babylon dict (experimental)

cdhigh · Nov 23, 2024 · de1620c · de1620c
1 parent 94070ab
commit de1620c
Show file tree

Hide file tree

Showing 7 changed files with 700 additions and 24 deletions.
diff --git a/application/lib/dictionary/__init__.py b/application/lib/dictionary/__init__.py
@@ -9,10 +9,12 @@
 from .stardict import StarDict
 from .mdict import MDict
 from .lingvo import LingvoDict
+from .babylon import BabylonDict
 
 all_dict_engines = {DictOrg.name: DictOrg, DictCn.name: DictCn, DictCc.name: DictCc,
     MerriamWebster.name: MerriamWebster, OxfordLearners.name: OxfordLearners,
-    StarDict.name: StarDict, MDict.name: MDict, LingvoDict.name: LingvoDict}
+    StarDict.name: StarDict, MDict.name: MDict, LingvoDict.name: LingvoDict,
+    BabylonDict.name: BabylonDict}
 
 #创建一个词典实例
 def CreateDictInst(engine, database, host=None):

diff --git a/application/lib/dictionary/babylon/__init__.py b/application/lib/dictionary/babylon/__init__.py
@@ -0,0 +1,3 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+from .babylon_dict import BabylonDict
diff --git a/application/lib/dictionary/babylon/babylon_dict.py b/application/lib/dictionary/babylon/babylon_dict.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#Babylon bgl 离线词典支持
+#Author: cdhigh <https://github.com/cdhigh>
+import os, re
+from application.ke_utils import loc_exc_pos
+from .bgl_reader import BglReader
+
+def getBglDictList():
+    """Collect the Babylon ``.bgl`` files found under ``$DICTIONARY_DIR``.
+
+    The directory tree is walked recursively and files are matched by
+    extension only (case-insensitive); their content is not validated.
+
+    Returns a dict whose keys are the full paths of the files and whose
+    values are the file names without extension, the same key/value layout
+    the UI uses for the other dictionary engines. An empty dict is returned
+    when the environment variable is unset or is not an existing directory.
+    """
+    rootDir = os.environ.get('DICTIONARY_DIR')
+    if not (rootDir and os.path.isdir(rootDir)):
+        return {}
+
+    return {os.path.join(folder, entry): os.path.splitext(entry)[0]
+            for folder, _, entries in os.walk(rootDir)
+            for entry in entries
+            if entry.lower().endswith('.bgl')}
+
+class BabylonDict:
+    """Offline dictionary engine backed by a Babylon ``.bgl`` file.
+
+    An instance never raises from ``__init__``: any problem opening the
+    dictionary is stored in ``self.initError`` and is returned as the
+    "definition" of every queried word.
+    """
+    name = "babylon"
+    # Known dictionaries: key is the full path of the .bgl file, value is the
+    # dictionary name (the file name without its extension)
+    databases = getBglDictList()
+
+    @classmethod
+    def refresh(cls):
+        """Rescan the dictionary directory and replace ``cls.databases``."""
+        cls.databases = getBglDictList()
+
+    def __init__(self, database='', host=None):
+        """Open the dictionary identified by *database*.
+
+        database: a key of ``databases`` (full path of a .bgl file).
+        host: unused here; presumably kept so the signature matches the
+              other dictionary engines -- confirm against CreateDictInst.
+        """
+        self.database = database
+        self.dictionary = None
+        self.initError = None
+        if database in self.databases:
+            try:
+                self.dictionary = BglReader(database)
+            except Exception:
+                self.initError = loc_exc_pos(f'Init BabylonDict failed: {self.databases[database]}')
+                # default_log is not defined in this module; presumably it is
+                # provided globally by the application -- confirm
+                default_log.warning(self.initError)
+        else:
+            # database is not a key of self.databases on this path, so it must
+            # not be looked up there (doing so raised KeyError); report the
+            # requested value itself
+            self.initError = f'Dict not found: {database}'
+            default_log.warning(self.initError)
+
+    def __repr__(self):
+        """Return the engine name followed by the current dictionary name."""
+        return '{} [{}]'.format(self.name, self.databases.get(self.database, ''))
+
+    def definition(self, word, language=''):
+        """Return the result of querying *word*, or the init error message.
+
+        language is accepted but not used by this engine.
+        """
+        return self.initError if self.initError else self.dictionary.query(word)
+
+
diff --git a/application/lib/dictionary/babylon/bgl_gls.py b/application/lib/dictionary/babylon/bgl_gls.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+"""bgl文件格式里的一些常数类型定义，gls=glossary"""
+
+# Type codes defined by the bgl format for the kinds of data stored in a file.
+# NOTE(review): presumably these are the block/record type identifiers the
+# reader dispatches on -- confirm against bgl_reader.
+
+# initial parameter
+PARAMETER = 0
+# glossary property
+PROPERTY = 3
+
+# term (three different codes all denote a term)
+TERM_1 = 0x1
+TERM_A = 0xA
+TERM_B = 0xB
+
+# delimiter
+DELIMITER = 6
+
+# resource
+RESOURCE = 2
+
+
+# Part-of-speech abbreviation for each lexical class code (0x30 - 0x3A)
+LEXICAL_CLASS = {
+    0x30 : 'n.',
+    0x31 : 'adj.',
+    0x32 : 'v.',
+    0x33 : 'adv.',
+    0x34 : 'interj.',
+    0x35 : "pron.",
+    0x36 : "prep.",
+    0x37 : "conj.",
+    0x38 : "suff.",
+    0x39 : "pref.",
+    0x3A : "art." 
+    }
+
+# Names of the word derivation (inflection) forms. This is a positional
+# table, so the order of the entries must not change.
+# NOTE(review): presumably indexed by a derivation code read from the
+# glossary -- confirm against bgl_reader.
+DERIVATION = (
+    'V-0',# Verb
+    'V-0.0',# Verb
+    'V-0.1',# Infinitive
+    'V-0.1.1',# ?
+    'V-1.0',
+    'V-1.1',
+    'V-1.1.1', # Present Simple
+    'V-1.1.2', #Present Simple (3rd pers. sing.)
+    'V-2.0',#
+    'V-2.1',#
+    'V-2.1.1',# Past Simple
+    'V-3.0',#
+    'V-3.1',#
+    'V-3.1.1',# Present Participle
+    'V-4.0',#
+    'V-4.1',#
+    'V-4.1.1',#Past Participle
+    'V-5.0',#
+    'V-5.1',#
+    'V-5.1.1',#Future
+    'V2-0',#
+    'V2-0.0',#
+    'V2-0.1',#Infinitive
+    'V2-0.1.1',#
+    'V2-1.0',#
+    'V2-1.1',#
+    'V2-1.1.1',#Present Simple (1st pers. sing.)
+    'V2-1.1.2',#Present Simple (2nd pers. sing. & plural forms)
+    'V2-1.1.3',#Present Simple (3rd pers. sing.)
+    'V2-2.0',#
+    'V2-2.1',#
+    'V2-2.1.1',#Past Simple (1st & 3rd pers. sing.)
+    'V2-2.1.2',#Past Simple (2nd pers. sing. & plural forms)
+    'V2-3.0',#
+    'V2-3.1',#
+    'V2-3.1.1',#Present Participle
+    'V2-4.0',#
+    'V2-4.1',#
+    'V2-4.1.1',#Past Participle
+    'V2-5.0',#
+    'V2-5.1',#
+    'V2-5.1.1',#Future
+    'N-0',#Noun
+    'N-1.0',#
+    'N-1.1',#
+    'N-1.1.1',#Singular
+    'N-2.0',#
+    'N-2.1',#
+    'N-2.1.1',#Plural
+    'N4-1.0',#
+    'N4-1.1',#
+    'N4-1.1.1',#Singular Masc.
+    'N4-1.1.2',#Singular Fem.
+    'N4-2.0',#
+    'N4-2.1',#
+    'N4-2.1.1',#Plural Masc.
+    'N4-2.1.2',#Plural Fem.
+    'ADJ-0',#Adjective
+    'ADJ-1.0',#
+    'ADJ-1.1',#
+    'ADJ-1.1.1',#Adjective
+    'ADJ-1.1.2',#Comparative
+    'ADJ-1.1.3',#Superlative
+    )
+
+# Language names. This is a positional table, so the order of the entries
+# must not change.
+# NOTE(review): presumably indexed by the numeric value of the
+# SourceLanguage / TargetLanguage glossary properties -- confirm against
+# bgl_reader.
+LANGUAGE = (
+    "English", 
+    "French",
+    "Italian",
+    "Spanish",
+    "Dutch",
+    "Portuguese",
+    "German",
+    "Russian",
+    "Japanese",
+    "Traditional Chinese",
+    "Simplified Chinese",
+    "Greek",
+    "Korean",
+    "Turkish",
+    "Hebrew",
+    "Arabic",
+    "Thai",
+    "Other",
+    "Other Simplified Chinese dialects",
+    "Other Traditional Chinese dialects",
+    "Other Eastern-European languages",
+    "Other Western-European languages",
+    "Other Russian languages",
+    "Other Japanese languages",
+    "Other Baltic languages",
+    "Other Greek languages",
+    "Other Korean dialects",
+    "Other Turkish dialects",
+    "Other Thai dialects",
+    "Polish",
+    "Hungarian",
+    "Czech",
+    "Lithuanian",
+    "Latvian",
+    "Catalan",
+    "Croatian",
+    "Serbian",
+    "Slovak",
+    "Albanian",
+    "Urdu",
+    "Slovenian",
+    "Estonian",
+    "Bulgarian",
+    "Danish",
+    "Finnish",
+    "Icelandic",
+    "Norwegian",
+    "Romanian",
+    "Swedish",
+    "Ukrainian",
+    "Belarusian",
+    "Farsi",
+    "Basque",
+    "Macedonian",
+    "Afrikaans",
+    "Faeroese",
+    "Latin",
+    "Esperanto",
+    "Tamazight",
+    "Armenian"
+    )
+
+# Python codec name for each charset code (0x41 - 0x4E) of a glossary.
+# NOTE(review): other BGL readers map the Latin / Eastern European /
+# Cyrillic / Turkish codes to the Windows code pages (cp1252 / cp1250 /
+# cp1251 / cp1254) instead of the ISO-8859 charsets kept here -- confirm
+# with real dictionaries before changing them.
+CHARSET = {
+    0x41: "ISO-8859-1", #Default
+    0x42: "ISO-8859-1", #Latin
+    0x43: "ISO-8859-2", #Eastern European
+    0x44: "ISO-8859-5", #Cyrillic
+    0x45: "CP932",      #Japanese (was ISO-8859-14, which is Celtic Latin-8)
+    0x46: "big5",       #Traditional Chinese
+    0x47: "gbk",        #Simplified Chinese
+    0x48: "CP1257",     #Baltic
+    0x49: "CP1253",     #Greek
+    0x4A: "CP949",      #Korean
+    0x4B: "ISO-8859-9", #Turkish
+    0x4C: "CP1255",     #Hebrew (was ISO-8859-9, which is the Turkish charset)
+    0x4D: "CP1256",     #Arabic
+    0x4E: "CP874"       #Thai
+    }
+
+# Term property codes; each value is also a key of TERM_PROPERTY below.
+
+# lexical class (part of speech)
+TP_LEX_CLASS = 0x02
+
+# display name, but not index name
+TP_TITLE = 0x08
+
+# phonetic transcription
+TP_PHON_TRAN = 0x1b
+
+
+# Human-readable name of each term property code.
+# NOTE(review): "Lexcial Class" looks like a typo for "Lexical Class"; it is
+# left unchanged here because the string is runtime data that other code
+# may compare against.
+TERM_PROPERTY={
+    0x02: "Lexcial Class",
+    0x06: "UNKNOWN",
+    0x08: "Title",
+    0x18: "Derivation",
+    0x1b: "Phonetic Transcription"
+}
+
+# Glossary property codes; each value is also a key of PROPERTY_NAME below.
+P_TITLE = 0x01
+P_AUTHOR_NAME = 0x02
+P_AUTHOR_EMAIL = 0x03
+P_DESCRIPTION = 0x09
+P_S_CHARSET = 0x1A  # source charset
+P_T_CHARSET = 0x1B  # target charset
+P_MANUAL = 0x41
+P_ICON = 0x0B
+
+
+# Name of each glossary property code
+PROPERTY_NAME = {
+    0x01 : "Title",
+    0x02 : "AuthorName",
+    0x03 : "AuthorEmail",
+    0x04 : "Copyright",
+    0x07 : "SourceLanguage",
+    0x08 : "TargetLanguage",
+    0x09 : "Description",
+    0x0B : "Icon",
+    0x0C : "TermCount",
+    0x1A : "SourceCharset",
+    0x1B : "TargetCharset",
+    0x27 : "Lexical Class Name", # localized lexical class name
+    0x33 : "CreationDate",
+    0x1C : "LastUpdated",
+    0x3B : "MorphologicalDerivationType", # localized names of word variation type
+    0x3C : "UNKNOWN",
+    0x41 : "GlossaryManual"
+    }
+
+# Name of each initial parameter code (the same codes as the charset
+# entries of PROPERTY_NAME)
+PARAMETER_NAME = {
+    0x1A : "Source Charset",
+    0x1B : "Target Charset"
+    }
+
+