improve online dict

cdhigh · Jun 10, 2024 · 1a5e792 · 1a5e792
1 parent 5655cd4
commit 1a5e792
Show file tree

Hide file tree

Showing 33 changed files with 1,650 additions and 608 deletions.
diff --git a/application/lib/calibre/ebooks/mobi/writer8/toc.py b/application/lib/calibre/ebooks/mobi/writer8/toc.py
@@ -18,7 +18,7 @@
 <head>
   <title>{title}</title>
   <style type="text/css">
-  li {{ list-style-type: none }}
+  li {{ list-style-type: none; padding-bottom: 8px }}
   a {{ text-decoration: none }}
   a:hover {{ color: red }}
   {extra_css}

diff --git a/application/lib/calibre/web/feeds/news.py b/application/lib/calibre/web/feeds/news.py
@@ -91,6 +91,7 @@ def __init__(self):
         self.get_delay = None
         self.max_files = None
         self.keep_images = True
+        self.keep_svg = False
 
 
 #每篇文章的下载任务参数
@@ -994,7 +995,8 @@ def __init__(self, options, log, output_dir, fs, feed_index_start=0):
             'compress_news_images_max_size', 'compress_news_images_auto_size', 'scale_news_images', 'filter_regexps',
             'match_regexps', 'no_stylesheets', 'verbose', 'delay', 'timeout', 'recursions', 'encoding'):
             setattr(wOpts, attr, getattr(self, attr))
-
+
+        wOpts.keep_svg = getattr(options, 'keep_svg')
         wOpts.postprocess_html = self._postprocess_html
         wOpts.preprocess_image = self.preprocess_image
         wOpts.preprocess_raw_html = self.preprocess_raw_html_

diff --git a/application/lib/dictionary/__init__.py b/application/lib/dictionary/__init__.py
@@ -4,10 +4,18 @@
 from .dict_org import DictOrg
 from .dict_cn import DictCn
 from .dict_cc import DictCc
+from .stardict import StarDict
+from .merriam_webster import MerriamWebster
 
-all_dict_engines = {DictOrg.name: DictOrg, DictCn.name: DictCn, DictCc.name: DictCc}
+all_dict_engines = {DictOrg.name: DictOrg, DictCn.name: DictCn, DictCc.name: DictCc,
+    MerriamWebster.name: MerriamWebster, StarDict.name: StarDict}
 
 #创建一个词典实例
-def CreateDictInst(name, database, host=None):
-    klass = all_dict_engines.get(name, DictOrg)
+def CreateDictInst(engine, database, host=None):
+    klass = all_dict_engines.get(engine, DictOrg)
     return klass(database, host)
+
+#Get the display name of a database belonging to a given engine
+def GetDictDisplayName(engine, database):
+    """Return the human-readable name of `database` for the engine named `engine`.
+
+    The engine class is looked up in all_dict_engines, falling back to DictOrg
+    when `engine` is not registered. If the class has no entry for `database`
+    in its `databases` mapping, `database` itself is returned unchanged.
+    """
+    engine_cls = all_dict_engines.get(engine, DictOrg)
+    display_names = engine_cls.databases
+    return display_names.get(database, database)
diff --git a/application/lib/dictionary/dict_cc.py b/application/lib/dictionary/dict_cc.py
@@ -75,18 +75,22 @@ def __init__(self, database='', host=None):
         if database not in self.databases:
             default_log.warning(f'Database "{database}" not exists, fallback to "english"')
             database = 'en'
-        self.destCode = database
+        self.database = database
         self.destLang = self.databases[database]
         self.host = 'dict.cc'
         self.opener = UrlOpener()
+
+    #Return the name of the dictionary currently in use
+    def __repr__(self):
+        """Return a label of the form 'dict.cc [<database>]' built from self.database."""
+        return f'dict.cc [{self.database}]'
 
     def definition(self, word, language=''):
         if language not in self.databases:
             default_log.info(f'Database "{language}" not exists, fallback to "english"')
             language = 'en'
-        if language == self.destCode:
+        if language == self.database:
             raise Exception(f'The source and destination languages cannot be the same: {language}.')
-        url = f"https://{language}{self.destCode}.dict.cc"
+        url = f"https://{language}{self.database}.dict.cc"
         resp = self.opener.open(url, data={"s": word.encode("utf-8")})
         if resp.status_code == 200:
             return self.parse_resp(resp.text)

diff --git a/application/lib/dictionary/dict_cn.py b/application/lib/dictionary/dict_cn.py
@@ -13,6 +13,10 @@ def __init__(self, database='!', host=None):
         self.database = database
         self.host = 'https://dict.cn'
         self.opener = UrlOpener(host=self.host)
+
+    #Return the name of the dictionary currently in use
+    def __repr__(self):
+        """Return the fixed label 'dict.cn [English-Chinese]'; self.database is not consulted."""
+        return 'dict.cn [English-Chinese]'
 
     def definition(self, word, language=''):
         resp = self.opener.open(f'{self.host}/{word}')

diff --git a/application/lib/dictionary/dict_org.py b/application/lib/dictionary/dict_org.py
@@ -179,13 +179,17 @@ class DictOrg:
     #'*' - all result, '!' - only the first result, others - database name
     def __init__(self, database='!', host=None):
         if database not in self.databases:
-            default_log.warning('Database "{database}" not exists, fallback to "First match"')
+            default_log.warning(f'Database "{database}" not exists, fallback to "First match"')
             database = '!'
         self.database = database
         self.host = 'dict.org'
         self.con = Connection(self.host)
         self.db = Database(self.con, database)
 
+    #Return the name of the dictionary currently in use
+    def __repr__(self):
+        """Return 'dict.org [<description>]' for the selected database.
+
+        The description is read from self.databases using self.database as the
+        key; an empty string is used when the key is not present.
+        """
+        description = self.databases.get(self.database, '')
+        return 'dict.org [{}]'.format(description)
+
     #查词，language - word的语种
     def definition(self, word, language=''):
         defs = self.db.define(word)
@@ -208,11 +212,11 @@ def convert_to_ipa(self, txt):
             txt = f'{start}/{phon}/{rest}'
 
         #音标符号
-        phonetic = {'[a^]': 'æ', '[e^]': 'ɛ', '[u^]': 'ʌ', '[.a]': 'ə', '[y^]': 'ɪ', '[i^]': 'iː',
-            '[oo^]': 'uː', '[~e]': 'ə', '[o^]': 'ɔ', '[=a]': 'eɪ', '[th]': 'θ', '[=e]': 'iː', '[=u]': 'juː',
+        phonetic = {'[a^]': 'æ', '[e^]': 'ɛ', '[u^]': 'ʌ', '[.a]': 'ə', '[y^]': 'ɪ', '[i^]': 'i',
+            '[oo^]': 'uː', '[~e]': 'ə', '[o^]': 'ɔ', '[=a]': 'eɪ', '[th]': 'ð', '[=e]': 'iː', '[=u]': 'juː',
             '[ng]': 'ŋ', '[aum]': 'ɔː', '[-o]': 'oʊ', "['e]": 'e', '[=o]': 'oʊ', '[^o]': 'ɔ',
             '[imac]': 'aɪ', '[-e]': 'iː', '[add]': 'ɔː', '[asl]': 'æ', '[^e]': 'ɪ', '[=ae]': 'eɪ',
-            '[ae]': 'eɪ', '[ˌo]': 'əʊ'}
+            '[ae]': 'æ', '[ˌo]': 'əʊ', '[-u]': 'u', '[thorn]': 'θ', '[eth]': 'ð'}
 
         pattern = re.compile('|'.join(re.escape(key) for key in phonetic.keys()))
         return pattern.sub(lambda x: phonetic[x.group()], txt)

diff --git a/application/lib/dictionary/merriam_webster.py b/application/lib/dictionary/merriam_webster.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+#Merriam-Webster <https://www.merriam-webster.com/> 查词接口
+import re
+from html import escape
+from urllib.parse import quote
+
+from bs4 import BeautifulSoup
+from urlopener import UrlOpener
+
+class MerriamWebster:
+    """Lookup engine that scrapes word definitions from merriam-webster.com."""
+    name = "webster's"
+    #Dictionary list: the key is the database abbreviation, the value is its description
+    databases = {"english": "Webster's New International Dictionary"}
+
+    def __init__(self, database='', host=None):
+        """Store the database key and prepare the opener and the cleanup patterns.
+
+        database -- stored as-is in self.database.
+        host -- accepted so the constructor matches the other engines' signature
+                (CreateDictInst calls klass(database, host)); it is not used here,
+                the site URL is fixed.
+        """
+        self.database = database
+        self.host = 'https://www.merriam-webster.com'
+        self.opener = UrlOpener(host=self.host)
+        #Whole <head>/<script>/<style>/<svg>/<footer>/<header> elements, case-insensitive
+        self.pat1 = re.compile(br'<(head|script|style|svg|footer|header)\b[^<]*(?:(?!</\1>)<[^<]*)*</\1>', re.IGNORECASE)
+        #Whitespace immediately before '<' and immediately after '>'
+        self.pat2 = re.compile(br'[\s\r\n]+<')
+        self.pat3 = re.compile(br'>[\s\r\n]+')
+
+    #Return the name of the dictionary currently in use
+    def __repr__(self):
+        return "webster's [English]"
+
+    def definition(self, word, language=''):
+        """Look up `word` and return its definitions as an HTML fragment.
+
+        word -- the term to look up; surrounding whitespace is ignored.
+        language -- unused by this engine.
+
+        Returns an HTML string (optional syllable/pronunciation spans followed by
+        a <ul> of definitions), '' when the word is empty or no definition was
+        found on the page, or 'Error: ...' when the HTTP status is not 200.
+        """
+        word = word.strip()
+        if not word:
+            return ''
+
+        #Percent-encode the word so that spaces, '/', '?' or '#' cannot alter the request path
+        path = quote(word, safe='')
+        resp = self.opener.open(f'{self.host}/dictionary/{path}')
+        if resp.status_code != 200:
+            return f'Error: {self.opener.CodeMap(resp.status_code)}'
+
+        #The page is large and cluttered and BeautifulSoup takes too long to parse it,
+        #so strip some content with regular expressions first.
+        #The markup is also irregular; handing it to lxml directly often loses the definitions.
+        content = self.pat1.sub(b'', resp.content)
+        content = self.pat2.sub(b'<', content)
+        content = self.pat3.sub(b'>', content)
+        soup = BeautifulSoup(content, 'lxml')
+
+        #Text taken from the remote page is escaped before being placed in our own markup
+        ret = []
+        phonetic = soup.find('span', {'class': 'word-syllables-entry'})
+        if phonetic:
+            ret.append('<span>/' + escape(phonetic.get_text(), quote=False) + '/</span>')
+        phonetic = soup.find('span', {'class': 'prons-entries-list-inline'})
+        if phonetic:
+            ret.append('<span style="margin-left:20px">[' + escape(phonetic.get_text().strip(), quote=False) + ']</span>')
+
+        items = []
+        for definition in soup.find_all("span", {"class" : "dt"}):
+            tag = definition.findChild()
+            if tag:
+                items.append('<li>' + escape(tag.get_text().lstrip(' :'), quote=False) + '</li>')
+        if not items:
+            return ''
+
+        ret.append('<ul style="text-align:left;list-style-position:inside;">')
+        ret.extend(items)
+        ret.append('</ul>')
+        return ''.join(ret)