Skip to content

Commit

Permalink
improve online dict
Browse files Browse the repository at this point in the history
  • Loading branch information
cdhigh committed Jun 10, 2024
1 parent 5655cd4 commit 1a5e792
Show file tree
Hide file tree
Showing 33 changed files with 1,650 additions and 608 deletions.
2 changes: 1 addition & 1 deletion application/lib/calibre/ebooks/mobi/writer8/toc.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
<head>
<title>{title}</title>
<style type="text/css">
li {{ list-style-type: none }}
li {{ list-style-type: none; padding-bottom: 8px }}
a {{ text-decoration: none }}
a:hover {{ color: red }}
{extra_css}
Expand Down
4 changes: 3 additions & 1 deletion application/lib/calibre/web/feeds/news.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def __init__(self):
self.get_delay = None
self.max_files = None
self.keep_images = True
self.keep_svg = False


#每篇文章的下载任务参数
Expand Down Expand Up @@ -994,7 +995,8 @@ def __init__(self, options, log, output_dir, fs, feed_index_start=0):
'compress_news_images_max_size', 'compress_news_images_auto_size', 'scale_news_images', 'filter_regexps',
'match_regexps', 'no_stylesheets', 'verbose', 'delay', 'timeout', 'recursions', 'encoding'):
setattr(wOpts, attr, getattr(self, attr))


wOpts.keep_svg = getattr(options, 'keep_svg')
wOpts.postprocess_html = self._postprocess_html
wOpts.preprocess_image = self.preprocess_image
wOpts.preprocess_raw_html = self.preprocess_raw_html_
Expand Down
14 changes: 11 additions & 3 deletions application/lib/dictionary/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,18 @@
from .dict_org import DictOrg
from .dict_cn import DictCn
from .dict_cc import DictCc
from .stardict import StarDict
from .merriam_webster import MerriamWebster

all_dict_engines = {DictOrg.name: DictOrg, DictCn.name: DictCn, DictCc.name: DictCc}
all_dict_engines = {DictOrg.name: DictOrg, DictCn.name: DictCn, DictCc.name: DictCc,
MerriamWebster.name: MerriamWebster, StarDict.name: StarDict}

#创建一个词典实例
def CreateDictInst(name, database, host=None):
klass = all_dict_engines.get(name, DictOrg)
def CreateDictInst(engine, database, host=None):
    """Create a dictionary instance for the engine registered as ``engine``.

    The engine class is looked up in ``all_dict_engines``; an unknown engine
    name falls back to ``DictOrg``. ``database`` and ``host`` are handed to
    the engine's constructor unchanged.
    """
    return all_dict_engines.get(engine, DictOrg)(database, host)

def GetDictDisplayName(engine, database):
    """Return the display name of ``database`` for the engine ``engine``.

    The engine class is looked up in ``all_dict_engines`` (falling back to
    ``DictOrg`` for an unknown engine). When the engine's ``databases``
    mapping has no entry for ``database``, the key itself is returned.
    """
    engine_cls = all_dict_engines.get(engine, DictOrg)
    known_databases = engine_cls.databases
    return known_databases.get(database, database)
10 changes: 7 additions & 3 deletions application/lib/dictionary/dict_cc.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,18 +75,22 @@ def __init__(self, database='', host=None):
if database not in self.databases:
default_log.warning(f'Database "{database}" not exists, fallback to "english"')
database = 'en'
self.destCode = database
self.database = database
self.destLang = self.databases[database]
self.host = 'dict.cc'
self.opener = UrlOpener()

#Return the name of the dictionary currently in use
def __repr__(self):
    """Label made of the engine name and the selected database code."""
    return 'dict.cc [{}]'.format(self.database)

def definition(self, word, language=''):
if language not in self.databases:
default_log.info(f'Database "{language}" not exists, fallback to "english"')
language = 'en'
if language == self.destCode:
if language == self.database:
raise Exception(f'The source and destination languages cannot be the same: {language}.')
url = f"https://{language}{self.destCode}.dict.cc"
url = f"https://{language}{self.database}.dict.cc"
resp = self.opener.open(url, data={"s": word.encode("utf-8")})
if resp.status_code == 200:
return self.parse_resp(resp.text)
Expand Down
4 changes: 4 additions & 0 deletions application/lib/dictionary/dict_cn.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ def __init__(self, database='!', host=None):
self.database = database
self.host = 'https://dict.cn'
self.opener = UrlOpener(host=self.host)

#Return the name of the dictionary currently in use
def __repr__(self):
    """Fixed label: this engine only offers one English-Chinese dictionary."""
    label = 'dict.cn [English-Chinese]'
    return label

def definition(self, word, language=''):
resp = self.opener.open(f'{self.host}/{word}')
Expand Down
12 changes: 8 additions & 4 deletions application/lib/dictionary/dict_org.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,13 +179,17 @@ class DictOrg:
#'*' - all result, '!' - only the first result, others - database name
def __init__(self, database='!', host=None):
if database not in self.databases:
default_log.warning('Database "{database}" not exists, fallback to "First match"')
default_log.warning(f'Database "{database}" not exists, fallback to "First match"')
database = '!'
self.database = database
self.host = 'dict.org'
self.con = Connection(self.host)
self.db = Database(self.con, database)

#Return the name of the dictionary currently in use
def __repr__(self):
    """Label made of the engine name and the selected database's description.

    The description is read from ``self.databases``; a database key that is
    not in that mapping yields an empty description.
    """
    description = self.databases.get(self.database, '')
    return f'dict.org [{description}]'

#查词,language - word的语种
def definition(self, word, language=''):
defs = self.db.define(word)
Expand All @@ -208,11 +212,11 @@ def convert_to_ipa(self, txt):
txt = f'{start}/{phon}/{rest}'

#音标符号
phonetic = {'[a^]': 'æ', '[e^]': 'ɛ', '[u^]': 'ʌ', '[.a]': 'ə', '[y^]': 'ɪ', '[i^]': '',
'[oo^]': 'uː', '[~e]': 'ə', '[o^]': 'ɔ', '[=a]': 'eɪ', '[th]': 'θ', '[=e]': 'iː', '[=u]': 'juː',
phonetic = {'[a^]': 'æ', '[e^]': 'ɛ', '[u^]': 'ʌ', '[.a]': 'ə', '[y^]': 'ɪ', '[i^]': 'i',
'[oo^]': 'uː', '[~e]': 'ə', '[o^]': 'ɔ', '[=a]': 'eɪ', '[th]': 'ð', '[=e]': 'iː', '[=u]': 'juː',
'[ng]': 'ŋ', '[aum]': 'ɔː', '[-o]': 'oʊ', "['e]": 'e', '[=o]': 'oʊ', '[^o]': 'ɔ',
'[imac]': 'aɪ', '[-e]': 'iː', '[add]': 'ɔː', '[asl]': 'æ', '[^e]': 'ɪ', '[=ae]': 'eɪ',
'[ae]': '', '[ˌo]': 'əʊ'}
'[ae]': 'æ', '[ˌo]': 'əʊ', '[-u]': 'u', '[thorn]': 'θ', '[eth]': '}

pattern = re.compile('|'.join(re.escape(key) for key in phonetic.keys()))
return pattern.sub(lambda x: phonetic[x.group()], txt)
Expand Down
54 changes: 54 additions & 0 deletions application/lib/dictionary/merriam_webster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#Merriam-Webster <https://www.merriam-webster.com/> 查词接口
import re
from bs4 import BeautifulSoup
from urlopener import UrlOpener

class MerriamWebster:
    """Word lookup engine that scrapes <https://www.merriam-webster.com/>."""

    name = "webster's"
    # Dictionary list: key is the dictionary abbreviation, value is its description
    databases = {"english": "Webster's New International Dictionary"}

    # Patterns used to shrink the downloaded page before it is parsed.
    # They are compiled once for the class instead of once per instance.
    # pat1: whole <head>/<script>/<style>/<svg>/<footer>/<header> elements
    # pat2: whitespace run directly before a '<'
    # pat3: whitespace run directly after a '>'
    pat1 = re.compile(br'<(head|script|style|svg|footer|header)\b[^<]*(?:(?!</\1>)<[^<]*)*</\1>', re.IGNORECASE)
    pat2 = re.compile(br'[\s\r\n]+<')
    pat3 = re.compile(br'>[\s\r\n]+')

    def __init__(self, database='', host=None):
        """Set up the engine.

        ``database`` is stored as given; ``host`` is accepted for signature
        compatibility with the other engines but is not used, the site
        address is fixed.
        """
        self.database = database
        self.host = 'https://www.merriam-webster.com'
        self.opener = UrlOpener(host=self.host)

    # Return the name of the dictionary currently in use
    def __repr__(self):
        return "webster's [English]"

    def _lookup_url(self, word):
        """Build the dictionary page URL for ``word``.

        The word is stripped and percent-encoded (including '/') so that
        spaces, slashes, '?', '#' and non-ASCII letters cannot alter the
        request path.
        """
        from urllib.parse import quote
        return f'{self.host}/dictionary/' + quote(word.strip(), safe='')

    def _strip_noise(self, content):
        """Remove bulky elements and inter-tag whitespace from raw page bytes."""
        content = self.pat1.sub(b'', content)
        content = self.pat2.sub(b'<', content)
        return self.pat3.sub(b'>', content)

    def definition(self, word, language=''):
        """Look up ``word`` and return its definitions as an HTML snippet.

        ``language`` is accepted for interface compatibility and is not used.

        Returns:
            - an HTML string (optional syllable and pronunciation spans
              followed by a ``<ul>`` of definitions) when at least one
              definition is found;
            - an empty string when the page contains no definition;
            - ``'Error: ...'`` when the HTTP status is not 200.
        """
        resp = self.opener.open(self._lookup_url(word))
        if resp.status_code != 200:
            return f'Error: {self.opener.CodeMap(resp.status_code)}'

        # The page is very bloated and BeautifulSoup takes too long to parse
        # it, so part of the content is removed with regexes first. The markup
        # is also not well-formed: feeding it to lxml directly often fails to
        # yield the definitions.
        soup = BeautifulSoup(self._strip_noise(resp.content), 'lxml')

        ret = []
        syllables = soup.find('span', {'class': 'word-syllables-entry'})
        if syllables:
            ret.append('<span>/' + syllables.get_text() + '/</span>')
        pronunciation = soup.find('span', {'class': 'prons-entries-list-inline'})
        if pronunciation:
            ret.append('<span style="margin-left:20px">[' + pronunciation.get_text().strip() + ']</span>')

        ret.append('<ul style="text-align:left;list-style-position:inside;">')
        hasDef = False
        for definition in soup.find_all("span", {"class" : "dt"}):
            # Only the first child tag of each "dt" span is used as the text
            tag = definition.findChild()
            if tag:
                ret.append('<li>' + tag.get_text().lstrip(' :') + '</li>')
                hasDef = True

        if not hasDef:
            return ''
        ret.append('</ul>')
        return ''.join(ret)
Loading

0 comments on commit 1a5e792

Please sign in to comment.