Skip to content

Commit

Permalink
3.2.1
Browse files Browse the repository at this point in the history
1. Add proxy feature
2. Add dsl dictionary feature
  • Loading branch information
cdhigh committed Nov 21, 2024
1 parent e2d2c2d commit 94070ab
Show file tree
Hide file tree
Showing 48 changed files with 647 additions and 202 deletions.
4 changes: 2 additions & 2 deletions application/back_end/db_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#Author: cdhigh <https://github.com/cdhigh>
import os, random, datetime
from operator import attrgetter
from ..utils import PasswordManager, ke_encrypt, ke_decrypt, utcnow, compare_version
from ..ke_utils import PasswordManager, ke_encrypt, ke_decrypt, utcnow, compare_version

if os.getenv('DATABASE_URL', '').startswith(("datastore", "mongodb", "redis", "pickle")):
from .db_models_nosql import *
Expand Down Expand Up @@ -46,7 +46,7 @@ def cfg(self, item, default=None):
return {'email': '', 'kindle_email': '', 'secret_key': '', 'timezone': 0,
'inbound_email': 'save,forward', 'keep_in_email_days': 1,
'delivery_mode': 'email,local', 'webshelf_days': 7,
'reader_params': {}}.get(item, value)
'reader_params': {}, 'proxy': ''}.get(item, value)
else:
return value
def set_cfg(self, item, value):
Expand Down
4 changes: 2 additions & 2 deletions application/back_end/send_mail_adpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#https://cloud.google.com/appengine/docs/standard/python3/reference/services/bundled/google/appengine/api/mail
#https://cloud.google.com/appengine/docs/standard/python3/services/mail
import os, datetime, zipfile, base64
from ..utils import str_to_bool, sanitize_filename
from ..ke_utils import str_to_bool, sanitize_filename
from ..base_handler import save_delivery_log

#google.appengine will apply patch for os.env module
Expand Down Expand Up @@ -207,7 +207,7 @@ def mailjet_send_mail(apikey, secret_key, sender, to, subject, body, html=None,
def save_mail_to_local(dest_dir, subject, body, attachments=None, html=None, **kwargs):
attachments = attachments or []
mailDir = os.path.join(appDir, dest_dir)
if not os.path.exists(mailDir):
if not os.path.isdir(mailDir):
os.makedirs(mailDir)

now = str(datetime.datetime.now().strftime('%H-%M-%S'))
Expand Down
2 changes: 1 addition & 1 deletion application/back_end/task_queue_celery.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def __call__(self, *args, **kwargs):
transport_opts = {'data_folder_in': dir_in, 'data_folder_out': dir_out, 'processed_folder': dir_procsed,
'store_processed': True}
for d in [dir_, dir_in, dir_out, dir_procsed]:
if not os.path.exists(d):
if not os.path.isdir(d):
os.makedirs(d)
broker_url = 'filesystem://'

Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion application/lib/build_ebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from calibre.web.feeds.recipes import compile_recipe
from recipe_helper import GenerateRecipeSource
from urlopener import UrlOpener
from application.utils import loc_exc_pos
from application.ke_utils import loc_exc_pos

#从输入格式生成对应的输出格式
#input_: 如果是recipe,为编译后的recipe(或列表),或者是一个输入文件名,或一个BytesIO
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from application.utils import loc_exc_pos
from application.ke_utils import loc_exc_pos

class RecipeDisabled(Exception):
pass
Expand Down
2 changes: 1 addition & 1 deletion application/lib/calibre/ebooks/conversion/plumber.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from polyglot.builtins import string_or_bytes

from filesystem_dict import FsDictStub
from application.utils import get_directory_size, loc_exc_pos
from application.ke_utils import get_directory_size, loc_exc_pos
from application.base_handler import save_delivery_log

DEBUG_README=b'''
Expand Down
2 changes: 1 addition & 1 deletion application/lib/calibre/web/feeds/news.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from requests_file import LocalFileAdapter
from filesystem_dict import FsDictStub
from application.back_end.db_models import LastDelivered
from application.utils import loc_exc_pos
from application.ke_utils import loc_exc_pos

MASTHEAD_SIZE = (600, 60)
DEFAULT_MASTHEAD_IMAGE = 'mastheadImage.gif'
Expand Down
2 changes: 1 addition & 1 deletion application/lib/calibre/web/fetch/simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
URLError, quote, url2pathname, urljoin, urlparse, urlsplit, urlunparse,
urlunsplit, urlopen
)
from application.utils import loc_exc_pos
from application.ke_utils import loc_exc_pos

class AbortArticle(Exception):
pass
Expand Down
3 changes: 2 additions & 1 deletion application/lib/dictionary/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
from .oxford_learners import OxfordLearners
from .stardict import StarDict
from .mdict import MDict
from .lingvo import LingvoDict

all_dict_engines = {DictOrg.name: DictOrg, DictCn.name: DictCn, DictCc.name: DictCc,
MerriamWebster.name: MerriamWebster, OxfordLearners.name: OxfordLearners,
StarDict.name: StarDict, MDict.name: MDict}
StarDict.name: StarDict, MDict.name: MDict, LingvoDict.name: LingvoDict}

#创建一个词典实例
def CreateDictInst(engine, database, host=None):
Expand Down
3 changes: 3 additions & 0 deletions application/lib/dictionary/lingvo/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
from .lingvo_dict import LingvoDict
156 changes: 156 additions & 0 deletions application/lib/dictionary/lingvo/dsl_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#dsl离线词典支持,不支持dsl.dz,即使使用indexed_gzip还是慢,建议先解压为dsl再使用
#Author: cdhigh <https://github.com/cdhigh>
import os, re, logging, io
import chardet

try:
import marisa_trie
except:
marisa_trie = None

#外部接口
class DslReader:
    """Look up words in an uncompressed Lingvo ``.dsl`` dictionary file.

    A ``marisa_trie.RecordTrie`` index mapping each headword to the location
    of its definition block is stored next to the dictionary as ``<name>.trie``.
    The text encoding used to read the dictionary is stored as ``<name>.enc``.
    Both files are created on first use and reused afterwards.
    """
    TRIE_FMT = '>LH' #(start position of the definition, number of characters in the definition block)
    MAX_MEAN_CHARS = 65000 #upper bound for the character count so it fits the 16-bit 'H' field

    def __init__(self, fileName):
        """Load the saved trie for ``fileName``, or build it when missing or unreadable.

        fileName: path of the ``.dsl`` file.
        """
        self.log = logging.getLogger()
        self.fileName = fileName
        self.encoding = None
        firstPart = os.path.splitext(fileName)[0]
        self.trieFileName = firstPart + '.trie'
        self.encFileName = firstPart + '.enc'
        self.trie = None

        #reuse the encoding saved by a previous run
        if os.path.isfile(self.encFileName):
            with open(self.encFileName, 'r', encoding='utf-8') as f:
                self.encoding = f.read().strip()

        if os.path.isfile(self.trieFileName):
            try:
                self.trie = marisa_trie.RecordTrie(self.TRIE_FMT) #type:ignore
                self.trie.load(self.trieFileName)
            except Exception as e:
                self.trie = None
                self.log.warning(f'Failed to load dsldict trie data: {fileName}: {e}')

        if self.trie:
            return

        #no usable saved index: scan the dictionary and build the trie
        self.log.info(f"Building trie for {fileName}")
        self.buildTrie()

    def buildTrie(self):
        """Scan the whole dictionary, build the headword trie and save it to disk.

        To keep the code simple all index records are collected in memory first.
        Raises ImportError when the marisa_trie package is not available.
        """
        if marisa_trie is None:
            raise ImportError('marisa_trie is required to index dsl dictionaries')

        records = []
        currWord = ''
        meanStart = None
        meanWordCnt = 0
        #the with-block guarantees the file is closed even if scanning fails
        with self.openDslFile() as f:
            while True:
                #readline() (not iteration) is used so that f.tell() stays usable
                line = f.readline()
                if line.startswith(('#', r'{{', '\n', '\r')):
                    #header, comment and blank lines only add to the block length
                    meanWordCnt += len(line)
                    continue

                if not line: #end of file
                    if currWord and meanStart is not None:
                        records.append((currWord, (meanStart, min(meanWordCnt, self.MAX_MEAN_CHARS))))
                    break

                #an unindented line starts a new entry
                if not line.startswith((' ', '\t')):
                    if currWord and meanStart is not None:
                        #save the location of the previous entry
                        records.append((currWord, (meanStart, min(meanWordCnt, self.MAX_MEAN_CHARS))))
                        meanStart = None

                    currWord = line.strip()
                    if meanStart is None:
                        #f.tell() is very slow, so it is only called when needed
                        #NOTE(review): text-mode tell() returns an opaque cookie; it is
                        #assumed here to fit the 32-bit 'L' field - confirm for all encodings
                        meanStart = f.tell()
                        meanWordCnt = 0
                else: #indented line: part of the definition block
                    meanWordCnt += len(line)

        marisa_trie.RecordTrie(self.TRIE_FMT, records).save(self.trieFileName) #type:ignore
        del records
        #use a freshly loaded trie from the saved file (presumably so the
        #structures used while building can be released)
        trie = marisa_trie.RecordTrie(self.TRIE_FMT) #type:ignore
        trie.load(self.trieFileName)
        self.trie = trie

    def openDslFile(self):
        """Open the dictionary in text mode and return the file object.

        When the encoding is not known yet it is detected and written to the
        ``.enc`` file. Detection is needed because many dictionaries do not use
        unicode as the official format requires.
        """
        if not self.encoding:
            import chardet
            with open(self.fileName, 'rb') as f:
                data = f.read(10000)
            ret = chardet.detect(data)
            encoding = ret['encoding'] if ret['confidence'] >= 0.8 else None

            #detection not confident enough: try some candidates one by one
            if not encoding:
                for enc in ['utf-16', 'utf-16-le', 'windows-1252']:
                    try:
                        with open(self.fileName, 'r', encoding=enc) as f:
                            f.readline()
                        encoding = enc
                        break
                    except UnicodeError:
                        pass

            self.encoding = (encoding or 'utf-16').lower()
            with open(self.encFileName, 'w', encoding='utf-8') as fEnc:
                fEnc.write(self.encoding)

        return open(self.fileName, 'r', encoding=self.encoding)

    def get(self, word, default=''): #type:ignore
        """Return the definition of ``word`` converted to html.

        The word is tried as given, then lowercased, then capitalized.
        ``default`` is returned when no variant is indexed or no trie is loaded.
        """
        if not self.trie:
            return default

        for wd in (word, word.lower(), word.capitalize()):
            if wd in self.trie:
                break
        else:
            return default

        start, size = self.trie[wd][0]
        with self.openDslFile() as f:
            f.seek(start)
            lines = f.read(size).splitlines()
        #only indented lines belong to the definition
        mean = '\n'.join(line for line in lines if line.startswith((' ', '\t')))
        return self.dslMeanToHtml(mean)

    def dslMeanToHtml(self, mean):
        """Convert raw dsl definition text to html text.

        Escaped brackets (``\\[`` and ``\\]``) are kept as literal brackets and
        are never interpreted as dsl tags.
        """
        simpleTags = {"[']": '<u>', "[/']": '</u>', '[b]': '<b>', '[/b]': '</b>', '[i]': '<i>',
            '[/i]': '</i>', '[u]': '<u>', '[/u]': '</u>', '[sub]': '<sub>', '[/sub]': '</sub>',
            '[sup]': '<sup>', '[/sup]': '</sup>', '[/c]': '</span>', '@': '<br/>', '\t': '',
            '[*]': '<span>', '[/*]': '</span>', '\n': '<br/>',
            '[ex]': '<span style="color:#808080">', '[/ex]': '</span>',
            '[p]': '<i style="color:#008000">', '[/p]': '</i>',
            '[url]': '<span style="color:#0000ff;text-decoration:underline">', '[/url]': '</span>',
            '[ref]': '<span style="color:#0000ff;text-decoration:underline">', '[/ref]': '</span>',}
        removeTags = ['[/m]', '[com]', '[/com]', '[trn]', '[/trn]', '[trs]',
            '[/trs]', '[!trn]', '[/!trn]', '[!trs]', '[/!trs]', '[/lang]']

        #hide escaped brackets behind placeholders until all tags are processed,
        #otherwise text like \[p\] would be unescaped and then handled as a tag
        mean = mean.replace('\\[', '\x00').replace('\\]', '\x01')

        for tag, repl in simpleTags.items():
            mean = mean.replace(tag, repl)
        for tag in removeTags:
            mean = mean.replace(tag, '')

        #replace [mN] with N non-breaking spaces
        mean = re.sub(r'\[m\d+?\]', lambda match: '&nbsp;' * int(match.group(0)[2:-1]), mean)
        mean = re.sub(r'\[c.*?\]', '<span style="color:#006400">', mean)
        mean = re.sub(r'\[lang.*?\]', '', mean)
        mean = re.sub(r'\[s\].*?\[/s\]', '', mean)
        #browsers block the entry:// scheme, so cross references use an https link
        mean = re.sub(r'<<(.*?)>>', r'<a href="https://kindleear/entry/\1">\1</a>', mean)

        #restore the escaped brackets as literal characters
        return mean.replace('\x00', '[').replace('\x01', ']')
53 changes: 53 additions & 0 deletions application/lib/dictionary/lingvo/lingvo_dict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#lingvo dsl 离线词典支持
#Author: cdhigh <https://github.com/cdhigh>
import os, re
from application.ke_utils import loc_exc_pos
from .dsl_reader import DslReader

#获取本地的dsl文件列表,只有列表,没有校验是否有效
def getDslDictList():
    """Collect the ``.dsl`` files found under the DICTIONARY_DIR directory tree.

    Returns a dict whose keys are full file paths and whose values are the
    file names without extension (same layout as the other dictionary
    engines use for display). The files are only listed, not validated.
    An empty dict is returned when DICTIONARY_DIR is unset or not a directory.
    """
    root = os.environ.get('DICTIONARY_DIR')
    if not (root and os.path.isdir(root)):
        return {}

    return {os.path.join(folder, name): os.path.splitext(name)[0]
        for folder, _, names in os.walk(root)
        for name in names
        if name.lower().endswith('.dsl')}

class LingvoDict:
    """Dictionary engine for local Lingvo ``.dsl`` files."""
    name = "lingvo"
    #dictionary list: key is the full path of the dsl file, value is the dictionary name
    databases = getDslDictList()

    @classmethod
    def refresh(cls):
        """Rescan the dictionary directory and update the dictionary list."""
        cls.databases = getDslDictList()

    def __init__(self, database='', host=None):
        """Open the dictionary identified by ``database`` (a key of ``databases``).

        Failures do not raise: the message is logged and kept in
        ``self.initError``, which ``definition`` then returns.
        ``host`` is accepted but not used by this engine.
        """
        self.database = database
        self.dictionary = None
        self.initError = None
        if database in self.databases:
            try:
                self.dictionary = DslReader(database)
            except Exception:
                self.initError = loc_exc_pos(f'Init LingvoDict failed: {self.databases[database]}')
                default_log.warning(self.initError)
        else:
            #database is not a key of self.databases here, so it must not be
            #looked up (that would raise KeyError); report the requested value
            self.initError = f'Dict not found: {database}'
            default_log.warning(self.initError)

    def __repr__(self):
        """Return the engine name followed by the name of the current dictionary."""
        return '{} [{}]'.format(self.name, self.databases.get(self.database, ''))

    def definition(self, word, language=''):
        """Return the definition of ``word``, or the init error message if opening failed.

        ``language`` is accepted but not used by this engine.
        """
        return self.initError if self.initError else self.dictionary.get(word)
31 changes: 21 additions & 10 deletions application/lib/dictionary/mdict/mdict.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#mdx离线词典接口
#Author: cdhigh <https://github.com/cdhigh>
import os
from bs4 import BeautifulSoup
from application.utils import xml_escape
from application.ke_utils import xml_escape, loc_exc_pos
from .readmdict import MDX
try:
import marisa_trie
Expand Down Expand Up @@ -38,20 +39,25 @@ def refresh(cls):
def __init__(self, database='', host=None):
    """Open the mdx dictionary identified by ``database`` (a key of ``databases``).

    Failures do not raise: the message is logged and kept in ``self.initError``.
    ``host`` is accepted but not used here.
    """
    self.database = database
    self.dictionary = None
    self.initError = None
    if database in self.databases:
        try:
            self.dictionary = IndexedMdx(database)
        except Exception:
            self.initError = loc_exc_pos(f'Init mdict failed: {self.databases[database]}')
            default_log.warning(self.initError)
    else:
        #database is not a key of self.databases here, so it must not be
        #looked up (that would raise KeyError); report the requested value
        self.initError = f'Dict not found: {database}'
        default_log.warning(self.initError)

#返回当前使用的词典名字
def __repr__(self):
return 'mdict [{}]'.format(self.databases.get(self.database, ''))

def definition(self, word, language=''):
return self.dictionary.get(word) if self.dictionary else ''
if self.initError:
return self.initError
return self.dictionary.get(word)

#经过词典树缓存的Mdx
class IndexedMdx:
Expand Down Expand Up @@ -94,18 +100,23 @@ def __init__(self, fname, encoding="", substyle=False, passcode=None):
def get(self, word):
    """Return the content stored for ``word``, or '' when it is not found.

    The word is tried as given, then lowercased, then capitalized.
    ``:about`` returns the dictionary information, like the official mdict
    application. Content starting with ``@@@LINK=`` is a redirect: the
    content of the target word is returned when the target is indexed.
    """
    if not self.trie:
        return ''

    if word == ':about':
        return self.dict_html_info()

    for wd in (word, word.lower(), word.capitalize()):
        if wd in self.trie:
            #index with the variant that matched, not with the raw input
            indexes = self.trie[wd]
            break
    else:
        return ''

    ret = self.get_content_by_Index(indexes)
    if ret.startswith('@@@LINK='):
        word = ret[8:].strip()
        if word and word in self.trie:
            ret = self.get_content_by_Index(self.trie[word])
    return ret

def __contains__(self, word) -> bool:
Expand Down
3 changes: 3 additions & 0 deletions application/lib/dictionary/stardict/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
from .stardict import StarDict
Loading

0 comments on commit 94070ab

Please sign in to comment.