Skip to content

Commit

Permalink
3.0.0E
Browse files Browse the repository at this point in the history
  • Loading branch information
cdhigh committed Apr 21, 2024
1 parent e3fa272 commit 34d1274
Show file tree
Hide file tree
Showing 28 changed files with 820 additions and 201 deletions.
8 changes: 5 additions & 3 deletions application/back_end/send_mail_adpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,11 @@ def avaliable_sm_services():
#title: 邮件标题
#attachment: 附件二进制内容,或元组 (filename, content)
#fileWithTime: 发送的附件文件名是否附带当前时间
def send_to_kindle(user, title, attachment, fileWithTime=True):
#to: 目标邮件地址,可以为列表或逗号分隔的字符串,如果为空,则使用kindle_email
def send_to_kindle(user, title, attachment, fileWithTime=True, to=None):
lcTime = user.local_time('%Y-%m-%d_%H-%M')
subject = f"KindleEar {lcTime}"
to = to or user.cfg('kindle_email')

if not isinstance(attachment, tuple):
lcTime = "({})".format(lcTime) if fileWithTime else ""
Expand All @@ -72,13 +74,13 @@ def send_to_kindle(user, title, attachment, fileWithTime=True):
status = 'ok'
body = "Deliver from KindleEar"
try:
send_mail(user, user.cfg('kindle_email'), subject, body, attachment)
send_mail(user, to, subject, body, attachment)
except Exception as e:
status = str(e)
default_log.warning(f'Failed to send mail "{title}": {status}')

size = sum([len(a[1]) for a in attachment])
save_delivery_log(user, title, size, status=status)
save_delivery_log(user, title, size, status=status, to=to)

#统一的发送邮件函数
def send_mail(user, to, subject, body, attachments=None, html=None):
Expand Down
3 changes: 2 additions & 1 deletion application/lib/calibre/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,5 @@ def __getitem__(self, name):

plugins = Plugins()
config_dir = ""
DEBUG = False
DEBUG = False
CONFIG_DIR_MODE = 0o700
4 changes: 3 additions & 1 deletion application/lib/calibre/ebooks/conversion/plumber.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,8 +376,10 @@ def run(self):
# f.write(DEBUG_README)
for x in ('input', '0.parsed', '1.structure', '2.processed'):
x = os.path.join(self.opts.debug_pipeline, x)
if os.path.exists(x):
try:
shutil.rmtree(x)
except:
pass

self.output_plugin.specialize_options(self.log, self.opts, self.input_fmt)
#根据需要,创建临时目录或创建内存缓存
Expand Down
54 changes: 17 additions & 37 deletions application/lib/calibre/ebooks/oeb/polish/parsing.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
Expand All @@ -18,6 +18,7 @@
from html5lib.treebuilders.base import TreeBuilder as BaseTreeBuilder
from html5lib._ihatexml import InfosetFilter, DataLossWarning
from html5lib.html5parser import HTMLParser
import html5lib

from calibre import xml_replace_entities
from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
Expand Down Expand Up @@ -57,9 +58,9 @@ class Element(ElementBase):
def __str__(self):
    """Debug representation: '<prefix:name attrs (id)>' for this element.

    Uses the element's namespace map to recover the prefix for its
    namespace; attributes are rendered as key="value" pairs.
    """
    attrs = ''
    if self.attrib:
        attrs = ' ' + ' '.join('%s="%s"' % (k, v) for k, v in self.attrib.items())
    # Strip the '{ns}' wrapper from the qualified tag to get the namespace URI
    ns = self.tag.rpartition('}')[0][1:]
    # Reverse-lookup the prefix bound to this namespace (None prefix -> '')
    prefix = {v: k for k, v in self.nsmap.items()}[ns] or ''
    if prefix:
        prefix += ':'
    return '<%s%s%s (%s)>' % (prefix, getattr(self, 'name', self.tag), attrs, hex(id(self)))
Expand Down Expand Up @@ -227,7 +228,7 @@ def clean_attrib(name, val, nsmap, attrib, namespaced_attribs):
return None, True
nsmap_changed = False
if ns == xlink_ns and 'xlink' not in nsmap:
for prefix, nns in tuple(nsmap.iteritems()):
for prefix, nns in tuple(nsmap.items()):
if nns == xlink_ns:
del nsmap[prefix]
nsmap['xlink'] = xlink_ns
Expand All @@ -239,7 +240,7 @@ def clean_attrib(name, val, nsmap, attrib, namespaced_attribs):
if prefix == 'xmlns':
# Use an existing prefix for this namespace, if
# possible
existing = {x:k for k, x in nsmap.iteritems()}.get(val, False)
existing = {x:k for k, x in nsmap.items()}.get(val, False)
if existing is not False:
name = existing
nsmap[name] = val
Expand Down Expand Up @@ -270,7 +271,7 @@ def makeelement_ns(ctx, namespace, prefix, name, attrib, nsmap):
# constructor, therefore they have to be set one by one.
nsmap_changed = False
namespaced_attribs = {}
for k, v in attrib.iteritems():
for k, v in attrib.items():
try:
elem.set(k, v)
except (ValueError, TypeError):
Expand All @@ -285,7 +286,7 @@ def makeelement_ns(ctx, namespace, prefix, name, attrib, nsmap):
nelem = ctx.makeelement(elem.tag, nsmap=nsmap)
for k, v in elem.items(): # Only elem.items() preserves attrib order
nelem.set(k, v)
for (prefix, name), v in namespaced_attribs.iteritems():
for (prefix, name), v in namespaced_attribs.items():
ns = nsmap.get(prefix, None)
if ns is not None:
try:
Expand All @@ -307,7 +308,7 @@ def makeelement_ns(ctx, namespace, prefix, name, attrib, nsmap):

# Ensure that svg and mathml elements get no namespace prefixes
if elem.prefix is not None and namespace in known_namespaces:
for k, v in tuple(nsmap.iteritems()):
for k, v in tuple(nsmap.items()):
if v == namespace:
del nsmap[k]
nsmap[None] = namespace
Expand Down Expand Up @@ -420,7 +421,7 @@ def apply_html_attributes(self, attrs):
if not attrs:
return
html = self.openElements[0]
for k, v in attrs.iteritems():
for k, v in attrs.items():
if k not in html.attrib and k != 'xmlns':
try:
html.set(k, v)
Expand Down Expand Up @@ -448,7 +449,7 @@ def apply_body_attributes(self, attrs):
if not attrs:
return
body = self.openElements[1]
for k, v in attrs.iteritems():
for k, v in attrs.items():
if k not in body.attrib and k !='xmlns':
try:
body.set(k, v)
Expand All @@ -473,7 +474,7 @@ def makeelement(ctx, name, attrib):
elem = ctx.makeelement(name)
except ValueError:
elem = ctx.makeelement(to_xml_name(name))
for k, v in attrib.iteritems():
for k, v in attrib.items():
try:
elem.set(k, v)
except TypeError:
Expand Down Expand Up @@ -517,7 +518,7 @@ def apply_html_attributes(self, attrs):
if not attrs:
return
html = self.openElements[0]
for k, v in attrs.iteritems():
for k, v in attrs.items():
if k not in html.attrib and k != 'xmlns':
try:
html.set(k, v)
Expand All @@ -530,7 +531,7 @@ def apply_body_attributes(self, attrs):
if not attrs:
return
body = self.openElements[1]
for k, v in attrs.iteritems():
for k, v in attrs.items():
if k not in body.attrib and k != 'xmlns':
try:
body.set(k, v)
Expand Down Expand Up @@ -630,35 +631,14 @@ def html5_parse(raw, decoder=None, log=None, discard_namespaces=False, line_numb

def parse_html5(raw, decoder=None, log=None, discard_namespaces=False, line_numbers=True, linenumber_attribute=None, replace_entities=True, fix_newlines=True):
    """Parse HTML into an lxml tree using html5lib and return the root element.

    raw: str or bytes HTML source. Bytes are decoded with *decoder* when
        provided, otherwise via xml_to_unicode's charset detection.
    decoder: optional callable mapping bytes -> str.
    replace_entities: replace named HTML entities with their characters.
    fix_newlines: normalize '\\r\\n' and '\\r' to '\\n'.
    Returns: the lxml root element of the parsed document.

    NOTE(review): log, discard_namespaces, line_numbers and
    linenumber_attribute are kept for interface compatibility but are no
    longer honoured by this html5lib-based implementation.
    """
    if isinstance(raw, bytes):
        raw = decoder(raw) if decoder else xml_to_unicode(raw)[0]
    if replace_entities:
        raw = xml_replace_entities(raw)
    if fix_newlines:
        raw = raw.replace('\r\n', '\n').replace('\r', '\n')
    # Drop control characters that lxml rejects
    raw = replace_chars.sub('', raw)

    doc = html5lib.parse(raw, treebuilder="lxml", namespaceHTMLElements=False)
    return doc.getroot()

def parse(raw, decoder=None, log=None, line_numbers=True, linenumber_attribute=None, replace_entities=True, force_html5_parse=False):
if isinstance(raw, bytes):
Expand Down
7 changes: 5 additions & 2 deletions application/lib/calibre/utils/img.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,11 @@ def save_cover_data_to(
ratio = min(newWidth / width, newHeight / height)
img = img.resize((int(width * ratio), int(height * ratio)), Image.Resampling.LANCZOS)

if (grayscale or eink) and img.mode != "L":
img = img.convert("L")
if (grayscale or eink) and img.mode != 'L':
img = img.convert('L')
changed = True
elif img.mode == 'LA' or (img.mode == 'P' and 'transparency' in img.info):
img = img.convert('RGBA').convert('RGB')
changed = True
elif img.mode != 'RGB':
img = img.convert('RGB')
Expand Down
65 changes: 52 additions & 13 deletions application/lib/calibre/web/feeds/news.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ptempfile import PersistentTemporaryFile
from calibre.ptempfile import PersistentTemporaryFile, PersistentTemporaryDirectory
from calibre.utils.img import save_cover_data_to
from calibre.utils.date import now as nowf
from calibre.utils.localization import canonicalize_lang, ngettext
Expand Down Expand Up @@ -433,6 +433,10 @@ class BasicNewsRecipe(Recipe):
#: Set to False if you do not want to use gzipped transfers. Note that some old servers flake out with gzip
handle_gzip = True

# set by worker.py
translator = {}
tts = {}

# See the built-in recipes for examples of these settings.

def short_title(self):
Expand Down Expand Up @@ -960,7 +964,7 @@ def __init__(self, options, log, output_dir, fs, feed_index_start=0):
elif self.scale_news_images_to_device:
self.scale_news_images = options.output_profile.screen_size

self.w2d_opts = wOpts = Web2diskOptions()
self.web2disk_options = wOpts = Web2diskOptions()
for attr in ('keep_only_tags', 'remove_tags', 'preprocess_regexps', 'skip_ad_pages', 'preprocess_html',
'remove_tags_after', 'remove_tags_before', 'is_link_wanted', 'compress_news_images',
'compress_news_images_max_size', 'compress_news_images_auto_size', 'scale_news_images', 'filter_regexps',
Expand Down Expand Up @@ -1063,6 +1067,10 @@ def _postprocess_html(self, soup, first_fetch, job_info):
h_tag = soup.new_tag('h2')
h_tag.string = title
body_tag.insert(0, h_tag)
elif h_tag: #去掉标题前面的部分内容
for tag in h_tag.previous_siblings:
if len(tag.get_text(strip=True)) < 20:
tag.extract()

#job_info.article.url才是真实的url,对于内嵌内容RSS,job_info.url为一个临时文件名
self.append_share_links(soup, url=job_info.article.url)
Expand All @@ -1074,8 +1082,12 @@ def _postprocess_html(self, soup, first_fetch, job_info):
'figcaption', 'figure', 'section', 'time']):
x.name = 'div'

#If tts need, tts propery is set by WorkerImpl
if self.tts.get('enable'):
self.audiofy_html(soup, title, job_info)

#If translation need, translator propery is set by WorkerImpl
if (getattr(self, 'translator', None) or {}).get('enable'):
if self.translator.get('enable'):
self.translate_html(soup, title)

if job_info:
Expand Down Expand Up @@ -1284,10 +1296,10 @@ def feed2index(self, f, feeds):
def _fetch_article(self, job_info, preloaded=None):
url = job_info.url
br = self.browser
self.w2d_opts.browser = br
self.w2d_opts.dir = job_info.art_dir
self.web2disk_options.browser = br
self.web2disk_options.dir = job_info.art_dir

fetcher = RecursiveFetcher(self.w2d_opts, self.fs, self.log, job_info, self.image_map, self.css_map)
fetcher = RecursiveFetcher(self.web2disk_options, self.fs, self.log, job_info, self.image_map, self.css_map)
fetcher.browser = br
fetcher.base_dir = job_info.art_dir
fetcher.current_dir = job_info.art_dir
Expand Down Expand Up @@ -1456,7 +1468,9 @@ def build_index(self):
self.jobs.append(req)

self.jobs_done = 0
if self.simultaneous_downloads > 1:
trans_enable = self.translator.get('enable') or self.tts.get('enable')
#如果翻译使能,则不能使用多线程,否则容易触发流量告警导致IP被封锁
if (self.simultaneous_downloads > 1) and not trans_enable:
tp = ThreadPool(self.simultaneous_downloads)
for req in self.jobs:
tp.putRequest(req, block=True, timeout=0)
Expand All @@ -1482,7 +1496,7 @@ def build_index(self):
raise ValueError('No articles downloaded, aborting')

#翻译Feed的标题
if (getattr(self, 'translator', None) or {}).get('enable'):
if self.translator.get('enable'):
self.translate_titles(feeds)

for f, feed in enumerate(feeds, self.feed_index_start):
Expand Down Expand Up @@ -1558,8 +1572,8 @@ def _download_masthead(self, mu):
def download_masthead(self, url):
    """Download the masthead image at *url*; failures are logged, not raised."""
    try:
        self._download_masthead(url)
    except Exception as e:
        # A missing masthead must not abort the whole recipe run
        self.log.exception(f"Failed to download supplied masthead_url: {e}")

def resolve_masthead(self):
self.masthead_path = None
Expand Down Expand Up @@ -2000,6 +2014,31 @@ def translate_titles(self, feeds):
else: #replace
item['obj'].title = item['translated']

#Convert the article html to speech through an online TTS service platform.
#Each generated audio segment is written to disk with a deterministic
#feed/article/segment numbered filename so the pieces can be merged later.
def audiofy_html(self, soup, title, job_info):
    """Audiofy the html in *soup* using the recipe's tts configuration.

    soup: BeautifulSoup of the article html.
    title: article title, used for logging only.
    job_info: provides f_idx/a_idx used to build the audio file names.
    """
    from ebook_tts import HtmlAudiolator
    audiolator = HtmlAudiolator(self.tts)
    self.log.debug(f'Audiofying [{title}]')
    ret = audiolator.audiofy_soup(soup)
    if not ret['error']:
        #Save audio to real disk files: the final mp3 merge step cannot
        #work against the virtual file system (self.fs).
        if not self.tts.get('audio_dir'):
            system_temp_dir = os.environ.get('KE_TEMP_DIR')
            self.tts['audio_dir'] = PersistentTemporaryDirectory(prefix='tts_', dir=system_temp_dir)
        audio_dir = self.tts['audio_dir']
        ext = ret['mime'].split('/')[-1]
        ext = {'mpeg': 'mp3'}.get(ext, ext)  #normalize audio/mpeg -> mp3
        for idx, audio in enumerate(ret['audios']):
            filename = f'{job_info.f_idx:04d}_{job_info.a_idx:04d}_{idx:04d}.{ext}'
            filename = os.path.join(audio_dir, filename)
            try:
                with open(filename, 'wb') as f:
                    f.write(audio)
            except Exception as e:
                #Report which file failed instead of an opaque placeholder
                self.log.warning(f'Failed to write "{filename}": {e}')
    else:
        self.log.warning(f'Failed to audiofy "{title}": {ret["error"]}')

class CustomIndexRecipe(BasicNewsRecipe):

Expand All @@ -2025,8 +2064,8 @@ def create_opf(self):
def download(self):
index = self.custom_index()
url = 'file:'+index if iswindows else 'file://'+index
self.w2d_opts.browser = self.clone_browser(self.browser)
fetcher = RecursiveFetcher(self.w2d_opts, self.fs, self.log)
self.web2disk_options.browser = self.clone_browser(self.browser)
fetcher = RecursiveFetcher(self.web2disk_options, self.fs, self.log)
fetcher.base_dir = self.output_dir
fetcher.current_dir = self.output_dir
fetcher.show_progress = False
Expand Down Expand Up @@ -2109,7 +2148,7 @@ def parse_feeds(self):
continue

added.add(url)
lastTime = LastDelivered.get_or_none(user=self.user.name, url=url)
lastTime = LastDelivered.get_or_none((LastDelivered.user==self.user.name) & (LastDelivered.url==url))
delta = (datetime.datetime.utcnow() - lastTime.datetime) if lastTime else None
#这里oldest_article和其他的recipe不一样,这个参数表示在这个区间内不会重复推送
if ((not lastTime) or (not self.oldest_article) or
Expand Down
Loading

0 comments on commit 34d1274

Please sign in to comment.