Skip to content

Commit

Permalink
improve translator
Browse files Browse the repository at this point in the history
  • Loading branch information
cdhigh committed Dec 17, 2024
1 parent 8820a65 commit 8289036
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 14 deletions.
33 changes: 23 additions & 10 deletions application/lib/calibre/web/feeds/news.py
Original file line number Diff line number Diff line change
Expand Up @@ -1319,7 +1319,7 @@ def description_limiter(cls, src):
from calibre.utils.cleantext import clean_xml_chars

# Truncating the string could cause a dangling UTF-16 half-surrogate, which will cause lxml to barf, clean it
ans = clean_xml_chars(ans) + '\u2026'
ans = clean_xml_chars(ans) + ''
return ans

#生成Feed对应的html内容,一个Feed就是根据一个Rss xml生成的html,里面会有多篇文章
Expand Down Expand Up @@ -2068,24 +2068,37 @@ def translate_html(self, soup, title):
translator = HtmlTranslator(self.translator, self.simultaneous_downloads)
translator.translate_soup(soup)

#翻译Feed的title,toc时用到
#翻译Feed的title/text_summary,toc时用到
def translate_titles(self, feeds):
    """Translate feed/article titles and summaries in place (used by the TOC).

    For every feed its ``title`` and ``description`` are collected, and for
    every article in the feed its ``title`` and ``text_summary``. All
    collected strings are sent to the translator in a single
    ``translate_text`` call, and each successfully translated string is
    written back onto the attribute it came from.

    How the translation is combined with the original text depends on
    ``self.translator['position']`` (default ``'below'``):

    * ``'below'`` / ``'right'``: original first, then the translation
    * ``'above'`` / ``'left'``: translation first, then the original
    * anything else: the translation replaces the original

    Titles are joined with a single space; every other attribute is joined
    with ``<br/>``.

    Args:
        feeds: iterable of feed objects; iterating a feed yields its
            articles.

    Returns:
        None. The feed and article objects are modified in place.
    """
    from ebook_translator import HtmlTranslator
    translator = HtmlTranslator(self.translator, self.simultaneous_downloads)
    position = self.translator.get('position', 'below')
    texts = []

    def _add_text_item(obj, attr):
        # Queue obj.<attr> for translation. Missing or empty values are
        # skipped: there is nothing to translate, and a None text could not
        # be concatenated with the translation afterwards.
        text = getattr(obj, attr, None)
        if text:
            texts.append({'text': text, 'obj': obj, 'attr': attr})

    for feed in feeds:
        _add_text_item(feed, 'title')
        _add_text_item(feed, 'description')
        for article in feed:
            _add_text_item(article, 'title')
            _add_text_item(article, 'text_summary')

    if not texts:
        return

    for item in translator.translate_text(texts):
        # Leave the original untouched when translation failed, produced
        # nothing, or produced the same text (avoids "Foo Foo" duplicates).
        if item['error'] or not item['translated'] or item['translated'] == item['text']:
            continue

        obj = item['obj']
        attr = item['attr']
        # Titles stay on one line; descriptions/summaries get a line break.
        sep = ' ' if attr == 'title' else '<br/>'
        if position in ('below', 'right'):
            setattr(obj, attr, item['text'] + sep + item['translated'])
        elif position in ('above', 'left'):
            setattr(obj, attr, item['translated'] + sep + item['text'])
        else:  # replace
            setattr(obj, attr, item['translated'])

#调用在线TTS服务平台,将html转为语音
#每个音频片段都会调用一次callback(audioDict, title, feed_index, article_index)
Expand Down
9 changes: 5 additions & 4 deletions application/lib/calibre/web/feeds/templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from lxml.html.builder import HTML, HEAD, META, TITLE, STYLE, DIV, BODY, \
STRONG, BR, SPAN, A, HR, UL, LI, H2, H3, IMG, P as PT, \
TABLE, TD, TR

from lxml.html import fromstring
from calibre import strftime, isbytestring
from calibre.utils.localization import _

Expand Down Expand Up @@ -169,7 +169,8 @@ def _generate(self, f, feeds, cutoff, extra_css=None, style=None):
),
attrs('calibre_feed_image')))
if getattr(feed, 'description', None):
d = DIV(clean_xml_chars(feed.description), attrs('calibre_feed_description', rescale=80))
feedDesc = fromstring(clean_xml_chars(feed.description)) #保留里面的<br/>
d = DIV(feedDesc, attrs('calibre_feed_description', rescale=80))
d.append(BR())
div.append(d)
ul = UL(attrs('calibre_article_list'))
Expand All @@ -184,8 +185,8 @@ def _generate(self, f, feeds, cutoff, extra_css=None, style=None):
style='padding-bottom:0.5em')
)
if article.summary:
li.append(DIV(clean_xml_chars(cutoff(article.text_summary)),
attrs('article_description', rescale=70)))
artiSummary = fromstring(clean_xml_chars(cutoff(article.text_summary))) #保留里面的<br/>
li.append(DIV(artiSummary, attrs('article_description', rescale=70)))
ul.append(li)
div.append(ul)
#div.append(self.get_navbar(f, feeds, top=False))
Expand Down

0 comments on commit 8289036

Please sign in to comment.