Skip to content

Commit

Permalink
Disable the embedded-content (full-text RSS) feature to work around a feedparser bug.
Browse files Browse the repository at this point in the history
  • Loading branch information
cdhigh committed Apr 27, 2024
1 parent a3efd16 commit 3b5cff7
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 2 deletions.
2 changes: 1 addition & 1 deletion application/lib/calibre/web/feeds/news.py
Original file line number Diff line number Diff line change
Expand Up @@ -2232,7 +2232,7 @@ def preprocess_raw_html(self, raw_html, url):
self.remove_tags_from_rules(soup, rules)

#提取失败,尝试自动提取
if len(newBody.get_text(strip=True)) < 100:
if len(newBody.get_text(strip=True)) < 50:
self.log.warning(f'Failed to extract content using content_extract_rules, try readability algorithm: {url}')
try:
raw_html = self.extract_readable_article(raw_html, url)
Expand Down
7 changes: 6 additions & 1 deletion application/lib/recipe_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def py3_repr(x):

#根据输入的一些信息,自动创建一个recipe的源码
def GenerateRecipeSource(title, feeds, user, isfulltext=False, language=None, max_articles=30,
cover_url=None, base='AutomaticNewsRecipe'):
cover_url=None, base='BasicNewsRecipe'):
className = f'UserRecipe{int(time.time())}'
title = py3_repr(str(title).strip() or className)
indent = ' ' * 8
Expand All @@ -30,6 +30,10 @@ def GenerateRecipeSource(title, feeds, user, isfulltext=False, language=None, ma
desc = 'News from {}'.format(', '.join(feedTitles)) if feedTitles else 'Deliver from KindleEar'
desc = desc[:100]
oldest_article = user.book_cfg('oldest_article')
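# At least up to feedparser 6.0.11, its extraction of content embedded in the XML is buggy and often returns nothing; until that is fixed upstream, the full-text RSS feature is suspended
# and every feed is treated as a non-full-text RSS feed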
isfulltext = False
auto_cleanup = 'False' if isfulltext else 'True'
isfulltext = 'True' if isfulltext else 'None'
language = language or user.book_cfg('language')
timefmt = user.book_cfg('time_fmt')
Expand All @@ -45,6 +49,7 @@ class {className}({base}):
max_articles_per_feed = {max_articles}
oldest_article = {oldest_article}
use_embedded_content = {isfulltext}
auto_cleanup = {auto_cleanup}
timefmt = '{timefmt}'
cover_url = {cover_url}
{feeds}''')
Expand Down

0 comments on commit 3b5cff7

Please sign in to comment.