From 3b5cff7850d4a97b441de6476b15c61ea74126d5 Mon Sep 17 00:00:00 2001 From: cdhigh Date: Sat, 27 Apr 2024 08:30:00 -0300 Subject: [PATCH] Disable the embedded-content feature to work around a feedparser bug. --- application/lib/calibre/web/feeds/news.py | 2 +- application/lib/recipe_helper.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/application/lib/calibre/web/feeds/news.py b/application/lib/calibre/web/feeds/news.py index a6d33f8d..15edaa5d 100644 --- a/application/lib/calibre/web/feeds/news.py +++ b/application/lib/calibre/web/feeds/news.py @@ -2232,7 +2232,7 @@ def preprocess_raw_html(self, raw_html, url): self.remove_tags_from_rules(soup, rules) #提取失败,尝试自动提取 - if len(newBody.get_text(strip=True)) < 100: + if len(newBody.get_text(strip=True)) < 50: self.log.warning(f'Failed to extract content using content_extract_rules, try readability algorithm: {url}') try: raw_html = self.extract_readable_article(raw_html, url) diff --git a/application/lib/recipe_helper.py b/application/lib/recipe_helper.py index f5180c26..8af89742 100644 --- a/application/lib/recipe_helper.py +++ b/application/lib/recipe_helper.py @@ -13,7 +13,7 @@ def py3_repr(x): #根据输入的一些信息,自动创建一个recipe的源码 def GenerateRecipeSource(title, feeds, user, isfulltext=False, language=None, max_articles=30, - cover_url=None, base='AutomaticNewsRecipe'): + cover_url=None, base='BasicNewsRecipe'): className = f'UserRecipe{int(time.time())}' title = py3_repr(str(title).strip() or className) indent = ' ' * 8 @@ -30,6 +30,10 @@ def GenerateRecipeSource(title, feeds, user, isfulltext=False, language=None, ma desc = 'News from {}'.format(', '.join(feedTitles)) if feedTitles else 'Deliver from KindleEar' desc = desc[:100] oldest_article = user.book_cfg('oldest_article') + #至少到feedparser 6.0.11为止,其提取xml内的内容有bug,经常提取不到,在他修复之前,我们先暂停全文rss功能 + #全部都当非全文rss使用 + isfulltext = False + auto_cleanup = 'False' if isfulltext else 'True' isfulltext = 'True' if isfulltext else 'None' language = language or 
user.book_cfg('language') timefmt = user.book_cfg('time_fmt') @@ -45,6 +49,7 @@ class {className}({base}): max_articles_per_feed = {max_articles} oldest_article = {oldest_article} use_embedded_content = {isfulltext} + auto_cleanup = {auto_cleanup} timefmt = '{timefmt}' cover_url = {cover_url} {feeds}''')