From f87d4beedf0803c8233542e3fd9279c26ae14570 Mon Sep 17 00:00:00 2001 From: cdhigh Date: Mon, 18 Nov 2024 13:33:43 -0300 Subject: [PATCH] improve ai summarizer --- .../lib/ebook_summarizer/html_summarizer.py | 54 ++++++------- application/lib/simple_ai_provider.py | 65 +++++---------- application/lib/urlopener.py | 4 +- application/static/base.js | 76 +++++++++--------- application/templates/book_audiolator.html | 2 +- application/templates/book_summarizer.html | 34 ++++---- application/templates/book_translator.html | 2 +- application/translations/messages.pot | 47 ++++++----- .../tr_TR/LC_MESSAGES/messages.mo | Bin 30671 -> 30768 bytes .../tr_TR/LC_MESSAGES/messages.po | 51 +++++++----- .../translations/zh/LC_MESSAGES/messages.mo | Bin 28834 -> 28935 bytes .../translations/zh/LC_MESSAGES/messages.po | 52 +++++++----- application/view/translator.py | 9 ++- 13 files changed, 202 insertions(+), 194 deletions(-) diff --git a/application/lib/ebook_summarizer/html_summarizer.py b/application/lib/ebook_summarizer/html_summarizer.py index 8d71d1b6..e2312751 100644 --- a/application/lib/ebook_summarizer/html_summarizer.py +++ b/application/lib/ebook_summarizer/html_summarizer.py @@ -4,6 +4,7 @@ #Author: cdhigh import re, time import simple_ai_provider +from application.utils import loc_exc_pos def get_summarizer_engines(): return simple_ai_provider._PROV_AI_LIST @@ -32,22 +33,22 @@ def summarize_text(self, text): if chunkSize < 2000: chunkSize = 2000 - summarySize = self.params.get('summary_size', 200) + words = self.params.get('summary_words', 200) summary = '' errMsg = '' lang = self.params.get('summary_lang', '') if lang: summaryTips = (f"Summarize the following text in {lang}. The summary should accurately represent the content " - f"and be no more than {summarySize} words:\n\n") + f"and be no more than {words} words:\n\n") else: summaryTips = (f"Summarize the following text in the same language as the original text. The summary should accurately represent the content " - f"and be no more than {summarySize} words:\n\n") + f"and be no more than {words} words:\n\n") text = re.sub(r'<[^>]+>', '', text)[:chunkSize] - #try: - summary = self.aiAgent.chat(f"{summaryTips}{text}") - #except Exception as e: - #errMsg = str(e) + try: + summary = self.aiAgent.chat(f"{summaryTips}{text}") + except Exception as e: + errMsg = str(e) return {'error': errMsg, 'summary': summary} @@ -71,40 +72,37 @@ def summarize_soup(self, soup, chunkSize=None, maxIterations=5): #将文本分块,这个分块比较粗糙,可能按照段落分块会更好,但是考虑到AI的适应能力比较强, #并且仅用于生成摘要,所以这个简单方案还是可以接受的 chunks = [text[i:i + chunkSize] for i in range(0, len(text), chunkSize)] - summarySize = self.params.get('summary_size', 200) + words = self.params.get('summary_words', 0) or 200 interval = self.engineProperty.get('request_interval', 0) - summary = None - + summaryTips = self.params.get('custom_prompt', '') lang = self.params.get('summary_lang', '') - if lang: - summaryTips = f"Please refine or update the summary based on the following text block, ensuring the summary is in the language: {lang}, and make it more accurately reflect the article content:\n\n" + if summaryTips: #使用自定义prompt + summaryTips = summaryTips.replace('{lang}', lang).replace('{words}', str(words)) + elif lang: + summaryTips = f"Please improve and update the existing summary of the following text block(s), ensuring the summary is written in the language of {lang}. The updated summary should accurately reflect the content while distilling key points, arguments, and conclusions, and should not exceed {words} words:" else: - summaryTips = f"Please refine or update the summary based on the following text block, ensuring the summary is in the same language as the article/preset summary, and make it more accurately reflect the article content:\n\n" - errMsg = '' + summaryTips = f"Please improve and update the existing summary of the following text block(s), ensuring it is in the same language as the article and preset summary, while accurately reflecting the content and distilling key points, arguments, and conclusions. The updated summary should not exceed {words} words:" + + summary = None for i, chunk in enumerate(chunks[:maxIterations]): - prompt = ( - f"The current summary is:\n{summary}\n\n{summaryTips}" - f"Text block {i + 1}:\n{chunk}\n\n" - f"Please generate an updated summary of no more than {summarySize} words." - ) + prompt = f"Existing summary:\n{summary}\n\n{summaryTips}\n\nText block {i + 1}:\n{chunk}\n\n" + try: summary = self.aiAgent.chat(prompt) - except Exception as e: - errMsg = str(e) - break + except: + default_log.info(loc_exc_pos('Error in summary_soup')) + return + if interval > 0: time.sleep(interval) - if errMsg: - default_log.info(f'Error in summary_soup: {errMsg}') - return - #将摘要插在文章标题之后 - summaryTag = soup.new_tag('p', attrs={'class': 'ai_generated_summary'}) + summaryTag = soup.new_tag('p', attrs={'class': 'ai_generated_summary', + 'data-aiagent': str(self.aiAgent)}) style = self.params.get('summary_style', '') if style: summaryTag['style'] = style - b = soup.new_tag('b') + b = soup.new_tag('b', attrs={'class': 'ai_summary_hint'}) b.string = 'AI-Generated Summary: ' summaryTag.append(b) summaryTag.append(summary) diff --git a/application/lib/simple_ai_provider.py b/application/lib/simple_ai_provider.py index 4e8d1567..2923e15e 100644 --- a/application/lib/simple_ai_provider.py +++ b/application/lib/simple_ai_provider.py @@ -19,7 +19,7 @@ 'Openai': { 'models': ['GPT-4o mini', 'GPT-4o', 'GPT-4 Turbo', 'gpt-3.5-turbo', 'GPT-3.5 Turbo Instruct'], 'request_interval': 10, - 'context_size': 4096}, + 'context_size': 4000}, 'Anthropic': { 'models': ['claude-2', 'claude-3', 'claude-1'], 'request_interval': 6, @@ -27,7 +27,7 @@ 'Grok': { 'models': ['grok-beta'], 'request_interval': 6, - 'context_size': 4096}, + 'context_size': 4000}, 'Mistral': { 'models': ['open-mistral-7b', 'mistral-small-latest', 'open-mixtral-8x7b', 'open-mixtral-8x22b', 'mistral-small-2402', 'mistral-small-2409', 'mistral-medium', 'mistral-large-2402', 'mistral-large-2407', @@ -38,20 +38,15 @@ 'models': ['gemma2-9b-it', 'gemma-7b-it', 'llama-guard-3-8b', 'llama3-70b-8192', 'llama3-8b-8192', 'mixtral-8x7b-32768'], 'request_interval': 2, - 'context_size': 8192}, - - # 'cohere': { - # 'models': ['command-xlarge-nightly'], - # 'request_interval': 6, - # 'context_size': 2048}, - # 'alibaba': { - # 'models': ['tongyi-qianwen-base'], - # 'request_interval': 6, - # 'context_size': 4096}, - # 'baidu': { + 'context_size': 8000}, + 'Alibaba': { + 'models': ['qwen-turbo', 'qwen-plus', 'qwen-long'], + 'request_interval': 1, + 'context_size': 130000}, + # 'Baidu': { # 'models': ['ernie-bot'], # 'request_interval': 6, - # 'context_size': 4096}, + # 'context_size': 4000}, } class SimpleAiProvider: @@ -63,6 +58,9 @@ def __init__(self, name, api_key, model=None, api_host=None): self.api_host = api_host self.opener = UrlOpener() + def __repr__(self): + return f'{self.name}({self.model})' + #返回支持的AI供应商列表,返回一个python字典 def ai_list(self): return _PROV_AI_LIST @@ -84,11 +82,9 @@ def chat(self, message): return self._mistral_chat(message) elif name == 'Groq': return self._groq_chat(message) - # elif name == "cohere": - # return self._cohere_chat(message) - # elif name == "alibaba": - # return self._alibaba_chat(message) - # elif name == "baidu": + elif name == "Alibaba": + return self._alibaba_chat(message) + # elif name == "Baidu": # return self._baidu_chat(message) else: raise ValueError(f"Unsupported provider: {name}") @@ -135,19 +131,6 @@ def _gemini_chat(self, message): contents = response.json()["candidates"][0]["content"] return contents['parts'][0]['text'] - #cohere的chat接口 - def _cohere_chat(self, message): - url = self.api_host if self.api_host else 'https://api.cohere.ai/v1/generate' - headers = {"Authorization": f"Bearer {self.api_key}"} - payload = { - "model": self.model or _PROV_AI_LIST['cohere']['models'][0], - "text": message, - "max_tokens": 300 - } - response = self.opener.post(url, headers=headers, json=payload) - response.raise_for_status() - return response.json()["generations"][0]["text"] - #grok的chat接口 def _grok_chat(self, message): #直接使用openai兼容接口 @@ -163,21 +146,11 @@ def _groq_chat(self, message): #直接使用openai兼容接口 return self._openai_chat(message, defaultUrl='https://api.groq.com/openai/v1/chat/completions') + #通义千问 def _alibaba_chat(self, message): - url = self.api_host if self.api_host else 'https://api.aliyun.com/v1/ai/chat' - headers = { - "Authorization": f"Bearer {self.api_key}", - "Content-Type": "application/json" - } - payload = { - "model": self.model or _PROV_AI_LIST['alibaba']['models'][0], - "messages": [{"role": "user", "content": message}] if isinstance(message, str) else message, - "max_tokens": 300 - } - response = self.opener.post(url, headers=headers, json=payload) - response.raise_for_status() - return response.json()["choices"][0]["content"] - + #直接使用openai兼容接口 + return self._openai_chat(message, defaultUrl='https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions') + def _baidu_chat(self, message): url = self.api_host if self.api_host else 'https://aip.baidubce.com/rpc/2.0/ai_custom/v1/ernie-bot' headers = {"Content-Type": "application/json"} diff --git a/application/lib/urlopener.py b/application/lib/urlopener.py index 19e7f1ed..b7f2dbe8 100644 --- a/application/lib/urlopener.py +++ b/application/lib/urlopener.py @@ -106,8 +106,8 @@ def post(self, *args, **kwargs): def open_remote_url(self, url, data, headers, timeout, method, **kwargs): timeout = timeout if timeout else self.timeout headers = self.get_headers(url, headers) - jsonData = kwargs.get('json', None) - method = 'POST' if (data or jsonData) and (method != 'GET') else 'GET' + if not method: + method = 'POST' if (data or kwargs.get('json', None)) else 'GET' url = self.build_url(url, data, method) if method == 'GET': req_func = self.session.get #type:ignore diff --git a/application/static/base.js b/application/static/base.js index 137d4d06..e571c44c 100644 --- a/application/static/base.js +++ b/application/static/base.js @@ -108,65 +108,68 @@ function RegisterHideHambClick() { //连接服务器获取内置recipe列表,并按照语言建立一个字典all_builtin_recipes,字典键为语言,值为信息字典列表 function FetchBuiltinRecipesXml() { - var hasUserLangRss = false; - var hasEnRss = false; + //添加上传的recipe的语言代码 + var langPick = $("#language_pick"); + my_uploaded_recipes.forEach(item => { + var lang = item['language']; + if (lang && !all_builtin_recipes[lang]) { + all_builtin_recipes[lang] = []; + langPick.append($(''.format(lang, LanguageName(lang)))); + } + }); + //这个是静态文件,flask和浏览器会通过etag来自动使用本地缓存 $.get('/recipes/builtin_recipes.xml', function(xml) { + //这里面的代码是异步执行的 + var hasUserLangRss = false; + var hasEnRss = false; var userLang = BrowserLanguage(); + var langPick = $("#language_pick"); $(xml).find("recipe").each(function() { - var title=$(this).attr("title"); - var language=$(this).attr("language").toLowerCase(); - var subs=$(this).attr("needs_subscription"); + var title = $(this).attr("title"); + var lang = $(this).attr("language").toLowerCase(); + var subs = $(this).attr("needs_subscription"); subs = ((subs == 'yes') || (subs == 'optional')) ? true : false; - var description=$(this).attr("description").substring(0, 200); - var id=$(this).attr("id"); + var description = $(this).attr("description").substring(0, 200); + var id = $(this).attr("id"); //忽略各国语言方言,仅取'_'前的部分 - language = language.replace('-', '_'); - var dashIndex = language.indexOf('_'); + lang = lang.replace('-', '_'); + var dashIndex = lang.indexOf('_'); if (dashIndex != -1) { - language = language.substring(0, dashIndex); + lang = lang.substring(0, dashIndex); } - if (language == userLang) { + if (lang == userLang) { hasUserLangRss = true; } - if (language == 'en') { + if (lang == 'en') { hasEnRss = true; } - if (!all_builtin_recipes[language]) { - all_builtin_recipes[language] = []; - var $newLangOpt = $(''.format(language, LanguageName(language))); - $("#language_pick").append($newLangOpt); + if (!all_builtin_recipes[lang]) { + all_builtin_recipes[lang] = []; + langPick.append($(''.format(lang, LanguageName(lang)))); } - all_builtin_recipes[language].push({title: title, description: description, needs_subscription: subs, id: id}); + all_builtin_recipes[lang].push({title: title, description: description, needs_subscription: subs, id: id}); }); + //自动触发和用户浏览器同样语种的选项 + var langItem; if (hasUserLangRss) { - $("#language_pick").find("option[value='{0}']".format(userLang)).attr("selected", true); - $("#language_pick").val(userLang).trigger('change'); + langItem = langPick.find("option[value='{0}']".format(userLang)); } else if (hasEnRss) { //如果有英语则选择英语源 - $("#language_pick").find("option[value='en']").attr("selected", true); - $("#language_pick").val('en').trigger('change'); + langItem = langPick.find("option[value='en']"); } else { //最后只能选择第一个语言 - var firstChild = $("#language_pick").children().first(); - firstChild.attr("selected", true); - firstChild.trigger('change'); + langItem = $("#language_pick").children().first(); + } + if (langItem) { + langItem.attr("selected", true); + langItem.trigger('change'); } }).fail(function(jqXHR, textStatus, errorThrown) { console.log("Failed to fetch '/recipes/builtin_recipes.xml': " + errorThrown); }); - //添加上传的recipe中存在,但是内置库不存在的语言代码 - my_uploaded_recipes.forEach(item => { - var language = item['language']; - if (language && !all_builtin_recipes[language]) { - all_builtin_recipes[language] = []; - var $newLangOpt = $(''.format(language, LanguageName(language))); - $("#language_pick").append($newLangOpt); - } - }); - PopulateLibrary(''); } @@ -229,13 +232,14 @@ function AppendRecipeToLibrary(div, id) { hamb_arg = []; var fTpl = "{0}('{1}','{2}')"; if (id.startsWith("upload:")) { //增加汉堡按钮弹出菜单代码 - hamb_arg.push({klass: 'btn-A', title: i18n.delete, icon: 'icon-delete', act: fTpl.format('DeleteUploadRecipe', id, title)}); hamb_arg.push({klass: 'btn-E', title: i18n.share, icon: 'icon-share', act: fTpl.format('StartShareRss', id, title)}); } hamb_arg.push({klass: 'btn-B', title: i18n.viewSrc, icon: 'icon-source', act: "/viewsrc/" + id.replace(':', '__')}); hamb_arg.push({klass: 'btn-C', title: i18n.subscriSep, icon: 'icon-push', act: fTpl.format('SubscribeRecipe', id, '1')}); hamb_arg.push({klass: 'btn-D', title: i18n.subscribe, icon: 'icon-subscribe', act: fTpl.format('SubscribeRecipe', id, '0')}); - + if (id.startsWith("upload:")) { //增加汉堡按钮弹出菜单代码 + hamb_arg.push({klass: 'btn-A', title: i18n.delete, icon: 'icon-delete', act: fTpl.format('DeleteUploadRecipe', id, title)}); + } row_str.push(AddHamburgerButton(hamb_arg)); row_str.push(''); var new_item = $(row_str.join('')); diff --git a/application/templates/book_audiolator.html b/application/templates/book_audiolator.html index 2fb7d01b..45274d81 100644 --- a/application/templates/book_audiolator.html +++ b/application/templates/book_audiolator.html @@ -39,7 +39,7 @@
- +
- +