Skip to content

Commit

Permalink
improve ai summarizer
Browse files Browse the repository at this point in the history
  • Loading branch information
cdhigh committed Nov 18, 2024
1 parent 36eb080 commit f87d4be
Show file tree
Hide file tree
Showing 13 changed files with 202 additions and 194 deletions.
54 changes: 26 additions & 28 deletions application/lib/ebook_summarizer/html_summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#Author: cdhigh <https://github.com/cdhigh>
import re, time
import simple_ai_provider
from application.utils import loc_exc_pos

def get_summarizer_engines():
    """Return the table of AI engines that can be used for summarization.

    The value is ``simple_ai_provider._PROV_AI_LIST`` itself; it is handed
    back as-is, without copying.
    """
    engines = simple_ai_provider._PROV_AI_LIST
    return engines
Expand Down Expand Up @@ -32,22 +33,22 @@ def summarize_text(self, text):
if chunkSize < 2000:
chunkSize = 2000

summarySize = self.params.get('summary_size', 200)
words = self.params.get('summary_words', 200)
summary = ''
errMsg = ''
lang = self.params.get('summary_lang', '')
if lang:
summaryTips = (f"Summarize the following text in {lang}. The summary should accurately represent the content "
f"and be no more than {summarySize} words:\n\n")
f"and be no more than {words} words:\n\n")
else:
summaryTips = (f"Summarize the following text in the same language as the original text. The summary should accurately represent the content "
f"and be no more than {summarySize} words:\n\n")
f"and be no more than {words} words:\n\n")

text = re.sub(r'<[^>]+>', '', text)[:chunkSize]
#try:
summary = self.aiAgent.chat(f"{summaryTips}{text}")
#except Exception as e:
#errMsg = str(e)
try:
summary = self.aiAgent.chat(f"{summaryTips}{text}")
except Exception as e:
errMsg = str(e)

return {'error': errMsg, 'summary': summary}

Expand All @@ -71,40 +72,37 @@ def summarize_soup(self, soup, chunkSize=None, maxIterations=5):
#将文本分块,这个分块比较粗糙,可能按照段落分块会更好,但是考虑到AI的适应能力比较强,
#并且仅用于生成摘要,所以这个简单方案还是可以接受的
chunks = [text[i:i + chunkSize] for i in range(0, len(text), chunkSize)]
summarySize = self.params.get('summary_size', 200)
words = self.params.get('summary_words', 0) or 200
interval = self.engineProperty.get('request_interval', 0)
summary = None

summaryTips = self.params.get('custom_prompt', '')
lang = self.params.get('summary_lang', '')
if lang:
summaryTips = f"Please refine or update the summary based on the following text block, ensuring the summary is in the language: {lang}, and make it more accurately reflect the article content:\n\n"
if summaryTips: #使用自定义prompt
summaryTips = summaryTips.replace('{lang}', lang).replace('{words}', str(words))
elif lang:
summaryTips = f"Please improve and update the existing summary of the following text block(s), ensuring the summary is written in the language of {lang}. The updated summary should accurately reflect the content while distilling key points, arguments, and conclusions, and should not exceed {words} words:"
else:
summaryTips = f"Please refine or update the summary based on the following text block, ensuring the summary is in the same language as the article/preset summary, and make it more accurately reflect the article content:\n\n"
errMsg = ''
summaryTips = f"Please improve and update the existing summary of the following text block(s), ensuring it is in the same language as the article and preset summary, while accurately reflecting the content and distilling key points, arguments, and conclusions. The updated summary should not exceed {words} words:"

summary = None
for i, chunk in enumerate(chunks[:maxIterations]):
prompt = (
f"The current summary is:\n{summary}\n\n{summaryTips}"
f"Text block {i + 1}:\n{chunk}\n\n"
f"Please generate an updated summary of no more than {summarySize} words."
)
prompt = f"Existing summary:\n{summary}\n\n{summaryTips}\n\nText block {i + 1}:\n{chunk}\n\n"

try:
summary = self.aiAgent.chat(prompt)
except Exception as e:
errMsg = str(e)
break
except:
default_log.info(loc_exc_pos('Error in summary_soup'))
return

if interval > 0:
time.sleep(interval)

if errMsg:
default_log.info(f'Error in summary_soup: {errMsg}')
return

#将摘要插在文章标题之后
summaryTag = soup.new_tag('p', attrs={'class': 'ai_generated_summary'})
summaryTag = soup.new_tag('p', attrs={'class': 'ai_generated_summary',
'data-aiagent': str(self.aiAgent)})
style = self.params.get('summary_style', '')
if style:
summaryTag['style'] = style
b = soup.new_tag('b')
b = soup.new_tag('b', attrs={'class': 'ai_summary_hint'})
b.string = 'AI-Generated Summary: '
summaryTag.append(b)
summaryTag.append(summary)
Expand Down
65 changes: 19 additions & 46 deletions application/lib/simple_ai_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@
'Openai': {
'models': ['GPT-4o mini', 'GPT-4o', 'GPT-4 Turbo', 'gpt-3.5-turbo', 'GPT-3.5 Turbo Instruct'],
'request_interval': 10,
'context_size': 4096},
'context_size': 4000},
'Anthropic': {
'models': ['claude-2', 'claude-3', 'claude-1'],
'request_interval': 6,
'context_size': 100000},
'Grok': {
'models': ['grok-beta'],
'request_interval': 6,
'context_size': 4096},
'context_size': 4000},
'Mistral': {
'models': ['open-mistral-7b', 'mistral-small-latest', 'open-mixtral-8x7b', 'open-mixtral-8x22b', 'mistral-small-2402',
'mistral-small-2409', 'mistral-medium', 'mistral-large-2402', 'mistral-large-2407',
Expand All @@ -38,20 +38,15 @@
'models': ['gemma2-9b-it', 'gemma-7b-it', 'llama-guard-3-8b', 'llama3-70b-8192', 'llama3-8b-8192',
'mixtral-8x7b-32768'],
'request_interval': 2,
'context_size': 8192},

# 'cohere': {
# 'models': ['command-xlarge-nightly'],
# 'request_interval': 6,
# 'context_size': 2048},
# 'alibaba': {
# 'models': ['tongyi-qianwen-base'],
# 'request_interval': 6,
# 'context_size': 4096},
# 'baidu': {
'context_size': 8000},
'Alibaba': {
'models': ['qwen-turbo', 'qwen-plus', 'qwen-long'],
'request_interval': 1,
'context_size': 130000},
# 'Baidu': {
# 'models': ['ernie-bot'],
# 'request_interval': 6,
# 'context_size': 4096},
# 'context_size': 4000},
}

class SimpleAiProvider:
Expand All @@ -63,6 +58,9 @@ def __init__(self, name, api_key, model=None, api_host=None):
self.api_host = api_host
self.opener = UrlOpener()

def __repr__(self):
return f'{self.name}({self.model})'

def ai_list(self):
    """Return the supported AI providers as a Python dict.

    The module-level ``_PROV_AI_LIST`` table is returned as-is, without
    copying.
    """
    providers = _PROV_AI_LIST
    return providers
Expand All @@ -84,11 +82,9 @@ def chat(self, message):
return self._mistral_chat(message)
elif name == 'Groq':
return self._groq_chat(message)
# elif name == "cohere":
# return self._cohere_chat(message)
# elif name == "alibaba":
# return self._alibaba_chat(message)
# elif name == "baidu":
elif name == "Alibaba":
return self._alibaba_chat(message)
# elif name == "Baidu":
# return self._baidu_chat(message)
else:
raise ValueError(f"Unsupported provider: {name}")
Expand Down Expand Up @@ -135,19 +131,6 @@ def _gemini_chat(self, message):
contents = response.json()["candidates"][0]["content"]
return contents['parts'][0]['text']

#cohere的chat接口
def _cohere_chat(self, message):
url = self.api_host if self.api_host else 'https://api.cohere.ai/v1/generate'
headers = {"Authorization": f"Bearer {self.api_key}"}
payload = {
"model": self.model or _PROV_AI_LIST['cohere']['models'][0],
"text": message,
"max_tokens": 300
}
response = self.opener.post(url, headers=headers, json=payload)
response.raise_for_status()
return response.json()["generations"][0]["text"]

#grok的chat接口
def _grok_chat(self, message):
#直接使用openai兼容接口
Expand All @@ -163,21 +146,11 @@ def _groq_chat(self, message):
#直接使用openai兼容接口
return self._openai_chat(message, defaultUrl='https://api.groq.com/openai/v1/chat/completions')

#通义千问
def _alibaba_chat(self, message):
url = self.api_host if self.api_host else 'https://api.aliyun.com/v1/ai/chat'
headers = {
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json"
}
payload = {
"model": self.model or _PROV_AI_LIST['alibaba']['models'][0],
"messages": [{"role": "user", "content": message}] if isinstance(message, str) else message,
"max_tokens": 300
}
response = self.opener.post(url, headers=headers, json=payload)
response.raise_for_status()
return response.json()["choices"][0]["content"]

#直接使用openai兼容接口
return self._openai_chat(message, defaultUrl='https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions')

def _baidu_chat(self, message):
url = self.api_host if self.api_host else 'https://aip.baidubce.com/rpc/2.0/ai_custom/v1/ernie-bot'
headers = {"Content-Type": "application/json"}
Expand Down
4 changes: 2 additions & 2 deletions application/lib/urlopener.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@ def post(self, *args, **kwargs):
def open_remote_url(self, url, data, headers, timeout, method, **kwargs):
timeout = timeout if timeout else self.timeout
headers = self.get_headers(url, headers)
jsonData = kwargs.get('json', None)
method = 'POST' if (data or jsonData) and (method != 'GET') else 'GET'
if not method:
method = 'POST' if (data or kwargs.get('json', None)) else 'GET'
url = self.build_url(url, data, method)
if method == 'GET':
req_func = self.session.get #type:ignore
Expand Down
76 changes: 40 additions & 36 deletions application/static/base.js
Original file line number Diff line number Diff line change
Expand Up @@ -108,65 +108,68 @@ function RegisterHideHambClick() {

//连接服务器获取内置recipe列表,并按照语言建立一个字典all_builtin_recipes,字典键为语言,值为信息字典列表
function FetchBuiltinRecipesXml() {
var hasUserLangRss = false;
var hasEnRss = false;
//添加上传的recipe的语言代码
var langPick = $("#language_pick");
my_uploaded_recipes.forEach(item => {
var lang = item['language'];
if (lang && !all_builtin_recipes[lang]) {
all_builtin_recipes[lang] = [];
langPick.append($('<option value="{0}">{1}</option>'.format(lang, LanguageName(lang))));
}
});

//这个是静态文件,flask和浏览器会通过etag来自动使用本地缓存
$.get('/recipes/builtin_recipes.xml', function(xml) {
//这里面的代码是异步执行的
var hasUserLangRss = false;
var hasEnRss = false;
var userLang = BrowserLanguage();
var langPick = $("#language_pick");
$(xml).find("recipe").each(function() {
var title=$(this).attr("title");
var language=$(this).attr("language").toLowerCase();
var subs=$(this).attr("needs_subscription");
var title = $(this).attr("title");
var lang = $(this).attr("language").toLowerCase();
var subs = $(this).attr("needs_subscription");
subs = ((subs == 'yes') || (subs == 'optional')) ? true : false;
var description=$(this).attr("description").substring(0, 200);
var id=$(this).attr("id");
var description = $(this).attr("description").substring(0, 200);
var id = $(this).attr("id");

//忽略各国语言方言,仅取'_'前的部分
language = language.replace('-', '_');
var dashIndex = language.indexOf('_');
lang = lang.replace('-', '_');
var dashIndex = lang.indexOf('_');
if (dashIndex != -1) {
language = language.substring(0, dashIndex);
lang = lang.substring(0, dashIndex);
}
if (language == userLang) {
if (lang == userLang) {
hasUserLangRss = true;
}
if (language == 'en') {
if (lang == 'en') {
hasEnRss = true;
}

if (!all_builtin_recipes[language]) {
all_builtin_recipes[language] = [];
var $newLangOpt = $('<option value="{0}">{1}</option>'.format(language, LanguageName(language)));
$("#language_pick").append($newLangOpt);
if (!all_builtin_recipes[lang]) {
all_builtin_recipes[lang] = [];
langPick.append($('<option value="{0}">{1}</option>'.format(lang, LanguageName(lang))));
}
all_builtin_recipes[language].push({title: title, description: description, needs_subscription: subs, id: id});
all_builtin_recipes[lang].push({title: title, description: description, needs_subscription: subs, id: id});
});

//自动触发和用户浏览器同样语种的选项
var langItem;
if (hasUserLangRss) {
$("#language_pick").find("option[value='{0}']".format(userLang)).attr("selected", true);
$("#language_pick").val(userLang).trigger('change');
langItem = langPick.find("option[value='{0}']".format(userLang));
} else if (hasEnRss) { //如果有英语则选择英语源
$("#language_pick").find("option[value='en']").attr("selected", true);
$("#language_pick").val('en').trigger('change');
langItem = langPick.find("option[value='en']");
} else { //最后只能选择第一个语言
var firstChild = $("#language_pick").children().first();
firstChild.attr("selected", true);
firstChild.trigger('change');
langItem = $("#language_pick").children().first();
}
if (langItem) {
langItem.attr("selected", true);
langItem.trigger('change');
}
}).fail(function(jqXHR, textStatus, errorThrown) {
console.log("Failed to fetch '/recipes/builtin_recipes.xml': " + errorThrown);
});

//添加上传的recipe中存在,但是内置库不存在的语言代码
my_uploaded_recipes.forEach(item => {
var language = item['language'];
if (language && !all_builtin_recipes[language]) {
all_builtin_recipes[language] = [];
var $newLangOpt = $('<option value="{0}">{1}</option>'.format(language, LanguageName(language)));
$("#language_pick").append($newLangOpt);
}
});

PopulateLibrary('');
}

Expand Down Expand Up @@ -229,13 +232,14 @@ function AppendRecipeToLibrary(div, id) {
hamb_arg = [];
var fTpl = "{0}('{1}','{2}')";
if (id.startsWith("upload:")) { //增加汉堡按钮弹出菜单代码
hamb_arg.push({klass: 'btn-A', title: i18n.delete, icon: 'icon-delete', act: fTpl.format('DeleteUploadRecipe', id, title)});
hamb_arg.push({klass: 'btn-E', title: i18n.share, icon: 'icon-share', act: fTpl.format('StartShareRss', id, title)});
}
hamb_arg.push({klass: 'btn-B', title: i18n.viewSrc, icon: 'icon-source', act: "/viewsrc/" + id.replace(':', '__')});
hamb_arg.push({klass: 'btn-C', title: i18n.subscriSep, icon: 'icon-push', act: fTpl.format('SubscribeRecipe', id, '1')});
hamb_arg.push({klass: 'btn-D', title: i18n.subscribe, icon: 'icon-subscribe', act: fTpl.format('SubscribeRecipe', id, '0')});

if (id.startsWith("upload:")) { //增加汉堡按钮弹出菜单代码
hamb_arg.push({klass: 'btn-A', title: i18n.delete, icon: 'icon-delete', act: fTpl.format('DeleteUploadRecipe', id, title)});
}
row_str.push(AddHamburgerButton(hamb_arg));
row_str.push('</div>');
var new_item = $(row_str.join(''));
Expand Down
2 changes: 1 addition & 1 deletion application/templates/book_audiolator.html
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
</div>
<div class="pure-control-group" id="tts_api_host">
<label>{{_("Api Host")}}</label>
<input type="text" name="api_host" id="tts_api_host_input" value="{{api_host}}" placeholder="{{_('Empty to use default endpoint')}}" class="pure-u-1 pure-u-sm-1-2" />
<input type="text" name="api_host" id="tts_api_host_input" value="{{api_host}}" placeholder="{{_('Leave empty to use default')}}" class="pure-u-1 pure-u-sm-1-2" />
</div>
<div class="pure-control-group" id="tts_region_div">
<label>
Expand Down
Loading

0 comments on commit f87d4be

Please sign in to comment.