From e9088942e7f28a4a27c19dd941c452872bdb758d Mon Sep 17 00:00:00 2001
From: cdhigh <cdhigh@users.noreply.github.com>
Date: Sat, 9 Nov 2024 08:29:36 -0300
Subject: [PATCH] minor improvements for translator

---
 .../lib/ebook_translator/engines/base.py       |  1 +
 .../lib/ebook_translator/html_translator.py    | 18 +++++++++++++-----
 application/static/reader.js                   |  3 ++-
 3 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/application/lib/ebook_translator/engines/base.py b/application/lib/ebook_translator/engines/base.py
index 19720468..717fbf39 100644
--- a/application/lib/ebook_translator/engines/base.py
+++ b/application/lib/ebook_translator/engines/base.py
@@ -22,6 +22,7 @@ class Base:
     request_attempt = 3
     request_timeout = 10.0
     max_error_count = 10
+    max_len_per_request = 3000
 
     def __init__(self, config=None):
         self.source_lang = None #语种显示的名字
diff --git a/application/lib/ebook_translator/html_translator.py b/application/lib/ebook_translator/html_translator.py
index 3aa5a37d..9369bbad 100644
--- a/application/lib/ebook_translator/html_translator.py
+++ b/application/lib/ebook_translator/html_translator.py
@@ -100,10 +100,11 @@ def translate_soup(self, soup):
     #提取soup包含文本的节点，返回一个列表 [(tag, text),...]
     def extract_soup_text(self, soup):
         elements = []
+        maxLen = self.translator.max_len_per_request
 
         #确定soup节点是否直接包含文本元素
         def _contains_text(tag):
-            if (tag.name == 'table' or tag.string is not None or 
+            if ((tag.name == 'table') or (tag.string is not None) or 
                 [x for x in tag.children if isinstance(x, NavigableString) and str(x).strip()]):
                 return True
             return False
@@ -113,21 +114,28 @@ def _tag_is_filtered(tag):
             return tag.name in ('pre', 'code', 'abbr', 'style', 'script', 'textarea',
                 'input', 'select', 'link', 'img', 'option', 'datalist')
 
+        #Return True when every direct child of tag is a NavigableString, i.e. it has no child tags (also True for a tag with no children)
+        def _tag_has_only_text(tag):
+            return all(isinstance(e, NavigableString) for e in tag.children)
+
         #递归函数，用于遍历BeautifulSoup元素的所有子节点并提取文本内容
         #tag: 开始的BeautifulSoup元素
         #position: 翻译后的文本显示的位置
         def _extract(tag, position):
             for child in tag.find_all(recursive=False):
                 if _contains_text(child) and not _tag_is_filtered(child):
-                    text = str(child).strip() if position == 'replace' else child.get_text()
-                    elements.append((child, text))
+                    text = str(child).strip() if position == 'replace' else child.get_text().strip()
+                    if text and (_tag_has_only_text(child) or len(text) < maxLen): #never queue empty text; 'and' binds tighter than 'or'
+                        elements.append((child, text))
+                        continue
+
                     #if text:
                     #    #因为非AI翻译容易误翻译超链接里面的内容，所以这里去掉超链接
                     #    if position != 'replace' and '<a' in text:
                     #        text = re.sub(r'<a\b[^>]*>', '<u>', text)
                     #        text = text.replace('</a>', '</u>')
-                else:
-                    _extract(child, position)
+                
+                _extract(child, position)
 
         position = self.params.get('position', 'below')
         _extract(soup.body, position)
diff --git a/application/static/reader.js b/application/static/reader.js
index e24f7633..eca7f37d 100644
--- a/application/static/reader.js
+++ b/application/static/reader.js
@@ -824,11 +824,12 @@ function populateBooks(expandLevel) {
         if (!article || !article.src || !article.title) {
           continue;
         }
+        var sTitle = article.title.replace(/"/g, '&quot;');
         ostr.push(
              '<div class="nav-title" data-src="' + article.src +'">' +
                 '<div>' +
                   articleSvgIcon() +
-                  '<span class="nav-title-text">' + article.title + '</span>' +
+                  '<span class="nav-title-text" title="' + sTitle + '">' + article.title + '</span>' +
                 '</div>' +
               '</div>');
       }