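Minor improvements for translator: add a per-request text length limit, refine HTML text extraction, and add title tooltips to reader navigation entries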

cdhigh · Nov 9, 2024 · e908894 · e908894
1 parent c8ea173
commit e908894
Show file tree

Hide file tree

Showing 3 changed files with 16 additions and 6 deletions.
diff --git a/application/lib/ebook_translator/engines/base.py b/application/lib/ebook_translator/engines/base.py
@@ -22,6 +22,7 @@ class Base:
     request_attempt = 3
     request_timeout = 10.0
     max_error_count = 10
+    max_len_per_request = 3000
 
     def __init__(self, config=None):
         self.source_lang = None #语种显示的名字

diff --git a/application/lib/ebook_translator/html_translator.py b/application/lib/ebook_translator/html_translator.py
@@ -100,10 +100,11 @@ def translate_soup(self, soup):
     #提取soup包含文本的节点，返回一个列表 [(tag, text),...]
     def extract_soup_text(self, soup):
         elements = []
+        maxLen = self.translator.max_len_per_request
 
         #确定soup节点是否直接包含文本元素
         def _contains_text(tag):
-            if (tag.name == 'table' or tag.string is not None or 
+            if ((tag.name == 'table') or (tag.string is not None) or 
                 [x for x in tag.children if isinstance(x, NavigableString) and str(x).strip()]):
                 return True
             return False
@@ -113,21 +114,28 @@ def _tag_is_filtered(tag):
             return tag.name in ('pre', 'code', 'abbr', 'style', 'script', 'textarea',
                 'input', 'select', 'link', 'img', 'option', 'datalist')
 
+        #判断节点没有子标签节点，只有文本
+        def _tag_has_only_text(tag):
+            return all(isinstance(e, NavigableString) for e in tag.children)
+
         #递归函数，用于遍历BeautifulSoup元素的所有子节点并提取文本内容
         #tag: 开始的BeautifulSoup元素
         #position: 翻译后的文本显示的位置
         def _extract(tag, position):
             for child in tag.find_all(recursive=False):
                 if _contains_text(child) and not _tag_is_filtered(child):
-                    text = str(child).strip() if position == 'replace' else child.get_text()
-                    elements.append((child, text))
+                    text = str(child).strip() if position == 'replace' else child.get_text().strip()
+                    if text and _tag_has_only_text(child) or len(text) < maxLen:
+                        elements.append((child, text))
+                        continue
+
                     #if text:
                     #    #因为非AI翻译容易误翻译超链接里面的内容，所以这里去掉超链接
                     #    if position != 'replace' and '<a' in text:
                     #        text = re.sub(r'<a\b[^>]*>', '<u>', text)
                     #        text = text.replace('</a>', '</u>')
-                else:
-                    _extract(child, position)
+
+                _extract(child, position)
 
         position = self.params.get('position', 'below')
         _extract(soup.body, position)

diff --git a/application/static/reader.js b/application/static/reader.js
@@ -824,11 +824,12 @@ function populateBooks(expandLevel) {
         if (!article || !article.src || !article.title) {
           continue;
         }
+        var sTitle = article.title.replace(/"/g, '&quot;');
         ostr.push(
              '<div class="nav-title" data-src="' + article.src +'">' +
                 '<div>' +
                   articleSvgIcon() +
-                  '<span class="nav-title-text">' + article.title + '</span>' +
+                  '<span class="nav-title-text" title="' + sTitle + '">' + article.title + '</span>' +
                 '</div>' +
               '</div>');
       }