mdict supports external css

cdhigh · Jun 20, 2024 · b2da218 · b2da218
1 parent 49cf0bd
commit b2da218
Show file tree

Hide file tree

Showing 2 changed files with 53 additions and 38 deletions.
diff --git a/application/lib/dictionary/mdict/mdict.py b/application/lib/dictionary/mdict/mdict.py
@@ -1,9 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
 #mdx离线词典接口
-#!/usr/bin/env python3
-# -*- coding:utf-8 -*-
-#stardict离线词典支持
 import os
 from bs4 import BeautifulSoup
 from application.utils import xml_escape
@@ -100,7 +97,7 @@ def get(self, word):
         word = word.lower().strip()
         #和mdict官方应用一样，输入:about返回词典基本信息
         if word == ':about':
-            return self.dictHtmlInfo()
+            return self.dict_html_info()
 
         indexes = self.trie[word] if word in self.trie else None
         ret = self.get_content_by_Index(indexes)
@@ -144,8 +141,9 @@ def post_process(self, content):
             tag.extract()
 
         self.adjust_css(soup)
-        self.inline_css(soup)
+        self.justify_css_path(soup)
         #self.remove_empty_tags(soup)
+        #self.convert_dict_tag(soup)
 
         #mdict质量良莠不齐，有些词典在html/body外写释义
         #所以不能直接提取body内容
@@ -169,58 +167,68 @@ def adjust_css(self, soup):
                     del newStyle['height']
                 element['style'] = "; ".join(f"{k}: {v}" for k, v in newStyle.items())
 
-    #将外部单独css文件的样式内联到html标签中
-    def inline_css(self, soup):
+    def justify_css_path(self, soup):
+        dictDir = os.environ.get('DICTIONARY_DIR')
+        if not dictDir or not os.path.exists(dictDir):
+            return
+
         link = soup.find('link', attrs={'rel': 'stylesheet', 'href': True})
         if link:
-            link.extract()
-        return #碰到稍微复杂一些的CSS文件性能就比较低下，暂时屏蔽对CSS文件的支持
+            link['href'] = '/reader/css/' + link['href']
+
+    #将外部单独css文件的样式内联到html标签中，现在不使用了，直接修改css链接
+    def inline_css(self, soup):
+        link = soup.find('link', attrs={'rel': 'stylesheet', 'href': True})
+        if not link:
+            return
 
         link.extract()
-        css = ''
-        link = os.path.join(os.path.dirname(self.mdxFilename), link['href']) #type:ignore
-        if os.path.exists(link):
-            with open(link, 'r', encoding='utf-8') as f:
+        cssPath = os.path.join(os.path.dirname(self.mdxFilename), link['href'])  # type: ignore
+        if not os.path.exists(cssPath):
+            return
+        try:
+            with open(cssPath, 'r', encoding='utf-8') as f:
                 css = f.read().strip()
-
-        if not css:
+        except:
             return
 
-        parsed = {} #css文件的样式字典
-        cssRules = []
-
-        import css_parser
+        import css_parser #大部分词典都没有外挂css，可以尽量晚的引入css_parser
         parser = css_parser.CSSParser()
         try:
             stylesheet = parser.parseString(css)
             cssRules = list(stylesheet.cssRules)
         except Exception as e:
             default_log.warning(f'parse css failed: {self.mdxFilename}: {e}')
             return
-
+
+        parsed = {}
         for rule in cssRules:
             if rule.type == rule.STYLE_RULE:
-                selector = rule.selectorText
-                if ':' in selector: #伪元素
-                    continue
+                #css_parser对伪元素支持不好，要去掉
+                selectors = [item.strip() for item in rule.selectorText.split(',') 
+                    if (':' not in item) and item.strip()]
                 styles = {}
                 for style in rule.style:
                     if style.name != 'height':
                         styles[style.name] = style.value
-                parsed[selector] = styles
+                for selector in selectors:
+                    parsed[selector] = styles
 
-        #内联样式
-        for selector, styles in parsed.items():
-            try:
-                elements = soup.select(selector)
-            except NotImplementedError as e:
-                default_log.debug(f"Skipping unsupported selector: {selector}")
-                continue
-            for element in elements:
-                existing = element.get('style', '')
-                newStyle = dict(item.split(":") for item in existing.split(";") if item)
-                newStyle.update(styles)
-                element['style'] = "; ".join(f"{k}: {v}" for k, v in newStyle.items())
+        def apply_styles(tag, styles):
+            existing = tag.get('style', '')
+            newStyle = dict(item.split(":") for item in existing.split(";") if item)
+            newStyle.update(styles)
+            tag['style'] = ";".join(f"{k}:{v}" for k, v in newStyle.items())
+
+        for tag in soup.find_all(True):
+            tagStyles = parsed.get(tag.name, {})
+            classStyles = {}
+            if tag.has_attr('class'):
+                for item in tag['class']:
+                    classStyles.update(parsed.get(f'.{item}', {}))
+
+            idStyles = parsed.get(f'#{tag["id"]}', {}) if tag.has_attr('id') else {}
+            apply_styles(tag, {**tagStyles, **classStyles, **idStyles})
 
     #删除空白元素
     def remove_empty_tags(self, soup, preserve_tags=None):
@@ -237,7 +245,7 @@ def remove_empty_tags(self, soup, preserve_tags=None):
             tag.decompose()
 
     #返回当前词典的基本信息，html格式
-    def dictHtmlInfo(self):
+    def dict_html_info(self):
         ret = []
         header = self.mdx.header.copy()
         ret.append('<strong>{}</strong><hr/>'.format(header.pop('Title', '')))

diff --git a/application/view/reader.py b/application/view/reader.py
@@ -253,6 +253,13 @@ def ReaderDictPost(user: KeUser, userDir: str):
     return {'status': 'ok', 'word': word, 'definition': definition, 
         'dictname': str(inst), 'others': others}
 
+#获取词典外挂的CSS
+@bpReader.route("/reader/css/<path:path>", endpoint='ReaderDictCssRoute')
+@login_required()
+def ReaderDictCssRoute(path: str, user: KeUser):
+    dictDir = app.config['DICTIONARY_DIR']
+    return send_from_directory(dictDir, path) if dictDir and os.path.exists(dictDir) else ''
+
 #构建Hunspell实例
 #language: 语种代码，只有前两个字母
 def InitHunspell(language):
@@ -311,7 +318,7 @@ def GetWordSuggestions(hObj, word) -> list:
         return []
 
     try:
-        return [(s.decode('utf-8') if isinstance(s, bytes) else s) for s in hObj.suggest(word)]
+        return [s for s in hObj.suggest(word) if s != word]
     except Exception as e:
         print(e)
         return []