some bugfix

cdhigh · Jun 13, 2024 · e0d9a64 · e0d9a64
1 parent 1a5e792
commit e0d9a64
Show file tree

Hide file tree

Showing 9 changed files with 157 additions and 100 deletions.
diff --git a/application/lib/calibre/web/feeds/__init__.py b/application/lib/calibre/web/feeds/__init__.py
@@ -12,6 +12,7 @@
 import traceback
 import json
 import datetime
+import html
 
 from calibre import entity_to_unicode, force_unicode, strftime
 from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
@@ -249,6 +250,11 @@ def parse_article(self, item):
             content = None
         if not link and not content:
             return
+
+        if description:
+            description = html.unescape(description)
+        if content:
+            content = html.unescape(content)
         article = Article(id, title, link, author, description, published, content)
         delta = utcnow() - article.utctime
         if (self.oldest_article == 0) or (delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article):

diff --git a/application/lib/dictionary/pystardict.py b/application/lib/dictionary/pystardict.py
@@ -1,8 +1,14 @@
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-
 #stardict词典支持，基于 <https://github.com/lig/pystardict> 修改
-import os, re, gzip, logging
+import os, re, logging
 from struct import unpack
+try:
+    import indexed_gzip as igzip
+except:
+    import gzip
+    igzip = None
+
 try:
     import marisa_trie
 except:
@@ -404,8 +410,34 @@ def __getitem__(self, word) -> bytes:
         """
         cords = self._container.idx[word]
         self._file.seek(cords[0])
-        return self._file.read(cords[1]) #type:ignore
-
+        # Read the raw record (cords[1] bytes) and keep only its text fields.
+        data = self._file.read(cords[1]) #type:ignore
+        ret = [] #text fields in file order; a list keeps repeated field types
+        textTypes = "mlgtxykwhnr"
+        typeSeq = self._container.ifo.sametypesequence
+        seqLen = len(typeSeq)
+        if seqLen:
+            # Field types are given by sametypesequence; the record holds only payloads.
+            for k, type_ in enumerate(typeSeq):
+                isLast = k >= seqLen - 1
+                if type_ in textTypes: #text
+                    if isLast: #last field: takes the rest of the record
+                        ret.append(data)
+                    else:
+                        text, _, data = data.partition(b'\0') #type:ignore
+                        ret.append(text)
+                elif isLast or len(data) < 4: #nothing usable left, avoid struct.error
+                    break
+                else: #audio/image, not supported yet
+                    #a leading 32-bit integer in network byte order gives the payload length;
+                    #unpack() returns a tuple, so take element 0
+                    size = unpack("!L", data[:4])[0] #type:ignore
+                    data = data[size + 4:]
+        else:
+            # No sametypesequence: each field starts with its own one-byte type marker.
+            while data:
+                type_ = data[:1].decode('latin-1') #one byte -> one comparable char
+                data = data[1:] #consume the marker before reading the payload
+                if type_ in textTypes:
+                    text, _, data = data.partition(b'\0') #type:ignore
+                    ret.append(text)
+                elif len(data) < 4: #truncated size prefix, stop parsing
+                    break
+                else: #audio/image, not supported yet
+                    size = unpack("!L", data[:4])[0] #type:ignore
+                    data = data[size + 4:]
+
+        return b''.join(ret)
+
     def __contains__(self, word):
         return word in self._container.idx
 
@@ -435,7 +467,7 @@ def open_file(regular, gz):
     #但是它提供一个表可以用来在文件中随机访问压缩块。
     if os.path.exists(gz):
         try:
-            return gzip.open(gz, 'rb')
+            return igzip.IndexedGzipFile(gz) if igzip else gzip.open(gz, 'rb') #type:ignore
         except Exception as e:
             raise Exception('gz file opening error: "{}"'.format(e))
 

diff --git a/application/lib/dictionary/stardict.py b/application/lib/dictionary/stardict.py
@@ -39,4 +39,10 @@ def __repr__(self):
 
     def definition(self, word, language=''):
+        """Return the definition text of `word`, or '' when nothing is found.
+
+        Blank lines are dropped, every line is stripped, and a first line that
+        merely repeats the headword (plain or wrapped in <b>) is removed.
+        `language` is not used by this method.
+        """
         ret = self.dictionary.get(word) if self.dictionary else ''
-        return ret.decode('utf-8') if isinstance(ret, bytes) else ret
+        if not ret: #None/b''/'' -> no definition; avoids returning the text 'None'
+            return ''
+        if isinstance(ret, bytes):
+            ret = ret.decode('utf-8')
+        lines = [line.strip() for line in str(ret).split('\n') if line.strip()]
+        if lines and lines[0] in (word, f'<b>{word}</b>'):
+            lines = lines[1:]
+        return '\n'.join(lines)
+
diff --git a/application/lib/urlopener.py b/application/lib/urlopener.py
@@ -102,7 +102,8 @@ def open_remote_url(self, url, data, headers, timeout, method, **kwargs):
             req_func = self.session.post #type:ignore
 
         try:
-            resp = req_func(url, data=data, headers=headers, timeout=timeout, allow_redirects=True, **kwargs)
+            resp = req_func(url, data=data, headers=headers, timeout=timeout, allow_redirects=True, 
+                verify=False, **kwargs)
         except:
             resp = requests.models.Response()
             resp.status_code = 555

diff --git a/application/recipes/shared_rss.json b/application/recipes/shared_rss.json
@@ -0,0 +1 @@
+{"library_url": "https://cdhigh.serv00.net/", "rss": []}
diff --git a/application/static/library.js b/application/static/library.js
@@ -220,7 +220,7 @@ function CreatePageContent(category, page) {
     //汉堡按钮弹出菜单代码
     var dbId = item.r || '';
     let title = encodeJsSafeStr(item.t);
-    var repAct = "ReportInvalid('`{0}','{1}','{2}')".format(title, item.u, dbId);
+    var repAct = "ReportInvalid('{0}','{1}','{2}')".format(title, item.u, dbId);
     var subsAct = "SubscribeSharedFeed('{0}','{1}','{2}','{3}',{4})";
     hamb_arg.push({klass: 'btn-A', title: i18n.invalidReport, icon: 'icon-offcloud', act: repAct});
     hamb_arg.push({klass: 'btn-C', title: i18n.subscriSep, icon: 'icon-push', act: 

diff --git a/application/view/adv.py b/application/view/adv.py
@@ -373,7 +373,11 @@ def AdvDeleteCssAjaxPost(user: KeUser):
 def AdvDict(user: KeUser):
     from dictionary import all_dict_engines
     #[{language:,engine:,database:,},]
-    dictParams = user.cfg('reader_params').get('dicts', [])
+    dictParams = user.cfg('reader_params').get('dicts', [{},{},{}])
+    if not isinstance(dictParams, list):
+        dictParams = [{},{},{}]
+    while len(dictParams) < 3:
+        dictParams.append({})
 
     engines = {name: {'databases': klass.databases} for name,klass in all_dict_engines.items()}
     return adv_render_template('adv_dict.html', 'dictionary', user=user, engines=engines, 

diff --git a/application/view/library_offical.py b/application/view/library_offical.py
@@ -11,7 +11,7 @@
 from ..back_end.db_models import *
 
 #几个"官方"服务的地址
-KINDLEEAR_SITE = "https://reador.appspot.com"
+KINDLEEAR_SITE = "https://cdhigh.serv00.net"
 LIBRARY_KINDLEEAR = "/kelibrary"
 LIBRARY_GETRSS = "getrss"
 LIBRARY_GETLASTTIME = "latesttime"

diff --git a/tests/readme.developer.md b/tests/readme.developer.md
@@ -1,106 +1,28 @@
 # KindleEar开发者备忘录
 
-# 本地环境构建和调试
-  1. 安装标准环境google cloud SDK/gloud CLI，并且执行 gcloud init
-  2. 安装依赖 `pip install requirements.txt`
-  3. 使用命令打开调试环境
-     `c:\python38\python.exe "C:\Program Files (x86)\Google\Cloud SDK\google-cloud-sdk\bin\dev_appserver.py" --runtime_python_path="python27=c:\python27\python.exe,python3=c:\python38\python.exe"  --skip_sdk_update_check=true app.yaml worker.yaml`
-     `--support_datastore_emulator=true`
-     dev_appserver.py --runtime_python_path=c:\python38\python.exe --application=kindleear5 app.yaml
-
-  2. 即使在本机，GAE应用也运行在一个沙箱内，无法读写本机文件，如果要突破，可以修改 stubs.py 里面的 FakeFile 类。
-     * 删除__init__()
-     * is_file_accessible() 无条件返回 FakeFile.Visibility.OK
-     * stubs.py默认位置：C:\Program Files (x86)\Google\Cloud SDK\google-cloud-sdk\platform\google_appengine\google\appengine\tools\devappserver2\python\runtime\stubs.py
-  3. datastore如果连接不上模拟器，一直使用远端数据库，可以手动修改 site-packages\google\cloud\datastore\client.py
-     Client.__init__()，将 emulator_host 修改为 'localhost:8081'
-
-# [google cloud datastore本地模拟器](https://cloud.google.com/datastore/docs/tools/datastore-emulator)
-  0. 安装和配置 Java JDK 11+
-  1. [获取凭证](https://cloud.google.com/docs/authentication/application-default-credentials)：
-     `gcloud auth application-default login` #application-default是gcloud命令的参数名，不用修改
-  2. 安装datastore模拟器：`gcloud components install cloud-datastore-emulator`
-  3. 设置环境变量（每次启动模拟器服务前都需要重新设置环境变量）
-     `gcloud beta emulators datastore env-init > set_vars.cmd && set_vars.cmd`
-  4. 启动模拟器服务：`gcloud beta emulators datastore start`
-     默认模拟器数据库文件：local_db.bin
-  5. 如果需要连接到网络数据库，则需要移除环境变量
-     `gcloud beta emulators datastore env-unset > remove_vars.cmd && remove_vars.cmd`
-  6. 这个项目 [DSAdmin](https://github.com/remko/dsadmin) 可以本机管理模拟器数据库
-     `./dsadmin --project=my-datastore-project --datastore-emulator-host=localhost:8081`
-
-gcloud app deploy cron.yaml
-gcloud app deploy queue.yaml
-gcloud services list #current enabled services
-gcloud services list | grep datastore.googleapis.com
-gcloud services enable datastore.googleapis.com
-gcloud services enable tasks.googleapis.com
-gcloud services enable cloudtasks.googleapis.com
-gcloud services enable translate.googleapis.com
-gcloud services enable texttospeech.googleapis.com
-
-#all available services
-gcloud services list --available > services.txt
-
-# Windows 安装celery
-* 安装并启动redis服务，(Windows只能安装redis3 <https://github.com/MicrosoftArchive/redis/releases>)
-* 安装celery，如果是Windows，还需要安装 eventlet
-   > `pip install celery, redis, eventlet`
-* 切换到KindleEar主目录，启动celery服务，main是入口文件的名字: main.py，只有Windows需要参数 '-P eventlet'，需要cmd最大化可以先输入wmic再quit即可
-   > `celery -A main.celery_app worker --loglevel=info --concurrency=2 -P eventlet`
-* celery命令：
-   > `redis-cli.exe -p 6379`
-   > `KEYS *`
-
-# Windows 安装配置 MongoDB
-* 下载安装(注意安装时要取消mongodb compass)，创建一个目录保存数据库文件，比如 c:\mongodb\db和c:\mongodb\log
-* 安装启动服务
-  >`"C:\Program Files\MongoDB\Server\3.6\bin\mongod.exe" --dbpath "c:\mongodb\db" --logpath "c:\mongodb\log\MongoDB.log" --install --serviceName "MongoDB"  --journal`
-  > `net start MongoDB`
-  > `"C:\Program Files\MongoDB\Server\3.6\bin\mongo.exe"`
-  > `db.Book.insert({"name":"1001 nights"})`
-  > `db.Book.find()`
-* 其他命令
-  > `net stop MongoDB`  #停止后台服务
-  > `mongod.exe --remove`  #删除后台服务`
-
-# 电子书简要生成流程
-  build_ebook.ConvertToEbook() -> plumber.run() -> recipe_input.convert() -> news.BasicNewsRecipe.download()
-  plumber.create_oebbook() -> OEBReader.call() -> output_plugin.convert()
-
-# KindleEar额外自带的Python库，这些库不用pip安装，不在requirements.txt里面
-* readability-lxml: 修改了其htmls.py|shorten_title()
-
-# 如果要添加新选项，最好添加到 calibre.customize.conversion.py | InputFormatPlugin | common_options, 
-
-# 关于i18n翻译
-* javascript的翻译没有采用其他复杂或引入其他依赖的方案，而是简单粗暴的在base.html里面将要翻译的字段预先翻译，
-然后保存到一个全局字典对象。
-* 文本字符串有修改后，逐个执行两个脚本。
-第一个脚本提取文本到messages.pot并将文本更新到messages.po，翻译后使用第二个脚本编译为messages.mo
-```bat
-tools\pybabel_extract.bat
-tools\pybabel_compile.bat
-```
-* 翻译空白字符条目 msgstr ""
-* 在po后查找fuzzy，更新翻译后，将fuzzy标识行删除
-
-
 # Docker
 ## 构建镜像
 ```bash
 #using the pre-created builder, build && push
-cd kindleear && \
+cd ~/kindleear && \
 cp ./docker/Dockerfile . && \
 sudo docker buildx use builder && \
-sudo docker buildx build --push --platform=linux/amd64,linux/arm64 -t kindleear/kindleear .
+sudo docker buildx build --push --platform=linux/amd64,linux/arm64 -t kindleear/kindleear . && \
+cd ~
 #or, create a new builder, build && push
-cd kindleear && \
+cd ~/kindleear && \
 cp ./docker/Dockerfile . && \
 sudo docker buildx create --use --name=builder && \
-sudo docker buildx build --push --platform=linux/amd64,linux/arm64 -t kindleear/kindleear .
+sudo docker buildx build --push --platform=linux/amd64,linux/arm64 -t kindleear/kindleear . && \
+cd ~
+#using the pre-created builder, build && output
+cd ~/kindleear && \
+cp ./docker/Dockerfile . && \
+sudo docker buildx use builder && \
+sudo docker buildx build --platform=linux/arm64 -t kindleear/kindleear --output type=docker,dest=../kindleear.tar . && \
+cd ~
 #or, build a single platform image for test
-cd kindleear && cp ./docker/Dockerfile . && sudo docker build -t kindleear/kindleear .
+cd ~/kindleear && cp ./docker/Dockerfile . && sudo docker build -t kindleear/kindleear . && cd ~
 #or, build a single platform image without cache and tag it
 sudo docker build --no-cache -t kindleear/kindleear .
 sudo docker tag id kindleear/kindleear:version
@@ -120,8 +42,30 @@ sudo docker exec -it container_id sh
 sudo docker login
 sudo docker push kindleear/kindleear:tag
 sudo docker push kindleear/kindleear
+sudo docker load -i kindleear.tar
 ```
 
+# 电子书简要生成流程
+  build_ebook.ConvertToEbook() -> plumber.run() -> recipe_input.convert() -> news.BasicNewsRecipe.download()
+  plumber.create_oebbook() -> OEBReader.call() -> output_plugin.convert()
+
+# KindleEar额外自带的Python库，这些库不用pip安装，不在requirements.txt里面
+* readability-lxml: 修改了其htmls.py|shorten_title()
+
+# 如果要添加新选项，最好添加到 calibre.customize.conversion.py | InputFormatPlugin | common_options, 
+
+# 关于i18n翻译
+* javascript的翻译没有采用其他复杂或引入其他依赖的方案，而是简单粗暴的在base.html里面将要翻译的字段预先翻译，
+然后保存到一个全局字典对象。
+* 文本字符串有修改后，逐个执行两个脚本。
+第一个脚本提取文本到messages.pot并将文本更新到messages.po，翻译后使用第二个脚本编译为messages.mo
+```bat
+tools\pybabel_extract.bat
+tools\pybabel_compile.bat
+```
+* 翻译空白字符条目 msgstr ""
+* 在po后查找fuzzy，更新翻译后，将fuzzy标识行删除
+
 
 # 申请Let’s Encrypt ssl证书
 * sudo apt update && sudo apt install certbot
@@ -164,6 +108,69 @@ sudo systemctl enable certbot-renew.timer
 sudo systemctl start certbot-renew.timer
 ```
 
+# 本地环境构建和调试
+  1. 安装标准环境google cloud SDK/gcloud CLI，并且执行 gcloud init
+  2. 安装依赖 `pip install -r requirements.txt`
+  3. 使用命令打开调试环境
+     `c:\python38\python.exe "C:\Program Files (x86)\Google\Cloud SDK\google-cloud-sdk\bin\dev_appserver.py" --runtime_python_path="python27=c:\python27\python.exe,python3=c:\python38\python.exe"  --skip_sdk_update_check=true app.yaml worker.yaml`
+     `--support_datastore_emulator=true`
+     dev_appserver.py --runtime_python_path=c:\python38\python.exe --application=kindleear5 app.yaml
+
+  2. 即使在本机，GAE应用也运行在一个沙箱内，无法读写本机文件，如果要突破，可以修改 stubs.py 里面的 FakeFile 类。
+     * 删除__init__()
+     * is_file_accessible() 无条件返回 FakeFile.Visibility.OK
+     * stubs.py默认位置：C:\Program Files (x86)\Google\Cloud SDK\google-cloud-sdk\platform\google_appengine\google\appengine\tools\devappserver2\python\runtime\stubs.py
+  3. datastore如果连接不上模拟器，一直使用远端数据库，可以手动修改 site-packages\google\cloud\datastore\client.py
+     Client.__init__()，将 emulator_host 修改为 'localhost:8081'
+
+# [google cloud datastore本地模拟器](https://cloud.google.com/datastore/docs/tools/datastore-emulator)
+  0. 安装和配置 Java JDK 11+
+  1. [获取凭证](https://cloud.google.com/docs/authentication/application-default-credentials)：
+     `gcloud auth application-default login` #application-default是gcloud命令的参数名，不用修改
+  2. 安装datastore模拟器：`gcloud components install cloud-datastore-emulator`
+  3. 设置环境变量（每次启动模拟器服务前都需要重新设置环境变量）
+     `gcloud beta emulators datastore env-init > set_vars.cmd && set_vars.cmd`
+  4. 启动模拟器服务：`gcloud beta emulators datastore start`
+     默认模拟器数据库文件：local_db.bin
+  5. 如果需要连接到网络数据库，则需要移除环境变量
+     `gcloud beta emulators datastore env-unset > remove_vars.cmd && remove_vars.cmd`
+  6. 这个项目 [DSAdmin](https://github.com/remko/dsadmin) 可以本机管理模拟器数据库
+     `./dsadmin --project=my-datastore-project --datastore-emulator-host=localhost:8081`
+
+gcloud app deploy cron.yaml
+gcloud app deploy queue.yaml
+gcloud services list #current enabled services
+gcloud services list | grep datastore.googleapis.com
+gcloud services enable datastore.googleapis.com
+gcloud services enable tasks.googleapis.com
+gcloud services enable cloudtasks.googleapis.com
+gcloud services enable translate.googleapis.com
+gcloud services enable texttospeech.googleapis.com
+
+#all available services
+gcloud services list --available > services.txt
+
+# Windows 安装celery
+* 安装并启动redis服务，(Windows只能安装redis3 <https://github.com/MicrosoftArchive/redis/releases>)
+* 安装celery，如果是Windows，还需要安装 eventlet
+   > `pip install celery redis eventlet`
+* 切换到KindleEar主目录，启动celery服务，main是入口文件的名字: main.py，只有Windows需要参数 '-P eventlet'，需要cmd最大化可以先输入wmic再quit即可
+   > `celery -A main.celery_app worker --loglevel=info --concurrency=2 -P eventlet`
+* celery命令：
+   > `redis-cli.exe -p 6379`
+   > `KEYS *`
+
+# Windows 安装配置 MongoDB
+* 下载安装(注意安装时要取消mongodb compass)，创建一个目录保存数据库文件，比如 c:\mongodb\db和c:\mongodb\log
+* 安装启动服务
+  >`"C:\Program Files\MongoDB\Server\3.6\bin\mongod.exe" --dbpath "c:\mongodb\db" --logpath "c:\mongodb\log\MongoDB.log" --install --serviceName "MongoDB"  --journal`
+  > `net start MongoDB`
+  > `"C:\Program Files\MongoDB\Server\3.6\bin\mongo.exe"`
+  > `db.Book.insert({"name":"1001 nights"})`
+  > `db.Book.find()`
+* 其他命令
+  > `net stop MongoDB`  #停止后台服务
+  > `mongod.exe --remove`  #删除后台服务
 
 
 # Python托管平台的一些了解
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"library_url": "https://cdhigh.serv00.net/", "rss": []}