Skip to content

Commit

Permalink
some bugfix
Browse files Browse the repository at this point in the history
  • Loading branch information
cdhigh committed Jun 13, 2024
1 parent 1a5e792 commit e0d9a64
Show file tree
Hide file tree
Showing 9 changed files with 157 additions and 100 deletions.
6 changes: 6 additions & 0 deletions application/lib/calibre/web/feeds/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import traceback
import json
import datetime
import html

from calibre import entity_to_unicode, force_unicode, strftime
from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
Expand Down Expand Up @@ -249,6 +250,11 @@ def parse_article(self, item):
content = None
if not link and not content:
return

if description:
description = html.unescape(description)
if content:
content = html.unescape(content)
article = Article(id, title, link, author, description, published, content)
delta = utcnow() - article.utctime
if (self.oldest_article == 0) or (delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article):
Expand Down
40 changes: 36 additions & 4 deletions application/lib/dictionary/pystardict.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#stardict词典支持,基于 <https://github.com/lig/pystardict> 修改
import os, re, gzip, logging
import os, re, logging
from struct import unpack
try:
import indexed_gzip as igzip
except:
import gzip
igzip = None

try:
import marisa_trie
except:
def __getitem__(self, word) -> bytes:
    """Return the concatenated text fields of the entry stored for ``word``.

    The raw entry is read from the dict file using the (offset, size) pair
    found in the index, then split into fields following the StarDict layout:

    * With ``sametypesequence`` set, fields appear in that order without a
      type byte. Text fields are NUL-terminated and binary fields carry a
      32-bit network-order size prefix, except for the last field, which
      has neither and simply runs to the end of the entry.
    * Without it, every field starts with a one-byte type character,
      followed by NUL-terminated text or a size-prefixed binary payload.

    Binary fields (audio, pictures, ...) are not supported yet and are skipped.

    Args:
        word: key to look up in the index.

    Returns:
        All text fields of the entry joined together, in file order.

    Raises:
        KeyError: presumably raised by the index when ``word`` is unknown
            (depends on the index implementation; confirm against it).
    """
    text_types = "mlgtxykwhnr"  # lower-case type characters denote text fields
    cords = self._container.idx[word]
    self._file.seek(cords[0])
    data = self._file.read(cords[1]) #type:ignore

    # A list (not a dict keyed by type) so repeated fields of the same type
    # are all kept instead of overwriting each other.
    parts = []
    type_seq = self._container.ifo.sametypesequence or ''
    if type_seq:
        last = len(type_seq) - 1
        for k, type_ in enumerate(type_seq):
            if k >= last:
                # The last field has no terminator/size prefix: it is the remainder.
                if type_ in text_types:
                    parts.append(data)
                break
            if type_ in text_types:
                field, _, data = data.partition(b'\0') #type:ignore
                parts.append(field)
            else:
                if len(data) < 4: # truncated entry, nothing left to parse
                    break
                # unpack() returns a tuple; the size is its first element
                size = unpack("!L", data[:4])[0] #type:ignore
                data = data[size + 4:]
    else:
        while data:
            # The first byte is the field type; strip it before parsing the payload
            type_ = chr(data[0])
            data = data[1:]
            if type_ in text_types:
                field, _, data = data.partition(b'\0') #type:ignore
                parts.append(field)
            else:
                if len(data) < 4: # truncated entry, nothing left to parse
                    break
                size = unpack("!L", data[:4])[0] #type:ignore
                data = data[size + 4:]

    return b''.join(parts)

def __contains__(self, word):
return word in self._container.idx

Expand Down Expand Up @@ -435,7 +467,7 @@ def open_file(regular, gz):
#但是它提供一个表可以用来在文件中随机访问压缩块。
if os.path.exists(gz):
try:
return gzip.open(gz, 'rb')
return igzip.IndexedGzipFile(gz) if igzip else gzip.open(gz, 'rb') #type:ignore
except Exception as e:
raise Exception('gz file opening error: "{}"'.format(e))

Expand Down
8 changes: 7 additions & 1 deletion application/lib/dictionary/stardict.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,10 @@ def __repr__(self):

def definition(self, word, language=''):
    """Return the cleaned-up definition text of ``word``.

    The raw entry is decoded from UTF-8 when it is bytes, blank lines and
    surrounding whitespace are dropped, and a leading headword line
    (``word`` or ``<b>word</b>``) is removed so the headword is not repeated.

    Args:
        word: the word to look up.
        language: unused by this engine; accepted for interface compatibility.

    Returns:
        The definition as newline-separated text, or '' when no dictionary
        is loaded or the lookup yields nothing.
    """
    raw = self.dictionary.get(word) if self.dictionary else ''
    if not raw:
        # A falsy lookup result (e.g. None for an unknown word) must not go
        # through str(), which would turn it into the literal text "None".
        return ''
    if isinstance(raw, bytes):
        raw = raw.decode('utf-8')
    stripped = (line.strip() for line in str(raw).split('\n'))
    lines = [line for line in stripped if line]
    if lines and lines[0] in (word, f'<b>{word}</b>'):
        lines = lines[1:]
    return '\n'.join(lines)

3 changes: 2 additions & 1 deletion application/lib/urlopener.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ def open_remote_url(self, url, data, headers, timeout, method, **kwargs):
req_func = self.session.post #type:ignore

try:
resp = req_func(url, data=data, headers=headers, timeout=timeout, allow_redirects=True, **kwargs)
resp = req_func(url, data=data, headers=headers, timeout=timeout, allow_redirects=True,
verify=False, **kwargs)
except:
resp = requests.models.Response()
resp.status_code = 555
Expand Down
1 change: 1 addition & 0 deletions application/recipes/shared_rss.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"library_url": "https://cdhigh.serv00.net/", "rss": []}
2 changes: 1 addition & 1 deletion application/static/library.js
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ function CreatePageContent(category, page) {
//汉堡按钮弹出菜单代码
var dbId = item.r || '';
let title = encodeJsSafeStr(item.t);
var repAct = "ReportInvalid('`{0}','{1}','{2}')".format(title, item.u, dbId);
var repAct = "ReportInvalid('{0}','{1}','{2}')".format(title, item.u, dbId);
var subsAct = "SubscribeSharedFeed('{0}','{1}','{2}','{3}',{4})";
hamb_arg.push({klass: 'btn-A', title: i18n.invalidReport, icon: 'icon-offcloud', act: repAct});
hamb_arg.push({klass: 'btn-C', title: i18n.subscriSep, icon: 'icon-push', act:
Expand Down
6 changes: 5 additions & 1 deletion application/view/adv.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,11 @@ def AdvDeleteCssAjaxPost(user: KeUser):
def AdvDict(user: KeUser):
from dictionary import all_dict_engines
#[{language:,engine:,database:,},]
dictParams = user.cfg('reader_params').get('dicts', [])
dictParams = user.cfg('reader_params').get('dicts', [{},{},{}])
if not isinstance(dictParams, list):
dictParams = [{},{},{}]
while len(dictParams) < 3:
dictParams.append({})

engines = {name: {'databases': klass.databases} for name,klass in all_dict_engines.items()}
return adv_render_template('adv_dict.html', 'dictionary', user=user, engines=engines,
Expand Down
2 changes: 1 addition & 1 deletion application/view/library_offical.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from ..back_end.db_models import *

#几个"官方"服务的地址
KINDLEEAR_SITE = "https://reador.appspot.com"
KINDLEEAR_SITE = "https://cdhigh.serv00.net"
LIBRARY_KINDLEEAR = "/kelibrary"
LIBRARY_GETRSS = "getrss"
LIBRARY_GETLASTTIME = "latesttime"
Expand Down
189 changes: 98 additions & 91 deletions tests/readme.developer.md
Original file line number Diff line number Diff line change
@@ -1,106 +1,28 @@
# KindleEar开发者备忘录

# 本地环境构建和调试
1. 安装标准环境google cloud SDK/gcloud CLI,并且执行 gcloud init
2. 安装依赖 `pip install -r requirements.txt`
3. 使用命令打开调试环境
`c:\python38\python.exe "C:\Program Files (x86)\Google\Cloud SDK\google-cloud-sdk\bin\dev_appserver.py" --runtime_python_path="python27=c:\python27\python.exe,python3=c:\python38\python.exe" --skip_sdk_update_check=true app.yaml worker.yaml`
`--support_datastore_emulator=true`
dev_appserver.py --runtime_python_path=c:\python38\python.exe --application=kindleear5 app.yaml

2. 即使在本机,GAE应用也运行在一个沙箱内,无法读写本机文件,如果要突破,可以修改 stubs.py 里面的 FakeFile 类。
* 删除__init__()
* is_file_accessible() 无条件返回 FakeFile.Visibility.OK
* stubs.py默认位置:C:\Program Files (x86)\Google\Cloud SDK\google-cloud-sdk\platform\google_appengine\google\appengine\tools\devappserver2\python\runtime\stubs.py
3. datastore如果连接不上模拟器,一直使用远端数据库,可以手动修改 site-packages\google\cloud\datastore\client.py
Client.__init__(),将 emulator_host 修改为 'localhost:8081'

# [google cloud datastore本地模拟器](https://cloud.google.com/datastore/docs/tools/datastore-emulator)
0. 安装和配置 Java JDK 11+
1. [获取凭证](https://cloud.google.com/docs/authentication/application-default-credentials)
`gcloud auth application-default login` #application-default是gcloud命令的参数名,不用修改
2. 安装datastore模拟器:`gcloud components install cloud-datastore-emulator`
3. 设置环境变量(每次启动模拟器服务前都需要重新设置环境变量)
`gcloud beta emulators datastore env-init > set_vars.cmd && set_vars.cmd`
4. 启动模拟器服务:`gcloud beta emulators datastore start`
默认模拟器数据库文件:local_db.bin
5. 如果需要连接到网络数据库,则需要移除环境变量
`gcloud beta emulators datastore env-unset > remove_vars.cmd && remove_vars.cmd`
6. 这个项目 [DSAdmin](https://github.com/remko/dsadmin) 可以本机管理模拟器数据库
`./dsadmin --project=my-datastore-project --datastore-emulator-host=localhost:8081`

gcloud app deploy cron.yaml
gcloud app deploy queue.yaml
gcloud services list #current enabled services
gcloud services list | grep datastore.googleapis.com
gcloud services enable datastore.googleapis.com
gcloud services enable tasks.googleapis.com
gcloud services enable cloudtasks.googleapis.com
gcloud services enable translate.googleapis.com
gcloud services enable texttospeech.googleapis.com

#all available services
gcloud services list --available > services.txt

# Windows 安装celery
* 安装并启动redis服务,(Windows只能安装redis3 <https://github.com/MicrosoftArchive/redis/releases>)
* 安装celery,如果是Windows,还需要安装 eventlet
> `pip install celery redis eventlet`
* 切换到KindleEar主目录,启动celery服务,main是入口文件的名字: main.py,只有Windows需要参数 '-P eventlet',需要cmd最大化可以先输入wmic再quit即可
> `celery -A main.celery_app worker --loglevel=info --concurrency=2 -P eventlet`
* celery命令:
> `redis-cli.exe -p 6379`
> `KEYS *`
# Windows 安装配置 MongoDB
* 下载安装(注意安装时要取消mongodb compass),创建一个目录保存数据库文件,比如 c:\mongodb\db和c:\mongodb\log
* 安装启动服务
>`"C:\Program Files\MongoDB\Server\3.6\bin\mongod.exe" --dbpath "c:\mongodb\db" --logpath "c:\mongodb\log\MongoDB.log" --install --serviceName "MongoDB" --journal`
> `net start MongoDB`
> `"C:\Program Files\MongoDB\Server\3.6\bin\mongo.exe"`
> `db.Book.insert({"name":"1001 nights"})`
> `db.Book.find()`
* 其他命令
> `net stop MongoDB` #停止后台服务
> `mongod.exe --remove` #删除后台服务
# 电子书简要生成流程
build_ebook.ConvertToEbook() -> plumber.run() -> recipe_input.convert() -> news.BasicNewsRecipe.download()
plumber.create_oebbook() -> OEBReader.call() -> output_plugin.convert()

# KindleEar额外自带的Python库,这些库不用pip安装,不在requirements.txt里面
* readability-lxml: 修改了其htmls.py|shorten_title()

# 如果要添加新选项,最好添加到 calibre.customize.conversion.py | InputFormatPlugin | common_options,

# 关于i18n翻译
* javascript的翻译没有采用其他复杂或引入其他依赖的方案,而是简单粗暴的在base.html里面将要翻译的字段预先翻译,
然后保存到一个全局字典对象。
* 文本字符串有修改后,逐个执行两个脚本。
第一个脚本提取文本到messages.pot并将文本更新到messages.po,翻译后使用第二个脚本编译为messages.mo
```bat
tools\pybabel_extract.bat
tools\pybabel_compile.bat
```
* 翻译空白字符条目 msgstr ""
* 在po后查找fuzzy,更新翻译后,将fuzzy标识行删除


# Docker
## 构建镜像
```bash
#using the pre-created builder, build && push
cd kindleear && \
cd ~/kindleear && \
cp ./docker/Dockerfile . && \
sudo docker buildx use builder && \
sudo docker buildx build --push --platform=linux/amd64,linux/arm64 -t kindleear/kindleear .
sudo docker buildx build --push --platform=linux/amd64,linux/arm64 -t kindleear/kindleear . && \
cd ~
#or, create a new builder, build && push
cd kindleear && \
cd ~/kindleear && \
cp ./docker/Dockerfile . && \
sudo docker buildx create --use --name=builder && \
sudo docker buildx build --push --platform=linux/amd64,linux/arm64 -t kindleear/kindleear .
sudo docker buildx build --push --platform=linux/amd64,linux/arm64 -t kindleear/kindleear . && \
cd ~
#using the pre-created builder, build && output
cd ~/kindleear && \
cp ./docker/Dockerfile . && \
sudo docker buildx use builder && \
sudo docker buildx build --platform=linux/arm64 -t kindleear/kindleear --output type=docker,dest=../kindleear.tar . && \
cd ~
#or, build a single platform image for test
cd kindleear && cp ./docker/Dockerfile . && sudo docker build -t kindleear/kindleear .
cd ~/kindleear && cp ./docker/Dockerfile . && sudo docker build -t kindleear/kindleear . && cd ~
#or, build a single platform image without cache and tag it
sudo docker build --no-cache -t kindleear/kindleear .
sudo docker tag id kindleear/kindleear:version
Expand All @@ -120,8 +42,30 @@ sudo docker exec -it container_id sh
sudo docker login
sudo docker push kindleear/kindleear:tag
sudo docker push kindleear/kindleear
sudo docker load -i kindleear.tar
```

# 电子书简要生成流程
build_ebook.ConvertToEbook() -> plumber.run() -> recipe_input.convert() -> news.BasicNewsRecipe.download()
plumber.create_oebbook() -> OEBReader.call() -> output_plugin.convert()

# KindleEar额外自带的Python库,这些库不用pip安装,不在requirements.txt里面
* readability-lxml: 修改了其htmls.py|shorten_title()

# 如果要添加新选项,最好添加到 calibre.customize.conversion.py | InputFormatPlugin | common_options,

# 关于i18n翻译
* javascript的翻译没有采用其他复杂或引入其他依赖的方案,而是简单粗暴的在base.html里面将要翻译的字段预先翻译,
然后保存到一个全局字典对象。
* 文本字符串有修改后,逐个执行两个脚本。
第一个脚本提取文本到messages.pot并将文本更新到messages.po,翻译后使用第二个脚本编译为messages.mo
```bat
tools\pybabel_extract.bat
tools\pybabel_compile.bat
```
* 翻译空白字符条目 msgstr ""
* 在po后查找fuzzy,更新翻译后,将fuzzy标识行删除


# 申请Let’s Encrypt ssl证书
* sudo apt update && sudo apt install certbot
Expand Down Expand Up @@ -164,6 +108,69 @@ sudo systemctl enable certbot-renew.timer
sudo systemctl start certbot-renew.timer
```

# 本地环境构建和调试
1. 安装标准环境google cloud SDK/gcloud CLI,并且执行 gcloud init
2. 安装依赖 `pip install -r requirements.txt`
3. 使用命令打开调试环境
`c:\python38\python.exe "C:\Program Files (x86)\Google\Cloud SDK\google-cloud-sdk\bin\dev_appserver.py" --runtime_python_path="python27=c:\python27\python.exe,python3=c:\python38\python.exe" --skip_sdk_update_check=true app.yaml worker.yaml`
`--support_datastore_emulator=true`
dev_appserver.py --runtime_python_path=c:\python38\python.exe --application=kindleear5 app.yaml

2. 即使在本机,GAE应用也运行在一个沙箱内,无法读写本机文件,如果要突破,可以修改 stubs.py 里面的 FakeFile 类。
* 删除__init__()
* is_file_accessible() 无条件返回 FakeFile.Visibility.OK
* stubs.py默认位置:C:\Program Files (x86)\Google\Cloud SDK\google-cloud-sdk\platform\google_appengine\google\appengine\tools\devappserver2\python\runtime\stubs.py
3. datastore如果连接不上模拟器,一直使用远端数据库,可以手动修改 site-packages\google\cloud\datastore\client.py
Client.__init__(),将 emulator_host 修改为 'localhost:8081'

# [google cloud datastore本地模拟器](https://cloud.google.com/datastore/docs/tools/datastore-emulator)
0. 安装和配置 Java JDK 11+
1. [获取凭证](https://cloud.google.com/docs/authentication/application-default-credentials)
`gcloud auth application-default login` #application-default是gcloud命令的参数名,不用修改
2. 安装datastore模拟器:`gcloud components install cloud-datastore-emulator`
3. 设置环境变量(每次启动模拟器服务前都需要重新设置环境变量)
`gcloud beta emulators datastore env-init > set_vars.cmd && set_vars.cmd`
4. 启动模拟器服务:`gcloud beta emulators datastore start`
默认模拟器数据库文件:local_db.bin
5. 如果需要连接到网络数据库,则需要移除环境变量
`gcloud beta emulators datastore env-unset > remove_vars.cmd && remove_vars.cmd`
6. 这个项目 [DSAdmin](https://github.com/remko/dsadmin) 可以本机管理模拟器数据库
`./dsadmin --project=my-datastore-project --datastore-emulator-host=localhost:8081`

gcloud app deploy cron.yaml
gcloud app deploy queue.yaml
gcloud services list #current enabled services
gcloud services list | grep datastore.googleapis.com
gcloud services enable datastore.googleapis.com
gcloud services enable tasks.googleapis.com
gcloud services enable cloudtasks.googleapis.com
gcloud services enable translate.googleapis.com
gcloud services enable texttospeech.googleapis.com

#all available services
gcloud services list --available > services.txt

# Windows 安装celery
* 安装并启动redis服务,(Windows只能安装redis3 <https://github.com/MicrosoftArchive/redis/releases>)
* 安装celery,如果是Windows,还需要安装 eventlet
> `pip install celery redis eventlet`
* 切换到KindleEar主目录,启动celery服务,main是入口文件的名字: main.py,只有Windows需要参数 '-P eventlet',需要cmd最大化可以先输入wmic再quit即可
> `celery -A main.celery_app worker --loglevel=info --concurrency=2 -P eventlet`
* celery命令:
> `redis-cli.exe -p 6379`
> `KEYS *`
# Windows 安装配置 MongoDB
* 下载安装(注意安装时要取消mongodb compass),创建一个目录保存数据库文件,比如 c:\mongodb\db和c:\mongodb\log
* 安装启动服务
>`"C:\Program Files\MongoDB\Server\3.6\bin\mongod.exe" --dbpath "c:\mongodb\db" --logpath "c:\mongodb\log\MongoDB.log" --install --serviceName "MongoDB" --journal`
> `net start MongoDB`
> `"C:\Program Files\MongoDB\Server\3.6\bin\mongo.exe"`
> `db.Book.insert({"name":"1001 nights"})`
> `db.Book.find()`
* 其他命令
> `net stop MongoDB` #停止后台服务
> `mongod.exe --remove` #删除后台服务

# Python托管平台的一些了解
Expand Down

0 comments on commit e0d9a64

Please sign in to comment.