From 0724bd09e2c0bd2044ef7d3e4a6d1cc5a83e52d4 Mon Sep 17 00:00:00 2001 From: hiroi-sora <2230247019@qq.com> Date: Mon, 15 Jan 2024 14:54:04 +0800 Subject: [PATCH] =?UTF-8?q?=E7=89=88=E6=9C=AC=E6=9B=B4=E6=96=B0=20v2.0.2?= =?UTF-8?q?=20=E7=A7=BB=E9=99=A4=E4=BA=86=E5=BC=80=E5=8F=91=E4=B8=AD?= =?UTF-8?q?=E7=9A=84=E6=96=87=E6=A1=A3=E8=AF=86=E5=88=AB=E9=83=A8=E5=88=86?= =?UTF-8?q?=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../py_src/mission/doc_preview_connector.py | 68 --- UmiOCR-data/py_src/mission/mission_doc.py | 117 ----- UmiOCR-data/py_src/run.py | 4 - UmiOCR-data/py_src/tag_pages/BatchDOC.py | 90 ---- .../py_src/tag_pages/tag_pages_connector.py | 3 +- .../qt_res/qml/TabPages/BatchDOC/BatchDOC.qml | 450 ------------------ .../qml/TabPages/BatchDOC/BatchDOCConfigs.qml | 101 ---- .../qml/TabPages/BatchDOC/PreviewDoc.qml | 337 ------------- .../qt_res/qml/TabPages/PagesManager.qml | 6 - UmiOCR-data/version.py | 2 +- 10 files changed, 2 insertions(+), 1176 deletions(-) delete mode 100644 UmiOCR-data/py_src/mission/doc_preview_connector.py delete mode 100644 UmiOCR-data/py_src/mission/mission_doc.py delete mode 100644 UmiOCR-data/py_src/tag_pages/BatchDOC.py delete mode 100644 UmiOCR-data/qt_res/qml/TabPages/BatchDOC/BatchDOC.qml delete mode 100644 UmiOCR-data/qt_res/qml/TabPages/BatchDOC/BatchDOCConfigs.qml delete mode 100644 UmiOCR-data/qt_res/qml/TabPages/BatchDOC/PreviewDoc.qml diff --git a/UmiOCR-data/py_src/mission/doc_preview_connector.py b/UmiOCR-data/py_src/mission/doc_preview_connector.py deleted file mode 100644 index 843a5b50..00000000 --- a/UmiOCR-data/py_src/mission/doc_preview_connector.py +++ /dev/null @@ -1,68 +0,0 @@ -# =============================================== -# =============== 文档预览 - 连接器 =============== -# =============================================== - -from PySide2.QtCore import QObject, Slot, Signal -from PySide2.QtGui import QPixmap, QImage -import fitz # PyMuPDF - -from .simple_mission import SimpleMission -from ..image_controller.image_provider import PixmapProvider -from ..utils.call_func import CallFunc - - -# 文档预览连接器 -class DocPreviewConnector(QObject): - previewImg = Signal(str) - - def __init__(self, *args): - super().__init__(*args) - self._previewMission = SimpleMission(self._previewTask) # 简单任务对象 - self._previewDoc = None # 当前预览的对象 - self._previewPath = "" - - @Slot(str, int, str) - def preview(self, path, page, password): - page -= 1 - self._previewMission.addMissionList([(path, page, password)]) - - def _previewTask(self, msn): - path, page, password = msn - if path == self._previewPath: # 已经加载了 - doc = self._previewDoc - else: # 新加载 - try: - doc = fitz.open(path) - if doc.isEncrypted and not doc.authenticate(password): - msg = "[Warning] isEncrypted" - self.previewImg.emit(msg) - return - except Exception as e: - msg = f"[Error] 打开文档失败:{path} {e}" - self.previewImg.emit(msg) - return - self._previewDoc = doc - self._previewPath = path - page_count = doc.page_count - if page < 0 or page > page_count: - print(f"[Error] 页数{page}超出范围 0-{page_count} 。") - return - p = doc[page].get_pixmap() - # 方法1:通过 QImage fromImage 转换 - # 必须先使用变量提取出图像 https://github.com/pymupdf/PyMuPDF/issues/1210 - samples = p.samples - # 必须传入 pix.stride ,否则部分格式的图像会导致崩溃 - qimage = QImage(samples, p.width, p.height, p.stride, QImage.Format_RGB888) - qpixmap = QPixmap.fromImage(qimage) - # 方法2:编码后传入QPixmap(性能低) - # imgBytes = p.tobytes("ppm") - # qpixmap = QPixmap() - # qpixmap.loadFromData(imgBytes) - imgID = PixmapProvider.addPixmap(qpixmap) - self.previewImg.emit(imgID) - - # 清空缓存 - @Slot() - def clear(self): - self._previewDoc = None - self._previewPath = "" diff --git a/UmiOCR-data/py_src/mission/mission_doc.py b/UmiOCR-data/py_src/mission/mission_doc.py deleted file mode 100644 index b53f0895..00000000 --- a/UmiOCR-data/py_src/mission/mission_doc.py +++ /dev/null @@ -1,117 +0,0 @@ -# =============================================== -# =============== 文档 - 任务管理器 =============== -# =============================================== - -# API所有页数page 均为1开始 - -from .mission import Mission -from .mission_ocr import MissionOCR - -import fitz # PyMuPDF - - -class FitzOpen: - def __init__(self, path): - self._path = path - self._doc = None - - def __enter__(self): - self._doc = fitz.open(self._path) - return self._doc - - def __exit__(self, exc_type, exc_val, exc_tb): - self._doc.close() - - -class _MissionDocClass(Mission): - def __init__(self): - super().__init__() - self._schedulingMode = "1234" # 调度方式:顺序 - - # 添加一个文档任务 - # msnInfo: { 回调函数"onXX", 参数"argd":{"tbpu.xx", "ocr.xx"} } - # msnPath: 单个文档路径 - # pageRange: 页数范围。可选: None 全部页 , [1,3] 页面范围(含开头结束)。 - # pageList: 指定多个页数。可选: [] 使用pageRange设置 , [1,2,3] 指定页数 - # password: 密码(非必填) - def addMission(self, msnInfo, msnPath, pageRange=None, pageList=[], password=""): - try: - doc = fitz.open(msnPath) - except Exception as e: - return f"[Error] fitz.open error: {msnPath} {e}" - if doc.isEncrypted and not doc.authenticate(password): - if password: - msg = f"[Error] Incorrect password. 文档已加密,密码错误。 [{password}]" - else: - msg = "[Error] Doc encrypted. 文档已加密,请提供密码。" - return msg - msnInfo["doc"] = doc - msnInfo["path"] = msnPath - # 使用 pageRange 的页面范围 - if len(pageList) == 0: - if isinstance(pageRange, (tuple, list)) and len(pageRange) == 2: - a, b = pageRange[0], pageRange[1] - if a < 1: - return f"[Error] pageRange {pageRange} 范围起始不能小于1" - if b > doc.page_count: - return f"[Error] pageRange {pageRange} 范围结束不能大于页数 {doc.page_count}" - if a > b: - return f"[Error] pageRange {pageRange} 范围错误" - pageList = list(range(a - 1, b)) - else: - pageList = list(range(0, doc.page_count)) - # 检查页数列表合法性 - if len(pageList) == 0: - return "[Error] 页数列表为空" - if not all(isinstance(item, int) for item in pageList): - return "[Error] 页数列表内容非整数" - return self.addMissionList(msnInfo, pageList) - - def msnTask(self, msnInfo, pno): # 执行msn。pno为当前页数 - doc = msnInfo["doc"] - page = doc[pno] - # 获取元素 https://pymupdf.readthedocs.io/en/latest/_images/img-textpage.png - p = page.get_text("dict") - imgs = [] - tbList = [] # text box 文本块列表 - for t in p["blocks"]: - if t["type"] == 1: # 图片 - imgs.append({"bytes": t["image"]}) - elif t["type"] == 0: # 文本 - for line in t["lines"]: - for span in line["spans"]: - tb = {"box": span["bbox"], "text": span["text"]} - tbList.append(tb) - argd = msnInfo["argd"] - ocrList = MissionOCR.addMissionWait(argd, imgs) - errMsg = "" - for o in ocrList: - res = o["result"] - if res["code"] == 100: - tbList += res["data"] - elif res["code"] != 101: - errMsg += res["data"] + "\n" - if tbList: # 有文本 - resDict = {"code": 100, "data": tbList} - elif errMsg: # 无文本,有异常 - resDict = {"code": 102, "data": errMsg} - else: # 无文本 - resDict = {"code": 101, "data": ""} - return resDict - - # 获取一个文档的信息,如页数 - def getDocInfo(self, path): - try: - with FitzOpen(path) as doc: - info = { - "path": path, - "page_count": doc.page_count, - "is_encrypted": doc.isEncrypted, - } - return info - except Exception as e: - return {"path": path, "error": e} - - -# 全局 DOC 任务管理器 -MissionDOC = _MissionDocClass() diff --git a/UmiOCR-data/py_src/run.py b/UmiOCR-data/py_src/run.py index a1752e86..f7ed7908 100644 --- a/UmiOCR-data/py_src/run.py +++ b/UmiOCR-data/py_src/run.py @@ -25,7 +25,6 @@ def runQml(): from .tag_pages.tag_pages_connector import TagPageConnector # 页面连接器 from .mission.mission_connector import MissionConnector # 任务连接器 - from .mission.doc_preview_connector import DocPreviewConnector # 文档预览连接器 from .event_bus.pubsub_connector import PubSubConnector # 发布/订阅连接器 from .event_bus.key_mouse.key_mouse_connector import KeyMouseConnector # 键盘/鼠标连接器 from .plugins_controller.plugins_connector import PluginsConnector # 插件连接器 @@ -66,9 +65,6 @@ def runQml(): qmlRegisterType( GlobalConfigsConnector, "GlobalConfigsConnector", 1, 0, "GlobalConfigsConnector" ) - qmlRegisterType( - DocPreviewConnector, "DocPreviewConnector", 1, 0, "DocPreviewConnector" - ) # 5. 启动翻译 trans = QTranslator() diff --git a/UmiOCR-data/py_src/tag_pages/BatchDOC.py b/UmiOCR-data/py_src/tag_pages/BatchDOC.py deleted file mode 100644 index 3d15f1c2..00000000 --- a/UmiOCR-data/py_src/tag_pages/BatchDOC.py +++ /dev/null @@ -1,90 +0,0 @@ -# ======================================== -# =============== 批量PDF页 =============== -# ======================================== - -from .page import Page # 页基类 - -from ..mission.mission_doc import MissionDOC # 任务管理器 -from ..utils import utils - - -class BatchDOC(Page): - def __init__(self, *args): - super().__init__(*args) - self._msnIdPath = {} # 当前运行的任务,id到地址的映射 - - # 添加一些文档 - def addDocs(self, paths, isRecurrence): - paths = utils.findDocs(paths, isRecurrence) - docs = [] - for p in paths: - info = MissionDOC.getDocInfo(p) - if "error" in info: - print(f'[Warning] 读入文档失败:{p}\n{info["error"]}') - continue - docs.append(info) - # 返回:{ "path" , "page_count" } - return docs - - # 进行任务。 - # docs为列表,每一项为: {path:文档路径, range_start:范围起始, range_end: 范围结束, password:密码} - # 返回一个列表,每项为: {path:文档路径, msnID:任务ID。若[Error]开头则为失败。} - def msnDocs(self, docs, argd): - if self._msnIdPath: - return "[Error] 有任务进行中,不允许提交新任务。" - resList = [] - for d in docs: - msnInfo = { - "onStart": self._onStart, - "onReady": self._onReady, - "onGet": self._onGet, - "onEnd": self._onEnd, - "argd": argd, - } - path = d["path"] - pageRange = [int(d["range_start"]), int(d["range_end"])] - password = d["password"] - msnID = MissionDOC.addMission(msnInfo, path, pageRange, password=password) - if not msnID.startswith("["): # 添加任务成果才记录到 _msnIdPath - self._msnIdPath[msnID] = path - res = {"path": path, "msnID": msnID} - resList.append(res) - return resList - - # 停止当前所有任务 - def msnStop(self): - for msnID in self._msnIdPath: - MissionDOC.stopMissionList(msnID) - self._msnIdPath = {} - - # ========================= 【任务控制器的异步回调】 ========================= - - def _onStart(self, msnInfo): # 一个文档 开始 - msnID = msnInfo["msnID"] - if msnID not in self._msnIdPath: - print(f"[Warning] _onStart 任务ID未在记录。{msnID}") - return - self.callQmlInMain("onDocStart", msnInfo["path"]) - - def _onReady(self, msnInfo, page): # 一个文档的一页 准备开始 - page += 1 - pass - - def _onGet(self, msnInfo, page, res): # 一个文档的一页 获取结果 - page += 1 - msnID = msnInfo["msnID"] - if msnID not in self._msnIdPath: - print(f"[Warning] _onGet 任务ID未在记录。{msnID}") - return - self.callQmlInMain("onDocGet", msnInfo["path"], page, res) - - def _onEnd(self, msnInfo, msg): # 一个文档处理完毕 - # msg: [Success] [Warning] [Error] - msnID = msnInfo["msnID"] - if msnID not in self._msnIdPath: - print(f"[Warning] _onEnd 任务ID未在记录。{msnID}") - return - del self._msnIdPath[msnID] - if not self._msnIdPath: # 全部完成 - msg = "[Success] All completed." - self.callQmlInMain("onDocEnd", msnInfo["path"], msg) diff --git a/UmiOCR-data/py_src/tag_pages/tag_pages_connector.py b/UmiOCR-data/py_src/tag_pages/tag_pages_connector.py index 5c48f5a4..31aa2b34 100644 --- a/UmiOCR-data/py_src/tag_pages/tag_pages_connector.py +++ b/UmiOCR-data/py_src/tag_pages/tag_pages_connector.py @@ -11,13 +11,12 @@ # 导入本模块内定义的控制器类 from .BatchOCR import BatchOCR -from .BatchDOC import BatchDOC from .ScreenshotOCR import ScreenshotOCR from .QRcode import QRcode from ..utils.call_func import CallFunc # 控制器类列表 -PageClass = [BatchOCR, ScreenshotOCR, QRcode, BatchDOC] +PageClass = [BatchOCR, ScreenshotOCR, QRcode] TagPageConnObj = None # 记录实例 diff --git a/UmiOCR-data/qt_res/qml/TabPages/BatchDOC/BatchDOC.qml b/UmiOCR-data/qt_res/qml/TabPages/BatchDOC/BatchDOC.qml deleted file mode 100644 index 6884bebd..00000000 --- a/UmiOCR-data/qt_res/qml/TabPages/BatchDOC/BatchDOC.qml +++ /dev/null @@ -1,450 +0,0 @@ -// ================================================== -// =============== 功能页:批量文档处理 =============== -// ================================================== - -import QtQuick 2.15 -import QtQuick.Controls 2.15 - -import ".." -import "../../Widgets" -import "../../Widgets/ResultLayout" -import "../../Widgets/IgnoreArea" - -/* 文档参数: - path 路径 - pages 页数显示 - state 状态显示 - page_count 总页数 - range_start 范围开始 - range_end 范围结束 - is_encrypted 需要密码 - is_authenticate 密码正确 - password 密码 -*/ - -TabPage { - id: tabPage - - // ========================= 【逻辑】 ========================= - - property string msnState: "" // 任务状态, none init run stop - property string missionShow: "" // 当前任务信息展示字符串 - property var missionInfo: {} // 当前任务信息,耗时等 - /* - startTime: new Date().getTime(), // 开始时间 - allNum: msnLength, // 总长度 - costTime: 0, // 当前耗时 - nowNum: 0, // 当前执行长度 - */ - - property string msnID: "" // 当前任务ID - - Component.onCompleted: { - missionInfo = {} - setMsnState("none") - } - // TODO: 测试用 - Timer { - interval: 200 - running: true - onTriggered: { - addDocs( - [ - "D:/Pictures/Screenshots/test", - ] - ) - console.log("自动添加!!!!!!!!!!!!!") - // ocrStart() - // onClickDoc(0) - } - } - - // 添加一批文档。传入值是没有 file:/// 开头的纯字符串的列表。 - function addDocs(paths) { - // 调用Python方法 - const isRecurrence = configsComp.getValue("mission.recurrence") - const res = tabPage.callPy("addDocs", paths, isRecurrence) - if(res.length <= 0){ - return - } - // 加入表格 - let encryptedCount = 0 - for(let i in res) { - const info = res[i] - filesTableView.add({ - // 显示:路径,状态,页范围 - path: info.path, pages: `1-${info.page_count}`, - state: info.is_encrypted ? qsTr("加密") : "" , - // 数据 - page_count: info.page_count, - range_start: 1, - range_end: info.page_count, - is_encrypted: info.is_encrypted, // 有密码 - is_authenticate: !info.is_encrypted, // 已解密(密码正确) - password: "", - }) - if(info.is_encrypted) encryptedCount++ - } - if(encryptedCount > 0) { - qmlapp.popup.simple(qsTr("%1个加密文档").arg(encryptedCount), - qsTr("请点击文件名填写密码")) - } - } - - // 运行按钮按下 - function runBtnClick() { - switch(msnState) { - case "none": // 不在运行 - docStart() - break - case "run": // 工作中 - docStop() - break - } - } - - // 运行文档任务 - function docStart() { - const fileCount = filesTableView.rowCount - if(fileCount <= 0) - return - setMsnState("init") // 状态:初始化任务 - missionShow = "" - // 获取信息 - const docs = filesTableView.getColumnsValues([ - "path","range_start", "range_end", "is_encrypted", "is_authenticate", "password"]) - let pathIndex = {} // 缓存路径到下标的映射 - for(let i = 0; i < fileCount; i++) { - const d = docs[i] - pathIndex[d.path] = i - if(d.is_encrypted && !d.is_authenticate) { - qmlapp.popup.message(qsTr("文档已加密"), qsTr("【%1】\n请点击文档名,设置密码").arg(d.path), "warning") - setMsnState("none") // 状态:不在运行 - return - } - } - const argd = configsComp.getValueDict() - // 提交任务,获取任务信息 - const resList = tabPage.callPy("msnDocs", docs, argd) - let errMsg = "" // 错误信息 - let allPages = 0 // 页总数 - // 判断任务添加结果,刷新表格 - for(let i in resList) { - const res = resList[i] - const path = res.path, msnID = res.msnID - if(msnID.startsWith("[")) { // 添加任务失败 - filesTableView.setProperty(path, "state", qsTr("失败")) - errMsg += `${path} - ${msnID}\n` - } - else { // 添加任务成果,增加计数 - filesTableView.setProperty(path, "state", qsTr("排队")) - const d = docs[pathIndex[path]] - allPages += d.range_end - d.range_start + 1 - } - } - missionProgress.percent = 0 // 进度条显示 - if(allPages > 0) { // 有成功的任务 - // 刷新计数 - missionInfo = { - startTime: new Date().getTime(), // 开始时间 - allNum: allPages, // 总长度 - costTime: 0, // 当前耗时 - nowNum: 0, // 当前执行长度 - } - missionShow = `0s 0/${allPages} 0%` // 信息显示 - // 若tabPanel面板的下标没有变化过,则切换到记录页 - if(tabPanel.indexChangeNum < 2) - tabPanel.currentIndex = 1 - setMsnState("run") // 状态:运行中 - } - else { - missionInfo = {} - missionShow = qsTr("任务失败") - setMsnState("none") // 状态:不在运行 - } - // 错误信息显示 - if(errMsg) { - qmlapp.popup.message(qsTr("部分文档异常"), errMsg, "error") - } - } - - // 停止文档任务 - function docStop() { - setMsnState("stop") // 设置结束中 - tabPage.callPy("msnStop") - // 刷新表格,清空未执行的任务的状态 - let msnLength = filesTableView.rowCount - for(let i = 0; i < msnLength; i++) { - const row = filesTableView.get(i) - if(row.state !== "√") { - filesTableView.setProperty(i, "state", "") - } - } - setMsnState("none") // 设置结束 - } - - // 文件表格中单击文档 - function onClickDoc(index) { - if(msnState !== "none") return - const info = filesTableView.get(index) - previewDoc.show(info) - } - - // 关闭页面 - function closePage() { - if(msnState !== "none") { - const argd = { yesText: qsTr("依然关闭") } - const callback = (flag)=>{ - if(flag) { - docStop() - delPage() - } - } - qmlapp.popup.dialog("", qsTr("任务正在进行中。\n要结束任务并关闭页面吗?"), callback, "warning", argd) - } - else { - delPage() - } - } - - // ========================= 【python调用qml】 ========================= - - /* - none 不在运行 - init 正在启动 - run 工作中 - stop 停止中 - */ - // 设置任务状态 - function setMsnState(flag) { - msnState = flag - switch(flag) { - case "none": // 不在运行 - runBtn.text_ = qsTr("开始任务") - runBtn.enabled = true - break; - case "init": // 正在启动 - runBtn.text_ = qsTr("启动中…") - runBtn.enabled = false - break; - case "run": // 工作中 - runBtn.text_ = qsTr("停止任务") - runBtn.enabled = true - break; - case "stop": // 停止中 - runBtn.text_ = qsTr("停止中…") - runBtn.enabled = false - break; - } - console.log("set mission state: ", flag) - } - - // 准备开始处理一个文档 - function onDocStart(path) { - // 刷新表格显示 - const d = filesTableView.get(path) - let state = `0/${d.range_end - d.range_start + 1}` - filesTableView.setProperty(path, "state", state) - } - - // 获取一个文档的一页的结果 - function onDocGet(path, page, res) { - // 刷新总体 耗时显示 - const date = new Date(); - const currentTime = date.getTime() - missionInfo.costTime = currentTime - missionInfo.startTime - missionInfo.nowNum = missionInfo.nowNum + 1 - const costTime = (missionInfo.costTime/1000).toFixed(1) - const nowNum = missionInfo.nowNum - const percent = Math.floor(((nowNum/missionInfo.allNum)*100)) - missionProgress.percent = nowNum/missionInfo.allNum // 进度条显示 - missionShow = `${costTime}s ${nowNum}/${missionInfo.allNum} ${percent}%` // 信息显示 - // 刷新单个文档的信息 - const d = filesTableView.get(path) - let state = `${page - d.range_start}/${d.range_end - d.range_start + 1}` - filesTableView.setProperty(path, "state", state) - // 提取文字,添加到结果表格 - let title = path2name(path) - res.title = `${title} - ${page}` - resultsTableView.addOcrResult(res) - } - - // 一个文档处理完毕 - function onDocEnd(path, msg) { - filesTableView.setProperty(path, "state", "√") - // 任务成功 - if(msg.startsWith("[Success]")) { - // TODO - // 所有文档处理完毕 - if(msg === "[Success] All completed.") { - setMsnState("none") // 状态:不在运行 - } - } - // 任务失败 - else if(msg.startsWith("[Error]")) { - qmlapp.popup.message(qsTr("批量识别任务异常"), msg, "error") - } - } - - // 路径转文件名 - function path2name(path) { - const parts = path.split("/") - return parts[parts.length - 1] - } - - // ========================= 【布局】 ========================= - - // 配置 - configsComp: BatchDOCConfigs { - } - // 主区域:左右双栏面板。 - DoubleRowLayout { - anchors.fill: parent - initSplitterX: 0.5 - - // 左面板:控制板+文件表格 - leftItem: Panel { - anchors.fill: parent - - // 上方控制板 - Item { - id: ctrlPanel - anchors.top: parent.top - anchors.left: parent.left - anchors.right: parent.right - anchors.margins: size_.spacing - height: size_.line * 2 - clip: true - - // 右边按钮 - Button_ { - id: runBtn - anchors.top: parent.top - anchors.bottom: parent.bottom - anchors.right: parent.right - width: size_.line * 6 - bold_: true - - bgColor_: theme.coverColor1 - bgHoverColor_: theme.coverColor2 - text_: "" // 动态变化 - onClicked: tabPage.runBtnClick() - } - - // 左上信息 - Item { - id: infoContainer - anchors.top: parent.top - anchors.left: parent.left - anchors.right: runBtn.left - anchors.rightMargin: size_.smallSpacing - height: size_.line * 1.3 - clip: true - - Text_ { - anchors.right: parent.right - anchors.bottom: parent.bottom - - text: missionShow - color: theme.subTextColor - } - } - - // 左下进度条 - Item { - id: progressContainer - anchors.top: infoContainer.bottom - anchors.left: parent.left - anchors.bottom: parent.bottom - anchors.right: runBtn.left - anchors.rightMargin: size_.smallSpacing - anchors.topMargin: size_.smallSpacing * 0.5 - - HProgressBar { - id: missionProgress - anchors.fill: parent - color: theme.bgColor - percent: 0 - } - } - } - - // 下方文件表格 - FilesTableView { - id: filesTableView - anchors.top: ctrlPanel.bottom - anchors.left: parent.left - anchors.right: parent.right - anchors.bottom: parent.bottom - anchors.margins: size_.spacing - anchors.topMargin: size_.smallSpacing - headers: [ - {key: "path", title: qsTr("文档"), left: true, display: path2name, - btn: true, onClicked:onClickDoc}, - {key: "state", title: qsTr("状态"), btn: true, onClicked:onClickDoc}, - {key: "pages", title: qsTr("范围"), btn: true, onClicked:onClickDoc}, - ] - openBtnText: qsTr("选择文档") - clearBtnText: qsTr("清空") - defaultTips: qsTr("拖入或选择文档") - fileDialogTitle: qsTr("请选择文档") - fileDialogNameFilters: [qsTr("文档")+" (*.pdf *.xps *.epub *.mobi *.fb2 *.cbz)"] - isLock: msnState !== "none" - onAddPaths: { - tabPage.addDocs(paths) - } - } - } - // 右面板:文字输出 & 设置 - rightItem: Panel { - id: rightPanel - anchors.fill: parent - - // 结果面板 - ResultsTableView { - id: resultsTableView - anchors.fill: parent - visible: false - } - - // 配置项控制板 - TabPanel { - id: tabPanel - anchors.fill: parent - anchors.margins: size_.spacing - - tabsModel: [ - { - "key": "configs", - "title": qsTr("设置"), - "component": configsComp.panelComponent, - }, - { - "key": "ocrResult", - "title": qsTr("记录"), - "component": resultsTableView, - }, - ] - } - } - } - - // 鼠标拖入文档 - DropArea_ { - anchors.fill: parent - callback: tabPage.addDocs - } - - // 预览面板 - PreviewDoc { - id: previewDoc - anchors.fill: parent - configsComp: tabPage.configsComp - ignoreAreaKey: "tbpu.ignoreArea" - updateInfo: (path, info) => { - let infoA = filesTableView.get(path) - Object.assign(infoA, info) - filesTableView.set(path, infoA) - } - } -} \ No newline at end of file diff --git a/UmiOCR-data/qt_res/qml/TabPages/BatchDOC/BatchDOCConfigs.qml b/UmiOCR-data/qt_res/qml/TabPages/BatchDOC/BatchDOCConfigs.qml deleted file mode 100644 index 4c916d51..00000000 --- a/UmiOCR-data/qt_res/qml/TabPages/BatchDOC/BatchDOCConfigs.qml +++ /dev/null @@ -1,101 +0,0 @@ -// ============================================== -// =============== 批量PDF的配置项 =============== -// ============================================== - -import QtQuick 2.15 -import "../../Configs" - -Configs { - category_: "BatchPDF" - signal clickIgnoreArea() // 打开忽略区域 - - configDict: { - // OCR参数 - "ocr": qmlapp.globalConfigs.ocrManager.deploy(this, "ocr"), - - // 后处理 - "tbpu": { - "title": qsTr("OCR文本后处理"), - "type": "group", - - "merge": qmlapp.globalConfigs.utilsDicts.getTbpuMerge(), - "btns": { - "title": qsTr("忽略区域(点击文档名进入设置)"), - "btnsList": [], - }, - "ignoreArea": { - "type": "var", - "save": false, - }, - }, - - // 任务参数 - "mission": { - "title": qsTr("批量任务"), - "type": "group", - - "dirType": { - "title": qsTr("保存到"), - "optionsList": [ - ["source", qsTr("图片原目录")], - ["specify", qsTr("指定目录")], - ], - }, - "dir": { - "title": qsTr("指定目录"), - "toolTip": qsTr("必须先指定“保存到指定目录”才生效"), - "type": "file", - "selectExisting": true, // 选择现有 - "selectFolder": true, // 选择文件夹 - "dialogTitle": qsTr("OCR结果保存目录"), - }, - "fileNameFormat": { - "title": qsTr("文件名格式"), - "toolTip": qsTr("无需填写拓展名。支持插入以下占位符:\n%date 日期时间\n%name 原文件夹名/文件名\n举例:[OCR]_%name_%date\n生成:[OCR]_文档A_20230901_1213.txt\n添加占位符可以避免旧文件被新文件覆盖。"), - "default": "[OCR]_%name_%date", - "advanced": true, // 高级选项 - }, - "datetimeFormat": { - "title": qsTr("日期时间格式"), - "toolTip": qsTr("文件名中 %date 的日期格式。支持插入以下占位符:\n%Y 年、 %m 月、 %d 日、 %H 小时、 \n%M 分钟、 %S 秒 、 %unix 时间戳 \n举例:%Y年%m月%d日_%H-%M\n生成:2023年09月01日_12-13.txt"), - "default": "%Y%m%d_%H%M", - "advanced": true, // 高级选项 - }, - - "filesType": { - "title": qsTr("保存文件类型"), - "type": "group", - "enabledFold": true, - "fold": false, - - "txt": { - "title": qsTr("txt 标准格式"), - "toolTip": qsTr("含页数和识别文字"), - "default": true, - }, - "txtPlain": { - "title": qsTr("p.txt 纯文字格式"), - "toolTip": qsTr("仅输出识别文字,不含页数"), - "default": false, - }, - "jsonl": { - "title": qsTr("jsonl 原始信息"), - "toolTip": qsTr("每页为一条json数据,便于第三方程序读取操作"), - "default": false, - }, - }, - - "ingoreBlank": { - "title": qsTr("输出忽略空白图片"), - "toolTip": qsTr("若图片没有文字或识别失败,也不会输出错误提示信息"), - "default": true, - }, - "recurrence": { - "title": qsTr("递归读取子文件夹"), - "toolTip": qsTr("导入文件夹时,导入子文件夹中全部文档"), - "default": false, - "advanced": true, - }, - }, - } -} \ No newline at end of file diff --git a/UmiOCR-data/qt_res/qml/TabPages/BatchDOC/PreviewDoc.qml b/UmiOCR-data/qt_res/qml/TabPages/BatchDOC/PreviewDoc.qml deleted file mode 100644 index a9b316a8..00000000 --- a/UmiOCR-data/qt_res/qml/TabPages/BatchDOC/PreviewDoc.qml +++ /dev/null @@ -1,337 +0,0 @@ -// =========================================== -// =============== 文档预览面板 =============== -// =========================================== - -import QtQuick 2.15 -import QtQuick.Controls 2.15 -import DocPreviewConnector 1.0 - -import "../../Widgets" -import "../../Widgets/IgnoreArea" - -ModalLayer { - id: pRoot - property var updateInfo // 更新信息函数 - property var configsComp: undefined // 设置组件 - property string ignoreAreaKey: "" // 设置组件中忽略区域的key - - property bool running: false - property string previewPath: "" - property string password: "" - property bool isEncrypted: false // 已加密 - property bool isAuthenticate: false // 密码正确 - property int previewPage: -1 - property int pageCount: -1 - property int rangeStart: -1 - property int rangeEnd: -1 - - // 展示文档 - // info: path, page_count, range_start, range_end, is_encrypted, password, is_authenticate - function show(info) { - imgViewer.clear() - visible = true - previewPath = info.path - pageCount = info.page_count - previewPage = info.range_start - rangeStart = info.range_start - rangeEnd = info.range_end - password = info.password - isEncrypted = info.is_encrypted - isAuthenticate = info.is_authenticate - // 读取忽略区域设置 - let initArea = configsComp.getValue(ignoreAreaKey) - if(initArea && initArea.length>0) { - // 读取设置,反格式化 - let ig1 = [] - for(let i=0,l=initArea.length; i pageCount) rangeStart = pageCount - if(rangeEnd < rangeStart) rangeEnd = rangeStart - if(rangeEnd > pageCount) rangeEnd = pageCount - if(updateInfo) { - updateInfo(previewPath, { - pages: `${rangeStart}-${rangeEnd}`, - state: isAuthenticate ? "" : qsTr("加密"), - range_start: rangeStart, - range_end: rangeEnd, - password: password, - is_authenticate: isAuthenticate, - }) - } - // 更新忽略区域 - if(imgViewer.ig1Boxes.length > 0) { - // 格式化,存入设置 - let ig1 = [] - for(let i=0,l=imgViewer.ig1Boxes.length; i 0 && to <= pageCount) { - previewPage = to - toPreview() - } - } - - // 预览一页文档 - function toPreview() { - running = true - if(previewPage < 1) previewPage = 1 - if(previewPage > pageCount) previewPage = pageCount - prevConn.preview(previewPath, previewPage, password) - } - // 预览连接器 - DocPreviewConnector { - id: prevConn - // 图片渲染的回调 - onPreviewImg: function(imgID) { - const title = qsTr("打开文档失败") - if(imgID === "[Warning] isEncrypted") { - qmlapp.popup.simple(title, qsTr("请填写正确的密码")) - isAuthenticate = false - } - else if(imgID.startsWith("[Error]")) { - qmlapp.popup.message(title, imgID, "error") - } - else { - imgViewer.showImgID(imgID) - if(!isAuthenticate) { - qmlapp.popup.simple(qsTr("密码正确"), password) - isAuthenticate = true - } - } - } - } - - contentItem: DoubleRowLayout { - anchors.fill: parent - initSplitterX: size_.line * 13 - // 左:控制面板 - leftItem: Panel { - anchors.fill: parent - Column { - anchors.fill: parent - anchors.margins: size_.spacing - spacing: size_.smallSpacing - clip: true - // ===== 文件名 ===== - Text_ { - text: previewPath - anchors.left: parent.left - anchors.right: parent.right - wrapMode: TextEdit.WrapAnywhere // 任意换行 - maximumLineCount: 4 // 限制行数 - color: theme.subTextColor - font.pixelSize: size_.smallText - } - // ===== 密码 ===== - Row { - visible: isEncrypted && !isAuthenticate // 已加密,未填密码,才显示 - spacing: size_.spacing - height: size_.line + size_.spacing * 2 - Text_ { - color: theme.noColor - anchors.verticalCenter: parent.verticalCenter - text: qsTr("密码:") - } - Rectangle { - width: size_.line * 6 - anchors.top: parent.top - anchors.bottom: parent.bottom - color: theme.bgColor - TextInput_ { - clip: true - anchors.fill: parent - bgColor: "#00000000" - text: password - onTextChanged: password = text - } - } - IconButton { - anchors.top: parent.top - anchors.bottom: parent.bottom - width: height - icon_: "yes" - onClicked: toPreview() - } - } - // ===== 控制项 ===== - Column { - visible: !isEncrypted || isAuthenticate - spacing: size_.smallSpacing - anchors.left: parent.left - anchors.right: parent.right - // ===== 页数 ===== - Rectangle { - anchors.left: parent.left - anchors.right: parent.right - height: 1 - color: theme.coverColor4 - } - Text_ { - text: qsTr("预览页面") - } - Row { - spacing: size_.spacing - height: size_.line + size_.spacing * 2 - Button_ { - anchors.top: parent.top - anchors.bottom: parent.bottom - text_: "<" - onClicked: changePage(0, -1) - } - Button_ { - anchors.top: parent.top - anchors.bottom: parent.bottom - text_: ">" - onClicked: changePage(0, 1) - } - Rectangle { - width: size_.line * 3 - anchors.top: parent.top - anchors.bottom: parent.bottom - color: theme.bgColor - TextInput_ { - clip: true - anchors.fill: parent - bgColor: "#00000000" - text: previewPage - onTextChanged: changePage(text) - } - } - Text_ { - anchors.verticalCenter: parent.verticalCenter - text: "/ "+pageCount - } - } - // ===== OCR范围 ===== - Rectangle { - anchors.left: parent.left - anchors.right: parent.right - height: 1 - color: theme.coverColor4 - } - Text_ { - text: qsTr("OCR范围") - } - Row { - height: size_.line + size_.spacing * 2 - Rectangle { - width: size_.line * 3 - anchors.top: parent.top - anchors.bottom: parent.bottom - color: theme.bgColor - TextInput_ { - clip: true - anchors.fill: parent - bgColor: "#00000000" - text: rangeStart - onTextChanged: rangeStart = text - } - } - Text_ { - anchors.verticalCenter: parent.verticalCenter - text: " - " - } - Rectangle { - width: size_.line * 3 - anchors.top: parent.top - anchors.bottom: parent.bottom - color: theme.bgColor - TextInput_ { - clip: true - anchors.fill: parent - bgColor: "#00000000" - text: rangeEnd - onTextChanged: rangeEnd = text - } - } - } - // ===== 忽略区域 ===== - Rectangle { - anchors.left: parent.left - anchors.right: parent.right - height: 1 - color: theme.coverColor4 - } - Row { - spacing: size_.spacing - height: size_.line - Text_ { - anchors.verticalCenter: parent.verticalCenter - text: qsTr("忽略区域") - } - Button_ { - anchors.verticalCenter: parent.verticalCenter - height: size_.line - bgColor_: theme.coverColor1 - text_: qsTr("撤销") - onClicked: imgViewer.revokeIg() - textSize: size_.smallText - } - Button_ { - anchors.verticalCenter: parent.verticalCenter - height: size_.line - bgColor_: theme.coverColor1 - textColor_: theme.noColor - text_: qsTr("清空") - onClicked: imgViewer.clearIg() - textSize: size_.smallText - } - } - Text_ { - text: qsTr("右键拖拽,绘制矩形区域。包含在区域内的文字框将被忽略。可用于排除水印。对所有文档生效。") - color: theme.subTextColor - font.pixelSize: size_.smallText - anchors.left: parent.left - anchors.right: parent.right - wrapMode: TextEdit.WrapAnywhere // 任意换行 - maximumLineCount: 4 // 限制行数 - } - } - } - } - // 右:图片查看面板 - rightItem: ImageWithIgnore { - id: imgViewer - anchors.fill: parent - } - } -} diff --git a/UmiOCR-data/qt_res/qml/TabPages/PagesManager.qml b/UmiOCR-data/qt_res/qml/TabPages/PagesManager.qml index bab5bad0..66a36f7f 100644 --- a/UmiOCR-data/qt_res/qml/TabPages/PagesManager.qml +++ b/UmiOCR-data/qt_res/qml/TabPages/PagesManager.qml @@ -30,12 +30,6 @@ Item { title: qsTr("批量OCR"), intro: qsTr("# 批量OCR\n\n  \n\n导入本地图片或文件夹,批量转换文字。") }, - { - key: "BatchDOC", - needController: true, - title: "开发中", - intro: qsTr("# 批量文档识别\n\n  \n\n支持格式:pdf xps epub mobi fb2 cbz\n\n批量导入文档,提取文字。") - }, { key: "QRcode", needController: true, diff --git a/UmiOCR-data/version.py b/UmiOCR-data/version.py index a2e3bd74..6ab7199c 100644 --- a/UmiOCR-data/version.py +++ b/UmiOCR-data/version.py @@ -5,7 +5,7 @@ # 次版本号 MINOR_VERSION = 0 # 修订版本号 -PATCH_VERSION = 1 +PATCH_VERSION = 2 # 预发布阶段 PRE_RELEASE = None # 预发布版本号