"""Run PaddleOCR on a PDF and save the recognized text to a text file.

Snippet from the post ``_posts/2024-04~06/2024-04-27-ocr.md``. It reads
``./bijiao.pdf`` and writes the recognized strings, one per line, to
``bijiao.txt`` in the current working directory.
"""
from paddleocr import PaddleOCR, draw_ocr

ocr = PaddleOCR(use_angle_cls=False, lang="ch", page_num=0)
img_path = './bijiao.pdf'

result = ocr.ocr(img_path, cls=False)

textResult = []
for res in result:
    # An entry of the result can be None (presumably a page on which nothing
    # was recognized -- confirm against the PaddleOCR version in use); skip it
    # instead of failing when iterating over it.
    if res is None:
        continue
    # line[1][0] is the recognized string of each entry; the layout is
    # presumably [box, (text, confidence)] -- TODO confirm.
    textResult.extend(line[1][0] for line in res)

# Explicit UTF-8 so writing non-ASCII (Chinese) text does not depend on the
# platform's default encoding; the with-block closes the file even if the
# write raises.
with open("bijiao.txt", "w", encoding="utf-8") as fo:
    fo.write('\n'.join(textResult))