From cb1b02e7163d97222e39b52c020fb8bfdd83b8e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=AE=B8=E7=91=9E?= Date: Tue, 26 Mar 2024 16:46:05 +0800 Subject: [PATCH] feat: disable auto include table title --- magic_pdf/pdf_parse_for_train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/magic_pdf/pdf_parse_for_train.py b/magic_pdf/pdf_parse_for_train.py index 92b64e28..114cccf9 100644 --- a/magic_pdf/pdf_parse_for_train.py +++ b/magic_pdf/pdf_parse_for_train.py @@ -220,7 +220,7 @@ def parse_pdf_for_train( # 解析表格并对table_bboxes进行位置的微调,防止表格周围的文字被截断 table_bboxes = parse_tables(page_id, page, model_output_json) table_bboxes = fix_tables( - page, table_bboxes, include_table_title=True, scan_line_num=2 + page, table_bboxes, include_table_title=False, scan_line_num=2 ) # 修正 table_bboxes = fix_table_text_block( text_raw_blocks, table_bboxes