diff --git a/marker/v2/builders/layout.py b/marker/v2/builders/layout.py
index 25c2249..dd66f41 100644
--- a/marker/v2/builders/layout.py
+++ b/marker/v2/builders/layout.py
@@ -12,11 +12,12 @@
from marker.v2.schema.groups.page import PageGroup
from marker.v2.schema.polygon import PolygonBox
from marker.v2.schema.registry import get_block_class
-from marker.v2.schema.text.line import Line
class LayoutBuilder(BaseBuilder):
batch_size = None
+ layout_coverage_min_lines = 1
+ layout_coverage_threshold = .5
def __init__(self, layout_model, config=None):
self.layout_model = layout_model
@@ -57,7 +58,7 @@ def add_blocks_to_pages(self, pages: List[PageGroup], layout_results: List[Layou
def merge_blocks(self, document_pages: List[PageGroup], provider_page_lines: ProviderPageLines):
for document_page in document_pages:
- provider_lines = provider_page_lines[document_page.page_id]
+ provider_lines = provider_page_lines.get(document_page.page_id, [])
if not self.check_layout_coverage(document_page, provider_lines):
document_page.text_extraction_method = "surya"
continue
@@ -67,16 +68,30 @@ def check_layout_coverage(
         self,
         document_page: PageGroup,
         provider_lines: List[ProviderOutput],
-        coverage_threshold=0.5
     ):
-        layout_area = 0
-        provider_area = 0
+        """Decide whether the provider's text lines adequately cover the page layout.
+
+        Figure, Picture and Table blocks are ignored. Every other block counts as
+        covered when at least ``layout_coverage_min_lines`` provider lines overlap it.
+        Returns True when covered / total blocks >= ``layout_coverage_threshold``.
+        """
+        covered_blocks = 0
+        total_blocks = 0
         for layout_block_id in document_page.structure:
             layout_block = document_page.get_block(layout_block_id)
             if layout_block.block_type in [BlockTypes.Figure, BlockTypes.Picture, BlockTypes.Table]:
                 continue
-            layout_area += layout_block.polygon.area
+
+            total_blocks += 1
+            intersecting_lines = 0
             for provider_line in provider_lines:
-                provider_area += layout_block.polygon.intersection_area(provider_line.line.polygon)
-        coverage_ratio = provider_area / layout_area if layout_area > 0 else 0
-        return coverage_ratio >= coverage_threshold
+                if layout_block.polygon.intersection_area(provider_line.line.polygon) > 0:
+                    intersecting_lines += 1
+
+            # The minimum is inclusive: a block overlapped by exactly min_lines lines is covered.
+            if intersecting_lines >= self.layout_coverage_min_lines:
+                covered_blocks += 1
+
+        # max(..., 1) guards the division when the page has no eligible blocks (ratio is then 0).
+        coverage_ratio = covered_blocks / max(total_blocks, 1)
+        return coverage_ratio >= self.layout_coverage_threshold
diff --git a/marker/v2/schema/blocks/code.py b/marker/v2/schema/blocks/code.py
index ca8e6e1..ad79665 100644
--- a/marker/v2/schema/blocks/code.py
+++ b/marker/v2/schema/blocks/code.py
@@ -6,5 +6,7 @@ class Code(Block):
block_type: BlockTypes = BlockTypes.Code
def assemble_html(self, child_blocks, parent_structure):
- template = super().assemble_html(child_blocks, parent_structure)
+ template = ""
+ for c in child_blocks:
+ template += f"
{template}"