Skip to content

Commit

Permalink
add some tolerance by rounding down to the nearest int for indent checking
Browse files Browse the repository at this point in the history
  • Loading branch information
iammosespaulr committed Nov 20, 2024
1 parent 99c5f86 commit bb44846
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions marker/v2/processors/text.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
+ import math
from typing import List

import regex
Expand Down Expand Up @@ -44,11 +45,11 @@ def __call__(self, document: Document):
continue
new_page = document.get_page(next_block_doc.page_id) # the next block can come from the next page
new_block_lines = [new_page.get_block(block_id) for block_id in next_block_doc.structure]
- min_x = min([l.polygon.x_start for l in new_block_lines])
+ min_x = math.floor(min([l.polygon.x_start for l in new_block_lines]))
next_block_starts_indented = new_block_lines[0].polygon.x_start > min_x

lines: List[Line] = [page.get_block(block_id) for block_id in block.structure]
- max_x = max([l.polygon.x_end for l in lines])
+ max_x = math.floor(max([l.polygon.x_end for l in lines]))

last_line_is_full_width = lines[-1].polygon.x_end >= max_x
last_line_is_hyphentated = regex.compile(r'.*[\p{Ll}|\d][-—¬]\s?$', regex.DOTALL).match(lines[-1].raw_text(document).strip())
Expand Down

0 comments on commit bb44846

Please sign in to comment.