From 884055db78a41e5166d5b74c9af85cf00a12bde0 Mon Sep 17 00:00:00 2001 From: troethe Date: Sun, 6 Aug 2023 17:06:42 +0200 Subject: [PATCH] FIXUP: Make tm_prev a tm_matrix, not a product of tm*cm --- pypdf/_page.py | 2 +- pypdf/_text_extraction/__init__.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pypdf/_page.py b/pypdf/_page.py index 01c5b0506..edbf5228d 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -1926,7 +1926,7 @@ def _extract_text( 1.0, 0.0, 0.0, - ] # will store cm_matrix * tm_matrix + ] # will store previous tm_matrix char_scale = 1.0 space_scale = 1.0 _space_width: float = 500.0 # will be set correctly at first Tf diff --git a/pypdf/_text_extraction/__init__.py b/pypdf/_text_extraction/__init__.py index f93cba45a..dbacd5696 100644 --- a/pypdf/_text_extraction/__init__.py +++ b/pypdf/_text_extraction/__init__.py @@ -99,10 +99,11 @@ def crlf_space_check( visitor_text: Optional[Callable[[Any, Any, Any, Any, Any], None]], spacewidth: float, ) -> Tuple[str, str, List[float]]: + m_prev = mult(tm_prev, cm_matrix) m = mult(tm_matrix, cm_matrix) orientation = orient(m) - delta_x = m[4] - tm_prev[4] - delta_y = m[5] - tm_prev[5] + delta_x = m[4] - m_prev[4] + delta_y = m[5] - m_prev[5] k = math.sqrt(abs(m[0] * m[3]) + abs(m[1] * m[2])) f = font_size * k if orientation not in orientations: @@ -186,7 +187,7 @@ def crlf_space_check( text += " " except Exception: pass - tm_prev = m + tm_prev = tm_matrix.copy() return text, output, tm_prev