Skip to content

Commit

Permalink
FIXUP: Make tm_prev a tm_matrix, not a product of tm*cm
Browse files Browse the repository at this point in the history
  • Loading branch information
troethe committed Aug 6, 2023
1 parent 9e6512c commit 884055d
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 4 deletions.
2 changes: 1 addition & 1 deletion pypdf/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -1926,7 +1926,7 @@ def _extract_text(
1.0,
0.0,
0.0,
- ] # will store cm_matrix * tm_matrix
+ ] # will store previous tm_matrix
char_scale = 1.0
space_scale = 1.0
_space_width: float = 500.0 # will be set correctly at first Tf
Expand Down
7 changes: 4 additions & 3 deletions pypdf/_text_extraction/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,11 @@ def crlf_space_check(
visitor_text: Optional[Callable[[Any, Any, Any, Any, Any], None]],
spacewidth: float,
) -> Tuple[str, str, List[float]]:
+ m_prev = mult(tm_prev, cm_matrix)
  m = mult(tm_matrix, cm_matrix)
  orientation = orient(m)
- delta_x = m[4] - tm_prev[4]
- delta_y = m[5] - tm_prev[5]
+ delta_x = m[4] - m_prev[4]
+ delta_y = m[5] - m_prev[5]
k = math.sqrt(abs(m[0] * m[3]) + abs(m[1] * m[2]))
f = font_size * k
if orientation not in orientations:
Expand Down Expand Up @@ -186,7 +187,7 @@ def crlf_space_check(
text += " "
except Exception:
pass
- tm_prev = m
+ tm_prev = tm_matrix.copy()
return text, output, tm_prev


Expand Down

0 comments on commit 884055d

Please sign in to comment.