Skip to content

Commit

Permalink
Refactor number of pages alias (fix #1090) (#1203)
Browse files Browse the repository at this point in the history
* refactor number of pages alias

* clean commented code

* ignore empty fragment. update tests

* rename "alias" to "text substitution"
add test

* change type to isinstance for pylint

* fix tests

* change disable text shaping test now that {nb} in shaping is fixed

* add docstrings and changelog

* formatting
  • Loading branch information
andersonhc authored Oct 30, 2024
1 parent 3fba534 commit 9df0cae
Show file tree
Hide file tree
Showing 11 changed files with 244 additions and 74 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ This can also be enabled programmatically with `warnings.simplefilter('default',
### Fixed
* `FPDF.set_text_shaping(False)` was broken since version 2.7.8 and is now working properly - [issue #1287](https://github.com/py-pdf/fpdf2/issues/1287)
* fixed bug where cells with `rowspan`, `colspan` > 1 and null text were not displayed properly - [issue #1293](https://github.com/py-pdf/fpdf2/issues/1293)
### Changed
* improved logic for handling text substitution of the total number of pages, ensuring compatibility with text shaping - [issue #1090](https://github.com/py-pdf/fpdf2/issues/1090)

## [2.8.1] - 2024-10-04
### Added
Expand Down
78 changes: 54 additions & 24 deletions fpdf/fpdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from numbers import Number
from os.path import splitext
from pathlib import Path
from typing import Callable, Iterator, NamedTuple, Optional, Union
from typing import Callable, Dict, Iterator, NamedTuple, Optional, Union

try:
from endesive import signer
Expand Down Expand Up @@ -101,7 +101,12 @@ class Image:
preload_image,
)
from .linearization import LinearizedOutputProducer
from .line_break import Fragment, MultiLineBreak, TextLine
from .line_break import (
Fragment,
MultiLineBreak,
TextLine,
TotalPagesSubstitutionFragment,
)
from .outline import OutlineSection
from .output import (
OutputProducer,
Expand Down Expand Up @@ -250,7 +255,8 @@ def __init__(
but is less compatible with the PDF spec.
"""
self.page = 0 # current page number
self.pages = {} # array of PDFPage objects starting at index 1
# maps page numbers (starting at 1) to PDFPage objects:
self.pages: Dict[int, PDFPage] = {}
self.fonts = {} # map font string keys to an instance of CoreFont or TTFFont
# map page numbers to a set of font indices:
self.fonts_used_per_page_number = defaultdict(set)
Expand Down Expand Up @@ -3175,6 +3181,8 @@ def _render_styled_text_line(
f"{(self.h - self.y - 0.5 * h - 0.3 * max_font_size) * k:.2f} Td"
)
for i, frag in enumerate(fragments):
if isinstance(frag, TotalPagesSubstitutionFragment):
self.pages[self.page].add_text_substitution(frag)
if frag.graphics_state["text_color"] != last_used_color:
# allow to change color within the line of text.
last_used_color = frag.graphics_state["text_color"]
Expand Down Expand Up @@ -3427,6 +3435,22 @@ def get_fallback_font(self, char, style=""):
def _parse_chars(self, text: str, markdown: bool) -> Iterator[Fragment]:
"Split text into fragments"
if not markdown and not self.text_shaping and not self._fallback_font_ids:
if self.str_alias_nb_pages:
for seq, fragment_text in enumerate(
text.split(self.str_alias_nb_pages)
):
if seq > 0:
yield TotalPagesSubstitutionFragment(
self.str_alias_nb_pages,
self._get_current_graphics_state(),
self.k,
)
if fragment_text:
yield Fragment(
fragment_text, self._get_current_graphics_state(), self.k
)
return

yield Fragment(text, self._get_current_graphics_state(), self.k)
return
txt_frag, in_bold, in_italics, in_underline = (
Expand Down Expand Up @@ -3486,6 +3510,23 @@ def frag():
yield frag()
current_text_script = text_script

if self.str_alias_nb_pages:
if text[: len(self.str_alias_nb_pages)] == self.str_alias_nb_pages:
if txt_frag:
yield frag()
gstate = self._get_current_graphics_state()
gstate["font_style"] = ("B" if in_bold else "") + (
"I" if in_italics else ""
)
gstate["underline"] = in_underline
yield TotalPagesSubstitutionFragment(
self.str_alias_nb_pages,
gstate,
self.k,
)
text = text[len(self.str_alias_nb_pages) :]
continue

# Check that previous & next characters are not identical to the marker:
if markdown:
if (
Expand Down Expand Up @@ -4675,26 +4716,6 @@ def sign(
)
self.pages[self.page].annots.append(annotation)

def _substitute_page_number(self):
    """
    Replace the total-pages alias found in the content of every page
    by the actual number of pages of the document.

    The replacement is performed once per byte encoding under which the alias
    may have been written to the page content. A debug message is logged
    when at least one occurrence of the alias was found.
    """
    found_alias = False
    # "utf-16-be" covers fonts using subsets (unicode),
    # "latin-1" covers non-subset fonts:
    for encoding in ("utf-16-be", "latin-1"):
        needle = self.str_alias_nb_pages.encode(encoding)
        rendered_count = str(self.pages_count).encode(encoding)
        for page in self.pages.values():
            if needle in page.contents:
                found_alias = True
            page.contents = page.contents.replace(needle, rendered_count)
    if not found_alias:
        return
    LOGGER.debug(
        "Substitution of '%s' was performed in the document",
        self.str_alias_nb_pages,
    )

def _insert_table_of_contents(self):
# Doc has been closed but we want to write to self.pages[self.page] instead of self.buffer:
tocp = self._toc_placeholder
Expand Down Expand Up @@ -5252,7 +5273,16 @@ def output(
if self._toc_placeholder:
self._insert_table_of_contents()
if self.str_alias_nb_pages:
self._substitute_page_number()
for page in self.pages.values():
for substitution_item in page.get_text_substitutions():
page.contents = page.contents.replace(
substitution_item.get_placeholder_string().encode(
"latin-1"
),
substitution_item.render_text_substitution(
str(self.pages_count)
).encode("latin-1"),
)
if linearize:
output_producer_class = LinearizedOutputProducer
output_producer = output_producer_class(self)
Expand Down
91 changes: 77 additions & 14 deletions fpdf/line_break.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from numbers import Number
from typing import NamedTuple, Any, List, Optional, Union, Sequence
from uuid import uuid4

from .enums import Align, CharVPos, TextDirection, WrapMode
from .errors import FPDFException
Expand Down Expand Up @@ -234,6 +235,14 @@ def get_width(
w += char_spacing * (char_len - 1)
return w / self.k

def has_same_style(self, other: "Fragment"):
    """
    Tell whether two fragments are equivalent apart from their characters/string.

    Two fragments have the same style when their graphics states are equal,
    their scale factors `k` are equal, and they are instances of the very same class.
    The class is checked in both directions, so the result does not depend on
    which of the two fragments the method is called on: a plain fragment is never
    reported as equivalent to an instance of one of its subclasses.

    Args:
        other: the fragment to compare this one with.

    Returns:
        True if both fragments only differ by their text content, False otherwise.
    """
    return (
        self.graphics_state == other.graphics_state
        and self.k == other.k
        # Checking isinstance() both ways means "same class",
        # whichever of the two fragments is the more specialised one:
        and isinstance(other, self.__class__)
        and isinstance(self, other.__class__)
    )

def get_character_width(self, character: str, print_sh=False, initial_cs=True):
"""
Return the width of a single character out of the stored text.
Expand Down Expand Up @@ -350,6 +359,50 @@ def render_pdf_text_core(self, frag_ws, current_ws):
return ret


class TotalPagesSubstitutionFragment(Fragment):
    """
    Text fragment standing in for the total number of pages of the document.

    While the page content is being rendered, this fragment emits a unique placeholder
    string instead of actual text. Once the final output is being produced,
    that placeholder is replaced by the rendered total number of pages.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # random identifier, used to build a placeholder specific to this fragment
        self.uuid = uuid4()

    def get_placeholder_string(self):
        """
        Return the placeholder string of this fragment.
        It embeds a universally unique identifier (UUID4), so that it is distinct
        from the placeholders of the other substitution fragments.
        """
        return "::placeholder:" + str(self.uuid) + "::"

    def render_pdf_text(self, *args, **kwargs):
        """
        Called during the page content rendering phase, like for any other `Fragment`.
        Instead of rendering text, it records the positional and keyword arguments
        it received, so that the real rendering can be done later on,
        and returns the placeholder string.
        """
        self._render_args, self._render_kwargs = args, kwargs
        return self.get_placeholder_string()

    def render_text_substitution(self, replacement_text: str):
        """
        Called during the output phase. It sets `replacement_text` as the characters
        of this fragment, then renders it through the superclass `render_pdf_text()`
        using the arguments recorded in `_render_args` and `_render_kwargs`.
        The returned value is the text meant to replace the placeholder.
        """
        self.characters = [*replacement_text]
        saved_args, saved_kwargs = self._render_args, self._render_kwargs
        return super().render_pdf_text(*saved_args, **saved_kwargs)


class TextLine(NamedTuple):
fragments: tuple
text_width: float
Expand Down Expand Up @@ -445,8 +498,7 @@ def add_character(
self,
character: str,
character_width: float,
graphics_state: dict,
k: float,
original_fragment: Fragment,
original_fragment_index: int,
original_character_index: int,
height: float,
Expand All @@ -455,16 +507,29 @@ def add_character(
assert character != NEWLINE
self.height = height
if not self.fragments:
self.fragments.append(Fragment("", graphics_state, k, url))
self.fragments.append(
original_fragment.__class__(
characters="",
graphics_state=original_fragment.graphics_state,
k=original_fragment.k,
link=url,
)
)

# characters are expected to be grouped into fragments by font and
# character attributes. If the last existing fragment doesn't match
# the properties of the pending character -> add a new fragment.
elif (
graphics_state != self.fragments[-1].graphics_state
or k != self.fragments[-1].k
):
self.fragments.append(Fragment("", graphics_state, k, url))
elif isinstance(
original_fragment, Fragment
) and not original_fragment.has_same_style(self.fragments[-1]):
self.fragments.append(
original_fragment.__class__(
characters="",
graphics_state=original_fragment.graphics_state,
k=original_fragment.k,
link=url,
)
)
active_fragment = self.fragments[-1]

if character in BREAKING_SPACE_SYMBOLS_STR:
Expand All @@ -491,8 +556,8 @@ def add_character(
self.number_of_spaces,
HYPHEN,
character_width,
graphics_state,
k,
original_fragment.graphics_state,
original_fragment.k,
)

if character != SOFT_HYPHEN or self.print_sh:
Expand Down Expand Up @@ -550,8 +615,7 @@ def automatic_break(self, align: Align):
self.add_character(
self.hyphen_break_hint.curchar,
self.hyphen_break_hint.curchar_width,
self.hyphen_break_hint.graphics_state,
self.hyphen_break_hint.k,
self.hyphen_break_hint,
self.hyphen_break_hint.original_fragment_index,
self.hyphen_break_hint.original_character_index,
self.height,
Expand Down Expand Up @@ -716,8 +780,7 @@ def get_line(self):
current_line.add_character(
character,
character_width,
current_fragment.graphics_state,
current_fragment.k,
current_fragment,
self.fragment_index,
self.character_index,
current_font_height * self.line_height,
Expand Down
10 changes: 10 additions & 0 deletions fpdf/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@
from contextlib import contextmanager
from io import BytesIO


from .annotations import PDFAnnotation
from .enums import SignatureFlag
from .errors import FPDFException
from .line_break import TotalPagesSubstitutionFragment
from .image_datastructures import RasterImageInfo
from .outline import build_outline_objs
from .sign import Signature, sign_content
Expand Down Expand Up @@ -243,6 +245,7 @@ class PDFPage(PDFObject):
"_index",
"_width_pt",
"_height_pt",
"_text_substitution_fragments",
)

def __init__(
Expand All @@ -265,6 +268,7 @@ def __init__(
self.parent = None # must always be set before calling .serialize()
self._index = index
self._width_pt, self._height_pt = None, None
self._text_substitution_fragments: list[TotalPagesSubstitutionFragment] = []

def index(self):
return self._index
Expand All @@ -277,6 +281,12 @@ def set_dimensions(self, width_pt, height_pt):
"Accepts a pair (width, height) in the unit specified to FPDF constructor"
self._width_pt, self._height_pt = width_pt, height_pt

def get_text_substitutions(self):
    "Returns the list of text substitution fragments registered on this page"
    return self._text_substitution_fragments

def add_text_substitution(self, fragment):
    "Registers a text substitution fragment on this page, by appending it to the page list"
    self._text_substitution_fragments.append(fragment)


class PDFPagesRoot(PDFObject):
def __init__(self, count, media_box):
Expand Down
Binary file modified test/alias_nb_pages.pdf
Binary file not shown.
Binary file added test/alias_with_text_shaping.pdf
Binary file not shown.
Binary file modified test/outline/toc_with_nb_and_footer.pdf
Binary file not shown.
21 changes: 21 additions & 0 deletions test/test_alias.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,24 @@ def test_custom_alias_nb_pages(tmp_path):
pdf.add_page()
pdf.cell(0, 10, f"Page {pdf.page_no()}/{alias}", align="C")
assert_pdf_equal(pdf, HERE / "alias_nb_pages.pdf", tmp_path)


def test_alias_with_shaping(tmp_path):
    """
    Render the total-pages alias through write(), cell(), write_html() and multi_cell(),
    on a first page with text shaping enabled and on a second page with it disabled,
    then compare the result with the reference PDF.
    """
    pdf = fpdf.FPDF()
    pdf.add_font("Quicksand", style="", fname=HERE / "fonts" / "Quicksand-Regular.otf")

    def render_alias_samples():
        # the same series of calls is issued on both pages
        pdf.write(text="Pages {nb}")
        pdf.ln()
        pdf.cell(text="{nb}", new_x="left", new_y="next")
        pdf.write_html("<h1>{nb}</h1>")
        pdf.multi_cell(w=pdf.epw, text="Number of pages: {nb}\nAgain:{nb}")

    pdf.add_page()
    pdf.set_font("Quicksand", "", 24)
    pdf.set_text_shaping(True)
    render_alias_samples()

    pdf.add_page()
    pdf.set_text_shaping(False)
    render_alias_samples()

    assert_pdf_equal(pdf, HERE / "alias_with_text_shaping.pdf", tmp_path)
Loading

0 comments on commit 9df0cae

Please sign in to comment.