Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor number of pages alias (fix #1090) #1203

Merged
merged 14 commits into from
Oct 30, 2024
Merged
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ This can also be enabled programmatically with `warnings.simplefilter('default',
### Fixed
* `FPDF.set_text_shaping(False)` was broken since version 2.7.8 and is now working properly - [issue #1287](https://github.com/py-pdf/fpdf2/issues/1287)
* fixed bug where cells with `rowspan`, `colspan` > 1 and null text were not displayed properly - [issue #1293](https://github.com/py-pdf/fpdf2/issues/1293)
### Changed
* improved logic for handling text substitution of the total number of pages, ensuring compatibility with text shaping - [issue #1090](https://github.com/py-pdf/fpdf2/issues/1090)

## [2.8.1] - 2024-10-04
### Added
Expand Down
78 changes: 54 additions & 24 deletions fpdf/fpdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from numbers import Number
from os.path import splitext
from pathlib import Path
from typing import Callable, Iterator, NamedTuple, Optional, Union
from typing import Callable, Dict, Iterator, NamedTuple, Optional, Union

try:
from endesive import signer
Expand Down Expand Up @@ -101,7 +101,12 @@ class Image:
preload_image,
)
from .linearization import LinearizedOutputProducer
from .line_break import Fragment, MultiLineBreak, TextLine
from .line_break import (
Fragment,
MultiLineBreak,
TextLine,
TotalPagesSubstitutionFragment,
)
from .outline import OutlineSection
from .output import (
OutputProducer,
Expand Down Expand Up @@ -250,7 +255,8 @@ def __init__(
but is less compatible with the PDF spec.
"""
self.page = 0 # current page number
self.pages = {} # array of PDFPage objects starting at index 1
# mapping of page numbers (starting at 1) to PDFPage objects:
self.pages: Dict[int, PDFPage] = {}
self.fonts = {} # map font string keys to an instance of CoreFont or TTFFont
# map page numbers to a set of font indices:
self.fonts_used_per_page_number = defaultdict(set)
Expand Down Expand Up @@ -3175,6 +3181,8 @@ def _render_styled_text_line(
f"{(self.h - self.y - 0.5 * h - 0.3 * max_font_size) * k:.2f} Td"
)
for i, frag in enumerate(fragments):
if isinstance(frag, TotalPagesSubstitutionFragment):
self.pages[self.page].add_text_substitution(frag)
if frag.graphics_state["text_color"] != last_used_color:
# allow to change color within the line of text.
last_used_color = frag.graphics_state["text_color"]
Expand Down Expand Up @@ -3427,6 +3435,22 @@ def get_fallback_font(self, char, style=""):
def _parse_chars(self, text: str, markdown: bool) -> Iterator[Fragment]:
"Split text into fragments"
if not markdown and not self.text_shaping and not self._fallback_font_ids:
if self.str_alias_nb_pages:
for seq, fragment_text in enumerate(
text.split(self.str_alias_nb_pages)
):
if seq > 0:
yield TotalPagesSubstitutionFragment(
self.str_alias_nb_pages,
self._get_current_graphics_state(),
self.k,
)
if fragment_text:
yield Fragment(
fragment_text, self._get_current_graphics_state(), self.k
)
return

yield Fragment(text, self._get_current_graphics_state(), self.k)
return
txt_frag, in_bold, in_italics, in_underline = (
Expand Down Expand Up @@ -3486,6 +3510,23 @@ def frag():
yield frag()
current_text_script = text_script

if self.str_alias_nb_pages:
if text[: len(self.str_alias_nb_pages)] == self.str_alias_nb_pages:
if txt_frag:
yield frag()
gstate = self._get_current_graphics_state()
gstate["font_style"] = ("B" if in_bold else "") + (
"I" if in_italics else ""
)
gstate["underline"] = in_underline
yield TotalPagesSubstitutionFragment(
self.str_alias_nb_pages,
gstate,
self.k,
)
text = text[len(self.str_alias_nb_pages) :]
continue

# Check that previous & next characters are not identical to the marker:
if markdown:
if (
Expand Down Expand Up @@ -4675,26 +4716,6 @@ def sign(
)
self.pages[self.page].annots.append(annotation)

def _substitute_page_number(self):
    """
    Replace every occurrence of `self.str_alias_nb_pages` found in the contents
    of each page by the value of `self.pages_count`.

    Two passes are made over all pages: first with both strings encoded as
    UTF-16-BE (fonts using subsets / unicode), then with both encoded as
    latin-1 (non-subset fonts). A debug message is logged if at least one
    occurrence was found.
    """
    found = False
    total = str(self.pages_count)
    # The order matters: the whole UTF-16-BE pass completes before the
    # latin-1 alias is even encoded.
    for encoding in ("utf-16-be", "latin-1"):
        needle = self.str_alias_nb_pages.encode(encoding)
        replacement = total.encode(encoding)
        for page in self.pages.values():
            if needle in page.contents:
                found = True
            page.contents = page.contents.replace(needle, replacement)
    if found:
        LOGGER.debug(
            "Substitution of '%s' was performed in the document",
            self.str_alias_nb_pages,
        )

def _insert_table_of_contents(self):
# Doc has been closed but we want to write to self.pages[self.page] instead of self.buffer:
tocp = self._toc_placeholder
Expand Down Expand Up @@ -5252,7 +5273,16 @@ def output(
if self._toc_placeholder:
self._insert_table_of_contents()
if self.str_alias_nb_pages:
andersonhc marked this conversation as resolved.
Show resolved Hide resolved
self._substitute_page_number()
for page in self.pages.values():
for substitution_item in page.get_text_substitutions():
page.contents = page.contents.replace(
substitution_item.get_placeholder_string().encode(
"latin-1"
),
substitution_item.render_text_substitution(
str(self.pages_count)
).encode("latin-1"),
)
if linearize:
output_producer_class = LinearizedOutputProducer
output_producer = output_producer_class(self)
Expand Down
91 changes: 77 additions & 14 deletions fpdf/line_break.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from numbers import Number
from typing import NamedTuple, Any, List, Optional, Union, Sequence
from uuid import uuid4

from .enums import Align, CharVPos, TextDirection, WrapMode
from .errors import FPDFException
Expand Down Expand Up @@ -234,6 +235,14 @@ def get_width(
w += char_spacing * (char_len - 1)
return w / self.k

def has_same_style(self, other: "Fragment"):
    """
    Tell whether `other` is styled like this fragment, ignoring its characters.

    Returns True only when both fragments have equal `graphics_state`, equal `k`,
    and `other` is an instance of this fragment's class.
    """
    if self.graphics_state != other.graphics_state:
        return False
    if self.k != other.k:
        return False
    # A fragment of an unrelated class never matches, even with identical styling.
    return isinstance(other, self.__class__)

def get_character_width(self, character: str, print_sh=False, initial_cs=True):
"""
Return the width of a single character out of the stored text.
Expand Down Expand Up @@ -350,6 +359,50 @@ def render_pdf_text_core(self, frag_ws, current_ws):
return ret


class TotalPagesSubstitutionFragment(Fragment):
    """
    A special type of text fragment that represents a placeholder for the total number of pages
    in a PDF document.

    A placeholder will be generated during the initial content rendering phase of a PDF document.
    This placeholder is later replaced by the total number of pages in the document when the final
    output is being produced.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.uuid = uuid4()
        # Arguments captured by render_pdf_text(); they stay None until the
        # fragment has been rendered once, which render_text_substitution() checks.
        self._render_args = None
        self._render_kwargs = None

    def get_placeholder_string(self):
        """
        This method returns a placeholder string containing a universally unique identifier (UUID4),
        ensuring that the placeholder is distinct and does not conflict with other placeholders
        within the document.
        """
        return f"::placeholder:{self.uuid}::"

    def render_pdf_text(self, *args, **kwargs):
        """
        This method is invoked during the page content rendering phase, which is common to all
        `Fragment` instances. It stores the provided arguments and keyword arguments to preserve
        the necessary information and graphic state for the final substitution rendering.

        The method then returns the unique placeholder string.
        """
        self._render_args = args
        self._render_kwargs = kwargs
        return self.get_placeholder_string()

    def render_text_substitution(self, replacement_text: str):
        """
        This method is invoked at the output phase. It calls `render_pdf_text()` from the superclass
        to render the fragment with the preserved rendering state (stored in `_render_args` and `_render_kwargs`)
        and insert the final text in place of the placeholder.

        Raises:
            FPDFException: if `render_pdf_text()` has not been called on this fragment yet,
                as there is then no rendering state to replay.
        """
        if self._render_args is None or self._render_kwargs is None:
            raise FPDFException(
                "render_text_substitution() called before render_pdf_text():"
                " no rendering state has been recorded for this placeholder"
            )
        self.characters = list(replacement_text)
        return super().render_pdf_text(*self._render_args, **self._render_kwargs)


class TextLine(NamedTuple):
fragments: tuple
text_width: float
Expand Down Expand Up @@ -445,8 +498,7 @@ def add_character(
self,
character: str,
character_width: float,
graphics_state: dict,
k: float,
original_fragment: Fragment,
original_fragment_index: int,
original_character_index: int,
height: float,
Expand All @@ -455,16 +507,29 @@ def add_character(
assert character != NEWLINE
self.height = height
if not self.fragments:
self.fragments.append(Fragment("", graphics_state, k, url))
self.fragments.append(
original_fragment.__class__(
characters="",
graphics_state=original_fragment.graphics_state,
k=original_fragment.k,
link=url,
)
)

# characters are expected to be grouped into fragments by font and
# character attributes. If the last existing fragment doesn't match
# the properties of the pending character -> add a new fragment.
elif (
graphics_state != self.fragments[-1].graphics_state
or k != self.fragments[-1].k
):
self.fragments.append(Fragment("", graphics_state, k, url))
elif isinstance(
original_fragment, Fragment
) and not original_fragment.has_same_style(self.fragments[-1]):
self.fragments.append(
original_fragment.__class__(
characters="",
graphics_state=original_fragment.graphics_state,
k=original_fragment.k,
link=url,
)
)
active_fragment = self.fragments[-1]

if character in BREAKING_SPACE_SYMBOLS_STR:
Expand All @@ -491,8 +556,8 @@ def add_character(
self.number_of_spaces,
HYPHEN,
character_width,
graphics_state,
k,
original_fragment.graphics_state,
original_fragment.k,
)

if character != SOFT_HYPHEN or self.print_sh:
Expand Down Expand Up @@ -550,8 +615,7 @@ def automatic_break(self, align: Align):
self.add_character(
self.hyphen_break_hint.curchar,
self.hyphen_break_hint.curchar_width,
self.hyphen_break_hint.graphics_state,
self.hyphen_break_hint.k,
self.hyphen_break_hint,
self.hyphen_break_hint.original_fragment_index,
self.hyphen_break_hint.original_character_index,
self.height,
Expand Down Expand Up @@ -716,8 +780,7 @@ def get_line(self):
current_line.add_character(
character,
character_width,
current_fragment.graphics_state,
current_fragment.k,
current_fragment,
self.fragment_index,
self.character_index,
current_font_height * self.line_height,
Expand Down
10 changes: 10 additions & 0 deletions fpdf/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@
from contextlib import contextmanager
from io import BytesIO


from .annotations import PDFAnnotation
from .enums import SignatureFlag
from .errors import FPDFException
from .line_break import TotalPagesSubstitutionFragment
from .image_datastructures import RasterImageInfo
from .outline import build_outline_objs
from .sign import Signature, sign_content
Expand Down Expand Up @@ -243,6 +245,7 @@ class PDFPage(PDFObject):
"_index",
"_width_pt",
"_height_pt",
"_text_substitution_fragments",
)

def __init__(
Expand All @@ -265,6 +268,7 @@ def __init__(
self.parent = None # must always be set before calling .serialize()
self._index = index
self._width_pt, self._height_pt = None, None
self._text_substitution_fragments: list[TotalPagesSubstitutionFragment] = []

def index(self):
return self._index
Expand All @@ -277,6 +281,12 @@ def set_dimensions(self, width_pt, height_pt):
"Accepts a pair (width, height) in the unit specified to FPDF constructor"
self._width_pt, self._height_pt = width_pt, height_pt

def get_text_substitutions(self):
    "Returns the list of substitution fragments registered on this page (the list itself, not a copy)"
    return self._text_substitution_fragments

def add_text_substitution(self, fragment):
    "Registers a substitution fragment on this page by appending it to the page's list"
    self._text_substitution_fragments.append(fragment)


class PDFPagesRoot(PDFObject):
def __init__(self, count, media_box):
Expand Down
Binary file modified test/alias_nb_pages.pdf
Binary file not shown.
Binary file added test/alias_with_text_shaping.pdf
Binary file not shown.
Binary file modified test/outline/toc_with_nb_and_footer.pdf
Binary file not shown.
21 changes: 21 additions & 0 deletions test/test_alias.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,24 @@ def test_custom_alias_nb_pages(tmp_path):
pdf.add_page()
pdf.cell(0, 10, f"Page {pdf.page_no()}/{alias}", align="C")
assert_pdf_equal(pdf, HERE / "alias_nb_pages.pdf", tmp_path)


def test_alias_with_shaping(tmp_path):
    """
    Render the "{nb}" placeholder text through write(), cell(), write_html() and
    multi_cell(), once with text shaping enabled and once with it disabled
    (one page each), then compare the result with the reference PDF.
    """
    pdf = fpdf.FPDF()
    pdf.add_font("Quicksand", style="", fname=HERE / "fonts" / "Quicksand-Regular.otf")

    def render_samples():
        # Same sequence of calls on both pages, so only the shaping mode differs.
        pdf.write(text="Pages {nb}")
        pdf.ln()
        pdf.cell(text="{nb}", new_x="left", new_y="next")
        pdf.write_html("<h1>{nb}</h1>")
        pdf.multi_cell(w=pdf.epw, text="Number of pages: {nb}\nAgain:{nb}")

    # First page: text shaping enabled
    pdf.add_page()
    pdf.set_font("Quicksand", "", 24)
    pdf.set_text_shaping(True)
    render_samples()

    # Second page: text shaping disabled
    pdf.add_page()
    pdf.set_text_shaping(False)
    render_samples()

    assert_pdf_equal(pdf, HERE / "alias_with_text_shaping.pdf", tmp_path)
Loading
Loading