diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f84d737e..b8b77c4cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ This can also be enabled programmatically with `warnings.simplefilter('default', ### Fixed * `FPDF.set_text_shaping(False)` was broken since version 2.7.8 and is now working properly - [issue #1287](https://github.com/py-pdf/fpdf2/issues/1287) * fixed bug where cells with `rowspan`, `colspan` > 1 and null text were not displayed properly - [issue #1293](https://github.com/py-pdf/fpdf2/issues/1293) +### Changed +* improved logic for handling text substitution of the total number of pages, ensuring compatibility with text shaping - [issue #1090](https://github.com/py-pdf/fpdf2/issues/1090) ## [2.8.1] - 2024-10-04 ### Added diff --git a/fpdf/fpdf.py b/fpdf/fpdf.py index de36896a4..0eb15ef38 100644 --- a/fpdf/fpdf.py +++ b/fpdf/fpdf.py @@ -17,7 +17,7 @@ from numbers import Number from os.path import splitext from pathlib import Path -from typing import Callable, Iterator, NamedTuple, Optional, Union +from typing import Callable, Dict, Iterator, NamedTuple, Optional, Union try: from endesive import signer @@ -101,7 +101,12 @@ class Image: preload_image, ) from .linearization import LinearizedOutputProducer -from .line_break import Fragment, MultiLineBreak, TextLine +from .line_break import ( + Fragment, + MultiLineBreak, + TextLine, + TotalPagesSubstitutionFragment, +) from .outline import OutlineSection from .output import ( OutputProducer, @@ -250,7 +255,8 @@ def __init__( but is less compatible with the PDF spec. 
""" self.page = 0 # current page number - self.pages = {} # array of PDFPage objects starting at index 1 + # array of PDFPage objects starting at index 1: + self.pages: Dict[int, PDFPage] = {} self.fonts = {} # map font string keys to an instance of CoreFont or TTFFont # map page numbers to a set of font indices: self.fonts_used_per_page_number = defaultdict(set) @@ -3175,6 +3181,8 @@ def _render_styled_text_line( f"{(self.h - self.y - 0.5 * h - 0.3 * max_font_size) * k:.2f} Td" ) for i, frag in enumerate(fragments): + if isinstance(frag, TotalPagesSubstitutionFragment): + self.pages[self.page].add_text_substitution(frag) if frag.graphics_state["text_color"] != last_used_color: # allow to change color within the line of text. last_used_color = frag.graphics_state["text_color"] @@ -3427,6 +3435,22 @@ def get_fallback_font(self, char, style=""): def _parse_chars(self, text: str, markdown: bool) -> Iterator[Fragment]: "Split text into fragments" if not markdown and not self.text_shaping and not self._fallback_font_ids: + if self.str_alias_nb_pages: + for seq, fragment_text in enumerate( + text.split(self.str_alias_nb_pages) + ): + if seq > 0: + yield TotalPagesSubstitutionFragment( + self.str_alias_nb_pages, + self._get_current_graphics_state(), + self.k, + ) + if fragment_text: + yield Fragment( + fragment_text, self._get_current_graphics_state(), self.k + ) + return + yield Fragment(text, self._get_current_graphics_state(), self.k) return txt_frag, in_bold, in_italics, in_underline = ( @@ -3486,6 +3510,23 @@ def frag(): yield frag() current_text_script = text_script + if self.str_alias_nb_pages: + if text[: len(self.str_alias_nb_pages)] == self.str_alias_nb_pages: + if txt_frag: + yield frag() + gstate = self._get_current_graphics_state() + gstate["font_style"] = ("B" if in_bold else "") + ( + "I" if in_italics else "" + ) + gstate["underline"] = in_underline + yield TotalPagesSubstitutionFragment( + self.str_alias_nb_pages, + gstate, + self.k, + ) + text = 
text[len(self.str_alias_nb_pages) :] + continue + # Check that previous & next characters are not identical to the marker: if markdown: if ( @@ -4675,26 +4716,6 @@ def sign( ) self.pages[self.page].annots.append(annotation) - def _substitute_page_number(self): - substituted = False - # Replace number of pages in fonts using subsets (unicode) - alias = self.str_alias_nb_pages.encode("utf-16-be") - encoded_nb = str(self.pages_count).encode("utf-16-be") - for page in self.pages.values(): - substituted |= alias in page.contents - page.contents = page.contents.replace(alias, encoded_nb) - # Now repeat for no pages in non-subset fonts - alias = self.str_alias_nb_pages.encode("latin-1") - encoded_nb = str(self.pages_count).encode("latin-1") - for page in self.pages.values(): - substituted |= alias in page.contents - page.contents = page.contents.replace(alias, encoded_nb) - if substituted: - LOGGER.debug( - "Substitution of '%s' was performed in the document", - self.str_alias_nb_pages, - ) - def _insert_table_of_contents(self): # Doc has been closed but we want to write to self.pages[self.page] instead of self.buffer: tocp = self._toc_placeholder @@ -5252,7 +5273,16 @@ def output( if self._toc_placeholder: self._insert_table_of_contents() if self.str_alias_nb_pages: - self._substitute_page_number() + for page in self.pages.values(): + for substitution_item in page.get_text_substitutions(): + page.contents = page.contents.replace( + substitution_item.get_placeholder_string().encode( + "latin-1" + ), + substitution_item.render_text_substitution( + str(self.pages_count) + ).encode("latin-1"), + ) if linearize: output_producer_class = LinearizedOutputProducer output_producer = output_producer_class(self) diff --git a/fpdf/line_break.py b/fpdf/line_break.py index ab4b88636..8edd8f6d6 100644 --- a/fpdf/line_break.py +++ b/fpdf/line_break.py @@ -9,6 +9,7 @@ from numbers import Number from typing import NamedTuple, Any, List, Optional, Union, Sequence +from uuid import uuid4 
def has_same_style(self, other: "Fragment"):
    """
    Tell whether `other` differs from this fragment only by its characters.

    Two fragments share a style when they are instances of exactly the same
    class, use the same scale factor `k` and carry equal graphics states.

    The class comparison is deliberately strict and symmetric
    (`type(self) is type(other)`). An `isinstance()` check is one-directional:
    a plain fragment would report a subclass instance as "same style", so a
    caller grouping characters by style would append ordinary text to a
    specialised fragment (e.g. one whose characters are replaced later on),
    and that text would be lost.

    Args:
        other: the fragment to compare against.

    Returns:
        bool: True when class, `k` and graphics state all match.
    """
    return (
        # cheapest test first; also guarantees a.has_same_style(b) == b.has_same_style(a)
        type(self) is type(other)
        and self.k == other.k
        and self.graphics_state == other.graphics_state
    )
def render_text_substitution(self, replacement_text: str):
    """
    Build the final rendering of this fragment during the output phase.

    The fragment's characters are swapped for those of `replacement_text`,
    then the parent class `render_pdf_text()` is replayed with the positional
    and keyword arguments that this class's `render_pdf_text()` stored in
    `_render_args` / `_render_kwargs`.

    Args:
        replacement_text: the text to show instead of the placeholder.

    Returns:
        Whatever the parent `render_pdf_text()` returns for the new characters.
    """
    self.characters = [*replacement_text]
    recorded_args = self._render_args
    recorded_kwargs = self._render_kwargs
    return super().render_pdf_text(*recorded_args, **recorded_kwargs)
- elif ( - graphics_state != self.fragments[-1].graphics_state - or k != self.fragments[-1].k - ): - self.fragments.append(Fragment("", graphics_state, k, url)) + elif isinstance( + original_fragment, Fragment + ) and not original_fragment.has_same_style(self.fragments[-1]): + self.fragments.append( + original_fragment.__class__( + characters="", + graphics_state=original_fragment.graphics_state, + k=original_fragment.k, + link=url, + ) + ) active_fragment = self.fragments[-1] if character in BREAKING_SPACE_SYMBOLS_STR: @@ -491,8 +556,8 @@ def add_character( self.number_of_spaces, HYPHEN, character_width, - graphics_state, - k, + original_fragment.graphics_state, + original_fragment.k, ) if character != SOFT_HYPHEN or self.print_sh: @@ -550,8 +615,7 @@ def automatic_break(self, align: Align): self.add_character( self.hyphen_break_hint.curchar, self.hyphen_break_hint.curchar_width, - self.hyphen_break_hint.graphics_state, - self.hyphen_break_hint.k, + self.hyphen_break_hint, self.hyphen_break_hint.original_fragment_index, self.hyphen_break_hint.original_character_index, self.height, @@ -716,8 +780,7 @@ def get_line(self): current_line.add_character( character, character_width, - current_fragment.graphics_state, - current_fragment.k, + current_fragment, self.fragment_index, self.character_index, current_font_height * self.line_height, diff --git a/fpdf/output.py b/fpdf/output.py index 0e71979bd..2270e5c0d 100644 --- a/fpdf/output.py +++ b/fpdf/output.py @@ -13,9 +13,11 @@ from contextlib import contextmanager from io import BytesIO + from .annotations import PDFAnnotation from .enums import SignatureFlag from .errors import FPDFException +from .line_break import TotalPagesSubstitutionFragment from .image_datastructures import RasterImageInfo from .outline import build_outline_objs from .sign import Signature, sign_content @@ -243,6 +245,7 @@ class PDFPage(PDFObject): "_index", "_width_pt", "_height_pt", + "_text_substitution_fragments", ) def __init__( @@ -265,6 
def get_text_substitutions(self):
    """
    Return the text-substitution fragments recorded for this page.

    The internal list itself is returned, not a copy.
    """
    recorded = self._text_substitution_fragments
    return recorded


def add_text_substitution(self, fragment):
    """Record `fragment` in this page's list of text substitutions."""
    recorded = self._text_substitution_fragments
    recorded.append(fragment)
pdf.write_html("