From 1d9a52c306cefbda0749f6bd16b9bc4c8141e6dc Mon Sep 17 00:00:00 2001 From: Alex Meyer Date: Fri, 18 Oct 2024 15:07:07 -0700 Subject: [PATCH 1/5] BUG: Don't close stream passed to PdfWriter.write() --- pypdf/_writer.py | 10 ++++++---- tests/test_writer.py | 11 +++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index 0ac1524bc..8b15d56e1 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -249,7 +249,6 @@ def _get_clone_from( # to prevent overwriting self.temp_fileobj = fileobj self.fileobj = "" - self.with_as_usage = False # The root of our page tree node. pages = DictionaryObject() pages.update( @@ -356,7 +355,6 @@ def __enter__(self) -> "PdfWriter": """Store that writer is initialized by 'with'.""" t = self.temp_fileobj self.__init__() # type: ignore - self.with_as_usage = True self.fileobj = t # type: ignore return self @@ -369,6 +367,9 @@ def __exit__( """Write data to the fileobj.""" if self.fileobj: self.write(self.fileobj) + close_attr = getattr(self.fileobj, "close", None) + if callable(close_attr): + self.fileobj.close() def _repr_mimebundle_( self, @@ -1388,13 +1389,14 @@ def write(self, stream: Union[Path, StrByteType]) -> Tuple[bool, IO[Any]]: if isinstance(stream, (str, Path)): stream = FileIO(stream, "wb") - self.with_as_usage = True # my_file = True self.write_stream(stream) - if self.with_as_usage: + if my_file: stream.close() + else: + stream.flush() return my_file, stream diff --git a/tests/test_writer.py b/tests/test_writer.py index 0cd2d03f8..cb11236d3 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -2480,3 +2480,14 @@ def test_append_pdf_with_dest_without_page(caplog): writer.append(reader) assert "/__WKANCHOR_8" not in writer.named_destinations assert len(writer.named_destinations) == 3 + + +def test_stream_not_closed(): + """Tests for #2905""" + src = RESOURCE_ROOT / "pdflatex-outline.pdf" + with NamedTemporaryFile() as tmp: + with PdfReader(src) as reader, PdfWriter() as writer: + for i in range(4): + writer.add_page(reader.pages[i]) + writer.write(tmp) + assert not tmp.file.closed From 5568450134440babc58ffb68b37767c8b79676b2 Mon Sep 17 00:00:00 2001 From: Alex Meyer Date: Fri, 18 Oct 2024 15:12:34 -0700 Subject: [PATCH 2/5] mypy --- pypdf/_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index 8b15d56e1..607041703 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -369,7 +369,7 @@ def __exit__( self.write(self.fileobj) close_attr = getattr(self.fileobj, "close", None) if callable(close_attr): - self.fileobj.close() + self.fileobj.close() # type: ignore[attr-defined] def _repr_mimebundle_( self, From a905bacf7a015879f136621eb7eee8a7f8772554 Mon Sep 17 00:00:00 2001 From: Alex Meyer Date: Sun, 20 Oct 2024 18:46:29 -0700 Subject: [PATCH 3/5] Improvements based on code review comments. --- pypdf/_writer.py | 15 ++++++++------- tests/test_writer.py | 24 +++++++++++++++++++++--- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index 607041703..ffb8df31a 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -235,9 +235,9 @@ def _get_clone_from( or Path(str(fileobj)).stat().st_size == 0 ): cloning = False - if isinstance(fileobj, (IO, BytesIO)): + if isinstance(fileobj, (IOBase, BytesIO)): t = fileobj.tell() - fileobj.seek(-1, 2) + fileobj.seek(0, 2) if fileobj.tell() == 0: cloning = False fileobj.seek(t, 0) @@ -249,6 +249,7 @@ def _get_clone_from( # to prevent overwriting self.temp_fileobj = fileobj self.fileobj = "" + self.cloned = False # The root of our page tree node. pages = DictionaryObject() pages.update( @@ -266,6 +267,7 @@ def _get_clone_from( if not isinstance(clone_from, PdfReader): clone_from = PdfReader(clone_from) self.clone_document_from_reader(clone_from) + self.cloned = True else: self._pages = self._add_object(pages) # root object @@ -352,9 +354,11 @@ def xmp_metadata(self, value: Optional[XmpInformation]) -> None: return self.root_object.xmp_metadata # type: ignore def __enter__(self) -> "PdfWriter": - """Store that writer is initialized by 'with'.""" + """Store how writer is initialized by 'with'.""" + c: bool = self.cloned t = self.temp_fileobj self.__init__() # type: ignore + self.cloned = c self.fileobj = t # type: ignore return self @@ -365,11 +369,8 @@ def __exit__( traceback: Optional[TracebackType], ) -> None: """Write data to the fileobj.""" - if self.fileobj: + if self.fileobj and not self.cloned: self.write(self.fileobj) - close_attr = getattr(self.fileobj, "close", None) - if callable(close_attr): - self.fileobj.close() # type: ignore[attr-defined] def _repr_mimebundle_( self, diff --git a/tests/test_writer.py b/tests/test_writer.py index cb11236d3..9e0f49bb8 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -2485,9 +2485,27 @@ def test_append_pdf_with_dest_without_page(caplog): def test_stream_not_closed(): """Tests for #2905""" src = RESOURCE_ROOT / "pdflatex-outline.pdf" - with NamedTemporaryFile() as tmp: + with NamedTemporaryFile(suffix=".pdf") as tmp: with PdfReader(src) as reader, PdfWriter() as writer: - for i in range(4): - writer.add_page(reader.pages[i]) + writer.add_page(reader.pages[0]) writer.write(tmp) assert not tmp.file.closed + + with NamedTemporaryFile(suffix=".pdf") as target: + with PdfWriter(target.file) as writer: + writer.add_blank_page(100, 100) + assert not target.file.closed + + with open(src, "rb") as fileobj: + with PdfWriter(fileobj) as writer: + pass + assert not fileobj.closed + + +def test_auto_write(): + """Another test for #2905""" + with NamedTemporaryFile(suffix=".pdf", delete_on_close=False) as tmp: + tmp.close() + with PdfWriter(tmp.name) as writer: + writer.add_blank_page(100, 100) + assert Path(tmp.name).stat().st_size > 0 From dafbafc30787e32794a45dec660c5dc83ae09763 Mon Sep 17 00:00:00 2001 From: Alex Meyer Date: Sun, 20 Oct 2024 18:58:08 -0700 Subject: [PATCH 4/5] Make test work with Python 3.8. --- tests/test_writer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_writer.py b/tests/test_writer.py index 9e0f49bb8..f50b8f269 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -2504,8 +2504,8 @@ def test_stream_not_closed(): def test_auto_write(): """Another test for #2905""" - with NamedTemporaryFile(suffix=".pdf", delete_on_close=False) as tmp: - tmp.close() - with PdfWriter(tmp.name) as writer: - writer.add_blank_page(100, 100) - assert Path(tmp.name).stat().st_size > 0 + target = Path(_get_write_target(str)) + with PdfWriter(target) as writer: + writer.add_blank_page(100, 100) + assert target.stat().st_size > 0 + target.unlink() From 3d6b7dc6edcf18fce5a340c6aabfb4f847bff4b2 Mon Sep 17 00:00:00 2001 From: Alex Meyer Date: Mon, 21 Oct 2024 01:26:24 -0700 Subject: [PATCH 5/5] Use tmp_path to avoid needing to unlink. --- tests/test_writer.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_writer.py b/tests/test_writer.py index f50b8f269..d4e176834 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -2502,10 +2502,9 @@ def test_stream_not_closed(): assert not fileobj.closed -def test_auto_write(): +def test_auto_write(tmp_path): """Another test for #2905""" - target = Path(_get_write_target(str)) + target = tmp_path / "out.pdf" with PdfWriter(target) as writer: writer.add_blank_page(100, 100) assert target.stat().st_size > 0 - target.unlink()