From 854c46785a27cb6d081b89ddf004436aacdf0fe0 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Wed, 1 May 2024 14:15:57 +0200
Subject: [PATCH 1/8] ENH: add decode_as_image() to ContentStreams

closes #2613
---
 pypdf/generic/_data_structures.py | 20 ++++++++++++++++++++
 tests/test_images.py              | 21 +++++++++++++++++++++
 2 files changed, 41 insertions(+)

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index 3ca761403..cae9a2c04 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -947,6 +947,26 @@ def flate_encode(self, level: int = -1) -> "EncodedStreamObject":
         retval._data = FlateDecode.encode(b_(self._data), level)
         return retval
 
+    def decode_as_image(self) -> Any:
+        """
+        Try to decode the stream object as an image
+
+        Returns:
+            a PIL image if proper decoding has been found
+        """
+        from ..filters import _xobj_to_image
+
+        if self.get("/Subtype", "") != "/Image":
+            try:
+                msg = f"{self.indirect_reference} does not seems to be an Image"  # pragma: no cover
+            except AttributeError:
+                msg = f"{self.__repr__()} object does not seems to be an Image"  # pragma: no cover
+            logger_warning(msg, __name__)
+        extension, byte_stream, img = _xobj_to_image(self)
+        if extension is None:
+            return None  # pragma: no cover
+        return img
+
 
 class DecodedStreamObject(StreamObject):
     pass
diff --git a/tests/test_images.py b/tests/test_images.py
index ad694d669..df64d0cfe 100644
--- a/tests/test_images.py
+++ b/tests/test_images.py
@@ -346,3 +346,24 @@ def test_corrupted_jpeg_iss2266(pdf, pdf_name, images, images_name, filtr):
             print(fn)  # noqa: T201
             img = Image.open(BytesIO(zf.read(fn)))
             assert image_similarity(reader.pages[p].images[i].image, img) >= 0.99
+
+
+@pytest.mark.enable_socket()
+def test_extract_image_from_object(caplog):
+    url = "https://github.com/py-pdf/pypdf/files/15176076/B2.pdf"
+    name = "iss2613.pdf"
+    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    image = reader.pages[0]["/Resources"]["/Pattern"]["/P1"]["/Resources"]["/XObject"][
+        "/X1"
+    ].decode_as_image()
+    assert isinstance(image, Image.Image)
+    with pytest.raises(Exception):
+        co = reader.pages[0].get_contents()
+        co.decode_as_image()
+    assert "does not seems to be an Image" in caplog.text
+    caplog.clear()
+    co.indirect_reference = "for_test"
+    with pytest.raises(Exception):
+        co = reader.pages[0].get_contents()
+        co.decode_as_image()
+    assert "does not seems to be an Image" in caplog.text

From 0fb2a735e46cdb67e6652168711003d4e26f69a5 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Wed, 1 May 2024 18:26:54 +0200
Subject: [PATCH 2/8] add annotation about exceptions

---
 pypdf/generic/_data_structures.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index cae9a2c04..bac4c4a23 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -953,6 +953,11 @@ def decode_as_image(self) -> Any:
 
         Returns:
             a PIL image if proper decoding has been found
+        Raises:
+            Exceptions during decoding to to invalid object or
+            errors during decoding will be reported
+            It is recommended to catch exceptions to prevent
+            stops in your program.
         """
         from ..filters import _xobj_to_image
 

From 6b83ef5b633d0c9f405a0e31ae9c2ab75d80a636 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Wed, 1 May 2024 22:50:55 +0200
Subject: [PATCH 3/8] fix doc

---
 pypdf/generic/_data_structures.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index bac4c4a23..8f524ca08 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -954,10 +954,10 @@ def decode_as_image(self) -> Any:
         Returns:
             a PIL image if proper decoding has been found
         Raises:
-            Exceptions during decoding to to invalid object or
-            errors during decoding will be reported
-            It is recommended to catch exceptions to prevent
-            stops in your program.
+            Exception: Any exception raised during decoding, due to an invalid
+                object or to a decoding error, is propagated to the caller.
+                It is recommended to catch exceptions to prevent
+                stops in your program.
         """
         from ..filters import _xobj_to_image
 

From 57c787c0fa356bd0c2345412303e52bd394f8b99 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 9 Jun 2024 11:56:00 +0200
Subject: [PATCH 4/8] Update pypdf/generic/_data_structures.py

Co-authored-by: Stefan <96178532+stefan6419846@users.noreply.github.com>
---
 pypdf/generic/_data_structures.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index 9a3cc0c99..2ba280ccb 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -971,7 +971,7 @@ def decode_as_image(self) -> Any:
 
         if self.get("/Subtype", "") != "/Image":
             try:
-                msg = f"{self.indirect_reference} does not seems to be an Image"  # pragma: no cover
+                msg = f"{self.indirect_reference} does not seem to be an Image"  # pragma: no cover
             except AttributeError:
                 msg = f"{self.__repr__()} object does not seems to be an Image"  # pragma: no cover
             logger_warning(msg, __name__)

From 66aa39440301a083c37d1aa55d229577b357b6e7 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 9 Jun 2024 11:56:09 +0200
Subject: [PATCH 5/8] Update pypdf/generic/_data_structures.py

Co-authored-by: Stefan <96178532+stefan6419846@users.noreply.github.com>
---
 pypdf/generic/_data_structures.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index 2ba280ccb..cedde3c8f 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -973,7 +973,7 @@ def decode_as_image(self) -> Any:
             try:
                 msg = f"{self.indirect_reference} does not seem to be an Image"  # pragma: no cover
             except AttributeError:
-                msg = f"{self.__repr__()} object does not seems to be an Image"  # pragma: no cover
+                msg = f"{self.__repr__()} object does not seem to be an Image"  # pragma: no cover
             logger_warning(msg, __name__)
         extension, byte_stream, img = _xobj_to_image(self)
         if extension is None:

From 9ced094b6209f2e5d9ecb500028d463b02bf54d3 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 9 Jun 2024 12:29:26 +0200
Subject: [PATCH 6/8] fix test + add documentation

---
 docs/user/extract-images.md | 23 +++++++++++++++++++++++
 tests/test_images.py        |  6 +++---
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/docs/user/extract-images.md b/docs/user/extract-images.md
index 3bce343a3..91d8f4611 100644
--- a/docs/user/extract-images.md
+++ b/docs/user/extract-images.md
@@ -19,3 +19,26 @@ for image_file_object in page.images:
         fp.write(image_file_object.data)
         count += 1
 ```
+
+# Other images
+
+Some other objects can contain images, such as stamp annotations.
+
+For example, this document contains such stamps:
+
+[test_stamp.pdf](https://github.com/user-attachments/files/15751424/test_stamp.pdf)
+
+you can extract the image from the annotation with the following code
+
+```python
+from pypdf import PdfReader
+
+reader = PdfReader("test_stamp.pdf")
+im = (
+    reader.pages[0]["/Annots"][0]
+    .get_object()["/AP"]["/N"]["/Resources"]["/XObject"]["/Im4"]
+    .decode_as_image()
+)
+
+im.show()
+```
diff --git a/tests/test_images.py b/tests/test_images.py
index d802f4bb4..5955bf47c 100644
--- a/tests/test_images.py
+++ b/tests/test_images.py
@@ -442,7 +442,7 @@ def test_inline_image_extraction():
     img = Image.open(BytesIO(get_data_from_url(url, name=name)))
     assert image_similarity(reader.pages[0].images[0].image, img) == 1
 
-    
+
 @pytest.mark.enable_socket()
 def test_extract_image_from_object(caplog):
     url = "https://github.com/py-pdf/pypdf/files/15176076/B2.pdf"
@@ -455,10 +455,10 @@ def test_extract_image_from_object(caplog):
     with pytest.raises(Exception):
         co = reader.pages[0].get_contents()
         co.decode_as_image()
-    assert "does not seems to be an Image" in caplog.text
+    assert "does not seem to be an Image" in caplog.text
     caplog.clear()
     co.indirect_reference = "for_test"
     with pytest.raises(Exception):
         co = reader.pages[0].get_contents()
         co.decode_as_image()
-    assert "does not seems to be an Image" in caplog.text
+    assert "does not seem to be an Image" in caplog.text

From 561412df6ef31d6f8201851a08148c0452128e9c Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 9 Jun 2024 12:33:31 +0200
Subject: [PATCH 7/8] Update docs/user/extract-images.md

Co-authored-by: Stefan <96178532+stefan6419846@users.noreply.github.com>
---
 docs/user/extract-images.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/user/extract-images.md b/docs/user/extract-images.md
index 91d8f4611..5fccee681 100644
--- a/docs/user/extract-images.md
+++ b/docs/user/extract-images.md
@@ -28,7 +28,7 @@ For example, this document contains such stamps:
 
 [test_stamp.pdf](https://github.com/user-attachments/files/15751424/test_stamp.pdf)
 
-you can extract the image from the annotation with the following code
+You can extract the image from the annotation with the following code:
 
 ```python
 from pypdf import PdfReader

From 604e2b81e06f122736b1471d5a5202f834a8d0f2 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sun, 9 Jun 2024 12:34:37 +0200
Subject: [PATCH 8/8] style

---
 docs/user/extract-images.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/user/extract-images.md b/docs/user/extract-images.md
index 5fccee681..1873778ae 100644
--- a/docs/user/extract-images.md
+++ b/docs/user/extract-images.md
@@ -25,7 +25,6 @@ for image_file_object in page.images:
 Some other objects can contain images, such as stamp annotations.
 
 For example, this document contains such stamps:
-
 [test_stamp.pdf](https://github.com/user-attachments/files/15751424/test_stamp.pdf)
 
 You can extract the image from the annotation with the following code: