Skip to content

Commit

Permalink
Undo moving of tests
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinThoma committed Aug 8, 2023
1 parent 189c47f commit 1941601
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 67 deletions.
67 changes: 66 additions & 1 deletion tests/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@
CCITTFaxDecode,
FlateDecode,
)
from pypdf.generic import ArrayObject, DictionaryObject, NumberObject
from pypdf.generic import ArrayObject, DictionaryObject, NameObject, NumberObject

from . import get_pdf_from_url
from .test_images import image_similarity

filter_inputs = (
# "", '', """""",
Expand Down Expand Up @@ -287,6 +288,31 @@ def test_issue_1737():
reader.pages[0]["/Resources"]["/XObject"]["/Im2"].get_data()


@pytest.mark.enable_socket()
def test_pa_image_extraction():
    """
    Extract the PA-mode PNG from the issue #1801 sample PDF.

    Regression test for issue #1801: page 0 must expose exactly one image,
    named "Im1.png", whose data equals the downloaded reference file.
    """
    pdf_url = "https://github.com/py-pdf/pypdf/files/11250359/test_img.pdf"
    pdf_bytes = get_pdf_from_url(pdf_url, name="issue-1801.pdf")
    pdf = PdfReader(BytesIO(pdf_bytes))
    extracted = pdf.pages[0].images

    assert len(extracted) == 1
    assert extracted[0].name == "Im1.png"

    # Ensure visual appearance: compare against the stored reference image.
    reference_url = (
        "https://user-images.githubusercontent.com/"
        "1658117/232842886-9d1b0726-3a5b-430d-8464-595d919c266c.png"
    )
    reference_bytes = get_pdf_from_url(reference_url, "issue-1801.png")
    assert reference_bytes == extracted[0].data


@pytest.mark.enable_socket()
def test_1bit_image_extraction():
"""Cf issue #1814"""
Expand All @@ -313,6 +339,45 @@ def test_png_transparency_reverse():
# assert list(img.getdata()) == list(refimg.getdata())


@pytest.mark.enable_socket()
def test_iss1787():
    """
    Check image extraction for the issue #1787 sample, then its error path.

    The first image on page 0 must be named as a PNG and match the reference
    watermark pixel for pixel. Afterwards /Columns in the image's
    /DecodeParms is overwritten with 1000 and ``decoded_self`` is reset to
    None (presumably to drop cached decoded data -- confirm against pypdf);
    accessing the image again must raise PdfReadError.
    """
    pdf_url = "https://github.com/py-pdf/pypdf/files/11219022/pdf_font_garbled.pdf"
    pdf = PdfReader(BytesIO(get_pdf_from_url(pdf_url, name="pdf_font_garbled.pdf")))

    # The helper is named for PDFs, but it fetches the PNG reference as well.
    reference_url = "https://user-images.githubusercontent.com/4083478/236793172-09340aef-3440-4c8a-af85-a91cdad27d46.png"
    reference_bytes = get_pdf_from_url(reference_url, name="watermark1.png")
    expected = Image.open(BytesIO(reference_bytes))

    image_file = pdf.pages[0].images[0]
    actual = Image.open(BytesIO(image_file.data))
    assert ".png" in image_file.name
    assert list(actual.getdata()) == list(expected.getdata())

    # Tamper with the decode parameters, then request the image again.
    stream = image_file.indirect_reference.get_object()
    decode_parms = stream["/DecodeParms"]
    decode_parms[NameObject("/Columns")] = NumberObject(1000)
    stream.decoded_self = None
    with pytest.raises(PdfReadError) as exc_info:
        pdf.pages[0].images[0]
    # Checked after the block: a statement following the raising line inside
    # the ``with`` body would never execute.
    assert exc_info.value.args[0] == "Image data is not rectangular"


@pytest.mark.enable_socket()
def test_rgba():
    """
    Decode an RGB image with transparency.

    The first image on page 0 of the tika sample must carry a ".jp2" name
    and score above 0.99 similarity against the reference PNG.
    """
    pdf_url = "https://corpora.tika.apache.org/base/docs/govdocs1/972/972174.pdf"
    pdf = PdfReader(BytesIO(get_pdf_from_url(pdf_url, name="tika-972174.pdf")))
    reference_url = "https://user-images.githubusercontent.com/4083478/238288207-b77dd38c-34b4-4f4f-810a-bf9db7ca0414.png"

    image_file = pdf.pages[0].images[0]
    assert ".jp2" in image_file.name

    # Read the extracted image before fetching the reference, as before.
    extracted = image_file.image
    reference = BytesIO(
        get_pdf_from_url(reference_url, name="tika-972174_p0-im0.png")
    )
    assert image_similarity(extracted, reference) > 0.99


@pytest.mark.enable_socket()
def test_cmyk():
"""Decode cmyk"""
Expand Down
66 changes: 0 additions & 66 deletions tests/test_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@

from pypdf import PdfReader
from pypdf._page import PageObject
from pypdf.errors import PdfReadError
from pypdf.generic import NameObject, NumberObject

from . import get_pdf_from_url

Expand Down Expand Up @@ -170,54 +168,6 @@ def test_image_extraction(src, page_index, image_key, expected):
assert image_similarity(BytesIO(actual_image.data), expected) >= 0.99


@pytest.mark.enable_socket()
def test_pa_image_extraction():
    """
    Extract the PA-mode PNG from the issue #1801 sample PDF.

    Regression test for issue #1801: page 0 must expose exactly one image,
    named "Im1.png", whose data equals the downloaded reference file.
    """
    pdf_url = "https://github.com/py-pdf/pypdf/files/11250359/test_img.pdf"
    pdf_bytes = get_pdf_from_url(pdf_url, name="issue-1801.pdf")
    pdf = PdfReader(BytesIO(pdf_bytes))
    extracted = pdf.pages[0].images

    assert len(extracted) == 1
    assert extracted[0].name == "Im1.png"

    # Ensure visual appearance: compare against the stored reference image.
    reference_url = (
        "https://user-images.githubusercontent.com/"
        "1658117/232842886-9d1b0726-3a5b-430d-8464-595d919c266c.png"
    )
    reference_bytes = get_pdf_from_url(reference_url, "issue-1801.png")
    assert reference_bytes == extracted[0].data


@pytest.mark.enable_socket()
def test_iss1787():
    """
    Check image extraction for the issue #1787 sample, then its error path.

    The first image on page 0 must be named as a PNG and match the reference
    watermark pixel for pixel. Afterwards /Columns in the image's
    /DecodeParms is overwritten with 1000 and ``decoded_self`` is reset to
    None (presumably to drop cached decoded data -- confirm against pypdf);
    accessing the image again must raise PdfReadError.
    """
    pdf_url = "https://github.com/py-pdf/pypdf/files/11219022/pdf_font_garbled.pdf"
    pdf = PdfReader(BytesIO(get_pdf_from_url(pdf_url, name="pdf_font_garbled.pdf")))

    # The helper is named for PDFs, but it fetches the PNG reference as well.
    reference_url = "https://user-images.githubusercontent.com/4083478/236793172-09340aef-3440-4c8a-af85-a91cdad27d46.png"
    reference_bytes = get_pdf_from_url(reference_url, name="watermark1.png")
    expected = Image.open(BytesIO(reference_bytes))

    image_file = pdf.pages[0].images[0]
    actual = Image.open(BytesIO(image_file.data))
    assert ".png" in image_file.name
    assert list(actual.getdata()) == list(expected.getdata())

    # Tamper with the decode parameters, then request the image again.
    stream = image_file.indirect_reference.get_object()
    decode_parms = stream["/DecodeParms"]
    decode_parms[NameObject("/Columns")] = NumberObject(1000)
    stream.decoded_self = None
    with pytest.raises(PdfReadError) as exc_info:
        pdf.pages[0].images[0]
    # Checked after the block: a statement following the raising line inside
    # the ``with`` body would never execute.
    assert exc_info.value.args[0] == "Image data is not rectangular"


@pytest.mark.enable_socket()
def test_tiff_predictor():
"""Decode Tiff Predictor 2 Images"""
Expand All @@ -233,19 +183,3 @@ def test_tiff_predictor():
img = Image.open(BytesIO(data.data))
assert ".png" in data.name
assert list(img.getdata()) == list(refimg.getdata())


@pytest.mark.enable_socket()
def test_rgba():
    """
    Decode an RGB image with transparency.

    The first image on page 0 of the tika sample must carry a ".jp2" name
    and score above 0.99 similarity against the reference PNG.
    """
    pdf_url = "https://corpora.tika.apache.org/base/docs/govdocs1/972/972174.pdf"
    pdf = PdfReader(BytesIO(get_pdf_from_url(pdf_url, name="tika-972174.pdf")))
    reference_url = "https://user-images.githubusercontent.com/4083478/238288207-b77dd38c-34b4-4f4f-810a-bf9db7ca0414.png"

    image_file = pdf.pages[0].images[0]
    assert ".jp2" in image_file.name

    # Read the extracted image before fetching the reference, as before.
    extracted = image_file.image
    reference = BytesIO(
        get_pdf_from_url(reference_url, name="tika-972174_p0-im0.png")
    )
    assert image_similarity(extracted, reference) > 0.99

0 comments on commit 1941601

Please sign in to comment.