Skip to content

Commit

Permalink
Merge branch 'main' into calRGB
Browse files Browse the repository at this point in the history
  • Loading branch information
pubpub-zz authored Jul 15, 2023
2 parents 1ec1994 + c864f4e commit a2cd0d8
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 12 deletions.
48 changes: 36 additions & 12 deletions pypdf/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,9 @@ def decodeStreamData(stream: Any) -> Union[str, bytes]: # deprecated
return decode_stream_data(stream)


mode_str_type: TypeAlias = Literal["", "1", "RGB", "P", "L", "RGBA", "CMYK"]
mode_str_type: TypeAlias = Literal[
"", "1", "RGB", "2bits", "4bits", "P", "L", "RGBA", "CMYK"
]


def _get_imagemode(
Expand Down Expand Up @@ -675,6 +677,8 @@ def _get_imagemode(

mode_map = {
"1bit": "1", # 0 will be used for 1 bit
"2bit": "2bits", # 2 bits images
"4bit": "4bits", # 4 bits
"/DeviceGray": "L",
"palette": "P", # reserved for color_components alignment
"/DeviceRGB": "RGB",
Expand Down Expand Up @@ -720,6 +724,24 @@ def _handle_flate(
Process image encoded in flateEncode
Returns img, image_format, extension
"""

def bits2byte(data: bytes, size: Tuple[int, int], bits: int) -> bytes:
mask = (2 << bits) - 1
nbuff = bytearray(size[0] * size[1])
by = 0
bit = 8 - bits
for y in range(size[1]):
if (bit != 0) and (bit != 8 - bits):
by += 1
bit = 8 - bits
for x in range(size[0]):
nbuff[y * size[0] + x] = (data[by] >> bit) & mask
bit -= bits
if bit < 0:
by += 1
bit = 8 - bits
return bytes(nbuff)

extension = ".png" # mime_type = "image/png"
lookup: Any
base: Any
Expand All @@ -728,6 +750,12 @@ def _handle_flate(
color_space, base, hival, lookup = (
value.get_object() for value in color_space
)
if mode == "2bits":
mode = "P"
data = bits2byte(data, size, 2)
elif mode == "4bits":
mode = "P"
data = bits2byte(data, size, 4)
img = Image.frombytes(mode, size, data)
if color_space == "/Indexed":
from .generic import ByteStringObject
Expand Down Expand Up @@ -822,8 +850,8 @@ def _handle_jpx(
):
# https://pillow.readthedocs.io/en/stable/handbook/concepts.html#modes
mode: mode_str_type = "RGB"
if x_object_obj.get("/BitsPerComponent", 8) == 1:
mode = _get_imagemode("1bit", 0, "")
if x_object_obj.get("/BitsPerComponent", 8) < 8:
mode = _get_imagemode(f"{x_object_obj.get('/BitsPerComponent', 8)}bit", 0, "")
else:
mode = _get_imagemode(
color_space,
Expand All @@ -844,7 +872,11 @@ def _handle_jpx(
lfilters = filters[-1] if isinstance(filters, list) else filters
if lfilters == FT.FLATE_DECODE:
img, image_format, extension = _handle_flate(
size, data, mode, color_space, colors
size,
data,
mode,
color_space,
colors,
)
elif lfilters in (FT.LZW_DECODE, FT.ASCII_85_DECODE, FT.CCITT_FAX_DECODE):
# I'm not sure if the following logic is correct.
Expand Down Expand Up @@ -900,14 +932,6 @@ def _handle_jpx(
# TODO : implement mask
if alpha.mode != "L":
alpha = alpha.convert("L")
scale = x_object_obj[IA.S_MASK].get("/Decode", [0.0, 1.0])
if (scale[1] - scale[0]) != 1.0:
alpha = alpha.point(
[
round(255.0 * (v / 255.0 * (scale[1] - scale[0]) + scale[0]))
for v in range(256)
]
)
if img.mode == "P":
img = img.convert("RGB")
img.putalpha(alpha)
Expand Down
19 changes: 19 additions & 0 deletions tests/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,3 +446,22 @@ def test_calrgb():
name = "calRGB.pdf"
reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
reader.pages[0].images[0]


@pytest.mark.enable_socket()
def test_2bits_image():
"""From #1954, test with 2bits image. TODO: 4bits also"""
url = "https://github.com/py-pdf/pypdf/files/12050253/tt.pdf"
name = "paid.pdf"
reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
url_png = "https://user-images.githubusercontent.com/4083478/253568117-ca95cc85-9dea-4145-a5e0-032f1c1aa322.png"
name_png = "Paid.png"
refimg = Image.open(
BytesIO(get_pdf_from_url(url_png, name=name_png))
) # not a pdf but it works
data = reader.pages[0].images[0]
diff = ImageChops.difference(data.image, refimg)
d = sqrt(
sum([(a * a + b * b + c * c + d * d) for a, b, c, d in diff.getdata()])
) / (diff.size[0] * diff.size[1])
assert d < 0.01

0 comments on commit a2cd0d8

Please sign in to comment.