Skip to content

Commit

Permalink
BUG: process Separation ColorSpace
Browse files Browse the repository at this point in the history
closes #1955
  • Loading branch information
pubpub-zz committed Jul 24, 2023
1 parent 74f8175 commit 80b5a0d
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 23 deletions.
64 changes: 41 additions & 23 deletions pypdf/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,8 +650,12 @@ def _get_imagemode(
color_space: Union[str, List[Any], Any],
color_components: int,
prev_mode: mode_str_type,
) -> mode_str_type:
"""Returns the image mode not taking into account mask(transparency)"""
) -> Tuple[mode_str_type, bool]:
"""
Returns:
Image mode, not taking into account mask (transparency)
Whether color inversion is required (as for some DeviceCMYK images)
"""
if isinstance(color_space, str):
pass
elif not isinstance(color_space, list):
Expand All @@ -668,12 +672,16 @@ def _get_imagemode(
color_space = color_space[1]
if isinstance(color_space, IndirectObject):
color_space = color_space.get_object()
mode2 = _get_imagemode(color_space, color_components, prev_mode)
mode2, invert_color = _get_imagemode(color_space, color_components, prev_mode)
if mode2 in ("RGB", "CMYK"):
mode2 = "P"
return mode2
return mode2, invert_color
elif color_space[0] == "/Separation":
color_space = color_space[2]
if isinstance(color_space, IndirectObject):
color_space = color_space.get_object()
mode2, invert_color = _get_imagemode(color_space, color_components, prev_mode)
return mode2, True
elif color_space[0] == "/DeviceN":
color_components = len(color_space[1])
color_space = color_space[2]
Expand All @@ -694,7 +702,7 @@ def _get_imagemode(
or list(mode_map.values())[color_components]
or prev_mode
) # type: ignore
return mode
return mode, mode == "CMYK"


def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes, Any]:
Expand Down Expand Up @@ -724,10 +732,10 @@ def _handle_flate(
mode: mode_str_type,
color_space: str,
colors: int,
) -> Tuple[Image.Image, str, str]:
) -> Tuple[Image.Image, str, str, bool]:
"""
Process image encoded in FlateDecode
Returns img, image_format, extension
Returns img, image_format, extension, color inversion
"""

def bits2byte(data: bytes, size: Tuple[int, int], bits: int) -> bytes:
Expand Down Expand Up @@ -774,7 +782,7 @@ def bits2byte(data: bytes, size: Tuple[int, int], bits: int) -> bytes:
"P": (0, "", ""),
"RGB": (3, "P", "RGB"),
"CMYK": (4, "P", "CMYK"),
}[_get_imagemode(base, 0, "")]
}[_get_imagemode(base, 0, "")[0]]
except KeyError: # pragma: no cover
logger_warning(
f"Base {base} not coded please share the pdf file with pypdf dev team",
Expand All @@ -800,30 +808,30 @@ def bits2byte(data: bytes, size: Tuple[int, int], bits: int) -> bytes:
elif not isinstance(color_space, NullObject) and color_space[0] == "/ICCBased":
# see Table 66 - Additional Entries Specific to an ICC Profile
# Stream Dictionary
mode2 = _get_imagemode(color_space, colors, mode)
mode2 = _get_imagemode(color_space, colors, mode)[0]
if mode != mode2:
img = Image.frombytes(
mode2, size, data
) # reloaded as mode may have changed
if mode == "CMYK":
extension = ".tif"
image_format = "TIFF"
return img, image_format, extension
return img, image_format, extension, False

def _handle_jpx(
size: Tuple[int, int],
data: bytes,
mode: mode_str_type,
color_space: str,
colors: int,
) -> Tuple[Image.Image, str, str]:
) -> Tuple[Image.Image, str, str, bool]:
"""
Process image encoded in JPXDecode (JPEG 2000)
Returns img, image_format, extension
Returns img, image_format, extension, inversion
"""
extension = ".jp2" # mime_type = "image/x-jp2"
img1 = Image.open(BytesIO(data), formats=("JPEG2000",))
mode = _get_imagemode(color_space, colors, mode)
mode, invert_color = _get_imagemode(color_space, colors, mode)
if img1.mode == "RGBA" and mode == "RGB":
mode = "RGBA"
# we need to convert to the correct mode
Expand All @@ -840,7 +848,7 @@ def _handle_jpx(
if img.mode == "CMYK":
img = img.convert("RGB")
image_format = "JPEG2000"
return img, image_format, extension
return img, image_format, extension, invert_color

# for error reporting
if (
Expand All @@ -861,9 +869,11 @@ def _handle_jpx(
# https://pillow.readthedocs.io/en/stable/handbook/concepts.html#modes
mode: mode_str_type = "RGB"
if x_object_obj.get("/BitsPerComponent", 8) < 8:
mode = _get_imagemode(f"{x_object_obj.get('/BitsPerComponent', 8)}bit", 0, "")
mode, invert_color = _get_imagemode(
f"{x_object_obj.get('/BitsPerComponent', 8)}bit", 0, ""
)
else:
mode = _get_imagemode(
mode, invert_color = _get_imagemode(
color_space,
2
if (
Expand All @@ -881,7 +891,7 @@ def _handle_jpx(
filters = x_object_obj.get(SA.FILTER, [None])
lfilters = filters[-1] if isinstance(filters, list) else filters
if lfilters == FT.FLATE_DECODE:
img, image_format, extension = _handle_flate(
img, image_format, extension, invert_color = _handle_flate(
size,
data,
mode,
Expand All @@ -902,25 +912,33 @@ def _handle_jpx(
img = Image.open(BytesIO(data), formats=("TIFF", "PNG"))
elif lfilters == FT.DCT_DECODE:
img, image_format, extension = Image.open(BytesIO(data)), "JPEG", ".jpg"
# invert_color kept unchanged
elif lfilters == FT.JPX_DECODE:
img, image_format, extension = _handle_jpx(
img, image_format, extension, invert_color = _handle_jpx(
size, data, mode, color_space, colors
)
elif lfilters == FT.CCITT_FAX_DECODE:
img, image_format, extension = (
img, image_format, extension, invert_color = (

Check warning on line 921 in pypdf/filters.py

View check run for this annotation

Codecov / codecov/patch

pypdf/filters.py#L921

Added line #L921 was not covered by tests
Image.open(BytesIO(data), formats=("TIFF",)),
"TIFF",
".tiff",
False,
)
else:
img, image_format, extension, invert_color = (
Image.frombytes(mode, size, data),
"PNG",
".png",
False,
)
elif lfilters is None:
img, image_format, extension = Image.frombytes(mode, size, data), "PNG", ".png"

# CMYK image without decode requires reverting scale (cf p243,2§ last sentence)
# CMYK images and other color spaces without a Decode entry
# require inverting the scale (cf p243,2§ last sentence)
decode = x_object_obj.get(
IA.DECODE,
([1.0, 0.0] * len(img.getbands()))
if (
(img.mode == "CMYK" or (mode == "CMYK" and img.mode == "L"))
(img.mode == "CMYK" or (invert_color and img.mode == "L"))
and lfilters in (FT.DCT_DECODE, FT.JPX_DECODE)
)
else None,
Expand Down
21 changes: 21 additions & 0 deletions tests/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,3 +506,24 @@ def test_gray_devicen_cmyk():
diff = ImageChops.difference(data.image, refimg)
d = sqrt(sum([(a * a) for a in diff.getdata()])) / (diff.size[0] * diff.size[1])
assert d < 0.001


@pytest.mark.enable_socket()
def test_gray_separation_cmyk():
    """
    Regression test, cf #1955.

    Gray image in a Separation/RGB color space: the extracted image must be
    reversed to match the reference picture.
    """
    pdf_url = "https://github.com/py-pdf/pypdf/files/12143372/tt.pdf"
    pdf_name = "TestWithSeparationBlack.pdf"
    pdf_bytes = get_pdf_from_url(pdf_url, name=pdf_name)
    reader = PdfReader(BytesIO(pdf_bytes))

    png_url = "https://user-images.githubusercontent.com/4083478/254545494-42df4949-1557-4f2d-acca-6be6e8de1122.png"
    png_name = "velo.png"  # reused
    # The helper is named for PDFs, but it also works for fetching the PNG.
    png_bytes = get_pdf_from_url(png_url, name=png_name)
    reference = Image.open(BytesIO(png_bytes))

    extracted = reader.pages[0].images[0]
    assert extracted.image.mode == "L"

    # Root of the summed squared pixel differences, divided by the pixel
    # count, must stay below the tolerance.
    delta = ImageChops.difference(extracted.image, reference)
    squared_total = sum(pixel * pixel for pixel in delta.getdata())
    distance = sqrt(squared_total) / (delta.size[0] * delta.size[1])
    assert distance < 0.001

0 comments on commit 80b5a0d

Please sign in to comment.