Skip to content

Commit

Permalink
BUG: Prevent stall when accessing image in corrupted pdf (#2081)
Browse files Browse the repository at this point in the history
Closes #2077
  • Loading branch information
pubpub-zz authored Aug 12, 2023
1 parent 243fd9c commit fb4f466
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 2 deletions.
14 changes: 12 additions & 2 deletions pypdf/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,8 +477,18 @@ def _old_images(self) -> List[File]: # deprecated
return images_extracted

def _get_ids_image(
self, obj: Optional[DictionaryObject] = None, ancest: Optional[List[str]] = None
self,
obj: Optional[DictionaryObject] = None,
ancest: Optional[List[str]] = None,
call_stack: Optional[List[Any]] = None,
) -> List[Union[str, List[str]]]:
if call_stack is None:
call_stack = []
_i = getattr(obj, "indirect_reference", None)
if _i in call_stack:
return []
else:
call_stack.append(_i)
if self.inline_images_keys is None:
nb_inlines = len(
re.findall(
Expand All @@ -502,7 +512,7 @@ def _get_ids_image(
if x_object[o][IA.SUBTYPE] == "/Image":
lst.append(o if len(ancest) == 0 else ancest + [o])
else: # is a form with possible images inside
lst.extend(self._get_ids_image(x_object[o], ancest + [o]))
lst.extend(self._get_ids_image(x_object[o], ancest + [o], call_stack))
return lst + self.inline_images_keys

def _get_image(
Expand Down
10 changes: 10 additions & 0 deletions tests/test_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,3 +209,13 @@ def test_image_extraction(src, page_index, image_key, expected):
with open(f"page-{page_index}-{actual_image.name}", "wb") as fp:
fp.write(actual_image.data)
assert image_similarity(BytesIO(actual_image.data), expected) >= 0.99


@pytest.mark.enable_socket()
@pytest.mark.timeout(30)
def test_loop_in_image_keys():
    """
    Regression test for issue #2077.

    Downloads the sample PDF attached to the issue and lists the image keys
    of its first page. The call is expected to return; the 30 second timeout
    marker above turns a stall into a test failure.
    """
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/12309492/example_134.pdf",
        name="iss2077.pdf",
    )
    first_page = PdfReader(BytesIO(pdf_bytes)).pages[0]
    # The returned keys are deliberately not inspected: only termination of
    # the lookup is being checked here.
    first_page.images.keys()

0 comments on commit fb4f466

Please sign in to comment.