Skip to content

Commit

Permalink
SEC: Avoid endless recursion of reading damaged PDF file (#2093)
Browse files Browse the repository at this point in the history
Fixes #140
  • Loading branch information
exiledkingcc authored Aug 18, 2023
1 parent 0ab320c commit d224430
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 1 deletion.
5 changes: 4 additions & 1 deletion pypdf/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1230,7 +1230,10 @@ def _flatten(
addt = {}
if isinstance(page, IndirectObject):
addt["indirect_reference"] = page
self._flatten(page.get_object(), inherit, **addt)
obj = page.get_object()
if obj:
# damaged file may have invalid child in /Pages
self._flatten(obj, inherit, **addt)
elif t == "/Page":
for attr_in, value in list(inherit.items()):
# if the page has its own value, it does not inherit the
Expand Down
9 changes: 9 additions & 0 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1450,3 +1450,12 @@ def test_iss2082():
bb[b.find(b"xref") + 2] = ord(b"E")
with pytest.raises(PdfReadError):
reader = PdfReader(BytesIO(bb))


@pytest.mark.enable_socket()
def test_issue_140():
    """Regression test for issue #140 (damaged PDF caused endless recursion).

    Downloads the sample file attached to the issue and checks that the
    reader can enumerate its pages; the expected page count is 54.
    """
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/12168578/bad_pdf_example.pdf",
        name="issue-140.pdf",
    )
    # Building the page list walks the /Pages tree, which is where the
    # damaged file used to trigger the runaway recursion.
    page_count = len(PdfReader(BytesIO(pdf_bytes)).pages)
    assert page_count == 54

0 comments on commit d224430

Please sign in to comment.