SEC: Avoid endless recursion when reading a damaged PDF file (#2093)

Fixes #140
py-pdf · Aug 18, 2023 · d224430 · d224430
1 parent 0ab320c
commit d224430
Show file tree

Hide file tree

Showing 2 changed files with 13 additions and 1 deletion.
diff --git a/pypdf/_reader.py b/pypdf/_reader.py
@@ -1230,7 +1230,10 @@ def _flatten(
  addt = {}
  if isinstance(page, IndirectObject):
  addt["indirect_reference"] = page
- self._flatten(page.get_object(), inherit, **addt)
+ obj = page.get_object()
+ if obj:
+ # damaged file may have invalid child in /Pages
+ self._flatten(obj, inherit, **addt)
  elif t == "/Page":
  for attr_in, value in list(inherit.items()):
  # if the page has it's own value, it does not inherit the

diff --git a/tests/test_reader.py b/tests/test_reader.py
@@ -1450,3 +1450,12 @@ def test_iss2082():
  bb[b.find(b"xref") + 2] = ord(b"E")
  with pytest.raises(PdfReadError):
  reader = PdfReader(BytesIO(bb))
+
+
+@pytest.mark.enable_socket()
+def test_issue_140():
+ url = "https://github.com/py-pdf/pypdf/files/12168578/bad_pdf_example.pdf"
+ name = "issue-140.pdf"
+ b = get_data_from_url(url, name=name)
+ reader = PdfReader(BytesIO(b))
+ assert (len(reader.pages) == 54)