Skip to content

Commit

Permalink
ROB: Cope with xref not followed by separator (#2083)
Browse files Browse the repository at this point in the history
Closes #2082
  • Loading branch information
pubpub-zz authored Aug 12, 2023
1 parent f70dfad commit 243fd9c
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 2 deletions.
4 changes: 2 additions & 2 deletions pypdf/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1634,8 +1634,8 @@ def _find_startxref_pos(self, stream: StreamType) -> int:

def _read_standard_xref_table(self, stream: StreamType) -> None:
# standard cross-reference table
ref = stream.read(4)
if ref[:3] != b"ref":
ref = stream.read(3)
if ref != b"ref":
raise PdfReadError("xref table read error")
read_non_whitespace(stream)
stream.seek(-1, 1)
Expand Down
14 changes: 14 additions & 0 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1436,3 +1436,17 @@ def test_iss1825():
reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
page = reader.pages[0]
page.extract_text()


@pytest.mark.enable_socket()
def test_iss2082():
    """Regression test for issue #2082 (xref keyword not followed by a separator).

    Downloads the sample PDF attached to the issue, checks that it can be
    opened and that text extraction on the first page runs without raising,
    then checks that a copy with a damaged ``xref`` keyword is rejected with
    ``PdfReadError``.
    """
    url = "https://github.com/py-pdf/pypdf/files/12317939/test.pdf"
    name = "iss2082.pdf"
    pdf_bytes = get_data_from_url(url, name=name)

    # The untouched file must be readable.
    reader = PdfReader(BytesIO(pdf_bytes))
    reader.pages[0].extract_text()

    # Overwrite the third byte of the first "xref" occurrence ("xref" -> "xrEf")
    # so the keyword no longer matches and the reader has to raise.
    keyword_offset = pdf_bytes.find(b"xref")
    damaged = bytearray(pdf_bytes)
    damaged[keyword_offset + 2] = ord(b"E")
    with pytest.raises(PdfReadError):
        PdfReader(BytesIO(damaged))

0 comments on commit 243fd9c

Please sign in to comment.