From 243fd9cc519f734c2e15389d27405d2944c3bc19 Mon Sep 17 00:00:00 2001
From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com>
Date: Sat, 12 Aug 2023 12:27:17 +0200
Subject: [PATCH] ROB: Cope with xref not followed by separator (#2083)

Closes #2082
---
 pypdf/_reader.py     |  4 ++--
 tests/test_reader.py | 14 ++++++++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/pypdf/_reader.py b/pypdf/_reader.py
index 721bcdb7c..ac32f0ffa 100644
--- a/pypdf/_reader.py
+++ b/pypdf/_reader.py
@@ -1634,8 +1634,8 @@ def _find_startxref_pos(self, stream: StreamType) -> int:
 
     def _read_standard_xref_table(self, stream: StreamType) -> None:
         # standard cross-reference table
-        ref = stream.read(4)
-        if ref[:3] != b"ref":
+        ref = stream.read(3)
+        if ref != b"ref":
             raise PdfReadError("xref table read error")
         read_non_whitespace(stream)
         stream.seek(-1, 1)
diff --git a/tests/test_reader.py b/tests/test_reader.py
index 141d59aed..69ef80e94 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -1436,3 +1436,17 @@ def test_iss1825():
     reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
     page = reader.pages[0]
     page.extract_text()
+
+
+@pytest.mark.enable_socket()
+def test_iss2082():
+    url = "https://github.com/py-pdf/pypdf/files/12317939/test.pdf"
+    name = "iss2082.pdf"
+    b = get_data_from_url(url, name=name)
+    reader = PdfReader(BytesIO(b))
+    reader.pages[0].extract_text()
+
+    bb = bytearray(b)
+    bb[b.find(b"xref") + 2] = ord(b"E")
+    with pytest.raises(PdfReadError):
+        reader = PdfReader(BytesIO(bb))