From 296dd3bad77bf28bbf464f2903dd84055b683b4c Mon Sep 17 00:00:00 2001 From: Matthias Valvekens Date: Wed, 27 Mar 2024 22:03:35 +0100 Subject: [PATCH] Improve error messages on malformed keys --- pyhanko/pdf_utils/generic.py | 10 ++++++++-- pyhanko_tests/test_utils.py | 17 +++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/pyhanko/pdf_utils/generic.py b/pyhanko/pdf_utils/generic.py index fcf85af5..8831d89e 100644 --- a/pyhanko/pdf_utils/generic.py +++ b/pyhanko/pdf_utils/generic.py @@ -1259,7 +1259,7 @@ def read_from_stream( tmp = stream.read(2) if tmp != b"<<": raise PdfReadError( - "Dictionary read error at byte %s: " + "Dictionary read error at byte 0x%s: " "stream must begin with '<<'" % hex(stream.tell()) ) data = {} @@ -1270,7 +1270,13 @@ def read_from_stream( stream.read(1) break stream.seek(-1, os.SEEK_CUR) - key = read_object(stream, container_ref) + try: + key = NameObject.read_from_stream(stream) + except Exception as ex: + raise PdfReadError( + "Failed to read dictionary key at byte 0x%s; expected PDF name" + % hex(stream.tell()) + ) from ex read_non_whitespace(stream) stream.seek(-1, os.SEEK_CUR) value = read_object(stream, container_ref) diff --git a/pyhanko_tests/test_utils.py b/pyhanko_tests/test_utils.py index b87d7ecd..c6b165d5 100644 --- a/pyhanko_tests/test_utils.py +++ b/pyhanko_tests/test_utils.py @@ -20,6 +20,7 @@ from pyhanko.pdf_utils.incremental_writer import IncrementalPdfFileWriter from pyhanko.pdf_utils.layout import BoxConstraints, BoxSpecificationError from pyhanko.pdf_utils.metadata.model import DocumentMetadata +from pyhanko.pdf_utils.misc import PdfReadError from pyhanko.pdf_utils.reader import ( HistoricalResolver, PdfFileReader, @@ -1521,6 +1522,22 @@ def test_parse_comments(input_str): assert result['/C'] == '/D' +@pytest.mark.parametrize( + 'input_str', + [ + '<>', + '<>', + '<<(blah) 0>>', + ], +) +def test_parse_malformed_dictionary(input_str): + strm = BytesIO(input_str.strip().encode('utf8')) + with pytest.raises(PdfReadError, match="Failed to read dictionary key"): + generic.DictionaryObject.read_from_stream( + strm, container_ref=generic.Reference(1, 0, pdf=None) + ) + + NONEXISTENT_XREF_PATH = os.path.join( PDF_DATA_DIR, 'minimal-with-nonexistent-refs.pdf' )