From 829913152eb8a76e832de272ee302971052bd0fe Mon Sep 17 00:00:00 2001 From: Stefan Bourlon Date: Mon, 5 Feb 2024 12:37:59 -0800 Subject: [PATCH] BUG: encode_pdfdocencoding() always returns bytes In the function encode_pdfdocencoding, cast its return value from bytearray to bytes to match its function signature. This casting is necessary because bytearray is duck-type compatible with bytes in mypy; however, this library expects only bytes in its Encryption class. --- pypdf/generic/_base.py | 2 +- tests/test_generic.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py index 5a2757295..813b1df04 100644 --- a/pypdf/generic/_base.py +++ b/pypdf/generic/_base.py @@ -650,4 +650,4 @@ def encode_pdfdocencoding(unicode_string: str) -> bytes: raise UnicodeEncodeError( "pdfdocencoding", c, -1, -1, "does not exist in translation table" ) - return retval + return bytes(retval) diff --git a/tests/test_generic.py b/tests/test_generic.py index 0e0fff677..2a9c5ec00 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -1,4 +1,5 @@ """Test the pypdf.generic module.""" + from io import BytesIO from pathlib import Path from unittest.mock import patch @@ -273,6 +274,16 @@ def test_encode_pdfdocencoding_keyerror(): assert exc.value.args[0] == "pdfdocencoding" +@pytest.mark.parametrize("test_input", ["", "data"]) +def test_encode_pdfdocencoding_returns_bytes(test_input): + """ + Test that encode_pdfdocencoding() always returns bytes because bytearray + is duck type compatible with bytes in mypy + """ + out = encode_pdfdocencoding(test_input) + assert isinstance(out, bytes) + + def test_read_object_comment_exception(): stream = BytesIO(b"% foobar") pdf = None