Skip to content

Commit

Permalink
Deal with sigs in encrypted docs when copying
Browse files Browse the repository at this point in the history
See #412
  • Loading branch information
MatthiasValvekens committed Mar 27, 2024
1 parent 5928c5b commit a605c21
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 0 deletions.
42 changes: 42 additions & 0 deletions pyhanko/pdf_utils/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
for the original license.
"""

import logging
import os
import typing
from typing import Dict, Iterable, List, Optional, Set, Tuple, Union, cast
Expand Down Expand Up @@ -53,6 +54,8 @@
'copy_into_new_writer',
]

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# TODO move this to content.py?
def init_xobject_dictionary(
Expand Down Expand Up @@ -1240,6 +1243,44 @@ def process_reference(self, ref: generic.Reference) -> generic.PdfObject:
self.queued_references.append((ref, new_ido.reference))
return new_ido

def preprocess_signature_data(self):
    """
    Pre-register copies of the signature dictionaries of all filled
    signature fields found in the source document.

    The ``/Contents`` entry of a signature dictionary is never encrypted,
    so its raw bytes are carried over untouched, while every other entry
    goes through the regular ingestion path. The resulting dictionary is
    added to the target and recorded in the reference map under the
    original reference.

    The copy operation is guaranteed to break the signatures, but there
    are valid use cases for stripping the encryption from such files
    (e.g. downstream processing).
    """
    from ..sign.fields import enumerate_sig_fields

    sig_refs = []
    for _, value, _ in enumerate_sig_fields(
        self.source, filled_status=True
    ):
        # in all valid PDFs, the field value is an indirect object
        if isinstance(value, generic.IndirectObject):
            sig_refs.append(value.reference)

    if sig_refs:
        logger.warning(
            "Source document contains filled signature fields--the copy "
            "operation will invalidate them."
        )

    for sig_ref in sig_refs:
        sig_dict = sig_ref.get_object()
        assert isinstance(sig_dict, generic.DictionaryObject)

        # everything except /Contents is ingested the usual way
        new_entries = {}
        for key, entry in sig_dict.items():
            if key == '/Contents':
                continue
            new_entries[key] = self._ingest(entry)

        # /Contents is fetched with RAW access and copied byte-for-byte
        raw_contents = sig_dict.raw_get(
            '/Contents', decrypt=generic.EncryptedObjAccess.RAW
        )
        new_entries['/Contents'] = generic.ByteStringObject(
            raw_contents.original_bytes
        )

        new_sig_dict = generic.DictionaryObject(new_entries)
        self.reference_map[sig_ref] = self.target.add_object(
            new_sig_dict,
            obj_stream=None,
        )


def copy_into_new_writer(
input_handler: PdfHandler, writer_kwargs: Optional[dict] = None
Expand Down Expand Up @@ -1290,6 +1331,7 @@ def copy_into_new_writer(
},
obj_stream=None,
)
importer.preprocess_signature_data()
new_root_dict = importer.import_object(input_handler.root)
# override the old root ref
ix = (output_root_ref.generation, output_root_ref.idnum)
Expand Down
Binary file not shown.
51 changes: 51 additions & 0 deletions pyhanko_tests/test_sign_encrypted.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,28 @@

from pyhanko.pdf_utils.incremental_writer import IncrementalPdfFileWriter
from pyhanko.pdf_utils.reader import PdfFileReader
from pyhanko.pdf_utils.writer import copy_into_new_writer
from pyhanko.sign import signers
from pyhanko.sign.diff_analysis import ModificationLevel
from pyhanko.sign.signers.pdf_signer import (
DSSContentSettings,
SigDSSPlacementPreference,
)
from pyhanko.sign.validation import validate_pdf_signature
from pyhanko_tests.samples import (
MINIMAL_AES256,
MINIMAL_ONE_FIELD_AES256,
MINIMAL_ONE_FIELD_RC4,
MINIMAL_PUBKEY_ONE_FIELD_AES256,
MINIMAL_PUBKEY_ONE_FIELD_RC4,
MINIMAL_RC4,
PDF_DATA_DIR,
PUBKEY_SELFSIGNED_DECRYPTER,
)
from pyhanko_tests.signing_commons import (
DUMMY_HTTP_TS,
FROM_CA,
SIMPLE_V_CONTEXT,
live_testing_vc,
val_trusted,
)
Expand Down Expand Up @@ -175,3 +179,50 @@ def test_sign_encrypted_with_post_sign(requests_mock, password, file):
assert status.modification_level == ModificationLevel.LTA_UPDATES
assert len(r.embedded_regular_signatures) == 1
assert len(r.embedded_timestamp_signatures) == 1


def test_copy_encrypted_signed_file():
    """
    Copying a signed, encrypted file into a new writer must yield an
    unencrypted document whose signature is no longer intact.
    """
    signing_writer = IncrementalPdfFileWriter(
        BytesIO(MINIMAL_ONE_FIELD_AES256)
    )
    signing_writer.encrypt("ownersecret")
    signed_output = signers.sign_pdf(
        signing_writer,
        signers.PdfSignatureMetadata(),
        signer=FROM_CA,
        existing_fields_only=True,
    )

    # open the signed result and copy it into a fresh writer
    source_reader = PdfFileReader(signed_output)
    source_reader.decrypt("ownersecret")
    copy_writer = copy_into_new_writer(source_reader)
    copied_output = BytesIO()
    copy_writer.write(copied_output)

    copied_reader = PdfFileReader(copied_output)
    assert not copied_reader.encrypted
    embedded_sig = copied_reader.embedded_signatures[0]
    embedded_sig.compute_integrity_info()
    status = validate_pdf_signature(
        embedded_sig, SIMPLE_V_CONTEXT(), skip_diff=True
    )
    assert not status.intact


def test_copy_file_with_mdp_signature_and_backref():
    """
    Copy a pubkey-encrypted signed file whose signature reference
    dictionary has a /Data entry pointing back to the document root.

    Such back-references are sometimes still seen in FieldMDP signatures
    generated by Acrobat, among others.
    """
    sample_path = (
        f"{PDF_DATA_DIR}/signed-encrypted-pubkey-with-catalog-ref.pdf"
    )
    copied_output = BytesIO()
    with open(sample_path, 'rb') as inf:
        # the input file has to stay open while the copy is made and written
        source_reader = PdfFileReader(inf)
        source_reader.decrypt_pubkey(PUBKEY_SELFSIGNED_DECRYPTER)

        copy_writer = copy_into_new_writer(source_reader)
        copy_writer.write(copied_output)

    copied_reader = PdfFileReader(copied_output)
    assert not copied_reader.encrypted
    embedded_sig = copied_reader.embedded_signatures[0]
    embedded_sig.compute_integrity_info()
    status = validate_pdf_signature(
        embedded_sig, SIMPLE_V_CONTEXT(), skip_diff=True
    )
    assert not status.intact

0 comments on commit a605c21

Please sign in to comment.