From 0d98323e40bd468df10e198a650321fb7a409689 Mon Sep 17 00:00:00 2001 From: Matthias Valvekens Date: Tue, 26 Mar 2024 21:11:04 +0100 Subject: [PATCH 1/3] Make object importer more resilient - Correctly copy references to the root object - Deal with deep object graphs in a more stack-safe manner (we now only recurse within top-level objects, those usually are not that deep) See #412 --- pyhanko/pdf_utils/writer.py | 207 +++++++++++++++++++++--------------- pyhanko_tests/test_utils.py | 39 +++++++ 2 files changed, 158 insertions(+), 88 deletions(-) diff --git a/pyhanko/pdf_utils/writer.py b/pyhanko/pdf_utils/writer.py index 924be241..3e4a014b 100644 --- a/pyhanko/pdf_utils/writer.py +++ b/pyhanko/pdf_utils/writer.py @@ -755,88 +755,13 @@ def import_object( a new instance. """ - return self._import_object(obj, {}, obj_stream) - - def _import_object( - self, obj: generic.PdfObject, reference_map: dict, obj_stream - ) -> generic.PdfObject: - # TODO check the spec for guidance on fonts. Do font identifiers have - # to be globally unique? - - # TODO deal with container_ref - - if isinstance(obj, generic.DecryptedObjectProxy): - obj = obj.decrypted - if isinstance(obj, generic.IndirectObject): - try: - return reference_map[obj.reference] - except KeyError: - refd = obj.get_object() - # Add a placeholder to reserve the reference value. - # This ensures correct behaviour in recursive calls - # with self-references. - new_ido = self.allocate_placeholder() - reference_map[obj.reference] = new_ido - imported = self._import_object(refd, reference_map, obj_stream) - - # if the imported object is a bare reference and/or a stream - # object, we can't put it into an object stream. - if isinstance(imported, OBJSTREAM_FORBIDDEN): - obj_stream = None - - # fill in the placeholder - self.add_object( - imported, obj_stream=obj_stream, idnum=new_ido.idnum - ) - return new_ido - elif isinstance(obj, generic.DictionaryObject): - raw_dict = { - k: self._import_object(v, reference_map, obj_stream) - for k, v in obj.items() - if k != '/Metadata' - } - try: - # make sure to import metadata streams as such - meta_ref = obj.get_value_as_reference('/Metadata') - # ensure a MetadataStream object ends up in the cache - meta_ref.get_pdf_handler().get_object( - meta_ref, as_metadata_stream=True - ) - # ...then import the reference - raw_dict['/Metadata'] = self._import_object( - generic.IndirectObject( - meta_ref.idnum, meta_ref.generation, meta_ref.pdf - ), - reference_map, - obj_stream, - ) - except (KeyError, IndirectObjectExpected): - pass - - if isinstance(obj, generic.StreamObject): - stm_cls = generic.StreamObject - # again, make sure to import metadata streams as such - try: - # noinspection PyUnresolvedReferences - from pyhanko.pdf_utils.metadata import xmp_xml - - if isinstance(obj, xmp_xml.MetadataStream): - stm_cls = xmp_xml.MetadataStream - except ImportError: # pragma: nocover - pass - # In the vast majority of use cases, I'd expect the content - # to be available in encoded form by default. - # By initialising the stream object in this way, we avoid - # a potentially costly decoding operation. - return stm_cls(raw_dict, encoded_data=obj.encoded_data) - else: - return generic.DictionaryObject(raw_dict) - elif isinstance(obj, generic.ArrayObject): - return generic.ArrayObject( - self._import_object(v, reference_map, obj_stream) for v in obj - ) - else: - return obj + importer = _ObjectImporter( + source=obj.get_container_ref().get_pdf_handler(), + target=self, + obj_stream=obj_stream, + reference_map={}, + ) + return importer.import_object(obj) def import_page_as_xobject( self, other: PdfHandler, page_ix=0, inherit_filters=True @@ -1222,6 +1147,100 @@ def _populate_trailer(self, trailer): super()._populate_trailer(trailer) +class _ObjectImporter: + + def __init__( + self, + source: PdfHandler, + target: BasePdfFileWriter, + reference_map: Dict[generic.Reference, generic.IndirectObject], + obj_stream: Optional[ObjectStream], + ): + self.source = source + self.target = target + self.obj_stream = obj_stream + self.queued_references: List[ + Tuple[generic.Reference, generic.Reference] + ] = [] + self.reference_map = reference_map + + def import_object(self, obj: generic.PdfObject) -> generic.PdfObject: + result = self._ingest(obj) + + while self.queued_references: + source_ref, target_ref = self.queued_references.pop() + source_obj = source_ref.get_object() + imported = self._ingest(source_obj) + + # if the imported object is a bare reference and/or a stream + # object, we can't put it into an object stream. + if isinstance(imported, OBJSTREAM_FORBIDDEN): + obj_stream = None + else: + obj_stream = self.obj_stream + + # fill in the placeholder + self.target.add_object( + imported, obj_stream=obj_stream, idnum=target_ref.idnum + ) + + return result + + def _ingest(self, obj: generic.PdfObject): + if isinstance(obj, generic.DecryptedObjectProxy): + obj = obj.decrypted + if isinstance(obj, generic.IndirectObject): + return self.process_reference(obj.reference) + elif isinstance(obj, generic.DictionaryObject): + raw_dict = { + k: self._ingest(v) for k, v in obj.items() if k != '/Metadata' + } + try: + # make sure to import metadata streams as such + meta_ref = obj.get_value_as_reference('/Metadata') + # ensure a MetadataStream object ends up in the cache + meta_ref.get_pdf_handler().get_object( + meta_ref, as_metadata_stream=True + ) + # ...then import the reference + raw_dict['/Metadata'] = self.process_reference(meta_ref) + except (KeyError, IndirectObjectExpected): + pass + + if isinstance(obj, generic.StreamObject): + stm_cls = generic.StreamObject + # again, make sure to import metadata streams as such + try: + # noinspection PyUnresolvedReferences + from pyhanko.pdf_utils.metadata import xmp_xml + + if isinstance(obj, xmp_xml.MetadataStream): + stm_cls = xmp_xml.MetadataStream + except ImportError: # pragma: nocover + pass + # In the vast majority of use cases, I'd expect the content + # to be available in encoded form by default. + # By initialising the stream object in this way, we avoid + # a potentially costly decoding operation. + return stm_cls(raw_dict, encoded_data=obj.encoded_data) + else: + return generic.DictionaryObject(raw_dict) + elif isinstance(obj, generic.ArrayObject): + return generic.ArrayObject(self._ingest(v) for v in obj) + else: + return obj + + def process_reference(self, ref: generic.Reference) -> generic.PdfObject: + try: + return self.reference_map[ref] + except KeyError: + # Add a placeholder to reserve the reference value. + new_ido = self.target.allocate_placeholder() + self.reference_map[ref] = new_ido + self.queued_references.append((ref, new_ido.reference)) + return new_ido + + def copy_into_new_writer( input_handler: PdfHandler, writer_kwargs: Optional[dict] = None ) -> PdfFileWriter: @@ -1254,16 +1273,24 @@ def copy_into_new_writer( w = PdfFileWriter(init_page_tree=False, **writer_kwargs) input_root_ref = input_handler.root_ref output_root_ref = w.root_ref - # call _import_object in such a way that we translate the input handler's + # call _ObjectImporter in such a way that we translate the input handler's # root to the new writer's root. # From a technical PoV this doesn't matter, but it makes the output file # somewhat "cleaner" (i.e. it doesn't leave an orphaned document catalog # cluttering up the file) - new_root_dict = w._import_object( - input_handler.root, - reference_map={input_root_ref: output_root_ref}, + importer = _ObjectImporter( + source=input_handler, + target=w, + reference_map={ + input_root_ref: generic.IndirectObject( + idnum=output_root_ref.idnum, + generation=output_root_ref.generation, + pdf=w, + ) + }, obj_stream=None, ) + new_root_dict = importer.import_object(input_handler.root) # override the old root ref ix = (output_root_ref.generation, output_root_ref.idnum) w.objects[ix] = new_root_dict @@ -1278,9 +1305,13 @@ def copy_into_new_writer( except KeyError: info_dict = None if info_dict is not None: - imported_info = w._import_object( - info_dict, reference_map={}, obj_stream=None + importer = _ObjectImporter( + source=input_handler, + target=w, + reference_map={}, + obj_stream=None, ) + imported_info = importer.import_object(info_dict) w._info = w.add_object(imported_info) return w diff --git a/pyhanko_tests/test_utils.py b/pyhanko_tests/test_utils.py index 1d0a8897..b87d7ecd 100644 --- a/pyhanko_tests/test_utils.py +++ b/pyhanko_tests/test_utils.py @@ -2108,3 +2108,42 @@ def test_merge_resource_conflict(): } ), ) + + +def test_copy_deep_object_graph(): + f = BytesIO(MINIMAL) + w = IncrementalPdfFileWriter(f) + cur_obj = w.root['/Blah'] = generic.DictionaryObject() + w.update_root() + for i in range(4000): + next_obj = generic.DictionaryObject() + cur_obj[f'/Blah_{i}'] = generic.ArrayObject([w.add_object(next_obj)]) + cur_obj = next_obj + w.write_in_place() + + r = PdfFileReader(f) + new_w = writer.copy_into_new_writer(r) + out = BytesIO() + new_w.write(out) + + new_r = PdfFileReader(out) + assert len(new_r.root['/Blah']['/Blah_0'][0]['/Blah_1'][0]['/Blah_2']) == 1 + + +def test_copy_root_reference(): + f = BytesIO(MINIMAL) + w = IncrementalPdfFileWriter(f) + arr = generic.ArrayObject( + [generic.IndirectObject(w.root_ref.idnum, w.root_ref.generation, w)] + ) + w.root['/Blah'] = w.add_object(arr) + w.update_root() + w.write_in_place() + + r = PdfFileReader(f) + new_w = writer.copy_into_new_writer(r) + out = BytesIO() + new_w.write(out) + + new_r = PdfFileReader(out) + assert new_r.root['/Blah'].raw_get(0).idnum == w.root_ref.idnum From 5928c5b6a21d94ee939206fdb4e62a08afc9f110 Mon Sep 17 00:00:00 2001 From: Matthias Valvekens Date: Tue, 26 Mar 2024 21:46:56 +0100 Subject: [PATCH 2/3] Tolerate unpadded empty plaintext See #412 --- pyhanko/pdf_utils/crypt/_util.py | 3 ++- .../minimal-aes256-empty-encrypted-string.pdf | Bin 0 -> 1966 bytes pyhanko_tests/test_crypt.py | 16 ++++++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 pyhanko_tests/data/pdf/minimal-aes256-empty-encrypted-string.pdf diff --git a/pyhanko/pdf_utils/crypt/_util.py b/pyhanko/pdf_utils/crypt/_util.py index 01e7cf5f..59f955ef 100644 --- a/pyhanko/pdf_utils/crypt/_util.py +++ b/pyhanko/pdf_utils/crypt/_util.py @@ -15,7 +15,8 @@ def aes_cbc_decrypt(key, data, iv, use_padding=True): decryptor = cipher.decryptor() plaintext = decryptor.update(data) + decryptor.finalize() - if use_padding: + # we tolerate empty messages that don't have padding + if use_padding and len(plaintext) > 0: unpadder = padding.PKCS7(128).unpadder() return unpadder.update(plaintext) + unpadder.finalize() else: diff --git a/pyhanko_tests/data/pdf/minimal-aes256-empty-encrypted-string.pdf b/pyhanko_tests/data/pdf/minimal-aes256-empty-encrypted-string.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e5bfe2ba6dd31a3008ada143eb11ffd090fa34c7 GIT binary patch literal 1966 zcmah~eNa_J6fbhBxmb~!A!Td++Z3s27Pm-dN$FAVwvld-nq<-g|nvDCt;9 zDpr^#nq)d;6NVM`A(c%D4P+&gIb)^OV5Fj!sg2oajzhKgDM)du@W*NULlo0ehdIqh>*N;(P;3eON|C^^SCr};c>aVEMD$W`Pu-6NL*?X8<_Yx z4@)3bw7hcHPoNYCWe5{U8Bg5Trj|r`Sz2y9N@9Wk^IsWj%$JVJR1`$tC$Qk$6IeEn z75T-g4FY|c?{N}UqWNVfb8b46H2v`(^bZ!DUUBxT+D^P+-74=#?dK)JJCVF|=j*S` zD?}UWwEL6hoZA2C#)UU{&pT%aqD^goRZW=I#*rBnwF!8RwB;Rpt8 zhZulboLiU^p_`Ve3#xPKxHe!m*Rc$QLLCZQ7Y30=8Gx2!G2NiHp(73);p$L&nXINE zMZ18qj!i|Irea0QRpl-#M!m;o`Mp)?d*xiF*^fp7G+1UNICvKinUO?IBvgrAM!8>{ zH76)0EgYvgvv^EaXk?PAWn|?|GkV<`dQVB@WT_zoQx`eRv;cFMMGaTjP%vF!-9`kt zE~AESU?BiT5K(L*q+mH>iS(4q*MP#|0GnZ;zPkRyl$NG69F zoI8j!>`*3?p(BB?R5&!ihI>(2_A&<&bLdbPI5uZ8EYox>j<5-#BMh0Vi#-)IZcpi+ z?~NYx`t(P^r0m|eBggDh+0_}({&e8xcl?3YTLpB@J$o~UzwmswU(vj9ZpXBm+b3?& zw6F48x<0z`!EgKL&urfx-d(sQ`IqkMhG^|Dlyyt@fgM*Sr46&+9-E(C^YWfMCyc*& z{AYt6+WKByefapTop-ce-_ky}Ep&Cp`uh3N>9ZHDJXmwnupMf}yrM2+=al1xN3Y#W z$HXc-8!oH-q7%L#jt&_zbl$c%pBUJ_<-~W>yEcwnedLQJm*+Qk55VVEh4V(t!UL~+ zwdL%p)`s?z5Sj5_t)1GwYGs>TOC zA9~;BZF$Cw;ium`y}f^)^ZT(e{M_};Q@(8T!dDIGZmtP8EuL6?_s6XrBeT!oQML28 z7Hsdg%E-Or*pb1{%*`4#;?d5lp8D%Ooy{x|Mn=hXTS{+xdPZ&a*DYw!+FkWEAI%?` z^7;PFxo?ne+%y?8a`M&AT4MP_;tw|ZNGGa%9!8PtR3^_wHU$nUbi>_-a3t;jM~3xFWDq!#|9kHF9w{m1USbgD#^Z9pBpADgAk=^^ zET>I~>5lCZKy;fpK++p>F|mQ{L14JHAw;?wl3WO|r3=6~aU?-u+2hEO-6RQ1 gb|Fm Date: Wed, 27 Mar 2024 20:53:48 +0100 Subject: [PATCH 3/3] Deal with sigs in encrypted docs when copying See #412 --- pyhanko/pdf_utils/writer.py | 42 +++++++++++++++ ...gned-encrypted-pubkey-with-catalog-ref.pdf | Bin 0 -> 17450 bytes pyhanko_tests/test_sign_encrypted.py | 51 ++++++++++++++++++ 3 files changed, 93 insertions(+) create mode 100644 pyhanko_tests/data/pdf/signed-encrypted-pubkey-with-catalog-ref.pdf diff --git a/pyhanko/pdf_utils/writer.py b/pyhanko/pdf_utils/writer.py index 3e4a014b..96061b50 100644 --- a/pyhanko/pdf_utils/writer.py +++ b/pyhanko/pdf_utils/writer.py @@ -4,6 +4,7 @@ for the original license. """ +import logging import os import typing from typing import Dict, Iterable, List, Optional, Set, Tuple, Union, cast @@ -53,6 +54,8 @@ 'copy_into_new_writer', ] +logger = logging.getLogger(__name__) + # TODO move this to content.py? def init_xobject_dictionary( @@ -1240,6 +1243,44 @@ def process_reference(self, ref: generic.Reference) -> generic.PdfObject: self.queued_references.append((ref, new_ido.reference)) return new_ido + def preprocess_signature_data(self): + # Signature /Contents is never encrypted => ensure we respect that + # (even though the import operation is guaranteed to break the signature + # there are valid use cases for stripping the encryption on such files, + # e.g. for downstream processing) + from ..sign.fields import enumerate_sig_fields + + signature_dict_refs = [ + field_value.reference + for fq_name, field_value, field_ref in enumerate_sig_fields( + self.source, filled_status=True + ) + # this is the case in all valid PDFs + if isinstance(field_value, generic.IndirectObject) + ] + if signature_dict_refs: + logger.warning( + "Source document contains filled signature fields--the copy " + "operation will invalidate them." + ) + for ref in signature_dict_refs: + sig_dict = ref.get_object() + assert isinstance(sig_dict, generic.DictionaryObject) + raw_dict = { + k: self._ingest(v) + for k, v in sig_dict.items() + if k != '/Contents' + } + raw_dict['/Contents'] = generic.ByteStringObject( + sig_dict.raw_get( + '/Contents', decrypt=generic.EncryptedObjAccess.RAW + ).original_bytes + ) + self.reference_map[ref] = self.target.add_object( + generic.DictionaryObject(raw_dict), + obj_stream=None, + ) + def copy_into_new_writer( input_handler: PdfHandler, writer_kwargs: Optional[dict] = None @@ -1290,6 +1331,7 @@ def copy_into_new_writer( }, obj_stream=None, ) + importer.preprocess_signature_data() new_root_dict = importer.import_object(input_handler.root) # override the old root ref ix = (output_root_ref.generation, output_root_ref.idnum) diff --git a/pyhanko_tests/data/pdf/signed-encrypted-pubkey-with-catalog-ref.pdf b/pyhanko_tests/data/pdf/signed-encrypted-pubkey-with-catalog-ref.pdf new file mode 100644 index 0000000000000000000000000000000000000000..db1883957e79ac7533bfcd8fdf8230b1f35dca8f GIT binary patch literal 17450 zcmeI437BR@eeN;Jk_jj%i$TE;70?jqQ`@O3nLFmJtOGLx$})hQb!Zr-x!pZ1f&mc( zl|_swvWO7Tpko9L1YBXjAaDgyVo(;Ns6pJWffz6@`PKJz53}LTz4|;p-pn-gx14k8 zRQ>C}yzg75dgldsxx3iI&e?h8H7l>b?W1$VAR0{6$Ih8Mch1RloON35A5*sym@oFWo@`nuGYYU$%%G((@hTM>dxsXvXSt% zZ==SVTKcBzs#^9@>s05glX0z@&UZR)`^I&~HoDQW_Pv#+o;T;2JA@vr*~MYc=8Ww( z-cAh`4|M3!;K(^+*~Ic?(*rSQY<|~{SLwvb9E|u;Q8W-*bILP~hKts1Gz@yOqmh|W zFY2ZymQOaAA=sy!;K=Ze7`}|`6~3J_cF^*AdT74zC0$6Xscv}w*umq+cT>ABnmE2% z7Vba#kG?LO=0;Nk6MBz*mo1y%)8auKKCjW$aK>y`_wSZ1nLcLVqR`IN^ki2ZKj)2$ z@(W+N;LWewGe3COv%j(YzU{yC>G7{^bK@;nUB2-(o8R=sKl}PUkM7o7|Ld0=_4v)V zuB@VuJihfeA2~hKpFjWJ&pqu)~uiHicV;s;gGEm9*YRB9_j?x{11|GR|rlHJz>_V_M(zRn)Xnw8pBs zwQ(Iak%{Nc`rL5zs~Pc-@pegvRm*{9N=7q0XKYc|Ok*LzY(@j=qk)aqFjR2ORoxu# zmJYpR)}Eo$<i9=)0yxHT?WI*sp+CHQr>$AabcL0vQKQ#n35P_=x2!g0-%-DApx ze(g^0UO(XN%(O@o&lxM0HIt{DFg?GUu3AEAFg>}vqnYXB%fVRQ_0{sF(^)uS)M_{~ z`7B4Wa_DC3UNrnVSw4MCabj@Rv3#OA@Puv|5m3&j|HVPCS8MhO%Z;BfKJ>T6gSjel zGE&k;apYy?V(nbq+o+8M5v!`Q!gpUy(MNGt}_&fNK#YcPPZZaBV(ks8^>D(>SJ%lRm3YF!KCqefJbZhAE{+O=C3*xp1@ zq=WUDg<2^Sa|N>_+A4}`6Uk1>HWp3eT2Z;aZ7VIEZn2WCTqR7`sY-g)naG=_ch2=) zq^rKAH<*#xu8eN5d==)Zu~s#4=Tub@xVBRSc+^T?RYKE(Y1@`GURS2>ySC*j>s4fJ zZ=`cZREFMG{MN?wQrTWsRl|MzRvXvKCd@#kTj7;zg>}N#VJf|88C=gG+cs{S*fEr@ zH%?FvNZWneXw|A#H>Q$ZtFTrdGXmo} zHkRDj_YHS-){90+r(BE2GqJ7pO=QVZjcB^KtD`FJI=<>e*ELuuBHh$YqZ;2U=Y{P> zg{?#vnO-!lkGV_Z8{JXaI$5>Mtv7ufM_o-!&|>eIu2!hJQo`3&-B!-|sEcjaGAEi# z=(dxRFk>=A&p-*$N>qK_O6{w2M-i#Qo~u7s-%Qmv}G*2;Aeu2^?XCsbVds*c;5E_99Q@lN_kg&`SO-#Q%EQfcTK*7v&Z zOdo1b6=5hzYBSojS^+>T=Ed4R1cs}Lmnf=a&rjRe9QRr`ir-ya$9&sZ*NKVxf{aYh>^m)R z^C}`VD$!#q*D%#K>Koa_O^3@>BK9WuO5L;}2lOGoQeD^0LEx;A55kc+3i+H;Z18Ix z2}!abt!tW!RNOG?zGV_y0#r4c02RIwTp**i^8~+>br;FFY4F9W?_JZkHCYEsH7c%M z+~N^Z;Bmfj%xS9#JES~^)S-*Ip2|N{ zb1B)=UEVblEk$(E#02<`s{u^=EbAw@Z1{fWuCy~cl+)_mzOIc^z>S)$;_9BnLr{T0 zgc5aJnR$b`t?%%Gs>jzo4nd}=ZA0E_JL@HxVmPFO#^2ZhIKmyo$#T@Gj(6v z)!^94mp4bBaptdo{ij#`d9ukSFIJn~`p_ROk&`dKWV404sOLSg$K`ka`9@17Z;;VO z8y&TBVDZdfK z3jsE-0CrUzvSizkN0cbX!D!mJGqUw<6&qjorcJT@*Q2#khKm zlS@k>kcv8hm)nrUK@Sy4$lA^j;31$Z-;p4is$ZQ@JR6*)T!4Z|$fBx9x57}B8w z8+kX?Opc#0Jux}rD+g4^2Tliz8SlbZBLoa&4JW4~jV&L19NY)CINl`7mMrZC1j#|u-SI%;K*GkSrpA{o0m?&I z;)r9(iK~B_U!DBSF$Ot1uGI(JlocmW?{(1h(1*w;Y1JIJWD*|O z?sd@6j9D^>_5hVP)$n!X^}mtHJ;U5qS#51{N4}Y88QWm&knv^7vZ?XeFGAj$oSM## zsU`McKf(+=YH&z|%4t<;OZ@}YANd%;_7?H0c7;XnP^o>zS6y|4Pjn-{+F zAI`k3I(+{5SMGP5nqKgkD|YTKc3bbg=^h(haL@A}J8S+wAG^ipKE1;k*Z%a2+n@H_ zC;!*7W50Ufqxl*0zVOo94%=tz$M-zxzxu_BYcEUQz1>e1-}KUXmu|b&f-82sX<>EfiWjKOcKzhu z|MtW?v)62TM|1Y0PuzOa;salJ<@?>`3-&ziO$R=8<(oG>;8XYg&%fB@X9t{Uk398` z{eHS|(?chIbjV5n5}kO)PM?2tt8F&^)pc{Od*J%#J-o%nKfnL}&7Zeo#aaFKr+#oM z&i6g}?8m)rw30YU$XA-gxEf#kHSY`nQjLZol)sdaeD! z(+kVT{8{r)ed5(`+Vd5g-}|m>u3t27>GtBx4=VfM#s}}cx$0{oz~Q_Gh0tTYmrLAN$7q!#?th5B}W`_W#adyF78(ilZm?ym_zY`;Wfl74|Kk zc+tB~d*m07JaXQ?ciwyP)tjGr#)WUZ^zzScee6Sj{h4!aKkeqJrRV?BoxbJY?D&Nr zym!&7e|XFL&VF#)AAa)UpFMs5u4>**Q{!7+w9CD>KD5st+qeJv_Pd_@ljDx~_~Ng` z>GiKW`MW3Ic-QxCn0W1z_kI1Xi|hEn=2MR!DQ-WlzvhNZ7eD;I>-K&8<3IoMUq1hm zcRlBrxBkVFjn3G3m&gBPpHthz&%N`ctG~F}rH6myo+riguY2MDxc{6>p1b0k@A&q1 z-#>ZF7kuyTsZ;tNPF?irFoPuvw~3Vr!* z-}?BscU!pQTj$I__Nj9EpU(T*VXxZeYi~%tefd^LyfgjOdlvrt)GP12;JM597~5{; zWyfFh;w!#;&};A9eBaOi~{X)Z~FPay+gNho57=3UU>1; z)nB`7uamai`G4Pg&&Ay>+a7z%`D5#Xg<)No6+KpoN+XJ20?YzfOhy#Hv{62!_%aC; zzofp6Ud52i1B;>(8k(;MMg|E02+GlPy)BAii<#j1AT4CsNG3>i5-ti?2l^^Ok)e&@ z9oFiVpylBm0z8XS&RV~a01X#`hL0qMB+8;xk7#y|o{>SbBkkiNR)`J>=ekwrrcoxu zDrz6jv79U55IS~o8K}P1&{-iuAB$DjDehs7j7!deR=~u9t`P~WsW_7#J`evNed4!C zxNoVzCeMBbXZ=Bpl=&2JfoGewkvsuyV2;Z$yTf-$B!{m|cpllZEaK9aagv%i(+SFi z)ma{7kR;Y3bG|U>02!RT%tdbUqVy(Hagod16$xL1@{k&fLUI*T;PMpj(Yi!M)=rm7 z70M-L=|n0ZcRqZRWO!mW&BBIidYoFyWE!roX%v0_Nfu2hMJpLl|rTxZ9*ixpr6o= z$+U?Js|^hsW-E$wlL;TwMeMIMqqM?jIeJT)GAAf3S(2DMk)|k}DtJj{p(7@xkl{_~ zycIIZV^63Mh6Fz0=(r^V6G@P-LKYy3dBy$$MMvmTq?iDvN(Tw+Qh6)OJdIqLNmC}J zNqu6NDXuPOuf{cX5Gx_jnDD>e0^T}v|G=GTu!Jtt^02pjdL{?Z!*bObn&bGoi@EKL zpegVa(n3M3llN4TM0g`)n{&u4qk%f(4FPN(@dYAXg%Ql3Paa5gbEi z%8}=UED=;Bkr9s1W}uQRCuYx27RhT#-i-eeWo}7x)>k=d9KzN-=m*;qT7U9=KBmN(3Q!%5zG)-yrGm1w$oPNVhgsnZuAKPAqqb7`O7mWm1$L;f$!ouQH($ z>J?5MIuOM!wHkXU&bBzBC2kTYqAV>klSrk4Jrk4pl=$NQvQ#M}LWD$6lO#LmGwM^K z6v-e=Qy!Bw>UAax zpC?(F7tRS&Qc!XV2{kK-&D403GyY{$ijgI06_zZWA=`p>GU6!_d0D1;LO2#GmKg;; z1~%b>oNcq5tf6F)a|@p!t7ZuNBzvMJa;efq&Z0v~qQ~bmWs{6_pNll3b&=#26(-}m z9B+3K<+qHzFTj}A0}YJKkz|QvN>hLaQJJJVrLa-K(#-Qp@r?P#Q{xDF`fqrLv8c0I z2~ak+ND9DZ3_#9PAEiYZk<-#5MoXdqE7qkL1MN_qSi_M@+l;Ra1&Rnv$yl}&&ShL? z<0wh;RL8k3sgr2+c^d1&lL93M&PmBmrxamjGMk41fEcL}AfLPhe7KC9MM)zenTWtI zbp~_-FyIXUik#e90DW1V8`gUktypSLOBP_P)`B9Pi;NCwM^2R_#$X59&7@6S7Lj*} z{Xz=KjRE!2sFbf#I=NQ8rmBsq_WJ5|QKqF$fJ7AkL{s;H1i8$L!s3%I1LJ_(0v!7t z)azN)PqlrfUK1bKV^qCEg(H?{k^BEoRokj~9S(sSuQDp4tEnn>+<- zdXbuh0vq!wr(}BLGYe`)8ZCf?MV7^Bl7Lja=Cqv3<0-cZ;e>;mT%h}ABm|QIz=^&% zA~}?Wa0PXpqHG*UpT%MTDA8V8pbj@Dx>>IvI1_x7p74*d8zC zWQ{^l->nSDy+{z90UR8nl2aiwRyUjnZWY2N%0u}8W@(;5D4b1EiPH=oX)VA?GF}BH zvVvr#zzoh)A;Bt^l%{Q7LQ=EF1K>{u(`VG#p=V1-HLYVLCdpxMbHw6O%OZ;lJZAVBbkMq zzP3s;daNKk9x-$7s>catbqxCDw&R|-K!fE|m771=PXB-bTcOJb-7fKr~0;AG7WhMZnep?Q`g zhU@7VP@ST;Bo<)60;RB=AK-IVLE}j>ol1$4K(Qnhad$Wyq&E{({e)$5DHGPY zlDI@bi)q4VDZHPgmT5*unGVwigb#p)nwQB0H7z`*b&VWfulR7f7 zB7L#!P1rL=7(Ydtl5Wu^09uTgWD2DK6$!|+5&#LsE9fN8fCwI%NcVV%p@4*ut-yq&!AkOUkud=9>@BP zobnlUf>MlfPylSHecq%p=W!UgD@q^_??|*P6x_@r2B5luSS^x|Ck9j8-J(T7l0(py zvCaw~p7sGOSj|Kc{7Fh=5hBC_v4@ZVZl>0gOk%)4U<7Q8y2Jw-u1rZ)IDQh42$>Ji z&led1h<1|{03`IhH~}sMN{}NOx{MqcqrSmqh)DD_@DRETpbauZPF2pNK&>*-a4V!? zZbFp@D+me0Kzkx2A#x~KIzb3CJ`Yl7NC?n+qlzpKCz43uB?TX$OXQe1^+-*Cc@kB4 zyiQq^cgPm+NFroxGPy?CK%R7@2#cWzZ}?3CHwlmDX2>>+eHv@Uf)ABg~15DiHeQ?hFYibLN3HHM}TEOINrHzn#Iz>zcrg`?O7krzZL$W|%_ z2%?4-f?Vp0)G?h%fC=KIMWqN&KY?hFLPGw5(G4*Ul^rGU50Z@sdI_tEKQVQkffetPv^pX;GPE6w1l-Cr*0Zxb3hXU?- zNSs)p&>~F|gPAdw=%AQ`n{oZXOauQ5s{zA@SZ7wacqopiK}x(ycq#%JIiMur7%{(* z+qk`kx}*r85{=U0>RDP+MGM$bfuM(+jtHp>XaH%GnAM6!AiS2xT*(ro2|^5?(fy&}qL8%2?q6~o^ zgSv%iOfntfGXN^7ljk*gni1>?B}Pu8$Y^xnya?(vI4?$PgSrM@m(nMjM+pp5nH*Fo z$?pLiCK;MMwiU2O^cEyIBswH2>r8+b7+8o5Fdg%tm7(A;_EcalT0uJt>s5~9JOB!- zPQi&-s^lLM9h#jYv7?ir6>}eE8xR+CHIwFWYib*NIX3*;V1W%5*kFMT7T92c4Hnp7 zfejYeV1W%5*kFMT7T92c4HlTS0NXawQ>ME`)v_hsU~x1En{Ef;aZbzT8g=C0|Ki{5 z29V)q``PVqGken)?6?2;bl6qCyq-O>+UE3@t>?`#UCSPv!CbZ{@i17}^V+hMC+fE4 z=`;2hh7FIE?b!9kgMB@lW!XE-4pXVw|HOVx#XjDSg$f%MDYaoACR;L_;e(It9Ay`m zmcB;%tr#&I_B#G*Lc@rMdu~P>SLpX}C*Q%7)v~FcJuB>pKW3sG?p9|X8pE4Cz~5Y` zX7;F!E?w(L*#9&p_FzBFg28aN%NTpcs+o;|^TH;*)wU0<{)ZiYzq<>2wy3e!v+dbN z>zcOV(H#C7MtEwYZrDLASKD0}{+hsKcj92U-*2srhLS%C!G=S@UqM)H$Dv+r*UzdQ zhdXWl<0CiUe8Daky36mp`|U@+|4jAtwzs}-<#ylN|HJ2OzWGKw>P;?KcL%|0+YQ4* Km11VU;eP`8<(KjR literal 0 HcmV?d00001 diff --git a/pyhanko_tests/test_sign_encrypted.py b/pyhanko_tests/test_sign_encrypted.py index b535be64..19586088 100644 --- a/pyhanko_tests/test_sign_encrypted.py +++ b/pyhanko_tests/test_sign_encrypted.py @@ -5,12 +5,14 @@ from pyhanko.pdf_utils.incremental_writer import IncrementalPdfFileWriter from pyhanko.pdf_utils.reader import PdfFileReader +from pyhanko.pdf_utils.writer import copy_into_new_writer from pyhanko.sign import signers from pyhanko.sign.diff_analysis import ModificationLevel from pyhanko.sign.signers.pdf_signer import ( DSSContentSettings, SigDSSPlacementPreference, ) +from pyhanko.sign.validation import validate_pdf_signature from pyhanko_tests.samples import ( MINIMAL_AES256, MINIMAL_ONE_FIELD_AES256, @@ -18,11 +20,13 @@ MINIMAL_PUBKEY_ONE_FIELD_AES256, MINIMAL_PUBKEY_ONE_FIELD_RC4, MINIMAL_RC4, + PDF_DATA_DIR, PUBKEY_SELFSIGNED_DECRYPTER, ) from pyhanko_tests.signing_commons import ( DUMMY_HTTP_TS, FROM_CA, + SIMPLE_V_CONTEXT, live_testing_vc, val_trusted, ) @@ -175,3 +179,50 @@ def test_sign_encrypted_with_post_sign(requests_mock, password, file): assert status.modification_level == ModificationLevel.LTA_UPDATES assert len(r.embedded_regular_signatures) == 1 assert len(r.embedded_timestamp_signatures) == 1 + + +def test_copy_encrypted_signed_file(): + w = IncrementalPdfFileWriter(BytesIO(MINIMAL_ONE_FIELD_AES256)) + w.encrypt("ownersecret") + out = signers.sign_pdf( + w, + signers.PdfSignatureMetadata(), + signer=FROM_CA, + existing_fields_only=True, + ) + + r = PdfFileReader(out) + r.decrypt("ownersecret") + w = copy_into_new_writer(r) + out2 = BytesIO() + w.write(out2) + + r = PdfFileReader(out2) + assert not r.encrypted + s = r.embedded_signatures[0] + s.compute_integrity_info() + status = validate_pdf_signature(s, SIMPLE_V_CONTEXT(), skip_diff=True) + assert not status.intact + + +def test_copy_file_with_mdp_signature_and_backref(): + # This file has /Data in a signature reference dictionary + # pointing back to the root (which is sometimes still seen in + # FieldMDP signatures generated by Acrobat, among others) + + fname = f"{PDF_DATA_DIR}/signed-encrypted-pubkey-with-catalog-ref.pdf" + with open(fname, 'rb') as inf: + + r = PdfFileReader(inf) + r.decrypt_pubkey(PUBKEY_SELFSIGNED_DECRYPTER) + + w = copy_into_new_writer(r) + out2 = BytesIO() + w.write(out2) + + r = PdfFileReader(out2) + assert not r.encrypted + s = r.embedded_signatures[0] + s.compute_integrity_info() + status = validate_pdf_signature(s, SIMPLE_V_CONTEXT(), skip_diff=True) + assert not status.intact