From eb92658a360d7a7d4ce1c93bbcf99c99a2e0943b Mon Sep 17 00:00:00 2001
From: Joe Cummings
Date: Tue, 10 Sep 2024 16:06:58 -0400
Subject: [PATCH] Images in Messages (#1504)

---
 docs/source/api_ref_data.rst                       |   2 +
 tests/assets/dog_on_skateboard.jpg                 | Bin 0 -> 40625 bytes
 tests/test_utils.py                                |   6 +-
 tests/torchtune/data/test_data_utils.py            | 124 +++++++++++++++---
 tests/torchtune/data/test_messages.py              |  18 ++-
 .../multimodal/test_llava_instruct_dataset.py      |  27 +++-
 .../multimodal/test_the_cauldron_dataset.py        |  37 ++----
 tests/torchtune/datasets/test_sft_dataset.py       |   1 +
 .../models/clip/test_clip_image_transform.py       |   2 +
 torchtune/data/__init__.py                         |   5 +-
 torchtune/data/_messages.py                        |  44 +++++--
 torchtune/data/_utils.py                           |  91 +++++++++++--
 .../datasets/multimodal/_llava_instruct.py         |  29 +++-
 .../datasets/multimodal/_the_cauldron.py           |   9 +-
 torchtune/models/flamingo/_transform.py            |  29 ++--
 15 files changed, 331 insertions(+), 93 deletions(-)
 create mode 100644 tests/assets/dog_on_skateboard.jpg

diff --git a/docs/source/api_ref_data.rst b/docs/source/api_ref_data.rst
index 908c121157..3a63b4b831 100644
--- a/docs/source/api_ref_data.rst
+++ b/docs/source/api_ref_data.rst
@@ -88,3 +88,5 @@ Miscellaneous helper functions used in modifying data.
 
     validate_messages
     truncate
+    load_image
+    format_content_with_images
diff --git a/tests/assets/dog_on_skateboard.jpg b/tests/assets/dog_on_skateboard.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ff8a730f891c656537897154a7b9a3dfb0eea73d
GIT binary patch
literal 40625
[~40 kB of base85-encoded JPEG data omitted]

literal 0
HcmV?d00001
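For orientation, a minimal sketch of how the two helpers added to the API reference above are intended to be combined, based only on the definitions introduced later in this patch; the asset path assumes the snippet runs from the repository root, and the image tag and prompt text are placeholders::

    from torchtune.data import Message, format_content_with_images, load_image

    # Load the new test asset (a local path or an http/https URL both work).
    dog = load_image("tests/assets/dog_on_skateboard.jpg")

    # Split the text on an explicit image tag and interleave the PIL image.
    content = format_content_with_images(
        "<image>What is the dog doing?",
        image_tag="<image>",
        images=[dog],
    )
    # content == [
    #     {"type": "image", "content": <PIL.Image.Image>},
    #     {"type": "text", "content": "What is the dog doing?"},
    # ]

    message = Message(role="user", content=content)
    assert message.contains_media
    assert message.get_media() == [dog]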
zlD`?!;fcSSUVM`8rAcbdx1qR>gS$;*^Ni3?6m9xEL@SmG(e%qsbf8zY)Oi(SaeTCO z3mPtq1Luj$b-)DPwSjVdK($>asAiFrT@7mF$Yp)a!%x~+xQ z`oy2WZczrSv+Dlcc`luMyT*K(X!;(Y&ZX z4Y?hxrC{znAxIW$_lWo`9((KNShP~T$)&?Z4m^BDC@NuT3gRg668L94^8rvjf=I74RJWxB@^+bhlS&RmV^hPI-@|^A{X;jc2Xv>LAl<+pnI#iQHpRXXg^XDkcQl zXZ%WV-6>8T>=;Vg4^h|sfCy~ySEHTzsYEKu0LSNWm~jHH;CCu3jW09TJ}z%xjJged z-^2or0N5*=*wGudm6gg-Wcj zqMqB-xCI3*FE%Q?akFWy5>oNd5fz|Ty$+sZxY>P5akPFQ!3vW5e*L-BQh=ozU6@xN zlH~5FZ`*KNal!G$9K(bG-L_Ts5xps)dGiB7;|jn1MQ$BPK8I1Fm5Exe(+@y2lDA#; z@hC>C(!w2GaSH?{6bxegL7f3C56=$xmk7P+in}vfnRYZOcjIQ5Q0V4Vsz^O&oyka> zMZbIve=_$(imCJ$uW{nse+7XjLy(f-${4J5{yM6fDUPBX9l+L-}~IM!B!{e`G`86{{Yp^Qm}(U z3=W^Mq9TDp7ry06h%V#Dey1zYt1CQQ*$uWe_=ZUXDL^Qpv9`Y@fcTqzGqdm|d@!pQ zSN@O!%o+=Y)_AEwY&6C2geu#@AfND+lR}!+PQ5{R4=@ArcPah$HvQ~P8qJO$Rdbkp zbbYF>9-|Pg@|>}_?!bN5s1_hAQ%5%v5gBw=itu}sI%!JUDgOYS#zu%tX!VY;7>7-- z_mH(5JcRgo{^AZRSlj6H5G@9z%JFXy&4pV;KH@cou!Rq^zfpmR1Ht3HJX~o>Enj1o z;se~$*OT%%k$|M}^2ZD=w=J)jcEJec`c4YA0;0Y}^*49Grw^{ZK!H{c)CSu{lnUaR z9uWwMx6Vf>hl}q$$It~<2LAwJnPmuIpBzAuALv}RAhDpi&)pkOPzq{rHuE*`I4G}N z{IG$c0opcXX}&g4L>+72Fb2ABVc-;Bncg8Fv{4@G-n}F6f)`!ilB;?4>FXjRV&waLTciwh9Pa&HT^+AAgcaSaYKV7MCr3zks`&w z^|3ap(%aaZMl`nS=9f|~WSmf*VDScqcNnUR5TD5tcEzPU{6~C&-um|v0)Y?74fIx? za&Fu_D=L{z)D?~h4ks^=agybl!31?+1ZwMR<_>YcfBOgx$5;hOMq2^I0+Z_H0KHzW z7?l-GpKc$O?h3TV9S)doRnnQcagea5RLC|uXXUH4JrwJa{bCB<#_ G&;QvSxv9zk literal 0 HcmV?d00001 diff --git a/tests/test_utils.py b/tests/test_utils.py index 68aad6da34..211e783c3f 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -145,10 +145,14 @@ def tokenize_messages( return tokenized_messages, mask def __call__(self, sample: Mapping[str, Any]) -> Mapping[str, Any]: - messages = sample.pop("messages") + messages: List[Message] = sample.pop("messages") + images = [] + for message in messages: + images += message.get_media() tokens, mask = self.tokenize_messages(messages) sample["tokens"] = tokens sample["mask"] = mask + sample["images"] = images return sample @property diff --git a/tests/torchtune/data/test_data_utils.py b/tests/torchtune/data/test_data_utils.py index 3b6d60adf4..4ea29cbc19 100644 --- a/tests/torchtune/data/test_data_utils.py +++ b/tests/torchtune/data/test_data_utils.py @@ -4,15 +4,20 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
+import os + import pytest +from PIL import Image + +from tests.common import ASSETS from torchtune.data import ( + format_content_with_images, Message, PromptTemplate, - split_text_by_image_tag, truncate, validate_messages, ) -from torchtune.data._utils import _get_prompt_template +from torchtune.data._utils import _get_prompt_template, load_image from torchtune.models.llama2 import Llama2ChatTemplate @@ -98,47 +103,136 @@ def test_validate_messages(): validate_messages(messages) -def test_split_text_by_image_tag(): +def test_format_content_with_images(): + test_image_1 = Image.new(mode="RGB", size=(4, 4)) + test_image_2 = Image.new(mode="RGB", size=(4, 4)) + test_image_3 = Image.new(mode="RGB", size=(4, 4)) + # Test single image tag in the middle text = "hello world" - assert split_text_by_image_tag(text, "") == [ + assert format_content_with_images( + text, + image_tag="", + images=[test_image_1], + ) == [ {"type": "text", "content": "hello "}, - {"type": "image"}, + {"type": "image", "content": test_image_1}, {"type": "text", "content": "world"}, ] # Test multiple image tags and image tag in beginning text = "[image]hello [image]world" - assert split_text_by_image_tag(text, "[image]") == [ - {"type": "image"}, + assert format_content_with_images( + text, + image_tag="[image]", + images=[test_image_1, test_image_2], + ) == [ + {"type": "image", "content": test_image_1}, {"type": "text", "content": "hello "}, - {"type": "image"}, + {"type": "image", "content": test_image_2}, {"type": "text", "content": "world"}, ] # Test an image tag that is not present in the text text = "hello world" - assert split_text_by_image_tag(text, "asdfghjkl;") == [ + assert format_content_with_images(text, image_tag="asdfghjkl;", images=[]) == [ {"type": "text", "content": "hello world"} ] # Test consecutive image tags text = "hello world" - assert split_text_by_image_tag(text, "") == [ - {"type": "image"}, - {"type": "image"}, + assert format_content_with_images( + text, + image_tag="", + images=[test_image_1, test_image_2, test_image_3], + ) == [ + {"type": "image", "content": test_image_1}, + {"type": "image", "content": test_image_2}, {"type": "text", "content": "hello "}, - {"type": "image"}, + {"type": "image", "content": test_image_3}, {"type": "text", "content": "world"}, ] # Test image tag at the end text = "hello " - assert split_text_by_image_tag(text, "") == [ + assert format_content_with_images( + text, + image_tag="", + images=[test_image_1], + ) == [ {"type": "text", "content": "hello "}, - {"type": "image"}, + {"type": "image", "content": test_image_1}, ] + # Test errors when the number of images does not match the number of image tags + text = "hello world" + with pytest.raises( + ValueError, + match="does not match number of image tags", + ): + format_content_with_images( + text, image_tag="", images=[test_image_1, test_image_2] + ) + + +def test_load_image(monkeypatch, tmp_path): + tmp_image = str(ASSETS / "dog_on_skateboard.jpg") + + # Test loading from local file + image = load_image(tmp_image) + assert isinstance(image, Image.Image) + assert image.size == (580, 403) + + # Test loading from remote file + # Mock the urlopen function to return a BytesIO object + def mock_urlopen(url): + return open(tmp_image, "rb") + + monkeypatch.setattr("urllib.request.urlopen", mock_urlopen) + image = load_image("http://example.com/test_image.jpg") + assert isinstance(image, Image.Image) + assert image.size == (580, 403) + + # Test that a ValueError is raised when the image path is invalid + with 
pytest.raises(ValueError, match="Failed to open image as PIL.Image"): + load_image("invalid_path") + + # Test a temporary file with invalid image data + image_path = tmp_path / "test_image.jpg" + with open(image_path, "w") as f: + f.write("Invalid image data") + + # Test that a ValueError is raised when the image data is invalid + with pytest.raises(ValueError, match="Failed to open image as PIL.Image"): + load_image(str(image_path)) + + # Test that a ValueError is raised when there is an HTTP error + # Mock the urlopen function to raise an exception + def mock_urlopen(url): + raise Exception("Failed to load image") + + monkeypatch.setattr("urllib.request.urlopen", mock_urlopen) + with pytest.raises(ValueError, match="Failed to load image"): + load_image("http://example.com/test_image.jpg") + + # Test that a ValueError is raised when there is an IO error + # Create a temporary file that cannot be read + image_path = tmp_path / "test_image.jpg" + with open(image_path, "w") as f: + f.write("Test data") + os.chmod(image_path, 0o000) # Remove read permissions + with pytest.raises(ValueError, match="Failed to open image as PIL.Image"): + load_image(str(image_path)) + os.chmod(image_path, 0o644) # Restore read permissions + + # Test that a ValueError is raised with invalid image data is read + # Create a temporary file with invalid image data + image_path = tmp_path / "test_image.jpg" + with open(image_path, "wb") as f: + f.write(b"Invalid image data") + with pytest.raises(ValueError, match="Failed to open image as PIL.Image"): + load_image(str(image_path)) + def test_get_prompt_template(): template = _get_prompt_template("torchtune.models.llama2.Llama2ChatTemplate") diff --git a/tests/torchtune/data/test_messages.py b/tests/torchtune/data/test_messages.py index 0298ef0e68..748fbc6a22 100644 --- a/tests/torchtune/data/test_messages.py +++ b/tests/torchtune/data/test_messages.py @@ -5,6 +5,8 @@ # LICENSE file in the root directory of this source tree. 
import pytest + +from PIL import Image from tests.test_utils import ( assert_dialogue_equal, CHAT_SAMPLE, @@ -26,17 +28,21 @@ def text_message(self): return Message(role="user", content="hello world") @pytest.fixture - def image_message(self): + def test_image(self): + return Image.new(mode="RGB", size=(4, 4)) + + @pytest.fixture + def image_message(self, test_image): return Message( role="user", content=[ {"type": "text", "content": "hello"}, - {"type": "image"}, + {"type": "image", "content": test_image}, {"type": "text", "content": " world"}, ], ) - def test_message_validation(self, text_message): + def test_message_validation(self, text_message, test_image): message = text_message assert message.role == "user" assert message.content == [{"type": "text", "content": "hello world"}] @@ -53,7 +59,7 @@ def test_message_validation(self, text_message): ): message = Message( role="user", - content=[{"type": "image"}], + content=[{"type": "image", "content": test_image}], ipython=True, ) @@ -69,6 +75,10 @@ def test_contains_media(self, text_message, image_message): assert not text_message.contains_media assert image_message.contains_media + def test_get_media(self, text_message, image_message, test_image): + assert text_message.get_media() == [] + assert image_message.get_media() == [test_image] + def test_text_content(self, text_message, image_message): assert text_message.text_content == "hello world" assert image_message.text_content == "hello world" diff --git a/tests/torchtune/datasets/multimodal/test_llava_instruct_dataset.py b/tests/torchtune/datasets/multimodal/test_llava_instruct_dataset.py index fee9791984..e3f61795d5 100644 --- a/tests/torchtune/datasets/multimodal/test_llava_instruct_dataset.py +++ b/tests/torchtune/datasets/multimodal/test_llava_instruct_dataset.py @@ -7,6 +7,8 @@ from collections import Counter from unittest.mock import patch +import PIL + import pytest from datasets import Dataset @@ -21,11 +23,22 @@ class TestLLaVAInstructDataset: def tokenizer(self): return DummyTokenizer() + @pytest.fixture + def test_image_pil(self): + return PIL.Image.new(mode="RGB", size=(4, 4)) + @patch("torchtune.datasets._sft.load_dataset") - def test_label_no_masking(self, load_dataset, tokenizer): + @patch("torchtune.datasets.multimodal._llava_instruct.load_image") + def test_label_no_masking( + self, load_image, load_dataset, tokenizer, test_image_pil + ): """ Test whether the input and the labels are correctly created when the input is not masked. + + WARNING: careful with these mocks, they are applied in bottom up order """ + # mock the call to load_image + load_image.return_value = test_image_pil # mock the call to HF datasets load_dataset.return_value = Dataset.from_list( @@ -55,6 +68,7 @@ def test_label_no_masking(self, load_dataset, tokenizer): model_transform=tokenizer, train_on_input=True, ) + input, labels, images = ds[0]["tokens"], ds[0]["labels"], ds[0]["images"] expected_count = { @@ -76,13 +90,18 @@ def test_label_no_masking(self, load_dataset, tokenizer): assert Counter(input) == expected_count assert Counter(labels) == expected_count - assert images == "test_image.jpg" + assert images == [test_image_pil] @patch("torchtune.datasets._sft.load_dataset") - def test_label_masking(self, load_dataset, tokenizer): + @patch("torchtune.datasets.multimodal._llava_instruct.load_image") + def test_label_masking(self, load_image, load_dataset, tokenizer, test_image_pil): """ Test whether the input and the labels are correctly created when the input is masked. 
+ + WARNING: careful with these mocks, they are applied in bottom up order """ + # mock the call to load_image + load_image.return_value = test_image_pil # mock the call to HF datasets load_dataset.return_value = Dataset.from_list( @@ -133,4 +152,4 @@ def test_label_masking(self, load_dataset, tokenizer): assert Counter(input) == expected_count assert labels.count(CROSS_ENTROPY_IGNORE_IDX) == 11 - assert images == "test_image.jpg" + assert images == [test_image_pil] diff --git a/tests/torchtune/datasets/multimodal/test_the_cauldron_dataset.py b/tests/torchtune/datasets/multimodal/test_the_cauldron_dataset.py index 460cc439b6..4f62dc19ee 100644 --- a/tests/torchtune/datasets/multimodal/test_the_cauldron_dataset.py +++ b/tests/torchtune/datasets/multimodal/test_the_cauldron_dataset.py @@ -6,14 +6,13 @@ from unittest.mock import patch -import pytest -import torch +import PIL +import pytest from tests.test_utils import DummyTokenizer from torchtune.data._common import CROSS_ENTROPY_IGNORE_IDX from torchtune.datasets import the_cauldron_dataset -from torchvision.transforms import functional as F class TestTheCauldronDataset: @@ -21,17 +20,19 @@ class TestTheCauldronDataset: def tokenizer(self): return DummyTokenizer() + @pytest.fixture + def test_image_pil(self): + return PIL.Image.new(mode="RGB", size=(4, 4)) + @patch("torchtune.datasets._sft.load_dataset") - def test_label_no_masking(self, load_dataset, tokenizer): + def test_label_no_masking(self, load_dataset, tokenizer, test_image_pil): """ Test whether the input and the labels are correctly created when the input is not masked. """ - - image_tensor = torch.randint(0, 256, (3, 4, 4), dtype=torch.uint8) # mock the call to HF datasets load_dataset.return_value = [ { - "images": [F.to_pil_image(image_tensor)], + "images": test_image_pil, "texts": [ { "user": "Question: What do respiration and combustion give out" @@ -47,11 +48,7 @@ def test_label_no_masking(self, load_dataset, tokenizer): ds = the_cauldron_dataset( model_transform=tokenizer, subset="dummy", train_on_input=True ) - input, labels, images = ( - ds[0]["tokens"], - ds[0]["labels"], - ds[0]["images"][0], - ) + input, labels, images = (ds[0]["tokens"], ds[0]["labels"], ds[0]["images"]) assert input == [ 0, @@ -83,19 +80,17 @@ def test_label_no_masking(self, load_dataset, tokenizer): -1, ] assert labels == input - torch.testing.assert_close(F.pil_to_tensor(images), image_tensor) + assert images == [test_image_pil] @patch("torchtune.datasets._sft.load_dataset") - def test_label_masking(self, load_dataset, tokenizer): + def test_label_masking(self, load_dataset, tokenizer, test_image_pil): """ Test whether the input and the labels are correctly created when the input is masked. 
""" - - image_tensor = torch.randint(0, 256, (3, 4, 4), dtype=torch.uint8) # mock the call to HF datasets load_dataset.return_value = [ { - "images": [F.to_pil_image(image_tensor)], + "images": test_image_pil, "texts": [ { "user": "Question: What do respiration and combustion give out" @@ -111,11 +106,7 @@ def test_label_masking(self, load_dataset, tokenizer): ds = the_cauldron_dataset( model_transform=tokenizer, subset="dummy", train_on_input=False ) - input, labels, images = ( - ds[0]["tokens"], - ds[0]["labels"], - ds[0]["images"][0], - ) + input, labels, images = (ds[0]["tokens"], ds[0]["labels"], ds[0]["images"]) assert input == [ 0, @@ -147,4 +138,4 @@ def test_label_masking(self, load_dataset, tokenizer): -1, ] assert labels.count(CROSS_ENTROPY_IGNORE_IDX) == 24 - torch.testing.assert_close(F.pil_to_tensor(images), image_tensor) + assert images == [test_image_pil] diff --git a/tests/torchtune/datasets/test_sft_dataset.py b/tests/torchtune/datasets/test_sft_dataset.py index 8acc051178..d08530aa97 100644 --- a/tests/torchtune/datasets/test_sft_dataset.py +++ b/tests/torchtune/datasets/test_sft_dataset.py @@ -26,6 +26,7 @@ class DummyTokenizerInvalidModelTransform(DummyTokenizer): def __call__(self, sample: Mapping[str, Any]) -> Mapping[str, Any]: sample = super().__call__(sample) del sample["tokens"] + del sample["images"] return sample diff --git a/tests/torchtune/models/clip/test_clip_image_transform.py b/tests/torchtune/models/clip/test_clip_image_transform.py index fdb9f75d77..5192e19dd3 100644 --- a/tests/torchtune/models/clip/test_clip_image_transform.py +++ b/tests/torchtune/models/clip/test_clip_image_transform.py @@ -4,11 +4,13 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. + import numpy as np import PIL import pytest import torch + from tests.test_utils import assert_expected from torchtune.models.clip._transform import CLIPImageTransform diff --git a/torchtune/data/__init__.py b/torchtune/data/__init__.py index c39241f494..35956c57bc 100644 --- a/torchtune/data/__init__.py +++ b/torchtune/data/__init__.py @@ -31,7 +31,7 @@ QuestionAnswerTemplate, SummarizeTemplate, ) -from torchtune.data._utils import split_text_by_image_tag, truncate +from torchtune.data._utils import format_content_with_images, load_image, truncate __all__ = [ "ChatFormat", @@ -45,7 +45,7 @@ "Message", "validate_messages", "Role", - "split_text_by_image_tag", + "format_content_with_images", "PromptTemplateInterface", "PromptTemplate", "InputOutputToMessages", @@ -58,4 +58,5 @@ "padded_collate_dpo", "left_pad_sequence", "padded_collate", + "load_image", ] diff --git a/torchtune/data/_messages.py b/torchtune/data/_messages.py index 0d13f9b627..6794243537 100644 --- a/torchtune/data/_messages.py +++ b/torchtune/data/_messages.py @@ -27,15 +27,13 @@ class Message: role (Role): role of the message writer. Can be "system" for system prompts, "user" for human prompts, "assistant" for model responses, or "ipython" for tool call returns. - content (Union[str, List[Dict[str, str]]]): content of the message. If it is text only content, + content (Union[str, List[Dict[str, Any]]]): content of the message. If it is text only content, you can pass in a string. 
If it is multimodal content, pass in a list of dictionaries formatted as follows:: [ - {"type": "image"} - {"type": "text", "content": "hello"}, - {"type": "image"} - {"type": "text", "content": "world"}, + {"type": "image", "content": }, + {"type": "text", "content": "What is in this image?"}, ] masked (bool): whether the message is masked in the sample. If True, do not use @@ -48,28 +46,40 @@ class Message: - For multiple consecutive assistant messages (i.e., tool calls by assistant), only the last assistant message will have ``eot=True`` - All ipython messages (tool call returns) should set ``eot=False``. + + Note: + Message class expects any image content to be in + `PIL Image format `_. """ def __init__( self, role: Role, - content: Union[str, List[Dict[str, str]]], + content: Union[str, List[Dict[str, Any]]], masked: bool = False, ipython: bool = False, eot: bool = True, ): self.role = role - self.content = ( - [{"type": "text", "content": content}] - if isinstance(content, str) - else content - ) + self.content = self._convert_to_list_of_dict(content) self.masked = masked self.ipython = ipython self.eot = eot self._validate_message() + def _convert_to_list_of_dict(self, content) -> List[Dict[str, Any]]: + """User is currently allowed to pass in a string for text-only content. + This ensures that the content is formatted as a list of dictionaries.""" + if isinstance(content, str): + return [{"type": "text", "content": content}] + + assert isinstance( + content, list + ), f"content must be of type List[Dict[str, Any]], got {content}" + + return content + @classmethod def from_dict(cls, d: dict) -> "Message": """ @@ -89,12 +99,20 @@ def from_dict(cls, d: dict) -> "Message": eot=d.get("eot", True), ) + def get_media(self) -> List["PIL.Image.Image"]: + """ + Returns media content of the message. + """ + return [ + content["content"] for content in self.content if content["type"] == "image" + ] + @property def contains_media(self) -> bool: """ - Returns True if message contains non-text content. + Returns whether the message contains media. """ - return any(content["type"] != "text" for content in self.content) + return any(content["type"] == "image" for content in self.content) @property def text_content(self) -> str: diff --git a/torchtune/data/_utils.py b/torchtune/data/_utils.py index e153a6064e..814c21a1cd 100644 --- a/torchtune/data/_utils.py +++ b/torchtune/data/_utils.py @@ -4,7 +4,9 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -from typing import Any, Dict, List, Optional, TypeVar +from pathlib import Path +from typing import Any, Dict, List, Optional, TypeVar, Union +from urllib import request from torchtune.config._utils import _get_component_from_path @@ -42,10 +44,58 @@ def truncate( return tokens_truncated -def split_text_by_image_tag(content: str, image_tag: str) -> List[Dict[str, str]]: +def load_image(image_loc: Union[Path, str]) -> "PIL.Image.Image": + """ + Convenience method to load an image in PIL format from a local file path or remote source. + + Args: + image_loc (Union[Path, str]): Local file path or remote source pointing to the image + which will be loaded in PIL format. + + Note: + If loading an image from a remote source, the function expects the URL provided in ``image_loc`` + to start with "http" or "https" e.g. "https://www.wikipedia.org/en/bird.jpg". + + Raises: + ValueError: If the image cannot be loaded from remote source. 
+ ValueError: If the image cannot be opened as a PIL.Image. + + Examples: + >>> # Load from remote source + >>> image = load_image("https://www.wikipedia.org/en/bird.jpg") + + >>> # Load from local file path + >>> image = load_image(Path("/home/user/bird.jpg")) + + Returns: + PIL.Image.Image: The loaded image. + """ + # Hackily import PIL to avoid burdensome import in the main module + # TODO: Fix this + from PIL import Image + + # If pointing to remote source, try to load to local + if isinstance(image_loc, str) and image_loc.startswith("http"): + try: + image_loc = request.urlopen(image_loc) + except Exception as e: + raise ValueError(f"Failed to load image from {image_loc}") from e + + # Open the local image as a PIL image + try: + image = Image.open(image_loc) + except Exception as e: + raise ValueError(f"Failed to open image as PIL Image from {image_loc}") from e + + return image + + +def format_content_with_images( + content: str, *, image_tag: str, images: List["PIL.Image.Image"] +) -> List[Dict[str, Any]]: """ Given a raw text string, split by the specified ``image_tag`` - and form into list of dictionaries to be used in the ``Message`` content + and form into list of dictionaries to be used in the :class:`~torchtune.data.Message` content field:: [ @@ -53,7 +103,7 @@ def split_text_by_image_tag(content: str, image_tag: str) -> List[Dict[str, str] "role": "system" | "user" | "assistant", "content": [ - {"type": "image"}, + {"type": "image", "content": }, {"type": "text", "content": "This is a sample image."}, ], }, @@ -63,22 +113,41 @@ def split_text_by_image_tag(content: str, image_tag: str) -> List[Dict[str, str] Args: content (str): raw message text image_tag (str): string to split the text by + images (List["PIL.Image.Image"]): list of images to be used in the content - Returns: - List[Dict[str, str]]: list of dictionaries to be used in the ``Message`` content field - - Example: - >>> content = split_text_by_image_tag("hello world", "") + Raises: + ValueError: If the number of images does not match the number of image tags in the content + + Examples: + >>> content = format_content_with_images( + ... "<|image|>hello <|image|>world", + ... image_tag="<|image|>", + ... images=[, ] + ... 
) >>> print(content) - [{"type": "image"}, {"type": "text", "content": "hello "}, {"type": "image"}, {"type": "text", "content": "world"}] + [ + {"type": "image", "content": }, + {"type": "text", "content": "hello "}, + {"type": "image", "content": }, + {"type": "text", "content": "world"} + ] + + Returns: + List[Dict[str, Any]]: list of dictionaries to be used in the :class:`~torchtune.data.Message` content field """ + num_image_tags_in_content = content.count(image_tag) + if len(images) != num_image_tags_in_content: + raise ValueError( + f"Number of images ({len(images)}) does not match number of image tags ({num_image_tags_in_content})" + ) + split_content = content.split(image_tag) final_content_list = [] for i, substr in enumerate(split_content): if len(substr) > 0: final_content_list.append({"type": "text", "content": substr}) if i < len(split_content) - 1: - final_content_list.append({"type": "image"}) + final_content_list.append({"type": "image", "content": images.pop(0)}) return final_content_list diff --git a/torchtune/datasets/multimodal/_llava_instruct.py b/torchtune/datasets/multimodal/_llava_instruct.py index 52c07b1fa9..3307bd0050 100644 --- a/torchtune/datasets/multimodal/_llava_instruct.py +++ b/torchtune/datasets/multimodal/_llava_instruct.py @@ -4,9 +4,10 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +from pathlib import Path from typing import Any, Dict, Mapping, Optional, Union -from torchtune.data import Message, split_text_by_image_tag +from torchtune.data import format_content_with_images, load_image, Message from torchtune.datasets._packed import PackedDataset from torchtune.datasets._sft import SFTDataset from torchtune.modules.transforms import Transform @@ -20,6 +21,7 @@ class LlavaInstructToMessages(Transform): Chat samples in the "conversations" column follow the ShareGPT format:: { + "image": "image0001.png", "conversations": [ { "from": "system" | "human" | "gpt", @@ -38,7 +40,7 @@ class LlavaInstructToMessages(Transform): "role": "system" | "user" | "assistant", "content": [ - {"type": "image"}, + {"type": "image", "content": }, {"type": "text", "content": "This is a sample image."}, ], }, @@ -54,6 +56,7 @@ class LlavaInstructToMessages(Transform): new_system_prompt (Optional[str]): if specified, prepend a system message. This can serve as instructions to guide the model response. Setting this will OVERRIDE any system messages already present in the dataset. Default is None. + images_dir (Optional[Path]): path to the directory containing the images. User is expected to download the COCO dataset. Raises: ValueError: If ``column_map`` is provided and ``conversations`` not in ``column_map``. 
@@ -64,6 +67,7 @@ def __init__( train_on_input: bool = False, column_map: Optional[Dict[str, str]] = None, new_system_prompt: Optional[str] = None, + images_dir: Optional[Path] = None, ): self.train_on_input = train_on_input self.new_system_prompt = new_system_prompt @@ -79,6 +83,7 @@ def __init__( self._column_map = column_map else: self._column_map = {"conversations": "conversations", "image": "image"} + self.images_dir = images_dir def __call__(self, sample: Mapping[str, Any]) -> Mapping[str, Any]: role_map = {"system": "system", "human": "user", "gpt": "assistant"} @@ -89,15 +94,27 @@ def __call__(self, sample: Mapping[str, Any]) -> Mapping[str, Any]: role="system", content=self.new_system_prompt, masked=True, eot=True ) ) + + # Add in image stuffs / load from file for message in sample[self._column_map["conversations"]]: role = role_map[message["from"]] + content = message["value"] if role == "system" and self.new_system_prompt is not None: continue - content = split_text_by_image_tag(message["value"], "") + if role == "user": + image_path = sample[self._column_map["image"]] + if self.images_dir is not None: + image_path = self.images_dir / image_path + pil_image = load_image(image_path) + content = format_content_with_images( + content, + image_tag="", + images=[pil_image], + ) masked = (role != "assistant") and (not self.train_on_input) messages.append(Message(role=role, content=content, masked=masked)) - return {"messages": messages, "images": sample[self._column_map["image"]]} + return {"messages": messages} # TODO: point to Flamingo model transform as an example @@ -105,6 +122,7 @@ def llava_instruct_dataset( model_transform: Transform, *, source: str = "liuhaotian/LLaVA-Instruct-150K", + images_dir: str = "coco/", column_map: Optional[Dict[str, str]] = None, new_system_prompt: Optional[str] = None, train_on_input: bool = True, @@ -173,6 +191,8 @@ def __call__(self, sample: Mapping[str, Any]) -> Mapping[str, Any]: define source as the data file type (e.g. "json", "csv", "text") and pass in the filepath in ``data_files``. See `Hugging Face's `_ + images_dir (str): path to the directory containing the images as you are expected to download the COCO dataset + before using. Default is "coco/". column_map (Optional[Dict[str, str]]): a mapping from the expected columns ("conversations") to the new column names in the dataset. If None, assume these are identical. Default is None. 
@@ -207,6 +227,7 @@ def __call__(self, sample: Mapping[str, Any]) -> Mapping[str, Any]: train_on_input=train_on_input, column_map=column_map, new_system_prompt=new_system_prompt, + images_dir=Path(images_dir), ) ds = SFTDataset( diff --git a/torchtune/datasets/multimodal/_the_cauldron.py b/torchtune/datasets/multimodal/_the_cauldron.py index d6597bb8ef..a4621fd1a3 100644 --- a/torchtune/datasets/multimodal/_the_cauldron.py +++ b/torchtune/datasets/multimodal/_the_cauldron.py @@ -43,7 +43,7 @@ class TheCauldronToMessages(Transform): "role": "system" | "user" | "assistant", "content": [ - {"type": "image"}, + {"type": "image", "content": }, {"type": "text", "content": "This is a sample image."}, ], }, @@ -91,7 +91,10 @@ def __call__(self, sample: Mapping[str, Any]) -> Mapping[str, Any]: Message( role="user", content=[ - {"type": "image"}, + { + "type": "image", + "content": sample[self._column_map["images"]], + }, {"type": "text", "content": message["user"]}, ], masked=not self.train_on_input, @@ -111,7 +114,7 @@ def __call__(self, sample: Mapping[str, Any]) -> Mapping[str, Any]: ) ] + messages - return {"messages": messages, "images": sample[self._column_map["images"]]} + return {"messages": messages} # TODO: point to Flamingo model transform as an example diff --git a/torchtune/models/flamingo/_transform.py b/torchtune/models/flamingo/_transform.py index 80a67f357d..94e5b21539 100644 --- a/torchtune/models/flamingo/_transform.py +++ b/torchtune/models/flamingo/_transform.py @@ -191,25 +191,28 @@ def __call__( self, sample: Mapping[str, Any], inference: bool = False ) -> Mapping[str, Any]: """ - Apply image decoding and transformations to the "images" field in the sample - and tokenization to the "messages" field in the sample. Also returns the - encoder mask. + Apply image decoding, transformations and tokenization to messages in the sample. Args: - sample (Mapping[str, Any]): A sample with a "tokens", "mask", - "encoder_input" and "encoder_mask" field to feed directly into the model. - inference (bool): Whether the template is being used for inference or not. + sample (Mapping[str, Any]): A sample with a "messages" field. + inference (bool): Whether to run in inference mode. Default is True. Returns: - Mapping[str, Any]: The sample with an updated "image" filed and added - "aspect_ratio" field. + Mapping[str, Any]: The transformed sample with the following fields: + - tokens: List[int] of tokenized messages + - mask: List[bool] of masks for the tokenized messages + - encoder_input: Dict[str, Any] of transformed images + - encoder_mask: List[bool] of masks for the transformed images """ encoder_input = {"images": [], "aspect_ratio": []} - pil_images = sample.pop("images") - for image in pil_images: - out = self.transform_image({"image": image}, inference=inference) - encoder_input["images"].append(out["image"]) - encoder_input["aspect_ratio"].append(out["aspect_ratio"]) + messages = sample["messages"] + + for message in messages: + for image in message.get_media(): + out = self.transform_image({"image": image}, inference=inference) + encoder_input["images"].append(out["image"]) + encoder_input["aspect_ratio"].append(out["aspect_ratio"]) + sample["encoder_input"] = encoder_input sample = self.tokenizer(sample, inference=inference) sample = self.xattn_mask(sample, inference=inference)
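Taken together, the dataset-side and model-side changes move images out of a separate top-level "images" key and into the ``Message`` content itself. Below is a minimal sketch of the resulting sample flow, assuming only the ``Message`` API shown in this patch; the concrete image and caption are placeholder values::

    from PIL import Image

    from torchtune.data import Message

    # Dataset transforms (e.g. TheCauldronToMessages) now attach the PIL image
    # directly to the user message instead of returning it alongside the messages.
    image = Image.new(mode="RGB", size=(4, 4))
    messages = [
        Message(
            role="user",
            content=[
                {"type": "image", "content": image},
                {"type": "text", "content": "What is in this image?"},
            ],
        ),
        Message(role="assistant", content="A dog on a skateboard."),
    ]

    # Model transforms (e.g. FlamingoTransform.__call__ above) recover every
    # image from the messages themselves via get_media().
    images = [img for msg in messages for img in msg.get_media()]
    assert images == [image]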