From 077ee5bbd6a280a62a65fcff1b5d6eb8220ebf14 Mon Sep 17 00:00:00 2001 From: Hyunjae Woo Date: Sun, 29 Oct 2023 11:35:42 -0700 Subject: [PATCH 01/15] Enable offline benchmark --- .../avg_first_token_latency_chart.jpg | Bin 109305 -> 0 bytes .../perf_analyzer/docs/examples/profile.py | 113 +++++++++++++----- src/c++/perf_analyzer/docs/llm.md | 26 +++- 3 files changed, 102 insertions(+), 37 deletions(-) delete mode 100644 src/c++/perf_analyzer/docs/examples/avg_first_token_latency_chart.jpg diff --git a/src/c++/perf_analyzer/docs/examples/avg_first_token_latency_chart.jpg b/src/c++/perf_analyzer/docs/examples/avg_first_token_latency_chart.jpg deleted file mode 100644 index 880dac16a0921ec70fcd1a648f5606dec26d288a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001
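The patch above deletes the first-token-latency chart image; per its diffstat it also reworks profile.py and llm.md for an offline (non-streaming) benchmark path, though those hunks are not reproduced here. For orientation, here is a rough, self-contained sketch of the offline reduction the rest of the series builds on: request and final-response timestamps from a Perf Analyzer profile export are turned into end-to-end latency and token throughput. The export dict, the timestamps, and the 100-prompt-token / 256-max-token split below are made up for illustration and are not code from the PR.

```python
# Illustrative sketch only: reduce a hypothetical profile export to the
# offline metrics profile.py reports, end-to-end latency and token throughput.
import numpy as np

export = {  # trimmed-down stand-in for a perf_analyzer profile export (ns timestamps)
    "experiments": [
        {
            "requests": [
                {"timestamp": 0, "response_timestamps": [2_000_000_000]},
                {"timestamp": 1_000_000_000, "response_timestamps": [3_500_000_000]},
            ]
        }
    ]
}

sequence_len = 100 + 256  # assumed: prompt tokens + --max-tokens
latencies_ms, throughputs = [], []
for request in export["experiments"][0]["requests"]:
    total_ns = request["response_timestamps"][-1] - request["timestamp"]
    latencies_ms.append(total_ns / 1_000_000)                      # ns -> ms
    throughputs.append(sequence_len / (total_ns / 1_000_000_000))  # tokens/s

print(f"avg end-to-end latency: {np.mean(latencies_ms):.1f} ms")
print(f"avg token throughput: {np.mean(throughputs):.1f} tokens/s")
```

Later patches in the series (04 and 05) keep this same reduction but report latencies in milliseconds and add min/max/percentile statistics on top of the per-request values.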
z!YH-Nb=PnDIdBSsE)5;x%zfiC=k>o#?sDt|Wz Date: Sun, 29 Oct 2023 11:45:24 -0700 Subject: [PATCH 02/15] Save profile results --- src/c++/perf_analyzer/docs/examples/profile.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/c++/perf_analyzer/docs/examples/profile.py b/src/c++/perf_analyzer/docs/examples/profile.py index 57e4f02dc..fda37e295 100644 --- a/src/c++/perf_analyzer/docs/examples/profile.py +++ b/src/c++/perf_analyzer/docs/examples/profile.py @@ -96,7 +96,7 @@ def get_plot_filename(args, prompt_size): return filename -def print_benchmark_summary(profile_results): +def print_benchmark_summary(args, profile_results): # output = [TITLE] # for pr in profile_results: # line = [PROMPT_SIZE.format(pr.prompt_size)] @@ -108,9 +108,11 @@ def print_benchmark_summary(profile_results): # TODO: create proper output for pr in profile_results: - for k, v in asdict(pr).items(): - print(f"{k} : {v}") - print("") + postfix = get_postfix(args, pr.prompt_size) + with open(f"results-{postfix}.log", "w") as f: + for k, v in asdict(pr).items(): + print(f"{k} : {v}", file=f) + print("", file=f) def plot_results(latencies, filename="inflight_batching_benchmark.png"): @@ -276,7 +278,7 @@ def summarize_profile_results(args, prompts): ) results.append(profile_result) - print_benchmark_summary(results) + print_benchmark_summary(args, results) if args.periodic_concurrency_range: print( "Saved in-flight batching benchmark plots " From f120771e4343e9f8e2241ba0d03154e2fa379f43 Mon Sep 17 00:00:00 2001 From: Hyunjae Woo Date: Sun, 29 Oct 2023 14:09:11 -0700 Subject: [PATCH 03/15] Support TRT-LLM --- .../perf_analyzer/docs/examples/profile.py | 65 ++++++++++++++++++- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/src/c++/perf_analyzer/docs/examples/profile.py b/src/c++/perf_analyzer/docs/examples/profile.py index fda37e295..412c961a8 100644 --- a/src/c++/perf_analyzer/docs/examples/profile.py +++ b/src/c++/perf_analyzer/docs/examples/profile.py @@ -292,6 +292,13 @@ def profile(args, export_file): f"--input-data={INPUT_FILENAME} " f"--profile-export-file={export_file} " ) + if args.model == "ensemble": # TRT-LLM + command += ( + "--shape=text_input:1 " + "--shape=max_tokens:1 " + "--shape=bad_words:1 " + "--shape=stop_words:1 " + ) if args.periodic_concurrency_range: start, end, step = args.periodic_concurrency_range command += ( @@ -321,13 +328,21 @@ def prepare_export_file(args, prompt): def prepare_input_data(input_data, prompt): """Insert the prompt to send into input JSON data.""" - input_data["data"][0]["PROMPT"] = [prompt] + if args.model == "ensemble": + input_data["data"][0]["text_input"] = [prompt] + elif args.model == "vllm": + # TODO (hwoo): use text_input + input_data["data"][0]["PROMPT"] = [prompt] save_json_data(input_data, INPUT_FILENAME) def generate_prompts(args, input_data): """Generate dummy prompts if not specified by input JSON file.""" - prompt = input_data["data"][0]["PROMPT"][0] + if args.model == "ensemble": + prompt = input_data["data"][0]["text_input"][0] + elif args.model == "vllm": + # TODO (hwoo): use text_input + prompt = input_data["data"][0]["PROMPT"][0] if not prompt: # Generate dummy prompt assert args.prompt_size_range, "Must specify --prompt-size-range." @@ -380,14 +395,58 @@ def construct_input_data(args): return input_data +# TODO (hwoo): merge with construct_input_data +def construct_trtllm_input_data(args): + """Construct input data that contains input tensors and parameters. 
+ + Parse the input JSON file (if exists) to construct the input data. + When user sets parameters through command line, overwrite the + parameters set by input JSON file. + """ + prompt = "" + stream = False + max_tokens = 256 + + if args.input_data: + data = load_json_data(filename=args.input_data)["data"][0] + prompt = data["text_input"][0] if "text_input" in data else prompt + stream = data["stream"][0] if "stream" in data else stream + max_tokens = data["max_tokens"][0] if "max_tokens" in data else max_tokens + + # If command line option is specified, overwrite + if args.stream: + stream = args.stream + else: + args.stream = stream + + if args.max_tokens: + max_tokens = args.max_tokens + else: + args.max_tokens = max_tokens + + input_data = {"data": [{}]} + input_data["data"][0]["text_input"] = [prompt] + input_data["data"][0]["stream"] = [stream] + input_data["data"][0]["max_tokens"] = [max_tokens] + input_data["data"][0]["bad_words"] = [""] + input_data["data"][0]["stop_words"] = [""] + return input_data + + def main(args): - input_data = construct_input_data(args) + # TODO (hwoo): merge the conditional cases + if args.model == "ensemble": + input_data = construct_trtllm_input_data(args) + elif args.model == "vllm": + input_data = construct_input_data(args) prompts = generate_prompts(args, input_data) for prompt in prompts: prepare_input_data(input_data, prompt) export_file = prepare_export_file(args, prompt) + print(input_data) + # Run Perf Analyzer profile(args, export_file) From 07a9c9c4a515fc63de6a2735554d404cab117884 Mon Sep 17 00:00:00 2001 From: Hyunjae Woo Date: Sun, 29 Oct 2023 15:21:09 -0700 Subject: [PATCH 04/15] Change latencies to milliseconds --- .../perf_analyzer/docs/examples/profile.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/c++/perf_analyzer/docs/examples/profile.py b/src/c++/perf_analyzer/docs/examples/profile.py index 412c961a8..7c6fbddd3 100644 --- a/src/c++/perf_analyzer/docs/examples/profile.py +++ b/src/c++/perf_analyzer/docs/examples/profile.py @@ -38,8 +38,8 @@ TITLE = "\n[ BENCHMARK SUMMARY ]\n" PROMPT_SIZE = " Prompt size: {}" -FIRST_TOKEN_LATENCY = "Average first-token latency: {:.4f} sec" -T2T_LATENCY = "Average total token-to-token latency: {:.4f} sec" +FIRST_TOKEN_LATENCY = "Average first-token latency: {:.4f} ms" +T2T_LATENCY = "Average total token-to-token latency: {:.4f} ms" @dataclass @@ -126,7 +126,7 @@ def plot_results(latencies, filename="inflight_batching_benchmark.png"): # Set pyplot parameters ax.grid(linestyle="--") ax.set_xlabel("i-th Request Period", fontsize=12) - ax.set_ylabel("Avg Token-to-Token Latency (sec)", fontsize=12) + ax.set_ylabel("Avg Token-to-Token Latency (ms)", fontsize=12) ax.set_title("In-Flight Batching Benchmark Summary", fontsize=14) ax.set_ylim(bottom=0.0) @@ -198,7 +198,7 @@ def calculate_avg_periodic_latencies(args, profile_result, filename): latencies = [] for bin in bins: - latencies.append(np.mean(bin) / 1_000_000_000) + latencies.append(np.mean(bin) / 1_000_000) profile_result.avg_periodic_t2t_latencies = latencies @@ -227,10 +227,10 @@ def calculate_online_latencies(args, profile_result, filename): requests = load_json_data(filename) first_token_latencies, token_to_token_latencies = collect_latencies(requests) - # Compute mean and convert from nanosec to sec - avg_first_token_latency = np.mean(first_token_latencies) / 1_000_000_000 + # Compute mean and convert from nanosec to msec + avg_first_token_latency = np.mean(first_token_latencies) / 1_000_000 if 
token_to_token_latencies: - avg_token_to_token_latency = np.mean(token_to_token_latencies) / 1_000_000_000 + avg_token_to_token_latency = np.mean(token_to_token_latencies) / 1_000_000 else: avg_token_to_token_latency = None @@ -248,9 +248,10 @@ def calculate_throughput(args, profile_result, filename): for request in requests: total_time = request["response_timestamps"][-1] - request["timestamp"] - total_time /= 1_000_000_000 # sec - end_to_end_latencies.append(total_time) - throughputs.append(total_tokens / total_time) + time_s = total_time / 1_000_000_000 # sec + time_ms = total_time / 1_000_000 # msec + end_to_end_latencies.append(time_ms) + throughputs.append(total_tokens / time_s) profile_result.avg_e2e_latency = np.mean(end_to_end_latencies) profile_result.avg_throughput = np.mean(throughputs) From 29b3cbb0b7f66e389180e8db9248b2c39b25572d Mon Sep 17 00:00:00 2001 From: Hyunjae Woo Date: Thu, 2 Nov 2023 21:01:52 -0700 Subject: [PATCH 05/15] Add more metrics to benchmark --- .../perf_analyzer/docs/examples/profile.py | 215 ++++++++++++------ 1 file changed, 151 insertions(+), 64 deletions(-) diff --git a/src/c++/perf_analyzer/docs/examples/profile.py b/src/c++/perf_analyzer/docs/examples/profile.py index 7c6fbddd3..fbcfae1b3 100644 --- a/src/c++/perf_analyzer/docs/examples/profile.py +++ b/src/c++/perf_analyzer/docs/examples/profile.py @@ -25,9 +25,10 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import argparse +import csv import json import subprocess -from dataclasses import asdict, dataclass +from dataclasses import asdict, dataclass, fields from itertools import pairwise from pathlib import Path from typing import Optional @@ -35,27 +36,66 @@ import numpy as np INPUT_FILENAME = "generated_input_data.json" - -TITLE = "\n[ BENCHMARK SUMMARY ]\n" -PROMPT_SIZE = " Prompt size: {}" -FIRST_TOKEN_LATENCY = "Average first-token latency: {:.4f} ms" -T2T_LATENCY = "Average total token-to-token latency: {:.4f} ms" +METRIC_FIELDS = { + "max_first_token_latency": "Max first token latency", + "min_first_token_latency": "Min first token latency", + "avg_first_token_latency": "Avg first token latency", + "p50_first_token_latency": "p50 first token latency", + "p90_first_token_latency": "p90 first token latency", + "p95_first_token_latency": "p95 first token latency", + "p99_first_token_latency": "p99 first token latency", + "max_gen_latency": "Max generation latency", + "min_gen_latency": "Min generation latency", + "avg_gen_latency": "Avg generation latency", + "p50_gen_latency": "p50 generation latency", + "p90_gen_latency": "p90 generation latency", + "p95_gen_latency": "p95 generation latency", + "p99_gen_latency": "p99 generation latency", + "avg_token_latency": "Avg token latency", + "avg_total_t2t_latency": "Avg total token-to-token latency", + "max_e2e_latency": "Max end-to-end latency", + "min_e2e_latency": "Min end-to-end latency", + "avg_e2e_latency": "Avg end-to-end latency", + "max_token_throughput": "Max token throughput", + "min_token_throughput": "Min token throughput", + "avg_token_throughput": "Avg token throughput", + "p50_token_throughput": "p50 token throughput", + "p90_token_throughput": "p90 token throughput", + "p95_token_throughput": "p95 token throughput", + "p99_token_throughput": "p99 token throughput", +} @dataclass class ProfileResults: prompt_size: int + max_first_token_latency: Optional[float] = None + min_first_token_latency: Optional[float] = None avg_first_token_latency: Optional[float] = None + p50_first_token_latency: Optional[float] 
= None + p90_first_token_latency: Optional[float] = None + p95_first_token_latency: Optional[float] = None + p99_first_token_latency: Optional[float] = None + max_gen_latency: Optional[float] = None + min_gen_latency: Optional[float] = None + avg_gen_latency: Optional[float] = None + p50_gen_latency: Optional[float] = None + p90_gen_latency: Optional[float] = None + p95_gen_latency: Optional[float] = None + p99_gen_latency: Optional[float] = None + avg_token_latency: Optional[float] = None avg_total_t2t_latency: Optional[float] = None avg_periodic_t2t_latencies: Optional[list[float]] = None + max_e2e_latency: Optional[float] = None + min_e2e_latency: Optional[float] = None avg_e2e_latency: Optional[float] = None - avg_token_latency: Optional[float] = None - avg_gen_token_latency: Optional[float] = None - avg_throughput: Optional[float] = None - p50_throughput: Optional[float] = None - p90_throughput: Optional[float] = None - p95_throughput: Optional[float] = None - p99_throughput: Optional[float] = None + max_token_throughput: Optional[float] = None + min_token_throughput: Optional[float] = None + avg_token_throughput: Optional[float] = None + p50_token_throughput: Optional[float] = None + p90_token_throughput: Optional[float] = None + p95_token_throughput: Optional[float] = None + p99_token_throughput: Optional[float] = None def load_json_data(filename): @@ -68,15 +108,17 @@ def save_json_data(data, filename): json.dump(data, f) -def get_postfix(args, prompt_size): +def get_postfix(args, prompt_size=None): """Generate postfix for profile export filename and plot. e.g. + - trtllm-maxtokens256 - trtllm-prompt100-maxtokens256 - trtllm-prompt100-periodic1_100_1-period32-maxtokens1024 """ stream_type = "online" if args.stream else "offline" - postfix = f"{args.model}-{stream_type}-prompt{prompt_size}-" + postfix = f"{args.model}-{stream_type}-" + postfix += f"prompt{prompt_size}-" if prompt_size else "" if args.periodic_concurrency_range: start, end, step = args.periodic_concurrency_range postfix += f"periodic{start}_{end}_{step}-period{args.request_period}-" @@ -96,23 +138,28 @@ def get_plot_filename(args, prompt_size): return filename +def save_benchmark_results(args, profile_results): + postfix = get_postfix(args) + results_csv = f"results-{postfix}.csv" + with open(results_csv, "w") as f: + fieldnames = [f.name for f in fields(profile_results[0])] + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + for pr in profile_results: + writer.writerow(asdict(pr)) + print(f"Saved benchmark results @ '{results_csv}'") + + def print_benchmark_summary(args, profile_results): - # output = [TITLE] - # for pr in profile_results: - # line = [PROMPT_SIZE.format(pr.prompt_size)] - # line += [FIRST_TOKEN_LATENCY.format(pr.avg_first_token_latency)] - # if pr.avg_total_t2t_latency: - # line += [T2T_LATENCY.format(pr.avg_total_t2t_latency)] - # output += [", ".join(line) + "\n"] - # print("".join(output)) - - # TODO: create proper output + print("[ BENCHMARK SUMMARY ]") for pr in profile_results: - postfix = get_postfix(args, pr.prompt_size) - with open(f"results-{postfix}.log", "w") as f: - for k, v in asdict(pr).items(): - print(f"{k} : {v}", file=f) - print("", file=f) + print(f"Prompt size: {pr.prompt_size}") + for metric, name in METRIC_FIELDS.items(): + if getattr(pr, metric): + line = f" * {name}: {getattr(pr, metric):.4f} " + line += "ms" if "latency" in metric else "tokens/s" + print(line) + print("") def plot_results(latencies, filename="inflight_batching_benchmark.png"): @@ 
-207,43 +254,70 @@ def collect_latencies(requests): # Example json demonstrating format: # see client/src/c++/perf_analyzer/docs/examples/decoupled_output_file.json first_token_latencies = [] + generation_latencies = [] token_to_token_latencies = [] requests = requests["experiments"][0]["requests"] - for request in requests: - first_response, *remaining_responses, _ = request["response_timestamps"] - first_token_latencies.append(first_response - request["timestamp"]) - prev_response = first_response - for response in remaining_responses: - token_to_token_latencies.append(response - prev_response) - prev_response = response - return first_token_latencies, token_to_token_latencies - - -def calculate_online_latencies(args, profile_result, filename): - """Calculate avg first-token and avg total token-to-token latencies.""" + for r in requests: + init_request, responses = r["timestamp"], r["response_timestamps"] + first_token_latencies.append((responses[0] - init_request) / 1_000_000) + generation_latencies.append((responses[-1] - responses[0]) / 1_000_000) + token_to_token_latencies = [] + for prev_res, res in pairwise(responses): + token_to_token_latencies.append((res - prev_res) / 1_000_000) + return first_token_latencies, generation_latencies, token_to_token_latencies + + +def calculate_online_metrics(args, profile_result, filename): + """Calculate online metrics for more fine-grained performance information.""" if not args.stream: return # skip if offline requests = load_json_data(filename) - first_token_latencies, token_to_token_latencies = collect_latencies(requests) + latencies = collect_latencies(requests) + first_token_latencies, generation_latencies, token_to_token_latencies = latencies - # Compute mean and convert from nanosec to msec - avg_first_token_latency = np.mean(first_token_latencies) / 1_000_000 - if token_to_token_latencies: - avg_token_to_token_latency = np.mean(token_to_token_latencies) / 1_000_000 - else: - avg_token_to_token_latency = None + profile_result.avg_first_token_latency = np.mean(first_token_latencies) + profile_result.avg_total_t2t_latency = np.mean(token_to_token_latencies) - profile_result.avg_first_token_latency = avg_first_token_latency - profile_result.avg_total_t2t_latency = avg_token_to_token_latency + profile_result.max_first_token_latency = max(first_token_latencies) + profile_result.min_first_token_latency = min(first_token_latencies) + profile_result.avg_first_token_latency = np.mean(first_token_latencies) + profile_result.p50_first_token_latency = np.percentile( + first_token_latencies, 50, method="lower" + ) + profile_result.p90_first_token_latency = np.percentile( + first_token_latencies, 90, method="lower" + ) + profile_result.p95_first_token_latency = np.percentile( + first_token_latencies, 95, method="lower" + ) + profile_result.p99_first_token_latency = np.percentile( + first_token_latencies, 99, method="lower" + ) + profile_result.max_gen_latency = max(generation_latencies) + profile_result.min_gen_latency = min(generation_latencies) + profile_result.avg_gen_latency = np.mean(generation_latencies) + profile_result.p50_gen_latency = np.percentile( + generation_latencies, 50, method="lower" + ) + profile_result.p90_gen_latency = np.percentile( + generation_latencies, 90, method="lower" + ) + profile_result.p95_gen_latency = np.percentile( + generation_latencies, 95, method="lower" + ) + profile_result.p99_gen_latency = np.percentile( + generation_latencies, 99, method="lower" + ) + + token_latencies = [t / args.max_tokens for t in 
generation_latencies] + profile_result.avg_token_latency = np.mean(token_latencies) -def calculate_throughput(args, profile_result, filename): - requests = load_json_data(filename) +def collect_offline_metrics(requests, sequence_len): end_to_end_latencies = [] throughputs = [] - total_tokens = profile_result.prompt_size + args.max_tokens requests = requests["experiments"][0]["requests"] for request in requests: @@ -251,14 +325,27 @@ def calculate_throughput(args, profile_result, filename): time_s = total_time / 1_000_000_000 # sec time_ms = total_time / 1_000_000 # msec end_to_end_latencies.append(time_ms) - throughputs.append(total_tokens / time_s) + throughputs.append(sequence_len / time_s) + return throughputs, end_to_end_latencies + +def calculate_offline_metrics(args, profile_result, filename): + """Calculate offline metrics that show end-to-end performance.""" + requests = load_json_data(filename) + throughputs, end_to_end_latencies = collect_offline_metrics( + requests=requests, sequence_len=profile_result.prompt_size + args.max_tokens + ) + + profile_result.max_e2e_latency = max(end_to_end_latencies) + profile_result.min_e2e_latency = min(end_to_end_latencies) profile_result.avg_e2e_latency = np.mean(end_to_end_latencies) - profile_result.avg_throughput = np.mean(throughputs) - profile_result.p50_throughput = np.percentile(throughputs, 50, method="lower") - profile_result.p90_throughput = np.percentile(throughputs, 90, method="lower") - profile_result.p95_throughput = np.percentile(throughputs, 95, method="lower") - profile_result.p99_throughput = np.percentile(throughputs, 99, method="lower") + profile_result.max_token_throughput = max(throughputs) + profile_result.min_token_throughput = min(throughputs) + profile_result.avg_token_throughput = np.mean(throughputs) + profile_result.p50_token_throughput = np.percentile(throughputs, 50, method="lower") + profile_result.p90_token_throughput = np.percentile(throughputs, 90, method="lower") + profile_result.p95_token_throughput = np.percentile(throughputs, 95, method="lower") + profile_result.p99_token_throughput = np.percentile(throughputs, 99, method="lower") def summarize_profile_results(args, prompts): @@ -268,8 +355,8 @@ def summarize_profile_results(args, prompts): export_file = get_export_filename(args, prompt_size) profile_result = ProfileResults(prompt_size=prompt_size) - calculate_throughput(args, profile_result, export_file) - calculate_online_latencies(args, profile_result, export_file) + calculate_offline_metrics(args, profile_result, export_file) + calculate_online_metrics(args, profile_result, export_file) if args.periodic_concurrency_range: calculate_avg_periodic_latencies(args, profile_result, export_file) @@ -280,6 +367,8 @@ def summarize_profile_results(args, prompts): results.append(profile_result) print_benchmark_summary(args, results) + save_benchmark_results(args, results) + if args.periodic_concurrency_range: print( "Saved in-flight batching benchmark plots " @@ -446,8 +535,6 @@ def main(args): prepare_input_data(input_data, prompt) export_file = prepare_export_file(args, prompt) - print(input_data) - # Run Perf Analyzer profile(args, export_file) From 28b0dba4fa145f6648fca9769a3f1eb709abba58 Mon Sep 17 00:00:00 2001 From: Hyunjae Woo Date: Thu, 2 Nov 2023 21:02:32 -0700 Subject: [PATCH 06/15] Update document --- src/c++/perf_analyzer/docs/llm.md | 56 +++++++++++-------------------- 1 file changed, 19 insertions(+), 37 deletions(-) diff --git a/src/c++/perf_analyzer/docs/llm.md b/src/c++/perf_analyzer/docs/llm.md 
index 003bf7df0..1f9fee93d 100644 --- a/src/c++/perf_analyzer/docs/llm.md +++ b/src/c++/perf_analyzer/docs/llm.md @@ -53,7 +53,7 @@ Next run the following command to start the Triton SDK container: ```bash git clone https://github.com/triton-inference-server/client.git cd client/src/c++/perf_analyzer/docs/examples -docker run --gpus all -it --rm --net host -v ${PWD}:/work -w /work nvcr.io/nvidia/tritonserver:23.09-py3-sdk +docker run --gpus all -it --rm --net host -v ${PWD}:/work -w /work nvcr.io/nvidia/tritonserver:23.10-py3-sdk ``` ## Benchmark 1: Profiling the Prefill Phase @@ -71,11 +71,13 @@ of size 100, 300, and 500 and receive single token from the model for each promp ```bash python profile.py -m vllm --prompt-size-range 100 500 200 --stream --max-tokens 1 -# Sample output # [ BENCHMARK SUMMARY ] -# Prompt size: 100, Average first-token latency: 0.0441 sec -# Prompt size: 300, Average first-token latency: 0.0427 sec -# Prompt size: 500, Average first-token latency: 0.0555 sec +# Prompt size: 100 +# * Max first token latency: 35.2451 ms +# * Min first token latency: 11.0879 ms +# * Avg first token latency: 18.3775 ms +# ... +# Saved benchmark results @ 'results-vllm-online-maxtokens1.csv' ``` > **Note** @@ -123,22 +125,17 @@ prompts. ```bash python profile.py -m vllm --prompt-size-range 100 500 200 --stream --max-tokens 256 --ignore-eos -# Sample output # [ BENCHMARK SUMMARY ] -# Prompt size: 100, Average first-token latency: 0.0388 sec, Average total token-to-token latency: 0.0066 sec -# Prompt size: 300, Average first-token latency: 0.0431 sec, Average total token-to-token latency: 0.0071 sec -# Prompt size: 500, Average first-token latency: 0.0400 sec, Average total token-to-token latency: 0.0070 sec +# Prompt size: 100 +# * Max first token latency: 23.2899 ms +# * Min first token latency: 11.0127 ms +# * Avg first token latency: 16.0468 ms +# ... +# Saved benchmark results @ 'results-vllm-online-maxtokens256.csv' ``` ## Benchmark 3: Profiling In-Flight Batching -> **Note** -> -> This benchmark relies on the feature that will be available from `23.10` -> release which is on its way soon. You can either wait until the `23.10` -> container is ready or build Perf Analyzer from the latest `main` branch -> (see [build from source instructions](install.md#build-from-source)). - In this benchmarking scenario, we want to measure the effect of in-flight batch size on token-to-token (T2T) latency. We systematically issue requests to the server of fixed input sizes and request the model to compute a fixed amount @@ -164,10 +161,13 @@ pip install matplotlib # Run Perf Analyzer python profile.py -m vllm --prompt-size-range 10 10 1 --periodic-concurrency-range 1 100 1 --request-period 32 --stream --max-tokens 1024 --ignore-eos -# Sample output # [ BENCHMARK SUMMARY ] -# Prompt size: 10, Average first-token latency: 0.0799 sec, Average total token-to-token latency: 0.0324 sec -# +# Prompt size: 10 +# * Max first token latency: 125.7212 ms +# * Min first token latency: 18.4281 ms +# * Avg first token latency: 61.8372 ms +# ... +# Saved benchmark results @ 'results-vllm-online-periodic1_100_1-period32-maxtokens1024.csv' # Saved in-flight batching benchmark plots @ 'inflight_batching_benchmark-*.png'. ``` @@ -207,21 +207,3 @@ This will allow us to visualize the change in T2T latency as the number of requests increase, filling up the inflight batch slots, and as they terminate. See [profile.py](examples/profile.py) for more details. 
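To make the request-period segmentation above concrete, here is a minimal sketch of how per-period token-to-token (T2T) averages could be derived from the exported timestamps. It assumes the profile export JSON layout used throughout this guide (`experiments[0].requests`, each request carrying `response_timestamps` in nanoseconds); the function name and the simplified per-request binning are illustrative assumptions, not the exact logic in [profile.py](examples/profile.py).

```python
import json

import numpy as np


def avg_t2t_latency_per_period(export_file: str, request_period: int) -> list[float]:
    """Average token-to-token latency (ms) for each request-period segment.

    Illustrative sketch only: it bins the consecutive response gaps of every
    request by position (first `request_period` gaps, next `request_period`
    gaps, ...) and averages each bin.
    """
    with open(export_file) as f:
        requests = json.load(f)["experiments"][0]["requests"]

    bins: dict[int, list[float]] = {}
    for r in requests:
        timestamps = r["response_timestamps"]
        for i, (prev, curr) in enumerate(zip(timestamps, timestamps[1:])):
            segment = i // request_period
            bins.setdefault(segment, []).append((curr - prev) / 1_000_000)  # ns -> ms

    return [float(np.mean(bins[s])) for s in sorted(bins)]
```

Averaging each bin and plotting the bins in order is what yields the in-flight batching charts mentioned above.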
-## Benchmark 4: Offline Case - -The first three benchmarks were online scenarios where the LLM model streamed -each output tokens as response. -This allows us to measure the performance of the model at a granular level. -In this benchmark, we are interested in an end-to-end performance of the model. - -```bash -python profile.py -m vllm --prompt-size-range 100 500 200 --max-tokens 256 - -# Sample output -# [ BENCHMARK SUMMARY ] -# Prompt size: 100, Average first-token latency: 0.0441 sec -# Prompt size: 300, Average first-token latency: 0.0427 sec -# Prompt size: 500, Average first-token latency: 0.0555 sec -``` - - From 72c491e1b8b13921853463d46bf95c2190b39fae Mon Sep 17 00:00:00 2001 From: Hyunjae Woo Date: Thu, 2 Nov 2023 21:22:31 -0700 Subject: [PATCH 07/15] Remove TRTLLM support --- .../perf_analyzer/docs/examples/profile.py | 63 +------------------ 1 file changed, 3 insertions(+), 60 deletions(-) diff --git a/src/c++/perf_analyzer/docs/examples/profile.py b/src/c++/perf_analyzer/docs/examples/profile.py index fbcfae1b3..fa42d9b1f 100644 --- a/src/c++/perf_analyzer/docs/examples/profile.py +++ b/src/c++/perf_analyzer/docs/examples/profile.py @@ -382,13 +382,6 @@ def profile(args, export_file): f"--input-data={INPUT_FILENAME} " f"--profile-export-file={export_file} " ) - if args.model == "ensemble": # TRT-LLM - command += ( - "--shape=text_input:1 " - "--shape=max_tokens:1 " - "--shape=bad_words:1 " - "--shape=stop_words:1 " - ) if args.periodic_concurrency_range: start, end, step = args.periodic_concurrency_range command += ( @@ -418,21 +411,13 @@ def prepare_export_file(args, prompt): def prepare_input_data(input_data, prompt): """Insert the prompt to send into input JSON data.""" - if args.model == "ensemble": - input_data["data"][0]["text_input"] = [prompt] - elif args.model == "vllm": - # TODO (hwoo): use text_input - input_data["data"][0]["PROMPT"] = [prompt] + input_data["data"][0]["PROMPT"] = [prompt] save_json_data(input_data, INPUT_FILENAME) def generate_prompts(args, input_data): """Generate dummy prompts if not specified by input JSON file.""" - if args.model == "ensemble": - prompt = input_data["data"][0]["text_input"][0] - elif args.model == "vllm": - # TODO (hwoo): use text_input - prompt = input_data["data"][0]["PROMPT"][0] + prompt = input_data["data"][0]["PROMPT"][0] if not prompt: # Generate dummy prompt assert args.prompt_size_range, "Must specify --prompt-size-range." @@ -485,50 +470,8 @@ def construct_input_data(args): return input_data -# TODO (hwoo): merge with construct_input_data -def construct_trtllm_input_data(args): - """Construct input data that contains input tensors and parameters. - - Parse the input JSON file (if exists) to construct the input data. - When user sets parameters through command line, overwrite the - parameters set by input JSON file. 
- """ - prompt = "" - stream = False - max_tokens = 256 - - if args.input_data: - data = load_json_data(filename=args.input_data)["data"][0] - prompt = data["text_input"][0] if "text_input" in data else prompt - stream = data["stream"][0] if "stream" in data else stream - max_tokens = data["max_tokens"][0] if "max_tokens" in data else max_tokens - - # If command line option is specified, overwrite - if args.stream: - stream = args.stream - else: - args.stream = stream - - if args.max_tokens: - max_tokens = args.max_tokens - else: - args.max_tokens = max_tokens - - input_data = {"data": [{}]} - input_data["data"][0]["text_input"] = [prompt] - input_data["data"][0]["stream"] = [stream] - input_data["data"][0]["max_tokens"] = [max_tokens] - input_data["data"][0]["bad_words"] = [""] - input_data["data"][0]["stop_words"] = [""] - return input_data - - def main(args): - # TODO (hwoo): merge the conditional cases - if args.model == "ensemble": - input_data = construct_trtllm_input_data(args) - elif args.model == "vllm": - input_data = construct_input_data(args) + input_data = construct_input_data(args) prompts = generate_prompts(args, input_data) for prompt in prompts: From 35c965c60befc6bde40570214cc1b2fb8886fe0f Mon Sep 17 00:00:00 2001 From: Hyunjae Woo Date: Thu, 2 Nov 2023 22:04:28 -0700 Subject: [PATCH 08/15] Add units for each metric --- .../perf_analyzer/docs/examples/profile.py | 62 +++++++++---------- 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/src/c++/perf_analyzer/docs/examples/profile.py b/src/c++/perf_analyzer/docs/examples/profile.py index fa42d9b1f..924dee01f 100644 --- a/src/c++/perf_analyzer/docs/examples/profile.py +++ b/src/c++/perf_analyzer/docs/examples/profile.py @@ -37,32 +37,32 @@ INPUT_FILENAME = "generated_input_data.json" METRIC_FIELDS = { - "max_first_token_latency": "Max first token latency", - "min_first_token_latency": "Min first token latency", - "avg_first_token_latency": "Avg first token latency", - "p50_first_token_latency": "p50 first token latency", - "p90_first_token_latency": "p90 first token latency", - "p95_first_token_latency": "p95 first token latency", - "p99_first_token_latency": "p99 first token latency", - "max_gen_latency": "Max generation latency", - "min_gen_latency": "Min generation latency", - "avg_gen_latency": "Avg generation latency", - "p50_gen_latency": "p50 generation latency", - "p90_gen_latency": "p90 generation latency", - "p95_gen_latency": "p95 generation latency", - "p99_gen_latency": "p99 generation latency", - "avg_token_latency": "Avg token latency", - "avg_total_t2t_latency": "Avg total token-to-token latency", - "max_e2e_latency": "Max end-to-end latency", - "min_e2e_latency": "Min end-to-end latency", - "avg_e2e_latency": "Avg end-to-end latency", - "max_token_throughput": "Max token throughput", - "min_token_throughput": "Min token throughput", - "avg_token_throughput": "Avg token throughput", - "p50_token_throughput": "p50 token throughput", - "p90_token_throughput": "p90 token throughput", - "p95_token_throughput": "p95 token throughput", - "p99_token_throughput": "p99 token throughput", + "max_first_token_latency": ("Max first token latency", "ms"), + "min_first_token_latency": ("Min first token latency", "ms"), + "avg_first_token_latency": ("Avg first token latency", "ms"), + "p50_first_token_latency": ("p50 first token latency", "ms"), + "p90_first_token_latency": ("p90 first token latency", "ms"), + "p95_first_token_latency": ("p95 first token latency", "ms"), + "p99_first_token_latency": ("p99
first token latency", "ms"), + "max_gen_latency": ("Max generation latency", "ms"), + "min_gen_latency": ("Min generation latency", "ms"), + "avg_gen_latency": ("Avg generation latency", "ms"), + "p50_gen_latency": ("p50 generation latency", "ms"), + "p90_gen_latency": ("p90 generation latency", "ms"), + "p95_gen_latency": ("p95 generation latency", "ms"), + "p99_gen_latency": ("p99 generation latency", "ms"), + "avg_token_latency": ("Avg token latency", "ms/token"), + "avg_total_t2t_latency": ("Avg total token-to-token latency", "ms"), + "max_e2e_latency": ("Max end-to-end latency", "ms"), + "min_e2e_latency": ("Min end-to-end latency", "ms"), + "avg_e2e_latency": ("Avg end-to-end latency", "ms"), + "max_token_throughput": ("Max token throughput", "tokens/s"), + "min_token_throughput": ("Min token throughput", "tokens/s"), + "avg_token_throughput": ("Avg token throughput", "tokens/s"), + "p50_token_throughput": ("p50 token throughput", "tokens/s"), + "p90_token_throughput": ("p90 token throughput", "tokens/s"), + "p95_token_throughput": ("p95 token throughput", "tokens/s"), + "p99_token_throughput": ("p99 token throughput", "tokens/s"), } @@ -150,15 +150,13 @@ def save_benchmark_results(args, profile_results): print(f"Saved benchmark results @ '{results_csv}'") -def print_benchmark_summary(args, profile_results): +def print_benchmark_summary(profile_results): print("[ BENCHMARK SUMMARY ]") for pr in profile_results: print(f"Prompt size: {pr.prompt_size}") - for metric, name in METRIC_FIELDS.items(): + for metric, (name, unit) in METRIC_FIELDS.items(): if getattr(pr, metric): - line = f" * {name}: {getattr(pr, metric):.4f} " - line += "ms" if "latency" in metric else "tokens/s" - print(line) + print(f" * {name}: {getattr(pr, metric):.4f} {unit}") print("") @@ -366,7 +364,7 @@ def summarize_profile_results(args, prompts): ) results.append(profile_result) - print_benchmark_summary(args, results) + print_benchmark_summary(results) save_benchmark_results(args, results) if args.periodic_concurrency_range: From ca1b024ad47f0e85bb56aac9e394c189e165285c Mon Sep 17 00:00:00 2001 From: Hyunjae Woo Date: Fri, 3 Nov 2023 00:15:52 -0700 Subject: [PATCH 09/15] Separate out results csv --- .../perf_analyzer/docs/examples/profile.py | 22 +++++++++---------- src/c++/perf_analyzer/docs/llm.md | 10 ++++++--- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/c++/perf_analyzer/docs/examples/profile.py b/src/c++/perf_analyzer/docs/examples/profile.py index 924dee01f..0e881f0c0 100644 --- a/src/c++/perf_analyzer/docs/examples/profile.py +++ b/src/c++/perf_analyzer/docs/examples/profile.py @@ -108,17 +108,15 @@ def save_json_data(data, filename): json.dump(data, f) -def get_postfix(args, prompt_size=None): +def get_postfix(args, prompt_size): """Generate postfix for profile export filename and plot. e.g. 
- - trtllm-maxtokens256 - trtllm-prompt100-maxtokens256 - trtllm-prompt100-periodic1_100_1-period32-maxtokens1024 """ stream_type = "online" if args.stream else "offline" - postfix = f"{args.model}-{stream_type}-" - postfix += f"prompt{prompt_size}-" if prompt_size else "" + postfix = f"{args.model}-{stream_type}-prompt{prompt_size}-" if args.periodic_concurrency_range: start, end, step = args.periodic_concurrency_range postfix += f"periodic{start}_{end}_{step}-period{args.request_period}-" @@ -139,15 +137,15 @@ def get_plot_filename(args, prompt_size): def save_benchmark_results(args, profile_results): - postfix = get_postfix(args) - results_csv = f"results-{postfix}.csv" - with open(results_csv, "w") as f: - fieldnames = [f.name for f in fields(profile_results[0])] - writer = csv.DictWriter(f, fieldnames=fieldnames) - writer.writeheader() - for pr in profile_results: + for pr in profile_results: + postfix = get_postfix(args, pr.prompt_size) + results_csv = f"results-{postfix}.csv" + with open(results_csv, "w") as f: + fieldnames = [f.name for f in fields(pr)] + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() writer.writerow(asdict(pr)) - print(f"Saved benchmark results @ '{results_csv}'") + print(f"Saved benchmark results @ '{results_csv}'") def print_benchmark_summary(profile_results): diff --git a/src/c++/perf_analyzer/docs/llm.md b/src/c++/perf_analyzer/docs/llm.md index 1f9fee93d..e0587aa2b 100644 --- a/src/c++/perf_analyzer/docs/llm.md +++ b/src/c++/perf_analyzer/docs/llm.md @@ -77,7 +77,9 @@ python profile.py -m vllm --prompt-size-range 100 500 200 --stream --max-tokens # * Min first token latency: 11.0879 ms # * Avg first token latency: 18.3775 ms # ... -# Saved benchmark results @ 'results-vllm-online-maxtokens1.csv' +# Saved benchmark results @ 'results-vllm-online-prompt100-maxtokens1.csv' +# Saved benchmark results @ 'results-vllm-online-prompt200-maxtokens1.csv' +# Saved benchmark results @ 'results-vllm-online-prompt300-maxtokens1.csv' ``` > **Note** @@ -131,7 +133,9 @@ python profile.py -m vllm --prompt-size-range 100 500 200 --stream --max-tokens # * Min first token latency: 11.0127 ms # * Avg first token latency: 16.0468 ms # ... -# Saved benchmark results @ 'results-vllm-online-maxtokens256.csv' +# Saved benchmark results @ 'results-vllm-online-prompt100-maxtokens256.csv' +# Saved benchmark results @ 'results-vllm-online-prompt200-maxtokens256.csv' +# Saved benchmark results @ 'results-vllm-online-prompt300-maxtokens256.csv' ``` ## Benchmark 3: Profiling In-Flight Batching @@ -167,7 +171,7 @@ python profile.py -m vllm --prompt-size-range 10 10 1 --periodic-concurrency-ran # * Min first token latency: 18.4281 ms # * Avg first token latency: 61.8372 ms # ... -# Saved benchmark results @ 'results-vllm-online-periodic1_100_1-period32-maxtokens1024.csv' +# Saved benchmark results @ 'results-vllm-online-prompt10-periodic1_100_1-period32-maxtokens1024.csv' # Saved in-flight batching benchmark plots @ 'inflight_batching_benchmark-*.png'. 
``` From f7fb9f1b942996982f119ec7e1722cd74b4516ce Mon Sep 17 00:00:00 2001 From: Hyunjae Woo Date: Fri, 3 Nov 2023 15:34:02 -0700 Subject: [PATCH 10/15] Remove csv output --- src/c++/perf_analyzer/docs/examples/profile.py | 14 -------------- src/c++/perf_analyzer/docs/llm.md | 7 ------- 2 files changed, 21 deletions(-) diff --git a/src/c++/perf_analyzer/docs/examples/profile.py b/src/c++/perf_analyzer/docs/examples/profile.py index 0e881f0c0..0e9dbc519 100644 --- a/src/c++/perf_analyzer/docs/examples/profile.py +++ b/src/c++/perf_analyzer/docs/examples/profile.py @@ -136,18 +136,6 @@ def get_plot_filename(args, prompt_size): return filename -def save_benchmark_results(args, profile_results): - for pr in profile_results: - postfix = get_postfix(args, pr.prompt_size) - results_csv = f"results-{postfix}.csv" - with open(results_csv, "w") as f: - fieldnames = [f.name for f in fields(pr)] - writer = csv.DictWriter(f, fieldnames=fieldnames) - writer.writeheader() - writer.writerow(asdict(pr)) - print(f"Saved benchmark results @ '{results_csv}'") - - def print_benchmark_summary(profile_results): print("[ BENCHMARK SUMMARY ]") for pr in profile_results: @@ -363,8 +351,6 @@ def summarize_profile_results(args, prompts): results.append(profile_result) print_benchmark_summary(results) - save_benchmark_results(args, results) - if args.periodic_concurrency_range: print( "Saved in-flight batching benchmark plots " diff --git a/src/c++/perf_analyzer/docs/llm.md b/src/c++/perf_analyzer/docs/llm.md index e0587aa2b..db1c1bfbb 100644 --- a/src/c++/perf_analyzer/docs/llm.md +++ b/src/c++/perf_analyzer/docs/llm.md @@ -77,9 +77,6 @@ python profile.py -m vllm --prompt-size-range 100 500 200 --stream --max-tokens # * Min first token latency: 11.0879 ms # * Avg first token latency: 18.3775 ms # ... -# Saved benchmark results @ 'results-vllm-online-prompt100-maxtokens1.csv' -# Saved benchmark results @ 'results-vllm-online-prompt200-maxtokens1.csv' -# Saved benchmark results @ 'results-vllm-online-prompt300-maxtokens1.csv' ``` > **Note** @@ -133,9 +130,6 @@ python profile.py -m vllm --prompt-size-range 100 500 200 --stream --max-tokens # * Min first token latency: 11.0127 ms # * Avg first token latency: 16.0468 ms # ... -# Saved benchmark results @ 'results-vllm-online-prompt100-maxtokens256.csv' -# Saved benchmark results @ 'results-vllm-online-prompt200-maxtokens256.csv' -# Saved benchmark results @ 'results-vllm-online-prompt300-maxtokens256.csv' ``` ## Benchmark 3: Profiling In-Flight Batching @@ -171,7 +165,6 @@ python profile.py -m vllm --prompt-size-range 10 10 1 --periodic-concurrency-ran # * Min first token latency: 18.4281 ms # * Avg first token latency: 61.8372 ms # ... -# Saved benchmark results @ 'results-vllm-online-prompt10-periodic1_100_1-period32-maxtokens1024.csv' # Saved in-flight batching benchmark plots @ 'inflight_batching_benchmark-*.png'. ``` From e9e4a710707992075281c70f777d5bccf7277eff Mon Sep 17 00:00:00 2001 From: Hyunjae Woo Date: Fri, 3 Nov 2023 17:44:03 -0700 Subject: [PATCH 11/15] Address feedback --- .../perf_analyzer/docs/examples/profile.py | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/src/c++/perf_analyzer/docs/examples/profile.py b/src/c++/perf_analyzer/docs/examples/profile.py index 0e9dbc519..766e2072e 100644 --- a/src/c++/perf_analyzer/docs/examples/profile.py +++ b/src/c++/perf_analyzer/docs/examples/profile.py @@ -25,10 +25,9 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import argparse -import csv import json import subprocess -from dataclasses import asdict, dataclass, fields +from dataclasses import dataclass from itertools import pairwise from pathlib import Path from typing import Optional @@ -51,7 +50,7 @@ "p90_gen_latency": ("p90 generation latency", "ms"), "p95_gen_latency": ("p95 generation latency", "ms"), "p99_gen_latency": ("p99 generation latency", "ms"), - "avg_token_latency": ("Avg token latency", "ms/token"), + "avg_latency_per_output_token": ("Avg latency per output token", "ms/token"), "avg_total_t2t_latency": ("Avg total token-to-token latency", "ms"), "max_e2e_latency": ("Max end-to-end latency", "ms"), "min_e2e_latency": ("Min end-to-end latency", "ms"), @@ -83,7 +82,7 @@ class ProfileResults: p90_gen_latency: Optional[float] = None p95_gen_latency: Optional[float] = None p99_gen_latency: Optional[float] = None - avg_token_latency: Optional[float] = None + avg_latency_per_output_token: Optional[float] = None avg_total_t2t_latency: Optional[float] = None avg_periodic_t2t_latencies: Optional[list[float]] = None max_e2e_latency: Optional[float] = None @@ -115,7 +114,7 @@ def get_postfix(args, prompt_size): - trtllm-prompt100-maxtokens256 - trtllm-prompt100-periodic1_100_1-period32-maxtokens1024 """ - stream_type = "online" if args.stream else "offline" + stream_type = "offline" if args.offline else "online" postfix = f"{args.model}-{stream_type}-prompt{prompt_size}-" if args.periodic_concurrency_range: start, end, step = args.periodic_concurrency_range @@ -253,14 +252,13 @@ def collect_latencies(requests): def calculate_online_metrics(args, profile_result, filename): """Calculate online metrics for more fine-grained performance information.""" - if not args.stream: + if args.offline: return # skip if offline requests = load_json_data(filename) latencies = collect_latencies(requests) first_token_latencies, generation_latencies, token_to_token_latencies = latencies - profile_result.avg_first_token_latency = np.mean(first_token_latencies) profile_result.avg_total_t2t_latency = np.mean(token_to_token_latencies) profile_result.max_first_token_latency = max(first_token_latencies) @@ -296,7 +294,7 @@ def calculate_online_metrics(args, profile_result, filename): ) token_latencies = [t / args.max_tokens for t in generation_latencies] - profile_result.avg_token_latency = np.mean(token_latencies) + profile_result.avg_latency_per_output_token = np.mean(token_latencies) def collect_offline_metrics(requests, sequence_len): @@ -416,7 +414,7 @@ def construct_input_data(args): parameters set by input JSON file. 
""" prompt = "" - stream = False + stream = True sampling_params = {} if args.input_data: @@ -427,10 +425,10 @@ def construct_input_data(args): sampling_params = json.loads(data["SAMPLING_PARAMETERS"][0]) # If command line option is specified, overwrite - if args.stream: - stream = args.stream - else: - args.stream = stream + if args.offline: + stream = False + elif not stream: + args.offline = True if args.max_tokens: sampling_params["max_tokens"] = args.max_tokens @@ -511,9 +509,9 @@ def main(args): help="The input data file to be used for inference request.", ) parser.add_argument( - "--stream", + "--offline", action="store_true", - help="Whether to stream the model outputs.", + help="Whether to stop streaming the model outputs.", ) args = parser.parse_args() main(args) From 3fd5836a453127f5d3e367e503cd39d106eb47aa Mon Sep 17 00:00:00 2001 From: Hyunjae Woo Date: Mon, 6 Nov 2023 13:56:19 -0800 Subject: [PATCH 12/15] Add more metrics and extract metric calculation to separate function --- .../perf_analyzer/docs/examples/profile.py | 160 ++++++++++++------ src/c++/perf_analyzer/docs/llm.md | 8 +- 2 files changed, 114 insertions(+), 54 deletions(-) diff --git a/src/c++/perf_analyzer/docs/examples/profile.py b/src/c++/perf_analyzer/docs/examples/profile.py index 766e2072e..eba36f296 100644 --- a/src/c++/perf_analyzer/docs/examples/profile.py +++ b/src/c++/perf_analyzer/docs/examples/profile.py @@ -50,18 +50,29 @@ "p90_gen_latency": ("p90 generation latency", "ms"), "p95_gen_latency": ("p95 generation latency", "ms"), "p99_gen_latency": ("p99 generation latency", "ms"), - "avg_latency_per_output_token": ("Avg latency per output token", "ms/token"), + "avg_output_token_latency": ("Avg output token latency", "ms/output token"), "avg_total_t2t_latency": ("Avg total token-to-token latency", "ms"), "max_e2e_latency": ("Max end-to-end latency", "ms"), "min_e2e_latency": ("Min end-to-end latency", "ms"), "avg_e2e_latency": ("Avg end-to-end latency", "ms"), - "max_token_throughput": ("Max token throughput", "tokens/s"), - "min_token_throughput": ("Min token throughput", "tokens/s"), - "avg_token_throughput": ("Avg token throughput", "tokens/s"), - "p50_token_throughput": ("p50 token throughput", "tokens/s"), - "p90_token_throughput": ("p90 token throughput", "tokens/s"), - "p95_token_throughput": ("p95 token throughput", "tokens/s"), - "p99_token_throughput": ("p99 token throughput", "tokens/s"), + "p50_gen_latency": ("p50 generation latency", "ms"), + "p90_gen_latency": ("p90 generation latency", "ms"), + "p95_gen_latency": ("p95 generation latency", "ms"), + "p99_gen_latency": ("p99 generation latency", "ms"), + "max_e2e_throughput": ("Max end-to-end throughput", "tokens/s"), + "min_e2e_throughput": ("Min end-to-end throughput", "tokens/s"), + "avg_e2e_throughput": ("Avg end-to-end throughput", "tokens/s"), + "p50_e2e_throughput": ("p50 end-to-end throughput", "tokens/s"), + "p90_e2e_throughput": ("p90 end-to-end throughput", "tokens/s"), + "p95_e2e_throughput": ("p95 end-to-end throughput", "tokens/s"), + "p99_e2e_throughput": ("p99 end-to-end throughput", "tokens/s"), + "max_gen_throughput": ("Max generation throughput", "output tokens/s"), + "min_gen_throughput": ("Min generation throughput", "output tokens/s"), + "avg_gen_throughput": ("Avg generation throughput", "output tokens/s"), + "p50_gen_throughput": ("p50 generation throughput", "output tokens/s"), + "p90_gen_throughput": ("p90 generation throughput", "output tokens/s"), + "p95_gen_throughput": ("p95 generation throughput", "output 
tokens/s"), + "p99_gen_throughput": ("p99 generation throughput", "output tokens/s"), } @@ -82,19 +93,30 @@ class ProfileResults: p90_gen_latency: Optional[float] = None p95_gen_latency: Optional[float] = None p99_gen_latency: Optional[float] = None - avg_latency_per_output_token: Optional[float] = None + avg_output_token_latency: Optional[float] = None avg_total_t2t_latency: Optional[float] = None avg_periodic_t2t_latencies: Optional[list[float]] = None max_e2e_latency: Optional[float] = None min_e2e_latency: Optional[float] = None avg_e2e_latency: Optional[float] = None - max_token_throughput: Optional[float] = None - min_token_throughput: Optional[float] = None - avg_token_throughput: Optional[float] = None - p50_token_throughput: Optional[float] = None - p90_token_throughput: Optional[float] = None - p95_token_throughput: Optional[float] = None - p99_token_throughput: Optional[float] = None + p50_e2e_latency: Optional[float] = None + p90_e2e_latency: Optional[float] = None + p95_e2e_latency: Optional[float] = None + p99_e2e_latency: Optional[float] = None + max_e2e_throughput: Optional[float] = None + min_e2e_throughput: Optional[float] = None + avg_e2e_throughput: Optional[float] = None + p50_e2e_throughput: Optional[float] = None + p90_e2e_throughput: Optional[float] = None + p95_e2e_throughput: Optional[float] = None + p99_e2e_throughput: Optional[float] = None + max_gen_throughput: Optional[float] = None + min_gen_throughput: Optional[float] = None + avg_gen_throughput: Optional[float] = None + p50_gen_throughput: Optional[float] = None + p90_gen_throughput: Optional[float] = None + p95_gen_throughput: Optional[float] = None + p99_gen_throughput: Optional[float] = None def load_json_data(filename): @@ -233,31 +255,43 @@ def calculate_avg_periodic_latencies(args, profile_result, filename): profile_result.avg_periodic_t2t_latencies = latencies -def collect_latencies(requests): +def collect_online_metrics(requests, output_tokens): # Example json demonstrating format: # see client/src/c++/perf_analyzer/docs/examples/decoupled_output_file.json first_token_latencies = [] generation_latencies = [] token_to_token_latencies = [] + generation_throughputs = [] requests = requests["experiments"][0]["requests"] for r in requests: init_request, responses = r["timestamp"], r["response_timestamps"] - first_token_latencies.append((responses[0] - init_request) / 1_000_000) - generation_latencies.append((responses[-1] - responses[0]) / 1_000_000) + first_token_latency = (responses[0] - init_request) / 1_000_000 + generation_latency_ms = (responses[-1] - responses[0]) / 1_000_000 # msec + generation_latency_s = (responses[-1] - responses[0]) / 1_000_000_000 # sec + first_token_latencies.append(first_token_latency) + generation_latencies.append(generation_latency_ms) + generation_throughputs.append(output_tokens / generation_latency_s) token_to_token_latencies = [] for prev_res, res in pairwise(responses): token_to_token_latencies.append((res - prev_res) / 1_000_000) - return first_token_latencies, generation_latencies, token_to_token_latencies + return ( + first_token_latencies, + generation_latencies, + token_to_token_latencies, + generation_throughputs, + ) def calculate_online_metrics(args, profile_result, filename): """Calculate online metrics for more fine-grained performance information.""" - if args.offline: - return # skip if offline - requests = load_json_data(filename) - latencies = collect_latencies(requests) - first_token_latencies, generation_latencies, token_to_token_latencies = 
latencies + latencies = collect_online_metrics(requests, args.max_tokens) + ( + first_token_latencies, + generation_latencies, + token_to_token_latencies, + generation_throughputs, + ) = latencies profile_result.avg_total_t2t_latency = np.mean(token_to_token_latencies) @@ -294,11 +328,27 @@ def calculate_online_metrics(args, profile_result, filename): ) token_latencies = [t / args.max_tokens for t in generation_latencies] - profile_result.avg_latency_per_output_token = np.mean(token_latencies) + profile_result.avg_output_token_latency = np.mean(token_latencies) + + profile_result.max_gen_throughput = max(generation_throughputs) + profile_result.min_gen_throughput = min(generation_throughputs) + profile_result.avg_gen_throughput = np.mean(generation_throughputs) + profile_result.p50_gen_throughput = np.percentile( + generation_throughputs, 50, method="lower" + ) + profile_result.p90_gen_throughput = np.percentile( + generation_throughputs, 90, method="lower" + ) + profile_result.p95_gen_throughput = np.percentile( + generation_throughputs, 95, method="lower" + ) + profile_result.p99_gen_throughput = np.percentile( + generation_throughputs, 99, method="lower" + ) def collect_offline_metrics(requests, sequence_len): - end_to_end_latencies = [] + latencies = [] throughputs = [] requests = requests["experiments"][0]["requests"] @@ -306,28 +356,46 @@ def collect_offline_metrics(requests, sequence_len): total_time = request["response_timestamps"][-1] - request["timestamp"] time_s = total_time / 1_000_000_000 # sec time_ms = total_time / 1_000_000 # msec - end_to_end_latencies.append(time_ms) + latencies.append(time_ms) throughputs.append(sequence_len / time_s) - return throughputs, end_to_end_latencies + return throughputs, latencies def calculate_offline_metrics(args, profile_result, filename): """Calculate offline metrics that show end-to-end performance.""" requests = load_json_data(filename) - throughputs, end_to_end_latencies = collect_offline_metrics( - requests=requests, sequence_len=profile_result.prompt_size + args.max_tokens + throughputs, latencies = collect_offline_metrics( + requests, sequence_len=profile_result.prompt_size + args.max_tokens ) - profile_result.max_e2e_latency = max(end_to_end_latencies) - profile_result.min_e2e_latency = min(end_to_end_latencies) - profile_result.avg_e2e_latency = np.mean(end_to_end_latencies) - profile_result.max_token_throughput = max(throughputs) - profile_result.min_token_throughput = min(throughputs) - profile_result.avg_token_throughput = np.mean(throughputs) - profile_result.p50_token_throughput = np.percentile(throughputs, 50, method="lower") - profile_result.p90_token_throughput = np.percentile(throughputs, 90, method="lower") - profile_result.p95_token_throughput = np.percentile(throughputs, 95, method="lower") - profile_result.p99_token_throughput = np.percentile(throughputs, 99, method="lower") + profile_result.max_e2e_latency = max(latencies) + profile_result.min_e2e_latency = min(latencies) + profile_result.avg_e2e_latency = np.mean(latencies) + profile_result.p50_e2e_latency = np.percentile(latencies, 50, method="lower") + profile_result.p90_e2e_latency = np.percentile(latencies, 90, method="lower") + profile_result.p95_e2e_latency = np.percentile(latencies, 95, method="lower") + profile_result.p99_e2e_latency = np.percentile(latencies, 99, method="lower") + + profile_result.max_e2e_throughput = max(throughputs) + profile_result.min_e2e_throughput = min(throughputs) + profile_result.avg_e2e_throughput = np.mean(throughputs) + 
profile_result.p50_e2e_throughput = np.percentile(throughputs, 50, method="lower") + profile_result.p90_e2e_throughput = np.percentile(throughputs, 90, method="lower") + profile_result.p95_e2e_throughput = np.percentile(throughputs, 95, method="lower") + profile_result.p99_e2e_throughput = np.percentile(throughputs, 99, method="lower") + + +def calculate_metrics(args, profile_result, export_file): + calculate_offline_metrics(args, profile_result, export_file) + if not args.offline: + calculate_online_metrics(args, profile_result, export_file) + + if args.periodic_concurrency_range: + calculate_avg_periodic_latencies(args, profile_result, export_file) + plot_results( + latencies=profile_result.avg_periodic_t2t_latencies, + filename=get_plot_filename(args, profile_result.prompt_size), + ) def summarize_profile_results(args, prompts): @@ -337,15 +405,7 @@ def summarize_profile_results(args, prompts): export_file = get_export_filename(args, prompt_size) profile_result = ProfileResults(prompt_size=prompt_size) - calculate_offline_metrics(args, profile_result, export_file) - calculate_online_metrics(args, profile_result, export_file) - - if args.periodic_concurrency_range: - calculate_avg_periodic_latencies(args, profile_result, export_file) - plot_results( - latencies=profile_result.avg_periodic_t2t_latencies, - filename=get_plot_filename(args, prompt_size), - ) + calculate_metrics(args, profile_result, export_file) results.append(profile_result) print_benchmark_summary(results) diff --git a/src/c++/perf_analyzer/docs/llm.md b/src/c++/perf_analyzer/docs/llm.md index db1c1bfbb..1de686c1b 100644 --- a/src/c++/perf_analyzer/docs/llm.md +++ b/src/c++/perf_analyzer/docs/llm.md @@ -69,7 +69,7 @@ Inside the client container, run the following command to generate dummy prompts of size 100, 300, and 500 and receive single token from the model for each prompt. ```bash -python profile.py -m vllm --prompt-size-range 100 500 200 --stream --max-tokens 1 +python profile.py -m vllm --prompt-size-range 100 500 200 --max-tokens 1 # [ BENCHMARK SUMMARY ] # Prompt size: 100 @@ -122,7 +122,7 @@ of size 100, 300, and 500 and receive total 256 tokens from the model for each prompts. ```bash -python profile.py -m vllm --prompt-size-range 100 500 200 --stream --max-tokens 256 --ignore-eos +python profile.py -m vllm --prompt-size-range 100 500 200 --max-tokens 256 --ignore-eos # [ BENCHMARK SUMMARY ] # Prompt size: 100 @@ -157,7 +157,7 @@ Run the following command inside the client container. pip install matplotlib # Run Perf Analyzer -python profile.py -m vllm --prompt-size-range 10 10 1 --periodic-concurrency-range 1 100 1 --request-period 32 --stream --max-tokens 1024 --ignore-eos +python profile.py -m vllm --prompt-size-range 10 10 1 --periodic-concurrency-range 1 100 1 --request-period 32 --max-tokens 1024 --ignore-eos # [ BENCHMARK SUMMARY ] # Prompt size: 10 @@ -179,7 +179,7 @@ split them into multiple segments of responses. 
For instance, assume we ran the following benchmark command: ```bash -python profile.py -m vllm --periodic-concurrency-range 1 4 1 --request-period 32 --stream --max-tokens 1024 --ignore-eos +python profile.py -m vllm --periodic-concurrency-range 1 4 1 --request-period 32 --max-tokens 1024 --ignore-eos ``` We start from a single request and increment up to 4 requests one by one for From d3c9011b4bdf3365e5849d4305c9ea2142e35dea Mon Sep 17 00:00:00 2001 From: Hyunjae Woo Date: Mon, 6 Nov 2023 14:38:32 -0800 Subject: [PATCH 13/15] Avoid loading data multiple times --- .../perf_analyzer/docs/examples/profile.py | 40 +++++++++---------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/src/c++/perf_analyzer/docs/examples/profile.py b/src/c++/perf_analyzer/docs/examples/profile.py index eba36f296..26a56fbda 100644 --- a/src/c++/perf_analyzer/docs/examples/profile.py +++ b/src/c++/perf_analyzer/docs/examples/profile.py @@ -212,7 +212,7 @@ def update_start_position(request_id, start_pos, initial_requests, step): return start_pos -def collect_periodic_latencies(args, filename): +def collect_periodic_latencies(args, export_data): """Split the entire benchmark results into segments with size of request period and collect latencies for each segment. """ @@ -224,9 +224,7 @@ def collect_periodic_latencies(args, filename): bins = [[] for _ in range(num_bins)] bin_start_position = 0 - - data = load_json_data(filename) - requests = data["experiments"][0]["requests"] + requests = export_data["experiments"][0]["requests"] for i, r in enumerate(requests): add_latencies_to_bins( @@ -244,9 +242,9 @@ def collect_periodic_latencies(args, filename): return bins -def calculate_avg_periodic_latencies(args, profile_result, filename): +def calculate_avg_periodic_latencies(args, profile_result, export_data): """Calculate average token-to-token latency for each request period.""" - bins = collect_periodic_latencies(args, filename) + bins = collect_periodic_latencies(args, export_data) latencies = [] for bin in bins: @@ -255,14 +253,15 @@ def calculate_avg_periodic_latencies(args, profile_result, filename): profile_result.avg_periodic_t2t_latencies = latencies -def collect_online_metrics(requests, output_tokens): +def collect_online_metrics(export_data, output_tokens): # Example json demonstrating format: # see client/src/c++/perf_analyzer/docs/examples/decoupled_output_file.json first_token_latencies = [] generation_latencies = [] token_to_token_latencies = [] generation_throughputs = [] - requests = requests["experiments"][0]["requests"] + requests = export_data["experiments"][0]["requests"] + for r in requests: init_request, responses = r["timestamp"], r["response_timestamps"] first_token_latency = (responses[0] - init_request) / 1_000_000 @@ -282,10 +281,9 @@ def collect_online_metrics(requests, output_tokens): ) -def calculate_online_metrics(args, profile_result, filename): +def calculate_online_metrics(args, profile_result, export_data): """Calculate online metrics for more fine-grained performance information.""" - requests = load_json_data(filename) - latencies = collect_online_metrics(requests, args.max_tokens) + latencies = collect_online_metrics(export_data, args.max_tokens) ( first_token_latencies, generation_latencies, @@ -347,10 +345,10 @@ def calculate_online_metrics(args, profile_result, filename): ) -def collect_offline_metrics(requests, sequence_len): +def collect_offline_metrics(export_data, sequence_len): latencies = [] throughputs = [] - requests = requests["experiments"][0]["requests"] 
+ requests = export_data["experiments"][0]["requests"] for request in requests: total_time = request["response_timestamps"][-1] - request["timestamp"] @@ -361,11 +359,10 @@ def collect_offline_metrics(requests, sequence_len): return throughputs, latencies -def calculate_offline_metrics(args, profile_result, filename): +def calculate_offline_metrics(args, profile_result, export_data): """Calculate offline metrics that show end-to-end performance.""" - requests = load_json_data(filename) throughputs, latencies = collect_offline_metrics( - requests, sequence_len=profile_result.prompt_size + args.max_tokens + export_data, sequence_len=profile_result.prompt_size + args.max_tokens ) profile_result.max_e2e_latency = max(latencies) @@ -385,13 +382,13 @@ def calculate_offline_metrics(args, profile_result, filename): profile_result.p99_e2e_throughput = np.percentile(throughputs, 99, method="lower") -def calculate_metrics(args, profile_result, export_file): - calculate_offline_metrics(args, profile_result, export_file) +def calculate_metrics(args, profile_result, export_data): + calculate_offline_metrics(args, profile_result, export_data) if not args.offline: - calculate_online_metrics(args, profile_result, export_file) + calculate_online_metrics(args, profile_result, export_data) if args.periodic_concurrency_range: - calculate_avg_periodic_latencies(args, profile_result, export_file) + calculate_avg_periodic_latencies(args, profile_result, export_data) plot_results( latencies=profile_result.avg_periodic_t2t_latencies, filename=get_plot_filename(args, profile_result.prompt_size), @@ -403,9 +400,10 @@ def summarize_profile_results(args, prompts): for prompt in prompts: prompt_size = len(prompt.split()) export_file = get_export_filename(args, prompt_size) + export_data = load_json_data(export_file) profile_result = ProfileResults(prompt_size=prompt_size) - calculate_metrics(args, profile_result, export_file) + calculate_metrics(args, profile_result, export_data) results.append(profile_result) print_benchmark_summary(results) From dcd6df61fb64b54032a4dc396a69bcc6b845df17 Mon Sep 17 00:00:00 2001 From: Hyunjae Woo Date: Mon, 6 Nov 2023 16:05:39 -0800 Subject: [PATCH 14/15] Do not output generation metrics when max tokens < 2 --- .../perf_analyzer/docs/examples/profile.py | 65 ++++++++++--------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/src/c++/perf_analyzer/docs/examples/profile.py b/src/c++/perf_analyzer/docs/examples/profile.py index 26a56fbda..1e1510071 100644 --- a/src/c++/perf_analyzer/docs/examples/profile.py +++ b/src/c++/perf_analyzer/docs/examples/profile.py @@ -309,40 +309,41 @@ def calculate_online_metrics(args, profile_result, export_data): first_token_latencies, 99, method="lower" ) - profile_result.max_gen_latency = max(generation_latencies) - profile_result.min_gen_latency = min(generation_latencies) - profile_result.avg_gen_latency = np.mean(generation_latencies) - profile_result.p50_gen_latency = np.percentile( - generation_latencies, 50, method="lower" - ) - profile_result.p90_gen_latency = np.percentile( - generation_latencies, 90, method="lower" - ) - profile_result.p95_gen_latency = np.percentile( - generation_latencies, 95, method="lower" - ) - profile_result.p99_gen_latency = np.percentile( - generation_latencies, 99, method="lower" - ) + if args.max_tokens > 1: + profile_result.max_gen_latency = max(generation_latencies) + profile_result.min_gen_latency = min(generation_latencies) + profile_result.avg_gen_latency = np.mean(generation_latencies) + 
profile_result.p50_gen_latency = np.percentile( + generation_latencies, 50, method="lower" + ) + profile_result.p90_gen_latency = np.percentile( + generation_latencies, 90, method="lower" + ) + profile_result.p95_gen_latency = np.percentile( + generation_latencies, 95, method="lower" + ) + profile_result.p99_gen_latency = np.percentile( + generation_latencies, 99, method="lower" + ) - token_latencies = [t / args.max_tokens for t in generation_latencies] - profile_result.avg_output_token_latency = np.mean(token_latencies) + token_latencies = [t / args.max_tokens for t in generation_latencies] + profile_result.avg_output_token_latency = np.mean(token_latencies) - profile_result.max_gen_throughput = max(generation_throughputs) - profile_result.min_gen_throughput = min(generation_throughputs) - profile_result.avg_gen_throughput = np.mean(generation_throughputs) - profile_result.p50_gen_throughput = np.percentile( - generation_throughputs, 50, method="lower" - ) - profile_result.p90_gen_throughput = np.percentile( - generation_throughputs, 90, method="lower" - ) - profile_result.p95_gen_throughput = np.percentile( - generation_throughputs, 95, method="lower" - ) - profile_result.p99_gen_throughput = np.percentile( - generation_throughputs, 99, method="lower" - ) + profile_result.max_gen_throughput = max(generation_throughputs) + profile_result.min_gen_throughput = min(generation_throughputs) + profile_result.avg_gen_throughput = np.mean(generation_throughputs) + profile_result.p50_gen_throughput = np.percentile( + generation_throughputs, 50, method="lower" + ) + profile_result.p90_gen_throughput = np.percentile( + generation_throughputs, 90, method="lower" + ) + profile_result.p95_gen_throughput = np.percentile( + generation_throughputs, 95, method="lower" + ) + profile_result.p99_gen_throughput = np.percentile( + generation_throughputs, 99, method="lower" + ) def collect_offline_metrics(export_data, sequence_len): From 6b8f310b85bdd0a2530050ce4b3be4d3cddba197 Mon Sep 17 00:00:00 2001 From: Hyunjae Woo Date: Mon, 6 Nov 2023 16:13:17 -0800 Subject: [PATCH 15/15] Fix codeql --- src/c++/perf_analyzer/docs/examples/profile.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/c++/perf_analyzer/docs/examples/profile.py b/src/c++/perf_analyzer/docs/examples/profile.py index 1e1510071..958961ee4 100644 --- a/src/c++/perf_analyzer/docs/examples/profile.py +++ b/src/c++/perf_analyzer/docs/examples/profile.py @@ -55,10 +55,10 @@ "max_e2e_latency": ("Max end-to-end latency", "ms"), "min_e2e_latency": ("Min end-to-end latency", "ms"), "avg_e2e_latency": ("Avg end-to-end latency", "ms"), - "p50_gen_latency": ("p50 generation latency", "ms"), - "p90_gen_latency": ("p90 generation latency", "ms"), - "p95_gen_latency": ("p95 generation latency", "ms"), - "p99_gen_latency": ("p99 generation latency", "ms"), + "p50_e2e_latency": ("p50 end-to-end latency", "ms"), + "p90_e2e_latency": ("p90 end-to-end latency", "ms"), + "p95_e2e_latency": ("p95 end-to-end latency", "ms"), + "p99_e2e_latency": ("p99 end-to-end latency", "ms"), "max_e2e_throughput": ("Max end-to-end throughput", "tokens/s"), "min_e2e_throughput": ("Min end-to-end throughput", "tokens/s"), "avg_e2e_throughput": ("Avg end-to-end throughput", "tokens/s"),