From 6ba4fd47b4fdee3746460b5f4fd697a982c693e8 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 16 Apr 2024 17:57:12 +0200 Subject: [PATCH 1/8] Add test data covering different native (geoarrow-based) encodings --- .../data-linestring-encoding_native.parquet | Bin 0 -> 2017 bytes .../data-linestring-encoding_wkb.parquet | Bin 0 -> 1468 bytes test_data/data-linestring.wkt | 3 + ...ta-multilinestring-encoding_native.parquet | Bin 0 -> 2211 bytes .../data-multilinestring-encoding_wkb.parquet | Bin 0 -> 1802 bytes test_data/data-multilinestring.wkt | 4 + .../data-multipoint-encoding_native.parquet | Bin 0 -> 2022 bytes .../data-multipoint-encoding_wkb.parquet | Bin 0 -> 1616 bytes test_data/data-multipoint.wkt | 4 + .../data-multipolygon-encoding_native.parquet | Bin 0 -> 2411 bytes .../data-multipolygon-encoding_wkb.parquet | Bin 0 -> 2258 bytes test_data/data-multipolygon.wkt | 5 + test_data/data-point-encoding_native.parquet | Bin 0 -> 1815 bytes test_data/data-point-encoding_wkb.parquet | Bin 0 -> 1375 bytes test_data/data-point.wkt | 3 + .../data-polygon-encoding_native.parquet | Bin 0 -> 2190 bytes test_data/data-polygon-encoding_wkb.parquet | Bin 0 -> 1857 bytes test_data/data-polygon.wkt | 4 + test_data/generate_test_data.py | 195 ++++++++++++++++++ 19 files changed, 218 insertions(+) create mode 100644 test_data/data-linestring-encoding_native.parquet create mode 100644 test_data/data-linestring-encoding_wkb.parquet create mode 100644 test_data/data-linestring.wkt create mode 100644 test_data/data-multilinestring-encoding_native.parquet create mode 100644 test_data/data-multilinestring-encoding_wkb.parquet create mode 100644 test_data/data-multilinestring.wkt create mode 100644 test_data/data-multipoint-encoding_native.parquet create mode 100644 test_data/data-multipoint-encoding_wkb.parquet create mode 100644 test_data/data-multipoint.wkt create mode 100644 test_data/data-multipolygon-encoding_native.parquet create mode 100644 test_data/data-multipolygon-encoding_wkb.parquet create mode 100644 test_data/data-multipolygon.wkt create mode 100644 test_data/data-point-encoding_native.parquet create mode 100644 test_data/data-point-encoding_wkb.parquet create mode 100644 test_data/data-point.wkt create mode 100644 test_data/data-polygon-encoding_native.parquet create mode 100644 test_data/data-polygon-encoding_wkb.parquet create mode 100644 test_data/data-polygon.wkt create mode 100644 test_data/generate_test_data.py diff --git a/test_data/data-linestring-encoding_native.parquet b/test_data/data-linestring-encoding_native.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d4853894ddbcdb21afea8c2d2d3f14f667f9a885 GIT binary patch literal 2017 zcmc&#&2Q6K5Px=@G$0NI?Xw&yhbRhG51}LlBZ?}NNm3e@q#r>vX;ug&sWk~ z*kVbGlQVvWt69`9^B9jMwRot};-Rg|R?d;4_CiX=U6G`StiO?xTuB%#ww%x+-I2p( zRZ(9_>T5}_=<)A~o1knMml1j%TZwAc4>ri%QDfLUd_`WeTXqRW6`$n(J8} zKLrwMLLixfDz7X+l?!K5w7XoOYa+1-?R_RkGED!VB>BYvP$k~8Mgz-peZ{dS-jd~5 z18e9>a|9Vz-%I%DZzK9Y5rp~8w|*PUjDUH!5QhEXFki*L3e*1t=6_52JDQR(pOE@u zanFO9cy2H)HtGIt4cc76_Fk36%rHaNPD{;Y9;?#48j2>$j-!BXVe!fD;#Hmdm%kbA6xFk9u3oXQtA9^el?fQ zrqW~A9<*KmF@E#Yfl!4X3uy!C1U;UmPvyi|!|v$F9`-1fV-Kx~=Msl5q1j{4A6uyY zGhIdfgFyf2Z0)TydY26373B?}V1L#4iCuGIqXZjfl zH&6A|kVmPAB6t&E2Iz;U2!ba-nKr25TO76;x4|a1GO>>a2Yz?WIqulzEwg0KtNqPs ztz=Hj()O%7aJ<9jw$WVMIqp;ryufA~{nx!#v+i3Dgk8V0HqUn&##6ID>RJA0`dcR6 z(P|iIQy+H*U3=FCyR+p?Tg{yu#-Fx)5#uc8!FtYW79EP!@V9I2hT%NCQTOp2nofTG zuV#)i51{P|&{z+Eojs~268p1<#0$_tQLq#MTeOLK5dKB}!o6IhR*XV_$Q_a;HUpP1 zpdLYkwqjUc0IJ*3meX%FsivqBeIfhuA;lBIx!6TMA_>8gDS$<$H5V;!BVkfg(N6TM zLjA+FA6OCzWdRb01D7C)2*~11fJ49}5u!*P( zAP1$S>jV-a0GO5v Bj<^5- literal 0 HcmV?d00001 diff --git a/test_data/data-linestring-encoding_wkb.parquet b/test_data/data-linestring-encoding_wkb.parquet new file mode 100644 index 0000000000000000000000000000000000000000..853ac9ee231e6c46f9f9cc3b9df90acd7dc529d4 GIT binary patch literal 1468 zcmc&!&u z{{auigMW{SNsk_lC;tvlzL{kkYkD!!__8~1=KJ1zGw++t?$COch$K$tYv^DOAi4@u z6Q-U3nIlpP`6vmIP)rW{j2!l3C0MutMK%%-;3x`0Fp&Hz#E7WmArw{Qpg!o4fFw$n zg7ig5R+8aAsjINl00{sNQFH*f{vePSeizk+0)hf#f-ozHYsJWIm|qFQtPJHl@^ZNt z@p<&D%(WJmV4*NgW;C<4B6l$VoA>eZ6fYO$2v7CGuLrzD^vaU|T@LA! zD2d6i^ijMEprqs=-iiEHI8U4!&d3_{6D#pVHl0mp5~+A%=va2!nH=JWdt!6Szd^=& z=m;~;6IVD}7=3-vwfZKj)!a~7tj{01lc9n5vqa768+)$9!k%ATT!1`7>-FvCig!=) z_uzeIIjT}hckyMmLFq%Pn)DH+CM}s9Q!(Hde$2r2E561lT%++R=QT>ltT1|B1U;o? zHqcFc)&4cft$TiX`38X32wSz}YP zyS+{>+v`?vj|cU<{`e{KYDar@hOvz&i1VCQD_eNU`(v%BydNeU-72Gq%Q#tI=OUCC z$n^1z_0Rg9o|o5P{4N)s4JbC&+o$T7i{p`Ax=N4uL}w%4&%?Qt`?5B`@*QvKP v3&4tKAjgMo=kBN5&Ni@w z(H96Q2;C6&1;mHAoH4@NSjhrr3%*iPiLX3LdGjektwL>4kfSVZvHf(aqmm+)=_$C;88LNOuwpcLDPqRfqW zrcwe3jNK9ta=4hq@>&h|R7xu~nUXB-ES0@&1>Be27(8P;zipx@EVd(}AjXDYiAa4F zOX5u~?Rng`#-0^S!>H@{!G`5po;3=tY=i}9m*tnD{Ewji9Z_FLK>Y_N@?q*TO#OpT ztUhxZLF%v9K2Cl3mioUo)K?q-SAF8Oh7`#+pIcrZN1+*RimbzsCMxUez#^6!`%q;q zs<7LD#sS*ug8s`5VK;9gMOZB75Xn*^nb`lNZT-0$>vLbN{3a$#YFz#!wvA9Ct3=@4 zQhpI$q|dCW?~F(3`>AvxSIFhl*;INmb-eyGd9w71tdTh$ zIwPCL@@8(}xX#G(gDEuuZ-sptgcA!ePtr{aY5?dl2mN?)ae(o zS|Q9M!$MN^kk_mi)`!o!g*{L~ zYY0W7W#ILorG<@N(b=9A0qOyRmFk$T7^!-B9<`DBIWYI;OFnropO(7108Q<4$QS0} zIcZY<*Y=vf@XnsDu3Z0!_qegZ=da>87=lh)l*JRg6lRV=1gA)jKj{++!bFPOi zsgdi^HM$DV0805bB_DBp#N_u5hhLI-(^M&m*Nd&#vS!o%gCvjX%*SOspu8jAk->`0 z^{^%9vHC#Yus)pAd;qcvN;6!KZ;+Cj^rt_~tl*Y8ncV76r{j5Q`3k2BS}vc&c2zbF3!9Wlf} literal 0 HcmV?d00001 diff --git a/test_data/data-multilinestring-encoding_wkb.parquet b/test_data/data-multilinestring-encoding_wkb.parquet new file mode 100644 index 0000000000000000000000000000000000000000..1357eb2e416c338e6aae2e6e0eab8165ae496cd2 GIT binary patch literal 1802 zcmds2UvCmY5TAwPa&6kAm>xM29tc5F6IwYeO-pLS9<=3Z|3MWh#x%X-q}TFKIEvWx z)tL0vM?Zs~!uSb%Fh2V1hj4a}Bi8uhgNboqcW&nQ+u5DjIXI+cRTkuwoZOW~8SVfm z0JCJBfGfVSmO-AEl_YZJaEK8_@(@KDQy>HjzwshE9cN^b`22B5d?OKA2xmZ?3oCxJ z)0BN7G4z$-H*uJd{v>cXi6;ly2RO~40zmttU;aY8T)vTc4c#PwA7P#`{17C?^+?eo&iPdn`c(* zR2Hx|?)rGcfjomxlRP0Lh9lwRU+v4p|5)MB2Pq^bq|hgE9$$~7;>XTW-jJ)BcCETf&lptf+x1pu zWTu;!hOMvYdHvPy`Q~6RulM!*&d_W%ou|h;nd9{SrBSRnE@ubVuQ|2j^2j>kymKS{ zN;RsP*ZO&?iwxAKCr1>NjZ7l9yat=y=kP?X1$1eJSb<(?I(z9 zhJK-{MzP#|yxknsD*Gus$DlUaslPZ@*YMPN9Q24yJn2G-l@bk&6$^zY?LKmw=U+=# zE{$p>WwtN46LG8?FItIeB^Gp>Qq!%I3vwPU>6}#LmUk6suu^YhU3i^`Y&}x=){Kb} vC-M-KX!o@Da$q@&X1BX|+Us?O(Q#Ww)yz_ADHTpe0PqX<@W=HK{{?>lm%=Nf literal 0 HcmV?d00001 diff --git a/test_data/data-multilinestring.wkt b/test_data/data-multilinestring.wkt new file mode 100644 index 0000000..17294fa --- /dev/null +++ b/test_data/data-multilinestring.wkt @@ -0,0 +1,4 @@ +"col","geometry" +0,"MULTILINESTRING ((30 10, 10 30, 40 40))" +1,"MULTILINESTRING ((10 10, 20 20, 10 40), (40 40, 30 30, 40 20, 30 10))" +2,"MULTILINESTRING EMPTY" diff --git a/test_data/data-multipoint-encoding_native.parquet b/test_data/data-multipoint-encoding_native.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ef9791ea9faff1a1bbd88f4a9aba50174a96337f GIT binary patch literal 2022 zcmc&#&2H0J6ux#6H>!%%8F4K~$|8z_#UgZ^sz$UbC^reyIFq(XMkHwwLWyI7D?2fX z^JAdMjHdYLL&u-Sx9N-FWCMu{&>h)k}2LJHdT3>r&<7|9Uzosgu_ zgifT*gc31_4i#ldek;IFt*P;^0ZwEgazaG{(`AM$wp8&u&i+i^kX}R%WK__$Td(-rhc%cFRpaw5fpqu7BEY1lFJ2?yS2# z&vje+W8-Ytw}OxK*A2Miw55Yh!|S@HeQYzk+is3~2hOa!?M!;%T^E?mUCnA29K>n` zm7`v3u63G)X{V)YhLhWU(AF^WklHcdK|UBeuJL?A9%gs&%t?nyfmASdfD_LL@?XTy zb5^QjDZHgW(@xN`f>Y>^HDXPipg1r%lzFi0bq9^daEANqm~Y08&H(8K?n*qU0>6N7 zQOHeMLC0M%UBuf*=Q?i$#>}uz0CwSY$U(yR`4-*7(8_%b38m&v9Pbx^DLMV3JV4A-shL9o- zeS$OkN*>yr`#ifT<>c~4g`+c*syWeh<9a_?fNRlMJK_mkA z0Th5qVMzer0>hpFnI=jcbEdFKVI?$Tl+Z8*WSG4JMfP5lB}+me7=_qZAxuOy1JabL z1nqW%1Y}A6B;Xg@iG}{8uo}mm0|@}SDeM4nd>tUy!ssk-gy{ze3!*T_+PEMt=f*>! zXzoZ*Cq&N7Xb_#$#6oU{Kw*-=a)JP)F-T*eR+HmA3mvoHRhWm_BZ1Z7wMLo&L~|?~ zppX-Of`uH%VEHzf9)juH;PMcR<`mqrKa2Tabo?%b$;42{P~B-a9j`m!Vtx?ikHOey zG4?hX{--;<>q69}3@B31K5?2=BH|4_4Df3QGJ(;}#zB_Uup0lXuOs$fNG89NWGNPs z-;YfLsBtBTcdNV>F12%~+jHA3Z84(h2|bb2;t{RWb(_`h;23{LXH8CxUIJr%w1+b; zwHurboR;0Lxve^@RbDq(?ubA31|0|SXWHgj!*h4qZp+h-uCA^?nV_Z8c6rgiOXW-O z5wjFED5bmDn5|QqrbeAUp|no(b&eUBaEtFVuz167a0CjkHH{8jP@POFGCbAT0r-7!~aG|9PLYGAg6k|4>VVoV7fsr2t z+%(z8KI}su{R4dT!9QRh`~!@>_^vU=hkY<6Ci-I3bMHWHO^rVqlRbfZ=ic*u=ic+3 zGlUAMCa{1rc(a5>jBX(mLTH&o+_JdCBiB5$86)h+aujeY*o(bES)6-haRxyWTDxW! zyi2PzoXGK>Z&Bnq7sNt9LEe>s?9qE2%uAy5oRhxcB8`anZvqFRaC?M#1j7nU2*KLH zl@flLWs^c^ffMd#78V7xyDXwj_^$dy;Yr>nuAx{42w?sU(DS$$#oHITF~^g9W}cfr zBk^l)o>z7O_u82cM&B<#{`|#Pzx|2?Sfm7=^Z3rZ&cpBWPq!-P2t~eJ2;wC-=`+zZ zdbTm_Plb*(7%mze!#29Zzg}yd z8%|xijbv^zo2N(;1Hr(~|N2V3P_6qy^@Q~Cq9jH{>1(eap+HpjzdWz(iNVR&ezU}s{{Hb@HTGGrBLhi$t+M9%I zlB`McgqS2}vb}8fihoQu_b&WphJiI0i^BXWA!7nOm{|~*3E8Csp@eL=Sx=ERT^0sb z%dSoJWamV))GalqPD<^)Q8}j$)Le0_+nwQ2qo_2J2PaygHgqvX;HQVpMs;eu#c*vc zIZ0@B846mo}=kc0Lc>+30a*eipG zgiv*vUNBO9KwL1UM>APF(dxCh-hD*(h+ctu#NUR2^2y)w_4$3eZV`Hvzwsx2c3v-t z$j)a)VpjugN6ih&-7N!io$(nwenc+-%>c&mEQ>l_y7AckY284>cC+E05BWnMQ2g!@ zjYr{R!p1{7GJV+6eKaVv+y^AhSEXdYF?|ZOf57fvBJHv!bMRzz6dH literal 0 HcmV?d00001 diff --git a/test_data/data-multipolygon-encoding_wkb.parquet b/test_data/data-multipolygon-encoding_wkb.parquet new file mode 100644 index 0000000000000000000000000000000000000000..8eb1edac95ae9887d4b17b8e593e0f939f9f93a8 GIT binary patch literal 2258 zcmeHJ&2Jh<6n_J2y0~7IsTj0K90+oeRbgrZac#u>06(-Y_yU_R!H=&Ebc)tL)~VCfZFw9YSzbSm+9 zpF#XNi7+Lc1z#Yndt3b;^MusUa}xTU#OrbObrOdYxO-q8z$t(X0Oh|__SEa1=O-oj z5Y@y-=7<7bG8aH;4k%G>75wwCsHsrShYRo{`Gj=AL;SQ0ivK{tr0;8a z<$SZz0Qx0uD47D=312``y9q%}rCItdRW+N*Xyq-ke4}vzw1K)R{h6xl2f74h{{`8* z!Tf~Y1g>4sZ|I*iKvk-1e=u9CP*pwWh2{~Vi|lW@4ms@!ZLehtez|p=1YN&?RapAk zLwslv2Pfh-2oBuujLHRP;qr_lpKo5}aO;|1xge8?tRlNZZZI!6^5TrU!LICbS08lz z+@dhc<8nOqiw(Om*L)y3tL5r**J@`9x?chT4qbU|zq}n`3$xdzcFewQj)znELCmeev(TkvCwz0RLq$U)5pZ^(^SVQR~RG51E23_nt@gF|Y+(U1AI&H5>^i{?~So zzqwuK$-Axbdad%`+YLQ?C#1&J(3ANfKscd$v19bF$m7_lIUHI2<5)f#OC?jul~^Jg z8w@S`a5z1{@9+~_n%w>pR!2L+h{v%@y%5afR=;f>cSNq*oucq{?FZxOz(oDWv7M9N z*xK#)rX4K$>Dk#C=nK4By;sXS4^RKi`+=Y|FLKWB;}CR*^9(O`_$KEaUg}6)5glH# zPlR&g(!MQCC?mTjHL}KJJ33ATMV&XeEipP!G8t(|#%E)Tld(&3BciOa!Rik7) z+U{K@#ovCGcWG zurc30FHR&lBG|>d$d6>6mY8KdDLPQ@r*k8LFXD>vE$);S4ww-M^yuJl`1y%BUTzHr h%ZI~Ze-fR&!)Pj-TuH8k6A=LXz*GDc*~Nb|{{m?GS)%{| literal 0 HcmV?d00001 diff --git a/test_data/data-multipolygon.wkt b/test_data/data-multipolygon.wkt new file mode 100644 index 0000000..79b971f --- /dev/null +++ b/test_data/data-multipolygon.wkt @@ -0,0 +1,5 @@ +"col","geometry" +0,"MULTIPOLYGON (((30 10, 40 40, 20 40, 10 20, 30 10)))" +1,"MULTIPOLYGON (((30 20, 45 40, 10 40, 30 20)), ((15 5, 40 10, 10 20, 5 10, 15 5)))" +2,"MULTIPOLYGON (((40 40, 20 45, 45 30, 40 40)), ((20 35, 10 30, 10 10, 30 5, 45 20, 20 35), (30 20, 20 15, 20 25, 30 20)))" +3,"MULTIPOLYGON EMPTY" diff --git a/test_data/data-point-encoding_native.parquet b/test_data/data-point-encoding_native.parquet new file mode 100644 index 0000000000000000000000000000000000000000..3d650c583b9036ecdb9a98df218a791925579e29 GIT binary patch literal 1815 zcmcgt&2JJ>5Pu6B#Kr?@+|813AcSsuXc0(h+NL%<=m%S=1scnTng+Jpbt@n1LV>lZ z{tx~c9{f8z8c#iYHpZhTF-{6lf_S@IqLK=E0CzLIUe# zB;3dfM3@!C1;7J>Pw<5#{}_?{<5-c*Pw@hJH|zn;bDY;BUveRV52lDO5R|-Xrzv`$b%BRr&3V`jASsVDX(9qMb422XBWn|p4~By4 z|MlX?w=z)qRvIl{@**$$#qa!m0&|eOa6hFt=aIfSQfIN*981($&;(|!` zXsNVcUUQ}|edQmZtmqj^X%QZjN0ctnj83;H)oE5|z6=Do@BuovCU~C3z&i+wVz@%- zfI=QvRs<}io2cj>JehW_yh2Y9%5{z7)}gH?oBf8NEGt=Mm_OY*+si6lCA&LNTg_gz zvYVMiH8A-;sR)GE? z-D1278k+~g@GP#%s50YzPz>% literal 0 HcmV?d00001 diff --git a/test_data/data-point-encoding_wkb.parquet b/test_data/data-point-encoding_wkb.parquet new file mode 100644 index 0000000000000000000000000000000000000000..32a9206c70cc972a8ab1cbd0a6e0850dc5338bc3 GIT binary patch literal 1375 zcmcgsO>fgc5M9S6N>LA?Vl7+B!LmZsLmJ|MD_T`m*QPY?2Tg<8gd$WWj%!@Sad6zE zp;Y|?oH+3h_$!DLM~?jo94k)DZsN3{UV5OTc*i^M&8*+El?th9SilO-ZbCvc2&I?M zIJj}f2+v?K4|p0UaWW&NVnj;CToKLQPz7{KNx&*_9G}SjoDc=^ z8z=tYa^+m=HgjeAGzcdUENMsx!M=r!SVT#}P~$lCfD@KirtS;bl|>n4FVCS0ae4l) zrg=1rfgZ;dQMz7UUKtUxG+$Tt>0Sm;kCmldC-9vp95Q)d65?+z_lwVc<}Bi~ULCfp7bXIM{ zpxhWgh=yGU>TlFa;B_MP?b+EGlBP+ux?Niwy$R`pe@vr93_{2bTr@Tac}fhIydcCS zrpsyu5U$|^T3nsrn~Vc{2*zT#MaX~-hRlXQCgeF?$QE2iXTSE09MWyM=e7L3p_g=oKC9XhWV@2FWi(V8{= zgLM?R&hRgM-hwmsYdUbM-KKAQ+aB@uijiq~1K09f?Pf`9x5{wFld5j7?E-Ezt+vr` z)byg&))o%yMf#*8DXUBm(_5^7e^l>p?u(D`5{^be@d;(%F$TMlc=!lNm32vN!U6|>-7iE*$jh=CZ< z{}RGf3YkP+3uytn-=U%^tA7e`>1AE{5aLiAc21~Bpjm^C5Eu{WgChM5Gua{}C`k7+ z!6k|0R~51WcWhaaUb0mswwnPwSRw<_DY_Y_H_uZ;$IIQ!BDGjX71#YVFUULJTg%?y|B@?q~c(*hf&JAjQScMg8~X z4^zTJCEkkakD`92=kgmWahiOi?>O8Y)t`xD8^Ml7=S( zH3ead!}>Qj(5~DfnlPL99#Q2`IJEsyEB;d#*{42Q^*33SbwzzG-+-4ft_9#d(_RTL zq9@L9)av)5_af0`BAGCv@kn$qY;})@-V^xMnRL;$cq_TrpB+KOi|F}0A2>a`f86T1 zoNLhUcrMTaet~&1_6834zK)jX&TmdnPl*a6(!zGQ5J zWnH$*n9Fi5&SkTg{8K);bM7xYOxN)~1!SX2tf-q?EHpR&IZ4cRS<5`o39DJveC=s$(Cs5Ao@7qh9fxM~G`T zQ?q2VWPfR!HSFGZ ze2(?Y5PJ9VJf8XS*@eCUz<3lDsX>cL<1W`PUxJ`J7>{t{kn_Refgv7uQR73O57*~9 zAMbVAjXLMR{lmF&eEyL0;p7b4uXyX{hhuznIJ7hbB+u94MF2R=ufWe6?BKniJQjtY zzh565!J}AqgRw&{)gJGCyWoS6EBJPDKHq!#U?A2Lob8nJ!NBDyW4`FdOxPDhiwus2 p-%p(JZF?}deKZ{Qr;+(98A+N6BVmNL!|=Vti~Iy1(K7rK`3IsUzoY;F literal 0 HcmV?d00001 diff --git a/test_data/data-polygon-encoding_wkb.parquet b/test_data/data-polygon-encoding_wkb.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4ec68a6680afe4c69bcaa4f92f24d2e0d610e70a GIT binary patch literal 1857 zcmd^A&2HL25FUdeCJGX+vdEUGgb+ojA`~b@NfbpaWIGr387Ic zPJMx1d&#A$sy;#=pwG}#m2)4Wvx`kil}isjR2_MDyz_lKvokxgb2J|(B8id67I6}| z2cQ7Bg+&2|Vly=XGEI~Say&SkK1H$+MKUSih439#L??rc>=bPFAcSrN9}(3QI6bOj zuQY4K=5>0%3HXF^A?Z&Xs}Vdo5F6m+K?Q*848jO&_9JunP_kY6r-IxRiEH zLeMS2@&cYbx!?ggEGudnu7wvu32f04hy&`T?s;*r>hj2v2vL`Z^xa{(bUzKE>l4QD z$yahvmSja9@_VgjA$%{iKxZ^S~ zva^NWs5Fm_MwR(0Y-SkO!jHQBmX7x4fn2lJuVTEztE($erf5FDQ&<=_gYw<}h)IHG zD5bkNm~2v-q?syRrL;=3Rc^~5;TOEej4LC2i*s;|!dV>GDD5%FsJRo=lrFQ0ZsW)_ z4hnPhghA^acWkZ|Qvf9Pg>B`x5R_kcljb5c*>pm-Pq>70r=jHYN?vS&A z`{%m{#e83X%6X^d#6`SZNWIohn^nC(hRA5TSN(U=>> import json, pprint, pyarrow.parquet as pq + >>> pprint.pprint(json.loads(pq.read_schema("example.parquet").metadata[b"geo"])) +""" +import json +import pathlib +import copy + +import pyarrow as pa +import pyarrow.parquet as pq +from pyarrow.csv import write_csv + +from shapely import from_wkt, to_wkb + + +HERE = pathlib.Path(__file__).parent + + +metadata_template = { + "version": "1.1.0", + "primary_column": "geometry", + "columns": { + "geometry": { + "encoding": "WKB", + "geometry_type": [], + }, + }, +} + + +## Various geometry types with WKB and native (GeoArrow-based) encodings + +def write_encoding_files(geometries_wkt, geometries_geoarrow, geometry_type): + + table = pa.table({"col": range(len(geometries_wkt)), "geometry": geometries_wkt}) + write_csv(table, HERE / f"data-{geometry_type.lower()}.wkt") + + # WKB encoding + table = pa.table( + {"col": range(len(geometries_wkt)), "geometry": to_wkb(from_wkt(geometries_wkt))} + ) + metadata = copy.deepcopy(metadata_template) + metadata["columns"]["geometry"]["geometry_type"] = [geometry_type] + table = table.replace_schema_metadata({"geo": json.dumps(metadata)}) + pq.write_table(table, HERE / f"data-{geometry_type.lower()}-encoding_wkb.parquet") + + # native (geoarrow) encoding + table = pa.table( + {"col": range(len(geometries_wkt)), "geometry": geometries_geoarrow} + ) + metadata["columns"]["geometry"]["encoding"] = geometry_type.lower() + table = table.replace_schema_metadata({"geo": json.dumps(metadata)}) + pq.write_table(table, HERE / f"data-{geometry_type.lower()}-encoding_native.parquet") + + +# point + +geometries_wkt = [ + "POINT (30 10)", + "POINT EMPTY", +] + +point_type = pa.struct( + [ + pa.field("x", pa.float64(), nullable=False), + pa.field("y", pa.float64(), nullable=False) + ] +) +geometries = pa.array([(30, 10), (float("nan"), float("nan"))], type=point_type) + +write_encoding_files( + geometries_wkt, geometries, geometry_type="Point" +) + +# linestring + +geometries_wkt = [ + "LINESTRING (30 10, 10 30, 40 40)", + "LINESTRING EMPTY", +] + +linestring_type = pa.list_(pa.field("vertices", point_type, nullable=False)) +geometries = pa.array( + [[(30, 10), (10, 30), (40, 40)], []], type=linestring_type) + +write_encoding_files( + geometries_wkt, geometries, geometry_type="LineString" +) + +# polygon + +geometries_wkt = [ + "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", + "POLYGON ((35 10, 45 45, 15 40, 10 20, 35 10), (20 30, 35 35, 30 20, 20 30))", + "POLYGON EMPTY", +] + +polygon_type = pa.list_( + pa.field("rings", pa.list_( + pa.field("vertices", point_type, nullable=False) + ), nullable=False) +) +geometries = pa.array( + [ + [[(30.0, 10.0), (40.0, 40.0), (20.0, 40.0), (10.0, 20.0), (30.0, 10.0)]], + [[(35.0, 10.0), (45.0, 45.0), (15.0, 40.0), (10.0, 20.0), (35.0, 10.0)], + [(20.0, 30.0), (35.0, 35.0), (30.0, 20.0), (20.0, 30.0)]], + [], + ], + type=polygon_type +) + +write_encoding_files( + geometries_wkt, geometries, geometry_type="Polygon" +) + +# multipoint + +geometries_wkt = [ + "MULTIPOINT ((30 10))", + "MULTIPOINT ((10 40), (40 30), (20 20), (30 10))", + "MULTIPOINT EMPTY", +] + +multipoint_type = pa.list_(pa.field("points", point_type, nullable=False)) +geometries = pa.array( + [ + [(30.0, 10.0)], + [(10.0, 40.0), (40.0, 30.0), (20.0, 20.0), (30.0, 10.0)], + [] + ], + type=multipoint_type +) + +write_encoding_files( + geometries_wkt, geometries, geometry_type="MultiPoint" +) + +# multilinestring + +geometries_wkt = [ + "MULTILINESTRING ((30 10, 10 30, 40 40))", + "MULTILINESTRING ((10 10, 20 20, 10 40), (40 40, 30 30, 40 20, 30 10))", + "MULTILINESTRING EMPTY", +] + +multilinestring_type = pa.list_( + pa.field("linestrings", linestring_type, nullable=False) +) +geometries = pa.array( + [ + [[(30.0, 10.0), (10.0, 30.0), (40.0, 40.0)]], + [[(10.0, 10.0), (20.0, 20.0), (10.0, 40.0)], + [(40.0, 40.0), (30.0, 30.0), (40.0, 20.0), (30.0, 10.0)]], + [], + ], + type=multilinestring_type +) + +write_encoding_files( + geometries_wkt, geometries, geometry_type="MultiLineString" +) + +# multipolygon + +geometries_wkt = [ + "MULTIPOLYGON (((30 10, 40 40, 20 40, 10 20, 30 10)))", + "MULTIPOLYGON (((30 20, 45 40, 10 40, 30 20)), ((15 5, 40 10, 10 20, 5 10, 15 5)))", + "MULTIPOLYGON (((40 40, 20 45, 45 30, 40 40)), ((20 35, 10 30, 10 10, 30 5, 45 20, 20 35), (30 20, 20 15, 20 25, 30 20)))", + "MULTIPOLYGON EMPTY", +] + +multipolygon_type = pa.list_(pa.field("polygons", polygon_type, nullable=False)) +geometries = pa.array( + [ + [[[(30.0, 10.0), (40.0, 40.0), (20.0, 40.0), (10.0, 20.0), (30.0, 10.0)]]], + [[[(30.0, 20.0), (45.0, 40.0), (10.0, 40.0), (30.0, 20.0)]], + [[(15.0, 5.0), (40.0, 10.0), (10.0, 20.0), (5.0, 10.0), (15.0, 5.0)]]], + [[[(40.0, 40.0), (20.0, 45.0), (45.0, 30.0), (40.0, 40.0)]], + [[(20.0, 35.0), (10.0, 30.0), (10.0, 10.0), (30.0, 5.0), (45.0, 20.0), (20.0, 35.0)], + [(30.0, 20.0), (20.0, 15.0), (20.0, 25.0), (30.0, 20.0)]]], + [], + ], + type=multipolygon_type +) + +write_encoding_files( + geometries_wkt, geometries, geometry_type="MultiPolygon" +) From 7e9eed264e68fa417863ce65f7bf5f7be8b302de Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 2 May 2024 16:15:19 +0200 Subject: [PATCH 2/8] Update test_data/generate_test_data.py Co-authored-by: Even Rouault --- test_data/generate_test_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_data/generate_test_data.py b/test_data/generate_test_data.py index 3324806..f5915c0 100644 --- a/test_data/generate_test_data.py +++ b/test_data/generate_test_data.py @@ -28,7 +28,7 @@ "columns": { "geometry": { "encoding": "WKB", - "geometry_type": [], + "geometry_types": [], }, }, } From c1f11ebdf29fdf94abcf61c40af7f677967dc8b9 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 2 May 2024 16:18:35 +0200 Subject: [PATCH 3/8] fix geometry type --- .../data-linestring-encoding_native.parquet | Bin 2017 -> 2018 bytes .../data-linestring-encoding_wkb.parquet | Bin 1468 -> 1469 bytes ...ta-multilinestring-encoding_native.parquet | Bin 2211 -> 2212 bytes .../data-multilinestring-encoding_wkb.parquet | Bin 1802 -> 1803 bytes .../data-multipoint-encoding_native.parquet | Bin 2022 -> 2023 bytes .../data-multipoint-encoding_wkb.parquet | Bin 1616 -> 1617 bytes .../data-multipolygon-encoding_native.parquet | Bin 2411 -> 2412 bytes .../data-multipolygon-encoding_wkb.parquet | Bin 2258 -> 2271 bytes test_data/data-point-encoding_native.parquet | Bin 1815 -> 1816 bytes test_data/data-point-encoding_wkb.parquet | Bin 1375 -> 1376 bytes .../data-polygon-encoding_native.parquet | Bin 2190 -> 2191 bytes test_data/data-polygon-encoding_wkb.parquet | Bin 1857 -> 1858 bytes test_data/generate_test_data.py | 2 +- 13 files changed, 1 insertion(+), 1 deletion(-) diff --git a/test_data/data-linestring-encoding_native.parquet b/test_data/data-linestring-encoding_native.parquet index d4853894ddbcdb21afea8c2d2d3f14f667f9a885..ee736c1fb71d8b7a4ae8e00d9e3714e913121750 100644 GIT binary patch delta 68 zcmaFJ|A>Es9}8pWWPg@Y#^T9GSe7#ePR?PSCzD+1Y3AuxoEhStQyLZKXONr=q$|Br X(jp8kH>-JBvu$SNaAIX(2yhHC1ON`H6$k(T diff --git a/test_data/data-linestring-encoding_wkb.parquet b/test_data/data-linestring-encoding_wkb.parquet index 853ac9ee231e6c46f9f9cc3b9df90acd7dc529d4..9190fc0da040f4ec3fcdb8c063cc8dd92f086b43 100644 GIT binary patch delta 68 zcmdnPy_b7~Ff(J*WD({(#^T9M%*z?mCu^}xkqxWz%*sy>uk`Xw3^&aQH4dr-(s`aa XhL&j&29s~I+~hdM%)k)f7-R?l{qPn7 delta 66 zcmdnXy@z{)Ff(K0WD(}P$<55m83QM4vrLf<&GgT7O7hIEC`if;3rX?t0@BHu5fO%# VX@QQD@37qDILgex5a1YO2ms>C73Tl| diff --git a/test_data/data-multilinestring-encoding_native.parquet b/test_data/data-multilinestring-encoding_native.parquet index fb4780abdb7b6b28bb2c3d50fe8daadac94060fc..04e2e7c0f6de2b1dee9bcb3a2e47dca08c8559e1 100644 GIT binary patch delta 76 zcmZ21xI}QnE*8eglXtUJF&0lYWnIHqKKT*r5|ywj&#e6P@JcVgl<*3J#L$X@q};HO f6b~;TotzmFVQ85axOo-ZM@EiItPBhRjzNY1N#Pqb delta 74 zcmZ1?xL9z*E*8c~lXtUJO*Ug)!FBt5HQPr*m`Kz{1!*Igllfv3T-+mNks!lXF<7sDxE{X62`cS9*CuGz{1!zIgll9@&T4LjOmkeS*NIkX8LD3C3$8W8YX!J6@<9wltzX586@Wd b=}NDZvN5{#kZ1*@cSr`}s9D@u2kT4fQ diff --git a/test_data/data-multipoint-encoding_native.parquet b/test_data/data-multipoint-encoding_native.parquet index ef9791ea9faff1a1bbd88f4a9aba50174a96337f..78bd1329a1cebc68b456cccaa614804a2f7fb8ef 100644 GIT binary patch delta 68 zcmaFH|D1nAGz(+rbCGAma%RS+$t#%i7>g&1u`FjypS+J{ifmYwXI6fCc%_$LN_d4qVwh7|Vz{Y+ YXO5v|T7<#meAb&Bub3Ga0vv-30Z()n9{>OV delta 66 zcmcb}bAe~Wa%RTH$t#%iCX2HyXAGRYpJj?{Xr_OrQ<7)4p<$9oP(i4>WkFJ2U}i*w Wp=Da2UMiNk`8fg!*#$PfT^bQUcD diff --git a/test_data/data-multipolygon-encoding_wkb.parquet b/test_data/data-multipolygon-encoding_wkb.parquet index 8eb1edac95ae9887d4b17b8e593e0f939f9f93a8..65a3cec73577ae336568c10043aaf909b93d52cd 100644 GIT binary patch delta 275 zcmca4cwcaXIvZolWDT}F#^TA-*wi@RFzbVWQTpVgZ1#-llUdm3@0ZikeFjS zh=4FbEJsfe5oHusmJ}Xlkertd;)3K|A^L%aI0b?Xb$1MROm=iZ$buC*26}?6^#JoA rE&wyjfuy@%SrXWSa*zQ)C1ATj(m*5Oz;^OH4jqogEDQ_*jzNY1a@9@N delta 261 zcmcaFcu8=BIvZp2WDT~w$h^v`rk z^2|0gO!5dS2z9qCP7O~fOUm?0NsBPFbaeD|baZqA5spqE!p{Y0js=k6>go#OJA(*E zFcZvjOb1d$s7rhleWl4p)nSz@@UfoG1PWm<&6 S=EbZzj2x?37#IQ^gA4)2Fcgsh delta 60 zcmbQiH=S?8V`j#>$xoQ`Cx@^sVoaa>on^XMu&0@)TXANnyJbOAUSMWKgrQ|xpyTEx QtT~Jvt5_Hq0vv-30ivB0D*ylh diff --git a/test_data/data-point-encoding_wkb.parquet b/test_data/data-point-encoding_wkb.parquet index 32a9206c70cc972a8ab1cbd0a6e0850dc5338bc3..e9c5f7402cc7484d35afc98e463f3f15ee41ec59 100644 GIT binary patch delta 61 zcmcc5^?+-`9wx@x$$Ob{8H*=tF)v~aoP3sfida~cXI6fCc%@fhl2J}+inCWrT7;qH R=)c{m4&f+@->zs#^T8ytV=WE@m4&fs@->#C$)2oB7}F=eW1S`8nU$X&Ug;H>WK>}qWn@|EnPX^~7GdDH Sc>&vVMvjH73=9E|L52X%qZJSU diff --git a/test_data/data-polygon-encoding_wkb.parquet b/test_data/data-polygon-encoding_wkb.parquet index 4ec68a6680afe4c69bcaa4f92f24d2e0d610e70a..06f065d1dcfc85d494a3bd3d37412511f5f2b0e0 100644 GIT binary patch delta 64 zcmX@ecZhGp92Um<$#Yq98H*=#u`XdOpS+%Riey-oXI6fCc%@fhl2L_el#ykrXO5v| UT7<#m7`D3{+AItV0gge20NAV)pa1{> delta 62 zcmX@acaU$x92Um9$#Yq9Cv&qdVN9RAfpv;xXr_OrQ<7(nQ(2NnPF|9cNoGWZp=Da2 SJ%RU diff --git a/test_data/generate_test_data.py b/test_data/generate_test_data.py index f5915c0..a1945d1 100644 --- a/test_data/generate_test_data.py +++ b/test_data/generate_test_data.py @@ -46,7 +46,7 @@ def write_encoding_files(geometries_wkt, geometries_geoarrow, geometry_type): {"col": range(len(geometries_wkt)), "geometry": to_wkb(from_wkt(geometries_wkt))} ) metadata = copy.deepcopy(metadata_template) - metadata["columns"]["geometry"]["geometry_type"] = [geometry_type] + metadata["columns"]["geometry"]["geometry_types"] = [geometry_type] table = table.replace_schema_metadata({"geo": json.dumps(metadata)}) pq.write_table(table, HERE / f"data-{geometry_type.lower()}-encoding_wkb.parquet") From 51354939942074d3ef7ba22d674596f54209d7bd Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 2 May 2024 16:20:49 +0200 Subject: [PATCH 4/8] add .csv extension --- test_data/{data-linestring.wkt => data-linestring.wkt.csv} | 0 .../{data-multilinestring.wkt => data-multilinestring.wkt.csv} | 0 test_data/{data-multipoint.wkt => data-multipoint.wkt.csv} | 0 test_data/{data-multipolygon.wkt => data-multipolygon.wkt.csv} | 0 test_data/{data-point.wkt => data-point.wkt.csv} | 0 test_data/{data-polygon.wkt => data-polygon.wkt.csv} | 0 test_data/generate_test_data.py | 2 +- 7 files changed, 1 insertion(+), 1 deletion(-) rename test_data/{data-linestring.wkt => data-linestring.wkt.csv} (100%) rename test_data/{data-multilinestring.wkt => data-multilinestring.wkt.csv} (100%) rename test_data/{data-multipoint.wkt => data-multipoint.wkt.csv} (100%) rename test_data/{data-multipolygon.wkt => data-multipolygon.wkt.csv} (100%) rename test_data/{data-point.wkt => data-point.wkt.csv} (100%) rename test_data/{data-polygon.wkt => data-polygon.wkt.csv} (100%) diff --git a/test_data/data-linestring.wkt b/test_data/data-linestring.wkt.csv similarity index 100% rename from test_data/data-linestring.wkt rename to test_data/data-linestring.wkt.csv diff --git a/test_data/data-multilinestring.wkt b/test_data/data-multilinestring.wkt.csv similarity index 100% rename from test_data/data-multilinestring.wkt rename to test_data/data-multilinestring.wkt.csv diff --git a/test_data/data-multipoint.wkt b/test_data/data-multipoint.wkt.csv similarity index 100% rename from test_data/data-multipoint.wkt rename to test_data/data-multipoint.wkt.csv diff --git a/test_data/data-multipolygon.wkt b/test_data/data-multipolygon.wkt.csv similarity index 100% rename from test_data/data-multipolygon.wkt rename to test_data/data-multipolygon.wkt.csv diff --git a/test_data/data-point.wkt b/test_data/data-point.wkt.csv similarity index 100% rename from test_data/data-point.wkt rename to test_data/data-point.wkt.csv diff --git a/test_data/data-polygon.wkt b/test_data/data-polygon.wkt.csv similarity index 100% rename from test_data/data-polygon.wkt rename to test_data/data-polygon.wkt.csv diff --git a/test_data/generate_test_data.py b/test_data/generate_test_data.py index a1945d1..8fe7377 100644 --- a/test_data/generate_test_data.py +++ b/test_data/generate_test_data.py @@ -39,7 +39,7 @@ def write_encoding_files(geometries_wkt, geometries_geoarrow, geometry_type): table = pa.table({"col": range(len(geometries_wkt)), "geometry": geometries_wkt}) - write_csv(table, HERE / f"data-{geometry_type.lower()}.wkt") + write_csv(table, HERE / f"data-{geometry_type.lower()}.wkt.csv") # WKB encoding table = pa.table( From ffb547d31ba990fe7f8ebd2ea126ae2a502beee8 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 2 May 2024 16:27:21 +0200 Subject: [PATCH 5/8] add null values --- .../data-linestring-encoding_native.parquet | Bin 2018 -> 2023 bytes .../data-linestring-encoding_wkb.parquet | Bin 1469 -> 1474 bytes test_data/data-linestring.wkt.csv | 1 + ...ta-multilinestring-encoding_native.parquet | Bin 2212 -> 2216 bytes .../data-multilinestring-encoding_wkb.parquet | Bin 1803 -> 1807 bytes test_data/data-multilinestring.wkt.csv | 1 + .../data-multipoint-encoding_native.parquet | Bin 2023 -> 2027 bytes .../data-multipoint-encoding_wkb.parquet | Bin 1617 -> 1621 bytes test_data/data-multipoint.wkt.csv | 1 + .../data-multipolygon-encoding_native.parquet | Bin 2412 -> 2417 bytes .../data-multipolygon-encoding_wkb.parquet | Bin 2271 -> 2276 bytes test_data/data-multipolygon.wkt.csv | 1 + test_data/data-point-encoding_native.parquet | Bin 1816 -> 1835 bytes test_data/data-point-encoding_wkb.parquet | Bin 1376 -> 1398 bytes test_data/data-point.wkt.csv | 2 + .../data-polygon-encoding_native.parquet | Bin 2191 -> 2195 bytes test_data/data-polygon-encoding_wkb.parquet | Bin 1858 -> 1862 bytes test_data/data-polygon.wkt.csv | 1 + test_data/generate_test_data.py | 45 +++++++++++------- 19 files changed, 35 insertions(+), 17 deletions(-) diff --git a/test_data/data-linestring-encoding_native.parquet b/test_data/data-linestring-encoding_native.parquet index ee736c1fb71d8b7a4ae8e00d9e3714e913121750..32653ef94bb29fab1ec39a36a082423323fa53cf 100644 GIT binary patch delta 405 zcmaFF|D4}Cz%j^Bltt7)RL4h@O_V{1fkA?SftS&SkpTjjptLB1sEDY94p5B%5VOfh za4=2m7j=P2OK?EdfhaB&kT4q~6SE3~+9O68Q5H#41~vgn3FhSd98o4QHnCfbK%^F+ z#sM{lk!i9%v&!TRj3N{7N-%OwzRM`X$TGQrNpvzllK=Q8u h3GP*8!sjG)W;J50Rp%62#VW?8#v!(9vlGh#W&od#OZ5N% delta 426 zcmaFP|A^l=z%j^BltolQRK*8KF$gg*2-q+{0V9+aWe^n-71IIA3y89bvdKtrz+@yi zU~0t}G&mS128y_FDlh@XSQr_ZnbdAD%80T^nli8nNJ=m#=jVtriLr>CU=+K+sOF)@ zfybOx0gN0If6Fj3P5#fQ%E7?Itiqu7i)HdTW<|jYpdlP$FBpMH?H04zB<9JCjACGg zqLX!4geIO7nS7E_0!ceNMEi-!XPMP_U^-W@t38;+D2~uKc@9&Q9gC!lqzwhGR$(U6 Zl}u{*og>UKgH?=0jYDj~W_Oka%m67yPO1O^ diff --git a/test_data/data-linestring-encoding_wkb.parquet b/test_data/data-linestring-encoding_wkb.parquet index 9190fc0da040f4ec3fcdb8c063cc8dd92f086b43..276e4184eda51af7ef65e67abb4c7d54ba3e88a1 100644 GIT binary patch delta 296 zcmdnXeTdsSz%j^Bltt7)RL4h@O_V{1fkA?SftS&SkpTjjptLB1sEDY94p5B%5VOfh za4=2m7j=P2OK?EdfhaB&kT4q~6SE3~+9O68Q5H#41~vgn3FhSd98o4QHnCfbK%^F+ z#sM{lk!f-Slgh-CZd@?)&6p-jGWxSKGcz(XsXdvT&nPayCiaU->;#kA3>LLH%ramF n?vw8_+G?;#%1G*vV38^#UIT?V#jdc3v8i#0E!xb^yp0I}=u|N5 delta 362 zcmX@ay_efJz%j^BltolQRK*8KF$gg*2-q+{0V9+aWe^n-71IIA3y89bvdKtrz+@yi zU~0t}G&mS128y_FDlh@XSQr_ZnbdAD%80T^nli8nNJ=m#=jVtriLr>CU=+K+sOF)@ zf!mzPu}mrx&$)5IEHq=7EXU}t2Ql;(tBfcQ)KHG})coAklA=nm$-kJyPB5wUv8YXA rmH}&VpZti?){aF|MpB0ox2S56Xr~aT*dZ1%7BvpB8Jl^Sw=n?#e#|vt diff --git a/test_data/data-linestring.wkt.csv b/test_data/data-linestring.wkt.csv index 26f6fa4..a3a320d 100644 --- a/test_data/data-linestring.wkt.csv +++ b/test_data/data-linestring.wkt.csv @@ -1,3 +1,4 @@ "col","geometry" 0,"LINESTRING (30 10, 10 30, 40 40)" 1,"LINESTRING EMPTY" +2, diff --git a/test_data/data-multilinestring-encoding_native.parquet b/test_data/data-multilinestring-encoding_native.parquet index 04e2e7c0f6de2b1dee9bcb3a2e47dca08c8559e1..d0b558dfd8e15e53852f94ba5b13df46ad2fee5f 100644 GIT binary patch delta 424 zcmZ1?xI!>Az%j^Bltt7*)Wk=WLzF>?fkAsY=q0|1c%O>6)F delta 398 zcmZ1>xJ1x8z%j^Bltt7)RL4h@O_V{1fkA?SftS&SkpTjjptLB1sEDY94p5B%5VOfh za4=1@7j=P2OK?EdfhaB&kT4q~6SE3~+9O68Q5H#41~vgn3FhSd98o4QHnCfbK%^F+ z#sM{lk!j+(5Js`dj7&0&Op_HDHMp4=nOIZQnHbb&uucwS(i5lv>Jt-dVG{epsP=|g zZO-J0jN)LWqLcSAIw5Q20%=vrW@b>EGC6?Bm*os%a11gOWf64{HSrPU5M>ZzU{GLS;ALcBG$TVGv``;9#B@C}q#V$i)1FLG2Zzj3|qwDFd5;qy%$vevT-U7>C#^ zMzKeXY7uH2c#K%Jf(vGk8Pnu`=2_+p3|uN8Co!`#F{?1BP2i9b<$;>ak)E2Ln_5y- z2{!%;o7fu`wK=S6OPFQAde=<8$)s(^At@uNLxF=-G)b{@Eyq3%F%C5jv2&YUSdKFR E0M5`i{{R30 delta 332 zcmeC@>*li#a11gOWf3(H)$tK!6J-!$V31&7;AOO7WPkuBC@sn$Dk3VO15_gb#B4GW z988l9m_=P+@)8_Sg&>Mc1tiSI$i%F|p!SGSMwCU;lz~k^Qi3@-KSz{Fj7{toBM_+t zsBypyVq}`QZUq<2bTfv@hD@`}(M;;#kP+p9n#7TwnxC6mQd9{x?h2dO8y2-GtZH+Z nWx#r`Yp_YmNa~Paxr(M53Ffci6kEq3#-_$0c5Jf;%W)?fkA*1^ih`9udtrJHv8#Pzn_SN*2G-oD!6qpq jX+wfTRT=TPMV(oVpq1*JVy9Te*wi@0c5M!3nZXPI?FUTq diff --git a/test_data/data-multipoint-encoding_wkb.parquet b/test_data/data-multipoint-encoding_wkb.parquet index 6583b3e2531a1722560ea61740aa5e0ebd663c37..3d8e35d4b18d832da3260b9a9be426203274e2b4 100644 GIT binary patch delta 329 zcmcb}bCoAFz%j^Bltt7*)Wk=WLzF>?fkA`crm3~Ik7zhe>?;1IjSB6f{g?Foz85@s2& ritNb^%tDjnnY8UVBxNLZD6m?E3BN7E9DCTrIMg`Ac5I%_yp0I}ZaOp6 delta 292 zcmcc0bCJh7z%j^Bltt7)RL4h@O_V{1fkA?SftS&SkpTjjptLB1sEDY94p5B%5VOfh za4=1@7j=P2OK?EdfhaB&kT4q~6SE3~+9O68Q5H#41~vgn3FhSd98o4QHnCfbK%^F+ z#sM{lk!j+(9xj;KW(<=hnX@OaXXN038uDrKenxQtHnB@AV%M0}?y#uMVU_`_s-2w7 jq^-dwDI=*vyiF=hYB&w7<5MmaGd diff --git a/test_data/data-multipoint.wkt.csv b/test_data/data-multipoint.wkt.csv index d28b0a8..00d926f 100644 --- a/test_data/data-multipoint.wkt.csv +++ b/test_data/data-multipoint.wkt.csv @@ -2,3 +2,4 @@ 0,"MULTIPOINT ((30 10))" 1,"MULTIPOINT ((10 40), (40 30), (20 20), (30 10))" 2,"MULTIPOINT EMPTY" +3, diff --git a/test_data/data-multipolygon-encoding_native.parquet b/test_data/data-multipolygon-encoding_native.parquet index 060beea13a3db5328d34911d52ce025a14000fa1..cc2b01f9d50d8121109f5f4c86ce2c2b6e8eb93d 100644 GIT binary patch delta 423 zcmaDO^ijw;z%j^BltnZ^)W%1YOO!!~fkA_TftQhik(YykiI>BMnE?V=ptLB1sFKP~(6a#KnlJpqnJrtjD?YfQ$SHc z&`80+Q8dgTsR-y-wPTYPFgftph<#%cJIAE?fkA!;D3ch6 z*egb{M~rF_Y8)_w7?~%tv8YVEZpUc_vtjZ?7GWkarpXgnL??SNiVHC@GIMsY6=Xbf z>oE(Mz`&q(WpW{-1HYBn878p}OllKY)Rr*INFa2nPyWN`iQTvgCUHif!Av4dd$=a+ zvRDgL0$pV#_KivG9Fy7y4z+iaKSETns82Rvo}s}ZDI;k^iceG-2{=xJu+5xeYuLp& O)HuW*Z1!bUVF3Wkl~$Yp diff --git a/test_data/data-multipolygon-encoding_wkb.parquet b/test_data/data-multipolygon-encoding_wkb.parquet index 65a3cec73577ae336568c10043aaf909b93d52cd..77d5a5523c74fad00836f1ba7cbad60cb97a47f2 100644 GIT binary patch delta 343 zcmcaF_(ae-z%j^BltnZ^)W%1YOO!!~fkA_TftQhik(YykiI>BMnE?V=ptLB1sFKP~(6a#K^L79Xlfz%ycuR&HEYU81?fkA90wy4^AiTOSBx^EERv=SY?cC&63of@IigHr z9Ad8+#U3%LMW}HQWAbEq7MY1RA9BGQW5%#qkXek;9L=U<+%lp(Ae(>|aipi_=cbkv zRf28%#VfXmL+uuu+7f0Nu;yS!4-QEgNgagiC~=&WrWy%$YcO((J>eJQP~#A5*{r~J Ggb4t61vXUx diff --git a/test_data/data-multipolygon.wkt.csv b/test_data/data-multipolygon.wkt.csv index 79b971f..211a681 100644 --- a/test_data/data-multipolygon.wkt.csv +++ b/test_data/data-multipolygon.wkt.csv @@ -3,3 +3,4 @@ 1,"MULTIPOLYGON (((30 20, 45 40, 10 40, 30 20)), ((15 5, 40 10, 10 20, 5 10, 15 5)))" 2,"MULTIPOLYGON (((40 40, 20 45, 45 30, 40 40)), ((20 35, 10 30, 10 10, 30 5, 45 20, 20 35), (30 20, 20 15, 20 25, 30 20)))" 3,"MULTIPOLYGON EMPTY" +4, diff --git a/test_data/data-point-encoding_native.parquet b/test_data/data-point-encoding_native.parquet index 774dbcced094ab0d1be1ba7526fbef64cd8199fd..f9d3fdf66aefa928c8dba74f6bc05f94da6f0668 100644 GIT binary patch delta 695 zcmbQix0=sBz%j^Bltt7*)Wk=WLzF>?fkAYuPc~!{^+h;U#R1)^OE}P+3U+BFl1nGCtDOSH3e=?_6B%75 xZ(x?2oXTXa!67LlX+uI(sJdeW4%uNNZ%DrD;$9O1x7UwH4dmT zjEs{%GO1W%cY~b+#ONROm~Mc`*i9~E6qT`afH?xJ1KAOeCOa}pio@)^#H==n8E7+- zW=6ZovCMMW*d3rk&;crNt5qDt7&M?JBZ~shOk_Gqj!WzYix`U?fkA;$9O1x7UwH4dmT zjEs}@nN%EshNp-+!Q5aZAqq5wk&!`=k;Tq|lZ!>g!Ag=rUgW4-b8BrEVQwBBxNeSlU{2WmxF%Gd; zjAD-%)gsh5a2qjs4ztR{bGD2MlX)0r7+EH3GHUR!Ffy}tuw7fs!Kk)xax9}UkAm0^ zCb3yeYJDtfOC~e2hy&H_W73>_m(c=QA1_Fs2a67qHj~;V&dG6%dIFU|XDEoRU=o|c wq_&6yXj&sw^8z~#Nf}8SN}R3Ah{tj2%mkgF&M9_^O^id0Lu|t4lPt%W0nIo^uK)l5 delta 384 zcmbO%*e_@u;22~m$|7nYs^cTdCdwehz#zfEz{_aE$N&LMP+F8hR76xl2dG8>h}mQ$ zIG85di@Lz1B{-n!Kopk>NSKY0iCKj~?GdAlD2t>i1Dk-P1aoqJjwq8Do7gQzAW{oZ z?fkAs n1J?UigH2LKQipiERW#K|F#Zsy*d9(XHZ=~hw#`#mHZcJJ_E9zS diff --git a/test_data/data-polygon.wkt.csv b/test_data/data-polygon.wkt.csv index c528373..0574892 100644 --- a/test_data/data-polygon.wkt.csv +++ b/test_data/data-polygon.wkt.csv @@ -2,3 +2,4 @@ 0,"POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))" 1,"POLYGON ((35 10, 45 45, 15 40, 10 20, 35 10), (20 30, 35 35, 30 20, 20 30))" 2,"POLYGON EMPTY" +3, diff --git a/test_data/generate_test_data.py b/test_data/generate_test_data.py index 8fe7377..eafb6d9 100644 --- a/test_data/generate_test_data.py +++ b/test_data/generate_test_data.py @@ -64,6 +64,8 @@ def write_encoding_files(geometries_wkt, geometries_geoarrow, geometry_type): geometries_wkt = [ "POINT (30 10)", "POINT EMPTY", + None, + "POINT (40 40)", ] point_type = pa.struct( @@ -72,7 +74,7 @@ def write_encoding_files(geometries_wkt, geometries_geoarrow, geometry_type): pa.field("y", pa.float64(), nullable=False) ] ) -geometries = pa.array([(30, 10), (float("nan"), float("nan"))], type=point_type) +geometries = pa.array([(30, 10), (float("nan"), float("nan")), (float("nan"), float("nan")), (40, 40)], type=point_type) write_encoding_files( geometries_wkt, geometries, geometry_type="Point" @@ -83,11 +85,12 @@ def write_encoding_files(geometries_wkt, geometries_geoarrow, geometry_type): geometries_wkt = [ "LINESTRING (30 10, 10 30, 40 40)", "LINESTRING EMPTY", + None ] linestring_type = pa.list_(pa.field("vertices", point_type, nullable=False)) geometries = pa.array( - [[(30, 10), (10, 30), (40, 40)], []], type=linestring_type) + [[(30, 10), (10, 30), (40, 40)], [], []], type=linestring_type) write_encoding_files( geometries_wkt, geometries, geometry_type="LineString" @@ -99,6 +102,7 @@ def write_encoding_files(geometries_wkt, geometries_geoarrow, geometry_type): "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", "POLYGON ((35 10, 45 45, 15 40, 10 20, 35 10), (20 30, 35 35, 30 20, 20 30))", "POLYGON EMPTY", + None, ] polygon_type = pa.list_( @@ -108,9 +112,10 @@ def write_encoding_files(geometries_wkt, geometries_geoarrow, geometry_type): ) geometries = pa.array( [ - [[(30.0, 10.0), (40.0, 40.0), (20.0, 40.0), (10.0, 20.0), (30.0, 10.0)]], - [[(35.0, 10.0), (45.0, 45.0), (15.0, 40.0), (10.0, 20.0), (35.0, 10.0)], - [(20.0, 30.0), (35.0, 35.0), (30.0, 20.0), (20.0, 30.0)]], + [[(30, 10), (40, 40), (20, 40), (10, 20), (30, 10)]], + [[(35, 10), (45, 45), (15, 40), (10, 20), (35, 10)], + [(20, 30), (35, 35), (30, 20), (20, 30)]], + [], [], ], type=polygon_type @@ -126,14 +131,16 @@ def write_encoding_files(geometries_wkt, geometries_geoarrow, geometry_type): "MULTIPOINT ((30 10))", "MULTIPOINT ((10 40), (40 30), (20 20), (30 10))", "MULTIPOINT EMPTY", + None, ] multipoint_type = pa.list_(pa.field("points", point_type, nullable=False)) geometries = pa.array( [ - [(30.0, 10.0)], - [(10.0, 40.0), (40.0, 30.0), (20.0, 20.0), (30.0, 10.0)], - [] + [(30, 10)], + [(10, 40), (40, 30), (20, 20), (30, 10)], + [], + [], ], type=multipoint_type ) @@ -148,6 +155,7 @@ def write_encoding_files(geometries_wkt, geometries_geoarrow, geometry_type): "MULTILINESTRING ((30 10, 10 30, 40 40))", "MULTILINESTRING ((10 10, 20 20, 10 40), (40 40, 30 30, 40 20, 30 10))", "MULTILINESTRING EMPTY", + None, ] multilinestring_type = pa.list_( @@ -155,9 +163,10 @@ def write_encoding_files(geometries_wkt, geometries_geoarrow, geometry_type): ) geometries = pa.array( [ - [[(30.0, 10.0), (10.0, 30.0), (40.0, 40.0)]], - [[(10.0, 10.0), (20.0, 20.0), (10.0, 40.0)], - [(40.0, 40.0), (30.0, 30.0), (40.0, 20.0), (30.0, 10.0)]], + [[(30, 10), (10, 30), (40, 40)]], + [[(10, 10), (20, 20), (10, 40)], + [(40, 40), (30, 30), (40, 20), (30, 10)]], + [], [], ], type=multilinestring_type @@ -174,17 +183,19 @@ def write_encoding_files(geometries_wkt, geometries_geoarrow, geometry_type): "MULTIPOLYGON (((30 20, 45 40, 10 40, 30 20)), ((15 5, 40 10, 10 20, 5 10, 15 5)))", "MULTIPOLYGON (((40 40, 20 45, 45 30, 40 40)), ((20 35, 10 30, 10 10, 30 5, 45 20, 20 35), (30 20, 20 15, 20 25, 30 20)))", "MULTIPOLYGON EMPTY", + None, ] multipolygon_type = pa.list_(pa.field("polygons", polygon_type, nullable=False)) geometries = pa.array( [ - [[[(30.0, 10.0), (40.0, 40.0), (20.0, 40.0), (10.0, 20.0), (30.0, 10.0)]]], - [[[(30.0, 20.0), (45.0, 40.0), (10.0, 40.0), (30.0, 20.0)]], - [[(15.0, 5.0), (40.0, 10.0), (10.0, 20.0), (5.0, 10.0), (15.0, 5.0)]]], - [[[(40.0, 40.0), (20.0, 45.0), (45.0, 30.0), (40.0, 40.0)]], - [[(20.0, 35.0), (10.0, 30.0), (10.0, 10.0), (30.0, 5.0), (45.0, 20.0), (20.0, 35.0)], - [(30.0, 20.0), (20.0, 15.0), (20.0, 25.0), (30.0, 20.0)]]], + [[[(30, 10), (40, 40), (20, 40), (10, 20), (30, 10)]]], + [[[(30, 20), (45, 40), (10, 40), (30, 20)]], + [[(15, 5), (40, 10), (10, 20), (5, 10), (15, 5)]]], + [[[(40, 40), (20, 45), (45, 30), (40, 40)]], + [[(20, 35), (10, 30), (10, 10), (30, 5), (45, 20), (20, 35)], + [(30, 20), (20, 15), (20, 25), (30, 20)]]], + [], [], ], type=multipolygon_type From 2981b6000da33a2f3f4f639479c88003fe6ba172 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 2 May 2024 16:33:50 +0200 Subject: [PATCH 6/8] rename csv files --- test_data/data-linestring.wkt.csv | 4 ---- test_data/data-multilinestring.wkt.csv | 5 ----- test_data/data-multipoint.wkt.csv | 5 ----- test_data/data-multipolygon.wkt.csv | 6 ------ test_data/data-point.wkt.csv | 5 ----- test_data/data-polygon.wkt.csv | 5 ----- test_data/generate_test_data.py | 2 +- 7 files changed, 1 insertion(+), 31 deletions(-) delete mode 100644 test_data/data-linestring.wkt.csv delete mode 100644 test_data/data-multilinestring.wkt.csv delete mode 100644 test_data/data-multipoint.wkt.csv delete mode 100644 test_data/data-multipolygon.wkt.csv delete mode 100644 test_data/data-point.wkt.csv delete mode 100644 test_data/data-polygon.wkt.csv diff --git a/test_data/data-linestring.wkt.csv b/test_data/data-linestring.wkt.csv deleted file mode 100644 index a3a320d..0000000 --- a/test_data/data-linestring.wkt.csv +++ /dev/null @@ -1,4 +0,0 @@ -"col","geometry" -0,"LINESTRING (30 10, 10 30, 40 40)" -1,"LINESTRING EMPTY" -2, diff --git a/test_data/data-multilinestring.wkt.csv b/test_data/data-multilinestring.wkt.csv deleted file mode 100644 index 1c5f1ea..0000000 --- a/test_data/data-multilinestring.wkt.csv +++ /dev/null @@ -1,5 +0,0 @@ -"col","geometry" -0,"MULTILINESTRING ((30 10, 10 30, 40 40))" -1,"MULTILINESTRING ((10 10, 20 20, 10 40), (40 40, 30 30, 40 20, 30 10))" -2,"MULTILINESTRING EMPTY" -3, diff --git a/test_data/data-multipoint.wkt.csv b/test_data/data-multipoint.wkt.csv deleted file mode 100644 index 00d926f..0000000 --- a/test_data/data-multipoint.wkt.csv +++ /dev/null @@ -1,5 +0,0 @@ -"col","geometry" -0,"MULTIPOINT ((30 10))" -1,"MULTIPOINT ((10 40), (40 30), (20 20), (30 10))" -2,"MULTIPOINT EMPTY" -3, diff --git a/test_data/data-multipolygon.wkt.csv b/test_data/data-multipolygon.wkt.csv deleted file mode 100644 index 211a681..0000000 --- a/test_data/data-multipolygon.wkt.csv +++ /dev/null @@ -1,6 +0,0 @@ -"col","geometry" -0,"MULTIPOLYGON (((30 10, 40 40, 20 40, 10 20, 30 10)))" -1,"MULTIPOLYGON (((30 20, 45 40, 10 40, 30 20)), ((15 5, 40 10, 10 20, 5 10, 15 5)))" -2,"MULTIPOLYGON (((40 40, 20 45, 45 30, 40 40)), ((20 35, 10 30, 10 10, 30 5, 45 20, 20 35), (30 20, 20 15, 20 25, 30 20)))" -3,"MULTIPOLYGON EMPTY" -4, diff --git a/test_data/data-point.wkt.csv b/test_data/data-point.wkt.csv deleted file mode 100644 index 8164a4b..0000000 --- a/test_data/data-point.wkt.csv +++ /dev/null @@ -1,5 +0,0 @@ -"col","geometry" -0,"POINT (30 10)" -1,"POINT EMPTY" -2, -3,"POINT (40 40)" diff --git a/test_data/data-polygon.wkt.csv b/test_data/data-polygon.wkt.csv deleted file mode 100644 index 0574892..0000000 --- a/test_data/data-polygon.wkt.csv +++ /dev/null @@ -1,5 +0,0 @@ -"col","geometry" -0,"POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))" -1,"POLYGON ((35 10, 45 45, 15 40, 10 20, 35 10), (20 30, 35 35, 30 20, 20 30))" -2,"POLYGON EMPTY" -3, diff --git a/test_data/generate_test_data.py b/test_data/generate_test_data.py index eafb6d9..f04731c 100644 --- a/test_data/generate_test_data.py +++ b/test_data/generate_test_data.py @@ -39,7 +39,7 @@ def write_encoding_files(geometries_wkt, geometries_geoarrow, geometry_type): table = pa.table({"col": range(len(geometries_wkt)), "geometry": geometries_wkt}) - write_csv(table, HERE / f"data-{geometry_type.lower()}.wkt.csv") + write_csv(table, HERE / f"data-{geometry_type.lower()}-wkt.csv") # WKB encoding table = pa.table( From 6c9390fd05e7d7db642ab520328d5e64c14d070b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 2 May 2024 16:37:33 +0200 Subject: [PATCH 7/8] add back csv files --- test_data/data-linestring-wkt.csv | 4 ++++ test_data/data-multilinestring-wkt.csv | 5 +++++ test_data/data-multipoint-wkt.csv | 5 +++++ test_data/data-multipolygon-wkt.csv | 6 ++++++ test_data/data-point-wkt.csv | 5 +++++ test_data/data-polygon-wkt.csv | 5 +++++ 6 files changed, 30 insertions(+) create mode 100644 test_data/data-linestring-wkt.csv create mode 100644 test_data/data-multilinestring-wkt.csv create mode 100644 test_data/data-multipoint-wkt.csv create mode 100644 test_data/data-multipolygon-wkt.csv create mode 100644 test_data/data-point-wkt.csv create mode 100644 test_data/data-polygon-wkt.csv diff --git a/test_data/data-linestring-wkt.csv b/test_data/data-linestring-wkt.csv new file mode 100644 index 0000000..a3a320d --- /dev/null +++ b/test_data/data-linestring-wkt.csv @@ -0,0 +1,4 @@ +"col","geometry" +0,"LINESTRING (30 10, 10 30, 40 40)" +1,"LINESTRING EMPTY" +2, diff --git a/test_data/data-multilinestring-wkt.csv b/test_data/data-multilinestring-wkt.csv new file mode 100644 index 0000000..1c5f1ea --- /dev/null +++ b/test_data/data-multilinestring-wkt.csv @@ -0,0 +1,5 @@ +"col","geometry" +0,"MULTILINESTRING ((30 10, 10 30, 40 40))" +1,"MULTILINESTRING ((10 10, 20 20, 10 40), (40 40, 30 30, 40 20, 30 10))" +2,"MULTILINESTRING EMPTY" +3, diff --git a/test_data/data-multipoint-wkt.csv b/test_data/data-multipoint-wkt.csv new file mode 100644 index 0000000..00d926f --- /dev/null +++ b/test_data/data-multipoint-wkt.csv @@ -0,0 +1,5 @@ +"col","geometry" +0,"MULTIPOINT ((30 10))" +1,"MULTIPOINT ((10 40), (40 30), (20 20), (30 10))" +2,"MULTIPOINT EMPTY" +3, diff --git a/test_data/data-multipolygon-wkt.csv b/test_data/data-multipolygon-wkt.csv new file mode 100644 index 0000000..211a681 --- /dev/null +++ b/test_data/data-multipolygon-wkt.csv @@ -0,0 +1,6 @@ +"col","geometry" +0,"MULTIPOLYGON (((30 10, 40 40, 20 40, 10 20, 30 10)))" +1,"MULTIPOLYGON (((30 20, 45 40, 10 40, 30 20)), ((15 5, 40 10, 10 20, 5 10, 15 5)))" +2,"MULTIPOLYGON (((40 40, 20 45, 45 30, 40 40)), ((20 35, 10 30, 10 10, 30 5, 45 20, 20 35), (30 20, 20 15, 20 25, 30 20)))" +3,"MULTIPOLYGON EMPTY" +4, diff --git a/test_data/data-point-wkt.csv b/test_data/data-point-wkt.csv new file mode 100644 index 0000000..8164a4b --- /dev/null +++ b/test_data/data-point-wkt.csv @@ -0,0 +1,5 @@ +"col","geometry" +0,"POINT (30 10)" +1,"POINT EMPTY" +2, +3,"POINT (40 40)" diff --git a/test_data/data-polygon-wkt.csv b/test_data/data-polygon-wkt.csv new file mode 100644 index 0000000..0574892 --- /dev/null +++ b/test_data/data-polygon-wkt.csv @@ -0,0 +1,5 @@ +"col","geometry" +0,"POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))" +1,"POLYGON ((35 10, 45 45, 15 40, 10 20, 35 10), (20 30, 35 35, 30 20, 20 30))" +2,"POLYGON EMPTY" +3, From 3466e1ff37cf7d1577dee943eb0199f3fb08b128 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 8 May 2024 09:16:18 +0200 Subject: [PATCH 8/8] properly specify mask when creating the Arrow data --- .../data-linestring-encoding_native.parquet | Bin 2023 -> 2023 bytes .../data-linestring-encoding_wkb.parquet | Bin 1474 -> 1474 bytes ...ta-multilinestring-encoding_native.parquet | Bin 2216 -> 2218 bytes .../data-multilinestring-encoding_wkb.parquet | Bin 1807 -> 1805 bytes .../data-multipoint-encoding_native.parquet | Bin 2027 -> 2027 bytes .../data-multipoint-encoding_wkb.parquet | Bin 1621 -> 1622 bytes .../data-multipolygon-encoding_native.parquet | Bin 2417 -> 2421 bytes .../data-multipolygon-encoding_wkb.parquet | Bin 2276 -> 2276 bytes test_data/data-point-encoding_native.parquet | Bin 1835 -> 1835 bytes test_data/data-point-encoding_wkb.parquet | Bin 1398 -> 1398 bytes .../data-polygon-encoding_native.parquet | Bin 2195 -> 2197 bytes test_data/data-polygon-encoding_wkb.parquet | Bin 1862 -> 1861 bytes test_data/generate_test_data.py | 16 ++++++++++++++-- 13 files changed, 14 insertions(+), 2 deletions(-) diff --git a/test_data/data-linestring-encoding_native.parquet b/test_data/data-linestring-encoding_native.parquet index 32653ef94bb29fab1ec39a36a082423323fa53cf..264705df6fba8cd34a5d101feb41f118b5f76a22 100644 GIT binary patch delta 49 zcmaFP|D1n=Eh8htWIM)ZEDTJ{Dw9t!mu#NHWX39LBxxhVzyLuE&a4a!0gge207B&o AZ2$lO delta 49 zcmaFP|D1n=Eh8i2WIM)ZER0OdDw9t!mu#NHWX39LAZa7RzyLuE&a4a!0gge207C}~ AZ2$lO diff --git a/test_data/data-linestring-encoding_wkb.parquet b/test_data/data-linestring-encoding_wkb.parquet index 276e4184eda51af7ef65e67abb4c7d54ba3e88a1..fd8b1d47c38c71228495b1426dd2cd850c270c43 100644 GIT binary patch delta 26 gcmX@aeTaL*238>>NgWvm1|VWM#>~JF;22~G09#fCo&W#< delta 26 gcmX@aeTaL*238>hNgWvm1|VWM#>~JF;22~G09!HzoB#j- diff --git a/test_data/data-multilinestring-encoding_native.parquet b/test_data/data-multilinestring-encoding_native.parquet index d0b558dfd8e15e53852f94ba5b13df46ad2fee5f..2bb58225a1020186e7b0eea4e0cd3867bb0223d6 100644 GIT binary patch delta 199 zcmZ1>xJq!sUuFYQlSz!`vJx?@3=9mc8nd_XX5)PbHg~L2be0047r|kOoGzPwZ-oHup10GcsW_&6Np5@#KCc mdrq-$tYRE$9Afu2Z(@1KENUcaBg4P|K@69G&J1u2G6Vpg@G5Qq delta 183 zcmZ1_xI%EkUuHc~qe+bB(&7=U3=9mc8nd_W)?;!))aLn2DJr~*D{;3v@nVN zncTxzfLFarHZz0ToXNT@rd%NPjA|d))fQ}?$Ry3kh+>K>6NakE6PfHe#Xhl$aj0>K c-P*i`*bqpgYnA5oBC`#E)Lxcyp#DEwO9l_1tliiF`j1ZnViEc!}5YfZN}st=FOAu jGWDSf?Pff+*@I;(laP_5jtm0>5HV=6Ffar-1{neXD>xb! delta 93 zcmeC>>*t$rgYm}1oBHe=E)Ly{wv3bc7`0d#Oso_bCfhKcW}GlNi&=){4U5{G$z9Bw lC*Nl3LlxT1cy6;B%T^{K14$hj1_mHv&|+a=2yhHC1OQC$8ejkb diff --git a/test_data/data-multipoint-encoding_native.parquet b/test_data/data-multipoint-encoding_native.parquet index 9bcbf13a1abd0755e7ca77d37d75d21f2ca941d5..b0435ef6c99a7a91617e510dd3a22156acd2a695 100644 GIT binary patch delta 42 tcmaFO|C)b;DnlADlxLiDlw?=DzUgY$ci$!@QO0n>4`Ehm;f0jAch^UsD+Bd zWL3tQjDIHYXOv;N!L0Ud^L<7kMkM~^`;0A|Vtd%cIMg`Ac5R-|yo5=}NK!|JfdPmZ NUNJK;1ULp60stkb9svLV delta 93 zcmcb{bCqYp4aNl%Z>qBLurM$%Ok$L0mP|C5tj0K#@z>-7j4~|OnAM(ae!wWih{T`# ofU$*BY!90lhZ=|2j?D|0moNz#Nb1NiFaQz5D`p0U0LLIh0DILN9RL6T diff --git a/test_data/data-multipolygon-encoding_native.parquet b/test_data/data-multipolygon-encoding_native.parquet index cc2b01f9d50d8121109f5f4c86ce2c2b6e8eb93d..43a8d2f9dbc693371bc448bd14e87a509c7d3c27 100644 GIT binary patch delta 281 zcmew;^i^ns03)B5sGp9gji`Vqn<$%%#N>%A!jt=0#I+e1R3~sUFfefb`TYI-{r~?M z*nk`h7G_2U24>Cjnv?%9y6^>w zhGDTlnt?%c0S*f$Ph=8iI>$9xm&ID3Qj|%|M(h`p*d->le;jImHh*NYVr0Z^rxq@a plRq-qbBY~f7voan5c{;*mzAGI)JW1shJgWs7%YK43~&rG1OOKUN+c$AX20 zk(slDtsvu>TaQ`51O^7Rdy@@VoLJ5Pc#U{96bgG^%+>rVc`@|R1h>$ zFmMzNGe|1Z0Gf4-YqBAWwP2+vlbDUzHzu)jOlrS4)PAr79po_CjQJ-cie;|M_%zsY ZitS?;<5J@gd$l=`m7j&tU~(d-2ms!TLG1tl diff --git a/test_data/data-multipolygon-encoding_wkb.parquet b/test_data/data-multipolygon-encoding_wkb.parquet index 77d5a5523c74fad00836f1ba7cbad60cb97a47f2..538b8f4e64705c94988a8fc10636f8d77d17ad68 100644 GIT binary patch delta 26 gcmaDN_(X8SB@Q7YNgWvm1|VWs%)-DB;22~G0A&va@c;k- delta 26 gcmaDN_(X8SB@Q72NgWvm1|VWs%)-DB;22~G0A%Y0?*IS* diff --git a/test_data/data-point-encoding_native.parquet b/test_data/data-point-encoding_native.parquet index f9d3fdf66aefa928c8dba74f6bc05f94da6f0668..4e6489aa576363b2ebacf69ac49763a5fcc0c8e4 100644 GIT binary patch delta 103 zcmZ3@x0-K*G$SL^WEn_f delta 103 zcmZ3@x0-K*G$SL!WEnrJZ8OS8c%fP^;!oc;&0}wH+U}j(la11g80A3;mng9R* delta 26 gcmeyy^^I#o3agNTq>c;&0}wH+U}j(la11g80A2nCm;e9( diff --git a/test_data/data-polygon-encoding_native.parquet b/test_data/data-polygon-encoding_native.parquet index 344168ee764ff558e48fc8362f419b7353f6a2f7..68d1aa67ca4f3d1b29c4205a4b0eedf68eb2f02e 100644 GIT binary patch delta 200 zcmbO%I8|^17bCNYsKsPnMsqo-6jlZX2G$jeW^sWSGR(}349u(@Y}XcZFsdDxoXBX( zvWrP<&gB2hu9NRGniA0G!J@;Y&7^j1ayFAG_bMi_X-sNMIMfcXPhQCAF!?ppEk+a* q-59YcwBr;z$0o+1#vwLk^I4XS%%Vn;HZnk$ff2(Zpeq9$gA4&#P%q2? delta 204 zcmbO#I9YH57bCNgsM%y*MsqpI1XczH2G$jeW^u6q88XZ)jLfVZY}XcZFskiiot(#{ z$5kQ9B&Hy?gGp@GWJVU($qyJ!VY+3p>h@sKVbW$&yEHkM$&`Bqlh_m{wM86i``9Nd uGU-e{$0#!SEz>O~Y-YPL;?g6;DRzoYj6;n>Y{KUAEE|~_4JJ2ohyVc2xi9ko diff --git a/test_data/data-polygon-encoding_wkb.parquet b/test_data/data-polygon-encoding_wkb.parquet index 45d2cba07e53d7bbf0616f9a879900142cd5e0b8..cce77baad2acf019d9d89d6509076f14996e6692 100644 GIT binary patch delta 92 zcmX@cca(3!4aQd!Z<=zkEM(wKWMN{EP24QWD8R^gZn74W49hB3wRM}ln8K0ylf9Tu ma*Cbd6ys3i5bN1Ig=GPgkddU03tZEx3A7nl`S%|q0 gRp=sP-{#3I3z&oqBz0sM7=VaDn}vZPz%j@W00|Zu&Hw-a diff --git a/test_data/generate_test_data.py b/test_data/generate_test_data.py index f04731c..0e50a14 100644 --- a/test_data/generate_test_data.py +++ b/test_data/generate_test_data.py @@ -12,6 +12,7 @@ import pathlib import copy +import numpy as np import pyarrow as pa import pyarrow.parquet as pq from pyarrow.csv import write_csv @@ -74,7 +75,11 @@ def write_encoding_files(geometries_wkt, geometries_geoarrow, geometry_type): pa.field("y", pa.float64(), nullable=False) ] ) -geometries = pa.array([(30, 10), (float("nan"), float("nan")), (float("nan"), float("nan")), (40, 40)], type=point_type) +geometries = pa.array( + [(30, 10), (float("nan"), float("nan")), (float("nan"), float("nan")), (40, 40)], + mask=np.array([False, False, True, False]), + type=point_type +) write_encoding_files( geometries_wkt, geometries, geometry_type="Point" @@ -90,7 +95,10 @@ def write_encoding_files(geometries_wkt, geometries_geoarrow, geometry_type): linestring_type = pa.list_(pa.field("vertices", point_type, nullable=False)) geometries = pa.array( - [[(30, 10), (10, 30), (40, 40)], [], []], type=linestring_type) + [[(30, 10), (10, 30), (40, 40)], [], []], + mask=np.array([False, False, True]), + type=linestring_type +) write_encoding_files( geometries_wkt, geometries, geometry_type="LineString" @@ -118,6 +126,7 @@ def write_encoding_files(geometries_wkt, geometries_geoarrow, geometry_type): [], [], ], + mask=np.array([False, False, False, True]), type=polygon_type ) @@ -142,6 +151,7 @@ def write_encoding_files(geometries_wkt, geometries_geoarrow, geometry_type): [], [], ], + mask=np.array([False, False, False, True]), type=multipoint_type ) @@ -169,6 +179,7 @@ def write_encoding_files(geometries_wkt, geometries_geoarrow, geometry_type): [], [], ], + mask=np.array([False, False, False, True]), type=multilinestring_type ) @@ -198,6 +209,7 @@ def write_encoding_files(geometries_wkt, geometries_geoarrow, geometry_type): [], [], ], + mask=np.array([False, False, False, False, True]), type=multipolygon_type )