From dced61cac91867b3a1218381786cc43bbf245650 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 28 May 2024 20:00:29 +0200 Subject: [PATCH] Add test data covering different native (geoarrow-based) encodings (#204) * Add test data covering different native (geoarrow-based) encodings * Update test_data/generate_test_data.py Co-authored-by: Even Rouault * fix geometry type * add .csv extension * add null values * rename csv files * add back csv files * properly specify mask when creating the Arrow data --------- Co-authored-by: Even Rouault Co-authored-by: Chris Holmes --- .../data-linestring-encoding_native.parquet | Bin 0 -> 2023 bytes .../data-linestring-encoding_wkb.parquet | Bin 0 -> 1474 bytes test_data/data-linestring-wkt.csv | 4 + ...ta-multilinestring-encoding_native.parquet | Bin 0 -> 2218 bytes .../data-multilinestring-encoding_wkb.parquet | Bin 0 -> 1805 bytes test_data/data-multilinestring-wkt.csv | 5 + .../data-multipoint-encoding_native.parquet | Bin 0 -> 2027 bytes .../data-multipoint-encoding_wkb.parquet | Bin 0 -> 1622 bytes test_data/data-multipoint-wkt.csv | 5 + .../data-multipolygon-encoding_native.parquet | Bin 0 -> 2421 bytes .../data-multipolygon-encoding_wkb.parquet | Bin 0 -> 2276 bytes test_data/data-multipolygon-wkt.csv | 6 + test_data/data-point-encoding_native.parquet | Bin 0 -> 1835 bytes test_data/data-point-encoding_wkb.parquet | Bin 0 -> 1398 bytes test_data/data-point-wkt.csv | 5 + .../data-polygon-encoding_native.parquet | Bin 0 -> 2197 bytes test_data/data-polygon-encoding_wkb.parquet | Bin 0 -> 1861 bytes test_data/data-polygon-wkt.csv | 5 + test_data/generate_test_data.py | 218 ++++++++++++++++++ 19 files changed, 248 insertions(+) create mode 100644 test_data/data-linestring-encoding_native.parquet create mode 100644 test_data/data-linestring-encoding_wkb.parquet create mode 100644 test_data/data-linestring-wkt.csv create mode 100644 test_data/data-multilinestring-encoding_native.parquet create mode 100644 
test_data/data-multilinestring-encoding_wkb.parquet create mode 100644 test_data/data-multilinestring-wkt.csv create mode 100644 test_data/data-multipoint-encoding_native.parquet create mode 100644 test_data/data-multipoint-encoding_wkb.parquet create mode 100644 test_data/data-multipoint-wkt.csv create mode 100644 test_data/data-multipolygon-encoding_native.parquet create mode 100644 test_data/data-multipolygon-encoding_wkb.parquet create mode 100644 test_data/data-multipolygon-wkt.csv create mode 100644 test_data/data-point-encoding_native.parquet create mode 100644 test_data/data-point-encoding_wkb.parquet create mode 100644 test_data/data-point-wkt.csv create mode 100644 test_data/data-polygon-encoding_native.parquet create mode 100644 test_data/data-polygon-encoding_wkb.parquet create mode 100644 test_data/data-polygon-wkt.csv create mode 100644 test_data/generate_test_data.py diff --git a/test_data/data-linestring-encoding_native.parquet b/test_data/data-linestring-encoding_native.parquet new file mode 100644 index 0000000000000000000000000000000000000000..264705df6fba8cd34a5d101feb41f118b5f76a22 GIT binary patch literal 2023 zcmcgt&2G~`5MJlUjUtc=Vk1Y&A&OF~hqNSBQ?*r9Ws>yACH)J~q(P`^QWG3GKR8Yk zQ+ngVJMaV?c>F~= z%M`iac&@jPc>v2hhvHg`d(plnExB5o6lUR-?GP9Z4bRrc`^em%& zY&l2x%@5kbuZIr zVWVx@ht;iYb#CXhzIkA~m@V{QvTN0{qdye5R()=isaLWuwN|&OJ3rVLG(4kL$s(rQ ztG64*t^r1U!yMGAJ86tRs5v6WWz2)6v|i1d4F~VD-Zvf*_Q1n#ojj{%mWYXflp6Cy()sW)hrb(3DRMK&R2?+p9 riT94J7X#g%Y4mzC$ClL{#>ZbSG&(1|xVvM;DD+C$a!(aS&=>U*E literal 0 HcmV?d00001 diff --git a/test_data/data-linestring-encoding_wkb.parquet b/test_data/data-linestring-encoding_wkb.parquet new file mode 100644 index 0000000000000000000000000000000000000000..fd8b1d47c38c71228495b1426dd2cd850c270c43 GIT binary patch literal 1474 zcmc&!&2HL25FXpqf+&%asJ3j0140z#Kq!zp3Q-iX2HKdAKcWIj)ka`k#}NiwFMf<*uY zV2<11U~K%vOCa5qloWF2@fD*=c#0_D2?bJ!V^tj3|*I@3J`4A$<%muQIl!0NHX#p)Zj*@DUA`Z}2LE zaE->M3^xcF(Zc9i5%h$V=s`dSK?d7j4H_HHyY%^{jlpQ#6J~ zu`)8qRRi}pZfMA< 
z4ce~h>^a1#XZ)h!jBLa0blbUXx3h(Ntkg8~QJvLj1eKjuL+d_XsRihJMmL*(-pt^E zX)8MGhYCk_ODN(RPReB_LXifkK2xJj(taoB9V?0W0TZ4MC^ptRBKnAlLzAa^>*SQx zG3m(mW6QCWN4flv>WjK!zDdSgQvy~*10^4TB#2dwt8}Wb?mKrA}Enufr1j}Z#&Mr_` z8`hDpJ@wXG)k78aFX|uAL)BA`Rn=qFnfCz4YLq65qKshQyqWpty>DjL=00oYX_OY} z_8wIz`IwLtA!`zm)|6eD2#L&BO6UgFvJkgU<20Vsl!ZrA<{?BSo3F5fcQu3Q6iJT! zLG%|=l138-iLWQLh&A-6tSagYN&Q{YJG$~Vi4$44J)tsz$2u&8{D)0Uiry*4R-z=c zrerEb2m#qH0w71Xvs7EM5uQqM!6s8wqnmSPFIyo_`#J{C*yfL`1eJ|QL?Uvm|A|cW z=dmPR72KXEjy?44U>rt0cM`1Fp6%O%;L=8#0eMaRPFDYs^uHqdFA<>s#y5F0`Wa6D z&ikgHc^V=5&sW}#e)tmoKUehUEAL7_@smNc_{4f*`&}A^akwh;86$?QE&D-b8sFQ6 zG8<8iw*y88V2cj@pBvJyUL~3|o39~Jl|(YJ`(6k8Lr>O+Zms%vOjUG6{VldZNFu96 z;N{YOke;Pa?eWAN4$`+%>HJoHE0@lu(xb8KcgNu)_~UZobJyZkM6a+;0P!rnwByDe zSi`2k=Gtg0XE@a@gmLIjrn(Td8!(KSkr+rN6&5{Z?odywjd*-ZabRZ14E)Nuz8|%yNBd`Cf3?sT-Zb{%Nn) z3PemD@H@d#ry1H0c-(QXFw6Ja#+i9MbnNhw|E39N9JLLIX^win<=%6d+v~I@{i-+V zwhhm^(F|c7nC{du{l3>L#}tuMjPb9FlToyP~f zhEOytgQy3V7Kv6$e9wyj^#I|eIz$Z%&#D*a;g;3U0J$@r;_w=l56Z!Y+76^(*V z$v`bUg;2yh2%Z;(S)%W7_ytG|O%*|+UR>j9&B6JDOCI7X8!qKDqNPUN@wX>-aLpQxu64)b;WRaWhf{fDE4Nih>?R2jAMz{w KgS`*GQvL-K>Bryz literal 0 HcmV?d00001 diff --git a/test_data/data-multilinestring-encoding_wkb.parquet b/test_data/data-multilinestring-encoding_wkb.parquet new file mode 100644 index 0000000000000000000000000000000000000000..08f9f0d2d0fe6c6d8525f32c00119808fe517e6f GIT binary patch literal 1805 zcmds2-EI<55T0ch+%_dChO=1`E(k%=7+L{K(>Ar?0R7pP{y-J5#x$_3bSr>Jcwu53*fTrxeKTig&MfTHVuDB{P3H4NCU74h z1mG1w@X9MLutgkc0>n?$81klxhj@aj>>#QObSAyV1gzYNWTQee)NSdp$o*&tccJN%92TC>@*W8Hc=+%+! 
zXvgm)a{Q6BI>8@il*v92@t#YJK=i#FBvYdUksC&*Wmvr-Kb32b@=c0l}I=l4_tiQfK1`txK2>yKrj&dt5f-3c3Al&D{@p;zIgojcEnT> zCr5oJT!hbzUf=Ar!^@#?VlFWk5642`ZqIC0d&47q-A`K_a^{SowzrQF7vZs=8Ae<0 z)XjE-M`=m%gakpr)aUbS6;RUK>aR0Vv=lR zD5V9wm~2xzPcsd=PHBT?8{C&c!cX{sIoHN`o|Di<;Vh18lnyAyq2@ShN^@+XJ9uT< zhvh|j%&41vv)(EV^ct#;CB7{g!i1r7H&ET@oD?ABVkxo1+d6t}XPIcR9Ddb5^H zH0ztV$Kzs3-zf1M<>A(Dy?h?8K0C*l(o@8FY^9P|dR3rVR>=Wdc+t56Yb6>P)+`iW zw0+BMUjJdTbXF^u;(GfvcOs5`<4r5j3{Sd2sc!el204!wG)~&&mU9$nu>NgeUwEGf z?4gjtw`NStIF=`%hPu_>n^VJ@)w|uu9(t&H>dbq96E#v&MadKU{>=Pl-u!0fiG64ld4iYu z#tv6GyUJLWu@!|WE9zE^$q}EeobhX1FCcD}r+6x(t8xNk(<&~E{fA9bN!%+Zml7<$ zqULL5gy7gHqd|@v1%740Mrvy1Ih%Z0OWc?-dsB+IFUxdQJVObH|C$n{eokiivLsbk z`_9mJ!cmm)+;O<%c#iK3LgfsJUeW$mv_BQ&uek9`9I1Zrtsh1;FR0d#>btG$|CaoU#uTam(8Q0%Hvk(@ z>B9qnK2-ZcR;j$DpOWzYLccRVqf~eiXA58oXostCf-69cCfLQVGB`T84KCs9W4GtG zBfI3Cc3tb1RkhA`PVP^3tJc`6Hm0`kg@>Jn*(p6d?bh3B-;Gd$|FBIjFyJL#6r ziroYAxpgw^JJDPETNd7NbYLQ;6?A>u-E+b1cG~0Kwm0pTyh%^qb?LLcx$bl-o*iL4 z5613y65Ev6M(NXw*PDgd@=5=O*-7Qe_@tx2T_ z=Jv3DNXQ0F(w=GrZJMw+ID{*Edo$<`TF>!}_ctlu01r>l({0+-L{JrRf!LBzK%rvN zE;^mX+b83uXaoRT)(M?m8tn=vGJdfoa~WE_Paz@I!&zu3zx0PD@9@{Ogp>NhiP%uf zATzA9NS$a*gb2O_kWAq2U8ZM)ddVM}S1Lh}SW)@6Mz|<_5nSkM}0yTT7nD>Mf?)Zc-iu~{g$-+cPsbP0o zw%Z$XH9v{+XCd-kjJy|u|Fpu}ZbaQoBgNmh&g@o|h;T&@eEi-KS%A?^;YgO$pc?(F z&*T4)O#bMXrHCYdm|H?fjVc1%t@0XQYD2r%cRFotBcvr_iCA2VhO}J9pY(gw{nIHp6u3_oIE;f61A3g%#Z$a#a( z5laj`PXaxqdFJRYG`)RnuF(dgw)#%JRUTW(=CI}%t47YaD4rJvyE&t8+GyTdq?=j8?r_OD3B2E!g96DQ!J1^Bm@Qd#`R@#H(d}ST)meqnTKLQHevr>?k@v z4-<~=nRERAs3!G5F7FxQGLY4VT-4F>-3E0v6$!k z@u?@}QF6V{^aWjEy+ysPSpZ}NLrSPy?Y$h>?nA5FeOT@FI-}6!8HN(+SUi?gqd|n= Mhpyp+Z~$MFpDD8(X8-^I literal 0 HcmV?d00001 diff --git a/test_data/data-multipoint-wkt.csv b/test_data/data-multipoint-wkt.csv new file mode 100644 index 0000000..00d926f --- /dev/null +++ b/test_data/data-multipoint-wkt.csv @@ -0,0 +1,5 @@ +"col","geometry" +0,"MULTIPOINT ((30 10))" +1,"MULTIPOINT ((10 40), (40 30), (20 20), (30 10))" +2,"MULTIPOINT EMPTY" +3, diff --git 
a/test_data/data-multipolygon-encoding_native.parquet b/test_data/data-multipolygon-encoding_native.parquet new file mode 100644 index 0000000000000000000000000000000000000000..43a8d2f9dbc693371bc448bd14e87a509c7d3c27 GIT binary patch literal 2421 zcmd5;&5t5Q6t8Z2m852Hn-BTM z%_MuAgC=wK@*j-A-)mBnR578ejCq2Lv( z5Pc!YgLiWd$4M3ie&xJa@WhaF-6K2no`reIEuC@FPh8;LfcS4RdqPkG!aM@E4g*3k z@8ePdzr;$~LuiE)?j~1O1(bopcHn!{B?^zTE-{G0Ng#l2B!QmChasFeFOCHsXOqj~ zHj9n!XNkDh$6S=5ZoGMnvmH(*Ntxun!nBZD})%UXEix6CiXoDcA_O<&Os?C{d zPk8A&KJZiwoULA0HP06@Bs+)tL!;Bc0(9FZ&srG8c=`NJF^`>v1l&$cklB?&X98_` z%hwFUuI^1FbF;-bN0Qs)^(6k+*Xp@y-sh?(q_0*bcR-YWc5Wc#3CRw4v*nMuNB)U1 z7@EDV|BlZe4M)Qff5_+W56n(uFn<6)dgBfYEq@1S)LtC|IBZN1;W g-53mdQ{Uo^_eEpjNLcYCya?GJ6o)?rCHTYj2gm&i!2kdN literal 0 HcmV?d00001 diff --git a/test_data/data-multipolygon-encoding_wkb.parquet b/test_data/data-multipolygon-encoding_wkb.parquet new file mode 100644 index 0000000000000000000000000000000000000000..538b8f4e64705c94988a8fc10636f8d77d17ad68 GIT binary patch literal 2276 zcmeHJ&2Ji45TD1c=|a2;ui~Ml;@}_`p$b!=V8xal@qpM~$4(59z@@ERzJk}~s~8*8 z$dXk>YESL0*GlcZJ@jDp)PJFWM5JDPYO5Zq9y;$Wi<2~$9D2)$eX}$3o0&KB-T-^N zlw>N~V{1Fi%iv9bD1c6Y=o+|mZA}52)G24c0_?3AVtvfb+#y4AEQU6PKmejI(W3Q6 z)P<{;D9V=`5c!jYm>SH0+ZQyHM!&;c0dL?R61XCfk0aWvL=MKV1Yj<}#fJ<4<=-{- zjk3cuS=7&}{}HN*o6Hdv6f);SX$~k+?_@n*Sj1`zt5`sK)dw_6jbIjjA|H|?a1jqZ z0@ZVXWwLklRrO+{S_S$!t*YrP+YMeqTwe#juF(wrmTG#JNpJNHvUIC@3ABp368(j0 z>_-}fg6EQyZ!r&{>%jF(`Zc|xKB`ee|BZF^Evji3JX=2`bdmi**I-3|LYueKS&!U0 zE`qQ|KQpdRVA&P@!bRL@5rq?RU<4oMSO5xH6p0a%Nea_A4#%>uH|+SlfPwflm!n^KA$K}!cw{5MChwxyyK>W| zJcY=wH$rTFdfT*itKV&nh7)<^{?-HEX_2Sy$PY^Bbu+J<|7fa?#$l^l zV=9i^+b%iBz~=F7)?)~GgP~yTe;xPNr#l@5XEa`IRQ`Lqfxm7Byb&$%mDdjtj2Q|} zjPZm#4xhG$$5y`=UR?+$j;99swvhE}&WoE+eH_(@lqocR(~TYHQikHa&)5L&%P zziIW_B39*IPGBAT!DupQ36~GTyC^-$uD>Z!A2y^{INJnFYwljrQqCg!MCGKeV+>fMI*z~WXpG0L0PxNUZu zoq8(SX>MVT_ez<@W?9ClOt$YgD`$yXIfr>Wy?QEb?e8a79@(A9i>*Hti!8cO6s<%d zCz^#Mi?&~-EbFhP%BS^8Inn5SB9&;zzR7?^o-1+;y2*LN?vrhlI9@a*XiG}=2IQ+~ 
z^(OX3_IWJ)5h7CyGe(@zIv5Ls+VInp)@Z3Q7%bI>!~S?-dMOu@nRp_e3dTYJ_<`^7 KCuI-+-~0=8YhL#N literal 0 HcmV?d00001 diff --git a/test_data/data-multipolygon-wkt.csv b/test_data/data-multipolygon-wkt.csv new file mode 100644 index 0000000..211a681 --- /dev/null +++ b/test_data/data-multipolygon-wkt.csv @@ -0,0 +1,6 @@ +"col","geometry" +0,"MULTIPOLYGON (((30 10, 40 40, 20 40, 10 20, 30 10)))" +1,"MULTIPOLYGON (((30 20, 45 40, 10 40, 30 20)), ((15 5, 40 10, 10 20, 5 10, 15 5)))" +2,"MULTIPOLYGON (((40 40, 20 45, 45 30, 40 40)), ((20 35, 10 30, 10 10, 30 5, 45 20, 20 35), (30 20, 20 15, 20 25, 30 20)))" +3,"MULTIPOLYGON EMPTY" +4, diff --git a/test_data/data-point-encoding_native.parquet b/test_data/data-point-encoding_native.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4e6489aa576363b2ebacf69ac49763a5fcc0c8e4 GIT binary patch literal 1835 zcmcgt%}yFo6h6a%PHas~<4lHx1tBbH4AKd%tt;gbc}rM7tOjS5l%N!Xp4T zz&r=sys&8l%lOeOg19Kke&j8PGvbU(7RHDyj3^+1Z;BVuIWJ8oa5nxsD3_c|v^yg( zv*46@y;&1&k|14j(l<^iD#Be7JN0Fp5X*eP2Ot zc5K^{jok%^*yQUg?Fj`!fuP6l_O#k&z0w};;}>~cXVCaYQ`GWyFyh2>?WZqBLvJ3K zjVjI6YMPBMWse#A-C@h1L9aX+hP^pGJq7t8$>sJ68&><}EBj+=YtUI?~3Bj4cgHO}A;&aG}y*fc1kfc7P22P5uJqv|G~v literal 0 HcmV?d00001 diff --git a/test_data/data-point-encoding_wkb.parquet b/test_data/data-point-encoding_wkb.parquet new file mode 100644 index 0000000000000000000000000000000000000000..94b87d16e1b3d66953072f9f0699774a759e76e5 GIT binary patch literal 1398 zcmcgs&2HL25MKWjIcXxLRo1d44#=V?2SPCoQ4>WGi<1Tu(g0O~QdJQchdRPQ!GzEV z!TfGWq%u7RdiE;+O#duMm%o7w%owVWqK9g8@H*K%0G=pjNO zgcdo(ElOKF@=cg{jPNp6qJUe%0UQV_(lnz;69|&g$}LgI&WC9`iQ|PID00OGv8X0d zU`bU3tJA`~EXh}#{GE$bBGO$Nt5G;T!aRau2^>PO{>b4<4vt9a>?;_89EToo;&y6o zUJR!;RU}?t%&9QIG`y1n0N76IZ-VRm3Wmz5U0?XK>kH*(csu2X!_@mUh5Nh^Mv=>V zLA)@bDK+hmYkR#BAVXjF1{t1khL+pKGUfukUo)+Ga@8V|pyEMMGM*=eV`r z=m@^Hfy+|U3!zy*+J}mBZ5F2syKQwEPP<8amG{#WJIx)EN}W(p?nh_Q%jIEA*28=wS7X?NV-X$5z-_^lf}{ixRD>y;O;D+V;ES2F%-+2 zgbZn8@T>`VLU!mt4&Y6EY@wcL4H`zaaa_?i=*bzCUB-v;jpso> 
zO1Ds8%o)j9`X76`@x}+k3F03Agm$WJ9ouPV7lXLsxqsk;(!oYSt=nwAhIJjVgO^-g~Xu^V5%xL%XYJre+6vV zMwTM=(o;`W4^>WS4pl!wPx%Br<^xncMg0(Y^A>QBTvbsO<%ylynfH5d=J%VoyBzUq zk|`|9(q%>&`H+w>A)6ABHtC*BJoC+aM%Y!R#Q?X(0xS^H=)$AXIfSTW`;uAkZbU^o zD#^aTiT+9oF(sHGfvuqCGY4HJt5kg@seeg&L#J<(I2eQ76DAXAwxA;f#zS_u%sxOi zJB0Wp#>r{EdH)*t(1w2?h3(;w|6Jy~eHFP{FW*5|=SykEgXs*lx z?xVIJTK{j)e%TN=A)ikoKK~Dam%ma;f8h_Y4Ug(0YFmSzH5s`|*BMXNtgh9w`jeG` zGy^Td>L0TDRMvk~^yfZM1CkQ5vg*I=f@&mCjp&6MLW$x!-6nUk-Xi>6 zNKsN^^2f6Na{cY3a8ZiiW%XxSe@^umYyVe@;L{Az0%P-$)jMGdRLV6O%a3GaZFx3K zW`Xh^sF6`LJQ}Df2wR-if4L#;#xy?0vk@4L$0_tC0<22`q3^{2p9 zLV_{P2M?O|z4SP8W{t+qpdYy%jwIvBcp?%DM}{M(cQSGx!h7emhpxqADZJk77$P1= zmh)m@_02)s>DwaLaL{#aNDKG{=HbL0S|aR=$kE*U<@xzJ(XQ}n^QTpPL3OgnMUgFS*ui=c$hoYRoD6qah+CbmTc8C-x-~OZMiG{TL!GrtY-jI9kzO=bI;+9d9XXS>TR*6Smz>N zZ8u>xa$VDfy%fjJgC*{;{dFU`i^!pVMjdP(U>I1FN1H=SF{>@bF ztW~ci%>FlGjt`C?^ycUDc<#mL7Wx7J<55&h4O*NwZwmbi6$rY4@d!^Gi+pf+V2HM-*XFC=7U|z7(KEw>wehAGy=Z#E&e=KNH{Z-TbFysHT!P3XO{O-8Lf{cV z5WpvZ;8W&B=$Yht0%VNnQRMl_5E%;SN*~b`mjW70JP<{6B+TR#L6m+#j}*+{{V z-_WH>qeetc(QXCpMu-$6%AW`uQ9L~m5peLM1Hk!bncNB+ct)1bM{#Tq!-6Du{OCQP z3i3kQI|5;!0*iBa@~JsLs1vGgq~$B{m1GgdOY$Z4QQxe5x$O0;iVQKYpE$k=wKSdv z*?W!K1k?*PtSYLmE)r!175OGOF#Ek!)8fRHAh2?=BYGHx*(FVL58Y3iWPG8WyE6M7}8jGV0!#Qnea&tJ8A$ zwcQMB*Ph51Pvnyn`1jI$sDF(46zD_k${VX*A~H7AW07|$kWqXaWDlBR1dQlkZRY=| z?%G$crbHC&!|)h@5!EH^GX4E|=+tVp?Z$CvE*MJ060vwF8VogCcD>Yc_VLqkQs+>= zB^la_wsFOIXb@);>$uW5u#c-utFWG7+&;eFaheue@;bELs5w>KaPQ*c0`yUu%WW0r zdflLZm!2|9(hQ|^2OnnZlupr1l`d0SrP(TvWsvX--etk10lvXGI7j0wj+>NrnJ{`T zf}YYvHqcFcGRM1x8G6X5t8M$Bo_8wA+G*J~r_HQ+wsEv{vY9p8W_GPxsn|rt9OJ#}Y!}E=AWBphT zJ3Bmcp5&lZlx7j4D|h;A%8G<_;p=gD$waZ>> import json, pprint, pyarrow.parquet as pq + >>> pprint.pprint(json.loads(pq.read_schema("example.parquet").metadata[b"geo"])) +""" +import json +import pathlib +import copy + +import numpy as np +import pyarrow as pa +import pyarrow.parquet as pq +from pyarrow.csv import write_csv + +from shapely import from_wkt, to_wkb + + +HERE = pathlib.Path(__file__).parent + + 
# Base "geo" file metadata. Each written Parquet file gets its own deep copy
# with "geometry_types" (and, for the native files, "encoding") filled in.
metadata_template = {
    "version": "1.1.0",
    "primary_column": "geometry",
    "columns": {
        "geometry": {
            "encoding": "WKB",
            "geometry_types": [],
        },
    },
}


# --- One geometry type per section: WKT (CSV), WKB and native encodings ---


def write_encoding_files(geometries_wkt, geometries_geoarrow, geometry_type):
    """Write the CSV / WKB / native test files for a single geometry type.

    Three files are created next to this script, all sharing an integer
    ``col`` column (0..n-1) and a ``geometry`` column:

    * ``data-<type>-wkt.csv``: the WKT strings as given.
    * ``data-<type>-encoding_wkb.parquet``: the WKT parsed with shapely and
      serialized to WKB, with "geo" metadata using the "WKB" encoding.
    * ``data-<type>-encoding_native.parquet``: ``geometries_geoarrow`` stored
      as-is, with "geo" metadata whose encoding is the lower-cased
      ``geometry_type``.

    Parameters
    ----------
    geometries_wkt : list of str or None
        WKT representation of each row; ``None`` marks a missing geometry.
    geometries_geoarrow : pyarrow.Array
        The same rows as a nested Arrow array. It is expected to have as
        many elements as ``geometries_wkt`` (both are placed in one table).
    geometry_type : str
        Geometry type name, e.g. ``"Point"`` or ``"MultiPolygon"``. Used
        verbatim in "geometry_types" and lower-cased for the file names and
        the native encoding name.
    """
    name = geometry_type.lower()
    row_ids = range(len(geometries_wkt))

    # Plain-text reference copy of the geometries.
    wkt_table = pa.table({"col": row_ids, "geometry": geometries_wkt})
    write_csv(wkt_table, HERE / f"data-{name}-wkt.csv")

    # Private copy of the template so the module-level dict is never mutated.
    geo_metadata = copy.deepcopy(metadata_template)
    geo_metadata["columns"]["geometry"]["geometry_types"] = [geometry_type]

    # WKB-encoded variant (the template already declares encoding "WKB").
    wkb_values = to_wkb(from_wkt(geometries_wkt))
    wkb_table = pa.table({"col": row_ids, "geometry": wkb_values})
    wkb_table = wkb_table.replace_schema_metadata({"geo": json.dumps(geo_metadata)})
    pq.write_table(wkb_table, HERE / f"data-{name}-encoding_wkb.parquet")

    # Native variant: same metadata, except that the encoding is switched to
    # the lower-cased geometry type name.
    geo_metadata["columns"]["geometry"]["encoding"] = geometry_type.lower()
    native_table = pa.table({"col": row_ids, "geometry": geometries_geoarrow})
    native_table = native_table.replace_schema_metadata(
        {"geo": json.dumps(geo_metadata)}
    )
    pq.write_table(native_table, HERE / f"data-{name}-encoding_native.parquet")


# ---------------------------------------------------------------------------
# Point
#
# In every section below, the i-th WKT entry and the i-th Arrow element
# describe the same row. A ``None`` WKT entry corresponds to a ``True`` slot
# in ``mask`` (a null); the placeholder value at that slot is masked out.
# ---------------------------------------------------------------------------

geometries_wkt = [
    "POINT (30 10)",
    "POINT EMPTY",
    None,
    "POINT (40 40)",
]

point_type = pa.struct(
    [
        pa.field("x", pa.float64(), nullable=False),
        pa.field("y", pa.float64(), nullable=False),
    ]
)
geometries = pa.array(
    [
        (30, 10),
        # "POINT EMPTY" is stored as NaN coordinates (this slot is not masked)
        (float("nan"), float("nan")),
        # placeholder for the null row; hidden by the mask
        (float("nan"), float("nan")),
        (40, 40),
    ],
    mask=np.array([False, False, True, False]),
    type=point_type,
)

write_encoding_files(geometries_wkt, geometries, geometry_type="Point")

# ---------------------------------------------------------------------------
# LineString
# ---------------------------------------------------------------------------

geometries_wkt = [
    "LINESTRING (30 10, 10 30, 40 40)",
    "LINESTRING EMPTY",
    None,
]

linestring_type = pa.list_(pa.field("vertices", point_type, nullable=False))
geometries = pa.array(
    [
        [(30, 10), (10, 30), (40, 40)],
        [],  # empty linestring
        [],  # placeholder for the null row
    ],
    mask=np.array([False, False, True]),
    type=linestring_type,
)

write_encoding_files(geometries_wkt, geometries, geometry_type="LineString")

# ---------------------------------------------------------------------------
# Polygon
# ---------------------------------------------------------------------------

geometries_wkt = [
    "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))",
    "POLYGON ((35 10, 45 45, 15 40, 10 20, 35 10), (20 30, 35 35, 30 20, 20 30))",
    "POLYGON EMPTY",
    None,
]

polygon_type = pa.list_(
    pa.field(
        "rings",
        pa.list_(pa.field("vertices", point_type, nullable=False)),
        nullable=False,
    )
)
geometries = pa.array(
    [
        # single ring
        [
            [(30, 10), (40, 40), (20, 40), (10, 20), (30, 10)],
        ],
        # two rings
        [
            [(35, 10), (45, 45), (15, 40), (10, 20), (35, 10)],
            [(20, 30), (35, 35), (30, 20), (20, 30)],
        ],
        [],  # empty polygon
        [],  # placeholder for the null row
    ],
    mask=np.array([False, False, False, True]),
    type=polygon_type,
)

write_encoding_files(geometries_wkt, geometries, geometry_type="Polygon")

# ---------------------------------------------------------------------------
# MultiPoint
# ---------------------------------------------------------------------------

geometries_wkt = [
    "MULTIPOINT ((30 10))",
    "MULTIPOINT ((10 40), (40 30), (20 20), (30 10))",
    "MULTIPOINT EMPTY",
    None,
]

multipoint_type = pa.list_(pa.field("points", point_type, nullable=False))
geometries = pa.array(
    [
        [(30, 10)],
        [(10, 40), (40, 30), (20, 20), (30, 10)],
        [],  # empty multipoint
        [],  # placeholder for the null row
    ],
    mask=np.array([False, False, False, True]),
    type=multipoint_type,
)

write_encoding_files(geometries_wkt, geometries, geometry_type="MultiPoint")

# ---------------------------------------------------------------------------
# MultiLineString
# ---------------------------------------------------------------------------

geometries_wkt = [
    "MULTILINESTRING ((30 10, 10 30, 40 40))",
    "MULTILINESTRING ((10 10, 20 20, 10 40), (40 40, 30 30, 40 20, 30 10))",
    "MULTILINESTRING EMPTY",
    None,
]

multilinestring_type = pa.list_(
    pa.field("linestrings", linestring_type, nullable=False)
)
geometries = pa.array(
    [
        [
            [(30, 10), (10, 30), (40, 40)],
        ],
        [
            [(10, 10), (20, 20), (10, 40)],
            [(40, 40), (30, 30), (40, 20), (30, 10)],
        ],
        [],  # empty multilinestring
        [],  # placeholder for the null row
    ],
    mask=np.array([False, False, False, True]),
    type=multilinestring_type,
)

write_encoding_files(geometries_wkt, geometries, geometry_type="MultiLineString")

# ---------------------------------------------------------------------------
# MultiPolygon
# ---------------------------------------------------------------------------

geometries_wkt = [
    "MULTIPOLYGON (((30 10, 40 40, 20 40, 10 20, 30 10)))",
    "MULTIPOLYGON (((30 20, 45 40, 10 40, 30 20)), ((15 5, 40 10, 10 20, 5 10, 15 5)))",
    "MULTIPOLYGON (((40 40, 20 45, 45 30, 40 40)), ((20 35, 10 30, 10 10, 30 5, 45 20, 20 35), (30 20, 20 15, 20 25, 30 20)))",
    "MULTIPOLYGON EMPTY",
    None,
]

multipolygon_type = pa.list_(pa.field("polygons", polygon_type, nullable=False))
geometries = pa.array(
    [
        # one polygon, one ring
        [
            [
                [(30, 10), (40, 40), (20, 40), (10, 20), (30, 10)],
            ],
        ],
        # two polygons, one ring each
        [
            [
                [(30, 20), (45, 40), (10, 40), (30, 20)],
            ],
            [
                [(15, 5), (40, 10), (10, 20), (5, 10), (15, 5)],
            ],
        ],
        # two polygons, the second one with two rings
        [
            [
                [(40, 40), (20, 45), (45, 30), (40, 40)],
            ],
            [
                [(20, 35), (10, 30), (10, 10), (30, 5), (45, 20), (20, 35)],
                [(30, 20), (20, 15), (20, 25), (30, 20)],
            ],
        ],
        [],  # empty multipolygon
        [],  # placeholder for the null row
    ],
    mask=np.array([False, False, False, False, True]),
    type=multipolygon_type,
)

write_encoding_files(geometries_wkt, geometries, geometry_type="MultiPolygon")