From 6b7fb4d8a840a19e8a8ec92784a2a635c2950878 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=20Garc=C3=ADa?= Date: Thu, 4 Jul 2024 12:23:00 +0200 Subject: [PATCH] Database harvester --- .../harvesting/harvesting-database.md | 43 ++ .../harvesting/img/add-database-harvester.png | Bin 0 -> 8285 bytes .../docs/user-guide/harvesting/index.md | 3 +- docs/manual/mkdocs.yml | 1 + .../geonet/kernel/harvest/BaseAligner.java | 35 ++ .../harvest/harvester/AbstractHarvester.java | 6 + .../kernel/harvest/harvester/csw/Aligner.java | 38 +- .../harvester/database/DatabaseHarvester.java | 73 ++++ .../database/DatabaseHarvesterAligner.java | 408 ++++++++++++++++++ .../database/DatabaseHarvesterParams.java | 233 ++++++++++ .../database/DatabaseMetadataRetriever.java | 136 ++++++ .../DatabaseMetadataRetrieverException.java | 41 ++ .../DatabaseMetadataRetrieverFactory.java | 56 +++ .../resources/config-spring-geonetwork.xml | 8 +- .../DatabaseMetadataRetrieverFactoryTest.java | 71 +++ .../admin/harvester/partials/extras.html | 3 +- .../resources/catalog/locales/en-admin.json | 24 +- .../admin/harvest/type/database.html | 241 +++++++++++ .../templates/admin/harvest/type/database.js | 102 +++++ .../webapp/xsl/xml/harvesting/database.xsl | 63 +++ 20 files changed, 1546 insertions(+), 39 deletions(-) create mode 100644 docs/manual/docs/user-guide/harvesting/harvesting-database.md create mode 100644 docs/manual/docs/user-guide/harvesting/img/add-database-harvester.png create mode 100644 harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseHarvester.java create mode 100644 harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseHarvesterAligner.java create mode 100644 harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseHarvesterParams.java create mode 100644 harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetriever.java create mode 100644 
harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetrieverException.java create mode 100644 harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetrieverFactory.java create mode 100644 harvesters/src/test/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetrieverFactoryTest.java create mode 100644 web-ui/src/main/resources/catalog/templates/admin/harvest/type/database.html create mode 100644 web-ui/src/main/resources/catalog/templates/admin/harvest/type/database.js create mode 100644 web/src/main/webapp/xsl/xml/harvesting/database.xsl diff --git a/docs/manual/docs/user-guide/harvesting/harvesting-database.md b/docs/manual/docs/user-guide/harvesting/harvesting-database.md new file mode 100644 index 00000000000..77048147941 --- /dev/null +++ b/docs/manual/docs/user-guide/harvesting/harvesting-database.md @@ -0,0 +1,43 @@ +# Database Harvesting {#database_harvester} + +This harvesting type uses a database connection to harvest metadata stored in a database table. + +## Adding a Database harvester + +To create a Database harvester, go to `Admin console` > `Harvesting` and select `Harvest from` > `Database`: + +![](img/add-database-harvester.png) + +Provide the following information: + +- **Identification** + - *Node name and logo*: A unique name for the harvester and optionally a logo to assign to the harvester. + - *Group*: Group which owns the harvested records. Only the catalog administrator or users with the profile `UserAdmin` of this group can manage the harvester. + - *User*: User who owns the harvested records. + +- **Schedule**: Scheduling options to execute the harvester. If disabled, the harvester should be executed manually from the harvesters page. If enabled, a schedule expression using cron syntax should be configured ([See examples](https://www.quartz-scheduler.org/documentation/quartz-2.1.7/tutorials/crontrigger)).
+ +- **Configure connection to Database** + - *Server*: The database server IP/Hostname. + - *Port*: The database port. For example, for Postgres usually 5432. + - *Database name*: The name of the database to connect to. + - *Table name*: Table name with the metadata. The name must begin with a letter (a-z) or underscore (_). Subsequent characters in a name can be letters, digits (0-9), or underscores. + - *Metadata field name*: Table field name that contains the metadata. The name must begin with a letter (a-z) or underscore (_). Subsequent characters in a name can be letters, digits (0-9), or underscores. + - *Database type*: Database type. Postgres and Oracle are currently supported. + - *Remote authentication*: Credentials to connect to the database. + +- **Search filter**: defines a simple field condition to filter the results. + - *Filter field*: Table field name used to filter the results. The name must begin with a letter (a-z) or underscore (_). Subsequent characters in a name can be letters, digits (0-9), or underscores. + - *Filter value*: Value to filter the results. It can contain wildcards (%). + +- **Configure response processing for database** + - *Action on UUID collision*: When a harvester finds the same uuid on a record collected by another method (another harvester, importer, dashboard editor,...), should this record be skipped (default), overridden, or given a newly generated UUID? + - *Validate records before import*: If checked, the metadata will be validated after retrieval. If the validation does not pass, the metadata will be skipped. + - *XSL filter name to apply*: (Optional) The XSL filter is applied to each metadata record. The filter is a process which depends on the metadata schema (see the `process` folder of the metadata schemas).
+ + It could be composed of parameter which will be sent to XSL transformation using the following syntax: `anonymizer?protocol=MYLOCALNETWORK:FILEPATH&email=gis@organisation.org&thesaurus=MYORGONLYTHEASURUS` + + - *Batch edits*: (Optional) Allows to update harvested records, using XPATH syntax. It can be used to add, replace or delete element. + - *Translate metadata content*: (Optional) Allows to translate metadata elements. It requires a translation service provider configured in the System settings. + +- **Privileges** - Assign privileges to harvested metadata. diff --git a/docs/manual/docs/user-guide/harvesting/img/add-database-harvester.png b/docs/manual/docs/user-guide/harvesting/img/add-database-harvester.png new file mode 100644 index 0000000000000000000000000000000000000000..f8999ee8223905dcfe71e402371c361bbadd7fa3 GIT binary patch literal 8285 zcmZX31z1$w7AP^m0MZ@8FtpM=bPXXOtw;|&z|bv9BMdDP(p^#tNXO8Mbcm#+bV`G~ z@xS-od%yS2_wBRy*{jZ8d!JP&TIZ=M5dl2`1_lO^x|*^cy5B-K1P~W}|1iZ%fq{W( z0#{PfQCCs|>3F!>!JTa}Fu0Pu6NfdhbSe81?6@E?d3n?dp&z<5z4Cml^p!0|G|!t~ z#wSxtw)aOnJik|}cYIhi*TPjG#}Xy$FwGJ5Ib`gO<^^QhVf-cIK|)V^S`W@G`C1zz zhK?XYBa=RXhqp#KR2v9ZrdLScHq6nT=4trZ`nmI(D6qhsD8{V0-C+w-b@s6Tkos2G zbvNi{-uyYLY{x)O?b8S^!x4MbG$NC5iT$8~i}JYRXTi`w=qX>W=u3|b#X$9isx+gJ z%MgiF*W(yoQ*PI73p(Y1F9N@Qm7OQ!3WTWJY^&yhqjmK{r8jcvnjwXj*o;!OwUOW_ zwQxx#VpTfaV-d}*C{`7ob9R;(llzYRUkGz->o7n<&H&fVHvZV@j=ye8+B>Wfdq~~v zsO_sWd2i7aFNi6&Bnjg$TQVRqW}L}@e+}g68gQ!#k8dZ}Io_(g;aUwFt?g&FM(TE2 zS{RSeV;~0R8#o34J;FpcdUV6UzrI>p#$~Jna9B5q}XC^p(`r(Y?O4 zhpnxPr-Q4Piq88dXjU_D10ydZElo*lR|Fr-#?{J}&ky1DmjpxFPZB*u*m}W0eh6n5 zPf0%+@IM%m=<(laelX}C6fY+ku#uJyNXga179_?ez$XBfB>;gy(jGQ;l6uN2|0YL2 z$$%Zayxb)D`F(wT`Fw@>Ts`dh1tAa!zkm?GkPt5#gV)pF#S7-g>*C4!FDCz&kFu?& zwFlhI3-0Oy`pXw)Osqp6%>F-|AYVkLH;Z8e<+RqODQBQ^uLt<1Nna`4Lxl=lw1*L zkzTU@-I;$A|1a?0gwp(fSN=bm_?MdhnMLbamOz^SKYJ!iUU!Fa4c_sbBScYmbLZfrnz*Sj^pk5-ga)kS#dz(3d4lu{lU7L2oY6bTh; zNGbmu$(HguUWKBuI=6Cz{BU>7C?r`@0c?)MthbpE~}TShi%8m;R@ 
z9k;&Tr14p~TL=xk(j~z}ohsVF3wSIJN2sqoBDpIG|nQ!Fx-kFMa zoT*eFO63(k)y|eok-gayK3$lUp6l}-e^{Qcyt}MKzs&GWblJEX@N}x+BWflE9nItfOC&Im=bz=;I5%2{>qweo}fvJXHWC z{F?HZ#;hao{j(bLVdKlIU%91^3@g&NWu_@y>8H$SObgs}C0hb$%$7ahy6nk7X&V11 z5LL{q>~vmV3QN{{QJgv7F`3PJBof!mp$_V};}S4(z6Ou9Z%xc7{SNHc4CmWAi9Y4> zvj;^S$fEpDe|_!YuCpCcyh(Fj572o&wuw<;(ID1tT!Lk`xmFoJ+_@r}W zRJfxw)tiCANv>_nw@?N_FY?RB2QeOh{--u)S8*r6&~c*t?52sh9`j1C^vN&|(V=K? zjpx_W!tI^-!YunAz0H*k%qXWJ!F@QwZBS z`)*Mx`|4T%qRXf@SFAy^v(zpHBmbI|_xSdp_3&+OE^~w9G;*f$>H^RA_VQT7X@=*I zZV1QgwK#Eyxq7&r;MUX8EJ*Us_nJ|6TK?r0+g8R2O-8MRAt3T2tRu_++u9HN9(7Bi zp-fhBnPl*_g@lc$nr__$gP3ZF^c8{+(N}c!?(V9x-sbL6#_xUSC)>$y`-6l+C)dBF zW(Rt16psd+m$t%u;lEs8a+_t;Pj$F!)Wtq?6)kSP=;8WZ!%!bE=W9?L$Zp*Fignnc zA-%?SpnE#YXJxD0V65!rinfNsNdZJfMpnhwo6$_Tex*c4N@93A(em2%J9VA&apiJz zgktsm7ZRok+#DHoxPEE=iVU6(s;$H8a$WF|S&>A3yhyp;_%~*nv7E4*a;q;M_1;Sa zd>k(~D*>^YQ-Lwm)z?W_qgiWtLLj_>N>jeh)AQ z9bwA79$W=;iAzYNJASX;uT8HuYxQXSE=z7^?-g;n{-tMeDHMlrqALU&zS!cO^?RaN zLj=|hV44I!lx@!xvL$F_z~^|HrRET{J5$xnSgCa|{UPm@sGy{;I!&YJshG%hVLtI= z*(R$GT9*~PyJd^POk%K+r+E))60$dVG6LJ$!J*ne*MF_Px~w$ zDaKWN->4^lgZELn=leENVUNf&RVd`~=fLF9YSVSLn2wC6uFtAR{Moe&nGo;WKoS9j z3eWj;Q$7<(To+?Sq))w_*&+6;p*a%|AdI(}`i8$Q=M-94rcCE^wqxib-tti}X?wC% zKZ1a2aCtemZo0xSrqd}~+Fxk8oMP$mP!>cujzK(LkG`GPLFYHt(<})Ev^&xq881}L zgpZrK&Q#Jpgm{FbEtB0OW$Y_=iMK9`Eo8(|8o-vdE~bo54?zl*N0kQ`lKCc=iy1+9 z>su|AOV=@M(GxOW;6#-XTBqb{&(qHa`*Jnzv_So$9AM%tY`Hq7}lSFKA(%q;9%0m+}P^ua!8F+TlXb}2b*_D6U&+dA9!Wmym*>6A|WX2f6iwb^)Jp z;v^Gaml*NT$AEfHxE?DfpY*Hi-+^}p`TMA*S2tq$Qb_GJWj7{TTD34dl2nk-cnb9S z87l1P#nq>iTjM|g71mtx)RU*{zW53W z*${Q+ct*B;nD5yXRCA0Za^Q9UiLnE)Y91zK{d{Qa1SucGXwsV58dExwo4<%hqBE|@ z@7}085nOp9!0LP(N_U%4;vN?;)#jtbF}UIMJWgoBBX!>QaBO(~KAu4_mXC)cD6jZ} znHZ#b{_a&VTf0eTvtYHe;%naBvGTk+J+s&aPnGWmfOgC&1QkR(jpHGn6n&=5>)XK$ zcD+s+^<*#vQ~tB_4_drHX}*k60?qqBckd1A>_^4I6m)=KO}&Ma-?$ZFBcdx@@tJ$6F)>SQg?d!fmNq~xuwZ3>Z#A@%wp+8(jp#iYqI zhs9{wL(#-JdgIGU{1r2Bxp2$DGPS4BSWZXt>9O*3SSg88A-#7qmscs_i)!wc4XG!b zY|yOfLoS431UfK7w#6~?+(ZyKkVNv>4yDG`!!OsX(5Llwsor`ZKHQ}o*wQKGdr(9o 
zdnq3}3r(Y%JNk(Vn`&~m7A8KlnJm$AaFCgIZBbpW_wHr8J1CJ_j%wFCC zdky+WGF8&^7o~k6XY_(mUnzMNnA;|k{B zxW)S0EphgiHtl=^^K95NTK9Aj@*hrK)sVN#=f@vii2ytC-Lh zch+Cv8UKqWT39!tP!I+{k*KUO?a$s{GS9n<;&H45$=%-y^O`m#3T$I__ZkpTJ?f?R z+8WCjn>$H)ZVszT<<>6&LaolJ4(;9$G1&DtAm&r&8eKL|T7 zSN*FADELtQm(!BxvFZ#$a%o9IMHlJ&=)!>wQbDR5;Wz`sjw zuy-DE^Y7r~I*u09#rcK5NYtiCa=27ij{6LByu;FRvfsEoQKq{h7ddird%2cSd6(6v z$|=pm6HCKw>72o$6v;@QR_FaDRc@AOuDz;&LEoSJ9TkA}mRmZXYz`NlCAt00js*GW zD}a6gbXt_qL^KD4b_rrh;(Eu%M5 zsctTzi8(L#BswQD*qjBF%qNi#!6qOd}~k9U?Ej;=aLp8oT&(PEYw`|$Ux#(522;P}s7$(2C2DoUQREp#gP zw{$A=;)wBmJTtE}R#pTcG!zAb-=o4DE)Eu3LTX}M6!o}p7md@gj_(5jDPkQG?rgq? zOVdPw4>2gX@i#t@s9q_D85@2O?iB45jT1G!Lk5E_V}gY}jC-=L5dC@2CZa7&%P0Pr zc264uC@}|sK>JFz`V!?P(YR^X4!b1zrr8@Z+}Foc&EY152A$~SwD%z5 ztx01lwEQPJ$EUQEV<3!ImQ?7IqDBj_d4QZlsWNKnM3c^NXr9(TgM$27K}D3ZSVa0^ z#@NZ?ngM4>`EC$KXy_YKmd8OPk23VZ6ea$~AHa!iWTb!anOEl8@KU1IiIUCR!HELE z;Gh}oDgn_lWj4)8G&gxNQsQo^d|G#PotBEH)8WH&AaJXTDa zt@&s_niD?!Vk4rq(CaPP{4~^Fn?${)8cfOnu;Hg=;iN>S#f2B3CFXY>8T3C$6^g3afU%x z8#KfNDwT;~C^9{_4>knZ!@=KwfOr(jJN~iG5FLuYs0*zHouPI1zV>`OaVCHZg2Upy zMuEAKJKdm5ID>)*ir8G8Y#1~+8V0OH(3JV?&xKcQC?7RzSz3aUg+)Z(;YVwk;t|m+ z^LJc=UtMpPuJ7&+_4cLL$%kN7ZhyQQIa=xQFEj{z^yU~`Z)ISn_k-gQ9ZF3A6x%OD zYMD?dlwDf@Wyqy+S5A*@OM;U@`OV5>GEQiy__Okc*{`z6ZkS$}Shxl_U=s)Ie@z7P49& zZ?ri&o-RX&s}onJzu?!uw|`G+%e^0nr4#lOvPYLE?53BNR-VWhRv6rVRgMY=q(?+V zX3GW1E_S@iv>VQV((ui?=qFr!Aa$^`52dqQ?~|HDSA`%NB`ZkzP}ZNRdjAGoON zAn_p>1hi=JDvzU15gE=Dx7SPKGs_^L;nV!^tZLQeut+`GkDh)}-(d_B8`n*`r3%Eq zKKWm31NaLBThCfs#;gbZ#a6To*g{7~(z zY-B{sYw|gkGu-Cd=<9%gGKZF50bY&D9!`ytKy!E>kd!o_H;Syy3it~hudxON1??;} z3qO4!@29zA#2GCx(uaux<7g^tWl1F2M?j9Y<(wp)B+T((DQ}Tr(q-W6>6mF5D27Tt zfC_}kNpeT|Fo?MJ`=cR~PV>5X*(VtUbb)l$t}lih2pwS*8$% zYw*cNUCe`MlFd&pri$KD322d{#EJ_59Lad3n!A8vktj#d1H+zvvTyFlEVd%e0`M&q5^~(~SJ& zd=D6tUC;p`fE5NJjw0B7xitfRpd3>E)GJLe5=T9|IIV@6&vfwSGw2~TtgD1fjVL5U zaP~)pjOi^#SFqUU_~0+v>1T}ME{vj1GqL+LD0*3ER+uXd>G9pgaw)n@rrY`YZgz@O 
zEIc$tkqX+C0&GG&*r^=1`U>brfyVx(k9H=FAbub~BXGjKS{qJDq%M*5-J3WRI&MwhjRH&G6BV$O;*rg0XTwPl>~z;v?(nbyzaQ|^VUMyB8DVqlI& ze#@T`46YU7d|V=rotIQN#?=qk)~xlzv*i4af?FE|!sGZvS{l*i8{x@rNzhzKwO}Pq zJe_dlQCP4`gHgLGJ;d^~Q7cnyAzO@4Hvvql3NQ*Ye;}!F9&{~igdvG$C#LDe^u;3UX z%lPc>5DaJ|&c=Uzvh1?IH4y;X*TTbmO4M_WqSZv?{lL05lFMB zt@&VosXbGaM#Keeg8e8go$rI=QotGs5d_c}>J7e1XW^*#FbAc*6B*Yf=}+;jA0q(f zyYi2%7-JD6D@aR4g1RmcCr7jn@oP&^RL_H2O?q`EZ>o?jP;ff}X1KKECQ4C{m=xbe zh20!K8QT&ehab6+(-X;&kEPf8B}5l#z88Yscnk z!by9vrM;rrzs-@ZN5Bx@Fg~D79kh#1p*bdVZj1v|#tCHd*_GuoYTS*@LjMSy2=kjk z;8uOdw|emwac^djH&Fh~Yv`~v$KG+jDqBt@d!mua^ zN}BXrV)rABrY3S%UTk>PhPttPF;5e)ZnLj>Cim#lA`U%G`n#CGeBm^m3ZsGH;V{{Nlp%-aV=nly#3W+lAjK3J0Y5d`n$*JlKFrj~;h{jQU~{!M9*NISOeR?#Y}fzk4Us!Y zVf2u@zq{I0mA{A5U}E57CKjN=A7FjDrSI;Wv#JdEw29pe0={9!8w~FW7>SMz8jQIP z&y!F!=h;k@ta3LFx?IanKT5lKe6b9pdCmm%e#nX;gdxvTDw@)Zf=Uw?Q7K3;(URhW z#T6%37~%5LzzKp7cfR8zLP!#g|GGTmZ5j5PnOuqE0U3kPRd{>^Fh3mVme0PT818q( zs;S1kO_m@e4;l}a7?^+-DYPZdJ`pPF`&&}ds}5(C+nKtiAo+8N>bo0gPrj)HY^X{E z+=tVu5n(`w@L#MT`<0|e6lm3BZksUQlE5QOxhS?-fD|FjSD>RcYxm|5F4ZU=+kC!T wojj%{(ki)IbAO>fO&1bwavSfvWbWjOdxC$2yRN_g`!iV``c%1E(K7V^0M1N0H~;_u literal 0 HcmV?d00001 diff --git a/docs/manual/docs/user-guide/harvesting/index.md b/docs/manual/docs/user-guide/harvesting/index.md index 46f52f782c5..4643441ddfe 100644 --- a/docs/manual/docs/user-guide/harvesting/index.md +++ b/docs/manual/docs/user-guide/harvesting/index.md @@ -17,7 +17,8 @@ The following sources can be harvested: - [GeoPortal REST Harvesting](harvesting-geoportal.md) - [THREDDS Harvesting](harvesting-thredds.md) - [WFS GetFeature Harvesting](harvesting-wfs-features.md) -- [Z3950 Harvesting](harvesting-z3950.md) +- [Z3950 Harvesting](harvesting-z3950.md) +- [Database Harvesting](harvesting-database.md) ## Mechanism overview diff --git a/docs/manual/mkdocs.yml b/docs/manual/mkdocs.yml index 73af7ac42b5..401889ff356 100644 --- a/docs/manual/mkdocs.yml +++ b/docs/manual/mkdocs.yml @@ -303,6 +303,7 @@ nav: - user-guide/harvesting/harvesting-webdav.md -
user-guide/harvesting/harvesting-wfs-features.md - user-guide/harvesting/harvesting-z3950.md + - user-guide/harvesting/harvesting-database.md - user-guide/export/index.md - 'Administration': - administrator-guide/index.md diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/BaseAligner.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/BaseAligner.java index 101f6fd78ab..c7ff631c252 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/BaseAligner.java +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/BaseAligner.java @@ -28,6 +28,7 @@ import org.fao.geonet.domain.AbstractMetadata; import org.fao.geonet.domain.MetadataCategory; import org.fao.geonet.kernel.DataManager; +import org.fao.geonet.kernel.GeonetworkDataDirectory; import org.fao.geonet.kernel.SchemaManager; import org.fao.geonet.kernel.datamanager.IMetadataManager; import org.fao.geonet.kernel.harvest.harvester.AbstractHarvester; @@ -199,4 +200,38 @@ public Element translateMetadataContent(ServiceContext context, return md; } + + /** + * Filter the metadata if process parameter is set and corresponding XSL transformation + * exists in xsl/conversion/import. + * + * @param context + * @param md + * @param processName + * @param processParams + * @param log + * @return + */ + protected Element applyXSLTProcessToMetadata(ServiceContext context, + Element md, + String processName, + Map processParams, + org.fao.geonet.Logger log) { + Path filePath = context.getBean(GeonetworkDataDirectory.class).getXsltConversion(processName); + if (!Files.exists(filePath)) { + log.debug(" processing instruction " + processName + ". 
Metadata not filtered."); + } else { + Element processedMetadata; + try { + processedMetadata = Xml.transform(md, filePath, processParams); + log.debug(" metadata filtered."); + md = processedMetadata; + } catch (Exception e) { + log.warning(" processing error " + processName + ": " + e.getMessage()); + } + } + return md; + } + + } diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/AbstractHarvester.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/AbstractHarvester.java index 2398aa96c10..753192b62a0 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/AbstractHarvester.java +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/AbstractHarvester.java @@ -45,7 +45,9 @@ import org.fao.geonet.exceptions.UnknownHostEx; import org.fao.geonet.kernel.DataManager; import org.fao.geonet.kernel.MetadataIndexerProcessor; +import org.fao.geonet.kernel.datamanager.IMetadataIndexer; import org.fao.geonet.kernel.datamanager.IMetadataManager; +import org.fao.geonet.kernel.datamanager.IMetadataSchemaUtils; import org.fao.geonet.kernel.datamanager.IMetadataUtils; import org.fao.geonet.kernel.harvest.Common.OperResult; import org.fao.geonet.kernel.harvest.Common.Status; @@ -128,6 +130,8 @@ public abstract class AbstractHarvester processParams) { - Path filePath = context.getBean(GeonetworkDataDirectory.class).getXsltConversion(processName); - if (!Files.exists(filePath)) { - log.debug(" processing instruction " + processName + ". Metadata not filtered."); - } else { - Element processedMetadata; - try { - processedMetadata = Xml.transform(md, filePath, processParams); - log.debug(" metadata filtered."); - md = processedMetadata; - } catch (Exception e) { - log.warning(" processing error " + processName + ": " + e.getMessage()); - } - } - return md; - } - /** * Retrieves the list of metadata uuids that have the same dataset identifier. 
* diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseHarvester.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseHarvester.java new file mode 100644 index 00000000000..51ad7dc1b18 --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseHarvester.java @@ -0,0 +1,73 @@ +//============================================================================= +//=== Copyright (C) 2001-2024 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. +//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. +//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. 
email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.database; + +import org.fao.geonet.Logger; +import org.fao.geonet.kernel.harvest.harvester.AbstractHarvester; +import org.fao.geonet.kernel.harvest.harvester.HarvestResult; + +import java.sql.SQLException; + +public class DatabaseHarvester extends AbstractHarvester { + private static final String TABLE_NAME_PATTERN = "([_a-zA-Z]+[_a-zA-Z0-9]*)"; + private static final String FIELD_NAME_PATTERN = "([_a-zA-Z]+[_a-zA-Z0-9]*)"; + + @Override + protected DatabaseHarvesterParams createParams() { + return new DatabaseHarvesterParams(dataMan); + } + + @Override + protected void storeNodeExtra(DatabaseHarvesterParams params, String path, String siteId, String optionsId) throws SQLException { + // Remove non-valid characters + params.setTableName(params.getTableName().replaceAll("[^" + TABLE_NAME_PATTERN + "]", "")); + params.setMetadataField(params.getMetadataField().replaceAll("[^" + FIELD_NAME_PATTERN + "]", "")); + params.setFilterField(params.getFilterField().replaceAll("[^" + FIELD_NAME_PATTERN + "]", "")); + + setParams(params); + + harvesterSettingsManager.add("id:" + siteId, "icon", params.getIcon()); + harvesterSettingsManager.add("id:" + siteId, "server", params.getServer()); + harvesterSettingsManager.add("id:" + siteId, "port", params.getPort()); + harvesterSettingsManager.add("id:" + siteId, "username", params.getUsername()); + harvesterSettingsManager.add("id:" + siteId, "password", params.getPassword()); + harvesterSettingsManager.add("id:" + siteId, "database", params.getDatabase()); + harvesterSettingsManager.add("id:" + siteId, "databaseType", params.getDatabaseType()); + harvesterSettingsManager.add("id:" + siteId, "tableName", params.getTableName()); + harvesterSettingsManager.add("id:" + siteId, "metadataField", params.getMetadataField()); + harvesterSettingsManager.add("id:" + siteId, 
"xslfilter", params.getXslfilter()); + + String filtersID = harvesterSettingsManager.add(path, "filter", ""); + harvesterSettingsManager.add("id:" + filtersID, "field", params.getFilterField()); + harvesterSettingsManager.add("id:" + filtersID, "value", params.getFilterValue()); + } + + @Override + protected void doHarvest(Logger l) throws Exception { + log.info("Database harvester start"); + DatabaseHarvesterAligner h = new DatabaseHarvesterAligner(cancelMonitor, log, context, params); + result = h.harvest(log); + log.info("Database harvester end"); + } +} diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseHarvesterAligner.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseHarvesterAligner.java new file mode 100644 index 00000000000..d6fe5cba40b --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseHarvesterAligner.java @@ -0,0 +1,408 @@ +//============================================================================= +//=== Copyright (C) 2001-2024 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. +//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. 
+//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.database; + +import com.google.common.collect.Sets; +import jeeves.server.context.ServiceContext; +import org.fao.geonet.GeonetContext; +import org.fao.geonet.Logger; +import org.fao.geonet.constants.Geonet; +import org.fao.geonet.domain.*; +import org.fao.geonet.exceptions.NoSchemaMatchesException; +import org.fao.geonet.kernel.DataManager; +import org.fao.geonet.kernel.UpdateDatestamp; +import org.fao.geonet.kernel.datamanager.IMetadataIndexer; +import org.fao.geonet.kernel.datamanager.IMetadataManager; +import org.fao.geonet.kernel.datamanager.IMetadataSchemaUtils; +import org.fao.geonet.kernel.datamanager.IMetadataUtils; +import org.fao.geonet.kernel.harvest.BaseAligner; +import org.fao.geonet.kernel.harvest.harvester.*; +import org.fao.geonet.kernel.search.IndexingMode; +import org.fao.geonet.repository.MetadataRepository; +import org.fao.geonet.repository.OperationAllowedRepository; +import org.fao.geonet.repository.specification.MetadataSpecs; +import org.fao.geonet.utils.Xml; +import org.jdom.Element; +import org.springframework.util.StringUtils; + +import java.util.*; +import java.util.concurrent.atomic.AtomicBoolean; + +import static org.fao.geonet.kernel.harvest.harvester.csw.Aligner.applyBatchEdits; + +class DatabaseHarvesterAligner extends BaseAligner implements IHarvester { + private Logger log; + private ServiceContext context; + private DataManager dataMan; + private IMetadataManager metadataManager; + private IMetadataUtils metadataUtils; + private IMetadataIndexer 
metadataIndexer; + private IMetadataSchemaUtils metadataSchemaUtils; + private HarvestResult result; + private CategoryMapper localCateg; + private GroupMapper localGroups; + private UUIDMapper localUuids; + private List errors = new LinkedList<>(); + private List idsForHarvestingResult; + private String processName; + private Map processParams = new HashMap<>(); + + @Override + public List getErrors() { + return errors; + } + + public DatabaseHarvesterAligner(AtomicBoolean cancelMonitor, Logger log, ServiceContext context, DatabaseHarvesterParams params) { + super(cancelMonitor); + this.log = log; + this.context = context; + this.params = params; + + result = new HarvestResult(); + result.addedMetadata = 0; + result.uuidSkipped = 0; + result.datasetUuidExist = 0; + result.couldNotInsert = 0; + + GeonetContext gc = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME); + dataMan = gc.getBean(DataManager.class); + metadataManager = gc.getBean(IMetadataManager.class); + metadataSchemaUtils = gc.getBean(IMetadataSchemaUtils.class); + metadataUtils = gc.getBean(IMetadataUtils.class); + metadataIndexer = gc.getBean(IMetadataIndexer.class); + } + + @Override + public HarvestResult harvest(Logger log) throws Exception { + this.log = log; + if (log.isDebugEnabled()) { + log.debug("Retrieving remote metadata information for : " + params.getName()); + } + + DatabaseMetadataRetriever metadataRetriever = DatabaseMetadataRetrieverFactory.getDatabaseMetadataRetriever(params.getDatabaseType(), params.getServer(), params.getPort(), + params.getDatabase(), params.getUsername(), params.getPassword(), log); + + log.info("Start of alignment for : " + params.getName()); + result = new HarvestResult(); + //---------------------------------------------------------------- + //--- retrieve all local categories and groups + //--- retrieve harvested uuids for given harvesting node + localCateg = new CategoryMapper(context); + localGroups = new GroupMapper(context); + localUuids = 
new UUIDMapper(context.getBean(IMetadataUtils.class), params.getUuid()); + + Pair> filter = HarvesterUtil.parseXSLFilter(params.getXslfilter()); + processName = filter.one(); + processParams = filter.two(); + + + metadataManager.flush(); + + idsForHarvestingResult = new ArrayList<>(); + + metadataRetriever.processMetadata(cancelMonitor, params, this); + + // + // delete locally existing metadata from the same source if they were + // not in this harvesting result + // + deleteLocalMetadataNotInDatabase(idsForHarvestingResult); + + return result; + } + + public void align(String metadata) { + if (cancelMonitor.get()) { + return; + } + + try { + result.totalMetadata++; + + if (!StringUtils.hasLength(metadata)) { + log.info("Processing empty metadata xml. Skipping"); + return; + } + + // create JDOM element from String-XML + Element metadataElement = Xml.loadString(metadata, false); + String id = processMetadata(metadataElement); + + if (StringUtils.hasLength(id)) { + idsForHarvestingResult.add(Integer.valueOf(id)); + } + + } catch (Exception ex) { + log.error("Unable to process record from database (" + this.params.getName() + ")"); + log.error(" Record failed. Error is: " + ex.getMessage()); + log.error(ex); + errors.add(new HarvestError(this.context, ex)); + } finally { + result.originalMetadata++; + } + } + + private void deleteLocalMetadataNotInDatabase(List idsForHarvestingResult) throws Exception { + Set idsResultHs = Sets.newHashSet(idsForHarvestingResult); + List existingMetadata = context.getBean(MetadataRepository.class).findIdsBy(MetadataSpecs.hasHarvesterUuid(params.getUuid())); + for (Integer existingId : existingMetadata) { + if (cancelMonitor.get()) { + return; + } + + if (!idsResultHs.contains(existingId)) { + log.debug(" Removing: " + existingId); + metadataManager.deleteMetadata(context, existingId.toString()); + result.locallyRemoved++; + } + } + } + + + /** + * Process a metadata to add it to the catalog and returns the identifier. 
+ * + * @param metadataElement + * @return + * @throws Exception + */ + private String processMetadata(Element metadataElement) throws Exception { + + String id = ""; + + String schema = getMetadataSchema(metadataElement); + + if (schema == null) { + log.info("Skipping metadata with unknown schema."); + result.unknownSchema++; + return id; + } + + String uuid = metadataUtils.extractUUID(schema, metadataElement); + + if (!StringUtils.hasLength(uuid)) { + log.info("No metadata uuid. Skipping."); + result.badFormat++; + return id; + } + + log.info(String.format("Processing metadata with UUID: %s", uuid)); + + try { + Integer groupIdVal = null; + if (StringUtils.hasLength(params.getOwnerIdGroup())) { + groupIdVal = Integer.parseInt(params.getOwnerIdGroup()); + } + + params.getValidate().validate(dataMan, context, metadataElement, groupIdVal); + } catch (Exception e) { + log.error("Ignoring invalid metadata with uuid " + uuid); + result.doesNotValidate++; + return id; + } + + setParams(params); + + // + // add / update the metadata from this harvesting result + // + id = metadataUtils.getMetadataId(uuid); + if (id == null) { + //Record is new + id = addMetadata(metadataElement, uuid, schema); + result.addedMetadata++; + } else if (localUuids.getID(uuid) == null) { + //Record does not belong to this harvester + result.datasetUuidExist++; + + switch (params.getOverrideUuid()) { + case OVERRIDE: + updateMetadata(metadataElement, Integer.toString(metadataUtils.findOneByUuid(uuid).getId()), true); + log.debug(String.format("Overriding record with uuid %s", uuid)); + result.updatedMetadata++; + break; + case RANDOM: + log.debug(String.format("Generating random uuid for remote record with uuid %s", uuid)); + addMetadata(metadataElement, UUID.randomUUID().toString(), schema); + break; + case SKIP: + log.debug(String.format("Skipping record with uuid %s", uuid)); + result.uuidSkipped++; + break; + default: + break; + } + } else { + //record exists and belongs to this harvester + 
updateMetadata(metadataElement, id, false); + result.updatedMetadata++; + } + + return id; + } + + private void updateMetadata(Element xml, String id, boolean force) throws Exception { + log.info("Updating metadata with id: " + id); + + // + // update metadata + // + boolean validate = false; + boolean ufo = false; + String language = context.getLanguage(); + + String schema = metadataSchemaUtils.autodetectSchema(xml); + String uuid = metadataUtils.extractUUID(schema, xml); + + String changeDate; + try { + changeDate = metadataUtils.extractDateModified(schema, xml); + } catch (Exception ex) { + log.error("Database harvester - updateMetadata - can't get metadata modified date for metadata id= " + id + + ", using current date for modified date"); + changeDate = new ISODate().toString(); + } + + boolean updateSchema = false; + if (StringUtils.hasLength(params.getXslfilter())) { + xml = applyXSLTProcessToMetadata(context, xml, processName, processParams, log); + String newSchema = metadataSchemaUtils.autodetectSchema(xml); + updateSchema = (newSchema != null) && !newSchema.equals(schema); + schema = newSchema; + } + + applyBatchEdits(uuid, xml, schema, params.getBatchEdits(), context, log); + + // Translate metadata + if (params.isTranslateContent()) { + xml = translateMetadataContent(context, xml, schema); + } + + final AbstractMetadata metadata = metadataManager.updateMetadata(context, id, xml, validate, ufo, language, changeDate, + true, IndexingMode.none); + + if (force || updateSchema) { + if (force) { + //change ownership of metadata to new harvester + metadata.getHarvestInfo().setUuid(params.getUuid()); + metadata.getSourceInfo().setSourceId(params.getUuid()); + + } + + if (updateSchema) { + metadata.getDataInfo().setSchemaId(schema); + } + + metadataManager.save(metadata); + } + + OperationAllowedRepository operationAllowedRepository = context.getBean(OperationAllowedRepository.class); + operationAllowedRepository.deleteAllByMetadataId(Integer.parseInt(id)); + 
addPrivileges(id, params.getPrivileges(), localGroups, context); + + metadata.getCategories().clear(); + addCategories(metadata, params.getCategories(), localCateg, context, null, true); + + metadataManager.flush(); + metadataIndexer.indexMetadata(id, true, IndexingMode.full); + } + + /** + * Inserts a metadata into the database. Lucene index is updated after insertion. + */ + private String addMetadata(Element xml, String uuid, String schema) throws Exception { + log.info(" - Adding metadata with remote uuid: " + uuid); + + // If the xslfilter process changes the metadata uuid, + // use that uuid (newMdUuid) for the new metadata to add to the catalogue. + String newMdUuid = null; + if (StringUtils.hasLength(params.getXslfilter())) { + xml = applyXSLTProcessToMetadata(context, xml, processName, processParams, log); + schema = metadataSchemaUtils.autodetectSchema(xml); + // Get new uuid if modified by XSLT process + newMdUuid = metadataUtils.extractUUID(schema, xml); + } + + boolean newMdUuidFromXslt = StringUtils.hasLength(newMdUuid); + + if (!newMdUuidFromXslt) { + applyBatchEdits(uuid, xml, schema, params.getBatchEdits(), context, log); + } else { + applyBatchEdits(newMdUuid, xml, schema, params.getBatchEdits(), context, log); + } + + // Translate metadata + if (params.isTranslateContent()) { + xml = translateMetadataContent(context, xml, schema); + } + + // + // insert metadata + // + ISODate createDate; + try { + createDate = new ISODate(metadataUtils.extractDateModified(schema, xml)); + } catch (Exception ex) { + log.error("Database harvester - addMetadata - can't get metadata modified date for metadata with uuid= " + + uuid + ", using current date for modified date"); + createDate = new ISODate(); + } + + AbstractMetadata metadata = new Metadata(); + metadata.setUuid(uuid); + metadata.getDataInfo(). + setSchemaId(schema). + setRoot(xml.getQualifiedName()). + setType(MetadataType.METADATA). + setCreateDate(createDate). 
+ setChangeDate(createDate); + metadata.getSourceInfo(). + setSourceId(params.getUuid()). + setOwner(Integer.parseInt(params.getOwnerId())). + setGroupOwner(getGroupOwner()); + metadata.getHarvestInfo(). + setHarvested(true). + setUuid(params.getUuid()); + + addCategories(metadata, params.getCategories(), localCateg, context, null, false); + + metadata = metadataManager.insertMetadata(context, metadata, xml, IndexingMode.none, false, UpdateDatestamp.NO, false, false); + + String id = String.valueOf(metadata.getId()); + + addPrivileges(id, params.getPrivileges(), localGroups, context); + + metadataIndexer.indexMetadata(id, true, IndexingMode.full); + + return id; + } + + private String getMetadataSchema(Element metadataElement) { + try { + return metadataSchemaUtils.autodetectSchema(metadataElement, null); + } catch (NoSchemaMatchesException ex) { + return null; + } + } +} diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseHarvesterParams.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseHarvesterParams.java new file mode 100644 index 00000000000..393d854261d --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseHarvesterParams.java @@ -0,0 +1,233 @@ +//============================================================================= +//=== Copyright (C) 2001-2024 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. 
+//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. +//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.database; + +import org.fao.geonet.Util; +import org.fao.geonet.exceptions.BadInputEx; +import org.fao.geonet.kernel.DataManager; +import org.fao.geonet.kernel.harvest.harvester.AbstractParams; +import org.jdom.Element; + +class DatabaseHarvesterParams extends AbstractParams { + /** + * Type of database + */ + private String databaseType; + + /** + * Name of the database server. + */ + private String server; + + /** + * Port number to use for connecting to the database server + */ + private int port; + + /** + * Name of the database. + */ + private String database; + + /** + * Name of the table with the metadata. + */ + private String tableName; + + /** + * Field name that contains the metadata. + */ + private String metadataField; + + /** + * Field name to filter the table. + */ + private String filterField; + + /** + * Value to filter the results. + */ + private String filterValue; + + /** + * The filter is a process (see schema/process folder) which depends on the schema. It could be + * composed of parameter which will be sent to XSL transformation using the following syntax : + *
+     * anonymizer?protocol=MYLOCALNETWORK:FILEPATH&email=gis@organisation.org&thesaurus=MYORGONLYTHEASURUS
+     * 
+ */ + private String xslfilter; + + private String icon; + + public String getDatabaseType() { + return databaseType; + } + + public void setDatabaseType(String databaseType) { + this.databaseType = databaseType; + } + + public String getServer() { + return server; + } + + public void setServer(String server) { + this.server = server; + } + + public int getPort() { + return port; + } + + public void setPort(int port) { + this.port = port; + } + + public String getDatabase() { + return database; + } + + public void setDatabase(String database) { + this.database = database; + } + + public String getTableName() { + return tableName; + } + + public void setTableName(String tableName) { + this.tableName = tableName; + } + + public String getMetadataField() { + return metadataField; + } + + public void setMetadataField(String metadataField) { + this.metadataField = metadataField; + } + + public String getFilterField() { + return filterField; + } + + public void setFilterField(String filterField) { + this.filterField = filterField; + } + + public String getFilterValue() { + return filterValue; + } + + public void setFilterValue(String filterValue) { + this.filterValue = filterValue; + } + + public String getXslfilter() { + return xslfilter; + } + + public void setXslfilter(String xslfilter) { + this.xslfilter = xslfilter; + } + + public void setIcon(String icon) { + this.icon = icon; + } + + @Override + public String getIcon() { + return icon; + } + + + public DatabaseHarvesterParams(DataManager dm) { + super(dm); + } + + @Override + public void create(Element node) throws BadInputEx { + super.create(node); + + Element site = node.getChild("site"); + + databaseType = Util.getParam(site, "databaseType", ""); + server = Util.getParam(site, "server", ""); + port = Util.getParam(site, "port", 0); + database = Util.getParam(site, "database", ""); + tableName = Util.getParam(site, "tableName", ""); + metadataField = Util.getParam(site, "metadataField", ""); + xslfilter = 
Util.getParam(site, "xslfilter", ""); + + Element filter = node.getChild("filter"); + filterField = Util.getParam(filter, "field", ""); + filterValue = Util.getParam(filter, "value", ""); + + icon = Util.getParam(site, "icon", ""); + } + + @Override + public void update(Element node) throws BadInputEx { + super.update(node); + + Element site = node.getChild("site"); + + databaseType = Util.getParam(site, "databaseType", ""); + server = Util.getParam(site, "server", ""); + port = Util.getParam(site, "port", 0); + database = Util.getParam(site, "database", ""); + tableName = Util.getParam(site, "tableName", ""); + metadataField = Util.getParam(site, "metadataField", ""); + xslfilter = Util.getParam(site, "xslfilter", ""); + + Element filter = node.getChild("filter"); + filterField = Util.getParam(filter, "field", ""); + filterValue = Util.getParam(filter, "value", ""); + + icon = Util.getParam(site, "icon", icon); + + } + + @Override + public DatabaseHarvesterParams copy() { + DatabaseHarvesterParams copy = new DatabaseHarvesterParams(dm); + copyTo(copy); + + copy.databaseType = databaseType; + copy.server = server; + copy.port = port; + copy.database = database; + copy.tableName = tableName; + copy.metadataField = metadataField; + copy.filterField = filterField; + copy.filterValue = filterValue; + copy.xslfilter = xslfilter; + copy.icon = icon; + + copy.setValidate(getValidate()); + + return copy; + } +} diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetriever.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetriever.java new file mode 100644 index 00000000000..236bce24da6 --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetriever.java @@ -0,0 +1,136 @@ +//============================================================================= +//=== Copyright (C) 2001-2024 Food and Agriculture Organization of the 
+//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. +//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. +//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.database; + +import org.apache.commons.dbcp2.BasicDataSource; +import org.fao.geonet.Logger; +import org.springframework.jdbc.core.namedparam.MapSqlParameterSource; +import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate; +import org.springframework.jdbc.core.namedparam.SqlParameterSource; +import org.springframework.util.StringUtils; + +import java.sql.*; +import java.util.concurrent.atomic.AtomicBoolean; + +class DatabaseMetadataRetriever { + private NamedParameterJdbcTemplate jdbcTemplate; + + protected Logger log; + + /** + * Constructor. + * + * @param connectionString An example of server string in case of Oracle + * is: "jdbc:oracle:thin:@84.123.79.19:1521:orcl". + * @param username the username to connect to the database. + * @param password the password to connect to the database. 
+ */ + public DatabaseMetadataRetriever(String driverName, String connectionString, String username, String password, Logger log) { + + try { + log.debug("Getting database connection (via JDBC)"); + + BasicDataSource dataSource = new BasicDataSource(); + dataSource.setDriverClassName(driverName); + dataSource.setUrl(connectionString); + dataSource.setUsername(username); + dataSource.setPassword(password); + // Test the connection config getting a connection and closing it. + dataSource.getConnection().close(); + + jdbcTemplate = new NamedParameterJdbcTemplate(dataSource); + + this.log = log; + } catch (SQLException x) { + log.error("Error getting database connection", x); + + throw new ExceptionInInitializerError(new DatabaseMetadataRetrieverException("Exception in getting database connection: can not connect to the database", x)); + } + } + + protected NamedParameterJdbcTemplate getJdbcTemplate() { + return this.jdbcTemplate; + } + + + /** + * Retrieves and process each metadata with the harvester aligner. 
+ * + * @param cancelMonitor + * @param params + * @param aligner + * @throws Exception + */ + public void processMetadata(AtomicBoolean cancelMonitor, DatabaseHarvesterParams params, DatabaseHarvesterAligner aligner) throws Exception { + String metadataTable = params.getTableName(); + String columnName = params.getMetadataField(); + String filterField = params.getFilterField(); + String filterValue = params.getFilterValue(); + + String sqlQuery; + SqlParameterSource param = new MapSqlParameterSource(); + + if (StringUtils.hasLength(filterField) && StringUtils.hasLength(filterValue)) { + sqlQuery = String.format("SELECT %s FROM %s WHERE %s LIKE :filter", columnName, metadataTable, filterField); + param = new MapSqlParameterSource("filter", filterValue); + } else { + sqlQuery = String.format("SELECT %s FROM %s", columnName, metadataTable); + } + + getJdbcTemplate().query(sqlQuery, param, rs -> { + // Cancel processing + if (cancelMonitor.get()) { + log.warning("Cancelling metadata retrieve using database connection"); + rs.getStatement().cancel(); + } + + String document; + int colId = rs.findColumn(columnName); + // very simple type check: + if (rs.getObject(colId) != null) { + if (rs.getMetaData().getColumnType(colId) == Types.BLOB) { + Blob blob = rs.getBlob(columnName); + byte[] bdata = blob.getBytes(1, (int) blob.length()); + document = new String(bdata); + + } else if (rs.getMetaData().getColumnType(colId) == Types.LONGVARBINARY) { + byte[] byteData = rs.getBytes(colId); + document = new String(byteData); + + } else if (rs.getMetaData().getColumnType(colId) == Types.LONGNVARCHAR || + rs.getMetaData().getColumnType(colId) == Types.LONGVARCHAR || + rs.getMetaData().getColumnType(colId) == Types.VARCHAR || + rs.getMetaData().getColumnType(colId) == Types.SQLXML) { + document = rs.getString(colId); + + } else { + throw new SQLException("Trying to harvest from a column with an invalid datatype: " + + rs.getMetaData().getColumnTypeName(colId)); + } + + 
aligner.align(document); + } + }); + } +} diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetrieverException.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetrieverException.java new file mode 100644 index 00000000000..464959b629a --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetrieverException.java @@ -0,0 +1,41 @@ +//============================================================================= +//=== Copyright (C) 2001-2024 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. +//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. +//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. 
email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.database; + +class DatabaseMetadataRetrieverException extends Exception { + public DatabaseMetadataRetrieverException() { + } + + public DatabaseMetadataRetrieverException(String message) { + super(message); + } + + public DatabaseMetadataRetrieverException(String message, Throwable cause) { + super(message, cause); + } + + public DatabaseMetadataRetrieverException(Throwable cause) { + super(cause); + } +} diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetrieverFactory.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetrieverFactory.java new file mode 100644 index 00000000000..591633b93cf --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetrieverFactory.java @@ -0,0 +1,56 @@ +//============================================================================= +//=== Copyright (C) 2001-2024 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. +//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. 
+//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.database; + +import org.fao.geonet.Logger; + +class DatabaseMetadataRetrieverFactory { + + private DatabaseMetadataRetrieverFactory() { + // Prevent creating instances + } + + public static final DatabaseMetadataRetriever getDatabaseMetadataRetriever(String dbType, String server, int port, + String database, String username, String password, + Logger log) throws DatabaseMetadataRetrieverException { + + DatabaseMetadataRetriever metadataRetriever; + + if (dbType.equalsIgnoreCase("postgresql")) { + String connectionString = "jdbc:postgresql://" + server + ":" + port + "/" + database; + String driverName = "org.postgresql.Driver"; + + metadataRetriever = new DatabaseMetadataRetriever(driverName, connectionString, username, password, log); + } else if (dbType.equalsIgnoreCase("oracle")) { + String connectionString = String.format("jdbc:oracle:thin:@%s:%d:%s", server, port, database); + String driverName = "oracle.jdbc.driver.OracleDriver"; + + metadataRetriever = new DatabaseMetadataRetriever(driverName, connectionString, username, password, log); + } else { + throw new DatabaseMetadataRetrieverException("Connection for database type " + dbType + " not supported"); + } + + return metadataRetriever; + } +} diff --git a/harvesters/src/main/resources/config-spring-geonetwork.xml b/harvesters/src/main/resources/config-spring-geonetwork.xml index 5bda0379065..f941a18842f 100644 --- a/harvesters/src/main/resources/config-spring-geonetwork.xml +++ 
b/harvesters/src/main/resources/config-spring-geonetwork.xml @@ -61,16 +61,18 @@ scope="prototype"/> + + - - + diff --git a/harvesters/src/test/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetrieverFactoryTest.java b/harvesters/src/test/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetrieverFactoryTest.java new file mode 100644 index 00000000000..ff0b35fee3d --- /dev/null +++ b/harvesters/src/test/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetrieverFactoryTest.java @@ -0,0 +1,71 @@ +//============================================================================= +//=== Copyright (C) 2001-2024 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. +//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. +//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. 
email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.database; + +import org.fao.geonet.Logger; +import org.fao.geonet.constants.Geonet; +import org.fao.geonet.utils.Log; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class DatabaseMetadataRetrieverFactoryTest { + + @Test + public void testNonValidDatabaseType() { + Logger log = Log.createLogger(Geonet.HARVESTER); + + try { + DatabaseMetadataRetrieverFactory.getDatabaseMetadataRetriever("nonvalid", "localhost", 5432, "test", "username", "password", log); + fail(); + + } catch (DatabaseMetadataRetrieverException ex) { + assertEquals("Connection for database type nonvalid not supported", ex.getMessage()); + } + + } + + @Test + public void testValidDatabaseType() { + Logger log = Log.createLogger(Geonet.HARVESTER); + + try { + DatabaseMetadataRetrieverFactory.getDatabaseMetadataRetriever("postgresql", "localhost", 5432, + "test", "username", "password", log); + fail(); + } catch (DatabaseMetadataRetrieverException ex) { + fail(); + } catch (ExceptionInInitializerError ex) { + // The connection fails as no Postgres database available, + // but it should not fail due to an unsupported database type. 
+ assertNotNull(ex.getCause()); + assertTrue(ex.getCause() instanceof DatabaseMetadataRetrieverException ); + + assertEquals("Exception in getting database connection: can not connect to the database", + ex.getCause().getMessage()); + } + + } + +} diff --git a/web-ui/src/main/resources/catalog/components/admin/harvester/partials/extras.html b/web-ui/src/main/resources/catalog/components/admin/harvester/partials/extras.html index 4a29f33cee8..93d91b3d961 100644 --- a/web-ui/src/main/resources/catalog/components/admin/harvester/partials/extras.html +++ b/web-ui/src/main/resources/catalog/components/admin/harvester/partials/extras.html @@ -5,7 +5,8 @@ || harvester['@type'] == 'geonetwork' || harvester['@type'] == 'csw' || harvester['@type'] == 'oaipmh' - || harvester['@type'] == 'filesystem'" + || harvester['@type'] == 'filesystem' + || harvester['@type'] == 'database'" >