diff --git a/docs/manual/docs/user-guide/harvesting/harvesting-database.md b/docs/manual/docs/user-guide/harvesting/harvesting-database.md new file mode 100644 index 00000000000..77048147941 --- /dev/null +++ b/docs/manual/docs/user-guide/harvesting/harvesting-database.md @@ -0,0 +1,43 @@ +# Database Harvesting {#database_harvester} + +This harvesting type uses a database connection to harvest metadata stored in a database table. + +## Adding a Database harvester + +To create a Database harvester go to `Admin console` > `Harvesting` and select `Harvest from` > `Database`: + +![](img/add-database-harvester.png) + +Providing the following information: + +- **Identification** + - *Node name and logo*: A unique name for the harvester and optionally a logo to assign to the harvester. + - *Group*: Group which owns the harvested records. Only the catalog administrator or users with the profile `UserAdmin` of this group can manage the harvester. + - *User*: User who owns the harvested records. + +- **Schedule**: Scheduling options to execute the harvester. If disabled, the harvester should be executed manually from the harvesters page. If enabled a schedule expression using cron syntax should be configured ([See examples](https://www.quartz-scheduler.org/documentation/quartz-2.1.7/tutorials/crontrigger)). + +- **Configure connection to Database** + - *Server*: The database server IP/Hostname. + - *Port*: The database port. For example, for Postgres usually 5432. + - *Database name*: The database name to connect. + - *Table name*: Table name with the metadata. The name must begin with a letter (a-z) or underscore (_). Subsequent characters in a name can be letters, digits (0-9), or underscores. + - *Metadata field name*: Table field name that contains the metadata. The name must begin with a letter (a-z) or underscore (_). Subsequent characters in a name can be letters, digits (0-9), or underscores. + - *Database type*: Database type. 
Currently supported: Postgres and Oracle. + - *Remote authentication*: Credentials to connect to the database. + +- **Search filter**: allows defining a simple field condition to filter the results. + - *Filter field*: Table field name used to filter the results. The name must begin with a letter (a-z) or underscore (_). Subsequent characters in a name can be letters, digits (0-9), or underscores. + - *Filter value*: Value to filter the results. It can contain wildcards (%). + +- **Configure response processing for database** + - *Action on UUID collision*: When a harvester finds the same UUID on a record collected by another method (another harvester, importer, dashboard editor,...), should this record be skipped (default), overridden, or added with a newly generated UUID? + - *Validate records before import*: If checked, the metadata will be validated after retrieval. If the validation does not pass, the metadata will be skipped. + - *XSL filter name to apply*: (Optional) The XSL filter is applied to each metadata record. The filter is a process which depends on the metadata schema (see the `process` folder of the metadata schemas). + + It can include parameters, which are passed to the XSL transformation using the following syntax: `anonymizer?protocol=MYLOCALNETWORK:FILEPATH&email=gis@organisation.org&thesaurus=MYORGONLYTHEASURUS` + + - *Batch edits*: (Optional) Allows updating harvested records using XPath syntax. It can be used to add, replace or delete elements. + - *Translate metadata content*: (Optional) Allows translating metadata elements. It requires a translation service provider configured in the System settings. + +- **Privileges** - Assign privileges to harvested metadata. 
diff --git a/docs/manual/docs/user-guide/harvesting/img/add-database-harvester.png b/docs/manual/docs/user-guide/harvesting/img/add-database-harvester.png new file mode 100644 index 00000000000..f8999ee8223 Binary files /dev/null and b/docs/manual/docs/user-guide/harvesting/img/add-database-harvester.png differ diff --git a/docs/manual/docs/user-guide/harvesting/index.md b/docs/manual/docs/user-guide/harvesting/index.md index 46f52f782c5..4643441ddfe 100644 --- a/docs/manual/docs/user-guide/harvesting/index.md +++ b/docs/manual/docs/user-guide/harvesting/index.md @@ -17,7 +17,8 @@ The following sources can be harvested: - [GeoPortal REST Harvesting](harvesting-geoportal.md) - [THREDDS Harvesting](harvesting-thredds.md) - [WFS GetFeature Harvesting](harvesting-wfs-features.md) -- [Z3950 Harvesting](harvesting-z3950.md) +- [Z3950 Harvesting](harvesting-z3950.md) +- [Database Harvesting](harvesting-database.md) ## Mechanism overview diff --git a/docs/manual/mkdocs.yml b/docs/manual/mkdocs.yml index 73af7ac42b5..401889ff356 100644 --- a/docs/manual/mkdocs.yml +++ b/docs/manual/mkdocs.yml @@ -303,6 +303,7 @@ nav: - user-guide/harvesting/harvesting-webdav.md - user-guide/harvesting/harvesting-wfs-features.md - user-guide/harvesting/harvesting-z3950.md + - user-guide/harvesting/harvesting-database.md - user-guide/export/index.md - 'Administration': - administrator-guide/index.md diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/BaseAligner.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/BaseAligner.java index 101f6fd78ab..c7ff631c252 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/BaseAligner.java +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/BaseAligner.java @@ -28,6 +28,7 @@ import org.fao.geonet.domain.AbstractMetadata; import org.fao.geonet.domain.MetadataCategory; import org.fao.geonet.kernel.DataManager; +import org.fao.geonet.kernel.GeonetworkDataDirectory; import org.fao.geonet.kernel.SchemaManager; 
import org.fao.geonet.kernel.datamanager.IMetadataManager; import org.fao.geonet.kernel.harvest.harvester.AbstractHarvester; @@ -199,4 +200,38 @@ public Element translateMetadataContent(ServiceContext context, return md; } + + /** + * Filter the metadata if process parameter is set and corresponding XSL transformation + * exists in xsl/conversion/import. + * + * @param context + * @param md + * @param processName + * @param processParams + * @param log + * @return + */ + protected Element applyXSLTProcessToMetadata(ServiceContext context, + Element md, + String processName, + Map processParams, + org.fao.geonet.Logger log) { + Path filePath = context.getBean(GeonetworkDataDirectory.class).getXsltConversion(processName); + if (!Files.exists(filePath)) { + log.debug(" processing instruction " + processName + ". Metadata not filtered."); + } else { + Element processedMetadata; + try { + processedMetadata = Xml.transform(md, filePath, processParams); + log.debug(" metadata filtered."); + md = processedMetadata; + } catch (Exception e) { + log.warning(" processing error " + processName + ": " + e.getMessage()); + } + } + return md; + } + + } diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/AbstractHarvester.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/AbstractHarvester.java index 2398aa96c10..753192b62a0 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/AbstractHarvester.java +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/AbstractHarvester.java @@ -45,7 +45,9 @@ import org.fao.geonet.exceptions.UnknownHostEx; import org.fao.geonet.kernel.DataManager; import org.fao.geonet.kernel.MetadataIndexerProcessor; +import org.fao.geonet.kernel.datamanager.IMetadataIndexer; import org.fao.geonet.kernel.datamanager.IMetadataManager; +import org.fao.geonet.kernel.datamanager.IMetadataSchemaUtils; import org.fao.geonet.kernel.datamanager.IMetadataUtils; import 
org.fao.geonet.kernel.harvest.Common.OperResult; import org.fao.geonet.kernel.harvest.Common.Status; @@ -128,6 +130,8 @@ public abstract class AbstractHarvester processParams) { - Path filePath = context.getBean(GeonetworkDataDirectory.class).getXsltConversion(processName); - if (!Files.exists(filePath)) { - log.debug(" processing instruction " + processName + ". Metadata not filtered."); - } else { - Element processedMetadata; - try { - processedMetadata = Xml.transform(md, filePath, processParams); - log.debug(" metadata filtered."); - md = processedMetadata; - } catch (Exception e) { - log.warning(" processing error " + processName + ": " + e.getMessage()); - } - } - return md; - } - /** * Retrieves the list of metadata uuids that have the same dataset identifier. * diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseHarvester.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseHarvester.java new file mode 100644 index 00000000000..51ad7dc1b18 --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseHarvester.java @@ -0,0 +1,73 @@ +//============================================================================= +//=== Copyright (C) 2001-2024 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. +//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. 
+//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.database; + +import org.fao.geonet.Logger; +import org.fao.geonet.kernel.harvest.harvester.AbstractHarvester; +import org.fao.geonet.kernel.harvest.harvester.HarvestResult; + +import java.sql.SQLException; + +public class DatabaseHarvester extends AbstractHarvester { + private static final String TABLE_NAME_PATTERN = "([_a-zA-Z]+[_a-zA-Z0-9]*)"; + private static final String FIELD_NAME_PATTERN = "([_a-zA-Z]+[_a-zA-Z0-9]*)"; + + @Override + protected DatabaseHarvesterParams createParams() { + return new DatabaseHarvesterParams(dataMan); + } + + @Override + protected void storeNodeExtra(DatabaseHarvesterParams params, String path, String siteId, String optionsId) throws SQLException { + // Remove non-valid characters + params.setTableName(params.getTableName().replaceAll("[^" + TABLE_NAME_PATTERN + "]", "")); + params.setMetadataField(params.getMetadataField().replaceAll("[^" + FIELD_NAME_PATTERN + "]", "")); + params.setFilterField(params.getFilterField().replaceAll("[^" + FIELD_NAME_PATTERN + "]", "")); + + setParams(params); + + harvesterSettingsManager.add("id:" + siteId, "icon", params.getIcon()); + harvesterSettingsManager.add("id:" + siteId, "server", params.getServer()); + harvesterSettingsManager.add("id:" + siteId, "port", params.getPort()); + harvesterSettingsManager.add("id:" + siteId, "username", params.getUsername()); + harvesterSettingsManager.add("id:" + siteId, "password", params.getPassword()); + harvesterSettingsManager.add("id:" + siteId, "database", 
params.getDatabase()); + harvesterSettingsManager.add("id:" + siteId, "databaseType", params.getDatabaseType()); + harvesterSettingsManager.add("id:" + siteId, "tableName", params.getTableName()); + harvesterSettingsManager.add("id:" + siteId, "metadataField", params.getMetadataField()); + harvesterSettingsManager.add("id:" + siteId, "xslfilter", params.getXslfilter()); + + String filtersID = harvesterSettingsManager.add(path, "filter", ""); + harvesterSettingsManager.add("id:" + filtersID, "field", params.getFilterField()); + harvesterSettingsManager.add("id:" + filtersID, "value", params.getFilterValue()); + } + + @Override + protected void doHarvest(Logger l) throws Exception { + log.info("Database harvester start"); + DatabaseHarvesterAligner h = new DatabaseHarvesterAligner(cancelMonitor, log, context, params); + result = h.harvest(log); + log.info("Database harvester end"); + } +} diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseHarvesterAligner.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseHarvesterAligner.java new file mode 100644 index 00000000000..d6fe5cba40b --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseHarvesterAligner.java @@ -0,0 +1,408 @@ +//============================================================================= +//=== Copyright (C) 2001-2024 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. 
+//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. +//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.database; + +import com.google.common.collect.Sets; +import jeeves.server.context.ServiceContext; +import org.fao.geonet.GeonetContext; +import org.fao.geonet.Logger; +import org.fao.geonet.constants.Geonet; +import org.fao.geonet.domain.*; +import org.fao.geonet.exceptions.NoSchemaMatchesException; +import org.fao.geonet.kernel.DataManager; +import org.fao.geonet.kernel.UpdateDatestamp; +import org.fao.geonet.kernel.datamanager.IMetadataIndexer; +import org.fao.geonet.kernel.datamanager.IMetadataManager; +import org.fao.geonet.kernel.datamanager.IMetadataSchemaUtils; +import org.fao.geonet.kernel.datamanager.IMetadataUtils; +import org.fao.geonet.kernel.harvest.BaseAligner; +import org.fao.geonet.kernel.harvest.harvester.*; +import org.fao.geonet.kernel.search.IndexingMode; +import org.fao.geonet.repository.MetadataRepository; +import org.fao.geonet.repository.OperationAllowedRepository; +import org.fao.geonet.repository.specification.MetadataSpecs; +import org.fao.geonet.utils.Xml; +import org.jdom.Element; +import org.springframework.util.StringUtils; + +import java.util.*; +import java.util.concurrent.atomic.AtomicBoolean; + +import static org.fao.geonet.kernel.harvest.harvester.csw.Aligner.applyBatchEdits; + +class 
DatabaseHarvesterAligner extends BaseAligner implements IHarvester { + private Logger log; + private ServiceContext context; + private DataManager dataMan; + private IMetadataManager metadataManager; + private IMetadataUtils metadataUtils; + private IMetadataIndexer metadataIndexer; + private IMetadataSchemaUtils metadataSchemaUtils; + private HarvestResult result; + private CategoryMapper localCateg; + private GroupMapper localGroups; + private UUIDMapper localUuids; + private List errors = new LinkedList<>(); + private List idsForHarvestingResult; + private String processName; + private Map processParams = new HashMap<>(); + + @Override + public List getErrors() { + return errors; + } + + public DatabaseHarvesterAligner(AtomicBoolean cancelMonitor, Logger log, ServiceContext context, DatabaseHarvesterParams params) { + super(cancelMonitor); + this.log = log; + this.context = context; + this.params = params; + + result = new HarvestResult(); + result.addedMetadata = 0; + result.uuidSkipped = 0; + result.datasetUuidExist = 0; + result.couldNotInsert = 0; + + GeonetContext gc = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME); + dataMan = gc.getBean(DataManager.class); + metadataManager = gc.getBean(IMetadataManager.class); + metadataSchemaUtils = gc.getBean(IMetadataSchemaUtils.class); + metadataUtils = gc.getBean(IMetadataUtils.class); + metadataIndexer = gc.getBean(IMetadataIndexer.class); + } + + @Override + public HarvestResult harvest(Logger log) throws Exception { + this.log = log; + if (log.isDebugEnabled()) { + log.debug("Retrieving remote metadata information for : " + params.getName()); + } + + DatabaseMetadataRetriever metadataRetriever = DatabaseMetadataRetrieverFactory.getDatabaseMetadataRetriever(params.getDatabaseType(), params.getServer(), params.getPort(), + params.getDatabase(), params.getUsername(), params.getPassword(), log); + + log.info("Start of alignment for : " + params.getName()); + result = new HarvestResult(); + 
//---------------------------------------------------------------- + //--- retrieve all local categories and groups + //--- retrieve harvested uuids for given harvesting node + localCateg = new CategoryMapper(context); + localGroups = new GroupMapper(context); + localUuids = new UUIDMapper(context.getBean(IMetadataUtils.class), params.getUuid()); + + Pair> filter = HarvesterUtil.parseXSLFilter(params.getXslfilter()); + processName = filter.one(); + processParams = filter.two(); + + + metadataManager.flush(); + + idsForHarvestingResult = new ArrayList<>(); + + metadataRetriever.processMetadata(cancelMonitor, params, this); + + // + // delete locally existing metadata from the same source if they were + // not in this harvesting result + // + deleteLocalMetadataNotInDatabase(idsForHarvestingResult); + + return result; + } + + public void align(String metadata) { + if (cancelMonitor.get()) { + return; + } + + try { + result.totalMetadata++; + + if (!StringUtils.hasLength(metadata)) { + log.info("Processing empty metadata xml. Skipping"); + return; + } + + // create JDOM element from String-XML + Element metadataElement = Xml.loadString(metadata, false); + String id = processMetadata(metadataElement); + + if (StringUtils.hasLength(id)) { + idsForHarvestingResult.add(Integer.valueOf(id)); + } + + } catch (Exception ex) { + log.error("Unable to process record from database (" + this.params.getName() + ")"); + log.error(" Record failed. 
Error is: " + ex.getMessage()); + log.error(ex); + errors.add(new HarvestError(this.context, ex)); + } finally { + result.originalMetadata++; + } + } + + private void deleteLocalMetadataNotInDatabase(List idsForHarvestingResult) throws Exception { + Set idsResultHs = Sets.newHashSet(idsForHarvestingResult); + List existingMetadata = context.getBean(MetadataRepository.class).findIdsBy(MetadataSpecs.hasHarvesterUuid(params.getUuid())); + for (Integer existingId : existingMetadata) { + if (cancelMonitor.get()) { + return; + } + + if (!idsResultHs.contains(existingId)) { + log.debug(" Removing: " + existingId); + metadataManager.deleteMetadata(context, existingId.toString()); + result.locallyRemoved++; + } + } + } + + + /** + * Process a metadata to add it to the catalog and returns the identifier. + * + * @param metadataElement + * @return + * @throws Exception + */ + private String processMetadata(Element metadataElement) throws Exception { + + String id = ""; + + String schema = getMetadataSchema(metadataElement); + + if (schema == null) { + log.info("Skipping metadata with unknown schema."); + result.unknownSchema++; + return id; + } + + String uuid = metadataUtils.extractUUID(schema, metadataElement); + + if (!StringUtils.hasLength(uuid)) { + log.info("No metadata uuid. 
Skipping."); + result.badFormat++; + return id; + } + + log.info(String.format("Processing metadata with UUID: %s", uuid)); + + try { + Integer groupIdVal = null; + if (StringUtils.hasLength(params.getOwnerIdGroup())) { + groupIdVal = Integer.parseInt(params.getOwnerIdGroup()); + } + + params.getValidate().validate(dataMan, context, metadataElement, groupIdVal); + } catch (Exception e) { + log.error("Ignoring invalid metadata with uuid " + uuid); + result.doesNotValidate++; + return id; + } + + setParams(params); + + // + // add / update the metadata from this harvesting result + // + id = metadataUtils.getMetadataId(uuid); + if (id == null) { + //Record is new + id = addMetadata(metadataElement, uuid, schema); + result.addedMetadata++; + } else if (localUuids.getID(uuid) == null) { + //Record does not belong to this harvester + result.datasetUuidExist++; + + switch (params.getOverrideUuid()) { + case OVERRIDE: + updateMetadata(metadataElement, Integer.toString(metadataUtils.findOneByUuid(uuid).getId()), true); + log.debug(String.format("Overriding record with uuid %s", uuid)); + result.updatedMetadata++; + break; + case RANDOM: + log.debug(String.format("Generating random uuid for remote record with uuid %s", uuid)); + addMetadata(metadataElement, UUID.randomUUID().toString(), schema); + break; + case SKIP: + log.debug(String.format("Skipping record with uuid %s", uuid)); + result.uuidSkipped++; + break; + default: + break; + } + } else { + //record exists and belongs to this harvester + updateMetadata(metadataElement, id, false); + result.updatedMetadata++; + } + + return id; + } + + private void updateMetadata(Element xml, String id, boolean force) throws Exception { + log.info("Updating metadata with id: " + id); + + // + // update metadata + // + boolean validate = false; + boolean ufo = false; + String language = context.getLanguage(); + + String schema = metadataSchemaUtils.autodetectSchema(xml); + String uuid = metadataUtils.extractUUID(schema, xml); + + 
String changeDate; + try { + changeDate = metadataUtils.extractDateModified(schema, xml); + } catch (Exception ex) { + log.error("Database harvester - updateMetadata - can't get metadata modified date for metadata id= " + id + + ", using current date for modified date"); + changeDate = new ISODate().toString(); + } + + boolean updateSchema = false; + if (StringUtils.hasLength(params.getXslfilter())) { + xml = applyXSLTProcessToMetadata(context, xml, processName, processParams, log); + String newSchema = metadataSchemaUtils.autodetectSchema(xml); + updateSchema = (newSchema != null) && !newSchema.equals(schema); + schema = newSchema; + } + + applyBatchEdits(uuid, xml, schema, params.getBatchEdits(), context, log); + + // Translate metadata + if (params.isTranslateContent()) { + xml = translateMetadataContent(context, xml, schema); + } + + final AbstractMetadata metadata = metadataManager.updateMetadata(context, id, xml, validate, ufo, language, changeDate, + true, IndexingMode.none); + + if (force || updateSchema) { + if (force) { + //change ownership of metadata to new harvester + metadata.getHarvestInfo().setUuid(params.getUuid()); + metadata.getSourceInfo().setSourceId(params.getUuid()); + + } + + if (updateSchema) { + metadata.getDataInfo().setSchemaId(schema); + } + + metadataManager.save(metadata); + } + + OperationAllowedRepository operationAllowedRepository = context.getBean(OperationAllowedRepository.class); + operationAllowedRepository.deleteAllByMetadataId(Integer.parseInt(id)); + addPrivileges(id, params.getPrivileges(), localGroups, context); + + metadata.getCategories().clear(); + addCategories(metadata, params.getCategories(), localCateg, context, null, true); + + metadataManager.flush(); + metadataIndexer.indexMetadata(id, true, IndexingMode.full); + } + + /** + * Inserts a metadata into the database. Lucene index is updated after insertion. 
+ */ + private String addMetadata(Element xml, String uuid, String schema) throws Exception { + log.info(" - Adding metadata with remote uuid: " + uuid); + + // If the xslfilter process changes the metadata uuid, + // use that uuid (newMdUuid) for the new metadata to add to the catalogue. + String newMdUuid = null; + if (StringUtils.hasLength(params.getXslfilter())) { + xml = applyXSLTProcessToMetadata(context, xml, processName, processParams, log); + schema = metadataSchemaUtils.autodetectSchema(xml); + // Get new uuid if modified by XSLT process + newMdUuid = metadataUtils.extractUUID(schema, xml); + } + + boolean newMdUuidFromXslt = StringUtils.hasLength(newMdUuid); + + if (!newMdUuidFromXslt) { + applyBatchEdits(uuid, xml, schema, params.getBatchEdits(), context, log); + } else { + applyBatchEdits(newMdUuid, xml, schema, params.getBatchEdits(), context, log); + } + + // Translate metadata + if (params.isTranslateContent()) { + xml = translateMetadataContent(context, xml, schema); + } + + // + // insert metadata + // + ISODate createDate; + try { + createDate = new ISODate(metadataUtils.extractDateModified(schema, xml)); + } catch (Exception ex) { + log.error("Database harvester - addMetadata - can't get metadata modified date for metadata with uuid= " + + uuid + ", using current date for modified date"); + createDate = new ISODate(); + } + + AbstractMetadata metadata = new Metadata(); + metadata.setUuid(uuid); + metadata.getDataInfo(). + setSchemaId(schema). + setRoot(xml.getQualifiedName()). + setType(MetadataType.METADATA). + setCreateDate(createDate). + setChangeDate(createDate); + metadata.getSourceInfo(). + setSourceId(params.getUuid()). + setOwner(Integer.parseInt(params.getOwnerId())). + setGroupOwner(getGroupOwner()); + metadata.getHarvestInfo(). + setHarvested(true). 
+ setUuid(params.getUuid()); + + addCategories(metadata, params.getCategories(), localCateg, context, null, false); + + metadata = metadataManager.insertMetadata(context, metadata, xml, IndexingMode.none, false, UpdateDatestamp.NO, false, false); + + String id = String.valueOf(metadata.getId()); + + addPrivileges(id, params.getPrivileges(), localGroups, context); + + metadataIndexer.indexMetadata(id, true, IndexingMode.full); + + return id; + } + + private String getMetadataSchema(Element metadataElement) { + try { + return metadataSchemaUtils.autodetectSchema(metadataElement, null); + } catch (NoSchemaMatchesException ex) { + return null; + } + } +} diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseHarvesterParams.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseHarvesterParams.java new file mode 100644 index 00000000000..393d854261d --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseHarvesterParams.java @@ -0,0 +1,233 @@ +//============================================================================= +//=== Copyright (C) 2001-2024 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. +//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. 
+//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.database; + +import org.fao.geonet.Util; +import org.fao.geonet.exceptions.BadInputEx; +import org.fao.geonet.kernel.DataManager; +import org.fao.geonet.kernel.harvest.harvester.AbstractParams; +import org.jdom.Element; + +class DatabaseHarvesterParams extends AbstractParams { + /** + * Type of database + */ + private String databaseType; + + /** + * Name of the database server. + */ + private String server; + + /** + * Port number to use for connecting to the database server + */ + private int port; + + /** + * Name of the database. + */ + private String database; + + /** + * Name of the table with the metadata. + */ + private String tableName; + + /** + * Field name that contains the metadata. + */ + private String metadataField; + + /** + * Field name to filter the table. + */ + private String filterField; + + /** + * Value to filter the results. + */ + private String filterValue; + + /** + * The filter is a process (see schema/process folder) which depends on the schema. It could be + * composed of parameter which will be sent to XSL transformation using the following syntax : + *
+     * anonymizer?protocol=MYLOCALNETWORK:FILEPATH&email=gis@organisation.org&thesaurus=MYORGONLYTHEASURUS
+     * 
+ */ + private String xslfilter; + + private String icon; + + public String getDatabaseType() { + return databaseType; + } + + public void setDatabaseType(String databaseType) { + this.databaseType = databaseType; + } + + public String getServer() { + return server; + } + + public void setServer(String server) { + this.server = server; + } + + public int getPort() { + return port; + } + + public void setPort(int port) { + this.port = port; + } + + public String getDatabase() { + return database; + } + + public void setDatabase(String database) { + this.database = database; + } + + public String getTableName() { + return tableName; + } + + public void setTableName(String tableName) { + this.tableName = tableName; + } + + public String getMetadataField() { + return metadataField; + } + + public void setMetadataField(String metadataField) { + this.metadataField = metadataField; + } + + public String getFilterField() { + return filterField; + } + + public void setFilterField(String filterField) { + this.filterField = filterField; + } + + public String getFilterValue() { + return filterValue; + } + + public void setFilterValue(String filterValue) { + this.filterValue = filterValue; + } + + public String getXslfilter() { + return xslfilter; + } + + public void setXslfilter(String xslfilter) { + this.xslfilter = xslfilter; + } + + public void setIcon(String icon) { + this.icon = icon; + } + + @Override + public String getIcon() { + return icon; + } + + + public DatabaseHarvesterParams(DataManager dm) { + super(dm); + } + + @Override + public void create(Element node) throws BadInputEx { + super.create(node); + + Element site = node.getChild("site"); + + databaseType = Util.getParam(site, "databaseType", ""); + server = Util.getParam(site, "server", ""); + port = Util.getParam(site, "port", 0); + database = Util.getParam(site, "database", ""); + tableName = Util.getParam(site, "tableName", ""); + metadataField = Util.getParam(site, "metadataField", ""); + xslfilter = 
Util.getParam(site, "xslfilter", ""); + + Element filter = node.getChild("filter"); + filterField = Util.getParam(filter, "field", ""); + filterValue = Util.getParam(filter, "value", ""); + + icon = Util.getParam(site, "icon", ""); + } + + @Override + public void update(Element node) throws BadInputEx { + super.update(node); + + Element site = node.getChild("site"); + + databaseType = Util.getParam(site, "databaseType", ""); + server = Util.getParam(site, "server", ""); + port = Util.getParam(site, "port", 0); + database = Util.getParam(site, "database", ""); + tableName = Util.getParam(site, "tableName", ""); + metadataField = Util.getParam(site, "metadataField", ""); + xslfilter = Util.getParam(site, "xslfilter", ""); + + Element filter = node.getChild("filter"); + filterField = Util.getParam(filter, "field", ""); + filterValue = Util.getParam(filter, "value", ""); + + icon = Util.getParam(site, "icon", icon); + + } + + @Override + public DatabaseHarvesterParams copy() { + DatabaseHarvesterParams copy = new DatabaseHarvesterParams(dm); + copyTo(copy); + + copy.databaseType = databaseType; + copy.server = server; + copy.port = port; + copy.database = database; + copy.tableName = tableName; + copy.metadataField = metadataField; + copy.filterField = filterField; + copy.filterValue = filterValue; + copy.xslfilter = xslfilter; + copy.icon = icon; + + copy.setValidate(getValidate()); + + return copy; + } +} diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetriever.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetriever.java new file mode 100644 index 00000000000..236bce24da6 --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetriever.java @@ -0,0 +1,136 @@ +//============================================================================= +//=== Copyright (C) 2001-2024 Food and Agriculture Organization of the 
+//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. +//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. +//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.database; + +import org.apache.commons.dbcp2.BasicDataSource; +import org.fao.geonet.Logger; +import org.springframework.jdbc.core.namedparam.MapSqlParameterSource; +import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate; +import org.springframework.jdbc.core.namedparam.SqlParameterSource; +import org.springframework.util.StringUtils; + +import java.sql.*; +import java.util.concurrent.atomic.AtomicBoolean; + +class DatabaseMetadataRetriever { + private NamedParameterJdbcTemplate jdbcTemplate; + + protected Logger log; + + /** + * Constructor. + * + * @param connectionString An example of server string in case of Oracle + * is: "jdbc:oracle:thin:@84.123.79.19:1521:orcl". + * @param username the username to connect to the database. + * @param password the password to connect to the database. 
+ */ + public DatabaseMetadataRetriever(String driverName, String connectionString, String username, String password, Logger log) { + + try { + log.debug("Getting database connection (via JDBC)"); + + BasicDataSource dataSource = new BasicDataSource(); + dataSource.setDriverClassName(driverName); + dataSource.setUrl(connectionString); + dataSource.setUsername(username); + dataSource.setPassword(password); + // Test the connection config getting a connection and closing it. + dataSource.getConnection().close(); + + jdbcTemplate = new NamedParameterJdbcTemplate(dataSource); + + this.log = log; + } catch (SQLException x) { + log.error("Error getting database connection", x); + + throw new ExceptionInInitializerError(new DatabaseMetadataRetrieverException("Exception in getting database connection: can not connect to the database", x)); + } + } + + protected NamedParameterJdbcTemplate getJdbcTemplate() { + return this.jdbcTemplate; + } + + + /** + * Retrieves and process each metadata with the harvester aligner. 
+ * + * @param cancelMonitor + * @param params + * @param aligner + * @throws Exception + */ + public void processMetadata(AtomicBoolean cancelMonitor, DatabaseHarvesterParams params, DatabaseHarvesterAligner aligner) throws Exception { + String metadataTable = params.getTableName(); + String columnName = params.getMetadataField(); + String filterField = params.getFilterField(); + String filterValue = params.getFilterValue(); + + String sqlQuery; + SqlParameterSource param = new MapSqlParameterSource(); + + if (StringUtils.hasLength(filterField) && StringUtils.hasLength(filterValue)) { + sqlQuery = String.format("SELECT %s FROM %s WHERE %s LIKE :filter", columnName, metadataTable, filterField); + param = new MapSqlParameterSource("filter", filterValue); + } else { + sqlQuery = String.format("SELECT %s FROM %s", columnName, metadataTable); + } + + getJdbcTemplate().query(sqlQuery, param, rs -> { + // Cancel processing + if (cancelMonitor.get()) { + log.warning("Cancelling metadata retrieve using database connection"); + rs.getStatement().cancel(); + } + + String document; + int colId = rs.findColumn(columnName); + // very simple type check: + if (rs.getObject(colId) != null) { + if (rs.getMetaData().getColumnType(colId) == Types.BLOB) { + Blob blob = rs.getBlob(columnName); + byte[] bdata = blob.getBytes(1, (int) blob.length()); + document = new String(bdata); + + } else if (rs.getMetaData().getColumnType(colId) == Types.LONGVARBINARY) { + byte[] byteData = rs.getBytes(colId); + document = new String(byteData); + + } else if (rs.getMetaData().getColumnType(colId) == Types.LONGNVARCHAR || + rs.getMetaData().getColumnType(colId) == Types.LONGVARCHAR || + rs.getMetaData().getColumnType(colId) == Types.VARCHAR || + rs.getMetaData().getColumnType(colId) == Types.SQLXML) { + document = rs.getString(colId); + + } else { + throw new SQLException("Trying to harvest from a column with an invalid datatype: " + + rs.getMetaData().getColumnTypeName(colId)); + } + + 
aligner.align(document); + } + }); + } +} diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetrieverException.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetrieverException.java new file mode 100644 index 00000000000..464959b629a --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetrieverException.java @@ -0,0 +1,41 @@ +//============================================================================= +//=== Copyright (C) 2001-2024 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. +//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. +//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. 
/**
 * Checked exception reported by the database harvester when metadata can not be
 * retrieved from the database.
 */
class DatabaseMetadataRetrieverException extends Exception {

    /**
     * Creates an exception without detail message or cause.
     */
    public DatabaseMetadataRetrieverException() {
        super();
    }

    /**
     * Creates an exception wrapping the given cause.
     *
     * @param cause the underlying error.
     */
    public DatabaseMetadataRetrieverException(Throwable cause) {
        super(cause);
    }

    /**
     * Creates an exception with a detail message.
     *
     * @param message description of the failure.
     */
    public DatabaseMetadataRetrieverException(String message) {
        super(message);
    }

    /**
     * Creates an exception with a detail message and the underlying cause.
     *
     * @param message description of the failure.
     * @param cause   the underlying error.
     */
    public DatabaseMetadataRetrieverException(String message, Throwable cause) {
        super(message, cause);
    }
}
+//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.database; + +import org.fao.geonet.Logger; + +class DatabaseMetadataRetrieverFactory { + + private DatabaseMetadataRetrieverFactory() { + // Prevent creating instances + } + + public static final DatabaseMetadataRetriever getDatabaseMetadataRetriever(String dbType, String server, int port, + String database, String username, String password, + Logger log) throws DatabaseMetadataRetrieverException { + + DatabaseMetadataRetriever metadataRetriever; + + if (dbType.equalsIgnoreCase("postgresql")) { + String connectionString = "jdbc:postgresql://" + server + ":" + port + "/" + database; + String driverName = "org.postgresql.Driver"; + + metadataRetriever = new DatabaseMetadataRetriever(driverName, connectionString, username, password, log); + } else if (dbType.equalsIgnoreCase("oracle")) { + String connectionString = String.format("jdbc:oracle:thin:@%s:%d:%s", server, port, database); + String driverName = "oracle.jdbc.driver.OracleDriver"; + + metadataRetriever = new DatabaseMetadataRetriever(driverName, connectionString, username, password, log); + } else { + throw new DatabaseMetadataRetrieverException("Connection for database type " + dbType + " not supported"); + } + + return metadataRetriever; + } +} diff --git a/harvesters/src/main/resources/config-spring-geonetwork.xml b/harvesters/src/main/resources/config-spring-geonetwork.xml index 5bda0379065..f941a18842f 100644 --- a/harvesters/src/main/resources/config-spring-geonetwork.xml +++ 
b/harvesters/src/main/resources/config-spring-geonetwork.xml @@ -61,16 +61,18 @@ scope="prototype"/> + + - - + diff --git a/harvesters/src/test/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetrieverFactoryTest.java b/harvesters/src/test/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetrieverFactoryTest.java new file mode 100644 index 00000000000..ff0b35fee3d --- /dev/null +++ b/harvesters/src/test/java/org/fao/geonet/kernel/harvest/harvester/database/DatabaseMetadataRetrieverFactoryTest.java @@ -0,0 +1,71 @@ +//============================================================================= +//=== Copyright (C) 2001-2024 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. +//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. +//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. 
email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.database; + +import org.fao.geonet.Logger; +import org.fao.geonet.constants.Geonet; +import org.fao.geonet.utils.Log; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class DatabaseMetadataRetrieverFactoryTest { + + @Test + public void testNonValidDatabaseType() { + Logger log = Log.createLogger(Geonet.HARVESTER); + + try { + DatabaseMetadataRetrieverFactory.getDatabaseMetadataRetriever("nonvalid", "localhost", 5432, "test", "username", "password", log); + fail(); + + } catch (DatabaseMetadataRetrieverException ex) { + assertEquals("Connection for database type nonvalid not supported", ex.getMessage()); + } + + } + + @Test + public void testValidDatabaseType() { + Logger log = Log.createLogger(Geonet.HARVESTER); + + try { + DatabaseMetadataRetrieverFactory.getDatabaseMetadataRetriever("postgresql", "localhost", 5432, + "test", "username", "password", log); + fail(); + } catch (DatabaseMetadataRetrieverException ex) { + fail(); + } catch (ExceptionInInitializerError ex) { + // The connection fails as no Postgres database available, + // but it should not fail due to an unsupported database type. 
+ assertNotNull(ex.getCause()); + assertTrue(ex.getCause() instanceof DatabaseMetadataRetrieverException ); + + assertEquals("Exception in getting database connection: can not connect to the database", + ex.getCause().getMessage()); + } + + } + +} diff --git a/web-ui/src/main/resources/catalog/components/admin/harvester/partials/extras.html b/web-ui/src/main/resources/catalog/components/admin/harvester/partials/extras.html index 4a29f33cee8..93d91b3d961 100644 --- a/web-ui/src/main/resources/catalog/components/admin/harvester/partials/extras.html +++ b/web-ui/src/main/resources/catalog/components/admin/harvester/partials/extras.html @@ -5,7 +5,8 @@ || harvester['@type'] == 'geonetwork' || harvester['@type'] == 'csw' || harvester['@type'] == 'oaipmh' - || harvester['@type'] == 'filesystem'" + || harvester['@type'] == 'filesystem' + || harvester['@type'] == 'database'" >