From 829d8ce348db01110bf670e7b37da8e98d8caf8d Mon Sep 17 00:00:00 2001 From: Olivier Guyot Date: Wed, 8 Sep 2021 16:48:19 +0200 Subject: [PATCH 1/5] Add DCAT2 schema plugin Taken from https://github.com/metadata101/dcat2 * added schema as a submodule * added module reference in gn-schemas * added dependency and artifact refs in web module --- .gitignore | 1 + .gitmodules | 3 +++ schemas/dcat2 | 1 + schemas/pom.xml | 1 + web/pom.xml | 12 ++++++++++++ 5 files changed, 18 insertions(+) create mode 160000 schemas/dcat2 diff --git a/.gitignore b/.gitignore index 7b7a628347..67ac523548 100644 --- a/.gitignore +++ b/.gitignore @@ -59,6 +59,7 @@ web/src/main/webapp/WEB-INF/data/config/schema_plugins/*/schematron/schematron*. web/src/main/webapp/WEB-INF/data/config/schema_plugins/csw-record web/src/main/webapp/WEB-INF/data/config/schema_plugins/dublin-core web/src/main/webapp/WEB-INF/data/config/schema_plugins/iso19* +web/src/main/webapp/WEB-INF/data/config/schema_plugins/dcat2 web/src/main/webapp/WEB-INF/data/config/schema_plugins/schemaplugin-uri-catalog.xml web/src/main/webapp/WEB-INF/data/config/schemaplugin-uri-catalog.xml web/src/main/webapp/WEB-INF/data/config/encryptor.properties diff --git a/.gitmodules b/.gitmodules index 67303aee7d..407ed19277 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,3 +10,6 @@ [submodule "docs/manuals"] path = docs/manuals url = https://github.com/geonetwork/doc.git +[submodule "schemas/dcat2"] + path = schemas/dcat2 + url = https://github.com/metadata101/dcat2 diff --git a/schemas/dcat2 b/schemas/dcat2 new file mode 160000 index 0000000000..6e17ca2751 --- /dev/null +++ b/schemas/dcat2 @@ -0,0 +1 @@ +Subproject commit 6e17ca275180c396629870412245fa430c8665d5 diff --git a/schemas/pom.xml b/schemas/pom.xml index 9d737b6e89..0b31426379 100644 --- a/schemas/pom.xml +++ b/schemas/pom.xml @@ -51,6 +51,7 @@ iso19110 iso19139 iso19115-3.2018 + dcat2 diff --git a/web/pom.xml b/web/pom.xml index 5d2a583ca6..418c75f8c3 100644 --- a/web/pom.xml +++ 
b/web/pom.xml @@ -618,6 +618,11 @@ gn-schema-iso19115-3.2018 ${project.version} + + org.geonetwork-opensource.schemas + gn-schema-dcat2 + ${project.version} + @@ -993,6 +998,13 @@ false ${schema-plugins.dir} + + org.geonetwork-opensource.schemas + gn-schema-dcat2 + zip + false + ${schema-plugins.dir} + From 4918e6660238c2064fd3592b279afb3a139accaa Mon Sep 17 00:00:00 2001 From: Francois Prunayre Date: Tue, 17 Sep 2019 09:35:04 +0200 Subject: [PATCH 2/5] Harvester / Simple URL This commit is a squash of https://github.com/geonetwork/core-geonetwork/pull/5942 A simple harvester which takes a URL expecting for now a JSON document and loop over document identified by a JSONPointer and applying an XSL to convert to ISO format. This should allow GeoNetwork to harvest some of the opendata portal providing all various search API providing JSON response usually. Harvester / Simple URL / Paging and basic opendatasoft support. Json harvester: fix merge conflicts jsonHarvester: handle JSONLD format with @ in tag names jsonHarvester: add ESRI JSONLD DCAT transformation hack: to remove, extract uuid from URIs jsonHarvester: extract uuid from identifier https://data-atmo-hdf.opendata.arcgis.com/datasets/bac17d7d05a34242a8b22c535ecdb13d will extract bac17d7d05a34242a8b22c535ecdb13d --- .../harvest/harvester/simpleUrl/Aligner.java | 286 +++++++++++ .../harvester/simpleUrl/Harvester.java | 290 +++++++++++ .../simpleUrl/SimpleUrlHarvester.java | 79 +++ .../harvester/simpleUrl/SimpleUrlParams.java | 103 ++++ .../resources/config-spring-geonetwork.xml | 3 + .../harvester/simpleUrl/HarvesterTest.java | 58 +++ .../convert/fromJsonLdEsri.xsl | 449 ++++++++++++++++++ .../resources/catalog/locales/en-admin.json | 2 + .../admin/harvest/type/simpleurl.html | 106 +++++ .../templates/admin/harvest/type/simpleurl.js | 107 +++++ .../import/CKAN-to-ISO19115-3-2018.xsl | 5 + .../import/ESRIDCAT-to-ISO19115-3-2018.xsl | 5 + .../OPENDATASOFT-to-ISO19115-3-2018.xsl | 5 + 
.../webapp/xsl/xml/harvesting/simpleurl.xsl | 34 ++ 14 files changed, 1532 insertions(+) create mode 100644 harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Aligner.java create mode 100644 harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java create mode 100644 harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlHarvester.java create mode 100644 harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlParams.java create mode 100644 harvesters/src/test/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/HarvesterTest.java create mode 100644 schemas/iso19115-3.2018/src/main/plugin/iso19115-3.2018/convert/fromJsonLdEsri.xsl create mode 100644 web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.html create mode 100644 web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.js create mode 100644 web/src/main/webapp/xsl/conversion/import/CKAN-to-ISO19115-3-2018.xsl create mode 100644 web/src/main/webapp/xsl/conversion/import/ESRIDCAT-to-ISO19115-3-2018.xsl create mode 100644 web/src/main/webapp/xsl/conversion/import/OPENDATASOFT-to-ISO19115-3-2018.xsl create mode 100644 web/src/main/webapp/xsl/xml/harvesting/simpleurl.xsl diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Aligner.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Aligner.java new file mode 100644 index 0000000000..3bce32bcd1 --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Aligner.java @@ -0,0 +1,286 @@ +//============================================================================= +//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can 
redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. +//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. +//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.simpleUrl; + +import jeeves.server.context.ServiceContext; +import org.fao.geonet.GeonetContext; +import org.fao.geonet.Logger; +import org.fao.geonet.constants.Geonet; +import org.fao.geonet.domain.AbstractMetadata; +import org.fao.geonet.domain.ISODate; +import org.fao.geonet.domain.Metadata; +import org.fao.geonet.domain.MetadataType; +import org.fao.geonet.exceptions.OperationAbortedEx; +import org.fao.geonet.kernel.DataManager; +import org.fao.geonet.kernel.UpdateDatestamp; +import org.fao.geonet.kernel.datamanager.IMetadataIndexer; +import org.fao.geonet.kernel.datamanager.IMetadataManager; +import org.fao.geonet.kernel.datamanager.IMetadataUtils; +import org.fao.geonet.kernel.harvest.BaseAligner; +import org.fao.geonet.kernel.harvest.harvester.CategoryMapper; +import org.fao.geonet.kernel.harvest.harvester.GroupMapper; +import org.fao.geonet.kernel.harvest.harvester.HarvestError; +import org.fao.geonet.kernel.harvest.harvester.HarvestResult; +import 
org.fao.geonet.kernel.harvest.harvester.UUIDMapper; +import org.fao.geonet.repository.OperationAllowedRepository; +import org.jdom.Element; + +import javax.transaction.Transactional; +import javax.transaction.Transactional.TxType; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.atomic.AtomicBoolean; + +public class Aligner extends BaseAligner { + + private ServiceContext context; + private DataManager dataMan; + private CategoryMapper localCateg; + private GroupMapper localGroups; + private UUIDMapper localUuids; + + private IMetadataUtils metadataUtils; + private IMetadataManager metadataManager; + private IMetadataIndexer metadataIndexer; + + private HarvestResult result; + private Map processParams = new HashMap(); + private Logger log; + + public Aligner(AtomicBoolean cancelMonitor, ServiceContext sc, SimpleUrlParams params, Logger log) throws OperationAbortedEx { + super(cancelMonitor); + this.context = sc; + this.params = params; + this.log = log; + + GeonetContext gc = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME); + dataMan = gc.getBean(DataManager.class); + metadataUtils = gc.getBean(IMetadataUtils.class); + metadataManager = gc.getBean(IMetadataManager.class); + metadataIndexer = gc.getBean(IMetadataIndexer.class); + result = new HarvestResult(); + result.unretrievable = 0; + result.uuidSkipped = 0; + result.couldNotInsert = 0; + } + + public HarvestResult align(Map records, Collection errors) throws Exception { + if (cancelMonitor.get()) { + return result; + } + + log.debug("Start of alignment for : " + params.getName()); + + localCateg = new CategoryMapper(context); + localGroups = new GroupMapper(context); + localUuids = new UUIDMapper(context.getBean(IMetadataUtils.class), params.getUuid()); + + insertOrUpdate(records, errors); + log.debug("End of alignment for : " + params.getName()); + + return result; + } + + private void 
insertOrUpdate(Map records, Collection errors) { + records.entrySet().forEach(e -> { + if (cancelMonitor.get()) { + return; + } + + try { + String id = metadataUtils.getMetadataId(e.getKey()); + + if (id == null) { + //record doesn't exist (so it doesn't belong to this harvester) + log.debug("Adding record with uuid " + e.getKey()); + addMetadata(e, null); + } else if (localUuids.getID(e.getKey()) == null) { + //Record does not belong to this harvester + result.datasetUuidExist++; + + switch (params.getOverrideUuid()) { + case OVERRIDE: + updateMetadata(e, Integer.toString(metadataUtils.findOneByUuid(e.getKey()).getId()), true); + log.debug("Overriding record with uuid " + e.getKey()); + result.updatedMetadata++; + break; + case RANDOM: + log.debug("Generating random uuid for remote record with uuid " + e.getKey()); + addMetadata(e, UUID.randomUUID().toString()); + break; + case SKIP: + log.debug("Skipping record with uuid " + e.getKey()); + result.uuidSkipped++; + default: + break; + } + } else { + //record exists and belongs to this harvester + updateMetadata(e, id, false); + if (params.isIfRecordExistAppendPrivileges()) { + addPrivileges(id, params.getPrivileges(), localGroups, context); + result.privilegesAppendedOnExistingRecord++; + } + + } + result.totalMetadata++; + } catch (Throwable t) { + errors.add(new HarvestError(this.context, t)); + log.error("Unable to process record from csw (" + this.params.getName() + ")"); + log.error(" Record failed: " + e.getKey() + ". 
Error is: " + t.getMessage()); + log.error(t); + } finally { + result.originalMetadata++; + } + }); + } + + /** + * Remove records no longer on the remote CSW server + * + * @param records + * @throws Exception + */ + @Transactional(value = TxType.REQUIRES_NEW) + public HarvestResult cleanupRemovedRecords(Set records) throws Exception { + + if (cancelMonitor.get()) { + return result; + } + + for (String uuid : localUuids.getUUIDs()) { + if (!records.contains(uuid)) { + String id = localUuids.getID(uuid); + log.debug(" - Removing old metadata with local id:" + id); + metadataManager.deleteMetadata(context, id); + result.locallyRemoved++; + } + } + dataMan.forceIndexChanges(); + + return result; + } + + + private void addMetadata(Map.Entry record, String overrideUuidValue) throws Exception { + if (cancelMonitor.get()) { + return; + } + + Element xml = record.getValue(); + if (xml == null) { + result.unretrievable++; + return; + } + + String schema = dataMan.autodetectSchema(xml, null); + if (schema == null) { + log.debug(" - Metadata skipped due to unknown schema. uuid:" + record.getKey()); + result.unknownSchema++; + return; + } + + String uuid = record.getKey(); + if (overrideUuidValue != null) { + log.debug(String.format(" - Overriding UUID %s by %s", record.getKey(), overrideUuidValue)); + uuid = overrideUuidValue; + xml = dataMan.setUUID(schema, uuid, record.getValue()); + } + + + log.debug(" - Adding metadata with uuid:" + uuid + " schema:" + schema); + + final String dateModified = dataMan.extractDateModified(schema, xml); + + AbstractMetadata metadata = new Metadata(); + metadata.setUuid(uuid); + Integer ownerId = getOwner(); + metadata.getDataInfo(). + setSchemaId(schema). + setRoot(xml.getQualifiedName()). + setType(MetadataType.METADATA). + setChangeDate(new ISODate(dateModified)). + setCreateDate(new ISODate(dateModified)); + metadata.getSourceInfo(). + setSourceId(params.getUuid()). + setOwner(ownerId). 
+ setGroupOwner(getGroupOwner()); + metadata.getHarvestInfo(). + setHarvested(true). + setUuid(params.getUuid()); + + metadata.getSourceInfo().setGroupOwner(getGroupOwner()); + + addCategories(metadata, params.getCategories(), localCateg, context, null, false); + + metadata = metadataManager.insertMetadata(context, metadata, xml, false, false, UpdateDatestamp.NO, false, false); + + String id = String.valueOf(metadata.getId()); + + addPrivileges(id, params.getPrivileges(), localGroups, context); + + metadataIndexer.indexMetadata(id, true); + result.addedMetadata++; + } + + + @Transactional(value = TxType.REQUIRES_NEW) + boolean updateMetadata(Map.Entry ri, String id, Boolean force) throws Exception { + Element md = ri.getValue(); + if (md == null) { + result.unchangedMetadata++; + return false; + } + + boolean validate = false; + boolean ufo = false; + boolean index = false; + String language = context.getLanguage(); + String schema = dataMan.autodetectSchema(md, null); + final String dateModified = dataMan.extractDateModified(schema, ri.getValue()); + + final AbstractMetadata metadata = metadataManager.updateMetadata(context, id, md, validate, ufo, index, language, dateModified, true); + + if (force) { + //change ownership of metadata to new harvester + metadata.getHarvestInfo().setUuid(params.getUuid()); + metadata.getSourceInfo().setSourceId(params.getUuid()); + + metadataManager.save((Metadata) metadata); + } + + OperationAllowedRepository repository = context.getBean(OperationAllowedRepository.class); + repository.deleteAllByMetadataId(Integer.parseInt(id)); + + addPrivileges(id, params.getPrivileges(), localGroups, context); + + metadata.getCategories().clear(); + addCategories(metadata, params.getCategories(), localCateg, context, null, true); + result.updatedMetadata++; + return true; + } +} diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java 
b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java new file mode 100644 index 0000000000..088611c999 --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java @@ -0,0 +1,290 @@ +//============================================================================= +//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. +//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. +//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. 
email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.simpleUrl; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.io.CharStreams; +import jeeves.server.context.ServiceContext; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.http.client.methods.HttpGet; +import org.fao.geonet.Logger; +import org.fao.geonet.constants.Geonet; +import org.fao.geonet.exceptions.BadParameterEx; +import org.fao.geonet.kernel.harvest.harvester.HarvestError; +import org.fao.geonet.kernel.harvest.harvester.HarvestResult; +import org.fao.geonet.kernel.harvest.harvester.IHarvester; +import org.fao.geonet.lib.Lib; +import org.fao.geonet.utils.GeonetHttpRequestFactory; +import org.fao.geonet.utils.Log; +import org.fao.geonet.utils.Xml; +import org.jdom.Element; +import org.jdom.JDOMException; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.XML; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.http.client.ClientHttpResponse; + +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * Harvest metadata from a JSON source. + *

+ * The JSON source can be a simple JSON file or + * an URL with indication on how to pass paging information. + * + * This harvester has been tested with CKAN search API. + */ +class Harvester implements IHarvester { + public static final String LOGGER_NAME = "geonetwork.harvester.json"; + + private final AtomicBoolean cancelMonitor; + private Logger log; + private SimpleUrlParams params; + private ServiceContext context; + + @Autowired + GeonetHttpRequestFactory requestFactory; + + /** + * Contains a list of accumulated errors during the executing of this harvest. + */ + private List errors = new LinkedList(); + + public Harvester(AtomicBoolean cancelMonitor, Logger log, ServiceContext context, SimpleUrlParams params) { + this.cancelMonitor = cancelMonitor; + this.log = log; + this.context = context; + this.params = params; + } + + public HarvestResult harvest(Logger log) throws Exception { + this.log = log; + log.debug("Retrieving simple URL: " + params.getName()); + + requestFactory = context.getBean(GeonetHttpRequestFactory.class); + + String jsonResponse = retrieveUrl(params.url, log); + if (cancelMonitor.get()) { + return new HarvestResult(); + } + log.debug("Response is: " + jsonResponse); + + // TODO: Add support for XML or JSON + int numberOfRecordsToHarvest = -1; + ObjectMapper objectMapper = new ObjectMapper(); + JsonNode jsonObj = objectMapper.readTree(jsonResponse); + + if (StringUtils.isNotEmpty(params.numberOfRecordPath)) { + try { + numberOfRecordsToHarvest = jsonObj.at(params.numberOfRecordPath).asInt(); + log.debug("Number of records to harvest: " + numberOfRecordsToHarvest); + } catch (Exception e) { + } + } + boolean error = false; + HarvestResult result = null; + Map allUuids = new HashMap(); + try { + Aligner aligner = new Aligner(cancelMonitor, context, params, log); + List listOfUrlForPages = buildListOfUrl(params, numberOfRecordsToHarvest); + for (int i = 0; i < listOfUrlForPages.size(); i ++) { + if (i != 0) { + jsonResponse = 
retrieveUrl(listOfUrlForPages.get(i), log); + jsonObj = objectMapper.readTree(jsonResponse); + } + Map uuids = new HashMap(); + JsonNode nodes; + if (StringUtils.isNotEmpty(params.loopElement)) { + try { + nodes = jsonObj.at(params.loopElement); + log.debug("Number of records in response: " + nodes.size()); + + nodes.forEach(record -> { + String uuid = this.extractUuidFromIdentifier(record.get(params.recordIdPath).asText()); + Element xml = convertRecordToXml(record, uuid); + uuids.put(uuid, xml); + }); + aligner.align(uuids, errors); + allUuids.putAll(uuids); + } catch (Exception e) { + log.warning("Failed to collect record in response"); + } + } + } + result = aligner.cleanupRemovedRecords(allUuids.keySet()); + } catch (Exception t) { + error = true; + log.error("Unknown error trying to harvest"); + log.error(t.getMessage()); + log.error(t); + errors.add(new HarvestError(context, t)); + } catch (Throwable t) { + error = true; + log.fatal("Something unknown and terrible happened while harvesting"); + log.fatal(t.getMessage()); + errors.add(new HarvestError(context, t)); + } + + log.info("Total records processed in all searches :" + allUuids.size()); + if (error) { + log.warning("Due to previous errors the align process has not been called"); + } + + return result; + } + + private String extractUuidFromIdentifier(final String identifier ) { + String uuid = identifier; + if (Lib.net.isUrlValid(uuid)) { + uuid = uuid.replaceFirst(".*/([^/?]+).*", "$1"); + } + return uuid; + } + + @VisibleForTesting + protected List buildListOfUrl(SimpleUrlParams params, int numberOfRecordsToHarvest) { + List urlList = new ArrayList(); + if (StringUtils.isEmpty(params.pageSizeParam)) { + urlList.add(params.url); + return urlList; + } + + int numberOfRecordsPerPage = -1; + final String pageSizeParamValue = params.url.replaceAll(".*[?&]" + params.pageSizeParam + "=([0-9]+).*", "$1"); + if (StringUtils.isNumeric(pageSizeParamValue)) { + numberOfRecordsPerPage = 
Integer.parseInt(pageSizeParamValue); + } else { + log.warning(String.format( + "Page size param '%s' not found or is not a numeric in URL '%s'. Can't build a list of pages.", + params.pageSizeParam, params.url)); + urlList.add(params.url); + return urlList; + } + + final String pageFromParamValue = params.url.replaceAll(".*[?&]" + params.pageFromParam + "=([0-9]+).*", "$1"); + boolean startAtZero = false; + if (StringUtils.isNumeric(pageFromParamValue)) { + startAtZero = Integer.parseInt(pageFromParamValue) == 0; + } else { + log.warning(String.format( + "Page from param '%s' not found or is not a numeric in URL '%s'. Can't build a list of pages.", + params.pageFromParam, params.url)); + urlList.add(params.url); + return urlList; + } + + + int numberOfPages = (int) Math.abs((numberOfRecordsToHarvest + (startAtZero ? -1 : 0)) / numberOfRecordsPerPage) + 1; + + for (int i = 0; i < numberOfPages; i++) { + int from = i * numberOfRecordsPerPage + (startAtZero ? 0 : 1); + int size = i == numberOfPages - 1 ? // Last page + numberOfRecordsToHarvest - from + (startAtZero ? 
0 : 1) : + numberOfRecordsPerPage; + String url = params.url + .replaceAll(params.pageFromParam + "=[0-9]+", params.pageFromParam + "=" + from) + .replaceAll(params.pageSizeParam + "=[0-9]+", params.pageSizeParam + "=" + size); + urlList.add(url); + } + + return urlList; + } + + private Element convertRecordToXml(JsonNode record, String uuid) { + ObjectMapper objectMapper = new ObjectMapper(); + try { + String recordAsXml = XML.toString( + new JSONObject( + objectMapper.writeValueAsString(record)), "record"); + recordAsXml = Xml.stripNonValidXMLCharacters(recordAsXml).replace("<@", "<").replace(" getErrors() { + return errors; + } +} diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlHarvester.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlHarvester.java new file mode 100644 index 0000000000..bd8dcaa8ec --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlHarvester.java @@ -0,0 +1,79 @@ +//============================================================================= +//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. +//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. 
+//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.simpleUrl; + +import jeeves.server.context.ServiceContext; +import org.fao.geonet.Logger; +import org.fao.geonet.domain.Source; +import org.fao.geonet.domain.SourceType; +import org.fao.geonet.exceptions.BadInputEx; +import org.fao.geonet.kernel.harvest.harvester.AbstractHarvester; +import org.fao.geonet.kernel.harvest.harvester.AbstractParams; +import org.fao.geonet.kernel.harvest.harvester.HarvestResult; +import org.fao.geonet.kernel.harvest.harvester.csw.CswParams; +import org.fao.geonet.repository.SourceRepository; +import org.fao.geonet.resources.Resources; +import org.jdom.Element; +import org.springframework.beans.factory.annotation.Autowired; + +import java.io.File; +import java.sql.SQLException; +import java.util.UUID; + +/** + * Harvest metadata from a JSON source. + */ +public class SimpleUrlHarvester extends AbstractHarvester { + + @Override + protected SimpleUrlParams createParams() { + return new SimpleUrlParams(dataMan); + } + + /** + * Stores in the harvester settings table some values not managed by {@link AbstractHarvester} + * + * @param params the harvester parameters. 
+ * @param path + * @param siteId + * @param optionsId + * @throws SQLException + */ + protected void storeNodeExtra(SimpleUrlParams params, String path, String siteId, String optionsId) throws SQLException { + + harvesterSettingsManager.add("id:" + siteId, "url", params.url); + harvesterSettingsManager.add("id:" + siteId, "icon", params.icon); + harvesterSettingsManager.add("id:" + siteId, "loopElement", params.loopElement); + harvesterSettingsManager.add("id:" + siteId, "numberOfRecordPath", params.numberOfRecordPath); + harvesterSettingsManager.add("id:" + siteId, "recordIdPath", params.recordIdPath); + harvesterSettingsManager.add("id:" + siteId, "pageFromParam", params.pageFromParam); + harvesterSettingsManager.add("id:" + siteId, "pageSizeParam", params.pageSizeParam); + harvesterSettingsManager.add("id:" + siteId, "toISOConversion", params.toISOConversion); + } + + public void doHarvest(Logger log) throws Exception { + Harvester h = new Harvester(cancelMonitor, log, context, params); + result = h.harvest(log); + } +} diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlParams.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlParams.java new file mode 100644 index 0000000000..769c692e65 --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlParams.java @@ -0,0 +1,103 @@ +//============================================================================= +//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. 
+//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. +//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.simpleUrl; + +import org.fao.geonet.Util; +import org.fao.geonet.exceptions.BadInputEx; +import org.fao.geonet.kernel.DataManager; +import org.fao.geonet.kernel.harvest.harvester.AbstractParams; +import org.jdom.Element; + +public class SimpleUrlParams extends AbstractParams { + public String url; + public String icon; + public String loopElement; + public String numberOfRecordPath; + public String recordIdPath; + public String pageSizeParam; + public String pageFromParam; + public String toISOConversion; + + public SimpleUrlParams(DataManager dm) { + super(dm); + } + + /** + * called when a new entry must be added. Reads values from the provided entry, providing + * default values. 
+ */ + public void create(Element node) throws BadInputEx { + super.create(node); + + Element site = node.getChild("site"); + + url = Util.getParam(site, "url", "http://dados.gov.br/api/3/action/package_search?q="); + loopElement = Util.getParam(site, "loopElement", "/result/results"); + numberOfRecordPath = Util.getParam(site, "numberOfRecordPath", "/result/count"); + recordIdPath = Util.getParam(site, "recordIdPath", "id"); + pageSizeParam = Util.getParam(site, "pageSizeParam", "rows"); + pageFromParam = Util.getParam(site, "pageFromParam", "start"); + toISOConversion = Util.getParam(site, "toISOConversion", "CKAN-to-ISO19115-3-2018"); + icon = Util.getParam(site, "icon", "default.gif"); + } + + /** + * called when an entry has changed and variables must be updated. + */ + public void update(Element node) throws BadInputEx { + super.update(node); + + Element site = node.getChild("site"); + + url = Util.getParam(site, "url", url); + loopElement = Util.getParam(site, "loopElement", ""); + numberOfRecordPath = Util.getParam(site, "numberOfRecordPath", ""); + recordIdPath = Util.getParam(site, "recordIdPath", ""); + pageSizeParam = Util.getParam(site, "pageSizeParam", ""); + pageFromParam = Util.getParam(site, "pageFromParam", ""); + toISOConversion = Util.getParam(site, "toISOConversion", ""); + icon = Util.getParam(site, "icon", icon); + } + + @Override + public String getIcon() { + return icon; + } + + public SimpleUrlParams copy() { + SimpleUrlParams copy = new SimpleUrlParams(dm); + copyTo(copy); + + copy.url = url; + copy.icon = icon; + copy.loopElement = loopElement; + copy.numberOfRecordPath = numberOfRecordPath; + copy.pageSizeParam = pageSizeParam; + copy.pageFromParam = pageFromParam; + copy.recordIdPath = recordIdPath; + copy.toISOConversion = toISOConversion; + + return copy; + } +} diff --git a/harvesters/src/main/resources/config-spring-geonetwork.xml b/harvesters/src/main/resources/config-spring-geonetwork.xml index 55b78e8783..255b2e010d 100644 --- 
a/harvesters/src/main/resources/config-spring-geonetwork.xml +++ b/harvesters/src/main/resources/config-spring-geonetwork.xml @@ -57,6 +57,9 @@ + list = harvester.buildListOfUrl(params, numberOfResult); + assertEquals(3, list.size()); + assertEquals("http://dados.gov.br/api/3/action/package_search?q=&rows=10&start=1", list.get(0)); + assertEquals("http://dados.gov.br/api/3/action/package_search?q=&rows=10&start=11", list.get(1)); + assertEquals("http://dados.gov.br/api/3/action/package_search?q=&rows=1&start=21", list.get(2)); + + + + params.url = "http://dados.gov.br/api/3/action/package_search?q=&rows=10&start=0"; + list = harvester.buildListOfUrl(params, numberOfResult); + assertEquals(3, list.size()); + assertEquals("http://dados.gov.br/api/3/action/package_search?q=&rows=10&start=0", list.get(0)); + assertEquals("http://dados.gov.br/api/3/action/package_search?q=&rows=10&start=10", list.get(1)); + assertEquals("http://dados.gov.br/api/3/action/package_search?q=&rows=1&start=20", list.get(2)); + + params.url = "http://dados.gov.br/api/3/action/package_search?q=&rows=DADA&start=1"; + list = harvester.buildListOfUrl(params, numberOfResult); + assertEquals(1, list.size()); + + params.url = "http://dados.gov.br/api/3/action/package_search?q=&rows=11&start=DADA"; + list = harvester.buildListOfUrl(params, numberOfResult); + assertEquals(1, list.size()); + + params.url = "http://dados.gov.br/api/3/action/package_search?q=&&start=1"; + list = harvester.buildListOfUrl(params, numberOfResult); + assertEquals(1, list.size()); + + params.url = "http://dados.gov.br/api/3/action/package_search?q=&rows=11&"; + list = harvester.buildListOfUrl(params, numberOfResult); + assertEquals(1, list.size()); + + + params.url = "http://dados.gov.br/api/3/action/package_search?q=&rows=2&start=0"; + list = harvester.buildListOfUrl(params, 8); + assertEquals(4, list.size()); + } +} diff --git a/schemas/iso19115-3.2018/src/main/plugin/iso19115-3.2018/convert/fromJsonLdEsri.xsl 
b/schemas/iso19115-3.2018/src/main/plugin/iso19115-3.2018/convert/fromJsonLdEsri.xsl new file mode 100644 index 0000000000..0275423e99 --- /dev/null +++ b/schemas/iso19115-3.2018/src/main/plugin/iso19115-3.2018/convert/fromJsonLdEsri.xsl @@ -0,0 +1,449 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ISO 19115-3 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + originator + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/web-ui/src/main/resources/catalog/locales/en-admin.json b/web-ui/src/main/resources/catalog/locales/en-admin.json index e5e8ff520f..7c09d3bc8a 100644 --- a/web-ui/src/main/resources/catalog/locales/en-admin.json +++ b/web-ui/src/main/resources/catalog/locales/en-admin.json @@ -290,6 +290,7 @@ "cron-0 15 10 ? * MON-FRI": "Fire at 10:15am every Monday, Tuesday, Wednesday, Thursday and Friday", "harvesterTimeZoneHelp": "Time in cron expression will be interpreted as in {{timeZoneTransl}} {{zoneOffset}} timezone.", "csvExport": "Export as CSV", + "simpleurl": "Simple URL", "duplicatedValueFoundHarvesterName": "An harvester with that name already exists. Choose another one.", "duplicatedValueFoundUserName": "A user with that name already exists. Choose another one.", "duplicatedValueFoundUserEmail": "A user with that email already exists. 
Choose another one.", @@ -446,6 +447,7 @@ "harvesterReport": "Catalog harvester report", "harvester-arcsde": "ArcSDE", "harvester-arcsdeHelp": "Harvest metadata records from an ArcSDE database.", + "harvester-simpleurl": "Simple URL", "harvester-csw": "OGC CSW 2.0.2", "harvester-cswHelp": "Harvest from a CSW server", "harvester-filesystem": "Directory", diff --git a/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.html b/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.html new file mode 100644 index 0000000000..4581d82ec1 --- /dev/null +++ b/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.html @@ -0,0 +1,106 @@ +

+ + +
+
+
+
+ + +
+ harvesterMainConfigurationFor {{('harvester-' + + harvesterSelected['@type']) | translate}} + +
+ + +

simpleurl-urlHelp

+
+
+ + +
+ harvesterAdvancedConfigurationFor + {{harvesterSelected['@type'] | translate}} + + +
+ +
+ + +

simpleurl-loopElementHelp

+
+ +
+ + +

simpleurl-numberOfRecordPathHelp

+
+ +
+ + +

simpleurl-recordIdPathHelp

+
+ +
+ + +

simpleurl-pageFromParamHelp

+
+ +
+ + +

simpleurl-pageSizeParamHelp

+
+ +
+ + +

simpleurl-toISOConversionHelp

+
+ + +
+ + +
+ +
+

harvesterValidateHelp

+
+
+ +
+
diff --git a/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.js b/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.js
new file mode 100644
index 0000000000..60a14b4424
--- /dev/null
+++ b/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.js
@@ -0,0 +1,107 @@
+// This is not very elegant and should be replaced by some kind
+// of Angular module.
+//
+// Global helper object for the "simpleurl" harvester type. It offers:
+//   createNew()              - default configuration object for a new harvester
+//   buildResponseCSWSearch() - string built from the selected harvester's searches
+//   buildResponse()          - string built from all fields of a harvester object
+var gnHarvestersimpleurl = {
+  // Returns a fresh harvester object of type "simpleurl": empty owner and
+  // site values (icon "blank.png", url "http://"), validation
+  // "NOVALIDATION", schedule "0 0 0 ? * *", overrideUuid "SKIP", status
+  // "active", and one privileges entry (id "1") carrying the "view" and
+  // "dynamic" operations.
+  createNew : function() {
+    return {
+      "@id" : "",
+      "@type" : "simpleurl",
+      "owner" : [],
+      "ownerGroup" : [],
+      "ownerUser": [""],
+      "site" : {
+        "name" : "",
+        "uuid" : "",
+        "icon" : "blank.png",
+        "account" : {
+          "use" : false,
+          "username" : [],
+          "password" : []
+        },
+        "url" : "http://",
+        "loopElement" : "",
+        "numberOfRecordPath": "",
+        "pageSizeParam": "",
+        "pageFromParam": "",
+        "recordIdPath": "",
+        "toISOConversion": ""
+      },
+      "content" : {
+        "validate" : "NOVALIDATION"
+      },
+      "options" : {
+        "every" : "0 0 0 ? * *",
+        "oneRunOnly" : false,
+        "overrideUuid": "SKIP",
+        "status" : "active"
+      },
+      "privileges" : [ {
+        "@id" : "1",
+        "operation" : [ {
+          "@name" : "view"
+        }, {
+          "@name" : "dynamic"
+        } ]
+      } ],
+      "categories" : [],
+      "info" : {
+        "lastRun" : [],
+        "running" : false
+      }
+    };
+  },
+  // Walks the own properties of $scope.harvesterSelected.searches[0] and
+  // appends one fragment per property, built from the property name (tag)
+  // and its .value. Returns the accumulated string; when there are no
+  // searches only the wrapping literals are returned.
+  // NOTE(review): several string literals here are empty as written; they
+  // look like XML markup that was lost - confirm against the original file.
+  buildResponseCSWSearch : function($scope) {
+    var body = '';
+    if ($scope.harvesterSelected.searches) {
+      for(var tag in $scope.harvesterSelected.searches[0]) {
+        if($scope.harvesterSelected.searches[0].hasOwnProperty(tag)) {
+          var value = $scope.harvesterSelected.searches[0][tag].value;
+          // Save all values even if empty
+          // XML to JSON does not convert single child to Object but Array
+          // In that situation, saving only one parameter will make this
+          // happen and then search criteria name which is the tag name
+          // will be lost.
+          // if (value) {
+          body += '<' + tag + '>' + value + '';
+          // }
+        }
+      }
+    }
+    return '' + body + '';
+  },
+  // Concatenates the fields of harvester object h (owner, site, account,
+  // paging/record paths, options, content) into one string and returns it.
+  //   h      - harvester object with the fields read below
+  //   $scope - must provide harvesterSelected (read through
+  //            buildResponseCSWSearch), buildResponseGroup and
+  //            buildResponseCategory; the last two are not defined here.
+  // h.site.rejectDuplicateResource is read although createNew() does not
+  // set it.
+  // NOTE(review): the literals between the fields are empty or a single
+  // space as written, and the '&' replacement text equals the pattern; both
+  // look like markup/entities that were lost - confirm against the original.
+  buildResponse : function(h, $scope) {
+    var body = ''
+      + ' ' + h.ownerGroup[0] + ''
+      + ' ' + h.ownerUser[0] + ''
+      + ' '
+      + ' ' + h.site.name + ''
+      + ' ' + h.site.rejectDuplicateResource + ''
+      + ' ' + h.site.url.replace(/&/g, '&') + ''
+      + ' ' + h.site.icon + ''
+      + ' '
+      + ' ' + h.site.account.use + ''
+      + ' ' + h.site.account.username + ''
+      + ' ' + h.site.account.password + ''
+      + ' '
+      + ' ' + h.site.loopElement + ''
+      + ' ' + h.site.numberOfRecordPath + ''
+      + ' ' + h.site.recordIdPath + ''
+      + ' ' + h.site.pageFromParam + ''
+      + ' ' + h.site.pageSizeParam + ''
+      + ' ' + h.site.toISOConversion + ''
+      + ' '
+      + gnHarvestersimpleurl.buildResponseCSWSearch($scope)
+      + ' '
+      + ' ' + h.options.oneRunOnly + ''
+      + ' ' + h.options.overrideUuid + ''
+      + ' ' + h.options.every + ''
+      + ' ' + h.options.status + ''
+      + ' '
+      + ' '
+      + ' ' + h.content.validate + ''
+      + ' '
+      + $scope.buildResponseGroup(h)
+      + $scope.buildResponseCategory(h) + '';
+    return body;
+  }
+};
diff --git a/web/src/main/webapp/xsl/conversion/import/CKAN-to-ISO19115-3-2018.xsl b/web/src/main/webapp/xsl/conversion/import/CKAN-to-ISO19115-3-2018.xsl
new file mode 100644
index 0000000000..0894bb00e9
--- /dev/null
+++ b/web/src/main/webapp/xsl/conversion/import/CKAN-to-ISO19115-3-2018.xsl
@@ -0,0 +1,5 @@
+
+
+
+
diff --git a/web/src/main/webapp/xsl/conversion/import/ESRIDCAT-to-ISO19115-3-2018.xsl b/web/src/main/webapp/xsl/conversion/import/ESRIDCAT-to-ISO19115-3-2018.xsl
new file mode 100644
index 0000000000..e7eea18696
--- /dev/null
+++ b/web/src/main/webapp/xsl/conversion/import/ESRIDCAT-to-ISO19115-3-2018.xsl
@@ -0,0 +1,5 @@
+
+
+
+
diff --git a/web/src/main/webapp/xsl/conversion/import/OPENDATASOFT-to-ISO19115-3-2018.xsl b/web/src/main/webapp/xsl/conversion/import/OPENDATASOFT-to-ISO19115-3-2018.xsl
new file mode 100644
index 0000000000..40d2dcc6a9
--- /dev/null
+++
b/web/src/main/webapp/xsl/conversion/import/OPENDATASOFT-to-ISO19115-3-2018.xsl @@ -0,0 +1,5 @@ + + + + diff --git a/web/src/main/webapp/xsl/xml/harvesting/simpleurl.xsl b/web/src/main/webapp/xsl/xml/harvesting/simpleurl.xsl new file mode 100644 index 0000000000..d786c7ece7 --- /dev/null +++ b/web/src/main/webapp/xsl/xml/harvesting/simpleurl.xsl @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 65847bf693869ae5aad7a93fc839f30f2adc305a Mon Sep 17 00:00:00 2001 From: Olivier Guyot Date: Thu, 9 Sep 2021 17:09:56 +0200 Subject: [PATCH 3/5] Add ESRI DCAT xsl, point DCAT2 plugin to custom branch The DCAT2 plugin now points to a forked branch containing an ESRI DCAT conversion xsl --- schemas/dcat2 | 2 +- .../main/webapp/xsl/conversion/import/ESRIDCAT-to-DCAT2.xsl | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 web/src/main/webapp/xsl/conversion/import/ESRIDCAT-to-DCAT2.xsl diff --git a/schemas/dcat2 b/schemas/dcat2 index 6e17ca2751..376546a618 160000 --- a/schemas/dcat2 +++ b/schemas/dcat2 @@ -1 +1 @@ -Subproject commit 6e17ca275180c396629870412245fa430c8665d5 +Subproject commit 376546a6184bf3d3c935cb6e541ef4bfc5b0138e diff --git a/web/src/main/webapp/xsl/conversion/import/ESRIDCAT-to-DCAT2.xsl b/web/src/main/webapp/xsl/conversion/import/ESRIDCAT-to-DCAT2.xsl new file mode 100644 index 0000000000..c4a50b44c3 --- /dev/null +++ b/web/src/main/webapp/xsl/conversion/import/ESRIDCAT-to-DCAT2.xsl @@ -0,0 +1,5 @@ + + + + From c80c49329b2ca1585e3ade9b883daf92a15603a6 Mon Sep 17 00:00:00 2001 From: Florent gravin Date: Fri, 10 Sep 2021 18:27:26 +0200 Subject: [PATCH 4/5] add opendatasoft to DCAT transfo --- .gitmodules | 2 +- schemas/dcat2 | 2 +- .../webapp/xsl/conversion/import/OPENDATASOFT-to-DCAT2.xsl | 5 +++++ 3 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 web/src/main/webapp/xsl/conversion/import/OPENDATASOFT-to-DCAT2.xsl diff --git a/.gitmodules b/.gitmodules index 
407ed19277..1a4d1cf5ce 100644 --- a/.gitmodules +++ b/.gitmodules @@ -12,4 +12,4 @@ url = https://github.com/geonetwork/doc.git [submodule "schemas/dcat2"] path = schemas/dcat2 - url = https://github.com/metadata101/dcat2 + url = https://github.com/camptocamp/geonetwork-dcat2 diff --git a/schemas/dcat2 b/schemas/dcat2 index 376546a618..43268aec4d 160000 --- a/schemas/dcat2 +++ b/schemas/dcat2 @@ -1 +1 @@ -Subproject commit 376546a6184bf3d3c935cb6e541ef4bfc5b0138e +Subproject commit 43268aec4dee0a145d83e7aa8f821286c15d709e diff --git a/web/src/main/webapp/xsl/conversion/import/OPENDATASOFT-to-DCAT2.xsl b/web/src/main/webapp/xsl/conversion/import/OPENDATASOFT-to-DCAT2.xsl new file mode 100644 index 0000000000..91b92f26aa --- /dev/null +++ b/web/src/main/webapp/xsl/conversion/import/OPENDATASOFT-to-DCAT2.xsl @@ -0,0 +1,5 @@ + + + + From 052ac47108b9566778c97dd5160c0cc1c5c866e4 Mon Sep 17 00:00:00 2001 From: Florent gravin Date: Fri, 10 Sep 2021 18:28:15 +0200 Subject: [PATCH 5/5] jsonHarvester: add api& nodeUrl in XML for transfo used by ODS to compute exports links --- .../harvest/harvester/simpleUrl/Harvester.java | 18 +++++++++++++++--- schemas/dcat2 | 2 +- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java index 088611c999..af17dd29e3 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java @@ -52,8 +52,10 @@ import java.io.IOException; import java.io.InputStreamReader; +import java.net.MalformedURLException; import java.net.URI; import java.net.URISyntaxException; +import java.net.URL; import java.nio.file.Path; import java.util.ArrayList; import java.util.HashMap; @@ -137,8 +139,16 @@ public HarvestResult harvest(Logger log) throws 
Exception { nodes.forEach(record -> { String uuid = this.extractUuidFromIdentifier(record.get(params.recordIdPath).asText()); - Element xml = convertRecordToXml(record, uuid); - uuids.put(uuid, xml); + String apiUrl = params.url.split("\\?")[0]; + URL url = null; + try { + url = new URL(apiUrl); + String nodeUrl = new StringBuilder(url.getProtocol()).append("://").append(url.getAuthority()).toString(); + Element xml = convertRecordToXml(record, uuid, apiUrl, nodeUrl); + uuids.put(uuid, xml); + } catch (MalformedURLException e) { + log.warning("Failed to parse Node URL"); + } }); aligner.align(uuids, errors); allUuids.putAll(uuids); @@ -226,7 +236,7 @@ protected List buildListOfUrl(SimpleUrlParams params, int numberOfRecord return urlList; } - private Element convertRecordToXml(JsonNode record, String uuid) { + private Element convertRecordToXml(JsonNode record, String uuid, String apiUrl, String nodeUrl) { ObjectMapper objectMapper = new ObjectMapper(); try { String recordAsXml = XML.toString( @@ -235,6 +245,8 @@ private Element convertRecordToXml(JsonNode record, String uuid) { recordAsXml = Xml.stripNonValidXMLCharacters(recordAsXml).replace("<@", "<").replace("