From fc96bc21404db2196d5be03bc42e13496c9ac1e5 Mon Sep 17 00:00:00 2001 From: Francois Prunayre Date: Tue, 17 Sep 2019 09:35:04 +0200 Subject: [PATCH 1/8] Harvester / Simple URL A simple harvester which takes a URL, expecting for now a JSON document, loops over the records identified by a JSON Pointer and applies an XSL transformation to convert each of them to ISO format. This should allow GeoNetwork to harvest open data portals, which usually provide a search API returning JSON responses. --- .../main/java/org/fao/geonet/utils/Xml.java | 18 +- .../harvest/harvester/simpleUrl/Aligner.java | 283 ++++++++++++++++++ .../harvester/simpleUrl/Harvester.java | 222 ++++++++++++++ .../simpleUrl/SimpleUrlHarvester.java | 124 ++++++++ .../harvester/simpleUrl/SimpleUrlParams.java | 93 ++++++ .../resources/config-spring-geonetwork.xml | 5 +- .../resources/catalog/locales/en-admin.json | 3 +- .../admin/harvest/type/simpleurl.html | 98 ++++++ .../templates/admin/harvest/type/simpleurl.js | 105 +++++++ .../import/CKAN-to-ISO19115-3-2018.xsl | 5 + .../webapp/xsl/xml/harvesting/simpleurl.xsl | 31 ++ 11 files changed, 983 insertions(+), 4 deletions(-) create mode 100644 harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Aligner.java create mode 100644 harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java create mode 100644 harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlHarvester.java create mode 100644 harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlParams.java create mode 100644 web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.html create mode 100644 web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.js create mode 100644 web/src/main/webapp/xsl/conversion/import/CKAN-to-ISO19115-3-2018.xsl create mode 100644 web/src/main/webapp/xsl/xml/harvesting/simpleurl.xsl diff --git 
a/common/src/main/java/org/fao/geonet/utils/Xml.java b/common/src/main/java/org/fao/geonet/utils/Xml.java index d873e36f08e..b60f24f139b 100644 --- a/common/src/main/java/org/fao/geonet/utils/Xml.java +++ b/common/src/main/java/org/fao/geonet/utils/Xml.java @@ -116,7 +116,23 @@ public final class Xml { public static final Namespace xsiNS = Namespace.getNamespace("xsi", XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI); public static final NioPathAwareEntityResolver PATH_RESOLVER = new NioPathAwareEntityResolver(); - //-------------------------------------------------------------------------- + public static String stripNonValidXMLCharacters(String in) { + StringBuffer out = new StringBuffer(); + char current; + + if (in == null || ("".equals(in))) return ""; + for (int i = 0; i < in.length(); i++) { + current = in.charAt(i); + if ((current == 0x9) || + (current == 0xA) || + (current == 0xD) || + ((current >= 0x20) && (current <= 0xD7FF)) || + ((current >= 0xE000) && (current <= 0xFFFD)) || + ((current >= 0x10000) && (current <= 0x10FFFF))) + out.append(current); + } + return out.toString(); + } /** * diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Aligner.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Aligner.java new file mode 100644 index 00000000000..820b5d6c86f --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Aligner.java @@ -0,0 +1,283 @@ +//============================================================================= +//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at 
//=============================================================================
//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the
//=== United Nations (FAO-UN), United Nations World Food Programme (WFP)
//=== and United Nations Environment Programme (UNEP)
//===
//=== This program is free software; you can redistribute it and/or modify
//=== it under the terms of the GNU General Public License as published by
//=== the Free Software Foundation; either version 2 of the License, or (at
//=== your option) any later version.
//===
//=== This program is distributed in the hope that it will be useful, but
//=== WITHOUT ANY WARRANTY; without even the implied warranty of
//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//=== General Public License for more details.
//===
//=== You should have received a copy of the GNU General Public License
//=== along with this program; if not, write to the Free Software
//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
//===
//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2,
//=== Rome - Italy. email: geonetwork@osgeo.org
//==============================================================================

package org.fao.geonet.kernel.harvest.harvester.simpleUrl;

import jeeves.server.context.ServiceContext;
import org.fao.geonet.GeonetContext;
import org.fao.geonet.Logger;
import org.fao.geonet.constants.Geonet;
import org.fao.geonet.domain.AbstractMetadata;
import org.fao.geonet.domain.ISODate;
import org.fao.geonet.domain.Metadata;
import org.fao.geonet.domain.MetadataType;
import org.fao.geonet.exceptions.OperationAbortedEx;
import org.fao.geonet.kernel.DataManager;
import org.fao.geonet.kernel.UpdateDatestamp;
import org.fao.geonet.kernel.datamanager.IMetadataIndexer;
import org.fao.geonet.kernel.datamanager.IMetadataManager;
import org.fao.geonet.kernel.datamanager.IMetadataUtils;
import org.fao.geonet.kernel.harvest.BaseAligner;
import org.fao.geonet.kernel.harvest.harvester.CategoryMapper;
import org.fao.geonet.kernel.harvest.harvester.GroupMapper;
import org.fao.geonet.kernel.harvest.harvester.HarvestError;
import org.fao.geonet.kernel.harvest.harvester.HarvestResult;
import org.fao.geonet.kernel.harvest.harvester.UUIDMapper;
import org.fao.geonet.kernel.search.index.LuceneIndexLanguageTracker;
import org.fao.geonet.repository.OperationAllowedRepository;
import org.jdom.Element;

import javax.transaction.Transactional;
import javax.transaction.Transactional.TxType;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicBoolean;

/**
 * Synchronises the local catalogue with the records collected by the simple URL harvester.
 *
 * Records are handed over as a map of record UUID to ISO metadata document. Each record is
 * inserted, updated or skipped depending on whether the UUID already exists locally and on
 * the harvester's "override UUID" policy. Local records of this harvester that are no
 * longer part of the remote response can then be removed with
 * {@link #cleanupRemovedRecords(Set)}.
 */
public class Aligner extends BaseAligner {

    private final ServiceContext context;
    private final DataManager dataMan;
    private final IMetadataUtils metadataUtils;
    private final IMetadataManager metadataManager;
    private final IMetadataIndexer metadataIndexer;
    private final HarvestResult result;
    private final Logger log;

    // Initialised by align(); null until then.
    private CategoryMapper localCateg;
    private GroupMapper localGroups;
    private UUIDMapper localUuids;

    public Aligner(AtomicBoolean cancelMonitor, ServiceContext sc, SimpleUrlParams params, Logger log) throws OperationAbortedEx {
        super(cancelMonitor);
        this.context = sc;
        this.params = params;
        this.log = log;

        GeonetContext gc = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME);
        dataMan = gc.getBean(DataManager.class);
        metadataUtils = gc.getBean(IMetadataUtils.class);
        metadataManager = gc.getBean(IMetadataManager.class);
        metadataIndexer = gc.getBean(IMetadataIndexer.class);

        result = new HarvestResult();
        result.unretrievable = 0;
        result.uuidSkipped = 0;
        result.couldNotInsert = 0;
    }

    /**
     * Inserts or updates every harvested record.
     *
     * @param records harvested records, keyed by record UUID; a {@code null} value marks a
     *                record that could not be converted
     * @param errors  collector receiving one {@link HarvestError} per record that failed
     * @return the running harvest statistics (the same instance on every call)
     */
    public HarvestResult align(Map<String, Element> records, Collection<HarvestError> errors) throws Exception {
        if (cancelMonitor.get()) {
            return result;
        }

        log.debug("Start of alignment for : " + params.getName());

        localCateg = new CategoryMapper(context);
        localGroups = new GroupMapper(context);
        localUuids = new UUIDMapper(context.getBean(IMetadataUtils.class), params.getUuid());

        insertOrUpdate(records, errors);
        log.debug("End of alignment for : " + params.getName());

        return result;
    }

    /**
     * Processes the records one by one. A failure on one record is logged and recorded in
     * {@code errors} but does not stop the remaining records from being processed.
     */
    private void insertOrUpdate(Map<String, Element> records, Collection<HarvestError> errors) {
        for (Map.Entry<String, Element> record : records.entrySet()) {
            if (cancelMonitor.get()) {
                return;
            }

            try {
                alignRecord(record);
                context.getBean(LuceneIndexLanguageTracker.class).commit();
                result.totalMetadata++;
            } catch (Throwable t) {
                errors.add(new HarvestError(this.context, t));
                log.error("Unable to process record from simple URL harvester (" + this.params.getName() + ")");
                log.error("   Record failed: " + record.getKey() + ". Error is: " + t.getMessage());
                log.error(t);
            } finally {
                result.originalMetadata++;
            }
        }
    }

    /** Decides whether a single record has to be added, updated, overridden or skipped. */
    private void alignRecord(Map.Entry<String, Element> record) throws Exception {
        String uuid = record.getKey();
        String id = metadataUtils.getMetadataId(uuid);

        if (id == null) {
            // Record doesn't exist locally at all.
            log.debug("Adding record with uuid " + uuid);
            addMetadata(record, null);
            return;
        }

        if (localUuids.getID(uuid) != null) {
            // Record exists and belongs to this harvester.
            updateMetadata(record, id, false);
            return;
        }

        // Record exists locally but does not belong to this harvester.
        result.datasetUuidExist++;

        switch (params.getOverrideUuid()) {
            case OVERRIDE:
                log.debug("Overriding record with uuid " + uuid);
                // updateMetadata() maintains the updated/unchanged counters itself;
                // incrementing updatedMetadata here as well counted the record twice.
                updateMetadata(record, id, true);
                break;
            case RANDOM:
                log.debug("Generating random uuid for remote record with uuid " + uuid);
                // NOTE(review): the record is stored under a UUID that is not a key of the
                // harvested map, so cleanupRemovedRecords() looks set to remove it on the
                // next run - confirm whether that is intended.
                addMetadata(record, UUID.randomUUID().toString());
                break;
            case SKIP:
                log.debug("Skipping record with uuid " + uuid);
                result.uuidSkipped++;
                break;
            default:
                break;
        }
    }

    /**
     * Removes the local records of this harvester that are no longer part of the remote
     * response.
     *
     * @param records UUIDs of the records found in the remote response
     * @return the running harvest statistics
     */
    @Transactional(value = TxType.REQUIRES_NEW)
    public HarvestResult cleanupRemovedRecords(Set<String> records) throws Exception {
        // localUuids is only available once align() ran without being cancelled.
        if (cancelMonitor.get() || localUuids == null) {
            return result;
        }

        for (String uuid : localUuids.getUUIDs()) {
            if (!records.contains(uuid)) {
                String id = localUuids.getID(uuid);
                log.debug("  - Removing old metadata with local id:" + id);
                metadataManager.deleteMetadata(context, id);
                result.locallyRemoved++;
            }
        }
        dataMan.forceIndexChanges();

        return result;
    }

    /**
     * Inserts a new metadata record.
     *
     * @param record            UUID and ISO document of the harvested record
     * @param overrideUuidValue UUID to store the record under instead of its own one, or
     *                          {@code null} to keep the harvested UUID
     */
    private void addMetadata(Map.Entry<String, Element> record, String overrideUuidValue) throws Exception {
        if (cancelMonitor.get()) {
            return;
        }

        Element xml = record.getValue();
        if (xml == null) {
            result.unretrievable++;
            return;
        }

        String schema = dataMan.autodetectSchema(xml, null);
        if (schema == null) {
            log.debug("  - Metadata skipped due to unknown schema. uuid:" + record.getKey());
            result.unknownSchema++;
            return;
        }

        String uuid = record.getKey();
        if (overrideUuidValue != null) {
            log.debug(String.format("  - Overriding UUID %s by %s", record.getKey(), overrideUuidValue));
            uuid = overrideUuidValue;
            xml = dataMan.setUUID(schema, uuid, record.getValue());
        }

        log.debug("  - Adding metadata with uuid:" + uuid + " schema:" + schema);

        final String dateModified = dataMan.extractDateModified(schema, xml);

        AbstractMetadata metadata = new Metadata();
        metadata.setUuid(uuid);
        Integer ownerId = getOwner();
        metadata.getDataInfo().
            setSchemaId(schema).
            setRoot(xml.getQualifiedName()).
            setType(MetadataType.METADATA).
            setChangeDate(new ISODate(dateModified)).
            setCreateDate(new ISODate(dateModified));
        metadata.getSourceInfo().
            setSourceId(params.getUuid()).
            setOwner(ownerId).
            setGroupOwner(getGroupOwner());
        metadata.getHarvestInfo().
            setHarvested(true).
            setUuid(params.getUuid());

        addCategories(metadata, params.getCategories(), localCateg, context, null, false);

        metadata = metadataManager.insertMetadata(context, metadata, xml, true, false, false, UpdateDatestamp.NO, false, false);

        String id = String.valueOf(metadata.getId());

        addPrivileges(id, params.getPrivileges(), localGroups, dataMan, context);

        metadataIndexer.indexMetadata(id, true, null);
        result.addedMetadata++;
    }

    /**
     * Replaces the content of an existing record and resets its privileges and categories
     * to the ones configured for this harvester.
     *
     * @param ri    UUID and ISO document of the harvested record
     * @param id    local identifier of the record to update
     * @param force when {@code true} the record is also re-assigned to this harvester
     * @return {@code true} when the record was updated, {@code false} when it was left
     *         untouched (no document, or schema not recognised)
     */
    @Transactional(value = TxType.REQUIRES_NEW)
    boolean updateMetadata(Map.Entry<String, Element> ri, String id, Boolean force) throws Exception {
        Element md = ri.getValue();
        if (md == null) {
            result.unchangedMetadata++;
            return false;
        }

        String schema = dataMan.autodetectSchema(md, null);
        if (schema == null) {
            // Same policy as addMetadata(): an unrecognised document is counted, not stored.
            log.debug("  - Metadata skipped due to unknown schema. uuid:" + ri.getKey());
            result.unknownSchema++;
            return false;
        }

        boolean validate = false;
        boolean ufo = false;
        boolean index = false;
        String language = context.getLanguage();
        final String dateModified = dataMan.extractDateModified(schema, md);

        final AbstractMetadata metadata = metadataManager.updateMetadata(context, id, md, validate, ufo, index, language, dateModified, true);

        if (Boolean.TRUE.equals(force)) {
            // Change ownership of metadata to this harvester.
            metadata.getHarvestInfo().setUuid(params.getUuid());
            metadata.getSourceInfo().setSourceId(params.getUuid());

            metadataManager.save((Metadata) metadata);
        }

        OperationAllowedRepository repository = context.getBean(OperationAllowedRepository.class);
        repository.deleteAllByMetadataId(Integer.parseInt(id));

        addPrivileges(id, params.getPrivileges(), localGroups, dataMan, context);

        metadata.getCategories().clear();
        addCategories(metadata, params.getCategories(), localCateg, context, null, true);
        result.updatedMetadata++;
        return true;
    }
}
//=============================================================================
//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the
//=== United Nations (FAO-UN), United Nations World Food Programme (WFP)
//=== and United Nations Environment Programme (UNEP)
//===
//=== This program is free software; you can redistribute it and/or modify
//=== it under the terms of the GNU General Public License as published by
//=== the Free Software Foundation; either version 2 of the License, or (at
//=== your option) any later version.
//===
//=== This program is distributed in the hope that it will be useful, but
//=== WITHOUT ANY WARRANTY; without even the implied warranty of
//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//=== General Public License for more details.
//===
//=== You should have received a copy of the GNU General Public License
//=== along with this program; if not, write to the Free Software
//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
//===
//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2,
//=== Rome - Italy. email: geonetwork@osgeo.org
//==============================================================================

package org.fao.geonet.kernel.harvest.harvester.simpleUrl;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.io.CharStreams;
import jeeves.server.context.ServiceContext;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.http.client.methods.HttpGet;
import org.fao.geonet.Logger;
import org.fao.geonet.constants.Geonet;
import org.fao.geonet.exceptions.BadParameterEx;
import org.fao.geonet.kernel.harvest.harvester.HarvestError;
import org.fao.geonet.kernel.harvest.harvester.HarvestResult;
import org.fao.geonet.kernel.harvest.harvester.IHarvester;
import org.fao.geonet.lib.Lib;
import org.fao.geonet.utils.GeonetHttpRequestFactory;
import org.fao.geonet.utils.Log;
import org.fao.geonet.utils.Xml;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.XML;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.client.ClientHttpResponse;

import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;

/**
 * Harvest metadata from a JSON source.
 *
 * The JSON source can be a simple JSON file or
 * an URL with indication on how to pass paging information.
 */
// NOTE(review): IHarvester is used as a raw type exactly as it appears in this patch; if the
// interface is generic its type argument was probably lost and should be restored.
class Harvester implements IHarvester {
    public static final String LOGGER_NAME = "geonetwork.harvester.json";

    /** Record property holding the identifier when no {@code recordIdPath} is configured. */
    private static final String DEFAULT_RECORD_ID_FIELD = "id";

    /** Shared mapper: building one per record is needless work. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    private final AtomicBoolean cancelMonitor;
    private Logger log;
    private SimpleUrlParams params;
    private ServiceContext context;

    // Assigned from the service context in harvest(): this class is created with "new", not
    // by the container.
    @Autowired
    GeonetHttpRequestFactory requestFactory;

    /**
     * Contains a list of accumulated errors during the executing of this harvest.
     */
    private List<HarvestError> errors = new LinkedList<>();

    public Harvester(AtomicBoolean cancelMonitor, Logger log, ServiceContext context, SimpleUrlParams params) {
        this.cancelMonitor = cancelMonitor;
        this.log = log;
        this.context = context;
        this.params = params;
    }

    /**
     * Downloads the configured URL, converts every record found under the configured loop
     * element to ISO metadata and aligns the local catalogue with the outcome.
     *
     * @param log logger to report progress to
     * @return the harvest statistics, an empty result when the harvest was cancelled right
     *         after the download, or {@code null} when the alignment failed
     */
    public HarvestResult harvest(Logger log) throws Exception {
        this.log = log;
        log.debug("Retrieving simple URL: " + params.getName());

        requestFactory = context.getBean(GeonetHttpRequestFactory.class);

        String jsonResponse = retrieveUrl(log);
        if (cancelMonitor.get()) {
            return new HarvestResult();
        }
        log.debug("Response is: " + jsonResponse);

        // TODO: Add support for XML or JSON
        JsonNode jsonObj = OBJECT_MAPPER.readTree(jsonResponse);

        logNumberOfRecords(jsonObj);
        Map<String, Element> uuids = collectRecords(jsonObj);

        boolean error = false;
        HarvestResult result = null;
        try {
            Aligner aligner = new Aligner(cancelMonitor, context, params, log);
            aligner.align(uuids, errors);
            result = aligner.cleanupRemovedRecords(uuids.keySet());
        } catch (Exception t) {
            error = true;
            log.error("Unknown error trying to harvest");
            log.error(t.getMessage());
            log.error(t);
            errors.add(new HarvestError(context, t));
        } catch (Throwable t) {
            error = true;
            log.fatal("Something unknown and terrible happened while harvesting");
            log.fatal(t.getMessage());
            errors.add(new HarvestError(context, t));
        }

        log.info("Total records processed in all searches :" + uuids.size());
        if (error) {
            log.warning("Due to previous errors the align process has not been called");
        }

        return result;
    }

    /** Logs the total number of records announced by the response, when a path is configured. */
    private void logNumberOfRecords(JsonNode response) {
        if (StringUtils.isEmpty(params.numberOfRecordPath)) {
            return;
        }
        try {
            int numberOfRecordsToHarvest = response.at(params.numberOfRecordPath).asInt();
            log.debug("Number of records to harvest: " + numberOfRecordsToHarvest);
        } catch (Exception e) {
            // Purely informative value: report the problem and carry on with the harvest.
            log.warning("Failed to read number of records at '" + params.numberOfRecordPath
                + "'. Error is: " + e.getMessage());
        }
    }

    /**
     * Converts every record found under the loop element.
     *
     * A record that cannot be identified is skipped on its own instead of aborting the
     * collection of all the following records.
     *
     * @return converted records keyed by record identifier; the value is {@code null} when
     *         the conversion of that record failed
     */
    private Map<String, Element> collectRecords(JsonNode response) {
        Map<String, Element> records = new HashMap<>();
        if (StringUtils.isEmpty(params.loopElement)) {
            return records;
        }

        JsonNode nodes;
        try {
            nodes = response.at(params.loopElement);
        } catch (Exception e) {
            log.warning("Failed to collect record in response");
            log.warning("Invalid loop element '" + params.loopElement + "'. Error is: " + e.getMessage());
            return records;
        }
        log.debug("Number of records in response: " + nodes.size());

        for (JsonNode record : nodes) {
            try {
                String recordId = extractRecordId(record);
                if (StringUtils.isEmpty(recordId)) {
                    log.warning("Failed to collect record in response");
                    log.warning("Record skipped, no identifier found in: " + record);
                    continue;
                }
                records.put(recordId, convertRecordToXml(record));
            } catch (Exception e) {
                log.warning("Failed to collect record in response");
                log.error(e);
            }
        }
        return records;
    }

    /**
     * Reads the record identifier using the configured {@code recordIdPath}, which may be
     * either a property name or a JSON Pointer (starting with "/"). Falls back to the
     * {@code id} property when nothing is configured.
     *
     * @return the identifier, or {@code null} when the record has none
     */
    private String extractRecordId(JsonNode record) {
        String idPath = StringUtils.isNotEmpty(params.recordIdPath)
            ? params.recordIdPath : DEFAULT_RECORD_ID_FIELD;
        JsonNode idNode = idPath.startsWith("/") ? record.at(idPath) : record.get(idPath);
        if (idNode == null || idNode.isMissingNode() || idNode.isNull()) {
            return null;
        }
        return idNode.asText();
    }

    /**
     * Serialises the JSON record as XML (root element {@code record}) and applies the
     * configured import stylesheet to it.
     *
     * @return the transformed document, or {@code null} when the conversion failed
     */
    private Element convertRecordToXml(JsonNode record) {
        try {
            String recordAsXml = XML.toString(
                new JSONObject(
                    OBJECT_MAPPER.writeValueAsString(record)), "record");
            recordAsXml = Xml.stripNonValidXMLCharacters(recordAsXml);
            Element recordAsElement = Xml.loadString(recordAsXml, false);
            Path importXsl = context.getAppPath().resolve(Geonet.Path.IMPORT_STYLESHEETS);
            final Path xslPath = importXsl.resolve(params.toISOConversion + ".xsl");
            return Xml.transform(recordAsElement, xslPath);
        } catch (Exception e) {
            log.error("Failed to convert record to XML using conversion '"
                + params.toISOConversion + "'. Error is: " + e.getMessage());
            log.error(e);
        }
        return null;
    }

    /**
     * Downloads the document located at the configured URL.
     *
     * @return the response body decoded as UTF-8
     * @throws BadParameterEx when the configured URL is not valid
     */
    private String retrieveUrl(Logger log) throws Exception {
        if (!Lib.net.isUrlValid(params.url))
            throw new BadParameterEx("Invalid URL", params.url);
        HttpGet httpMethod = null;
        ClientHttpResponse httpResponse = null;

        try {
            httpMethod = new HttpGet(createUrl(params.url));
            httpResponse = requestFactory.execute(httpMethod);
            int status = httpResponse.getRawStatusCode();
            log.debug("Request status code: " + status);
            // JSON is exchanged as UTF-8; do not depend on the platform default charset.
            return CharStreams.toString(
                new InputStreamReader(httpResponse.getBody(), StandardCharsets.UTF_8));
        } finally {
            if (httpMethod != null) {
                httpMethod.releaseConnection();
            }
            IOUtils.closeQuietly(httpResponse);
        }
    }

    private URI createUrl(String jsonUrl) throws URISyntaxException {
        // TODO: Add paging and loop
        return new URI(jsonUrl);
    }


    public List<HarvestError> getErrors() {
        return errors;
    }
}
//=============================================================================
//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the
//=== United Nations (FAO-UN), United Nations World Food Programme (WFP)
//=== and United Nations Environment Programme (UNEP)
//===
//=== This program is free software; you can redistribute it and/or modify
//=== it under the terms of the GNU General Public License as published by
//=== the Free Software Foundation; either version 2 of the License, or (at
//=== your option) any later version.
//===
//=== This program is distributed in the hope that it will be useful, but
//=== WITHOUT ANY WARRANTY; without even the implied warranty of
//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//=== General Public License for more details.
//===
//=== You should have received a copy of the GNU General Public License
//=== along with this program; if not, write to the Free Software
//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
//===
//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2,
//=== Rome - Italy. email: geonetwork@osgeo.org
//==============================================================================

package org.fao.geonet.kernel.harvest.harvester.simpleUrl;

import jeeves.server.context.ServiceContext;
import org.fao.geonet.Logger;
import org.fao.geonet.domain.Source;
import org.fao.geonet.domain.SourceType;
import org.fao.geonet.exceptions.BadInputEx;
import org.fao.geonet.kernel.harvest.harvester.AbstractHarvester;
import org.fao.geonet.kernel.harvest.harvester.AbstractParams;
import org.fao.geonet.kernel.harvest.harvester.HarvestResult;
import org.fao.geonet.repository.SourceRepository;
import org.fao.geonet.resources.Resources;
import org.jdom.Element;
import org.springframework.beans.factory.annotation.Autowired;

import java.io.File;
import java.sql.SQLException;
import java.util.UUID;

/**
 * Harvester entry point for the "simple URL" harvester type: keeps the harvester
 * configuration in the settings and delegates the actual work to {@link Harvester}.
 */
// NOTE(review): AbstractHarvester is used as a raw type exactly as it appears in this patch;
// if the class is generic its type argument was probably lost and should be restored.
public class SimpleUrlHarvester extends AbstractHarvester {

    private SimpleUrlParams params;

    @Autowired
    SourceRepository sourceRepository;

    /** Loads the parameters of an existing harvester from its settings node. */
    protected void doInit(Element node, ServiceContext context) throws BadInputEx {
        params = new SimpleUrlParams(dataMan);
        super.setParams(params);
        params.create(node);
    }

    /**
     * Creates a new harvester: assigns it a random UUID, stores its settings and registers
     * the matching catalogue source and logo.
     *
     * @return the identifier of the new settings node
     */
    protected String doAdd(Element node) throws BadInputEx, SQLException {
        params = new SimpleUrlParams(dataMan);
        super.setParams(params);

        params.create(node);
        params.setUuid(UUID.randomUUID().toString());

        final String settingId = harvesterSettingsManager.add("harvesting", "node", getType());
        storeNode(params, "id:" + settingId);

        sourceRepository.save(
            new Source(params.getUuid(), params.getName(), params.getTranslations(), SourceType.harvester));
        Resources.copyLogo(context, logoPath(params.icon), params.getUuid());

        return settingId;
    }

    /**
     * Applies the submitted changes on a copy of the current parameters, replaces the
     * stored settings with it and only then makes the copy the active parameters.
     */
    protected void doUpdate(String id, Element node) throws BadInputEx, SQLException {
        final SimpleUrlParams updated = params.copy();
        super.setParams(params);

        updated.update(node);

        final String settingsPath = "harvesting/id:" + id;
        harvesterSettingsManager.removeChildren(settingsPath);
        storeNode(updated, settingsPath);

        sourceRepository.save(
            new Source(updated.getUuid(), updated.getName(), updated.getTranslations(), SourceType.harvester));
        Resources.copyLogo(context, logoPath(updated.icon), updated.getUuid());

        params = updated;
        super.setParams(params);
    }

    /**
     * Stores in the harvester settings table some values not managed by {@link AbstractHarvester}
     *
     * @param p         the harvester parameters.
     * @param path      not used by this harvester
     * @param siteId    identifier of the settings node receiving the site values
     * @param optionsId not used by this harvester
     * @throws SQLException declared by the contract; not thrown by the visible code
     */
    protected void storeNodeExtra(AbstractParams p, String path, String siteId, String optionsId) throws SQLException {
        final SimpleUrlParams site = (SimpleUrlParams) p;
        final String sitePath = "id:" + siteId;

        harvesterSettingsManager.add(sitePath, "url", site.url);
        harvesterSettingsManager.add(sitePath, "icon", site.icon);
        harvesterSettingsManager.add(sitePath, "loopElement", site.loopElement);
        harvesterSettingsManager.add(sitePath, "numberOfRecordPath", site.numberOfRecordPath);
        harvesterSettingsManager.add(sitePath, "recordIdPath", site.recordIdPath);
        harvesterSettingsManager.add(sitePath, "pageSize", site.pageSize);
        harvesterSettingsManager.add(sitePath, "toISOConversion", site.toISOConversion);
    }

    /** Runs one harvest and keeps its statistics in {@code result}. */
    public void doHarvest(Logger log) throws Exception {
        result = new Harvester(cancelMonitor, log, context, params).harvest(log);
    }

    /** Builds the relative path of a harvester icon below the images folder. */
    private static String logoPath(String icon) {
        return "images" + File.separator + "harvesting" + File.separator + icon;
    }
}
+//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. +//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.simpleUrl; + +import org.fao.geonet.Util; +import org.fao.geonet.exceptions.BadInputEx; +import org.fao.geonet.kernel.DataManager; +import org.fao.geonet.kernel.harvest.harvester.AbstractParams; +import org.jdom.Element; + +public class SimpleUrlParams extends AbstractParams { + public String url; + public String icon; + public String loopElement; + public String numberOfRecordPath; + public String recordIdPath; + int pageSize; + public String toISOConversion; + + public SimpleUrlParams(DataManager dm) { + super(dm); + } + + /** + * called when a new entry must be added. Reads values from the provided entry, providing + * default values. 
+ */ + public void create(Element node) throws BadInputEx { + super.create(node); + + Element site = node.getChild("site"); + + url = Util.getParam(site, "url", "http://dados.gov.br/api/3/action/package_search?q="); + loopElement = Util.getParam(site, "loopElement", "/result/results"); + numberOfRecordPath = Util.getParam(site, "numberOfRecordPath", "/result/count"); + recordIdPath = Util.getParam(site, "recordIdPath", "id"); + pageSize = Util.getParamAsInt(site, "pageSize"); + toISOConversion = Util.getParam(site, "toISOConversion", "CKAN-to-ISO19115-3-2018"); + icon = Util.getParam(site, "icon", "default.gif"); + } + + /** + * called when an entry has changed and variables must be updated. + */ + public void update(Element node) throws BadInputEx { + super.update(node); + + Element site = node.getChild("site"); + + url = Util.getParam(site, "url", url); + loopElement = Util.getParam(site, "loopElement", ""); + numberOfRecordPath = Util.getParam(site, "numberOfRecordPath", ""); + recordIdPath = Util.getParam(site, "recordIdPath", ""); + pageSize = Util.getParamAsInt(site, "pageSize"); + toISOConversion = Util.getParam(site, "toISOConversion", ""); + icon = Util.getParam(site, "icon", icon); + } + + public SimpleUrlParams copy() { + SimpleUrlParams copy = new SimpleUrlParams(dm); + copyTo(copy); + + copy.url = url; + copy.icon = icon; + copy.loopElement = loopElement; + copy.numberOfRecordPath = numberOfRecordPath; + copy.recordIdPath = recordIdPath; + copy.toISOConversion = toISOConversion; + + return copy; + } +} diff --git a/harvesters/src/main/resources/config-spring-geonetwork.xml b/harvesters/src/main/resources/config-spring-geonetwork.xml index d9236149105..cf4e68eb563 100644 --- a/harvesters/src/main/resources/config-spring-geonetwork.xml +++ b/harvesters/src/main/resources/config-spring-geonetwork.xml @@ -57,6 +57,7 @@ - - + diff --git a/web-ui/src/main/resources/catalog/locales/en-admin.json b/web-ui/src/main/resources/catalog/locales/en-admin.json index 
6300de2bd3d..360d36bc9bb 100644 --- a/web-ui/src/main/resources/catalog/locales/en-admin.json +++ b/web-ui/src/main/resources/catalog/locales/en-admin.json @@ -287,7 +287,7 @@ "cron-0 15 10 15 * ?": "Fire at 10:15am on the 15th day of every month", "cron-0 15 10 ? * MON-FRI": "Fire at 10:15am every Monday, Tuesday, Wednesday, Thursday and Friday", "csvExport": "Export as CSV", - "csw": "CSW", + "simpleurl": "Simple URL", "csw-FailedToParseCapabilities": "Error while parsing GetCapabilities", "csw-capabilitiesUrlHelp": "CSW URL with or without GetCapabilities parameters", "csw-category": "Category for harvested records", @@ -423,6 +423,7 @@ "harvesterReport": "Report", "harvester-arcsde": "ArcSDE", "harvester-arcsdeHelp": "Harvest metadata records from an ArcSDE database.", + "harvester-simpleurl": "Simple URL", "harvester-csw": "OGC CSW 2.0.2", "harvester-cswHelp": "Harvest from a CSW server", "harvester-filesystem": "Directory", diff --git a/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.html b/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.html new file mode 100644 index 00000000000..cfea381ce67 --- /dev/null +++ b/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.html @@ -0,0 +1,98 @@ +

+ + +
+
+
+
+ + +
+ harvesterMainConfigurationFor {{('harvester-' + + harvesterSelected['@type']) | translate}} + +
+ + +

simpleurl-urlHelp

+
+
+ + +
+ harvesterAdvancedConfigurationFor + {{harvesterSelected['@type'] | translate}} + + +
+ +
+ + +

simpleurl-loopElementHelp

+
+ +
+ + +

simpleurl-numberOfRecordPathHelp

+
+ +
+ + +

simpleurl-recordIdPathHelp

+
+ +
+ + +

simpleurl-pageSizeHelp

+
+ +
+ + +

simpleurl-toISOConversionHelp

+
+ + +
+ + +
+ +
+

harvesterValidateHelp

+
+
+ +
+ diff --git a/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.js b/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.js new file mode 100644 index 00000000000..90e08923d83 --- /dev/null +++ b/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.js @@ -0,0 +1,105 @@ +// This is not that much elegant and should be replaced by some kind +// of Angular module. +var gnHarvestersimpleurl = { + createNew : function() { + return { + "@id" : "", + "@type" : "simpleurl", + "owner" : [], + "ownerGroup" : [], + "ownerUser": [""], + "site" : { + "name" : "", + "uuid" : "", + "icon" : "blank.png", + "account" : { + "use" : false, + "username" : [], + "password" : [] + }, + "url" : "http://", + "loopElement" : "", + "numberOfRecordPath": "", + "recordIdPath": "", + "pageSize": 200, + "toISOConversion": "" + }, + "content" : { + "validate" : "NOVALIDATION" + }, + "options" : { + "every" : "0 0 0 ? * *", + "oneRunOnly" : false, + "overrideUuid": "SKIP", + "status" : "active" + }, + "privileges" : [ { + "@id" : "1", + "operation" : [ { + "@name" : "view" + }, { + "@name" : "dynamic" + } ] + } ], + "categories" : [], + "info" : { + "lastRun" : [], + "running" : false + } + }; + }, + buildResponseCSWSearch : function($scope) { + var body = ''; + if ($scope.harvesterSelected.searches) { + for(var tag in $scope.harvesterSelected.searches[0]) { + if($scope.harvesterSelected.searches[0].hasOwnProperty(tag)) { + var value = $scope.harvesterSelected.searches[0][tag].value; + // Save all values even if empty + // XML to JSON does not convert single child to Object but Array + // In that situation, saving only one parameter will make this + // happen and then search criteria name which is the tag name + // will be lost. 
+ // if (value) { + body += '<' + tag + '>' + value + ''; + // } + } + } + } + return '' + body + ''; + }, + buildResponse : function(h, $scope) { + var body = '' + + ' ' + h.ownerGroup[0] + '' + + ' ' + h.ownerUser[0] + '' + + ' ' + + ' ' + h.site.name + '' + + ' ' + h.site.rejectDuplicateResource + '' + + ' ' + h.site.url.replace(/&/g, '&') + '' + + ' ' + h.site.icon + '' + + ' ' + + ' ' + h.site.account.use + '' + + ' ' + h.site.account.username + '' + + ' ' + h.site.account.password + '' + + ' ' + + ' ' + h.site.loopElement + '' + + ' ' + h.site.numberOfRecordPath + '' + + ' ' + h.site.recordIdPath + '' + + ' ' + h.site.pageSize + '' + + ' ' + h.site.toISOConversion + '' + + ' ' + + gnHarvestercsw.buildResponseCSWSearch($scope) + + ' ' + + ' ' + h.options.oneRunOnly + '' + + ' ' + h.options.overrideUuid + '' + + ' ' + h.options.every + '' + + ' ' + h.options.status + '' + + ' ' + + ' ' + + ' ' + h.content.validate + '' + + ' ' + + $scope.buildResponseGroup(h) + + $scope.buildResponseCategory(h) + ''; + return body; + } +}; diff --git a/web/src/main/webapp/xsl/conversion/import/CKAN-to-ISO19115-3-2018.xsl b/web/src/main/webapp/xsl/conversion/import/CKAN-to-ISO19115-3-2018.xsl new file mode 100644 index 00000000000..0894bb00e99 --- /dev/null +++ b/web/src/main/webapp/xsl/conversion/import/CKAN-to-ISO19115-3-2018.xsl @@ -0,0 +1,5 @@ + + + + diff --git a/web/src/main/webapp/xsl/xml/harvesting/simpleurl.xsl b/web/src/main/webapp/xsl/xml/harvesting/simpleurl.xsl new file mode 100644 index 00000000000..3b8a2e45f96 --- /dev/null +++ b/web/src/main/webapp/xsl/xml/harvesting/simpleurl.xsl @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From ed05387836a0049c18593befad585ac3a36755d4 Mon Sep 17 00:00:00 2001 From: Francois Prunayre Date: Tue, 17 Sep 2019 15:10:53 +0200 Subject: [PATCH 2/8] Harvester / Simple URL / Paging and basic opendatasoft support. 
--- .../harvester/simpleUrl/Harvester.java | 112 +++++++++++++----- .../simpleUrl/SimpleUrlHarvester.java | 3 +- .../harvester/simpleUrl/SimpleUrlParams.java | 11 +- .../harvester/simpleUrl/HarvesterTest.java | 58 +++++++++ .../admin/harvest/type/simpleurl.html | 22 ++-- .../templates/admin/harvest/type/simpleurl.js | 6 +- .../OPENDATASOFT-to-ISO19115-3-2018.xsl | 5 + .../webapp/xsl/xml/harvesting/simpleurl.xsl | 9 +- 8 files changed, 183 insertions(+), 43 deletions(-) create mode 100644 harvesters/src/test/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/HarvesterTest.java create mode 100644 web/src/main/webapp/xsl/conversion/import/OPENDATASOFT-to-ISO19115-3-2018.xsl diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java index 4faf080611b..b0e6bc3fda2 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java @@ -26,6 +26,7 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.annotations.VisibleForTesting; import com.google.common.io.CharStreams; import jeeves.server.context.ServiceContext; import org.apache.commons.io.IOUtils; @@ -54,11 +55,11 @@ import java.net.URI; import java.net.URISyntaxException; import java.nio.file.Path; +import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; /** @@ -66,6 +67,8 @@ *

* The JSON source can be a simple JSON file or * an URL with indication on how to pass paging information. + * + * This harvester has been tested with CKAN search API. */ class Harvester implements IHarvester { public static final String LOGGER_NAME = "geonetwork.harvester.json"; @@ -96,7 +99,7 @@ public HarvestResult harvest(Logger log) throws Exception { requestFactory = context.getBean(GeonetHttpRequestFactory.class); - String jsonResponse = retrieveUrl(log); + String jsonResponse = retrieveUrl(params.url, log); if (cancelMonitor.get()) { return new HarvestResult(); } @@ -114,30 +117,36 @@ public HarvestResult harvest(Logger log) throws Exception { } catch (Exception e) { } } - - JsonNode nodes; - Map uuids = new HashMap(); - - if (StringUtils.isNotEmpty(params.loopElement)) { - try { - nodes = jsonObj.at(params.loopElement); - log.debug("Number of records in response: " + nodes.size()); - - nodes.forEach(record -> { - Element xml = convertRecordToXml(record); - uuids.put(record.get("id").asText(), xml); - }); - } catch (Exception e) { - log.warning("Failed to collect record in response"); - } - } - boolean error = false; HarvestResult result = null; + Map allUuids = new HashMap(); try { Aligner aligner = new Aligner(cancelMonitor, context, params, log); - aligner.align(uuids, errors); - result = aligner.cleanupRemovedRecords(uuids.keySet()); + List listOfUrlForPages = buildListOfUrl(params, numberOfRecordsToHarvest); + for (int i = 0; i < listOfUrlForPages.size(); i ++) { + if (i != 0) { + jsonResponse = retrieveUrl(listOfUrlForPages.get(i), log); + jsonObj = objectMapper.readTree(jsonResponse); + } + Map uuids = new HashMap(); + JsonNode nodes; + if (StringUtils.isNotEmpty(params.loopElement)) { + try { + nodes = jsonObj.at(params.loopElement); + log.debug("Number of records in response: " + nodes.size()); + + nodes.forEach(record -> { + Element xml = convertRecordToXml(record); + uuids.put(record.get(params.recordIdPath).asText(), xml); + }); + 
aligner.align(uuids, errors); + allUuids.putAll(uuids); + } catch (Exception e) { + log.warning("Failed to collect record in response"); + } + } + } + result = aligner.cleanupRemovedRecords(allUuids.keySet()); } catch (Exception t) { error = true; log.error("Unknown error trying to harvest"); @@ -151,7 +160,7 @@ public HarvestResult harvest(Logger log) throws Exception { errors.add(new HarvestError(context, t)); } - log.info("Total records processed in all searches :" + uuids.size()); + log.info("Total records processed in all searches :" + allUuids.size()); if (error) { log.warning("Due to previous errors the align process has not been called"); } @@ -159,6 +168,55 @@ public HarvestResult harvest(Logger log) throws Exception { return result; } + @VisibleForTesting + protected List buildListOfUrl(SimpleUrlParams params, int numberOfRecordsToHarvest) { + List urlList = new ArrayList(); + if (StringUtils.isEmpty(params.pageSizeParam)) { + urlList.add(params.url); + return urlList; + } + + int numberOfRecordsPerPage = -1; + final String pageSizeParamValue = params.url.replaceAll(".*[?&]" + params.pageSizeParam + "=([0-9]+).*", "$1"); + if (StringUtils.isNumeric(pageSizeParamValue)) { + numberOfRecordsPerPage = Integer.parseInt(pageSizeParamValue); + } else { + log.warning(String.format( + "Page size param '%s' not found or is not a numeric in URL '%s'. Can't build a list of pages.", + params.pageSizeParam, params.url)); + urlList.add(params.url); + return urlList; + } + + final String pageFromParamValue = params.url.replaceAll(".*[?&]" + params.pageFromParam + "=([0-9]+).*", "$1"); + boolean startAtZero = false; + if (StringUtils.isNumeric(pageFromParamValue)) { + startAtZero = Integer.parseInt(pageFromParamValue) == 0; + } else { + log.warning(String.format( + "Page from param '%s' not found or is not a numeric in URL '%s'. 
Can't build a list of pages.", + params.pageFromParam, params.url)); + urlList.add(params.url); + return urlList; + } + + + int numberOfPages = (int) Math.abs((numberOfRecordsToHarvest + (startAtZero ? -1 : 0)) / numberOfRecordsPerPage) + 1; + + for (int i = 0; i < numberOfPages; i++) { + int from = i * numberOfRecordsPerPage + (startAtZero ? 0 : 1); + int size = i == numberOfPages - 1 ? // Last page + numberOfRecordsToHarvest - from + (startAtZero ? 0 : 1) : + numberOfRecordsPerPage; + String url = params.url + .replaceAll(params.pageFromParam + "=[0-9]+", params.pageFromParam + "=" + from) + .replaceAll(params.pageSizeParam + "=[0-9]+", params.pageSizeParam + "=" + size); + urlList.add(url); + } + + return urlList; + } + private Element convertRecordToXml(JsonNode record) { ObjectMapper objectMapper = new ObjectMapper(); try { @@ -190,14 +248,14 @@ private Element convertRecordToXml(JsonNode record) { * * @return */ - private String retrieveUrl(Logger log) throws Exception { - if (!Lib.net.isUrlValid(params.url)) - throw new BadParameterEx("Invalid URL", params.url); + private String retrieveUrl(String url, Logger log) throws Exception { + if (!Lib.net.isUrlValid(url)) + throw new BadParameterEx("Invalid URL", url); HttpGet httpMethod = null; ClientHttpResponse httpResponse = null; try { - httpMethod = new HttpGet(createUrl(params.url)); + httpMethod = new HttpGet(createUrl(url)); httpResponse = requestFactory.execute(httpMethod); int status = httpResponse.getRawStatusCode(); Log.debug(LOGGER_NAME, "Request status code: " + status); diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlHarvester.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlHarvester.java index 8b95504c844..9d8f77423d0 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlHarvester.java +++ 
b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlHarvester.java @@ -113,7 +113,8 @@ protected void storeNodeExtra(AbstractParams p, String path, String siteId, Stri harvesterSettingsManager.add("id:" + siteId, "loopElement", params.loopElement); harvesterSettingsManager.add("id:" + siteId, "numberOfRecordPath", params.numberOfRecordPath); harvesterSettingsManager.add("id:" + siteId, "recordIdPath", params.recordIdPath); - harvesterSettingsManager.add("id:" + siteId, "pageSize", params.pageSize); + harvesterSettingsManager.add("id:" + siteId, "pageFromParam", params.pageFromParam); + harvesterSettingsManager.add("id:" + siteId, "pageSizeParam", params.pageSizeParam); harvesterSettingsManager.add("id:" + siteId, "toISOConversion", params.toISOConversion); } diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlParams.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlParams.java index af41b3efb6c..98ca083c8ba 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlParams.java +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlParams.java @@ -35,7 +35,8 @@ public class SimpleUrlParams extends AbstractParams { public String loopElement; public String numberOfRecordPath; public String recordIdPath; - int pageSize; + public String pageSizeParam; + public String pageFromParam; public String toISOConversion; public SimpleUrlParams(DataManager dm) { @@ -55,7 +56,8 @@ public void create(Element node) throws BadInputEx { loopElement = Util.getParam(site, "loopElement", "/result/results"); numberOfRecordPath = Util.getParam(site, "numberOfRecordPath", "/result/count"); recordIdPath = Util.getParam(site, "recordIdPath", "id"); - pageSize = Util.getParamAsInt(site, "pageSize"); + pageSizeParam = Util.getParam(site, "pageSizeParam", "rows"); + pageFromParam = Util.getParam(site, 
"pageFromParam", "start"); toISOConversion = Util.getParam(site, "toISOConversion", "CKAN-to-ISO19115-3-2018"); icon = Util.getParam(site, "icon", "default.gif"); } @@ -72,7 +74,8 @@ public void update(Element node) throws BadInputEx { loopElement = Util.getParam(site, "loopElement", ""); numberOfRecordPath = Util.getParam(site, "numberOfRecordPath", ""); recordIdPath = Util.getParam(site, "recordIdPath", ""); - pageSize = Util.getParamAsInt(site, "pageSize"); + pageSizeParam = Util.getParam(site, "pageSizeParam", ""); + pageFromParam = Util.getParam(site, "pageFromParam", ""); toISOConversion = Util.getParam(site, "toISOConversion", ""); icon = Util.getParam(site, "icon", icon); } @@ -85,6 +88,8 @@ public SimpleUrlParams copy() { copy.icon = icon; copy.loopElement = loopElement; copy.numberOfRecordPath = numberOfRecordPath; + copy.pageSizeParam = pageSizeParam; + copy.pageFromParam = pageFromParam; copy.recordIdPath = recordIdPath; copy.toISOConversion = toISOConversion; diff --git a/harvesters/src/test/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/HarvesterTest.java b/harvesters/src/test/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/HarvesterTest.java new file mode 100644 index 00000000000..c405bcba438 --- /dev/null +++ b/harvesters/src/test/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/HarvesterTest.java @@ -0,0 +1,58 @@ +package org.fao.geonet.kernel.harvest.harvester.simpleUrl; + +import org.fao.geonet.utils.Log; +import org.junit.Test; + +import java.util.List; + +import static org.junit.Assert.*; + +public class HarvesterTest { + + @Test + public void test_buildPagesUrl() { + final SimpleUrlParams params = new SimpleUrlParams(null); + params.url = "http://dados.gov.br/api/3/action/package_search?q=&rows=10&start=1"; + params.pageFromParam = "start"; + params.pageSizeParam = "rows"; + + int numberOfResult = 21; + + final Harvester harvester = new Harvester(null, Log.createLogger("TEST"), null, params); + List list = 
harvester.buildListOfUrl(params, numberOfResult); + assertEquals(3, list.size()); + assertEquals("http://dados.gov.br/api/3/action/package_search?q=&rows=10&start=1", list.get(0)); + assertEquals("http://dados.gov.br/api/3/action/package_search?q=&rows=10&start=11", list.get(1)); + assertEquals("http://dados.gov.br/api/3/action/package_search?q=&rows=1&start=21", list.get(2)); + + + + params.url = "http://dados.gov.br/api/3/action/package_search?q=&rows=10&start=0"; + list = harvester.buildListOfUrl(params, numberOfResult); + assertEquals(3, list.size()); + assertEquals("http://dados.gov.br/api/3/action/package_search?q=&rows=10&start=0", list.get(0)); + assertEquals("http://dados.gov.br/api/3/action/package_search?q=&rows=10&start=10", list.get(1)); + assertEquals("http://dados.gov.br/api/3/action/package_search?q=&rows=1&start=20", list.get(2)); + + params.url = "http://dados.gov.br/api/3/action/package_search?q=&rows=DADA&start=1"; + list = harvester.buildListOfUrl(params, numberOfResult); + assertEquals(1, list.size()); + + params.url = "http://dados.gov.br/api/3/action/package_search?q=&rows=11&start=DADA"; + list = harvester.buildListOfUrl(params, numberOfResult); + assertEquals(1, list.size()); + + params.url = "http://dados.gov.br/api/3/action/package_search?q=&&start=1"; + list = harvester.buildListOfUrl(params, numberOfResult); + assertEquals(1, list.size()); + + params.url = "http://dados.gov.br/api/3/action/package_search?q=&rows=11&"; + list = harvester.buildListOfUrl(params, numberOfResult); + assertEquals(1, list.size()); + + + params.url = "http://dados.gov.br/api/3/action/package_search?q=&rows=2&start=0"; + list = harvester.buildListOfUrl(params, 8); + assertEquals(4, list.size()); + } +} diff --git a/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.html b/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.html index cfea381ce67..4581d82ec12 100644 --- 
a/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.html +++ b/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.html @@ -59,14 +59,22 @@

simpleurl-recordIdPathHelp

-
- - + + +

simpleurl-pageFromParamHelp

+
+ +
+ + -

simpleurl-pageSizeHelp

+ data-ng-model="harvesterSelected.site.pageSizeParam"/> +

simpleurl-pageSizeParamHelp

' + h.site.loopElement + '' + ' ' + h.site.numberOfRecordPath + '' + ' ' + h.site.recordIdPath + '' - + ' ' + h.site.pageSize + '' + + ' ' + h.site.pageFromParam + '' + + ' ' + h.site.pageSizeParam + '' + ' ' + h.site.toISOConversion + '' + ' ' + gnHarvestercsw.buildResponseCSWSearch($scope) diff --git a/web/src/main/webapp/xsl/conversion/import/OPENDATASOFT-to-ISO19115-3-2018.xsl b/web/src/main/webapp/xsl/conversion/import/OPENDATASOFT-to-ISO19115-3-2018.xsl new file mode 100644 index 00000000000..40d2dcc6a97 --- /dev/null +++ b/web/src/main/webapp/xsl/conversion/import/OPENDATASOFT-to-ISO19115-3-2018.xsl @@ -0,0 +1,5 @@ + + + + diff --git a/web/src/main/webapp/xsl/xml/harvesting/simpleurl.xsl b/web/src/main/webapp/xsl/xml/harvesting/simpleurl.xsl index 3b8a2e45f96..d786c7ece7b 100644 --- a/web/src/main/webapp/xsl/xml/harvesting/simpleurl.xsl +++ b/web/src/main/webapp/xsl/xml/harvesting/simpleurl.xsl @@ -19,12 +19,15 @@ + + + + + + - - - From f760ed1c256af562250cd788bc07c3b5935eca14 Mon Sep 17 00:00:00 2001 From: Florent gravin Date: Mon, 6 Sep 2021 14:09:49 +0200 Subject: [PATCH 3/8] Json harvester: fix merge conflicts --- .../harvest/harvester/simpleUrl/Aligner.java | 15 +++-- .../simpleUrl/SimpleUrlHarvester.java | 60 +++---------------- .../harvester/simpleUrl/SimpleUrlParams.java | 5 ++ .../resources/catalog/locales/en-admin.json | 3 - .../templates/admin/harvest/type/simpleurl.js | 2 +- 5 files changed, 22 insertions(+), 63 deletions(-) diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Aligner.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Aligner.java index 820b5d6c86f..3bce32bcd1b 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Aligner.java +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Aligner.java @@ -43,7 +43,6 @@ import org.fao.geonet.kernel.harvest.harvester.HarvestError; import 
org.fao.geonet.kernel.harvest.harvester.HarvestResult; import org.fao.geonet.kernel.harvest.harvester.UUIDMapper; -import org.fao.geonet.kernel.search.index.LuceneIndexLanguageTracker; import org.fao.geonet.repository.OperationAllowedRepository; import org.jdom.Element; @@ -142,8 +141,12 @@ private void insertOrUpdate(Map records, Collection record, String overrideUuidV addCategories(metadata, params.getCategories(), localCateg, context, null, false); - metadata = metadataManager.insertMetadata(context, metadata, xml, true, false, false, UpdateDatestamp.NO, false, false); + metadata = metadataManager.insertMetadata(context, metadata, xml, false, false, UpdateDatestamp.NO, false, false); String id = String.valueOf(metadata.getId()); - addPrivileges(id, params.getPrivileges(), localGroups, dataMan, context); + addPrivileges(id, params.getPrivileges(), localGroups, context); - metadataIndexer.indexMetadata(id, true, null); + metadataIndexer.indexMetadata(id, true); result.addedMetadata++; } @@ -273,7 +276,7 @@ boolean updateMetadata(Map.Entry ri, String id, Boolean force) OperationAllowedRepository repository = context.getBean(OperationAllowedRepository.class); repository.deleteAllByMetadataId(Integer.parseInt(id)); - addPrivileges(id, params.getPrivileges(), localGroups, dataMan, context); + addPrivileges(id, params.getPrivileges(), localGroups, context); metadata.getCategories().clear(); addCategories(metadata, params.getCategories(), localCateg, context, null, true); diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlHarvester.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlHarvester.java index 9d8f77423d0..bd8dcaa8ecd 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlHarvester.java +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlHarvester.java @@ -31,6 +31,7 @@ import 
org.fao.geonet.kernel.harvest.harvester.AbstractHarvester; import org.fao.geonet.kernel.harvest.harvester.AbstractParams; import org.fao.geonet.kernel.harvest.harvester.HarvestResult; +import org.fao.geonet.kernel.harvest.harvester.csw.CswParams; import org.fao.geonet.repository.SourceRepository; import org.fao.geonet.resources.Resources; import org.jdom.Element; @@ -43,70 +44,23 @@ /** * Harvest metadata from a JSON source. */ -public class SimpleUrlHarvester extends AbstractHarvester { +public class SimpleUrlHarvester extends AbstractHarvester { - private SimpleUrlParams params; - - @Autowired - SourceRepository sourceRepository; - - protected void doInit(Element node, ServiceContext context) throws BadInputEx { - params = new SimpleUrlParams(dataMan); - super.setParams(params); - params.create(node); - } - - protected String doAdd(Element node) throws BadInputEx, SQLException { - params = new SimpleUrlParams(dataMan); - super.setParams(params); - - params.create(node); - params.setUuid(UUID.randomUUID().toString()); - - String id = harvesterSettingsManager.add("harvesting", "node", getType()); - - storeNode(params, "id:" + id); - - Source source = new Source(params.getUuid(), params.getName(), params.getTranslations(), SourceType.harvester); - sourceRepository.save(source); - - Resources.copyLogo(context, "images" + File.separator + "harvesting" + File.separator + params.icon, params.getUuid()); - - return id; - } - - protected void doUpdate(String id, Element node) throws BadInputEx, SQLException { - SimpleUrlParams copy = params.copy(); - super.setParams(params); - - copy.update(node); - - String path = "harvesting/id:" + id; - harvesterSettingsManager.removeChildren(path); - - storeNode(copy, path); - - Source source = new Source(copy.getUuid(), copy.getName(), copy.getTranslations(), SourceType.harvester); - sourceRepository.save(source); - - Resources.copyLogo(context, "images" + File.separator + "harvesting" + File.separator + copy.icon, copy.getUuid()); - 
- params = copy; - - super.setParams(params); + @Override + protected SimpleUrlParams createParams() { + return new SimpleUrlParams(dataMan); } /** * Stores in the harvester settings table some values not managed by {@link AbstractHarvester} * - * @param p the harvester parameters. + * @param params the harvester parameters. * @param path * @param siteId * @param optionsId * @throws SQLException */ - protected void storeNodeExtra(AbstractParams p, String path, String siteId, String optionsId) throws SQLException { - SimpleUrlParams params = (SimpleUrlParams) p; + protected void storeNodeExtra(SimpleUrlParams params, String path, String siteId, String optionsId) throws SQLException { harvesterSettingsManager.add("id:" + siteId, "url", params.url); harvesterSettingsManager.add("id:" + siteId, "icon", params.icon); diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlParams.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlParams.java index 98ca083c8ba..769c692e653 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlParams.java +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlParams.java @@ -80,6 +80,11 @@ public void update(Element node) throws BadInputEx { icon = Util.getParam(site, "icon", icon); } + @Override + public String getIcon() { + return icon; + } + public SimpleUrlParams copy() { SimpleUrlParams copy = new SimpleUrlParams(dm); copyTo(copy); diff --git a/web-ui/src/main/resources/catalog/locales/en-admin.json b/web-ui/src/main/resources/catalog/locales/en-admin.json index e187463c394..49eb124984d 100644 --- a/web-ui/src/main/resources/catalog/locales/en-admin.json +++ b/web-ui/src/main/resources/catalog/locales/en-admin.json @@ -290,14 +290,11 @@ "cron-0 15 10 ? 
* MON-FRI": "Fire at 10:15am every Monday, Tuesday, Wednesday, Thursday and Friday", "harvesterTimeZoneHelp": "Time in cron expression will be interpreted as in {{timeZoneTransl}} {{zoneOffset}} timezone.", "csvExport": "Export as CSV", -<<<<<<< HEAD "duplicatedValueFoundHarvesterName": "An harvester with that name already exists. Choose another one.", "duplicatedValueFoundUserName": "A user with that name already exists. Choose another one.", "duplicatedValueFoundUserEmail": "A user with that email already exists. Choose another one.", "csw": "CSW", -======= "simpleurl": "Simple URL", ->>>>>>> fxp/feature/harvester/json "csw-FailedToParseCapabilities": "Error while parsing GetCapabilities", "csw-capabilitiesUrlHelp": "CSW URL with or without GetCapabilities parameters", "csw-category": "Category for harvested records", diff --git a/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.js b/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.js index 87f626b38b6..60a14b44244 100644 --- a/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.js +++ b/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.js @@ -90,7 +90,7 @@ var gnHarvestersimpleurl = { + ' ' + h.site.pageSizeParam + '' + ' ' + h.site.toISOConversion + '' + ' ' - + gnHarvestercsw.buildResponseCSWSearch($scope) + + gnHarvestersimpleurl.buildResponseCSWSearch($scope) + ' ' + ' ' + h.options.oneRunOnly + '' + ' ' + h.options.overrideUuid + '' From a76271e268c64756344c2965b1bfbd3edf20b9cd Mon Sep 17 00:00:00 2001 From: Florent gravin Date: Tue, 7 Sep 2021 09:23:11 +0200 Subject: [PATCH 4/8] jsonHarvester: handle JSONLD format with @ in tag names --- .../geonet/kernel/harvest/harvester/simpleUrl/Harvester.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java 
b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java index b0e6bc3fda2..9d928fd7748 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java @@ -223,7 +223,7 @@ private Element convertRecordToXml(JsonNode record) { String recordAsXml = XML.toString( new JSONObject( objectMapper.writeValueAsString(record)), "record"); - recordAsXml = Xml.stripNonValidXMLCharacters(recordAsXml); + recordAsXml = Xml.stripNonValidXMLCharacters(recordAsXml).replace("<@", "<").replace(" Date: Tue, 7 Sep 2021 09:24:24 +0200 Subject: [PATCH 5/8] jsonHarvester: add ESRI JSONLD DCAT transformation --- .../convert/fromJsonLdEsri.xsl | 449 ++++++++++++++++++ .../import/ESRIDCAT-to-ISO19115-3-2018.xsl | 5 + 2 files changed, 454 insertions(+) create mode 100644 schemas/iso19115-3.2018/src/main/plugin/iso19115-3.2018/convert/fromJsonLdEsri.xsl create mode 100644 web/src/main/webapp/xsl/conversion/import/ESRIDCAT-to-ISO19115-3-2018.xsl diff --git a/schemas/iso19115-3.2018/src/main/plugin/iso19115-3.2018/convert/fromJsonLdEsri.xsl b/schemas/iso19115-3.2018/src/main/plugin/iso19115-3.2018/convert/fromJsonLdEsri.xsl new file mode 100644 index 00000000000..bcb8dd46a25 --- /dev/null +++ b/schemas/iso19115-3.2018/src/main/plugin/iso19115-3.2018/convert/fromJsonLdEsri.xsl @@ -0,0 +1,449 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ISO 19115-3 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + originator + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/web/src/main/webapp/xsl/conversion/import/ESRIDCAT-to-ISO19115-3-2018.xsl b/web/src/main/webapp/xsl/conversion/import/ESRIDCAT-to-ISO19115-3-2018.xsl new file mode 100644 index 00000000000..e7eea186961 --- /dev/null +++ b/web/src/main/webapp/xsl/conversion/import/ESRIDCAT-to-ISO19115-3-2018.xsl @@ -0,0 +1,5 @@ + + + + From 332ff96ad28f7d97c680db799f512bed9393a0ef Mon Sep 17 00:00:00 2001 From: Florent gravin Date: Thu, 9 Sep 2021 10:02:50 +0200 Subject: [PATCH 6/8] jsonHarvester: extract uuid from identifier https://data-atmo-hdf.opendata.arcgis.com/datasets/bac17d7d05a34242a8b22c535ecdb13d will extract bac17d7d05a34242a8b22c535ecdb13d --- .../harvest/harvester/simpleUrl/Harvester.java | 16 +++++++++++++--- .../iso19115-3.2018/convert/fromJsonLdEsri.xsl | 2 +- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java index 9d928fd7748..088611c999b 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java @@ -136,8 +136,9 @@ public HarvestResult harvest(Logger log) throws Exception { log.debug("Number of records in response: " + nodes.size()); nodes.forEach(record -> { - Element xml = convertRecordToXml(record); - uuids.put(record.get(params.recordIdPath).asText(), xml); + String uuid = this.extractUuidFromIdentifier(record.get(params.recordIdPath).asText()); + Element xml = convertRecordToXml(record, uuid); + uuids.put(uuid, xml); }); aligner.align(uuids, errors); allUuids.putAll(uuids); @@ -168,6 +169,14 @@ public HarvestResult harvest(Logger log) throws Exception { return result; } + private String extractUuidFromIdentifier(final String identifier ) { + 
String uuid = identifier; + if (Lib.net.isUrlValid(uuid)) { + uuid = uuid.replaceFirst(".*/([^/?]+).*", "$1"); + } + return uuid; + } + @VisibleForTesting protected List buildListOfUrl(SimpleUrlParams params, int numberOfRecordsToHarvest) { List urlList = new ArrayList(); @@ -217,7 +226,7 @@ protected List buildListOfUrl(SimpleUrlParams params, int numberOfRecord return urlList; } - private Element convertRecordToXml(JsonNode record) { + private Element convertRecordToXml(JsonNode record, String uuid) { ObjectMapper objectMapper = new ObjectMapper(); try { String recordAsXml = XML.toString( @@ -225,6 +234,7 @@ private Element convertRecordToXml(JsonNode record) { objectMapper.writeValueAsString(record)), "record"); recordAsXml = Xml.stripNonValidXMLCharacters(recordAsXml).replace("<@", "<").replace(" - + From 878b57e187641dee2a280871b379eec1800d8fd7 Mon Sep 17 00:00:00 2001 From: Florent gravin Date: Wed, 15 Sep 2021 12:27:48 +0200 Subject: [PATCH 7/8] ESRI & ODS tranformation to DCAT2 --- .../main/webapp/xsl/conversion/import/ESRIDCAT-to-DCAT2.xsl | 5 +++++ .../webapp/xsl/conversion/import/OPENDATASOFT-to-DCAT2.xsl | 5 +++++ 2 files changed, 10 insertions(+) create mode 100644 web/src/main/webapp/xsl/conversion/import/ESRIDCAT-to-DCAT2.xsl create mode 100644 web/src/main/webapp/xsl/conversion/import/OPENDATASOFT-to-DCAT2.xsl diff --git a/web/src/main/webapp/xsl/conversion/import/ESRIDCAT-to-DCAT2.xsl b/web/src/main/webapp/xsl/conversion/import/ESRIDCAT-to-DCAT2.xsl new file mode 100644 index 00000000000..c4a50b44c3e --- /dev/null +++ b/web/src/main/webapp/xsl/conversion/import/ESRIDCAT-to-DCAT2.xsl @@ -0,0 +1,5 @@ + + + + diff --git a/web/src/main/webapp/xsl/conversion/import/OPENDATASOFT-to-DCAT2.xsl b/web/src/main/webapp/xsl/conversion/import/OPENDATASOFT-to-DCAT2.xsl new file mode 100644 index 00000000000..91b92f26aa3 --- /dev/null +++ b/web/src/main/webapp/xsl/conversion/import/OPENDATASOFT-to-DCAT2.xsl @@ -0,0 +1,5 @@ + + + + From 
6fdb1758027a5ad94f302d119b45a7da90bd2107 Mon Sep 17 00:00:00 2001 From: Florent gravin Date: Wed, 15 Sep 2021 12:28:13 +0200 Subject: [PATCH 8/8] jsonHarvester: add api & nodeUrl in XML for transformation used by ODS to compute export links --- .../harvester/simpleUrl/Harvester.java | 38 ++++++++++++------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java index 088611c999b..6621e5f634f 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java @@ -52,8 +52,10 @@ import java.io.IOException; import java.io.InputStreamReader; +import java.net.MalformedURLException; import java.net.URI; import java.net.URISyntaxException; +import java.net.URL; import java.nio.file.Path; import java.util.ArrayList; import java.util.HashMap; @@ -137,8 +139,16 @@ public HarvestResult harvest(Logger log) throws Exception { nodes.forEach(record -> { String uuid = this.extractUuidFromIdentifier(record.get(params.recordIdPath).asText()); - Element xml = convertRecordToXml(record, uuid); - uuids.put(uuid, xml); + String apiUrl = params.url.split("\\?")[0]; + URL url = null; + try { + url = new URL(apiUrl); + String nodeUrl = new StringBuilder(url.getProtocol()).append("://").append(url.getAuthority()).toString(); + Element xml = convertRecordToXml(record, uuid, apiUrl, nodeUrl); + uuids.put(uuid, xml); + } catch (MalformedURLException e) { + log.warning("Failed to parse Node URL"); + } }); aligner.align(uuids, errors); allUuids.putAll(uuids); @@ -191,8 +201,8 @@ protected List buildListOfUrl(SimpleUrlParams params, int numberOfRecord numberOfRecordsPerPage = Integer.parseInt(pageSizeParamValue); } else { log.warning(String.format( - "Page size param '%s' not found or is
not a numeric in URL '%s'. Can't build a list of pages.", - params.pageSizeParam, params.url)); + "Page size param '%s' not found or is not a numeric in URL '%s'. Can't build a list of pages.", + params.pageSizeParam, params.url)); urlList.add(params.url); return urlList; } @@ -203,8 +213,8 @@ protected List buildListOfUrl(SimpleUrlParams params, int numberOfRecord startAtZero = Integer.parseInt(pageFromParamValue) == 0; } else { log.warning(String.format( - "Page from param '%s' not found or is not a numeric in URL '%s'. Can't build a list of pages.", - params.pageFromParam, params.url)); + "Page from param '%s' not found or is not a numeric in URL '%s'. Can't build a list of pages.", + params.pageFromParam, params.url)); urlList.add(params.url); return urlList; } @@ -215,26 +225,28 @@ protected List buildListOfUrl(SimpleUrlParams params, int numberOfRecord for (int i = 0; i < numberOfPages; i++) { int from = i * numberOfRecordsPerPage + (startAtZero ? 0 : 1); int size = i == numberOfPages - 1 ? // Last page - numberOfRecordsToHarvest - from + (startAtZero ? 0 : 1) : - numberOfRecordsPerPage; + numberOfRecordsToHarvest - from + (startAtZero ? 
0 : 1) : + numberOfRecordsPerPage; String url = params.url - .replaceAll(params.pageFromParam + "=[0-9]+", params.pageFromParam + "=" + from) - .replaceAll(params.pageSizeParam + "=[0-9]+", params.pageSizeParam + "=" + size); + .replaceAll(params.pageFromParam + "=[0-9]+", params.pageFromParam + "=" + from) + .replaceAll(params.pageSizeParam + "=[0-9]+", params.pageSizeParam + "=" + size); urlList.add(url); } return urlList; } - private Element convertRecordToXml(JsonNode record, String uuid) { + private Element convertRecordToXml(JsonNode record, String uuid, String apiUrl, String nodeUrl) { ObjectMapper objectMapper = new ObjectMapper(); try { String recordAsXml = XML.toString( - new JSONObject( - objectMapper.writeValueAsString(record)), "record"); + new JSONObject( + objectMapper.writeValueAsString(record)), "record"); recordAsXml = Xml.stripNonValidXMLCharacters(recordAsXml).replace("<@", "<").replace("