diff --git a/common/src/main/java/org/fao/geonet/utils/Xml.java b/common/src/main/java/org/fao/geonet/utils/Xml.java index 4e536cd4b52..476f723d36d 100644 --- a/common/src/main/java/org/fao/geonet/utils/Xml.java +++ b/common/src/main/java/org/fao/geonet/utils/Xml.java @@ -136,8 +136,6 @@ public final class Xml { + "\ud800\udc00-\udbff\udfff" + "]"; - //-------------------------------------------------------------------------- - /** * * @param validate diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Aligner.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Aligner.java new file mode 100644 index 00000000000..3bce32bcd1b --- /dev/null +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Aligner.java @@ -0,0 +1,286 @@ +//============================================================================= +//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the +//=== United Nations (FAO-UN), United Nations World Food Programme (WFP) +//=== and United Nations Environment Programme (UNEP) +//=== +//=== This program is free software; you can redistribute it and/or modify +//=== it under the terms of the GNU General Public License as published by +//=== the Free Software Foundation; either version 2 of the License, or (at +//=== your option) any later version. +//=== +//=== This program is distributed in the hope that it will be useful, but +//=== WITHOUT ANY WARRANTY; without even the implied warranty of +//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +//=== General Public License for more details. +//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. 
email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.simpleUrl; + +import jeeves.server.context.ServiceContext; +import org.fao.geonet.GeonetContext; +import org.fao.geonet.Logger; +import org.fao.geonet.constants.Geonet; +import org.fao.geonet.domain.AbstractMetadata; +import org.fao.geonet.domain.ISODate; +import org.fao.geonet.domain.Metadata; +import org.fao.geonet.domain.MetadataType; +import org.fao.geonet.exceptions.OperationAbortedEx; +import org.fao.geonet.kernel.DataManager; +import org.fao.geonet.kernel.UpdateDatestamp; +import org.fao.geonet.kernel.datamanager.IMetadataIndexer; +import org.fao.geonet.kernel.datamanager.IMetadataManager; +import org.fao.geonet.kernel.datamanager.IMetadataUtils; +import org.fao.geonet.kernel.harvest.BaseAligner; +import org.fao.geonet.kernel.harvest.harvester.CategoryMapper; +import org.fao.geonet.kernel.harvest.harvester.GroupMapper; +import org.fao.geonet.kernel.harvest.harvester.HarvestError; +import org.fao.geonet.kernel.harvest.harvester.HarvestResult; +import org.fao.geonet.kernel.harvest.harvester.UUIDMapper; +import org.fao.geonet.repository.OperationAllowedRepository; +import org.jdom.Element; + +import javax.transaction.Transactional; +import javax.transaction.Transactional.TxType; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.atomic.AtomicBoolean; + +public class Aligner extends BaseAligner { + + private ServiceContext context; + private DataManager dataMan; + private CategoryMapper localCateg; + private GroupMapper localGroups; + private UUIDMapper localUuids; + + private IMetadataUtils metadataUtils; + private IMetadataManager metadataManager; + private IMetadataIndexer metadataIndexer; + + private HarvestResult result; + private Map processParams = new HashMap(); + private Logger log; + + 
/**
+     * Creates an aligner for one run of a simple URL harvester.
+     *
+     * @param cancelMonitor flag polled before each unit of work; once set, the aligner stops doing work
+     * @param sc            service context used to resolve the beans this class depends on
+     * @param params        configuration of the harvester being run
+     * @param log           logger receiving progress and error messages
+     * @throws OperationAbortedEx declared by the signature; NOTE(review): not thrown explicitly
+     *                            in this body - confirm whether the parent constructor raises it
+     */
+    public Aligner(AtomicBoolean cancelMonitor, ServiceContext sc, SimpleUrlParams params, Logger log) throws OperationAbortedEx {
+        super(cancelMonitor);
+        this.context = sc;
+        this.params = params;
+        this.log = log;
+
+        GeonetContext gc = (GeonetContext) context.getHandlerContext(Geonet.CONTEXT_NAME);
+        dataMan = gc.getBean(DataManager.class);
+        metadataUtils = gc.getBean(IMetadataUtils.class);
+        metadataManager = gc.getBean(IMetadataManager.class);
+        metadataIndexer = gc.getBean(IMetadataIndexer.class);
+        result = new HarvestResult();
+        result.unretrievable = 0;
+        result.uuidSkipped = 0;
+        result.couldNotInsert = 0;
+    }
+
+    /**
+     * Inserts or updates the given records in the local catalogue.
+     *
+     * The category, group and UUID mappers are (re)created on every call, so this method may be
+     * called once per page of results. Counters accumulate in the same {@link HarvestResult}.
+     *
+     * @param records remote records keyed by UUID; a null value means the record could not be converted
+     * @param errors  collection receiving one {@link HarvestError} per record that failed
+     * @return the accumulated result of this aligner
+     * @throws Exception if one of the mappers cannot be created
+     */
+    public HarvestResult align(Map<String, Element> records, Collection<HarvestError> errors) throws Exception {
+        if (cancelMonitor.get()) {
+            return result;
+        }
+
+        log.debug("Start of alignment for : " + params.getName());
+
+        localCateg = new CategoryMapper(context);
+        localGroups = new GroupMapper(context);
+        localUuids = new UUIDMapper(context.getBean(IMetadataUtils.class), params.getUuid());
+
+        insertOrUpdate(records, errors);
+        log.debug("End of alignment for : " + params.getName());
+
+        return result;
+    }
+
+    /**
+     * Processes every record: adds it when unknown locally, updates it when it belongs to this
+     * harvester, and otherwise applies the configured override-UUID policy.
+     *
+     * A failure on one record is logged and added to {@code errors}; processing continues with
+     * the next record.
+     */
+    private void insertOrUpdate(Map<String, Element> records, Collection<HarvestError> errors) {
+        records.entrySet().forEach(e -> {
+            if (cancelMonitor.get()) {
+                return;
+            }
+
+            try {
+                String id = metadataUtils.getMetadataId(e.getKey());
+
+                if (id == null) {
+                    //record doesn't exist (so it doesn't belong to this harvester)
+                    log.debug("Adding record with uuid " + e.getKey());
+                    addMetadata(e, null);
+                } else if (localUuids.getID(e.getKey()) == null) {
+                    //Record does not belong to this harvester
+                    result.datasetUuidExist++;
+
+                    switch (params.getOverrideUuid()) {
+                        case OVERRIDE:
+                            // updateMetadata() maintains the updated/unchanged counters itself;
+                            // incrementing updatedMetadata here as well counted each override twice.
+                            updateMetadata(e, Integer.toString(metadataUtils.findOneByUuid(e.getKey()).getId()), true);
+                            log.debug("Overriding record with uuid " + e.getKey());
+                            break;
+                        case RANDOM:
+                            log.debug("Generating random uuid for remote record with uuid " + e.getKey());
+                            addMetadata(e, UUID.randomUUID().toString());
+                            break;
+                        case SKIP:
+                            log.debug("Skipping record with uuid " + e.getKey());
+                            result.uuidSkipped++;
+                            break;
+                        default:
+                            break;
+                    }
+                } else {
+                    //record exists and belongs to this harvester
+                    updateMetadata(e, id, false);
+                    if (params.isIfRecordExistAppendPrivileges()) {
+                        addPrivileges(id, params.getPrivileges(), localGroups, context);
+                        result.privilegesAppendedOnExistingRecord++;
+                    }
+                }
+                result.totalMetadata++;
+            } catch (Throwable t) {
+                // Boundary of per-record processing: record the failure and keep going.
+                errors.add(new HarvestError(this.context, t));
+                log.error("Unable to process record from simple URL harvester (" + this.params.getName() + ")");
+                log.error("   Record failed: " + e.getKey() + ". Error is: " + t.getMessage());
+                log.error(t);
+            } finally {
+                result.originalMetadata++;
+            }
+        });
+    }
+
+    /**
+     * Removes local records of this harvester that are no longer present in the harvested source.
+     *
+     * NOTE(review): instances of this class are created with {@code new} in Harvester, so a
+     * proxy-based interceptor would not see this annotation - confirm that the transaction
+     * boundary is actually applied.
+     *
+     * @param records UUIDs found in the remote source during this run
+     * @return the accumulated result of this aligner
+     * @throws Exception if a record cannot be deleted or the index cannot be flushed
+     */
+    @Transactional(value = TxType.REQUIRES_NEW)
+    public HarvestResult cleanupRemovedRecords(Set<String> records) throws Exception {
+
+        if (cancelMonitor.get()) {
+            return result;
+        }
+
+        if (localUuids == null) {
+            // align() was never called, so there is no local UUID mapping to compare against.
+            // Previously this path failed with a NullPointerException.
+            log.debug("  - No alignment was performed, nothing to clean up");
+            return result;
+        }
+
+        for (String uuid : localUuids.getUUIDs()) {
+            if (!records.contains(uuid)) {
+                String id = localUuids.getID(uuid);
+                log.debug("  - Removing old metadata with local id:" + id);
+                metadataManager.deleteMetadata(context, id);
+                result.locallyRemoved++;
+            }
+        }
+        dataMan.forceIndexChanges();
+
+        return result;
+    }
+
+
+    /**
+     * Inserts one remote record as a new harvested metadata record and indexes it.
+     *
+     * Nothing is inserted when the run is cancelled, when the record has no XML (counted as
+     * unretrievable) or when its schema cannot be detected (counted as unknown schema).
+     *
+     * @param record            remote UUID and its XML
+     * @param overrideUuidValue UUID to store instead of the remote one, or null to keep the remote UUID
+     */
+    private void addMetadata(Map.Entry<String, Element> record, String overrideUuidValue) throws Exception {
+        if (cancelMonitor.get()) {
+            return;
+        }
+
+        Element xml = record.getValue();
+        if (xml == null) {
+            result.unretrievable++;
+            return;
+        }
+
+        String schema = dataMan.autodetectSchema(xml, null);
+        if (schema == null) {
+            log.debug("  - Metadata skipped due to unknown schema. uuid:" + record.getKey());
+            result.unknownSchema++;
+            return;
+        }
+
+        String uuid = record.getKey();
+        if (overrideUuidValue != null) {
+            log.debug(String.format("  - Overriding UUID %s by %s", record.getKey(), overrideUuidValue));
+            uuid = overrideUuidValue;
+            xml = dataMan.setUUID(schema, uuid, record.getValue());
+        }
+
+
+        log.debug("  - Adding metadata with uuid:" + uuid + " schema:" + schema);
+
+        final String dateModified = dataMan.extractDateModified(schema, xml);
+
+        AbstractMetadata metadata = new Metadata();
+        metadata.setUuid(uuid);
+        Integer ownerId = getOwner();
+        metadata.getDataInfo().
+            setSchemaId(schema).
+            setRoot(xml.getQualifiedName()).
+            setType(MetadataType.METADATA).
+            setChangeDate(new ISODate(dateModified)).
+            setCreateDate(new ISODate(dateModified));
+        // The group owner is set once here; the original repeated the same call after this chain.
+        metadata.getSourceInfo().
+            setSourceId(params.getUuid()).
+            setOwner(ownerId).
+            setGroupOwner(getGroupOwner());
+        metadata.getHarvestInfo().
+            setHarvested(true).
+            setUuid(params.getUuid());
+
+        addCategories(metadata, params.getCategories(), localCateg, context, null, false);
+
+        metadata = metadataManager.insertMetadata(context, metadata, xml, false, false, UpdateDatestamp.NO, false, false);
+
+        String id = String.valueOf(metadata.getId());
+
+        addPrivileges(id, params.getPrivileges(), localGroups, context);
+
+        metadataIndexer.indexMetadata(id, true);
+        result.addedMetadata++;
+    }
+
+
+    /**
+     * Replaces the XML of an existing record and resets its privileges and categories to the
+     * ones configured on this harvester.
+     *
+     * @param ri    remote UUID and its XML
+     * @param id    local metadata id of the record to update
+     * @param force when {@code TRUE}, the record is re-assigned to this harvester; null is treated as false
+     * @return true when the record was updated, false when the remote record has no XML
+     *         (counted as unchanged)
+     */
+    @Transactional(value = TxType.REQUIRES_NEW)
+    boolean updateMetadata(Map.Entry<String, Element> ri, String id, Boolean force) throws Exception {
+        Element md = ri.getValue();
+        if (md == null) {
+            result.unchangedMetadata++;
+            return false;
+        }
+
+        boolean validate = false;
+        boolean ufo = false;
+        boolean index = false;
+        String language = context.getLanguage();
+        String schema = dataMan.autodetectSchema(md, null);
+        final String dateModified = dataMan.extractDateModified(schema, ri.getValue());
+
+        final AbstractMetadata metadata = metadataManager.updateMetadata(context, id, md, validate, ufo, index, language, dateModified, true);
+
+        // Boolean.TRUE.equals avoids a NullPointerException from unboxing a null flag.
+        if (Boolean.TRUE.equals(force)) {
+            //change ownership of metadata to new harvester
+            metadata.getHarvestInfo().setUuid(params.getUuid());
+            metadata.getSourceInfo().setSourceId(params.getUuid());
+
+            metadataManager.save((Metadata) metadata);
+        }
+
+        OperationAllowedRepository repository = context.getBean(OperationAllowedRepository.class);
+        repository.deleteAllByMetadataId(Integer.parseInt(id));
+
+        addPrivileges(id, params.getPrivileges(), localGroups, context);
+
+        metadata.getCategories().clear();
+        addCategories(metadata, params.getCategories(), localCateg, context, null, true);
+        result.updatedMetadata++;
+        return true;
+    }
+}
diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java
new file mode 100644
index 00000000000..6621e5f634f
--- /dev/null
+++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/Harvester.java
@@ -0,0 +1,302 @@
+//=============================================================================
+//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the
+//=== United Nations (FAO-UN), United Nations World Food Programme (WFP)
+//=== and United Nations Environment Programme (UNEP)
+//===
+//=== This program is free software; you can redistribute it and/or modify
+//=== it under the terms of the GNU General Public License as published by
+//=== the Free Software Foundation; either version 2 of the License, or (at
+//=== your option) any later version.
+//===
+//=== This program is distributed in the hope that it will be useful, but
+//=== WITHOUT ANY WARRANTY; without even the implied warranty of
+//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+//=== General Public License for more details.
+//=== +//=== You should have received a copy of the GNU General Public License +//=== along with this program; if not, write to the Free Software +//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +//=== +//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2, +//=== Rome - Italy. email: geonetwork@osgeo.org +//============================================================================== + +package org.fao.geonet.kernel.harvest.harvester.simpleUrl; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.io.CharStreams; +import jeeves.server.context.ServiceContext; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.http.client.methods.HttpGet; +import org.fao.geonet.Logger; +import org.fao.geonet.constants.Geonet; +import org.fao.geonet.exceptions.BadParameterEx; +import org.fao.geonet.kernel.harvest.harvester.HarvestError; +import org.fao.geonet.kernel.harvest.harvester.HarvestResult; +import org.fao.geonet.kernel.harvest.harvester.IHarvester; +import org.fao.geonet.lib.Lib; +import org.fao.geonet.utils.GeonetHttpRequestFactory; +import org.fao.geonet.utils.Log; +import org.fao.geonet.utils.Xml; +import org.jdom.Element; +import org.jdom.JDOMException; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.XML; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.http.client.ClientHttpResponse; + +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; 
+import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * Harvest metadata from a JSON source. + *

+ * The JSON source can be a simple JSON file or
+ * a URL with indication on how to pass paging information.
+ *
+ * This harvester has been tested with CKAN search API.
+ */
+class Harvester implements IHarvester {
+    public static final String LOGGER_NAME = "geonetwork.harvester.json";
+
+    private final AtomicBoolean cancelMonitor;
+    private Logger log;
+    private SimpleUrlParams params;
+    private ServiceContext context;
+
+    @Autowired
+    GeonetHttpRequestFactory requestFactory;
+
+    /**
+     * Contains a list of accumulated errors during the executing of this harvest.
+     */
+    private List<HarvestError> errors = new LinkedList<>();
+
+    public Harvester(AtomicBoolean cancelMonitor, Logger log, ServiceContext context, SimpleUrlParams params) {
+        this.cancelMonitor = cancelMonitor;
+        this.log = log;
+        this.context = context;
+        this.params = params;
+    }
+
+    /**
+     * Retrieves the configured URL, walks through all result pages and aligns the records
+     * found with the local catalogue.
+     *
+     * The first page is the response of the configured URL itself; further pages are fetched
+     * from the URLs produced by {@link #buildListOfUrl(SimpleUrlParams, int)}. Local records
+     * that are no longer in the source are removed only when every page was processed
+     * successfully, so that a failing page cannot cause records to be deleted.
+     *
+     * @param log logger to use for this run
+     * @return the harvest result; never null (an empty result when cancelled or when the run
+     *         failed before any record was aligned)
+     * @throws Exception if the first response cannot be retrieved or parsed
+     */
+    public HarvestResult harvest(Logger log) throws Exception {
+        this.log = log;
+        log.debug("Retrieving simple URL: " + params.getName());
+
+        requestFactory = context.getBean(GeonetHttpRequestFactory.class);
+
+        String jsonResponse = retrieveUrl(params.url, log);
+        if (cancelMonitor.get()) {
+            return new HarvestResult();
+        }
+        log.debug("Response is: " + jsonResponse);
+
+        // TODO: Add support for XML or JSON
+        int numberOfRecordsToHarvest = -1;
+        ObjectMapper objectMapper = new ObjectMapper();
+        JsonNode jsonObj = objectMapper.readTree(jsonResponse);
+
+        if (StringUtils.isNotEmpty(params.numberOfRecordPath)) {
+            try {
+                numberOfRecordsToHarvest = jsonObj.at(params.numberOfRecordPath).asInt();
+                log.debug("Number of records to harvest: " + numberOfRecordsToHarvest);
+            } catch (Exception e) {
+                // Keep going with an unknown count (single page), but do not hide the cause.
+                log.warning(String.format(
+                    "Failed to read number of records at '%s'. Error is: %s",
+                    params.numberOfRecordPath, e.getMessage()));
+            }
+        }
+        boolean error = false;
+        // Never hand null back to the caller: start from an empty result.
+        HarvestResult result = new HarvestResult();
+        Map<String, Element> allUuids = new HashMap<>();
+        try {
+            Aligner aligner = new Aligner(cancelMonitor, context, params, log);
+            List<String> listOfUrlForPages = buildListOfUrl(params, numberOfRecordsToHarvest);
+
+            // Both values only depend on the configured URL, so compute them once.
+            final String apiUrl = params.url.split("\\?")[0];
+            final URL parsedApiUrl = new URL(apiUrl);
+            final String nodeUrl = parsedApiUrl.getProtocol() + "://" + parsedApiUrl.getAuthority();
+
+            boolean incomplete = false;
+            for (int i = 0; i < listOfUrlForPages.size(); i++) {
+                if (cancelMonitor.get()) {
+                    break;
+                }
+                if (i != 0) {
+                    jsonResponse = retrieveUrl(listOfUrlForPages.get(i), log);
+                    jsonObj = objectMapper.readTree(jsonResponse);
+                }
+                if (StringUtils.isEmpty(params.loopElement)) {
+                    continue;
+                }
+
+                Map<String, Element> uuids = new HashMap<>();
+                try {
+                    JsonNode nodes = jsonObj.at(params.loopElement);
+                    log.debug("Number of records in response: " + nodes.size());
+
+                    for (JsonNode record : nodes) {
+                        JsonNode identifier = record.get(params.recordIdPath);
+                        if (identifier == null || identifier.isNull()) {
+                            // Without an identifier the record cannot be matched; skip it
+                            // instead of failing the whole page.
+                            log.warning(String.format(
+                                "Record without value for '%s' ignored", params.recordIdPath));
+                            continue;
+                        }
+                        String uuid = this.extractUuidFromIdentifier(identifier.asText());
+                        Element xml = convertRecordToXml(record, uuid, apiUrl, nodeUrl);
+                        uuids.put(uuid, xml);
+                    }
+                    // align() returns the aligner's accumulated result, so counters survive
+                    // a failure on a later page.
+                    result = aligner.align(uuids, errors);
+                    allUuids.putAll(uuids);
+                } catch (Exception e) {
+                    incomplete = true;
+                    log.warning("Failed to collect record in response");
+                    log.error(e);
+                    errors.add(new HarvestError(context, e));
+                }
+            }
+
+            if (incomplete) {
+                log.warning("Some pages could not be processed. Records no longer in the source are not removed during this run.");
+            } else {
+                result = aligner.cleanupRemovedRecords(allUuids.keySet());
+            }
+        } catch (Exception t) {
+            error = true;
+            log.error("Unknown error trying to harvest");
+            log.error(t.getMessage());
+            log.error(t);
+            errors.add(new HarvestError(context, t));
+        } catch (Throwable t) {
+            error = true;
+            log.fatal("Something unknown and terrible happened while harvesting");
+            log.fatal(t.getMessage());
+            errors.add(new HarvestError(context, t));
+        }
+
+        log.info("Total records processed in all searches :" + allUuids.size());
+        if (error) {
+            log.warning("Due to previous errors the align process has not been called");
+        }
+
+        return result;
+    }
+
+    /**
+     * Returns the identifier to use as UUID. When the identifier is a valid URL, the last
+     * path segment (without query string) is used; otherwise the identifier is returned as is.
+     */
+    private String extractUuidFromIdentifier(final String identifier ) {
+        String uuid = identifier;
+        if (Lib.net.isUrlValid(uuid)) {
+            uuid = uuid.replaceFirst(".*/([^/?]+).*", "$1");
+        }
+        return uuid;
+    }
+
+    /**
+     * Builds the list of URLs to request, one per page of results.
+     *
+     * The page size and the index of the first record are read from the configured URL. When
+     * paging is not configured, when either value is missing or not numeric, when the page size
+     * is not positive or when the number of records is unknown (zero or negative), the list
+     * contains the configured URL only.
+     *
+     * @param params                   harvester configuration (URL and names of the paging parameters)
+     * @param numberOfRecordsToHarvest total number of records announced by the source
+     * @return the URLs to request, in page order; never empty
+     */
+    @VisibleForTesting
+    protected List<String> buildListOfUrl(SimpleUrlParams params, int numberOfRecordsToHarvest) {
+        List<String> urlList = new ArrayList<>();
+        if (StringUtils.isEmpty(params.pageSizeParam)) {
+            urlList.add(params.url);
+            return urlList;
+        }
+
+        final Integer numberOfRecordsPerPage = extractNumericParam(params.url, params.pageSizeParam);
+        if (numberOfRecordsPerPage == null) {
+            log.warning(String.format(
+                "Page size param '%s' not found or is not a numeric in URL '%s'. Can't build a list of pages.",
+                params.pageSizeParam, params.url));
+            urlList.add(params.url);
+            return urlList;
+        }
+
+        final Integer pageFromValue = extractNumericParam(params.url, params.pageFromParam);
+        if (pageFromValue == null) {
+            log.warning(String.format(
+                "Page from param '%s' not found or is not a numeric in URL '%s'. Can't build a list of pages.",
+                params.pageFromParam, params.url));
+            urlList.add(params.url);
+            return urlList;
+        }
+
+        if (numberOfRecordsToHarvest <= 0 || numberOfRecordsPerPage <= 0) {
+            // Unknown count or unusable page size: no paging (also avoids a division by zero).
+            urlList.add(params.url);
+            return urlList;
+        }
+
+        // Index of the first record: 0 when the URL starts at zero, 1 otherwise.
+        final int firstIndex = pageFromValue == 0 ? 0 : 1;
+        // Ceiling division; the number of pages does not depend on the first index.
+        final int numberOfPages = (numberOfRecordsToHarvest - 1) / numberOfRecordsPerPage + 1;
+
+        for (int i = 0; i < numberOfPages; i++) {
+            int from = i * numberOfRecordsPerPage + firstIndex;
+            int size = i == numberOfPages - 1 ? // Last page
+                numberOfRecordsToHarvest - from + firstIndex :
+                numberOfRecordsPerPage;
+            String url = replaceNumericParam(params.url, params.pageFromParam, from);
+            urlList.add(replaceNumericParam(url, params.pageSizeParam, size));
+        }
+
+        return urlList;
+    }
+
+    /**
+     * Reads the numeric value of a query parameter, or returns null when the parameter is
+     * absent, not numeric or too large for an int. The last occurrence wins.
+     */
+    private static Integer extractNumericParam(String url, String paramName) {
+        if (url == null || StringUtils.isEmpty(paramName)) {
+            return null;
+        }
+        // The parameter name is quoted so that regex metacharacters in it are matched literally.
+        java.util.regex.Matcher matcher = java.util.regex.Pattern
+            .compile("[?&]" + java.util.regex.Pattern.quote(paramName) + "=([0-9]+)")
+            .matcher(url);
+        String digits = null;
+        while (matcher.find()) {
+            digits = matcher.group(1);
+        }
+        if (digits == null) {
+            return null;
+        }
+        try {
+            return Integer.valueOf(digits);
+        } catch (NumberFormatException e) {
+            return null;
+        }
+    }
+
+    /**
+     * Replaces the numeric value of a query parameter. Only a parameter preceded by '?' or
+     * '&amp;' is replaced, so a parameter whose name merely ends with the same text is untouched.
+     */
+    private static String replaceNumericParam(String url, String paramName, int value) {
+        return url.replaceAll(
+            "([?&])" + java.util.regex.Pattern.quote(paramName) + "=[0-9]+",
+            "$1" + java.util.regex.Matcher.quoteReplacement(paramName + "=" + value));
+    }
+
+    // NOTE(review): the text of this method is truncated in this copy of the patch (the string
+    // literal below is unterminated and runs into getErrors()). It is reproduced unchanged;
+    // restore the missing lines from the original commit.
+    private Element convertRecordToXml(JsonNode record, String uuid, String apiUrl, String nodeUrl) {
+        ObjectMapper objectMapper = new ObjectMapper();
+        try {
+            String recordAsXml = XML.toString(
+                new JSONObject(
+                    objectMapper.writeValueAsString(record)), "record");
+            recordAsXml = Xml.stripNonValidXMLCharacters(recordAsXml).replace("<@", "<").replace(" getErrors() {
+        return errors;
+    }
+}
diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlHarvester.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlHarvester.java
new file mode 100644
index 00000000000..bd8dcaa8ecd
--- /dev/null
+++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlHarvester.java
@@ -0,0 +1,79 @@
+//=============================================================================
+//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the
+//=== United Nations (FAO-UN), United Nations World Food Programme (WFP)
+//=== and United Nations Environment Programme (UNEP)
+//===
+//=== This program is free software; you can redistribute it and/or modify
+//=== it under the terms of the GNU General Public License as published by
+//=== the Free Software Foundation; either version 2 of the License, or (at
+//=== your option) any later version.
+//===
+//=== This program is distributed in the hope that it will be useful, but
+//=== WITHOUT ANY WARRANTY; without even the implied warranty of
+//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+//=== General Public License for more details.
+//===
+//=== You should have received a copy of the GNU General Public License
+//=== along with this program; if not, write to the Free Software
+//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+//===
+//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2,
+//=== Rome - Italy. email: geonetwork@osgeo.org
+//==============================================================================
+
+package org.fao.geonet.kernel.harvest.harvester.simpleUrl;
+
+import jeeves.server.context.ServiceContext;
+import org.fao.geonet.Logger;
+import org.fao.geonet.domain.Source;
+import org.fao.geonet.domain.SourceType;
+import org.fao.geonet.exceptions.BadInputEx;
+import org.fao.geonet.kernel.harvest.harvester.AbstractHarvester;
+import org.fao.geonet.kernel.harvest.harvester.AbstractParams;
+import org.fao.geonet.kernel.harvest.harvester.HarvestResult;
+import org.fao.geonet.kernel.harvest.harvester.csw.CswParams;
+import org.fao.geonet.repository.SourceRepository;
+import org.fao.geonet.resources.Resources;
+import org.jdom.Element;
+import org.springframework.beans.factory.annotation.Autowired;
+
+import java.io.File;
+import java.sql.SQLException;
+import java.util.UUID;
+
+/**
+ * Harvester entry point for metadata published as JSON behind a simple URL.
+ *
+ * The actual retrieval and alignment is delegated to {@link Harvester}.
+ */
+public class SimpleUrlHarvester extends AbstractHarvester {
+
+    /**
+     * @return a fresh, empty parameter object for this harvester type
+     */
+    @Override
+    protected SimpleUrlParams createParams() {
+        return new SimpleUrlParams(dataMan);
+    }
+
+    /**
+     * Saves the settings that are specific to this harvester type under the site node.
+     *
+     * @param params    the harvester parameters holding the values to store
+     * @param path      not used by this implementation
+     * @param siteId    identifier of the site settings node; values are stored under "id:" + siteId
+     * @param optionsId not used by this implementation
+     * @throws SQLException declared by the signature
+     */
+    protected void storeNodeExtra(SimpleUrlParams params, String path, String siteId, String optionsId) throws SQLException {
+        final String siteNode = "id:" + siteId;
+
+        harvesterSettingsManager.add(siteNode, "url", params.url);
+        harvesterSettingsManager.add(siteNode, "icon", params.icon);
+        harvesterSettingsManager.add(siteNode, "loopElement", params.loopElement);
+        harvesterSettingsManager.add(siteNode, "numberOfRecordPath", params.numberOfRecordPath);
+        harvesterSettingsManager.add(siteNode, "recordIdPath", params.recordIdPath);
+        harvesterSettingsManager.add(siteNode, "pageFromParam", params.pageFromParam);
+        harvesterSettingsManager.add(siteNode, "pageSizeParam", params.pageSizeParam);
+        harvesterSettingsManager.add(siteNode, "toISOConversion", params.toISOConversion);
+    }
+
+    /**
+     * Runs one harvest and keeps its outcome in {@code result}.
+     *
+     * NOTE(review): errors collected by the {@link Harvester} instance are not read back
+     * here - confirm whether the parent class expects them.
+     *
+     * @param log logger to use for this run
+     */
+    public void doHarvest(Logger log) throws Exception {
+        final Harvester harvester = new Harvester(cancelMonitor, log, context, params);
+        result = harvester.harvest(log);
+    }
+}
diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlParams.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlParams.java
new file mode 100644
index 00000000000..769c692e653
--- /dev/null
+++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/SimpleUrlParams.java
@@ -0,0 +1,103 @@
+//=============================================================================
+//=== Copyright (C) 2001-2007 Food and Agriculture Organization of the
+//=== United Nations (FAO-UN), United Nations World Food Programme (WFP)
+//=== and United Nations Environment Programme (UNEP)
+//===
+//=== This program is free software; you can redistribute it and/or modify
+//=== it under the terms of the GNU General Public License as published by
+//=== the Free Software Foundation; either version 2 of the License, or (at
+//=== your option) any later version.
+//===
+//=== This program is distributed in the hope that it will be useful, but
+//=== WITHOUT ANY WARRANTY; without even the implied warranty of
+//=== MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+//=== General Public License for more details.
+//===
+//=== You should have received a copy of the GNU General Public License
+//=== along with this program; if not, write to the Free Software
+//=== Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+//===
+//=== Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2,
+//=== Rome - Italy. email: geonetwork@osgeo.org
+//==============================================================================
+
+package org.fao.geonet.kernel.harvest.harvester.simpleUrl;
+
+import org.fao.geonet.Util;
+import org.fao.geonet.exceptions.BadInputEx;
+import org.fao.geonet.kernel.DataManager;
+import org.fao.geonet.kernel.harvest.harvester.AbstractParams;
+import org.jdom.Element;
+
+/**
+ * Settings of a simple URL harvester: the URL to request, where to find the records and
+ * their identifier in the response, how paging is expressed in the URL and which
+ * conversion to apply to each record.
+ */
+public class SimpleUrlParams extends AbstractParams {
+    // URL to request.
+    public String url;
+    // Icon of the harvester, see getIcon().
+    public String icon;
+    // Location of the list of records in the response.
+    public String loopElement;
+    // Location of the total number of records in the response.
+    public String numberOfRecordPath;
+    // Name of the property holding the identifier of a record.
+    public String recordIdPath;
+    // Name of the URL parameter holding the page size.
+    public String pageSizeParam;
+    // Name of the URL parameter holding the index of the first record.
+    public String pageFromParam;
+    // Name of the conversion used to turn a record into metadata.
+    public String toISOConversion;
+
+    public SimpleUrlParams(DataManager dm) {
+        super(dm);
+    }
+
+    /**
+     * Initialises the settings of a new harvester from the "site" child of the given node.
+     * A setting that is not provided gets a built-in default value.
+     */
+    public void create(Element node) throws BadInputEx {
+        super.create(node);
+
+        final Element siteNode = node.getChild("site");
+
+        url = Util.getParam(siteNode, "url", "http://dados.gov.br/api/3/action/package_search?q=");
+        icon = Util.getParam(siteNode, "icon", "default.gif");
+
+        loopElement = Util.getParam(siteNode, "loopElement", "/result/results");
+        numberOfRecordPath = Util.getParam(siteNode, "numberOfRecordPath", "/result/count");
+        recordIdPath = Util.getParam(siteNode, "recordIdPath", "id");
+
+        pageSizeParam = Util.getParam(siteNode, "pageSizeParam", "rows");
+        pageFromParam = Util.getParam(siteNode, "pageFromParam", "start");
+
+        toISOConversion = Util.getParam(siteNode, "toISOConversion", "CKAN-to-ISO19115-3-2018");
+    }
+
+    /**
+     * Refreshes the settings of an existing harvester from the "site" child of the given node.
+     * The URL and the icon keep their current value when not provided; every other setting
+     * falls back to an empty string.
+     */
+    public void update(Element node) throws BadInputEx {
+        super.update(node);
+
+        final Element siteNode = node.getChild("site");
+
+        url = Util.getParam(siteNode, "url", url);
+        icon = Util.getParam(siteNode, "icon", icon);
+
+        loopElement = Util.getParam(siteNode, "loopElement", "");
+        numberOfRecordPath = Util.getParam(siteNode, "numberOfRecordPath", "");
+        recordIdPath = Util.getParam(siteNode, "recordIdPath", "");
+
+        pageSizeParam = Util.getParam(siteNode, "pageSizeParam", "");
+        pageFromParam = Util.getParam(siteNode, "pageFromParam", "");
+
+        toISOConversion = Util.getParam(siteNode, "toISOConversion", "");
+    }
+
+    @Override
+    public String getIcon() {
+        return icon;
+    }
+
+    /**
+     * @return a new parameter object carrying the same values as this one
+     */
+    public SimpleUrlParams copy() {
+        final SimpleUrlParams duplicate = new SimpleUrlParams(dm);
+        copyTo(duplicate);
+
+        duplicate.url = url;
+        duplicate.icon = icon;
+        duplicate.loopElement = loopElement;
+        duplicate.numberOfRecordPath = numberOfRecordPath;
+        duplicate.recordIdPath = recordIdPath;
+        duplicate.pageSizeParam = pageSizeParam;
+        duplicate.pageFromParam = pageFromParam;
+        duplicate.toISOConversion = toISOConversion;
+
+        return duplicate;
+    }
+}
diff --git a/harvesters/src/main/resources/config-spring-geonetwork.xml b/harvesters/src/main/resources/config-spring-geonetwork.xml
index 55b78e8783d..75469da05f1 100644
--- 
a/harvesters/src/main/resources/config-spring-geonetwork.xml +++ b/harvesters/src/main/resources/config-spring-geonetwork.xml @@ -57,9 +57,10 @@ - - + diff --git a/harvesters/src/test/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/HarvesterTest.java b/harvesters/src/test/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/HarvesterTest.java new file mode 100644 index 00000000000..c405bcba438 --- /dev/null +++ b/harvesters/src/test/java/org/fao/geonet/kernel/harvest/harvester/simpleUrl/HarvesterTest.java @@ -0,0 +1,58 @@ +package org.fao.geonet.kernel.harvest.harvester.simpleUrl; + +import org.fao.geonet.utils.Log; +import org.junit.Test; + +import java.util.List; + +import static org.junit.Assert.*; + +public class HarvesterTest { + + @Test + public void test_buildPagesUrl() { + final SimpleUrlParams params = new SimpleUrlParams(null); + params.url = "http://dados.gov.br/api/3/action/package_search?q=&rows=10&start=1"; + params.pageFromParam = "start"; + params.pageSizeParam = "rows"; + + int numberOfResult = 21; + + final Harvester harvester = new Harvester(null, Log.createLogger("TEST"), null, params); + List list = harvester.buildListOfUrl(params, numberOfResult); + assertEquals(3, list.size()); + assertEquals("http://dados.gov.br/api/3/action/package_search?q=&rows=10&start=1", list.get(0)); + assertEquals("http://dados.gov.br/api/3/action/package_search?q=&rows=10&start=11", list.get(1)); + assertEquals("http://dados.gov.br/api/3/action/package_search?q=&rows=1&start=21", list.get(2)); + + + + params.url = "http://dados.gov.br/api/3/action/package_search?q=&rows=10&start=0"; + list = harvester.buildListOfUrl(params, numberOfResult); + assertEquals(3, list.size()); + assertEquals("http://dados.gov.br/api/3/action/package_search?q=&rows=10&start=0", list.get(0)); + assertEquals("http://dados.gov.br/api/3/action/package_search?q=&rows=10&start=10", list.get(1)); + assertEquals("http://dados.gov.br/api/3/action/package_search?q=&rows=1&start=20", 
list.get(2)); + + params.url = "http://dados.gov.br/api/3/action/package_search?q=&rows=DADA&start=1"; + list = harvester.buildListOfUrl(params, numberOfResult); + assertEquals(1, list.size()); + + params.url = "http://dados.gov.br/api/3/action/package_search?q=&rows=11&start=DADA"; + list = harvester.buildListOfUrl(params, numberOfResult); + assertEquals(1, list.size()); + + params.url = "http://dados.gov.br/api/3/action/package_search?q=&&start=1"; + list = harvester.buildListOfUrl(params, numberOfResult); + assertEquals(1, list.size()); + + params.url = "http://dados.gov.br/api/3/action/package_search?q=&rows=11&"; + list = harvester.buildListOfUrl(params, numberOfResult); + assertEquals(1, list.size()); + + + params.url = "http://dados.gov.br/api/3/action/package_search?q=&rows=2&start=0"; + list = harvester.buildListOfUrl(params, 8); + assertEquals(4, list.size()); + } +} diff --git a/schemas/iso19115-3.2018/src/main/plugin/iso19115-3.2018/convert/fromJsonLdEsri.xsl b/schemas/iso19115-3.2018/src/main/plugin/iso19115-3.2018/convert/fromJsonLdEsri.xsl new file mode 100644 index 00000000000..0275423e999 --- /dev/null +++ b/schemas/iso19115-3.2018/src/main/plugin/iso19115-3.2018/convert/fromJsonLdEsri.xsl @@ -0,0 +1,449 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ISO 19115-3 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + originator + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/web-ui/src/main/resources/catalog/locales/en-admin.json b/web-ui/src/main/resources/catalog/locales/en-admin.json index 
e5e8ff520f4..49eb124984d 100644 --- a/web-ui/src/main/resources/catalog/locales/en-admin.json +++ b/web-ui/src/main/resources/catalog/locales/en-admin.json @@ -294,6 +294,7 @@ "duplicatedValueFoundUserName": "A user with that name already exists. Choose another one.", "duplicatedValueFoundUserEmail": "A user with that email already exists. Choose another one.", "csw": "CSW", + "simpleurl": "Simple URL", "csw-FailedToParseCapabilities": "Error while parsing GetCapabilities", "csw-capabilitiesUrlHelp": "CSW URL with or without GetCapabilities parameters", "csw-category": "Category for harvested records", @@ -446,6 +447,7 @@ "harvesterReport": "Catalog harvester report", "harvester-arcsde": "ArcSDE", "harvester-arcsdeHelp": "Harvest metadata records from an ArcSDE database.", + "harvester-simpleurl": "Simple URL", "harvester-csw": "OGC CSW 2.0.2", "harvester-cswHelp": "Harvest from a CSW server", "harvester-filesystem": "Directory", diff --git a/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.html b/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.html new file mode 100644 index 00000000000..4581d82ec12 --- /dev/null +++ b/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.html @@ -0,0 +1,106 @@ +

+ + +
+
+
+
+ + +
+ harvesterMainConfigurationFor {{('harvester-' + + harvesterSelected['@type']) | translate}} + +
+ + +

simpleurl-urlHelp

+
+
+ + +
+ harvesterAdvancedConfigurationFor + {{harvesterSelected['@type'] | translate}} + + +
+ +
+ + +

simpleurl-loopElementHelp

+
+ +
+ + +

simpleurl-numberOfRecordPathHelp

+
+ +
+ + +

simpleurl-recordIdPathHelp

+
+ +
+ + +

simpleurl-pageFromParamHelp

+
+ +
+ + +

simpleurl-pageSizeParamHelp

+
+ +
+ + +

simpleurl-toISOConversionHelp

+
+ + +
+ + +
+ +
+

harvesterValidateHelp

+
+
+ +
+ diff --git a/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.js b/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.js new file mode 100644 index 00000000000..60a14b44244 --- /dev/null +++ b/web-ui/src/main/resources/catalog/templates/admin/harvest/type/simpleurl.js @@ -0,0 +1,107 @@ +// This is not that much elegant and should be replaced by some kind +// of Angular module. +var gnHarvestersimpleurl = { + createNew : function() { + return { + "@id" : "", + "@type" : "simpleurl", + "owner" : [], + "ownerGroup" : [], + "ownerUser": [""], + "site" : { + "name" : "", + "uuid" : "", + "icon" : "blank.png", + "account" : { + "use" : false, + "username" : [], + "password" : [] + }, + "url" : "http://", + "loopElement" : "", + "numberOfRecordPath": "", + "pageSizeParam": "", + "pageFromParam": "", + "recordIdPath": "", + "toISOConversion": "" + }, + "content" : { + "validate" : "NOVALIDATION" + }, + "options" : { + "every" : "0 0 0 ? * *", + "oneRunOnly" : false, + "overrideUuid": "SKIP", + "status" : "active" + }, + "privileges" : [ { + "@id" : "1", + "operation" : [ { + "@name" : "view" + }, { + "@name" : "dynamic" + } ] + } ], + "categories" : [], + "info" : { + "lastRun" : [], + "running" : false + } + }; + }, + buildResponseCSWSearch : function($scope) { + var body = ''; + if ($scope.harvesterSelected.searches) { + for(var tag in $scope.harvesterSelected.searches[0]) { + if($scope.harvesterSelected.searches[0].hasOwnProperty(tag)) { + var value = $scope.harvesterSelected.searches[0][tag].value; + // Save all values even if empty + // XML to JSON does not convert single child to Object but Array + // In that situation, saving only one parameter will make this + // happen and then search criteria name which is the tag name + // will be lost. 
+ // if (value) { + body += '<' + tag + '>' + value + ''; + // } + } + } + } + return '' + body + ''; + }, + buildResponse : function(h, $scope) { + var body = '' + + ' ' + h.ownerGroup[0] + '' + + ' ' + h.ownerUser[0] + '' + + ' ' + + ' ' + h.site.name + '' + + ' ' + h.site.rejectDuplicateResource + '' + + ' ' + h.site.url.replace(/&/g, '&') + '' + + ' ' + h.site.icon + '' + + ' ' + + ' ' + h.site.account.use + '' + + ' ' + h.site.account.username + '' + + ' ' + h.site.account.password + '' + + ' ' + + ' ' + h.site.loopElement + '' + + ' ' + h.site.numberOfRecordPath + '' + + ' ' + h.site.recordIdPath + '' + + ' ' + h.site.pageFromParam + '' + + ' ' + h.site.pageSizeParam + '' + + ' ' + h.site.toISOConversion + '' + + ' ' + + gnHarvestersimpleurl.buildResponseCSWSearch($scope) + + ' ' + + ' ' + h.options.oneRunOnly + '' + + ' ' + h.options.overrideUuid + '' + + ' ' + h.options.every + '' + + ' ' + h.options.status + '' + + ' ' + + ' ' + + ' ' + h.content.validate + '' + + ' ' + + $scope.buildResponseGroup(h) + + $scope.buildResponseCategory(h) + ''; + return body; + } +}; diff --git a/web/src/main/webapp/xsl/conversion/import/CKAN-to-ISO19115-3-2018.xsl b/web/src/main/webapp/xsl/conversion/import/CKAN-to-ISO19115-3-2018.xsl new file mode 100644 index 00000000000..0894bb00e99 --- /dev/null +++ b/web/src/main/webapp/xsl/conversion/import/CKAN-to-ISO19115-3-2018.xsl @@ -0,0 +1,5 @@ + + + + diff --git a/web/src/main/webapp/xsl/conversion/import/ESRIDCAT-to-DCAT2.xsl b/web/src/main/webapp/xsl/conversion/import/ESRIDCAT-to-DCAT2.xsl new file mode 100644 index 00000000000..c4a50b44c3e --- /dev/null +++ b/web/src/main/webapp/xsl/conversion/import/ESRIDCAT-to-DCAT2.xsl @@ -0,0 +1,5 @@ + + + + diff --git a/web/src/main/webapp/xsl/conversion/import/ESRIDCAT-to-ISO19115-3-2018.xsl b/web/src/main/webapp/xsl/conversion/import/ESRIDCAT-to-ISO19115-3-2018.xsl new file mode 100644 index 00000000000..e7eea186961 --- /dev/null +++ 
b/web/src/main/webapp/xsl/conversion/import/ESRIDCAT-to-ISO19115-3-2018.xsl @@ -0,0 +1,5 @@ + + + + diff --git a/web/src/main/webapp/xsl/conversion/import/OPENDATASOFT-to-DCAT2.xsl b/web/src/main/webapp/xsl/conversion/import/OPENDATASOFT-to-DCAT2.xsl new file mode 100644 index 00000000000..91b92f26aa3 --- /dev/null +++ b/web/src/main/webapp/xsl/conversion/import/OPENDATASOFT-to-DCAT2.xsl @@ -0,0 +1,5 @@ + + + + diff --git a/web/src/main/webapp/xsl/conversion/import/OPENDATASOFT-to-ISO19115-3-2018.xsl b/web/src/main/webapp/xsl/conversion/import/OPENDATASOFT-to-ISO19115-3-2018.xsl new file mode 100644 index 00000000000..40d2dcc6a97 --- /dev/null +++ b/web/src/main/webapp/xsl/conversion/import/OPENDATASOFT-to-ISO19115-3-2018.xsl @@ -0,0 +1,5 @@ + + + + diff --git a/web/src/main/webapp/xsl/xml/harvesting/simpleurl.xsl b/web/src/main/webapp/xsl/xml/harvesting/simpleurl.xsl new file mode 100644 index 00000000000..d786c7ece7b --- /dev/null +++ b/web/src/main/webapp/xsl/xml/harvesting/simpleurl.xsl @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +