From 4e7de86c81590f9e54df45bab68fe945f709d4ff Mon Sep 17 00:00:00 2001 From: GeoNetwork opensource <59019313+geonetworkbuild@users.noreply.github.com> Date: Wed, 16 Oct 2024 13:10:39 +0200 Subject: [PATCH] Harvester / Simple URL / Fix multiple URL alignment (#8438) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clean up records to remove only once all URLs are processed. No need for the Element to be preserved; alignment only requires the list of UUIDs. Co-authored-by: François Prunayre --- .../kernel/harvest/harvester/simpleurl/Harvester.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleurl/Harvester.java b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleurl/Harvester.java index 2cd1100dc6d..254fac91f84 100644 --- a/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleurl/Harvester.java +++ b/harvesters/src/main/java/org/fao/geonet/kernel/harvest/harvester/simpleurl/Harvester.java @@ -105,6 +105,7 @@ public HarvestResult harvest(Logger log) throws Exception { String[] urlList = params.url.split("\n"); boolean error = false; Aligner aligner = new Aligner(cancelMonitor, context, params, log); + Set listOfUuids = new HashSet<>(); for (String url : urlList) { log.debug("Loading URL: " + url); @@ -151,7 +152,6 @@ public HarvestResult harvest(Logger log) throws Exception { params.numberOfRecordPath, e.getMessage())); } } - Map allUuids = new HashMap<>(); try { List listOfUrlForPages = buildListOfUrl(params, numberOfRecordsToHarvest); for (int i = 0; i < listOfUrlForPages.size(); i++) { @@ -166,7 +166,6 @@ public HarvestResult harvest(Logger log) throws Exception { if (StringUtils.isNotEmpty(params.loopElement) || type == SimpleUrlResourceType.RDFXML) { Map uuids = new HashMap<>(); - try { if (type == SimpleUrlResourceType.XML) { collectRecordsFromXml(xmlObj, uuids, aligner); @@ -176,7 
+175,7 @@ public HarvestResult harvest(Logger log) throws Exception { collectRecordsFromJson(jsonObj, uuids, aligner); } aligner.align(uuids, errors); - allUuids.putAll(uuids); + listOfUuids.addAll(uuids.keySet()); } catch (Exception e) { errors.add(new HarvestError(this.context, e)); log.error(String.format("Failed to collect record in response at path %s. Error is: %s", @@ -184,7 +183,6 @@ public HarvestResult harvest(Logger log) throws Exception { } } } - aligner.cleanupRemovedRecords(allUuids.keySet()); } catch (Exception t) { error = true; log.error("Unknown error trying to harvest"); @@ -198,11 +196,12 @@ public HarvestResult harvest(Logger log) throws Exception { errors.add(new HarvestError(context, t)); } - log.info("Total records processed in all searches :" + allUuids.size()); + log.info("Total records processed in all searches :" + listOfUuids.size()); if (error) { log.warning("Due to previous errors the align process has not been called"); } } + aligner.cleanupRemovedRecords(listOfUuids); return aligner.getResult(); }