From 969040644f9017d9371a4a80e6d8c5757f5e0d90 Mon Sep 17 00:00:00 2001 From: Adam Collins Date: Mon, 5 Aug 2024 08:34:42 +1000 Subject: [PATCH] #1084 sync living atlas module --- livingatlas/migration/pom.xml | 2 +- livingatlas/pipelines/.scalafmt.conf | 5 ++- livingatlas/pipelines/pom.xml | 44 ++++++++++++------- .../main/java/au/org/ala/kvs/ShapeFile.java | 4 ++ .../beam/ALAReverseJackKnifePipeline.java | 16 +++++-- .../pipelines/beam/ClusteringPipeline.java | 13 +++--- .../beam/ImageServiceSyncPipeline.java | 1 + .../pipelines/vocabulary/CentrePoints.java | 8 +++- .../spark/PredicateExportDwCAPipeline.scala | 12 +++-- .../au/org/ala/kvs/GeocodeServiceTestIT.java | 1 + .../org/ala/kvs/NameCheckKVStoreTestIT.java | 1 + pom.xml | 2 +- .../core/LocationTransformTest.java | 2 +- .../core/LocationInterpreterTest.java | 2 +- 14 files changed, 77 insertions(+), 36 deletions(-) diff --git a/livingatlas/migration/pom.xml b/livingatlas/migration/pom.xml index 8c6a0df1ad..d82fdfd1b0 100644 --- a/livingatlas/migration/pom.xml +++ b/livingatlas/migration/pom.xml @@ -16,7 +16,7 @@ - 3.3.1 + 3.4.0 1.78 true diff --git a/livingatlas/pipelines/.scalafmt.conf b/livingatlas/pipelines/.scalafmt.conf index c02bc31cd7..72d09ec52c 100644 --- a/livingatlas/pipelines/.scalafmt.conf +++ b/livingatlas/pipelines/.scalafmt.conf @@ -1,2 +1,3 @@ -version = 2.7.5 -maxColumn = 120 \ No newline at end of file +version = 3.7.3 +maxColumn = 120 +runner.dialect = scala212 diff --git a/livingatlas/pipelines/pom.xml b/livingatlas/pipelines/pom.xml index d764a3628e..ae16667b05 100644 --- a/livingatlas/pipelines/pom.xml +++ b/livingatlas/pipelines/pom.xml @@ -33,8 +33,8 @@ 1.6 - 3.9.9.Final - 4.1.42.Final + + 4.1.100.Final 1.25 0.7.2 1.78 @@ -42,9 +42,6 @@ 1.1.1 1.2 - - 2.4.5 - 13.0 2.2.4 @@ -58,10 +55,8 @@ 3.9.1 1.1.8.4 - 3.3.1 - 3.3.4 + 3.4.0 4.8.0 - 2.12.17 @@ -184,7 +179,7 @@ - 1.7 + ${googleJavaFormat.version} @@ -252,6 +247,12 @@ org.apache.beam beam-sdks-java-io-solr ${apache.beam.version} + + + io.netty + netty-buffer + + org.apache.beam @@ -268,11 +269,11 @@ commons-configuration ${commons-configuration.version} - - io.netty - netty - ${netty.version} - + + + + + io.netty netty-all @@ -303,6 +304,13 @@ org.gbif.registry registry-ws-client + + + + + org.elasticsearch + jna + @@ -737,7 +745,7 @@ org.apache.zookeeper zookeeper - ${zookeeper-version} + ${zookeeper.version} org.jboss.netty @@ -832,6 +840,12 @@ hadoop-compress ${hadoop-compress.version} + + org.apache.logging.log4j + log4j-api + 2.23.1 + test + diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/kvs/ShapeFile.java b/livingatlas/pipelines/src/main/java/au/org/ala/kvs/ShapeFile.java index c574cd6341..95ee5ae280 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/kvs/ShapeFile.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/kvs/ShapeFile.java @@ -11,12 +11,16 @@ public class ShapeFile implements Serializable { /** Path to the shape file */ String path; + /** The name field to use from the shape file. */ String field; + /** URL to source of the shapefile */ String source; + /** Intersect buffer 0.1 = 11km, 0.135 = 15km, 0.18 = 20km */ Double intersectBuffer = 0.18; + /** Intersect mapping to allow intersected values to mapped to different values e.g. CX -> AU * */ Map intersectMapping; } diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/ALAReverseJackKnifePipeline.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/ALAReverseJackKnifePipeline.java index 768db089ba..d56ee01022 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/ALAReverseJackKnifePipeline.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/ALAReverseJackKnifePipeline.java @@ -25,6 +25,7 @@ import org.apache.beam.sdk.transforms.ParDo; import org.apache.beam.sdk.values.*; import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.fs.FileSystem; import org.gbif.pipelines.common.beam.metrics.MetricsHandler; import org.gbif.pipelines.common.beam.options.PipelinesOptionsFactory; import org.gbif.pipelines.core.pojo.HdfsConfigs; @@ -296,10 +297,17 @@ public void processElement(ProcessContext c) { PipelineResult result = pipeline.run(); result.waitUntilFinish(); - MetricsHandler.saveCountersToFile( - HdfsConfigs.create(options.getHdfsSiteConfig(), options.getCoreSiteConfig()), - jackknifePath + "/metrics.yaml", - result.metrics()); + String path = jackknifePath + "/metrics.yaml"; + String countersInfo = MetricsHandler.getCountersInfo(result.metrics()); + FileSystem fs = + FsUtils.getFileSystem( + HdfsConfigs.create(options.getHdfsSiteConfig(), options.getCoreSiteConfig()), path); + try { + FsUtils.createFile(fs, path, countersInfo); + log.info("Metadata was written to a file - {}", path); + } catch (IOException ex) { + log.warn("Write pipelines metadata file", ex); + } log.info("3. Pipeline has been finished"); } diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/ClusteringPipeline.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/ClusteringPipeline.java index 6ccc9ffd64..11cb6fad5a 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/ClusteringPipeline.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/ClusteringPipeline.java @@ -20,7 +20,7 @@ import org.apache.beam.sdk.transforms.*; import org.apache.beam.sdk.values.KV; import org.apache.beam.sdk.values.PCollection; -import org.apache.directory.api.util.Strings; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.fs.FileSystem; import org.gbif.dwc.terms.DwcTerm; import org.gbif.pipelines.common.beam.options.PipelinesOptionsFactory; @@ -174,8 +174,8 @@ public void processElement( .withOtherCatalogNumbers(otherCatalogNumbers); // specimen only hashes - if (Strings.isNotEmpty(speciesKey) - && Strings.isNotEmpty(basisOfRecord) + if (StringUtils.isNotEmpty(speciesKey) + && StringUtils.isNotEmpty(basisOfRecord) && specimenBORs.contains(basisOfRecord)) { Stream ids = @@ -188,7 +188,8 @@ public void processElement( // output hashes for each combination ids.filter( value -> - !Strings.isEmpty(value) && !omitIds.contains(value.toUpperCase())) + !StringUtils.isEmpty(value) + && !omitIds.contains(value.toUpperCase())) .distinct() .collect(Collectors.toList()) .forEach( @@ -225,14 +226,14 @@ public void processElement( } // 2. type status hashkeys - if (Strings.isNotEmpty(taxonKey) && typeStatus != null) { + if (StringUtils.isNotEmpty(taxonKey) && typeStatus != null) { for (String t : typeStatus) { out.output(builder.withHashKey(taxonKey + "|" + t).build()); } } // 3. taxonKey|year|recordedBy hashkeys - if (Strings.isNotEmpty(taxonKey) && year != null && recordedBy != null) { + if (StringUtils.isNotEmpty(taxonKey) && year != null && recordedBy != null) { out.output( builder.withHashKey(taxonKey + "|" + year + "|" + recordedBy).build()); } diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/ImageServiceSyncPipeline.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/ImageServiceSyncPipeline.java index bb21a90376..2a65fdc57d 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/ImageServiceSyncPipeline.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/ImageServiceSyncPipeline.java @@ -95,6 +95,7 @@ public static void main(String[] args) throws Exception { // FIXME: Issue logged here: https://github.com/AtlasOfLivingAustralia/la-pipelines/issues/105 System.exit(0); } + /** * Includes the following steps: * diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/vocabulary/CentrePoints.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/vocabulary/CentrePoints.java index ceaab9bb5c..236fbd21c8 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/vocabulary/CentrePoints.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/vocabulary/CentrePoints.java @@ -149,12 +149,16 @@ public boolean coordinatesMatchCentre( } } - /** @return size of centres */ + /** + * @return size of centres + */ public int size() { return centres.size(); } - /** @return keys */ + /** + * @return keys + */ public Set keys() { return centres.keySet(); } diff --git a/livingatlas/pipelines/src/main/scala/au/org/ala/pipelines/spark/PredicateExportDwCAPipeline.scala b/livingatlas/pipelines/src/main/scala/au/org/ala/pipelines/spark/PredicateExportDwCAPipeline.scala index 5531dfc19f..881f850c73 100644 --- a/livingatlas/pipelines/src/main/scala/au/org/ala/pipelines/spark/PredicateExportDwCAPipeline.scala +++ b/livingatlas/pipelines/src/main/scala/au/org/ala/pipelines/spark/PredicateExportDwCAPipeline.scala @@ -639,7 +639,9 @@ object PredicateExportDwCAPipeline { extensionFileName: String, extensionFields: Array[String] ): Elem = { - + {extensionFileName}.txt @@ -669,7 +671,9 @@ object PredicateExportDwCAPipeline { extensionFileName: String, extensionFields: Array[String] ): Elem = { - + {extensionFileName}.txt @@ -698,7 +702,9 @@ object PredicateExportDwCAPipeline { val coreFileName = coreURI.substring(coreURI.lastIndexOf("/") + 1).toLowerCase val metaXml = - + {coreFileName}.txt diff --git a/livingatlas/pipelines/src/test/java/au/org/ala/kvs/GeocodeServiceTestIT.java b/livingatlas/pipelines/src/test/java/au/org/ala/kvs/GeocodeServiceTestIT.java index 392be22226..17af9c72f5 100644 --- a/livingatlas/pipelines/src/test/java/au/org/ala/kvs/GeocodeServiceTestIT.java +++ b/livingatlas/pipelines/src/test/java/au/org/ala/kvs/GeocodeServiceTestIT.java @@ -266,6 +266,7 @@ public void testCountryEEZ1() { assertFalse(resp.getLocations().isEmpty()); } + // -35.482884,146.804061 @Test public void testCountryEEZ2() { diff --git a/livingatlas/pipelines/src/test/java/au/org/ala/kvs/NameCheckKVStoreTestIT.java b/livingatlas/pipelines/src/test/java/au/org/ala/kvs/NameCheckKVStoreTestIT.java index e02dbd5f2d..091e3e05a7 100644 --- a/livingatlas/pipelines/src/test/java/au/org/ala/kvs/NameCheckKVStoreTestIT.java +++ b/livingatlas/pipelines/src/test/java/au/org/ala/kvs/NameCheckKVStoreTestIT.java @@ -15,6 +15,7 @@ public class NameCheckKVStoreTestIT { @ClassRule public static IntegrationTestUtils itUtils = IntegrationTestUtils.getInstance(); + /** * Tests the Get operation on {@link KeyValueCache} that wraps a simple KV store backed by a * HashMap. diff --git a/pom.xml b/pom.xml index 682cd4cc08..116448bcf5 100644 --- a/pom.xml +++ b/pom.xml @@ -101,7 +101,7 @@ 0.5-SNAPSHOT 0.195.0-H3-SNAPSHOT 1.0.0 - 3.96.6-SNAPSHOT + 3.96.6.1-SNAPSHOT 1.26 1.9.0-SNAPSHOT 0.2 diff --git a/sdks/beam-transforms/src/test/java/org/gbif/pipelines/transforms/core/LocationTransformTest.java b/sdks/beam-transforms/src/test/java/org/gbif/pipelines/transforms/core/LocationTransformTest.java index f83ac3fe35..8b850850b5 100644 --- a/sdks/beam-transforms/src/test/java/org/gbif/pipelines/transforms/core/LocationTransformTest.java +++ b/sdks/beam-transforms/src/test/java/org/gbif/pipelines/transforms/core/LocationTransformTest.java @@ -271,7 +271,7 @@ public void transformationTest() { null, "POLYGON((100000 515000,100000 520000,105000 520000,105000 515000,100000 515000))", "EPSG:28992", - "POLYGON ((52.619749292808244 4.575033022857827, 52.66468072273537 4.574203170903049, 52.665162889286556 4.648106265726084, 52.6202308261076 4.648860682668264, 52.619749292808244 4.575033022857827))" + "POLYGON ((52.619749292808244 4.575033022857827, 52.664680722735376 4.574203170903048, 52.665162889286556 4.648106265726084, 52.6202308261076 4.648860682668264, 52.619749292808244 4.575033022857827))" }; final MetadataRecord mdr = diff --git a/sdks/core/src/test/java/org/gbif/pipelines/core/interpreters/core/LocationInterpreterTest.java b/sdks/core/src/test/java/org/gbif/pipelines/core/interpreters/core/LocationInterpreterTest.java index b41e977bf3..fcc211709c 100644 --- a/sdks/core/src/test/java/org/gbif/pipelines/core/interpreters/core/LocationInterpreterTest.java +++ b/sdks/core/src/test/java/org/gbif/pipelines/core/interpreters/core/LocationInterpreterTest.java @@ -396,7 +396,7 @@ public void footprintWKTTest() { LocationRecord.newBuilder() .setId("1") .setFootprintWKT( - "POLYGON ((52.619749292808244 4.575033022857827, 52.66468072273537 4.574203170903049, 52.665162889286556 4.648106265726084, 52.6202308261076 4.648860682668264, 52.619749292808244 4.575033022857827))") + "POLYGON ((52.619749292808244 4.575033022857827, 52.664680722735376 4.574203170903048, 52.665162889286556 4.648106265726084, 52.6202308261076 4.648860682668264, 52.619749292808244 4.575033022857827))") .build(); // When