From ba10d5605ad22172c29691e87c165ba637054008 Mon Sep 17 00:00:00 2001 From: Qifeng Date: Thu, 16 Dec 2021 16:35:27 +1100 Subject: [PATCH] #622 fix(using logback, not log4j ) --- livingatlas/configs/la-pipelines.yaml | 12 ++++ .../beam/DistributionOutlierPipeline.java | 6 +- .../DistributionOutlierTransform.java | 37 ++++++------ .../main/resources/log4j-colorized.properties | 42 -------------- .../src/main/resources/log4j.properties | 38 ------------- .../src/main/resources/logback-test.xml | 57 +++++++++++++++++++ 6 files changed, 88 insertions(+), 104 deletions(-) delete mode 100644 livingatlas/pipelines/src/main/resources/log4j-colorized.properties delete mode 100644 livingatlas/pipelines/src/main/resources/log4j.properties create mode 100644 livingatlas/pipelines/src/main/resources/logback-test.xml diff --git a/livingatlas/configs/la-pipelines.yaml b/livingatlas/configs/la-pipelines.yaml index eea06647e1..2f7f06fe99 100644 --- a/livingatlas/configs/la-pipelines.yaml +++ b/livingatlas/configs/la-pipelines.yaml @@ -344,6 +344,18 @@ sampling-sh-args: executor-memory: 16G driver-memory: 4G +outlier-sh-args: + local: + jvm: -Xmx8g -XX:+UseG1GC -Dspark.master=local[*] + spark-embedded: + jvm: -Xmx8g -XX:+UseG1GC -Dspark.master=local[*] + spark-cluster: + conf: spark.default.parallelism=144 + num-executors: 8 + executor-cores: 8 + executor-memory: 16G + driver-memory: 4G + sensitive-sh-args: spark-embedded: jvm: -Xmx8g -XX:+UseG1GC diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/DistributionOutlierPipeline.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/DistributionOutlierPipeline.java index 5f56d5fd2a..32ea12fd0e 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/DistributionOutlierPipeline.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/DistributionOutlierPipeline.java @@ -27,10 +27,7 @@ *

Example: java au.org.ala.pipelines.beam.DistributionOutlierPipeline * --config=/data/la-pipelines/config/la-pipelines.yaml --fsPath=/data * - *

Running with Jar java - * -Dlog4j.configuration=file://../pipelines/src/main/resources/log4j-colorized.properties - * -Dlog4j.configurationFile=file://../pipelines/src/main/resources/log4j-colorized.properties -cp - * ../pipelines/target/pipelines-2.10.0-SNAPSHOT-shaded.jar + *

Running with Jar java -cp ../pipelines/target/pipelines-2.10.0-SNAPSHOT-shaded.jar * au.org.ala.pipelines.beam.DistributionOutlierPipeline * --config=/data/la-pipelines/config/la-pipelines.yaml,la-pipelines-local.yaml --fsPath=/data */ @@ -39,6 +36,7 @@ public class DistributionOutlierPipeline { public static void main(String[] args) throws Exception { + log.debug("debug test"); VersionInfo.print(); CombinedYamlConfiguration conf = new CombinedYamlConfiguration(args); String[] combinedArgs = conf.toArgs("general", "outlier"); diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/DistributionOutlierTransform.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/DistributionOutlierTransform.java index be5f2aedca..c389472b5a 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/DistributionOutlierTransform.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/DistributionOutlierTransform.java @@ -10,7 +10,6 @@ import lombok.extern.slf4j.Slf4j; import org.apache.beam.sdk.transforms.*; import org.apache.beam.sdk.values.*; -import org.apache.commons.lang3.StringUtils; import org.gbif.pipelines.core.functions.SerializableConsumer; import org.gbif.pipelines.core.interpreters.Interpretation; import org.gbif.pipelines.io.avro.*; @@ -126,7 +125,9 @@ public MapElements flatToString() { } /** - * distanceOutOfEDL 0: inside edl, -1: no edl + * ID / Taxon ID / LatLng MUST be valid + * + *

distanceOutOfEDL 0: inside edl, -1: no edl * * @param record * @return @@ -134,25 +135,21 @@ public MapElements flatToString() { private DistributionOutlierRecord convertToDistribution( IndexRecord record, double distanceToEDL) { try { - if (!StringUtils.isEmpty(record.getId()) - && !StringUtils.isEmpty(record.getTaxonID()) - && !StringUtils.isEmpty(record.getLatLng())) { - DistributionOutlierRecord newRecord = - DistributionOutlierRecord.newBuilder() - .setId(record.getId()) - .setSpeciesID(record.getTaxonID()) - .setDistanceOutOfEDL(distanceToEDL) - .build(); - - String latlng = record.getLatLng(); - String[] coordinates = latlng.split(","); - newRecord.setDecimalLatitude(Double.parseDouble(coordinates[0])); - newRecord.setDecimalLongitude(Double.parseDouble(coordinates[1])); - - return newRecord; - } + DistributionOutlierRecord newRecord = + DistributionOutlierRecord.newBuilder() + .setId(record.getId()) + .setSpeciesID(record.getTaxonID()) + .setDistanceOutOfEDL(distanceToEDL) + .build(); + + String latlng = record.getLatLng(); + String[] coordinates = latlng.split(","); + newRecord.setDecimalLatitude(Double.parseDouble(coordinates[0])); + newRecord.setDecimalLongitude(Double.parseDouble(coordinates[1])); + + return newRecord; } catch (Exception ex) { - log.debug(record.getId() + " does not have lat/lng or taxon. ignored.."); + log.debug(record.getId() + " has incorrect lat/lng or taxon. ignored.."); } return null; } diff --git a/livingatlas/pipelines/src/main/resources/log4j-colorized.properties b/livingatlas/pipelines/src/main/resources/log4j-colorized.properties deleted file mode 100644 index 07f882e378..0000000000 --- a/livingatlas/pipelines/src/main/resources/log4j-colorized.properties +++ /dev/null @@ -1,42 +0,0 @@ -# Set everything to be logged to the console -log4j.rootCategory=INFO, console -log4j.appender.console=org.apache.log4j.ConsoleAppender -log4j.appender.console.target=System.out -log4j.appender.console.layout=com.jcabi.log.MulticolorLayout -# https://en.wikipedia.org/wiki/ANSI_escape_code#Colors -log4j.appender.console.layout.Levels=INFO:0;32,WARN:0;33 -log4j.appender.console.layout.Colors=black:90 -# Supported colors: red, blue, yellow, cyan, black, and white. -log4j.appender.console.layout.ConversionPattern=%d{dd-MMM} %color-black{%d{HH:mm:ss}} [%color-magenta{%X{step}}] [%color-blue{%X{datasetId}}] [%color{%p}] %color-cyan{%c{1}}: %color-blue{%m%n} -# Settings to quiet third party logs that are too verbose -log4j.logger.au.org.ala=INFO -log4j.logger.org.spark-project.jetty=WARN -log4j.logger.org.gbif.common.parsers=ERROR -log4j.logger.org.gbif.geocode.api.cache=ERROR -log4j.logger.org.gbif.geocode.api.cache.GeocodeBitmapCache=ERROR -log4j.logger.org.gbif.pipelines.core.parsers.location.cache.GeocodeBitmapCache=ERROR -log4j.logger.org.spark_project.jetty.servlet.ServletContextHandler=ERROR -log4j.logger.org.eclipse.jetty.server.handler.ContextHandler=ERROR -log4j.logger.org.gbif.dwc.terms.TermFactory=ERROR -log4j.logger.org.gbif.vocabulary.lookup=ERROR -log4j.logger.org.apache.beam.runners.spark.translation=ERROR -log4j.logger.ServletContextHandler=ERROR -project.jetty.util.component.AbstractLifeCycle=ERROR -log4j.logger.org.apache.spark=ERROR -log4j.logger.org.eclipse.jetty=ERROR -log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=ERROR -log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=ERROR -log4j.logger.org.apache.parquet=ERROR -log4j.logger.parquet=ERROR -log4j.logger.org.apache.beam=ERROR -# SPARK-9183: Settings to avoid annoying messages when looking up -# nonexistent UDFs in SparkSQL with Hive support -log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL -log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR - - -log4j.logger.org.apache.spark.executor.Executor=ERROR -log4j.logger.org.apache.spark.storage.memory.MemoryStore=ERROR -log4j.logger.org.apache.spark.storage.BlockManager=ERROR -log4j.logger.org.apache.spark.executor=ERROR -log4j.logger.org.apache.spark.storage=ERROR \ No newline at end of file diff --git a/livingatlas/pipelines/src/main/resources/log4j.properties b/livingatlas/pipelines/src/main/resources/log4j.properties deleted file mode 100644 index c9a52ebd2b..0000000000 --- a/livingatlas/pipelines/src/main/resources/log4j.properties +++ /dev/null @@ -1,38 +0,0 @@ -# Set everything to be logged to the console -log4j.rootCategory=INFO, console -log4j.appender.console=org.apache.log4j.ConsoleAppender -log4j.appender.console.target=System.out -log4j.appender.console.layout=org.apache.log4j.PatternLayout -log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} [%X{step}] [%X{datasetId}] %p %c{1}: %m%n -# Settings to quiet third party logs that are too verbose -log4j.logger.au.org.ala=INFO -log4j.logger.org.spark-project.jetty=WARN -log4j.logger.org.gbif.common.parsers=ERROR -log4j.logger.org.gbif.geocode.api.cache=ERROR -log4j.logger.org.gbif.geocode.api.cache.GeocodeBitmapCache=ERROR -log4j.logger.org.gbif.pipelines.core.parsers.location.cache.GeocodeBitmapCache=ERROR -log4j.logger.org.spark_project.jetty.servlet.ServletContextHandler=ERROR -log4j.logger.org.eclipse.jetty.server.handler.ContextHandler=ERROR -log4j.logger.org.gbif.dwc.terms.TermFactory=ERROR -log4j.logger.org.gbif.vocabulary.lookup=ERROR -log4j.logger.org.apache.beam.runners.spark.translation=ERROR -log4j.logger.ServletContextHandler=ERROR -project.jetty.util.component.AbstractLifeCycle=ERROR -log4j.logger.org.apache.spark=ERROR -log4j.logger.org.eclipse.jetty=ERROR -log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=ERROR -log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=ERROR -log4j.logger.org.apache.parquet=ERROR -log4j.logger.parquet=ERROR -log4j.logger.org.apache.beam=ERROR -# SPARK-9183: Settings to avoid annoying messages when looking up -# nonexistent UDFs in SparkSQL with Hive support -log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL -log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR - -# -#log4j.logger.org.apache.spark.executor.Executor=ERROR -#log4j.logger.org.apache.spark.storage.memory.MemoryStore=ERROR -#log4j.logger.org.apache.spark.storage.BlockManager=ERROR -#log4j.logger.org.apache.spark.executor=ERROR -#log4j.logger.org.apache.spark.storage=ERROR diff --git a/livingatlas/pipelines/src/main/resources/logback-test.xml b/livingatlas/pipelines/src/main/resources/logback-test.xml new file mode 100644 index 0000000000..c1e164fcd7 --- /dev/null +++ b/livingatlas/pipelines/src/main/resources/logback-test.xml @@ -0,0 +1,57 @@ + + + true + + + + + + + + + ${defaultPattern} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file