From a0bd0c0b5d3128b03118376be29c58d9c56aeb2d Mon Sep 17 00:00:00 2001 From: Nick dos Remedios Date: Thu, 9 Feb 2023 09:41:05 +1100 Subject: [PATCH 01/14] ARGA-Genomes/arga-data/issues/31 Initial commit for adding new field `presentInCountry` set via a species list. --- livingatlas/configs/la-pipelines.yaml | 1 + livingatlas/pipelines/src/main/docker/solr8.yml | 2 ++ .../org/ala/pipelines/beam/SpeciesListPipeline.java | 6 +++++- .../org/ala/pipelines/java/SpeciesListPipeline.java | 11 ++++++++--- .../options/SpeciesLevelPipelineOptions.java | 5 +++++ .../au/org/ala/pipelines/transforms/IndexFields.java | 1 + .../pipelines/transforms/IndexRecordTransform.java | 5 +++++ .../au/org/ala/pipelines/util/SpeciesListUtils.java | 7 ++++++- .../java/au/org/ala/specieslists/SpeciesList.java | 1 + .../org/ala/specieslists/SpeciesListDownloader.java | 9 +++++++++ livingatlas/solr/conf/managed-schema | 3 +++ .../src/main/avro/specific/species-list-record.avsc | 3 ++- sdks/models/src/main/avro/specific/taxon-profile.avsc | 3 ++- 13 files changed, 50 insertions(+), 7 deletions(-) diff --git a/livingatlas/configs/la-pipelines.yaml b/livingatlas/configs/la-pipelines.yaml index 2f0ded3064..22aaa7cfa1 100644 --- a/livingatlas/configs/la-pipelines.yaml +++ b/livingatlas/configs/la-pipelines.yaml @@ -192,6 +192,7 @@ speciesLists: maxDownloadAgeInMinutes: 1440 includeConservationStatus: true includeInvasiveStatus: true + includePresentInCountry: false # Sampling specific configuration sampling: diff --git a/livingatlas/pipelines/src/main/docker/solr8.yml b/livingatlas/pipelines/src/main/docker/solr8.yml index d2bd1f01ae..23d9f64b3e 100644 --- a/livingatlas/pipelines/src/main/docker/solr8.yml +++ b/livingatlas/pipelines/src/main/docker/solr8.yml @@ -15,3 +15,5 @@ services: - "start" - "-cloud" - "-f" + restart: on-failure + platform: linux/amd64 diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/SpeciesListPipeline.java 
b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/SpeciesListPipeline.java index bc359ef065..035683cfa1 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/SpeciesListPipeline.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/SpeciesListPipeline.java @@ -149,6 +149,7 @@ public KV apply(KV record) { final boolean includeConservationStatus = options.getIncludeConservationStatus(); final boolean includeInvasiveStatus = options.getIncludeInvasiveStatus(); + final boolean includePresentInCountry = options.getIncludePresentInCountry(); // join collections return result.apply( @@ -167,7 +168,10 @@ public void processElement(ProcessContext c) { if (speciesLists != null) { TaxonProfile.Builder builder = SpeciesListUtils.createTaxonProfileBuilder( - speciesLists, includeConservationStatus, includeInvasiveStatus); + speciesLists, + includeConservationStatus, + includeInvasiveStatus, + includePresentInCountry); // output a link to each occurrence record we've matched by taxonID for (String occurrenceID : occurrenceIDs) { builder.setId(occurrenceID); diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/java/SpeciesListPipeline.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/java/SpeciesListPipeline.java index 0c4b7d7c81..cd4aa45d3f 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/java/SpeciesListPipeline.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/java/SpeciesListPipeline.java @@ -137,7 +137,8 @@ public static Map generateTaxonProfileCollection( alaTaxonRecord, speciesListMap, options.getIncludeConservationStatus(), - options.getIncludeInvasiveStatus())) + options.getIncludeInvasiveStatus(), + options.getIncludePresentInCountry())) .collect(Collectors.toList()); return profiles.stream() @@ -152,7 +153,8 @@ static TaxonProfile convertToTaxonProfile( ALATaxonRecord alaTaxonRecord, Map> speciesListMap, boolean includeConservationStatus, - 
boolean includeInvasiveStatus) { + boolean includeInvasiveStatus, + boolean includePresentInCountry) { Iterable speciesLists = speciesListMap.get(alaTaxonRecord.getTaxonConceptID()); @@ -160,7 +162,10 @@ static TaxonProfile convertToTaxonProfile( if (speciesLists != null) { TaxonProfile.Builder builder = SpeciesListUtils.createTaxonProfileBuilder( - speciesLists, includeConservationStatus, includeInvasiveStatus); + speciesLists, + includeConservationStatus, + includeInvasiveStatus, + includePresentInCountry); builder.setId(alaTaxonRecord.getId()); return builder.build(); } else { diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/options/SpeciesLevelPipelineOptions.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/options/SpeciesLevelPipelineOptions.java index c3739b10db..2fe123f3f8 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/options/SpeciesLevelPipelineOptions.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/options/SpeciesLevelPipelineOptions.java @@ -33,4 +33,9 @@ public interface SpeciesLevelPipelineOptions extends InterpretationPipelineOptio Boolean getIncludeInvasiveStatus(); void setIncludeInvasiveStatus(Boolean includeInvasiveStatus); + + @Default.Boolean(false) + Boolean getIncludePresentInCountry(); + + void setIncludePresentInCountry(Boolean includePresentInCountry); } diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java index 7890ba0cf3..bf20cabc55 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java @@ -51,6 +51,7 @@ public interface IndexFields { String POINT_0_02 = "point-0.02"; String POINT_0_1 = "point-0.1"; String POINT_1 = "point-1"; + String PRESENT_IN_COUNTRY = "presentInCountry"; String PROVENANCE = 
"provenance"; String TAXON_RANK = "taxonRank"; String RAW_STATE_CONSERVATION = "raw_stateConservation"; diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java index 5d3019de10..fb076d77f3 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java @@ -805,6 +805,11 @@ private static void addSpeciesListInfo( } } } + + // index presentInCountry + if (tpr.getPresentInCountry() != null) { + indexRecord.getStrings().put(PRESENT_IN_COUNTRY, tpr.getPresentInCountry()); + } } private static MultimediaIndexRecord convertToMultimediaRecord(String uuid, Image image) { diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/util/SpeciesListUtils.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/util/SpeciesListUtils.java index dc77c416a4..b98f57f0b3 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/util/SpeciesListUtils.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/util/SpeciesListUtils.java @@ -18,13 +18,15 @@ public class SpeciesListUtils { public static TaxonProfile.Builder createTaxonProfileBuilder( Iterable speciesLists, boolean includeConservationStatus, - boolean includeInvasiveStatus) { + boolean includeInvasiveStatus, + boolean includePresentInCountry) { Iterator iter = speciesLists.iterator(); List speciesListIDs = new ArrayList<>(); List conservationStatusList = new ArrayList<>(); List invasiveStatusList = new ArrayList<>(); + String presentInCountryValue = null; while (iter.hasNext()) { @@ -48,6 +50,8 @@ public static TaxonProfile.Builder createTaxonProfileBuilder( .setSpeciesListID(speciesListRecord.getSpeciesListID()) .setRegion(speciesListRecord.getRegion()) .build()); + } else if 
(includePresentInCountry && speciesListRecord.getPresentInCountry() != null) { + presentInCountryValue = speciesListRecord.getPresentInCountry(); } } @@ -56,6 +60,7 @@ public static TaxonProfile.Builder createTaxonProfileBuilder( builder.setSpeciesListID(speciesListIDs); builder.setConservationStatuses(conservationStatusList); builder.setInvasiveStatuses(invasiveStatusList); + builder.setPresentInCountry(presentInCountryValue); return builder; } } diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesList.java b/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesList.java index aa02bfc17b..237d61a1b0 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesList.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesList.java @@ -26,6 +26,7 @@ public class SpeciesList { boolean isAuthoritative; boolean isInvasive; boolean isThreatened; + String presentInCountry; @JsonPOJOBuilder(withPrefix = "") @JsonIgnoreProperties(ignoreUnknown = true) diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesListDownloader.java b/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesListDownloader.java index 54d26083b5..f83c5c5a9e 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesListDownloader.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesListDownloader.java @@ -132,6 +132,7 @@ public static void run(SpeciesLevelPipelineOptions options) throws IOException { int guidIdx = columnHeaders.indexOf("guid"); int statusIdx = columnHeaders.indexOf("status"); int sourceStatusIdx = columnHeaders.indexOf("sourceStatus"); + int countIdx = columnHeaders.indexOf("count"); String region = null; @@ -164,6 +165,13 @@ public static void run(SpeciesLevelPipelineOptions options) throws IOException { String status = statusIdx > 0 ? currentLine[statusIdx] : null; String sourceStatus = sourceStatusIdx > 0 ? 
currentLine[sourceStatusIdx] : null; + String count = countIdx > 0 ? currentLine[countIdx] : null; + // ARGA addition to set `presentInCountry` when list is neither invasive nor + // threatened, has region set and contains a `count` column (note: count not used) + String presentInCountry = + (!list.isThreatened() && !list.isInvasive() && region != null && count != null) + ? region + : null; SpeciesListRecord speciesListRecord = SpeciesListRecord.newBuilder() @@ -174,6 +182,7 @@ public static void run(SpeciesLevelPipelineOptions options) throws IOException { .setIsInvasive(list.isInvasive()) .setIsThreatened(list.isThreatened()) .setSourceStatus(sourceStatus) + .setPresentInCountry(presentInCountry) .build(); dataFileWriter.append(speciesListRecord); taxaRead++; diff --git a/livingatlas/solr/conf/managed-schema b/livingatlas/solr/conf/managed-schema index d052b2e9b1..50bd13d59a 100644 --- a/livingatlas/solr/conf/managed-schema +++ b/livingatlas/solr/conf/managed-schema @@ -191,6 +191,9 @@ + + + diff --git a/sdks/models/src/main/avro/specific/species-list-record.avsc b/sdks/models/src/main/avro/specific/species-list-record.avsc index 44fa9e3418..bccb027f82 100644 --- a/sdks/models/src/main/avro/specific/species-list-record.avsc +++ b/sdks/models/src/main/avro/specific/species-list-record.avsc @@ -10,6 +10,7 @@ {"name": "isInvasive", "type": "boolean"}, {"name": "region", "type": ["null", "string"], "default": null }, {"name": "status", "type": ["null", "string"]}, - {"name": "sourceStatus", "type": ["null", "string"]} + {"name": "sourceStatus", "type": ["null", "string"]}, + {"name": "presentInCountry", "type": ["null", "string"]} ] } diff --git a/sdks/models/src/main/avro/specific/taxon-profile.avsc b/sdks/models/src/main/avro/specific/taxon-profile.avsc index 82eff188cd..dd740711f7 100644 --- a/sdks/models/src/main/avro/specific/taxon-profile.avsc +++ b/sdks/models/src/main/avro/specific/taxon-profile.avsc @@ -30,7 +30,8 @@ {"name": "id", "type": ["null", 
"string"]}, {"name": "speciesListID", "type": {"type" : "array", "items" : "string"}, "default" : []}, {"name": "conservationStatuses", "type": {"type" : "array", "items" : "ConservationStatus"}, "default" : []}, - {"name": "invasiveStatuses", "type": {"type" : "array", "items" : "InvasiveStatus"}, "default" : []} + {"name": "invasiveStatuses", "type": {"type" : "array", "items" : "InvasiveStatus"}, "default" : []}, + {"name": "presentInCountry", "type": ["null", "string"], "default" : null} ] } ] From 34ed7158fd0e28ce21c2e40902ed85af30ba4e87 Mon Sep 17 00:00:00 2001 From: Nick dos Remedios Date: Wed, 1 Mar 2023 12:49:27 +1100 Subject: [PATCH 02/14] ARGA-94 index traits from species lists --- livingatlas/configs/la-pipelines.yaml | 1 + .../pipelines/beam/SpeciesListPipeline.java | 6 ++++- .../pipelines/java/SpeciesListPipeline.java | 9 ++++--- .../options/SpeciesLevelPipelineOptions.java | 5 ++++ .../ala/pipelines/transforms/IndexFields.java | 8 +++++++ .../transforms/IndexRecordTransform.java | 24 +++++++++++++++++++ .../ala/pipelines/util/SpeciesListUtils.java | 15 ++++++++---- .../specieslists/SpeciesListDownloader.java | 7 ++++++ .../au/org/ala/specieslists/TraitType.java | 22 +++++++++++++++++ livingatlas/solr/conf/managed-schema | 2 ++ .../avro/specific/species-list-record.avsc | 5 +++- .../src/main/avro/specific/taxon-profile.avsc | 3 ++- 12 files changed, 97 insertions(+), 10 deletions(-) create mode 100644 livingatlas/pipelines/src/main/java/au/org/ala/specieslists/TraitType.java diff --git a/livingatlas/configs/la-pipelines.yaml b/livingatlas/configs/la-pipelines.yaml index 22aaa7cfa1..3a790a63a2 100644 --- a/livingatlas/configs/la-pipelines.yaml +++ b/livingatlas/configs/la-pipelines.yaml @@ -193,6 +193,7 @@ speciesLists: includeConservationStatus: true includeInvasiveStatus: true includePresentInCountry: false + includeTraits: false # Sampling specific configuration sampling: diff --git 
a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/SpeciesListPipeline.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/SpeciesListPipeline.java index 035683cfa1..9ac193ca31 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/SpeciesListPipeline.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/SpeciesListPipeline.java @@ -36,6 +36,8 @@ *
  • Links to species lists for records *
  • stateProvince and country associated conservation status for the record *
  • stateProvince and country associated invasive status for the record + *
  • optional `presentInCountry` flag for the record + *
  • optional species `trait` values for the record * * * This pipeline is left for debug purposes only. Species lists are joined to the records in the @@ -150,6 +152,7 @@ public KV apply(KV record) { final boolean includeConservationStatus = options.getIncludeConservationStatus(); final boolean includeInvasiveStatus = options.getIncludeInvasiveStatus(); final boolean includePresentInCountry = options.getIncludePresentInCountry(); + final boolean includeTraits = options.getIncludeTraits(); // join collections return result.apply( @@ -171,7 +174,8 @@ public void processElement(ProcessContext c) { speciesLists, includeConservationStatus, includeInvasiveStatus, - includePresentInCountry); + includePresentInCountry, + includeTraits); // output a link to each occurrence record we've matched by taxonID for (String occurrenceID : occurrenceIDs) { builder.setId(occurrenceID); diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/java/SpeciesListPipeline.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/java/SpeciesListPipeline.java index cd4aa45d3f..3c5c8d3f66 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/java/SpeciesListPipeline.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/java/SpeciesListPipeline.java @@ -138,7 +138,8 @@ public static Map generateTaxonProfileCollection( speciesListMap, options.getIncludeConservationStatus(), options.getIncludeInvasiveStatus(), - options.getIncludePresentInCountry())) + options.getIncludePresentInCountry(), + options.getIncludeTraits())) .collect(Collectors.toList()); return profiles.stream() @@ -154,7 +155,8 @@ static TaxonProfile convertToTaxonProfile( Map> speciesListMap, boolean includeConservationStatus, boolean includeInvasiveStatus, - boolean includePresentInCountry) { + boolean includePresentInCountry, + boolean includeTraits) { Iterable speciesLists = speciesListMap.get(alaTaxonRecord.getTaxonConceptID()); @@ -165,7 +167,8 @@ static TaxonProfile 
convertToTaxonProfile( speciesLists, includeConservationStatus, includeInvasiveStatus, - includePresentInCountry); + includePresentInCountry, + includeTraits); builder.setId(alaTaxonRecord.getId()); return builder.build(); } else { diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/options/SpeciesLevelPipelineOptions.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/options/SpeciesLevelPipelineOptions.java index 2fe123f3f8..0ddb88290c 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/options/SpeciesLevelPipelineOptions.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/options/SpeciesLevelPipelineOptions.java @@ -38,4 +38,9 @@ public interface SpeciesLevelPipelineOptions extends InterpretationPipelineOptio Boolean getIncludePresentInCountry(); void setIncludePresentInCountry(Boolean includePresentInCountry); + + @Default.Boolean(false) + Boolean getIncludeTraits(); + + void setIncludeTraits(Boolean includeTraits); } diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java index bf20cabc55..e764760efa 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java @@ -74,4 +74,12 @@ public interface IndexFields { String GGBN_TERMS_LOAN = "http://data.ggbn.org/schemas/ggbn/terms/Loan"; String LOAN_DESTINATION_TERM = "http://data.ggbn.org/schemas/ggbn/terms/loanDestination"; String LOAN_IDENTIFIER_TERM = "http://data.ggbn.org/schemas/ggbn/terms/loanIdentifier"; + String AUS_TRAITS_FIRE_RESPONSE = "fire_response"; + // String AUS_TRAITS_FIRE_RESPONSE = + // + // "https://traitecoevo.github.io/austraits.build/articles/trait_definitions.html#fire_response"; + String AUS_TRAITS_POST_FIRE_RECRUITMENT = "post_fire_recruitment"; + // String 
AUS_TRAITS_POST_FIRE_RECRUITMENT = + // + // "https://traitecoevo.github.io/austraits.build/articles/trait_definitions.html#post_fire_recruitment"; } diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java index fb076d77f3..bfd50948b6 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java @@ -7,6 +7,7 @@ import au.org.ala.pipelines.common.SolrFieldSchema; import au.org.ala.pipelines.interpreters.SensitiveDataInterpreter; +import au.org.ala.specieslists.TraitType; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableSet; import java.io.Serializable; @@ -810,6 +811,29 @@ private static void addSpeciesListInfo( if (tpr.getPresentInCountry() != null) { indexRecord.getStrings().put(PRESENT_IN_COUNTRY, tpr.getPresentInCountry()); } + + // traits from lists + Map traits = tpr.getTraits(); + for (Map.Entry trait : traits.entrySet()) { + if (trait.getKey() != null) { + Map traitMap = new HashMap<>(); + traitMap.put(trait.getKey(), trait.getValue()); + TraitType traitType = TraitType.valueOfLabel(trait.getKey()); + // keep a copy in dynamic fields until schema has been updated + indexRecord.setDynamicProperties(traitMap); + // Also add specific traits to dedicated fields + switch (Objects.requireNonNull(traitType)) { + case FIRE_RESPONSE: + addIfNotEmpty(indexRecord, AUS_TRAITS_FIRE_RESPONSE, trait.getValue()); + break; + case POST_FIRE_RECRUITMENT: + addIfNotEmpty(indexRecord, AUS_TRAITS_POST_FIRE_RECRUITMENT, trait.getValue()); + break; + // default: + // indexRecord.setDynamicProperties(traitMap); + } + } + } } private static MultimediaIndexRecord convertToMultimediaRecord(String uuid, Image image) { diff --git 
a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/util/SpeciesListUtils.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/util/SpeciesListUtils.java index b98f57f0b3..6d424c146c 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/util/SpeciesListUtils.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/util/SpeciesListUtils.java @@ -1,9 +1,7 @@ package au.org.ala.pipelines.util; import com.google.common.base.Strings; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; +import java.util.*; import lombok.AccessLevel; import lombok.NoArgsConstructor; import org.gbif.pipelines.io.avro.*; @@ -12,6 +10,8 @@ @NoArgsConstructor(access = AccessLevel.PRIVATE) public class SpeciesListUtils { + private static String LIST_COMMON_TRAIT = "COMMON_TRAIT"; + /** * Creates a reusable template (Builder) for a TaxonProfile based on the supplied species lists. */ @@ -19,7 +19,8 @@ public static TaxonProfile.Builder createTaxonProfileBuilder( Iterable speciesLists, boolean includeConservationStatus, boolean includeInvasiveStatus, - boolean includePresentInCountry) { + boolean includePresentInCountry, + boolean includeTraits) { Iterator iter = speciesLists.iterator(); @@ -27,6 +28,7 @@ public static TaxonProfile.Builder createTaxonProfileBuilder( List conservationStatusList = new ArrayList<>(); List invasiveStatusList = new ArrayList<>(); String presentInCountryValue = null; + Map traitsMap = new HashMap<>(); while (iter.hasNext()) { @@ -52,6 +54,10 @@ public static TaxonProfile.Builder createTaxonProfileBuilder( .build()); } else if (includePresentInCountry && speciesListRecord.getPresentInCountry() != null) { presentInCountryValue = speciesListRecord.getPresentInCountry(); + } else if (includeTraits + && speciesListRecord.getListType().equals(LIST_COMMON_TRAIT) + && speciesListRecord.getTraitName() != null) { + traitsMap.put(speciesListRecord.getTraitName(), speciesListRecord.getTraitValue()); } } @@ 
-61,6 +67,7 @@ public static TaxonProfile.Builder createTaxonProfileBuilder( builder.setConservationStatuses(conservationStatusList); builder.setInvasiveStatuses(invasiveStatusList); builder.setPresentInCountry(presentInCountryValue); + builder.setTraits(traitsMap); return builder; } } diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesListDownloader.java b/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesListDownloader.java index f83c5c5a9e..ccf06a7986 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesListDownloader.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesListDownloader.java @@ -133,6 +133,8 @@ public static void run(SpeciesLevelPipelineOptions options) throws IOException { int statusIdx = columnHeaders.indexOf("status"); int sourceStatusIdx = columnHeaders.indexOf("sourceStatus"); int countIdx = columnHeaders.indexOf("count"); + int traitNameIdx = columnHeaders.indexOf("traitName"); + int traitValueIdx = columnHeaders.indexOf("traitValue"); String region = null; @@ -166,6 +168,8 @@ public static void run(SpeciesLevelPipelineOptions options) throws IOException { String status = statusIdx > 0 ? currentLine[statusIdx] : null; String sourceStatus = sourceStatusIdx > 0 ? currentLine[sourceStatusIdx] : null; String count = countIdx > 0 ? currentLine[countIdx] : null; + String traitName = traitNameIdx > 0 ? currentLine[traitNameIdx] : null; + String traitValue = traitValueIdx > 0 ? 
currentLine[traitValueIdx] : null; // ARGA addition to set `presentInCountry` when list is neither invasive nor // threatened, has region set and contains a `count` column (note: count not used) String presentInCountry = @@ -179,10 +183,13 @@ public static void run(SpeciesLevelPipelineOptions options) throws IOException { .setSpeciesListID(list.getDataResourceUid()) .setStatus(status) .setRegion(region) + .setListType(list.getListType()) .setIsInvasive(list.isInvasive()) .setIsThreatened(list.isThreatened()) .setSourceStatus(sourceStatus) .setPresentInCountry(presentInCountry) + .setTraitName(traitName) + .setTraitValue(traitValue) .build(); dataFileWriter.append(speciesListRecord); taxaRead++; diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/TraitType.java b/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/TraitType.java new file mode 100644 index 0000000000..43f93dd2cc --- /dev/null +++ b/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/TraitType.java @@ -0,0 +1,22 @@ +package au.org.ala.specieslists; + +/** Used for AusTraits trait types */ +public enum TraitType { + FIRE_RESPONSE("fire_response"), + POST_FIRE_RECRUITMENT("post_fire_recruitment"); + + public final String label; + + private TraitType(String label) { + this.label = label; + } + + public static TraitType valueOfLabel(String label) { + for (TraitType e : values()) { + if (e.label.equals(label)) { + return e; + } + } + return null; + } +} diff --git a/livingatlas/solr/conf/managed-schema b/livingatlas/solr/conf/managed-schema index 50bd13d59a..5742235535 100644 --- a/livingatlas/solr/conf/managed-schema +++ b/livingatlas/solr/conf/managed-schema @@ -193,6 +193,8 @@ + + diff --git a/sdks/models/src/main/avro/specific/species-list-record.avsc b/sdks/models/src/main/avro/specific/species-list-record.avsc index bccb027f82..fc4949ca47 100644 --- a/sdks/models/src/main/avro/specific/species-list-record.avsc +++ 
b/sdks/models/src/main/avro/specific/species-list-record.avsc @@ -8,9 +8,12 @@ {"name": "speciesListID","type":"string"}, {"name": "isThreatened", "type": "boolean"}, {"name": "isInvasive", "type": "boolean"}, + {"name": "listType", "type": ["null", "string"], "default": null }, {"name": "region", "type": ["null", "string"], "default": null }, {"name": "status", "type": ["null", "string"]}, {"name": "sourceStatus", "type": ["null", "string"]}, - {"name": "presentInCountry", "type": ["null", "string"]} + {"name": "presentInCountry", "type": ["null", "string"], "default": null}, + {"name": "traitName", "type": ["null", "string"], "default": null}, + {"name": "traitValue", "type": ["null", "string"], "default": null} ] } diff --git a/sdks/models/src/main/avro/specific/taxon-profile.avsc b/sdks/models/src/main/avro/specific/taxon-profile.avsc index dd740711f7..d9e7d1cfd6 100644 --- a/sdks/models/src/main/avro/specific/taxon-profile.avsc +++ b/sdks/models/src/main/avro/specific/taxon-profile.avsc @@ -31,7 +31,8 @@ {"name": "speciesListID", "type": {"type" : "array", "items" : "string"}, "default" : []}, {"name": "conservationStatuses", "type": {"type" : "array", "items" : "ConservationStatus"}, "default" : []}, {"name": "invasiveStatuses", "type": {"type" : "array", "items" : "InvasiveStatus"}, "default" : []}, - {"name": "presentInCountry", "type": ["null", "string"], "default" : null} + {"name": "presentInCountry", "type": ["null", "string"], "default" : null}, + {"name": "traits", "type": {"type": "map", "values": "string"}, "default" : {}} ] } ] From debd6cff42e153666c6f68cc53416c8147c88f04 Mon Sep 17 00:00:00 2001 From: Nick dos Remedios Date: Wed, 1 Mar 2023 17:08:26 +1100 Subject: [PATCH 03/14] Array values in trait fields --- .../pipelines/transforms/IndexRecordTransform.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java 
b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java index bfd50948b6..3fae8bb6b7 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java @@ -819,18 +819,19 @@ private static void addSpeciesListInfo( Map traitMap = new HashMap<>(); traitMap.put(trait.getKey(), trait.getValue()); TraitType traitType = TraitType.valueOfLabel(trait.getKey()); - // keep a copy in dynamic fields until schema has been updated - indexRecord.setDynamicProperties(traitMap); + // Dirty data has duplicate entries so use a Set first, to remove them + Set traitValuesSet = new HashSet<>(Arrays.asList(trait.getValue().split("\\|"))); + List traitValuesList = new ArrayList<>(traitValuesSet); // Also add specific traits to dedicated fields switch (Objects.requireNonNull(traitType)) { case FIRE_RESPONSE: - addIfNotEmpty(indexRecord, AUS_TRAITS_FIRE_RESPONSE, trait.getValue()); + addIfNotEmpty(indexRecord, AUS_TRAITS_FIRE_RESPONSE, traitValuesList); break; case POST_FIRE_RECRUITMENT: - addIfNotEmpty(indexRecord, AUS_TRAITS_POST_FIRE_RECRUITMENT, trait.getValue()); + addIfNotEmpty(indexRecord, AUS_TRAITS_POST_FIRE_RECRUITMENT, traitValuesList); break; - // default: - // indexRecord.setDynamicProperties(traitMap); + default: + indexRecord.setDynamicProperties(traitMap); } } } From f168945d0a93a84b6bb71fda282ad40a6b5d520b Mon Sep 17 00:00:00 2001 From: Nick dos Remedios Date: Thu, 2 Mar 2023 15:47:11 +1100 Subject: [PATCH 04/14] ARGA-94 Added photosynthetic_pathway trait Refactored code to remove references to trait name in IndexRecordTransform, so only need to add new traits to IndexFields and managed-schema going forward. 
--- .../ala/pipelines/transforms/IndexFields.java | 5 ++- .../transforms/IndexRecordTransform.java | 37 +++++++++++++------ .../au/org/ala/specieslists/TraitType.java | 22 ----------- livingatlas/solr/conf/managed-schema | 1 + 4 files changed, 29 insertions(+), 36 deletions(-) delete mode 100644 livingatlas/pipelines/src/main/java/au/org/ala/specieslists/TraitType.java diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java index e764760efa..ab0ead3f9b 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java @@ -76,10 +76,11 @@ public interface IndexFields { String LOAN_IDENTIFIER_TERM = "http://data.ggbn.org/schemas/ggbn/terms/loanIdentifier"; String AUS_TRAITS_FIRE_RESPONSE = "fire_response"; // String AUS_TRAITS_FIRE_RESPONSE = - // // "https://traitecoevo.github.io/austraits.build/articles/trait_definitions.html#fire_response"; String AUS_TRAITS_POST_FIRE_RECRUITMENT = "post_fire_recruitment"; // String AUS_TRAITS_POST_FIRE_RECRUITMENT = - // // "https://traitecoevo.github.io/austraits.build/articles/trait_definitions.html#post_fire_recruitment"; + String AUS_TRAITS_PHOTOSYNTHETIC_PATHWAY = "photosynthetic_pathway"; + // String AUS_TRAITS_PHOTOSYNTHETIC_PATHWAY = + // "https://traitecoevo.github.io/austraits.build/articles/trait_definitions.html#photosynthetic_pathway"; } diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java index 3fae8bb6b7..32ed79ab52 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java @@ 
-7,7 +7,6 @@ import au.org.ala.pipelines.common.SolrFieldSchema; import au.org.ala.pipelines.interpreters.SensitiveDataInterpreter; -import au.org.ala.specieslists.TraitType; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableSet; import java.io.Serializable; @@ -816,22 +815,36 @@ private static void addSpeciesListInfo( Map traits = tpr.getTraits(); for (Map.Entry trait : traits.entrySet()) { if (trait.getKey() != null) { + // save to a for dynamic-properties fallback Map traitMap = new HashMap<>(); traitMap.put(trait.getKey(), trait.getValue()); - TraitType traitType = TraitType.valueOfLabel(trait.getKey()); + // check if traitName is declared as a value in @au.org.ala.pipelines.transforms.IndexFields + java.lang.reflect.Field[] fields = IndexFields.class.getDeclaredFields(); + boolean isTraitInDecalredFields = false; + // Check each field value to see if it matches the current trait name + for (java.lang.reflect.Field f : fields) { + String strValue = null; + try { + strValue = (String) f.get(null); + } catch (IllegalAccessException e) { + log.error( + "addSpeciesListInfo() - error getting value for field: " + + f.getName()); + // Don't throw an exception - failover to next speciesList + } + if (strValue.equals(trait.getKey())) { + isTraitInDecalredFields = true; + break; + } + } // Dirty data has duplicate entries so use a Set first, to remove them Set traitValuesSet = new HashSet<>(Arrays.asList(trait.getValue().split("\\|"))); List traitValuesList = new ArrayList<>(traitValuesSet); - // Also add specific traits to dedicated fields - switch (Objects.requireNonNull(traitType)) { - case FIRE_RESPONSE: - addIfNotEmpty(indexRecord, AUS_TRAITS_FIRE_RESPONSE, traitValuesList); - break; - case POST_FIRE_RECRUITMENT: - addIfNotEmpty(indexRecord, AUS_TRAITS_POST_FIRE_RECRUITMENT, traitValuesList); - break; - default: - indexRecord.setDynamicProperties(traitMap); + // add to indexedRecord either as multivalues or 
dynamicProperties + if (isTraitInDecalredFields) { + addIfNotEmpty(indexRecord, trait.getKey(), traitValuesList); + } else { + indexRecord.setDynamicProperties(traitMap); } } } diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/TraitType.java b/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/TraitType.java deleted file mode 100644 index 43f93dd2cc..0000000000 --- a/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/TraitType.java +++ /dev/null @@ -1,22 +0,0 @@ -package au.org.ala.specieslists; - -/** Used for AusTraits trait types */ -public enum TraitType { - FIRE_RESPONSE("fire_response"), - POST_FIRE_RECRUITMENT("post_fire_recruitment"); - - public final String label; - - private TraitType(String label) { - this.label = label; - } - - public static TraitType valueOfLabel(String label) { - for (TraitType e : values()) { - if (e.label.equals(label)) { - return e; - } - } - return null; - } -} diff --git a/livingatlas/solr/conf/managed-schema b/livingatlas/solr/conf/managed-schema index 5742235535..993c117220 100644 --- a/livingatlas/solr/conf/managed-schema +++ b/livingatlas/solr/conf/managed-schema @@ -195,6 +195,7 @@ + From 1d951e032669a484fb50e24a4b76db839708bb3f Mon Sep 17 00:00:00 2001 From: Nick dos Remedios Date: Thu, 2 Mar 2023 16:22:28 +1100 Subject: [PATCH 05/14] ARGA-94 Fixed typo in variable name Changed error to warning and a few other minor changes --- .../transforms/IndexRecordTransform.java | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java index 32ed79ab52..0880d90aa7 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java @@ -811,7 
+811,7 @@ private static void addSpeciesListInfo( indexRecord.getStrings().put(PRESENT_IN_COUNTRY, tpr.getPresentInCountry()); } - // traits from lists + // taxon-level traits from speciesLists Map traits = tpr.getTraits(); for (Map.Entry trait : traits.entrySet()) { if (trait.getKey() != null) { @@ -820,28 +820,30 @@ private static void addSpeciesListInfo( traitMap.put(trait.getKey(), trait.getValue()); // check if traitName is declared as a value in @au.org.ala.pipelines.transforms.IndexFields java.lang.reflect.Field[] fields = IndexFields.class.getDeclaredFields(); - boolean isTraitInDecalredFields = false; + boolean isTraitInDeclaredFields = false; // Check each field value to see if it matches the current trait name for (java.lang.reflect.Field f : fields) { String strValue = null; try { strValue = (String) f.get(null); } catch (IllegalAccessException e) { - log.error( - "addSpeciesListInfo() - error getting value for field: " - + f.getName()); - // Don't throw an exception - failover to next speciesList + // Don't throw an exception - log.warn and failover to next speciesList + log.warn( + "addSpeciesListInfo() - failed to get value for field: " + + f.getName() + + ", with exception: " + + e.getMessage()); } if (strValue.equals(trait.getKey())) { - isTraitInDecalredFields = true; + isTraitInDeclaredFields = true; break; } } - // Dirty data has duplicate entries so use a Set first, to remove them + // Dirty data has duplicate entries, so process via a Set first Set traitValuesSet = new HashSet<>(Arrays.asList(trait.getValue().split("\\|"))); List traitValuesList = new ArrayList<>(traitValuesSet); - // add to indexedRecord either as multivalues or dynamicProperties - if (isTraitInDecalredFields) { + // Add to indexedRecord either as multivalues or dynamicProperties + if (isTraitInDeclaredFields) { addIfNotEmpty(indexRecord, trait.getKey(), traitValuesList); } else { indexRecord.setDynamicProperties(traitMap); From dd4924cd4012cb967907479b124f934b9762475e Mon Sep 17
00:00:00 2001 From: Nick dos Remedios Date: Wed, 8 Mar 2023 15:48:38 +1100 Subject: [PATCH 06/14] Minor change to `presentInCountry` code --- .../au/org/ala/specieslists/SpeciesListDownloader.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesListDownloader.java b/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesListDownloader.java index ccf06a7986..22a3b99883 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesListDownloader.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesListDownloader.java @@ -170,10 +170,11 @@ public static void run(SpeciesLevelPipelineOptions options) throws IOException { String count = countIdx > 0 ? currentLine[countIdx] : null; String traitName = traitNameIdx > 0 ? currentLine[traitNameIdx] : null; String traitValue = traitValueIdx > 0 ? currentLine[traitValueIdx] : null; - // ARGA addition to set `presentInCountry` when list is neither invasive nor - // threatened, has region set and contains a `count` column (note: count not used) + // ARGA addition to set `presentInCountry` to the value specified in the list's + // `region` attribute, when list has type "OTHER", has region set and + // contains a `count` column (note: count not currently used) String presentInCountry = - (!list.isThreatened() && !list.isInvasive() && region != null && count != null) + (list.getListType().equals("OTHER") && region != null && count != null) ? 
region : null; From 46e600eed1fa127f5b948af9c7b435dd35e32b02 Mon Sep 17 00:00:00 2001 From: Nick dos Remedios Date: Thu, 9 Mar 2023 09:45:52 +1100 Subject: [PATCH 07/14] Removed commented-out lines from IndexFields.java --- .../java/au/org/ala/pipelines/transforms/IndexFields.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java index ab0ead3f9b..d8596f589e 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java @@ -75,12 +75,6 @@ public interface IndexFields { String LOAN_DESTINATION_TERM = "http://data.ggbn.org/schemas/ggbn/terms/loanDestination"; String LOAN_IDENTIFIER_TERM = "http://data.ggbn.org/schemas/ggbn/terms/loanIdentifier"; String AUS_TRAITS_FIRE_RESPONSE = "fire_response"; - // String AUS_TRAITS_FIRE_RESPONSE = - // "https://traitecoevo.github.io/austraits.build/articles/trait_definitions.html#fire_response"; String AUS_TRAITS_POST_FIRE_RECRUITMENT = "post_fire_recruitment"; - // String AUS_TRAITS_POST_FIRE_RECRUITMENT = - // "https://traitecoevo.github.io/austraits.build/articles/trait_definitions.html#post_fire_recruitment"; String AUS_TRAITS_PHOTOSYNTHETIC_PATHWAY = "photosynthetic_pathway"; - // String AUS_TRAITS_PHOTOSYNTHETIC_PATHWAY = - // "https://traitecoevo.github.io/austraits.build/articles/trait_definitions.html#photosynthetic_pathway"; } From f793ad0b89985a352a22e0234d5c82bdc88f8c63 Mon Sep 17 00:00:00 2001 From: Nick dos Remedios Date: Tue, 14 Mar 2023 12:07:38 +1100 Subject: [PATCH 08/14] Fix build errors for tests on ARM Macs --- examples/transform/pom.xml | 6 ++++++ gbif/coordinator/tasks/pom.xml | 10 ++++++++++ gbif/ingestion/ingest-gbif-beam/pom.xml | 6 ++++++ gbif/ingestion/ingest-gbif-java/pom.xml | 6 ++++++ 
.../ingestion-integration-tests/pom.xml | 6 ++++++ gbif/validator/validator-core/pom.xml | 12 ++++++++++++ livingatlas/migration/pom.xml | 12 ++++++++++++ pom.xml | 16 ++++++++++++++++ sdks/beam-transforms/pom.xml | 6 ++++++ sdks/core/pom.xml | 6 ++++++ sdks/models/pom.xml | 6 ++++++ .../pom.xml | 6 ++++++ sdks/tools/archives-converters/pom.xml | 6 ++++++ 13 files changed, 104 insertions(+) diff --git a/examples/transform/pom.xml b/examples/transform/pom.xml index e11ae19f82..6f7b10ba21 100644 --- a/examples/transform/pom.xml +++ b/examples/transform/pom.xml @@ -129,6 +129,12 @@ org.apache.avro avro + + + org.xerial.snappy + snappy-java + + diff --git a/gbif/coordinator/tasks/pom.xml b/gbif/coordinator/tasks/pom.xml index 538d3ff2b4..5fab92f981 100644 --- a/gbif/coordinator/tasks/pom.xml +++ b/gbif/coordinator/tasks/pom.xml @@ -288,6 +288,10 @@ org.elasticsearch elasticsearch-cli + + org.elasticsearch + jna + @@ -427,6 +431,12 @@ org.apache.avro avro + + + org.xerial.snappy + snappy-java + + diff --git a/gbif/ingestion/ingest-gbif-beam/pom.xml b/gbif/ingestion/ingest-gbif-beam/pom.xml index 9f44d4d75b..731505008d 100644 --- a/gbif/ingestion/ingest-gbif-beam/pom.xml +++ b/gbif/ingestion/ingest-gbif-beam/pom.xml @@ -148,6 +148,12 @@ org.apache.avro avro + + + org.xerial.snappy + snappy-java + + diff --git a/gbif/ingestion/ingest-gbif-java/pom.xml b/gbif/ingestion/ingest-gbif-java/pom.xml index 57cbf35ee0..3992c3a856 100644 --- a/gbif/ingestion/ingest-gbif-java/pom.xml +++ b/gbif/ingestion/ingest-gbif-java/pom.xml @@ -42,6 +42,12 @@ org.elasticsearch.client elasticsearch-rest-high-level-client + + + org.elasticsearch + jna + + diff --git a/gbif/ingestion/ingestion-integration-tests/pom.xml b/gbif/ingestion/ingestion-integration-tests/pom.xml index fcf76c2bbc..c0c59f85a6 100644 --- a/gbif/ingestion/ingestion-integration-tests/pom.xml +++ b/gbif/ingestion/ingestion-integration-tests/pom.xml @@ -153,6 +153,12 @@ org.apache.spark spark-core_2.11 test + + + 
org.xerial.snappy + snappy-java + + org.apache.spark diff --git a/gbif/validator/validator-core/pom.xml b/gbif/validator/validator-core/pom.xml index fac91e8ee7..650914890f 100644 --- a/gbif/validator/validator-core/pom.xml +++ b/gbif/validator/validator-core/pom.xml @@ -94,10 +94,22 @@ org.elasticsearch.client elasticsearch-rest-high-level-client + + + org.elasticsearch + jna + + org.elasticsearch elasticsearch + + + org.elasticsearch + jna + + diff --git a/livingatlas/migration/pom.xml b/livingatlas/migration/pom.xml index 4cf3eec1fb..9e8634099e 100644 --- a/livingatlas/migration/pom.xml +++ b/livingatlas/migration/pom.xml @@ -42,6 +42,12 @@ spark-core_2.11 ${spark.version} compile + + + org.xerial.snappy + snappy-java + + org.apache.spark @@ -53,6 +59,12 @@ org.apache.avro avro + + + org.xerial.snappy + snappy-java + + com.beust diff --git a/pom.xml b/pom.xml index aa5458329a..3dd133ed63 100644 --- a/pom.xml +++ b/pom.xml @@ -536,6 +536,12 @@ org.apache.avro avro + + + org.xerial.snappy + snappy-java + + ${avro.version} @@ -741,6 +747,12 @@ org.elasticsearch.client elasticsearch-rest-high-level-client ${elasticsearch.version} + + + org.elasticsearch + jna + + org.elasticsearch @@ -870,6 +882,10 @@ log4j log4j + + org.xerial.snappy + snappy-java + org.slf4j slf4j-log4j12 diff --git a/sdks/beam-transforms/pom.xml b/sdks/beam-transforms/pom.xml index c2a571e79c..341daa7658 100644 --- a/sdks/beam-transforms/pom.xml +++ b/sdks/beam-transforms/pom.xml @@ -45,6 +45,12 @@ org.apache.avro avro + + + org.xerial.snappy + snappy-java + + diff --git a/sdks/core/pom.xml b/sdks/core/pom.xml index daff43f9f1..293a0ec813 100644 --- a/sdks/core/pom.xml +++ b/sdks/core/pom.xml @@ -192,6 +192,12 @@ org.apache.avro avro + + + org.xerial.snappy + snappy-java + + diff --git a/sdks/models/pom.xml b/sdks/models/pom.xml index c4ae09a58d..2120add57a 100644 --- a/sdks/models/pom.xml +++ b/sdks/models/pom.xml @@ -137,6 +137,12 @@ org.apache.avro avro + + + org.xerial.snappy + snappy-java + 
+ org.apache.beam diff --git a/sdks/plugins/maven-extension-avsc-schema-generator/pom.xml b/sdks/plugins/maven-extension-avsc-schema-generator/pom.xml index 74a4627abe..4b8cc6b63a 100644 --- a/sdks/plugins/maven-extension-avsc-schema-generator/pom.xml +++ b/sdks/plugins/maven-extension-avsc-schema-generator/pom.xml @@ -54,6 +54,12 @@ org.apache.avro avro + + + org.xerial.snappy + snappy-java + + diff --git a/sdks/tools/archives-converters/pom.xml b/sdks/tools/archives-converters/pom.xml index a670378a4d..78fe98a1f4 100644 --- a/sdks/tools/archives-converters/pom.xml +++ b/sdks/tools/archives-converters/pom.xml @@ -71,6 +71,12 @@ org.apache.avro avro + + + org.xerial.snappy + snappy-java + + com.google.code.findbugs From 6a103157f8e3f34063faea38a7e4e5737f2e1986 Mon Sep 17 00:00:00 2001 From: Nick dos Remedios Date: Tue, 28 Mar 2023 10:53:45 +1100 Subject: [PATCH 09/14] Updated field name: `presentInCountry` -> `taxonPresentInCountry` Based on feedback from Dave M. This avoids confusion with the DwC term `country`. 
--- livingatlas/configs/la-pipelines.yaml | 2 +- .../org/ala/pipelines/beam/SpeciesListPipeline.java | 6 +++--- .../org/ala/pipelines/java/SpeciesListPipeline.java | 6 +++--- .../options/SpeciesLevelPipelineOptions.java | 4 ++-- .../au/org/ala/pipelines/transforms/IndexFields.java | 2 +- .../pipelines/transforms/IndexRecordTransform.java | 6 +++--- .../au/org/ala/pipelines/util/SpeciesListUtils.java | 11 ++++++----- .../java/au/org/ala/specieslists/SpeciesList.java | 2 +- .../org/ala/specieslists/SpeciesListDownloader.java | 6 +++--- livingatlas/solr/conf/managed-schema | 2 +- .../src/main/avro/specific/species-list-record.avsc | 2 +- sdks/models/src/main/avro/specific/taxon-profile.avsc | 2 +- 12 files changed, 26 insertions(+), 25 deletions(-) diff --git a/livingatlas/configs/la-pipelines.yaml b/livingatlas/configs/la-pipelines.yaml index 7781986521..e40ca54295 100644 --- a/livingatlas/configs/la-pipelines.yaml +++ b/livingatlas/configs/la-pipelines.yaml @@ -192,7 +192,7 @@ speciesLists: maxDownloadAgeInMinutes: 1440 includeConservationStatus: true includeInvasiveStatus: true - includePresentInCountry: false + includeTaxonPresentInCountry: false includeTraits: false # Sampling specific configuration diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/SpeciesListPipeline.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/SpeciesListPipeline.java index b2da54142f..c33cb124f5 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/SpeciesListPipeline.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/beam/SpeciesListPipeline.java @@ -36,7 +36,7 @@ *
  • Links to species lists for records *
  • stateProvince and country associated conservation status for the record *
  • stateProvince and country associated invasive status for the record - *
  • optional `presentInCountry` flag for the record + *
  • optional `taxonPresentInCountry` flag for the record *
  • optional species `trait` values for the record * * @@ -151,7 +151,7 @@ public KV apply(KV record) { final boolean includeConservationStatus = options.getIncludeConservationStatus(); final boolean includeInvasiveStatus = options.getIncludeInvasiveStatus(); - final boolean includePresentInCountry = options.getIncludePresentInCountry(); + final boolean includeTaxonPresentInCountry = options.getIncludeTaxonPresentInCountry(); final boolean includeTraits = options.getIncludeTraits(); // join collections @@ -174,7 +174,7 @@ public void processElement(ProcessContext c) { speciesLists, includeConservationStatus, includeInvasiveStatus, - includePresentInCountry, + includeTaxonPresentInCountry, includeTraits); // output a link to each occurrence record we've matched by taxonID for (String occurrenceID : occurrenceIDs) { diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/java/SpeciesListPipeline.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/java/SpeciesListPipeline.java index 3c5c8d3f66..fa350f52da 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/java/SpeciesListPipeline.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/java/SpeciesListPipeline.java @@ -138,7 +138,7 @@ public static Map generateTaxonProfileCollection( speciesListMap, options.getIncludeConservationStatus(), options.getIncludeInvasiveStatus(), - options.getIncludePresentInCountry(), + options.getIncludeTaxonPresentInCountry(), options.getIncludeTraits())) .collect(Collectors.toList()); @@ -155,7 +155,7 @@ static TaxonProfile convertToTaxonProfile( Map> speciesListMap, boolean includeConservationStatus, boolean includeInvasiveStatus, - boolean includePresentInCountry, + boolean includeTaxonPresentInCountry, boolean includeTraits) { Iterable speciesLists = @@ -167,7 +167,7 @@ static TaxonProfile convertToTaxonProfile( speciesLists, includeConservationStatus, includeInvasiveStatus, - includePresentInCountry, + 
includeTaxonPresentInCountry, includeTraits); builder.setId(alaTaxonRecord.getId()); return builder.build(); diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/options/SpeciesLevelPipelineOptions.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/options/SpeciesLevelPipelineOptions.java index 0ddb88290c..2440fcfb90 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/options/SpeciesLevelPipelineOptions.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/options/SpeciesLevelPipelineOptions.java @@ -35,9 +35,9 @@ public interface SpeciesLevelPipelineOptions extends InterpretationPipelineOptio void setIncludeInvasiveStatus(Boolean includeInvasiveStatus); @Default.Boolean(false) - Boolean getIncludePresentInCountry(); + Boolean getIncludeTaxonPresentInCountry(); - void setIncludePresentInCountry(Boolean includePresentInCountry); + void setIncludeTaxonPresentInCountry(Boolean includeTaxonPresentInCountry); @Default.Boolean(false) Boolean getIncludeTraits(); diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java index d8596f589e..59b6f66989 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java @@ -51,7 +51,7 @@ public interface IndexFields { String POINT_0_02 = "point-0.02"; String POINT_0_1 = "point-0.1"; String POINT_1 = "point-1"; - String PRESENT_IN_COUNTRY = "presentInCountry"; + String TAXON_PRESENT_IN_COUNTRY = "taxonPresentInCountry"; String PROVENANCE = "provenance"; String TAXON_RANK = "taxonRank"; String RAW_STATE_CONSERVATION = "raw_stateConservation"; diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java 
b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java index 0880d90aa7..9c555e00d6 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexRecordTransform.java @@ -806,9 +806,9 @@ private static void addSpeciesListInfo( } } - // index presentInCountry - if (tpr.getPresentInCountry() != null) { - indexRecord.getStrings().put(PRESENT_IN_COUNTRY, tpr.getPresentInCountry()); + // index taxonPresentInCountry + if (tpr.getTaxonPresentInCountry() != null) { + indexRecord.getStrings().put(TAXON_PRESENT_IN_COUNTRY, tpr.getTaxonPresentInCountry()); } // taxon-level traits from speciesLists diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/util/SpeciesListUtils.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/util/SpeciesListUtils.java index 6d424c146c..608e4d4253 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/util/SpeciesListUtils.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/util/SpeciesListUtils.java @@ -19,7 +19,7 @@ public static TaxonProfile.Builder createTaxonProfileBuilder( Iterable speciesLists, boolean includeConservationStatus, boolean includeInvasiveStatus, - boolean includePresentInCountry, + boolean includeTaxonPresentInCountry, boolean includeTraits) { Iterator iter = speciesLists.iterator(); @@ -27,7 +27,7 @@ public static TaxonProfile.Builder createTaxonProfileBuilder( List speciesListIDs = new ArrayList<>(); List conservationStatusList = new ArrayList<>(); List invasiveStatusList = new ArrayList<>(); - String presentInCountryValue = null; + String taxonPresentInCountryValue = null; Map traitsMap = new HashMap<>(); while (iter.hasNext()) { @@ -52,8 +52,9 @@ public static TaxonProfile.Builder createTaxonProfileBuilder( .setSpeciesListID(speciesListRecord.getSpeciesListID()) 
.setRegion(speciesListRecord.getRegion()) .build()); - } else if (includePresentInCountry && speciesListRecord.getPresentInCountry() != null) { - presentInCountryValue = speciesListRecord.getPresentInCountry(); + } else if (includeTaxonPresentInCountry + && speciesListRecord.getTaxonPresentInCountry() != null) { + taxonPresentInCountryValue = speciesListRecord.getTaxonPresentInCountry(); } else if (includeTraits && speciesListRecord.getListType().equals(LIST_COMMON_TRAIT) && speciesListRecord.getTraitName() != null) { @@ -66,7 +67,7 @@ public static TaxonProfile.Builder createTaxonProfileBuilder( builder.setSpeciesListID(speciesListIDs); builder.setConservationStatuses(conservationStatusList); builder.setInvasiveStatuses(invasiveStatusList); - builder.setPresentInCountry(presentInCountryValue); + builder.setTaxonPresentInCountry(taxonPresentInCountryValue); builder.setTraits(traitsMap); return builder; } diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesList.java b/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesList.java index 237d61a1b0..c9319381a0 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesList.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesList.java @@ -26,7 +26,7 @@ public class SpeciesList { boolean isAuthoritative; boolean isInvasive; boolean isThreatened; - String presentInCountry; + String taxonPresentInCountry; @JsonPOJOBuilder(withPrefix = "") @JsonIgnoreProperties(ignoreUnknown = true) diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesListDownloader.java b/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesListDownloader.java index 22a3b99883..410cc77f25 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesListDownloader.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/specieslists/SpeciesListDownloader.java @@ -170,10 +170,10 @@ public static void 
run(SpeciesLevelPipelineOptions options) throws IOException { String count = countIdx > 0 ? currentLine[countIdx] : null; String traitName = traitNameIdx > 0 ? currentLine[traitNameIdx] : null; String traitValue = traitValueIdx > 0 ? currentLine[traitValueIdx] : null; - // ARGA addition to set `presentInCountry` to the value specified in the list's + // ARGA addition to set `taxonPresentInCountry` to the value specified in the list's // `region` attribute, when list has type "OTHER", has region set and // contains a `count` column (note: count not currently used) - String presentInCountry = + String taxonPresentInCountry = (list.getListType().equals("OTHER") && region != null && count != null) ? region : null; @@ -188,7 +188,7 @@ public static void run(SpeciesLevelPipelineOptions options) throws IOException { .setIsInvasive(list.isInvasive()) .setIsThreatened(list.isThreatened()) .setSourceStatus(sourceStatus) - .setPresentInCountry(presentInCountry) + .setTaxonPresentInCountry(taxonPresentInCountry) .setTraitName(traitName) .setTraitValue(traitValue) .build(); diff --git a/livingatlas/solr/conf/managed-schema b/livingatlas/solr/conf/managed-schema index 993c117220..ef2cf5c6ce 100644 --- a/livingatlas/solr/conf/managed-schema +++ b/livingatlas/solr/conf/managed-schema @@ -192,7 +192,7 @@ - + diff --git a/sdks/models/src/main/avro/specific/species-list-record.avsc b/sdks/models/src/main/avro/specific/species-list-record.avsc index fc4949ca47..12930ba768 100644 --- a/sdks/models/src/main/avro/specific/species-list-record.avsc +++ b/sdks/models/src/main/avro/specific/species-list-record.avsc @@ -12,7 +12,7 @@ {"name": "region", "type": ["null", "string"], "default": null }, {"name": "status", "type": ["null", "string"]}, {"name": "sourceStatus", "type": ["null", "string"]}, - {"name": "presentInCountry", "type": ["null", "string"], "default": null}, + {"name": "taxonPresentInCountry", "type": ["null", "string"], "default": null}, {"name": "traitName", "type": ["null", 
"string"], "default": null}, {"name": "traitValue", "type": ["null", "string"], "default": null} ] diff --git a/sdks/models/src/main/avro/specific/taxon-profile.avsc b/sdks/models/src/main/avro/specific/taxon-profile.avsc index d9e7d1cfd6..c4c193c70c 100644 --- a/sdks/models/src/main/avro/specific/taxon-profile.avsc +++ b/sdks/models/src/main/avro/specific/taxon-profile.avsc @@ -31,7 +31,7 @@ {"name": "speciesListID", "type": {"type" : "array", "items" : "string"}, "default" : []}, {"name": "conservationStatuses", "type": {"type" : "array", "items" : "ConservationStatus"}, "default" : []}, {"name": "invasiveStatuses", "type": {"type" : "array", "items" : "InvasiveStatus"}, "default" : []}, - {"name": "presentInCountry", "type": ["null", "string"], "default" : null}, + {"name": "taxonPresentInCountry", "type": ["null", "string"], "default" : null}, {"name": "traits", "type": {"type": "map", "values": "string"}, "default" : {}} ] } From 62f58409cee8627f0ce241ec86427a33ab3d3fbf Mon Sep 17 00:00:00 2001 From: Nick dos Remedios Date: Tue, 28 Mar 2023 11:28:46 +1100 Subject: [PATCH 10/14] Re-ordered 2 fields -> alphabetic --- .../java/au/org/ala/pipelines/transforms/IndexFields.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java index 59b6f66989..8283be44c3 100644 --- a/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java +++ b/livingatlas/pipelines/src/main/java/au/org/ala/pipelines/transforms/IndexFields.java @@ -51,9 +51,7 @@ public interface IndexFields { String POINT_0_02 = "point-0.02"; String POINT_0_1 = "point-0.1"; String POINT_1 = "point-1"; - String TAXON_PRESENT_IN_COUNTRY = "taxonPresentInCountry"; String PROVENANCE = "provenance"; - String TAXON_RANK = "taxonRank"; String RAW_STATE_CONSERVATION = "raw_stateConservation"; String 
RECORDED_BY_ID = "recordedByID"; String SENSITIVE = "sensitive"; @@ -68,6 +66,8 @@ public interface IndexFields { String SUBSPECIES = "subspecies"; String SUBSPECIES_ID = "subspeciesID"; String TAXONOMIC_ISSUES = "taxonomicIssues"; + String TAXON_PRESENT_IN_COUNTRY = "taxonPresentInCountry"; + String TAXON_RANK = "taxonRank"; String VIDEO_IDS = "videoIDs"; String DYNAMIC_PROPERTIES_PREFIX = "dynamicProperties_"; From 8e05cdd52386ac0cfd10d18ea24431efded5a405 Mon Sep 17 00:00:00 2001 From: yasima-csiro <72474143+yasima-csiro@users.noreply.github.com> Date: Thu, 27 Apr 2023 07:55:29 +1000 Subject: [PATCH 11/14] Update README.md --- livingatlas/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/livingatlas/README.md b/livingatlas/README.md index 7e3a8a473c..da550db502 100644 --- a/livingatlas/README.md +++ b/livingatlas/README.md @@ -76,7 +76,7 @@ These steps will load a dataset into a SOLR index. ```bash docker-compose -f pipelines/src/main/docker/ala-name-service.yml up -d docker-compose -f pipelines/src/main/docker/solr8.yml up -d - docker-compose -f pipelines/src/main/docker/ala-sensitive-data-service.yml + docker-compose -f pipelines/src/main/docker/ala-sensitive-data-service.yml up -d ``` Note `ala-sensitive-data-service.yml` can be ommited if you don't need to run the SDS pipeline but you'll need to add ```yaml From d8e81a02c77a0a434480f6657c047cffc723c38f Mon Sep 17 00:00:00 2001 From: yasima-csiro <72474143+yasima-csiro@users.noreply.github.com> Date: Thu, 27 Apr 2023 08:01:04 +1000 Subject: [PATCH 12/14] Update README.md --- livingatlas/README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/livingatlas/README.md b/livingatlas/README.md index da550db502..cccfd2a17c 100644 --- a/livingatlas/README.md +++ b/livingatlas/README.md @@ -67,8 +67,12 @@ These steps will load a dataset into a SOLR index. 1. 
Download shape files from [here](https://pipelines-shp.s3-ap-southeast-2.amazonaws.com/pipelines-shapefiles.zip) and expand into `/data/pipelines-shp` directory 1. Download a test darwin core archive (e.g. https://archives.ala.org.au/archives/gbif/dr893/dr893.zip) -1. Create the following directory `/data/pipelines-data` -1. Build with maven `mvn clean package` +2. Copy it to /data/biocache-load/dr893 + 1. mkdir /data/biocache-load + 2. mkdir /data/biocache-load/dr893 + 3. curl https://archives.ala.org.au/archives/gbif/dr893/dr893.zip -o /data/biocache-load/dr893/dr893.zip +4. Create the following directory `/data/pipelines-data` +5. Build with maven `mvn clean package` ### Running la-pipelines From 010a9bbf1416f2b071939154a1fad36830493929 Mon Sep 17 00:00:00 2001 From: yasima-csiro <72474143+yasima-csiro@users.noreply.github.com> Date: Thu, 27 Apr 2023 12:43:10 +1000 Subject: [PATCH 13/14] Update README.md --- livingatlas/README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/livingatlas/README.md b/livingatlas/README.md index cccfd2a17c..e5fac35fcd 100644 --- a/livingatlas/README.md +++ b/livingatlas/README.md @@ -73,6 +73,13 @@ These steps will load a dataset into a SOLR index. 3. curl https://archives.ala.org.au/archives/gbif/dr893/dr893.zip -o /data/biocache-load/dr893/dr893.zip 4. Create the following directory `/data/pipelines-data` 5. Build with maven `mvn clean package` +6. Download vocabularies + 1. mkdir /data/pipelines-vocabularies + 2. cd /data/pipelines-vocabularies + 3. curl -sS https://api.gbif.org/v1/vocabularies/DegreeOfEstablishment/releases/LATEST/export > DegreeOfEstablishment.json + 4. curl -sS https://api.gbif.org/v1/vocabularies/LifeStage/releases/LATEST/export > LifeStage.json + 5. curl -sS https://api.gbif.org/v1/vocabularies/EstablishmentMeans/releases/LATEST/export > EstablishmentMeans.json + 6. 
curl -sS https://api.gbif.org/v1/vocabularies/Pathway/releases/LATEST/export > Pathway.json ### Running la-pipelines From df9979ffb6ef7e1165fec8016c92e4f96f26ed81 Mon Sep 17 00:00:00 2001 From: yasima-csiro <72474143+yasima-csiro@users.noreply.github.com> Date: Thu, 27 Apr 2023 13:17:48 +1000 Subject: [PATCH 14/14] Update README.md --- livingatlas/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/livingatlas/README.md b/livingatlas/README.md index e5fac35fcd..f365357026 100644 --- a/livingatlas/README.md +++ b/livingatlas/README.md @@ -92,7 +92,7 @@ These steps will load a dataset into a SOLR index. Note `ala-sensitive-data-service.yml` can be ommited if you don't need to run the SDS pipeline but you'll need to add ```yaml index: - includeSensitiveData: false + includeSensitiveDataChecks: false ``` to the file `configs/la-pipelines-local.yaml`. 1. `cd scripts`