-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'dev' into 622_distribution
- Loading branch information
Showing
97 changed files
with
6,598 additions
and
592 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
159 changes: 159 additions & 0 deletions
159
...s-integration-tests/src/test/java/org/gbif/pipelines/tasks/cleaner/CleanerCallbackIT.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
package org.gbif.pipelines.tasks.cleaner; | ||
|
||
import static org.gbif.pipelines.estools.common.SettingsType.INDEXING; | ||
import static org.junit.Assert.assertEquals; | ||
import static org.junit.Assert.assertFalse; | ||
import static org.junit.Assert.assertNotNull; | ||
|
||
import java.nio.file.Files; | ||
import java.nio.file.Paths; | ||
import java.util.Arrays; | ||
import java.util.Collections; | ||
import java.util.HashSet; | ||
import java.util.UUID; | ||
import org.gbif.common.messaging.api.messages.PipelinesCleanerMessage; | ||
import org.gbif.pipelines.estools.EsIndex; | ||
import org.gbif.pipelines.estools.model.IndexParams; | ||
import org.gbif.pipelines.estools.service.EsService; | ||
import org.gbif.pipelines.tasks.ValidationWsClientStub; | ||
import org.gbif.pipelines.tasks.utils.EsServer; | ||
import org.gbif.validator.ws.client.ValidationWsClient; | ||
import org.junit.Before; | ||
import org.junit.ClassRule; | ||
import org.junit.Test; | ||
|
||
public class CleanerCallbackIT { | ||
|
||
@ClassRule public static final EsServer ES_SERVER = new EsServer(); | ||
|
||
@Before | ||
public void cleanIndexes() { | ||
EsService.deleteAllIndexes(ES_SERVER.getEsClient()); | ||
} | ||
|
||
@Test | ||
public void cleanerDeleteEsRecordsTest() { | ||
|
||
// State | ||
String datasetUuid = "8a4934ac-7d7f-41d4-892c-f6b71bb777a3"; | ||
CleanerConfiguration config = createConfig(); | ||
PipelinesCleanerMessage message = createMessage(datasetUuid); | ||
ValidationWsClient validationClient = ValidationWsClientStub.create(); | ||
|
||
// Index document | ||
String document = | ||
"{\"datasetKey\":\"" | ||
+ datasetUuid | ||
+ "\",\"maximumElevationInMeters\":2.2,\"issues\":" | ||
+ "[\"GEODETIC_DATUM_ASSUMED_WGS84\",\"LICENSE_MISSING_OR_UNKNOWN\"],\"verbatim\":{\"core\":" | ||
+ "{\"http://rs.tdwg.org/dwc/terms/maximumElevationInMeters\":\"1150\"," | ||
+ "\"http://rs.tdwg.org/dwc/terms/organismID\":\"251\",\"http://rs.tdwg.org/dwc/terms/bed\":\"251\"},\"extensions\":" | ||
+ "{\"http://rs.tdwg.org/dwc/terms/MeasurementOrFact\":[{\"http://rs.tdwg.org/dwc/terms/measurementValue\":" | ||
+ "\"1.7\"},{\"http://rs.tdwg.org/dwc/terms/measurementValue\":\"5.0\"}," | ||
+ "{\"http://rs.tdwg.org/dwc/terms/measurementValue\":\"5.83\"}]}}}"; | ||
|
||
EsIndex.createIndex( | ||
ES_SERVER.getEsConfig(), | ||
IndexParams.builder() | ||
.indexName(config.esAliases[0]) | ||
.settingsType(INDEXING) | ||
.pathMappings(Paths.get("mappings/verbatim-mapping.json")) | ||
.build()); | ||
|
||
EsService.indexDocument(ES_SERVER.getEsClient(), config.esAliases[0], 1L, document); | ||
EsService.refreshIndex(ES_SERVER.getEsClient(), config.esAliases[0]); | ||
|
||
// When | ||
new CleanerCallback(config, validationClient).handleMessage(message); | ||
|
||
// Update deleted data available | ||
EsService.refreshIndex(ES_SERVER.getEsClient(), config.esAliases[0]); | ||
|
||
// Should | ||
assertFalse(Files.exists(Paths.get(String.join("/", config.fsRootPath, datasetUuid)))); | ||
assertFalse(Files.exists(Paths.get(String.join("/", config.hdfsRootPath, datasetUuid)))); | ||
assertEquals(0L, EsService.countIndexDocuments(ES_SERVER.getEsClient(), config.esAliases[0])); | ||
assertNotNull(validationClient.get(UUID.fromString(datasetUuid)).getDeleted()); | ||
} | ||
|
||
@Test | ||
public void cleanerDeleteEsIndexTest() { | ||
|
||
// State | ||
String datasetUuid = "8a4934ac-7d7f-41d4-892c-f6b71bb777a3"; | ||
CleanerConfiguration config = createConfig(); | ||
PipelinesCleanerMessage message = createMessage(datasetUuid); | ||
ValidationWsClient validationClient = ValidationWsClientStub.create(); | ||
|
||
// Index document | ||
String document = | ||
"{\"datasetKey\":\"" | ||
+ datasetUuid | ||
+ "\",\"maximumElevationInMeters\":2.2,\"issues\":" | ||
+ "[\"GEODETIC_DATUM_ASSUMED_WGS84\",\"LICENSE_MISSING_OR_UNKNOWN\"],\"verbatim\":{\"core\":" | ||
+ "{\"http://rs.tdwg.org/dwc/terms/maximumElevationInMeters\":\"1150\"," | ||
+ "\"http://rs.tdwg.org/dwc/terms/organismID\":\"251\",\"http://rs.tdwg.org/dwc/terms/bed\":\"251\"},\"extensions\":" | ||
+ "{\"http://rs.tdwg.org/dwc/terms/MeasurementOrFact\":[{\"http://rs.tdwg.org/dwc/terms/measurementValue\":" | ||
+ "\"1.7\"},{\"http://rs.tdwg.org/dwc/terms/measurementValue\":\"5.0\"}," | ||
+ "{\"http://rs.tdwg.org/dwc/terms/measurementValue\":\"5.83\"}]}}}"; | ||
|
||
String indexName = datasetUuid + "_vld_123123"; | ||
String indexToSwap = datasetUuid + "_vld_777777"; | ||
|
||
EsIndex.createIndex( | ||
ES_SERVER.getEsConfig(), | ||
IndexParams.builder() | ||
.indexName(indexName) | ||
.settingsType(INDEXING) | ||
.pathMappings(Paths.get("mappings/verbatim-mapping.json")) | ||
.build()); | ||
|
||
EsIndex.createIndex( | ||
ES_SERVER.getEsConfig(), | ||
IndexParams.builder() | ||
.indexName(indexToSwap) | ||
.settingsType(INDEXING) | ||
.pathMappings(Paths.get("mappings/verbatim-mapping.json")) | ||
.build()); | ||
|
||
EsService.indexDocument(ES_SERVER.getEsClient(), indexName, 1L, document); | ||
EsService.refreshIndex(ES_SERVER.getEsClient(), indexName); | ||
EsService.swapIndexes( | ||
ES_SERVER.getEsClient(), | ||
new HashSet<>(Arrays.asList(config.esAliases)), | ||
Collections.singleton(indexName), | ||
Collections.singleton(indexToSwap)); | ||
|
||
// When | ||
new CleanerCallback(config, validationClient).handleMessage(message); | ||
|
||
// Should | ||
assertFalse(Files.exists(Paths.get(String.join("/", config.fsRootPath, datasetUuid)))); | ||
assertFalse(Files.exists(Paths.get(String.join("/", config.hdfsRootPath, datasetUuid)))); | ||
assertFalse(EsService.existsIndex(ES_SERVER.getEsClient(), indexName)); | ||
assertNotNull(validationClient.get(UUID.fromString(datasetUuid)).getDeleted()); | ||
} | ||
|
||
private PipelinesCleanerMessage createMessage(String datasetUuid) { | ||
PipelinesCleanerMessage message = new PipelinesCleanerMessage(); | ||
message.setDatasetUuid(UUID.fromString(datasetUuid)); | ||
message.setAttempt(1); | ||
message.setValidator(true); | ||
return message; | ||
} | ||
|
||
private CleanerConfiguration createConfig() { | ||
CleanerConfiguration config = new CleanerConfiguration(); | ||
// ES | ||
config.esHosts = ES_SERVER.getEsConfig().getRawHosts(); | ||
config.esAliases = new String[] {"validator"}; | ||
// | ||
config.fsRootPath = getClass().getResource("/cleaner/fs").getPath(); | ||
config.hdfsRootPath = getClass().getResource("/cleaner/hdfs").getPath(); | ||
|
||
// Step config | ||
config.stepConfig.coreSiteConfig = ""; | ||
config.stepConfig.hdfsSiteConfig = ""; | ||
return config; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Empty file.
39 changes: 39 additions & 0 deletions
39
...wca/b578802e-f1ca-4e5b-acf8-4d45306e6b48/dataset/b578802e-f1ca-4e5b-acf8-4d45306e6b48.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
<?xml version="1.0" encoding="utf-8"?> | ||
<eml:eml xmlns:eml="eml://ecoinformatics.org/eml-2.1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xsi:schemaLocation="eml://ecoinformatics.org/eml-2.1.1 http://rs.gbif.org/schema/eml-gbif-profile/1.1/eml.xsd" | ||
packageId="8575f23e-f762-11e1-a439-00145eb45e9a" system="http://gbif.org" scope="system" | ||
xml:lang="en"> | ||
|
||
<dataset> | ||
<alternateIdentifier>doi:10.15468/igasai</alternateIdentifier> | ||
<alternateIdentifier>1099</alternateIdentifier> | ||
<title>PonTaurus collection</title> | ||
<associatedParty> | ||
<address> | ||
</address> | ||
<electronicMailAddress>[email protected]</electronicMailAddress> <role>TECHNICAL_POINT_OF_CONTACT</role> | ||
</associatedParty> | ||
<language>en</language> | ||
<abstract> | ||
<para>Plant specimens gathered in the Toroslar mountain range of southern Turkey and the Pontic mountain range in north eastern Turkey in 1999. The collection mainly covers grass vegetation plots of the subalpine level. It was collected together with many more observation records for vegetational studies applying phytosociological analysis. The resulting thesis was released in the public domain and is available at <a href="http://www.archive.org/details/VegetationskundlicheUntersuchungenInDerHochgebirgsregionDerBolkar">http://www.archive.org/details/VegetationskundlicheUntersuchungenInDerHochgebirgsregionDerBolkar</a>. Specimens have been deposited at the Berlin Botanical Garden Herbarium (B) with duplicates sent to the Istanbul herbarium and the private collection of Gerald Parolly who supervised this work.</para> | ||
</abstract> | ||
<intellectualRights> | ||
<para>This work is licensed under a <ulink url="http://creativecommons.org/licenses/by/4.0/legalcode"><citetitle>Creative Commons Attribution (CC-BY) 4.0 License</citetitle></ulink>.</para> | ||
</intellectualRights> | ||
<contact> | ||
<address> | ||
</address> | ||
<electronicMailAddress>[email protected]</electronicMailAddress> </contact> | ||
|
||
</dataset> | ||
|
||
<additionalMetadata> | ||
<metadata> | ||
<gbif> | ||
<dateStamp>2017-10-09T13:31:37Z</dateStamp> | ||
<citation>Botanic Garden and Botanical Museum Berlin-Dahlem. PonTaurus collection. Occurrence Dataset https://doi.org/10.15468/igasai accessed via GBIF.org on 2017-10-09.</citation> | ||
</gbif> | ||
</metadata> | ||
</additionalMetadata> | ||
|
||
</eml:eml> |
62 changes: 62 additions & 0 deletions
62
...ration-tests/src/test/resources/dataset/dwca/b578802e-f1ca-4e5b-acf8-4d45306e6b48/eml.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
<?xml version="1.0" encoding="utf-8"?> | ||
<eml:eml xmlns:eml="eml://ecoinformatics.org/eml-2.1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xsi:schemaLocation="eml://ecoinformatics.org/eml-2.1.1 http://rs.gbif.org/schema/eml-gbif-profile/1.1/eml.xsd" | ||
packageId="10.15468/dl.a2ggjq" system="http://gbif.org" scope="system" | ||
xml:lang="en"> | ||
|
||
<dataset> | ||
<alternateIdentifier>0002149-171002173027117</alternateIdentifier> | ||
<title>GBIF Occurrence Download 10.15468/dl.a2ggjq</title> | ||
<creator> | ||
<individualName> | ||
<surName>GBIF Download Service</surName> | ||
</individualName> | ||
<electronicMailAddress>[email protected]</electronicMailAddress> </creator> | ||
<metadataProvider> | ||
<individualName> | ||
<surName>GBIF Download Service</surName> | ||
</individualName> | ||
<electronicMailAddress>[email protected]</electronicMailAddress> </metadataProvider> | ||
<pubDate> | ||
2017-10-09 | ||
</pubDate> | ||
<language>ENGLISH</language> | ||
<abstract> | ||
<para>A dataset containing all occurrences available in GBIF matching the query: | ||
DatasetKey: PonTaurus collection | ||
The dataset includes records from the following constituent datasets. The full metadata for each constituent is also included in this archive: | ||
1534 records from PonTaurus collection | ||
</para> | ||
</abstract> | ||
<contact> | ||
<individualName> | ||
<surName>GBIF Download Service</surName> | ||
</individualName> | ||
<electronicMailAddress>[email protected]</electronicMailAddress> </contact> | ||
|
||
</dataset> | ||
|
||
<additionalMetadata> | ||
<metadata> | ||
<gbif> | ||
<dateStamp>2017-10-09T13:31:37Z</dateStamp> | ||
<citation identifier="10.15468/dl.a2ggjq">GBIF Occurrence Download 10.15468/dl.a2ggjq</citation> | ||
<physical> | ||
<objectName></objectName> | ||
<characterEncoding>UTF-8</characterEncoding> | ||
<dataFormat> | ||
<externallyDefinedFormat> | ||
<formatName>Darwin Core Archive</formatName> | ||
</externallyDefinedFormat> | ||
</dataFormat> | ||
<distribution> | ||
<online> | ||
<url function="download">http://api.gbif.org/v1/occurrence/download/request/0002149-171002173027117.zip</url> | ||
</online> | ||
</distribution> | ||
</physical> | ||
</gbif> | ||
</metadata> | ||
</additionalMetadata> | ||
|
||
</eml:eml> |
Oops, something went wrong.