Skip to content

Commit

Permalink
Add integration test for ALTER VIRTUAL SCHEMA <schema> REFRESH (#165)
Browse files Browse the repository at this point in the history
* Add integration test for ALTER VIRTUAL SCHEMA <schema> REFRESH

* Add note about added method

* Update release notes
  • Loading branch information
kaklakariada authored Jun 28, 2024
1 parent e7c1256 commit 856c3b5
Show file tree
Hide file tree
Showing 6 changed files with 138 additions and 11 deletions.
2 changes: 1 addition & 1 deletion dependencies.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions doc/changes/changelog.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions doc/changes/changes_8.1.1.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Virtual Schema for Document Data in Files 8.1.1, released 2024-06-28

Code name: Test `ALTER VIRTUAL SCHEMA <schema> REFRESH`

## Summary

This release adds integration tests that verify that `ALTER VIRTUAL SCHEMA <schema> REFRESH` reads the updated EDML mapping from BucketFS.

**Note:** There is a [known issue](https://exasol.my.site.com/s/article/Changelog-content-20991) in the Exasol database that causes changes to a virtual schema to be lost when only adapter notes are changed but not the schema itself.

**Note:** This adds the following method to `AbstractDocumentFilesAdapterIT`. Update your test to add an implementation for this method.

```java
protected abstract Bucket getBucketFSDefaultBucket();
```

## Dependency Updates

### Compile Dependency Updates

* Updated `de.siegmar:fastcsv:3.1.0` to `3.2.0`

### Test Dependency Updates

* Updated `org.junit.jupiter:junit-jupiter-params:5.10.2` to `5.10.3`
2 changes: 1 addition & 1 deletion pk_generated_parent.pom

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>virtual-schema-common-document-files</artifactId>
<version>8.1.0</version>
<version>8.1.1</version>
<name>Virtual Schema for document data in files</name>
<description>Adapter for document data access from files.</description>
<url>https://github.com/exasol/virtual-schema-common-document-files/</url>
Expand All @@ -24,7 +24,7 @@
<!-- https://mvnrepository.com/artifact/de.siegmar/fastcsv -->
<groupId>de.siegmar</groupId>
<artifactId>fastcsv</artifactId>
<version>3.1.0</version>
<version>3.2.0</version>
</dependency>
<dependency>
<groupId>io.deephaven</groupId>
Expand Down Expand Up @@ -59,7 +59,7 @@
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-params</artifactId>
<version>5.10.2</version>
<version>5.10.3</version>
<scope>test</scope>
</dependency>
<dependency>
Expand Down Expand Up @@ -162,7 +162,7 @@
<parent>
<artifactId>virtual-schema-common-document-files-generated-parent</artifactId>
<groupId>com.exasol</groupId>
<version>8.1.0</version>
<version>8.1.1</version>
<relativePath>pk_generated_parent.pom</relativePath>
</parent>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.time.Duration;
import java.time.Instant;
import java.util.*;
import java.util.concurrent.TimeoutException;
import java.util.function.Supplier;
import java.util.logging.Logger;

Expand All @@ -35,8 +36,12 @@
import com.exasol.adapter.document.documentfetcher.files.parquet.ParquetTestSetup;
import com.exasol.adapter.document.edml.*;
import com.exasol.adapter.document.edml.EdmlDefinition.EdmlDefinitionBuilder;
import com.exasol.adapter.document.edml.ToVarcharMapping.ToVarcharMappingBuilder;
import com.exasol.adapter.document.edml.serializer.EdmlSerializer;
import com.exasol.adapter.document.testutil.csvgenerator.CsvTestDataGenerator;
import com.exasol.bucketfs.Bucket;
import com.exasol.bucketfs.BucketAccessException;
import com.exasol.matcher.ResultSetStructureMatcher;
import com.exasol.matcher.ResultSetStructureMatcher.Builder;
import com.exasol.matcher.TypeMatchMode;
import com.exasol.performancetestrecorder.PerformanceTestRecorder;
Expand Down Expand Up @@ -85,6 +90,22 @@ public abstract class AbstractDocumentFilesAdapterIT {
*/
protected abstract void createVirtualSchema(String schemaName, String mapping);

/**
 * Get the default BucketFS bucket of the Exasol test instance.
 * <p>
 * The bucket is used by this base class to upload EDML mapping files (see {@code uploadEdml}, which builds paths
 * under {@code /bfsdefault/default/}). Concrete integration tests must implement this method.
 *
 * @return default bucket
 */
protected abstract Bucket getBucketFSDefaultBucket();

/**
 * Upload the given string content to the default BucketFS bucket.
 *
 * @param content      content to upload
 * @param bucketFsPath path inside the bucket to write the content to
 * @throws IllegalStateException if the upload fails or is interrupted
 */
private void uploadStringContentToBucketFs(final String content, final String bucketFsPath) {
    try {
        getBucketFSDefaultBucket().uploadStringContent(content, bucketFsPath);
    } catch (final InterruptedException exception) {
        // Restore the interrupt flag so callers further up the stack can still observe the interruption.
        Thread.currentThread().interrupt();
        throw new IllegalStateException(
                "Failed to upload content '" + content + "' to bucket fs path '" + bucketFsPath + "'", exception);
    } catch (final BucketAccessException | TimeoutException exception) {
        // Original message contained a stray extra apostrophe ("'' to bucket fs path"); fixed here.
        throw new IllegalStateException(
                "Failed to upload content '" + content + "' to bucket fs path '" + bucketFsPath + "'", exception);
    }
}

private void createVirtualSchemaWithMappingFromResource(final String schemaName, final String resourceName)
throws IOException {
final String mappingTemplate = getMappingTemplate(resourceName);
Expand Down Expand Up @@ -860,27 +881,107 @@ public void testOverrideFileType() throws IOException {
assertQuery("SELECT ID FROM " + TEST_SCHEMA + ".BOOKS", table().row("book-1").row("book-2").matches());
}

/**
 * Integration test: {@code ALTER VIRTUAL SCHEMA <schema> REFRESH} must re-read the EDML mapping from BucketFS.
 * <p>
 * The test creates a virtual schema from a mapping that keeps the original CSV column names, then overwrites the
 * mapping file in BucketFS with one converting names to UPPER_SNAKE_CASE, refreshes the schema, and verifies that
 * the renamed columns are queryable.
 */
@Test
public void refreshVirtualSchemaReadsUpdatedEdmlMapping() {
    // CSV header deliberately mixes case and contains spaces so the two column-name mappings differ visibly.
    this.uploadFileContent("testData-1.csv",
            List.of("STR, boolCol, decimalCol, intCol,double col,date col,Timestamp Col",
                    "\"test1\",true,1.23,42,2.5,2007-12-03,2007-12-03 10:15:30.00",
                    "test2,FALSE,1.22e-4,-17,-3.5,2023-04-20,2007-12-03 10:15:30.00"));

    // First mapping version: keep original column names (mixed-case names need quoted identifiers below).
    final EdmlDefinitionBuilder edmlDefinition = EdmlDefinition.builder()
            .source(this.dataFilesDirectory + "/testData-*.csv").destinationTable("BOOKS")
            .autoInferenceColumnNames(ColumnNameMapping.KEEP_ORIGINAL_NAME);
    final String mappingFile = "mapping.json";
    final String mappingBucketFsPath = uploadEdml(edmlDefinition, mappingFile);
    this.createVirtualSchema("TEST", mappingBucketFsPath);
    this.assertQuery(
            "SELECT str, \"boolCol\", \"decimalCol\", \"intCol\", \"double col\", \"date col\", \"Timestamp Col\" FROM TEST.BOOKS",
            ResultSetStructureMatcher
                    .table(new String[] { "VARCHAR", "BOOLEAN", "DOUBLE PRECISION", "BIGINT", "DOUBLE PRECISION",
                            "VARCHAR", "VARCHAR" })
                    .row(new Object[] { "test1", true, 1.23, 42, 2.5, "2007-12-03", "2007-12-03 10:15:30.00" })
                    .row(new Object[] { "test2", false, 1.22E-4, -17, -3.5, "2023-04-20",
                            "2007-12-03 10:15:30.00" })
                    .matches(TypeMatchMode.NO_JAVA_TYPE_CHECK));

    // Second mapping version: same builder, switched to UPPER_SNAKE_CASE names; overwrite the file and refresh.
    uploadEdml(edmlDefinition.autoInferenceColumnNames(ColumnNameMapping.CONVERT_TO_UPPER_SNAKE_CASE), mappingFile);
    refreshVirtualSchema("TEST");
    // After REFRESH the virtual schema must expose the upper-snake-case column names.
    this.assertQuery(
            "SELECT STR, BOOL_COL, DECIMAL_COL, INT_COL, DOUBLE_COL, DATE_COL, TIMESTAMP_COL FROM TEST.BOOKS",
            ResultSetStructureMatcher
                    .table(new String[] { "VARCHAR", "BOOLEAN", "DOUBLE PRECISION", "BIGINT", "DOUBLE PRECISION",
                            "VARCHAR", "VARCHAR" })
                    .row(new Object[] { "test1", true, 1.23, 42, 2.5, "2007-12-03", "2007-12-03 10:15:30.00" })
                    .row(new Object[] { "test2", false, 1.22E-4, -17, -3.5, "2023-04-20",
                            "2007-12-03 10:15:30.00" })
                    .matches(TypeMatchMode.NO_JAVA_TYPE_CHECK));
}

/**
 * Integration test documenting a known Exasol limitation: {@code ALTER VIRTUAL SCHEMA ... REFRESH} loses mapping
 * changes that only affect adapter notes (here: the overflow behaviour of a column) but not the schema structure.
 * See https://exasol.my.site.com/s/article/Changelog-content-20991.
 * <p>
 * Note: the method was renamed from {@code refreshVirtualSchemaReadsIgnoresNonSchemaChanges} — the original name
 * was garbled by a copy-paste from the preceding test. JUnit discovers test methods via the {@code @Test}
 * annotation, so the rename is safe.
 */
@Test
public void refreshVirtualSchemaIgnoresNonSchemaChanges() {
    // One value fits the 6-character limit, the other exceeds it.
    this.uploadFileContent("testData-1.csv", List.of("short", "very_long_string"));

    // Initial mapping: ABORT on overflow, so querying the long value must fail.
    final ToVarcharMappingBuilder<?> fieldMapping = ToVarcharMapping.builder().destinationName("EXA_COL")
            .varcharColumnSize(6).overflowBehaviour(TruncateableMappingErrorBehaviour.ABORT);
    final EdmlDefinitionBuilder edmlDefinition = EdmlDefinition.builder()
            .source(this.dataFilesDirectory + "/testData-*.csv").destinationTable("BOOKS")
            .mapping(Fields.builder().mapField("0", fieldMapping.build()).build());
    final String mappingFile = "mapping.json";
    final String mappingBucketFsPath = uploadEdml(edmlDefinition, mappingFile);
    this.createVirtualSchema("TEST", mappingBucketFsPath);
    this.assertQueryFails("SELECT * FROM TEST.BOOKS", containsString(
            "OverflowException: E-VSD-38: A value for column 'EXA_COL' exceeded the configured varcharColumnSize of 6."));

    // Change only the overflow behaviour to TRUNCATE (adapter-notes-only change) and refresh the schema.
    uploadEdml(edmlDefinition.mapping(Fields.builder()
            .mapField("0", fieldMapping.overflowBehaviour(TruncateableMappingErrorBehaviour.TRUNCATE).build())
            .build()), mappingFile);
    refreshVirtualSchema("TEST");
    // This should succeed and return the truncated value but fails due to a bug in Exasol:
    // https://exasol.my.site.com/s/article/Changelog-content-20991
    final Exception exception = assertThrows(IllegalStateException.class,
            () -> this.assertQuery("SELECT * FROM TEST.BOOKS",
                    ResultSetStructureMatcher.table("VARCHAR").row("short").row("very_l").matches()));
    assertThat(exception.getMessage(), containsString(
            "OverflowException: E-VSD-38: A value for column 'EXA_COL' exceeded the configured varcharColumnSize of 6."));
}

/**
 * Execute {@code ALTER VIRTUAL SCHEMA "<schemaName>" REFRESH} and log how long the refresh took.
 *
 * @param schemaName name of the virtual schema to refresh
 * @throws IllegalStateException if executing the statement fails
 */
protected void refreshVirtualSchema(final String schemaName) {
    final String refreshStatement = "ALTER VIRTUAL SCHEMA \"" + schemaName + "\" REFRESH";
    final Instant refreshStart = Instant.now();
    try {
        this.getStatement().execute(refreshStatement);
    } catch (final SQLException exception) {
        throw new IllegalStateException("Failed to refresh virtual schema.", exception);
    }
    LOGGER.info(() -> "Schema " + schemaName + " refreshed in " + Duration.between(refreshStart, Instant.now()));
}

/**
 * Serialize the given EDML definition and upload it to the default bucket.
 *
 * @param edmlDefinition builder for the EDML definition to serialize
 * @param mappingFile    file name inside the bucket
 * @return BucketFS path of the uploaded mapping as used in {@code CREATE VIRTUAL SCHEMA}
 */
protected String uploadEdml(final EdmlDefinitionBuilder edmlDefinition, final String mappingFile) {
    final EdmlSerializer serializer = new EdmlSerializer();
    final String serializedMapping = serializer.serialize(edmlDefinition.build());
    uploadStringContentToBucketFs(serializedMapping, mappingFile);
    return "/bfsdefault/default/" + mappingFile;
}

/**
 * Upload the Parquet file of the given test setup as {@code testData-<fileIndex>.parquet}.
 *
 * @param parquetFile Parquet test setup providing the file to upload
 * @param fileIndex   index used in the generated file name
 */
protected void uploadAsParquetFile(final ParquetTestSetup parquetFile, final int fileIndex) {
    uploadAsParquetFile(parquetFile.getParquetFile(), fileIndex);
}

private void uploadAsParquetFile(final Path parquetFile, final int fileIndex) {
/**
 * Upload the given Parquet file to the data files directory as {@code testData-<fileIndex>.parquet}.
 * Visibility is {@code protected} so concrete integration tests can upload prepared files directly.
 *
 * @param parquetFile local path of the Parquet file to upload
 * @param fileIndex   index used in the generated file name
 */
protected void uploadAsParquetFile(final Path parquetFile, final int fileIndex) {
    final String resourceName = this.dataFilesDirectory + "/testData-" + fileIndex + ".parquet";
    LOGGER.fine("Uploading parquet " + resourceName + "...");
    uploadDataFile(parquetFile, resourceName);
}

private void uploadAsCsvFile(final Path csvFile, final int fileIndex) {
/**
 * Upload the given CSV file to the data files directory as {@code testData-<fileIndex>.csv}.
 * Visibility is {@code protected} so concrete integration tests can upload prepared files directly.
 *
 * @param csvFile   local path of the CSV file to upload
 * @param fileIndex index used in the generated file name
 */
protected void uploadAsCsvFile(final Path csvFile, final int fileIndex) {
    final String resourceName = this.dataFilesDirectory + "/testData-" + fileIndex + ".csv";
    LOGGER.fine("Uploading CSV " + resourceName + "...");
    uploadDataFile(csvFile, resourceName);
}

private void uploadFileContent(final String resourceName, final List<String> content) {
/**
 * Upload the given lines, joined with {@code '\n'}, as a text file to the data files directory.
 *
 * @param resourceName name of the file to create
 * @param content      lines of the file (joined without a trailing newline)
 */
public void uploadFileContent(final String resourceName, final List<String> content) {
    // String.join is the idiomatic (and allocation-cheaper) equivalent of
    // content.stream().collect(joining("\n")).
    uploadFileContent(resourceName, String.join("\n", content));
}

private void uploadFileContent(final String resourceName, final String content) {
public void uploadFileContent(final String resourceName, final String content) {
try {
final Path tempFile = Files.createTempFile(this.tempDir, "upload-content", ".data");
Files.write(tempFile, content.getBytes(StandardCharsets.UTF_8));
Expand All @@ -901,7 +1002,7 @@ protected void assertQuery(final String query, final Matcher<ResultSet> matcher)
LOGGER.fine(() -> "Executed query in " + Duration.between(start, Instant.now()) + ": '" + query + "'");
}

private void assertQueryFails(final String query, final Matcher<String> exceptionMessageMatcher) {
protected void assertQueryFails(final String query, final Matcher<String> exceptionMessageMatcher) {
final SQLDataException exception = assertThrows(SQLDataException.class,
() -> getStatement().executeQuery(query));
assertThat(exception.getMessage(), exceptionMessageMatcher);
Expand Down

0 comments on commit 856c3b5

Please sign in to comment.